]>
git.proxmox.com Git - ceph.git/blob - ceph/src/msg/async/dpdk/dpdk_rte.cc
/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
21 #include <rte_config.h>
22 #include <rte_common.h>
23 #include <rte_ethdev.h>
24 #include <rte_version.h>
26 #include "include/str_map.h"
33 static inline std::vector
<char> string2vector(std::string str
) {
34 auto v
= std::vector
<char>(str
.begin(), str
.end());
// Count the bits that are set in n.
//
// n is loaded into a std::bitset that is exactly as wide as its type
// (CHAR_BIT * sizeof(n) bits), so every bit of the argument is considered.
//
// Returns the number of 1 bits in n.
static int bitcount(unsigned long long n)
{
  // bitset::count() yields a size_t; the result can never exceed the bit
  // width of n, so the conversion to int is lossless.
  return static_cast<int>(std::bitset<CHAR_BIT * sizeof(n)>{n}.count());
}
44 static int hex2bitcount(unsigned char c
)
57 static int coremask_bitcount(const char *buf
)
62 ((buf
[1] == 'x') || (buf
[1] == 'X')))
65 for (int i
= 0; buf
[i
] != '\0'; i
++) {
69 count
+= hex2bitcount(c
);
74 bool eal::rte_initialized
= false;
83 auto coremask
= cct
->_conf
.get_val
<std::string
>("ms_dpdk_coremask");
84 int coremaskbit
= coremask_bitcount(coremask
.c_str());
87 || static_cast<uint64_t>(coremaskbit
) < cct
->_conf
->ms_async_op_threads
)
90 t
= std::thread([&]() {
91 // TODO: Inherit these from the app parameters - "opts"
92 std::vector
<std::vector
<char>> args
{
93 string2vector("ceph"),
94 string2vector("-c"), string2vector(cct
->_conf
.get_val
<std::string
>("ms_dpdk_coremask")),
95 string2vector("-n"), string2vector(cct
->_conf
->ms_dpdk_memory_channel
),
98 std::optional
<std::string
> hugepages_path
;
99 if (!cct
->_conf
->ms_dpdk_hugepages
.empty()) {
100 hugepages_path
.emplace(cct
->_conf
->ms_dpdk_hugepages
);
103 // If "hugepages" is not provided and DPDK PMD drivers mode is requested -
104 // use the default DPDK huge tables configuration.
105 if (hugepages_path
) {
106 args
.push_back(string2vector("--huge-dir"));
107 args
.push_back(string2vector(*hugepages_path
));
110 // We don't know what is going to be our networking configuration so we
111 // assume there is going to be a queue per-CPU. Plus we'll give a DPDK
112 // 64MB for "other stuff".
115 std::stringstream ss
;
116 ss
<< std::hex
<< "fffefffe";
118 size_t size_MB
= mem_size(bitcount(x
)) >> 20;
119 std::stringstream size_MB_str
;
120 size_MB_str
<< size_MB
;
122 args
.push_back(string2vector("-m"));
123 args
.push_back(string2vector(size_MB_str
.str()));
124 } else if (!cct
->_conf
->ms_dpdk_pmd
.empty()) {
125 args
.push_back(string2vector("--no-huge"));
128 for_each_pair(cct
->_conf
.get_val
<std::string
>("ms_dpdk_devs_allowlist"), " ",
129 [&args
] (std::string_view key
, std::string_view val
) {
130 args
.push_back(string2vector(std::string(key
)));
132 args
.push_back(string2vector(std::string(val
)));
136 std::string rte_file_prefix
;
137 rte_file_prefix
= "rte_";
138 rte_file_prefix
+= cct
->_conf
->name
.to_str();
139 args
.push_back(string2vector("--file-prefix"));
140 args
.push_back(string2vector(rte_file_prefix
));
142 std::vector
<char*> cargs
;
144 for (auto&& a
: args
) {
145 cargs
.push_back(a
.data());
147 if (!rte_initialized
) {
148 /* initialise the EAL for all */
149 int ret
= rte_eal_init(cargs
.size(), cargs
.data());
151 std::unique_lock
locker(lock
);
156 rte_initialized
= true;
159 std::unique_lock
locker(lock
);
164 cond
.wait(locker
, [this] { return !funcs
.empty() || stopped
; });
165 if (!funcs
.empty()) {
166 auto f
= std::move(funcs
.front());
173 std::unique_lock
locker(lock
);
174 cond
.wait(locker
, [&] { return done
; });
175 return initialized
? 0 : -EIO
;
178 size_t eal::mem_size(int num_cpus
)
182 // PMD mempool memory:
184 // We don't know what is going to be our networking configuration so we
185 // assume there is going to be a queue per-CPU.
187 memsize
+= num_cpus
* qp_mempool_obj_size();
189 // Plus we'll give a DPDK 64MB for "other stuff".
190 memsize
+= (64UL << 20);