]> git.proxmox.com Git - ceph.git/blob - ceph/src/msg/async/dpdk/dpdk_rte.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / msg / async / dpdk / dpdk_rte.cc
1 /*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18
19 #include <bitset>
20
21 #include <rte_config.h>
22 #include <rte_common.h>
23 #include <rte_ethdev.h>
24 #include <rte_version.h>
25
26 #include "include/str_map.h"
27
28 #include "DPDK.h"
29 #include "dpdk_rte.h"
30
31 namespace dpdk {
32
// Copy `str` into a writable char buffer that ends with a NUL terminator, so
// that its data() pointer can be handed to C APIs expecting a mutable char*.
static inline std::vector<char> string2vector(std::string str) {
  std::vector<char> buf;
  buf.reserve(str.size() + 1);
  buf.assign(str.begin(), str.end());
  buf.push_back('\0');
  return buf;
}
38
// Return the number of bits set to 1 in `n`.
static int bitcount(unsigned long long n)
{
  int bits = 0;
  // Each iteration clears the lowest set bit, so the loop runs once per 1-bit.
  for (; n != 0; n &= n - 1) {
    ++bits;
  }
  return bits;
}
43
44 static int hex2bitcount(unsigned char c)
45 {
46 int val;
47
48 if (isdigit(c))
49 val = c - '0';
50 else if (isupper(c))
51 val = c - 'A' + 10;
52 else
53 val = c - 'a' + 10;
54 return bitcount(val);
55 }
56
57 static int coremask_bitcount(const char *buf)
58 {
59 int count = 0;
60
61 if (buf[0] == '0' &&
62 ((buf[1] == 'x') || (buf[1] == 'X')))
63 buf += 2;
64
65 for (int i = 0; buf[i] != '\0'; i++) {
66 char c = buf[i];
67 if (isxdigit(c) == 0)
68 return -EINVAL;
69 count += hex2bitcount(c);
70 }
71 return count;
72 }
73
74 bool eal::rte_initialized = false;
75
76 int eal::start()
77 {
78 if (initialized) {
79 return 1;
80 }
81
82 bool done = false;
83 auto coremask = cct->_conf.get_val<std::string>("ms_dpdk_coremask");
84 int coremaskbit = coremask_bitcount(coremask.c_str());
85
86 if (coremaskbit <= 0
87 || static_cast<uint64_t>(coremaskbit) < cct->_conf->ms_async_op_threads)
88 return -EINVAL;
89
90 t = std::thread([&]() {
91 // TODO: Inherit these from the app parameters - "opts"
92 std::vector<std::vector<char>> args {
93 string2vector("ceph"),
94 string2vector("-c"), string2vector(cct->_conf.get_val<std::string>("ms_dpdk_coremask")),
95 string2vector("-n"), string2vector(cct->_conf->ms_dpdk_memory_channel),
96 };
97
98 std::optional<std::string> hugepages_path;
99 if (!cct->_conf->ms_dpdk_hugepages.empty()) {
100 hugepages_path.emplace(cct->_conf->ms_dpdk_hugepages);
101 }
102
103 // If "hugepages" is not provided and DPDK PMD drivers mode is requested -
104 // use the default DPDK huge tables configuration.
105 if (hugepages_path) {
106 args.push_back(string2vector("--huge-dir"));
107 args.push_back(string2vector(*hugepages_path));
108
109 //
110 // We don't know what is going to be our networking configuration so we
111 // assume there is going to be a queue per-CPU. Plus we'll give a DPDK
112 // 64MB for "other stuff".
113 //
114 unsigned int x;
115 std::stringstream ss;
116 ss << std::hex << "fffefffe";
117 ss >> x;
118 size_t size_MB = mem_size(bitcount(x)) >> 20;
119 std::stringstream size_MB_str;
120 size_MB_str << size_MB;
121
122 args.push_back(string2vector("-m"));
123 args.push_back(string2vector(size_MB_str.str()));
124 } else if (!cct->_conf->ms_dpdk_pmd.empty()) {
125 args.push_back(string2vector("--no-huge"));
126 }
127
128 for_each_pair(cct->_conf.get_val<std::string>("ms_dpdk_devs_allowlist"), " ",
129 [&args] (std::string_view key, std::string_view val) {
130 args.push_back(string2vector(std::string(key)));
131 if (!val.empty()) {
132 args.push_back(string2vector(std::string(val)));
133 }
134 });
135
136 std::string rte_file_prefix;
137 rte_file_prefix = "rte_";
138 rte_file_prefix += cct->_conf->name.to_str();
139 args.push_back(string2vector("--file-prefix"));
140 args.push_back(string2vector(rte_file_prefix));
141
142 std::vector<char*> cargs;
143
144 for (auto&& a: args) {
145 cargs.push_back(a.data());
146 }
147 if (!rte_initialized) {
148 /* initialise the EAL for all */
149 int ret = rte_eal_init(cargs.size(), cargs.data());
150 if (ret < 0) {
151 std::unique_lock locker(lock);
152 done = true;
153 cond.notify_all();
154 return ret;
155 }
156 rte_initialized = true;
157 }
158
159 std::unique_lock locker(lock);
160 initialized = true;
161 done = true;
162 cond.notify_all();
163 while (!stopped) {
164 cond.wait(locker, [this] { return !funcs.empty() || stopped; });
165 if (!funcs.empty()) {
166 auto f = std::move(funcs.front());
167 funcs.pop_front();
168 f();
169 cond.notify_all();
170 }
171 }
172 });
173 std::unique_lock locker(lock);
174 cond.wait(locker, [&] { return done; });
175 return initialized ? 0 : -EIO;
176 }
177
178 size_t eal::mem_size(int num_cpus)
179 {
180 size_t memsize = 0;
181 //
182 // PMD mempool memory:
183 //
184 // We don't know what is going to be our networking configuration so we
185 // assume there is going to be a queue per-CPU.
186 //
187 memsize += num_cpus * qp_mempool_obj_size();
188
189 // Plus we'll give a DPDK 64MB for "other stuff".
190 memsize += (64UL << 20);
191
192 return memsize;
193 }
194
195 void eal::stop()
196 {
197 assert(initialized);
198 assert(!stopped);
199 stopped = true;
200 cond.notify_all();
201 t.join();
202 }
203
204 } // namespace dpdk