]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/pick_address.cc
0ab19292582b5d8701e3e458d6220d5782a66e0e
[ceph.git] / ceph / src / common / pick_address.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2012 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15 #include "common/pick_address.h"
16
17 #include <netdb.h>
18 #include <netinet/in.h>
19 #include <string>
20 #include <string.h>
21 #include <vector>
22
23 #include <boost/algorithm/string/predicate.hpp>
24 #include <fmt/format.h>
25
26 #include "include/ipaddr.h"
27 #include "include/str_list.h"
28 #include "common/ceph_context.h"
29 #ifndef WITH_SEASTAR
30 #include "common/config.h"
31 #include "common/config_obs.h"
32 #endif
33 #include "common/debug.h"
34 #include "common/errno.h"
35 #include "common/numa.h"
36
37 #ifndef HAVE_IN_ADDR_T
38 typedef uint32_t in_addr_t;
39 #endif
40
41 #ifndef IN_LOOPBACKNET
42 #define IN_LOOPBACKNET 127
43 #endif
44
45 #define dout_subsys ceph_subsys_
46
47 using std::string;
48 using std::vector;
49
50 namespace {
51
// True when the interface entry's name equals `if_name` exactly
// (whole-string comparison, no prefix or wildcard matching).
bool matches_with_name(const ifaddrs& ifa, const std::string& if_name)
{
  return if_name == ifa.ifa_name;
}
56
// Classify a socket address by family:
//   1  -> loopback (IPv4 whose top octet is IN_LOOPBACKNET, or IPv6 ::1)
//   0  -> IPv4/IPv6 but not loopback
//  -1  -> any other address family
static int is_loopback_addr(sockaddr* addr)
{
  switch (addr->sa_family) {
  case AF_INET: {
    const auto* v4 = reinterpret_cast<const sockaddr_in*>(addr);
    // keep only the class-A network byte of the host-order address
    const in_addr_t top_octet = ntohl(v4->sin_addr.s_addr) >> IN_CLASSA_NSHIFT;
    if (top_octet == IN_LOOPBACKNET) {
      return 1;
    }
    return 0;
  }
  case AF_INET6: {
    auto* v6 = reinterpret_cast<sockaddr_in6*>(addr);
    if (IN6_IS_ADDR_LOOPBACK(&v6->sin6_addr)) {
      return 1;
    }
    return 0;
  }
  default:
    return -1;
  }
}
70
71 static int grade_addr(const ifaddrs& ifa)
72 {
73 if (ifa.ifa_addr == nullptr) {
74 return -1;
75 }
76 int score = 0;
77 if (ifa.ifa_flags & IFF_UP) {
78 score += 4;
79 }
80 switch (is_loopback_addr(ifa.ifa_addr)) {
81 case 0:
82 // prefer non-loopback addresses
83 score += 2;
84 break;
85 case 1:
86 score += 0;
87 break;
88 default:
89 score = -1;
90 break;
91 }
92 return score;
93 }
94
95 bool matches_with_net(const ifaddrs& ifa,
96 const sockaddr* net,
97 unsigned int prefix_len,
98 unsigned ipv)
99 {
100 switch (net->sa_family) {
101 case AF_INET:
102 if (ipv & CEPH_PICK_ADDRESS_IPV4) {
103 return matches_ipv4_in_subnet(ifa, (struct sockaddr_in*)net, prefix_len);
104 }
105 break;
106 case AF_INET6:
107 if (ipv & CEPH_PICK_ADDRESS_IPV6) {
108 return matches_ipv6_in_subnet(ifa, (struct sockaddr_in6*)net, prefix_len);
109 }
110 break;
111 }
112 return false;
113 }
114
115 bool matches_with_net(CephContext *cct,
116 const ifaddrs& ifa,
117 const std::string& s,
118 unsigned ipv)
119 {
120 struct sockaddr_storage net;
121 unsigned int prefix_len;
122 if (!parse_network(s.c_str(), &net, &prefix_len)) {
123 lderr(cct) << "unable to parse network: " << s << dendl;
124 exit(1);
125 }
126 return matches_with_net(ifa, (sockaddr*)&net, prefix_len, ipv);
127 }
128
129 int grade_with_numa_node(const ifaddrs& ifa, int numa_node)
130 {
131 #if defined(WITH_SEASTAR) || defined(_WIN32)
132 return 0;
133 #else
134 if (numa_node < 0) {
135 return 0;
136 }
137 int if_node = -1;
138 int r = get_iface_numa_node(ifa.ifa_name, &if_node);
139 if (r < 0) {
140 return 0;
141 }
142 return if_node == numa_node ? 1 : 0;
143 #endif
144 }
145 }
146
147 const struct sockaddr *find_ip_in_subnet_list(
148 CephContext *cct,
149 const struct ifaddrs *ifa,
150 unsigned ipv,
151 const std::string &networks,
152 const std::string &interfaces,
153 int numa_node)
154 {
155 const auto ifs = get_str_list(interfaces);
156 const auto nets = get_str_list(networks);
157 if (!ifs.empty() && nets.empty()) {
158 lderr(cct) << "interface names specified but not network names" << dendl;
159 exit(1);
160 }
161
162 int best_score = 0;
163 const sockaddr* best_addr = nullptr;
164 for (const auto* addr = ifa; addr != nullptr; addr = addr->ifa_next) {
165 if (!ifs.empty() &&
166 std::none_of(std::begin(ifs), std::end(ifs),
167 [&](const auto& if_name) {
168 return matches_with_name(*addr, if_name);
169 })) {
170 continue;
171 }
172 if (!nets.empty() &&
173 std::none_of(std::begin(nets), std::end(nets),
174 [&](const auto& net) {
175 return matches_with_net(cct, *addr, net, ipv);
176 })) {
177 continue;
178 }
179 int score = grade_addr(*addr);
180 if (score < 0) {
181 continue;
182 }
183 score += grade_with_numa_node(*addr, numa_node);
184 if (score > best_score) {
185 best_score = score;
186 best_addr = addr->ifa_addr;
187 }
188 }
189 return best_addr;
190 }
191
192 #ifndef WITH_SEASTAR
193 // observe this change
194 struct Observer : public md_config_obs_t {
195 const char *keys[2];
196 explicit Observer(const char *c) {
197 keys[0] = c;
198 keys[1] = NULL;
199 }
200
201 const char** get_tracked_conf_keys() const override {
202 return (const char **)keys;
203 }
204 void handle_conf_change(const ConfigProxy& conf,
205 const std::set <std::string> &changed) override {
206 // do nothing.
207 }
208 };
209
210 static void fill_in_one_address(CephContext *cct,
211 const struct ifaddrs *ifa,
212 const string &networks,
213 const string &interfaces,
214 const char *conf_var,
215 int numa_node = -1)
216 {
217 const struct sockaddr *found = find_ip_in_subnet_list(
218 cct,
219 ifa,
220 CEPH_PICK_ADDRESS_IPV4|CEPH_PICK_ADDRESS_IPV6,
221 networks,
222 interfaces,
223 numa_node);
224 if (!found) {
225 lderr(cct) << "unable to find any IP address in networks '" << networks
226 << "' interfaces '" << interfaces << "'" << dendl;
227 exit(1);
228 }
229
230 char buf[INET6_ADDRSTRLEN];
231 int err;
232
233 err = getnameinfo(found,
234 (found->sa_family == AF_INET)
235 ? sizeof(struct sockaddr_in)
236 : sizeof(struct sockaddr_in6),
237
238 buf, sizeof(buf),
239 nullptr, 0,
240 NI_NUMERICHOST);
241 if (err != 0) {
242 lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl;
243 exit(1);
244 }
245
246 Observer obs(conf_var);
247
248 cct->_conf.add_observer(&obs);
249
250 cct->_conf.set_val_or_die(conf_var, buf);
251 cct->_conf.apply_changes(nullptr);
252
253 cct->_conf.remove_observer(&obs);
254 }
255
256 void pick_addresses(CephContext *cct, int needs)
257 {
258 auto public_addr = cct->_conf.get_val<entity_addr_t>("public_addr");
259 auto public_network = cct->_conf.get_val<std::string>("public_network");
260 auto public_network_interface =
261 cct->_conf.get_val<std::string>("public_network_interface");
262 auto cluster_addr = cct->_conf.get_val<entity_addr_t>("cluster_addr");
263 auto cluster_network = cct->_conf.get_val<std::string>("cluster_network");
264 auto cluster_network_interface =
265 cct->_conf.get_val<std::string>("cluster_network_interface");
266
267 struct ifaddrs *ifa;
268 int r = getifaddrs(&ifa);
269 if (r < 0) {
270 string err = cpp_strerror(errno);
271 lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
272 exit(1);
273 }
274 auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
275 if ((needs & CEPH_PICK_ADDRESS_PUBLIC) &&
276 public_addr.is_blank_ip() && !public_network.empty()) {
277 fill_in_one_address(cct, ifa, public_network, public_network_interface,
278 "public_addr");
279 }
280
281 if ((needs & CEPH_PICK_ADDRESS_CLUSTER) && cluster_addr.is_blank_ip()) {
282 if (!cluster_network.empty()) {
283 fill_in_one_address(cct, ifa, cluster_network, cluster_network_interface,
284 "cluster_addr");
285 } else {
286 if (!public_network.empty()) {
287 lderr(cct) << "Public network was set, but cluster network was not set " << dendl;
288 lderr(cct) << " Using public network also for cluster network" << dendl;
289 fill_in_one_address(cct, ifa, public_network, public_network_interface,
290 "cluster_addr");
291 }
292 }
293 }
294 }
295 #endif // !WITH_SEASTAR
296
297 static std::optional<entity_addr_t> get_one_address(
298 CephContext *cct,
299 const struct ifaddrs *ifa,
300 unsigned ipv,
301 const string &networks,
302 const string &interfaces,
303 int numa_node = -1)
304 {
305 const struct sockaddr *found = find_ip_in_subnet_list(cct, ifa, ipv,
306 networks,
307 interfaces,
308 numa_node);
309 if (!found) {
310 std::string_view ip_type;
311 if ((ipv & CEPH_PICK_ADDRESS_IPV4) && (ipv & CEPH_PICK_ADDRESS_IPV6)) {
312 ip_type = "IPv4 or IPv6";
313 } else if (ipv & CEPH_PICK_ADDRESS_IPV4) {
314 ip_type = "IPv4";
315 } else {
316 ip_type = "IPv6";
317 }
318 lderr(cct) << "unable to find any " << ip_type << " address in networks '"
319 << networks << "' interfaces '" << interfaces << "'" << dendl;
320 return {};
321 }
322
323 char buf[INET6_ADDRSTRLEN];
324 int err;
325
326 err = getnameinfo(found,
327 (found->sa_family == AF_INET)
328 ? sizeof(struct sockaddr_in)
329 : sizeof(struct sockaddr_in6),
330
331 buf, sizeof(buf),
332 nullptr, 0,
333 NI_NUMERICHOST);
334 if (err != 0) {
335 lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl;
336 return {};
337 }
338
339 entity_addr_t addr;
340 if (addr.parse(buf)) {
341 return addr;
342 } else {
343 return {};
344 }
345 }
346
347 int pick_addresses(
348 CephContext *cct,
349 unsigned flags,
350 struct ifaddrs *ifa,
351 entity_addrvec_t *addrs,
352 int preferred_numa_node)
353 {
354 addrs->v.clear();
355
356 unsigned addrt = (flags & (CEPH_PICK_ADDRESS_PUBLIC |
357 CEPH_PICK_ADDRESS_CLUSTER));
358 if (addrt == 0 ||
359 addrt == (CEPH_PICK_ADDRESS_PUBLIC |
360 CEPH_PICK_ADDRESS_CLUSTER)) {
361 return -EINVAL;
362 }
363 unsigned msgrv = flags & (CEPH_PICK_ADDRESS_MSGR1 |
364 CEPH_PICK_ADDRESS_MSGR2);
365 if (msgrv == 0) {
366 if (cct->_conf.get_val<bool>("ms_bind_msgr1")) {
367 msgrv |= CEPH_PICK_ADDRESS_MSGR1;
368 }
369 if (cct->_conf.get_val<bool>("ms_bind_msgr2")) {
370 msgrv |= CEPH_PICK_ADDRESS_MSGR2;
371 }
372 if (msgrv == 0) {
373 return -EINVAL;
374 }
375 }
376 unsigned ipv = flags & (CEPH_PICK_ADDRESS_IPV4 |
377 CEPH_PICK_ADDRESS_IPV6);
378 if (ipv == 0) {
379 if (cct->_conf.get_val<bool>("ms_bind_ipv4")) {
380 ipv |= CEPH_PICK_ADDRESS_IPV4;
381 }
382 if (cct->_conf.get_val<bool>("ms_bind_ipv6")) {
383 ipv |= CEPH_PICK_ADDRESS_IPV6;
384 }
385 if (ipv == 0) {
386 return -EINVAL;
387 }
388 if (cct->_conf.get_val<bool>("ms_bind_prefer_ipv4")) {
389 flags |= CEPH_PICK_ADDRESS_PREFER_IPV4;
390 } else {
391 flags &= ~CEPH_PICK_ADDRESS_PREFER_IPV4;
392 }
393 }
394
395 entity_addr_t addr;
396 string networks;
397 string interfaces;
398 if (addrt & CEPH_PICK_ADDRESS_PUBLIC) {
399 addr = cct->_conf.get_val<entity_addr_t>("public_addr");
400 networks = cct->_conf.get_val<std::string>("public_network");
401 interfaces =
402 cct->_conf.get_val<std::string>("public_network_interface");
403 } else {
404 addr = cct->_conf.get_val<entity_addr_t>("cluster_addr");
405 networks = cct->_conf.get_val<std::string>("cluster_network");
406 interfaces =
407 cct->_conf.get_val<std::string>("cluster_network_interface");
408 if (networks.empty()) {
409 lderr(cct) << "Falling back to public interface" << dendl;
410 // fall back to public_ network and interface if cluster is not set
411 networks = cct->_conf.get_val<std::string>("public_network");
412 interfaces =
413 cct->_conf.get_val<std::string>("public_network_interface");
414 }
415 }
416 if (addr.is_blank_ip() &&
417 !networks.empty()) {
418 // note: pass in ipv to filter the matching addresses
419 for (auto pick_mask : {CEPH_PICK_ADDRESS_IPV4, CEPH_PICK_ADDRESS_IPV6}) {
420 if (ipv & pick_mask) {
421 auto ip_addr = get_one_address(cct, ifa, pick_mask,
422 networks, interfaces,
423 preferred_numa_node);
424 if (ip_addr) {
425 addrs->v.push_back(*ip_addr);
426 } else {
427 // picked but not found
428 return -1;
429 }
430 }
431 }
432 }
433
434 // note: we may have a blank addr here
435
436 // ipv4 and/or ipv6?
437 if (addrs->v.empty()) {
438 addr.set_type(entity_addr_t::TYPE_MSGR2);
439 for (auto pick_mask : {CEPH_PICK_ADDRESS_IPV4, CEPH_PICK_ADDRESS_IPV6}) {
440 if (ipv & pick_mask) {
441 addr.set_family(pick_mask == CEPH_PICK_ADDRESS_IPV4 ? AF_INET : AF_INET6);
442 addrs->v.push_back(addr);
443 }
444 }
445 }
446
447 std::sort(addrs->v.begin(), addrs->v.end(),
448 [flags] (entity_addr_t& lhs, entity_addr_t& rhs) {
449 if (flags & CEPH_PICK_ADDRESS_PREFER_IPV4) {
450 return lhs.is_ipv4() && rhs.is_ipv6();
451 } else {
452 return lhs.is_ipv6() && rhs.is_ipv4();
453 }
454 });
455
456 // msgr2 or legacy or both?
457 if (msgrv == (CEPH_PICK_ADDRESS_MSGR1 | CEPH_PICK_ADDRESS_MSGR2)) {
458 vector<entity_addr_t> v;
459 v.swap(addrs->v);
460 for (auto a : v) {
461 a.set_type(entity_addr_t::TYPE_MSGR2);
462 if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) {
463 a.set_port(CEPH_MON_PORT_IANA);
464 }
465 addrs->v.push_back(a);
466 a.set_type(entity_addr_t::TYPE_LEGACY);
467 if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) {
468 a.set_port(CEPH_MON_PORT_LEGACY);
469 }
470 addrs->v.push_back(a);
471 }
472 } else if (msgrv == CEPH_PICK_ADDRESS_MSGR1) {
473 for (auto& a : addrs->v) {
474 a.set_type(entity_addr_t::TYPE_LEGACY);
475 }
476 } else {
477 for (auto& a : addrs->v) {
478 a.set_type(entity_addr_t::TYPE_MSGR2);
479 }
480 }
481
482 return 0;
483 }
484
485 int pick_addresses(
486 CephContext *cct,
487 unsigned flags,
488 entity_addrvec_t *addrs,
489 int preferred_numa_node)
490 {
491 struct ifaddrs *ifa;
492 int r = getifaddrs(&ifa);
493 if (r < 0) {
494 r = -errno;
495 string err = cpp_strerror(r);
496 lderr(cct) << "unable to fetch interfaces and addresses: "
497 << cpp_strerror(r) << dendl;
498 return r;
499 }
500 r = pick_addresses(cct, flags, ifa, addrs, preferred_numa_node);
501 freeifaddrs(ifa);
502 return r;
503 }
504
505 std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
506 {
507 struct ifaddrs *ifa;
508 int r = getifaddrs(&ifa);
509 if (r < 0) {
510 string err = cpp_strerror(errno);
511 lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
512 return {};
513 }
514 auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
515 const unsigned int prefix_len = std::max(sizeof(in_addr::s_addr), sizeof(in6_addr::s6_addr)) * CHAR_BIT;
516 for (auto addr = ifa; addr != nullptr; addr = addr->ifa_next) {
517 if (matches_with_net(*ifa, (const struct sockaddr *) &network, prefix_len,
518 CEPH_PICK_ADDRESS_IPV4 | CEPH_PICK_ADDRESS_IPV6)) {
519 return addr->ifa_name;
520 }
521 }
522 return {};
523 }
524
525
526 bool have_local_addr(CephContext *cct, const std::list<entity_addr_t>& ls, entity_addr_t *match)
527 {
528 struct ifaddrs *ifa;
529 int r = getifaddrs(&ifa);
530 if (r < 0) {
531 lderr(cct) << "unable to fetch interfaces and addresses: " << cpp_strerror(errno) << dendl;
532 exit(1);
533 }
534 auto free_ifa = make_scope_guard([ifa] { freeifaddrs(ifa); });
535
536 for (struct ifaddrs *addrs = ifa; addrs != nullptr; addrs = addrs->ifa_next) {
537 if (addrs->ifa_addr) {
538 entity_addr_t a;
539 a.set_sockaddr(addrs->ifa_addr);
540 for (auto& p : ls) {
541 if (a.is_same_host(p)) {
542 *match = p;
543 return true;
544 }
545 }
546 }
547 }
548 return false;
549 }
550
551 int get_iface_numa_node(
552 const std::string& iface,
553 int *node)
554 {
555 enum class iface_t {
556 PHY_PORT,
557 BOND_PORT
558 } ifatype = iface_t::PHY_PORT;
559 std::string_view ifa{iface};
560 if (auto pos = ifa.find(":"); pos != ifa.npos) {
561 ifa.remove_suffix(ifa.size() - pos);
562 }
563 string fn = fmt::format("/sys/class/net/{}/device/numa_node", ifa);
564 int fd = ::open(fn.c_str(), O_RDONLY);
565 if (fd < 0) {
566 fn = fmt::format("/sys/class/net/{}/bonding/slaves", ifa);
567 fd = ::open(fn.c_str(), O_RDONLY);
568 if (fd < 0) {
569 return -errno;
570 }
571 ifatype = iface_t::BOND_PORT;
572 }
573
574 int r = 0;
575 char buf[1024];
576 char *endptr = 0;
577 r = safe_read(fd, &buf, sizeof(buf));
578 if (r < 0) {
579 goto out;
580 }
581 buf[r] = 0;
582 while (r > 0 && ::isspace(buf[--r])) {
583 buf[r] = 0;
584 }
585
586 switch (ifatype) {
587 case iface_t::PHY_PORT:
588 *node = strtoll(buf, &endptr, 10);
589 if (endptr != buf + strlen(buf)) {
590 r = -EINVAL;
591 goto out;
592 }
593 r = 0;
594 break;
595 case iface_t::BOND_PORT:
596 int bond_node = -1;
597 std::vector<std::string> sv;
598 std::string ifacestr = buf;
599 get_str_vec(ifacestr, " ", sv);
600 for (auto& iter : sv) {
601 int bn = -1;
602 r = get_iface_numa_node(iter, &bn);
603 if (r >= 0) {
604 if (bond_node == -1 || bn == bond_node) {
605 bond_node = bn;
606 } else {
607 *node = -2;
608 goto out;
609 }
610 } else {
611 goto out;
612 }
613 }
614 *node = bond_node;
615 break;
616 }
617
618 out:
619 ::close(fd);
620 return r;
621 }
622