]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/pick_address.cc
use the buster suite for getting the source package for now
[ceph.git] / ceph / src / common / pick_address.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2012 Inktank
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include "common/pick_address.h"
9f95a23c
TL
16
17#include <netdb.h>
18#include <string>
19#include <string.h>
20#include <vector>
21
22#include <fmt/format.h>
23
7c673cae
FG
24#include "include/ipaddr.h"
25#include "include/str_list.h"
11fdf7f2
TL
26#include "common/ceph_context.h"
27#ifndef WITH_SEASTAR
28#include "common/config.h"
29#include "common/config_obs.h"
30#endif
7c673cae
FG
31#include "common/debug.h"
32#include "common/errno.h"
11fdf7f2 33#include "common/numa.h"
7c673cae
FG
34
35#define dout_subsys ceph_subsys_
36
f67539c2
TL
37using std::string;
38using std::vector;
39
3efd9988
FG
40const struct sockaddr *find_ip_in_subnet_list(
41 CephContext *cct,
42 const struct ifaddrs *ifa,
11fdf7f2 43 unsigned ipv,
3efd9988 44 const std::string &networks,
11fdf7f2
TL
45 const std::string &interfaces,
46 int numa_node)
7c673cae
FG
47{
48 std::list<string> nets;
49 get_str_list(networks, nets);
3efd9988
FG
50 std::list<string> ifs;
51 get_str_list(interfaces, ifs);
52
53 // filter interfaces by name
11fdf7f2 54 const struct ifaddrs *filtered = nullptr;
3efd9988
FG
55 if (ifs.empty()) {
56 filtered = ifa;
57 } else {
58 if (nets.empty()) {
59 lderr(cct) << "interface names specified but not network names" << dendl;
60 exit(1);
61 }
62 const struct ifaddrs *t = ifa;
63 struct ifaddrs *head = 0;
11fdf7f2 64 while (t) {
3efd9988
FG
65 bool match = false;
66 for (auto& i : ifs) {
67 if (strcmp(i.c_str(), t->ifa_name) == 0) {
68 match = true;
69 break;
70 }
71 }
72 if (match) {
73 struct ifaddrs *n = new ifaddrs;
74 memcpy(n, t, sizeof(*t));
75 n->ifa_next = head;
76 head = n;
77 }
78 t = t->ifa_next;
79 }
11fdf7f2 80 if (!head) {
3efd9988
FG
81 lderr(cct) << "no interfaces matching " << ifs << dendl;
82 exit(1);
83 }
84 filtered = head;
85 }
7c673cae 86
11fdf7f2
TL
87 struct sockaddr *r = nullptr;
88 for (auto& s : nets) {
3efd9988
FG
89 struct sockaddr_storage net;
90 unsigned int prefix_len;
7c673cae 91
11fdf7f2
TL
92 if (!parse_network(s.c_str(), &net, &prefix_len)) {
93 lderr(cct) << "unable to parse network: " << s << dendl;
3efd9988
FG
94 exit(1);
95 }
96
11fdf7f2
TL
97 switch (net.ss_family) {
98 case AF_INET:
99 if (!(ipv & CEPH_PICK_ADDRESS_IPV4)) {
100 continue;
101 }
102 break;
103 case AF_INET6:
104 if (!(ipv & CEPH_PICK_ADDRESS_IPV6)) {
105 continue;
106 }
107 break;
108 }
109
3efd9988
FG
110 const struct ifaddrs *found = find_ip_in_subnet(
111 filtered,
11fdf7f2 112 (struct sockaddr *) &net, prefix_len, numa_node);
3efd9988
FG
113 if (found) {
114 r = found->ifa_addr;
115 break;
116 }
117 }
7c673cae 118
3efd9988
FG
119 if (filtered != ifa) {
120 while (filtered) {
121 struct ifaddrs *t = filtered->ifa_next;
122 delete filtered;
123 filtered = t;
7c673cae 124 }
3efd9988 125 }
7c673cae 126
3efd9988 127 return r;
7c673cae
FG
128}
129
11fdf7f2 130#ifndef WITH_SEASTAR
7c673cae
FG
131// observe this change
132struct Observer : public md_config_obs_t {
133 const char *keys[2];
134 explicit Observer(const char *c) {
135 keys[0] = c;
136 keys[1] = NULL;
137 }
138
139 const char** get_tracked_conf_keys() const override {
140 return (const char **)keys;
141 }
11fdf7f2 142 void handle_conf_change(const ConfigProxy& conf,
7c673cae
FG
143 const std::set <std::string> &changed) override {
144 // do nothing.
145 }
146};
147
148static void fill_in_one_address(CephContext *cct,
149 const struct ifaddrs *ifa,
150 const string networks,
3efd9988 151 const string interfaces,
11fdf7f2
TL
152 const char *conf_var,
153 int numa_node = -1)
7c673cae 154{
11fdf7f2
TL
155 const struct sockaddr *found = find_ip_in_subnet_list(
156 cct,
157 ifa,
158 CEPH_PICK_ADDRESS_IPV4|CEPH_PICK_ADDRESS_IPV6,
159 networks,
160 interfaces,
161 numa_node);
7c673cae 162 if (!found) {
3efd9988
FG
163 lderr(cct) << "unable to find any IP address in networks '" << networks
164 << "' interfaces '" << interfaces << "'" << dendl;
7c673cae
FG
165 exit(1);
166 }
167
168 char buf[INET6_ADDRSTRLEN];
169 int err;
170
171 err = getnameinfo(found,
172 (found->sa_family == AF_INET)
173 ? sizeof(struct sockaddr_in)
174 : sizeof(struct sockaddr_in6),
175
176 buf, sizeof(buf),
11fdf7f2 177 nullptr, 0,
7c673cae
FG
178 NI_NUMERICHOST);
179 if (err != 0) {
180 lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl;
181 exit(1);
182 }
183
184 Observer obs(conf_var);
185
11fdf7f2 186 cct->_conf.add_observer(&obs);
7c673cae 187
11fdf7f2
TL
188 cct->_conf.set_val_or_die(conf_var, buf);
189 cct->_conf.apply_changes(nullptr);
7c673cae 190
11fdf7f2 191 cct->_conf.remove_observer(&obs);
7c673cae
FG
192}
193
194void pick_addresses(CephContext *cct, int needs)
195{
196 struct ifaddrs *ifa;
197 int r = getifaddrs(&ifa);
11fdf7f2
TL
198 auto public_addr = cct->_conf.get_val<entity_addr_t>("public_addr");
199 auto public_network = cct->_conf.get_val<std::string>("public_network");
200 auto public_network_interface =
201 cct->_conf.get_val<std::string>("public_network_interface");
202 auto cluster_addr = cct->_conf.get_val<entity_addr_t>("cluster_addr");
203 auto cluster_network = cct->_conf.get_val<std::string>("cluster_network");
204 auto cluster_network_interface =
205 cct->_conf.get_val<std::string>("cluster_network_interface");
206
207 if (r < 0) {
7c673cae
FG
208 string err = cpp_strerror(errno);
209 lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
210 exit(1);
211 }
212
11fdf7f2
TL
213 if ((needs & CEPH_PICK_ADDRESS_PUBLIC) &&
214 public_addr.is_blank_ip() && !public_network.empty()) {
215 fill_in_one_address(cct, ifa, public_network, public_network_interface,
216 "public_addr");
7c673cae
FG
217 }
218
11fdf7f2
TL
219 if ((needs & CEPH_PICK_ADDRESS_CLUSTER) && cluster_addr.is_blank_ip()) {
220 if (!cluster_network.empty()) {
221 fill_in_one_address(cct, ifa, cluster_network, cluster_network_interface,
3efd9988 222 "cluster_addr");
7c673cae 223 } else {
11fdf7f2 224 if (!public_network.empty()) {
7c673cae
FG
225 lderr(cct) << "Public network was set, but cluster network was not set " << dendl;
226 lderr(cct) << " Using public network also for cluster network" << dendl;
11fdf7f2
TL
227 fill_in_one_address(cct, ifa, public_network, public_network_interface,
228 "cluster_addr");
7c673cae
FG
229 }
230 }
231 }
232
233 freeifaddrs(ifa);
234}
11fdf7f2
TL
235#endif // !WITH_SEASTAR
236
237static int fill_in_one_address(
238 CephContext *cct,
239 const struct ifaddrs *ifa,
240 unsigned ipv,
241 const string networks,
242 const string interfaces,
243 entity_addrvec_t *addrs,
244 int numa_node = -1)
245{
246 const struct sockaddr *found = find_ip_in_subnet_list(cct, ifa, ipv, networks,
247 interfaces, numa_node);
248 if (!found) {
249 std::string ip_type = "";
250 if ((ipv & CEPH_PICK_ADDRESS_IPV4) && (ipv & CEPH_PICK_ADDRESS_IPV6)) {
251 ip_type = "IPv4 or IPv6";
252 } else if (ipv & CEPH_PICK_ADDRESS_IPV4) {
253 ip_type = "IPv4";
254 } else {
255 ip_type = "IPv6";
256 }
257 lderr(cct) << "unable to find any " << ip_type << " address in networks '"
258 << networks << "' interfaces '" << interfaces << "'" << dendl;
259 return -1;
260 }
261
262 char buf[INET6_ADDRSTRLEN];
263 int err;
7c673cae 264
11fdf7f2
TL
265 err = getnameinfo(found,
266 (found->sa_family == AF_INET)
267 ? sizeof(struct sockaddr_in)
268 : sizeof(struct sockaddr_in6),
269
270 buf, sizeof(buf),
271 nullptr, 0,
272 NI_NUMERICHOST);
273 if (err != 0) {
274 lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl;
275 return -1;
276 }
277
278 entity_addr_t addr;
279 const char *end = 0;
280 bool r = addr.parse(buf, &end);
281 if (!r) {
282 return -1;
283 }
284 addrs->v.push_back(addr);
285 return 0;
286}
287
288int pick_addresses(
289 CephContext *cct,
290 unsigned flags,
291 struct ifaddrs *ifa,
292 entity_addrvec_t *addrs,
293 int preferred_numa_node)
294{
295 addrs->v.clear();
296
297 unsigned addrt = (flags & (CEPH_PICK_ADDRESS_PUBLIC |
298 CEPH_PICK_ADDRESS_CLUSTER));
299 if (addrt == 0 ||
300 addrt == (CEPH_PICK_ADDRESS_PUBLIC |
301 CEPH_PICK_ADDRESS_CLUSTER)) {
302 return -EINVAL;
303 }
304 unsigned msgrv = flags & (CEPH_PICK_ADDRESS_MSGR1 |
305 CEPH_PICK_ADDRESS_MSGR2);
306 if (msgrv == 0) {
307 if (cct->_conf.get_val<bool>("ms_bind_msgr1")) {
308 msgrv |= CEPH_PICK_ADDRESS_MSGR1;
309 }
310 if (cct->_conf.get_val<bool>("ms_bind_msgr2")) {
311 msgrv |= CEPH_PICK_ADDRESS_MSGR2;
312 }
313 if (msgrv == 0) {
314 return -EINVAL;
315 }
316 }
317 unsigned ipv = flags & (CEPH_PICK_ADDRESS_IPV4 |
318 CEPH_PICK_ADDRESS_IPV6);
319 if (ipv == 0) {
320 if (cct->_conf.get_val<bool>("ms_bind_ipv4")) {
321 ipv |= CEPH_PICK_ADDRESS_IPV4;
322 }
323 if (cct->_conf.get_val<bool>("ms_bind_ipv6")) {
324 ipv |= CEPH_PICK_ADDRESS_IPV6;
325 }
326 if (ipv == 0) {
327 return -EINVAL;
328 }
329 if (cct->_conf.get_val<bool>("ms_bind_prefer_ipv4")) {
330 flags |= CEPH_PICK_ADDRESS_PREFER_IPV4;
331 } else {
332 flags &= ~CEPH_PICK_ADDRESS_PREFER_IPV4;
333 }
334 }
335
336 entity_addr_t addr;
337 string networks;
338 string interfaces;
339 if (addrt & CEPH_PICK_ADDRESS_PUBLIC) {
340 addr = cct->_conf.get_val<entity_addr_t>("public_addr");
341 networks = cct->_conf.get_val<std::string>("public_network");
342 interfaces =
343 cct->_conf.get_val<std::string>("public_network_interface");
344 } else {
345 addr = cct->_conf.get_val<entity_addr_t>("cluster_addr");
346 networks = cct->_conf.get_val<std::string>("cluster_network");
347 interfaces =
348 cct->_conf.get_val<std::string>("cluster_network_interface");
349 if (networks.empty()) {
350 lderr(cct) << "Falling back to public interface" << dendl;
351 // fall back to public_ network and interface if cluster is not set
352 networks = cct->_conf.get_val<std::string>("public_network");
353 interfaces =
354 cct->_conf.get_val<std::string>("public_network_interface");
355 }
356 }
357 if (addr.is_blank_ip() &&
358 !networks.empty()) {
359 int ipv4_r = !(ipv & CEPH_PICK_ADDRESS_IPV4) ? 0 : -1;
360 int ipv6_r = !(ipv & CEPH_PICK_ADDRESS_IPV6) ? 0 : -1;
361 // first try on preferred numa node (if >= 0), then anywhere.
362 while (true) {
363 // note: pass in ipv to filter the matching addresses
364 if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
365 (flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
366 ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
367 networks, interfaces, addrs,
368 preferred_numa_node);
369 }
370 if (ipv & CEPH_PICK_ADDRESS_IPV6) {
371 ipv6_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV6,
372 networks, interfaces, addrs,
373 preferred_numa_node);
374 }
375 if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
376 !(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
377 ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4,
378 networks, interfaces, addrs,
379 preferred_numa_node);
380 }
381 if (ipv4_r >= 0 && ipv6_r >= 0) {
382 break;
383 }
384 if (preferred_numa_node < 0) {
385 return ipv4_r >= 0 && ipv6_r >= 0 ? 0 : -1;
386 }
387 preferred_numa_node = -1; // try any numa node
388 }
389 }
390
391 // note: we may have a blank addr here
392
393 // ipv4 and/or ipv6?
394 if (addrs->v.empty()) {
11fdf7f2
TL
395 addr.set_type(entity_addr_t::TYPE_MSGR2);
396 if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
397 (flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
398 addr.set_family(AF_INET);
399 addrs->v.push_back(addr);
400 }
401 if (ipv & CEPH_PICK_ADDRESS_IPV6) {
402 addr.set_family(AF_INET6);
403 addrs->v.push_back(addr);
404 }
405 if ((ipv & CEPH_PICK_ADDRESS_IPV4) &&
406 !(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) {
407 addr.set_family(AF_INET);
408 addrs->v.push_back(addr);
409 }
410 }
411
412 // msgr2 or legacy or both?
413 if (msgrv == (CEPH_PICK_ADDRESS_MSGR1 | CEPH_PICK_ADDRESS_MSGR2)) {
414 vector<entity_addr_t> v;
415 v.swap(addrs->v);
416 for (auto a : v) {
417 a.set_type(entity_addr_t::TYPE_MSGR2);
418 if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) {
419 a.set_port(CEPH_MON_PORT_IANA);
420 }
421 addrs->v.push_back(a);
422 a.set_type(entity_addr_t::TYPE_LEGACY);
423 if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) {
424 a.set_port(CEPH_MON_PORT_LEGACY);
425 }
426 addrs->v.push_back(a);
427 }
428 } else if (msgrv == CEPH_PICK_ADDRESS_MSGR1) {
429 for (auto& a : addrs->v) {
430 a.set_type(entity_addr_t::TYPE_LEGACY);
431 }
432 } else {
433 for (auto& a : addrs->v) {
434 a.set_type(entity_addr_t::TYPE_MSGR2);
435 }
436 }
437
438 return 0;
439}
440
441int pick_addresses(
442 CephContext *cct,
443 unsigned flags,
444 entity_addrvec_t *addrs,
445 int preferred_numa_node)
446{
447 struct ifaddrs *ifa;
448 int r = getifaddrs(&ifa);
449 if (r < 0) {
450 r = -errno;
451 string err = cpp_strerror(r);
452 lderr(cct) << "unable to fetch interfaces and addresses: "
453 << cpp_strerror(r) << dendl;
454 return r;
455 }
456 r = pick_addresses(cct, flags, ifa, addrs, preferred_numa_node);
457 freeifaddrs(ifa);
458 return r;
459}
b5b8bbf5
FG
460
461std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network)
462{
463 struct ifaddrs *ifa;
464 int r = getifaddrs(&ifa);
465 if (r < 0) {
466 string err = cpp_strerror(errno);
467 lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl;
468 return {};
469 }
470
f67539c2 471 const unsigned int prefix_len = std::max(sizeof(in_addr::s_addr), sizeof(in6_addr::s6_addr)) * CHAR_BIT;
11fdf7f2
TL
472 const struct ifaddrs *found = find_ip_in_subnet(
473 ifa,
474 (const struct sockaddr *) &network, prefix_len);
b5b8bbf5
FG
475
476 std::string result;
477 if (found) {
478 result = found->ifa_name;
479 }
480
481 freeifaddrs(ifa);
482
483 return result;
484}
485
486
f67539c2 487bool have_local_addr(CephContext *cct, const std::list<entity_addr_t>& ls, entity_addr_t *match)
7c673cae
FG
488{
489 struct ifaddrs *ifa;
490 int r = getifaddrs(&ifa);
491 if (r < 0) {
492 lderr(cct) << "unable to fetch interfaces and addresses: " << cpp_strerror(errno) << dendl;
493 exit(1);
494 }
495
496 bool found = false;
11fdf7f2 497 for (struct ifaddrs *addrs = ifa; addrs != nullptr; addrs = addrs->ifa_next) {
7c673cae
FG
498 if (addrs->ifa_addr) {
499 entity_addr_t a;
500 a.set_sockaddr(addrs->ifa_addr);
11fdf7f2
TL
501 for (auto& p : ls) {
502 if (a.is_same_host(p)) {
503 *match = p;
7c673cae
FG
504 found = true;
505 goto out;
506 }
507 }
508 }
509 }
510
511 out:
512 freeifaddrs(ifa);
513 return found;
514}
11fdf7f2
TL
515
516int get_iface_numa_node(
517 const std::string& iface,
518 int *node)
519{
9f95a23c
TL
520 enum class iface_t {
521 PHY_PORT,
522 BOND_PORT
523 } ifatype = iface_t::PHY_PORT;
f67539c2 524 std::string_view ifa{iface};
9f95a23c
TL
525 if (auto pos = ifa.find(":"); pos != ifa.npos) {
526 ifa.remove_suffix(ifa.size() - pos);
92f5a8d4 527 }
9f95a23c 528 string fn = fmt::format("/sys/class/net/{}/device/numa_node", ifa);
92f5a8d4
TL
529 int fd = ::open(fn.c_str(), O_RDONLY);
530 if (fd < 0) {
9f95a23c 531 fn = fmt::format("/sys/class/net/{}/bonding/slaves", ifa);
92f5a8d4
TL
532 fd = ::open(fn.c_str(), O_RDONLY);
533 if (fd < 0) {
534 return -errno;
535 }
9f95a23c 536 ifatype = iface_t::BOND_PORT;
92f5a8d4 537 }
11fdf7f2
TL
538
539 int r = 0;
540 char buf[1024];
541 char *endptr = 0;
11fdf7f2
TL
542 r = safe_read(fd, &buf, sizeof(buf));
543 if (r < 0) {
544 goto out;
545 }
546 buf[r] = 0;
547 while (r > 0 && ::isspace(buf[--r])) {
548 buf[r] = 0;
549 }
92f5a8d4
TL
550
551 switch (ifatype) {
9f95a23c 552 case iface_t::PHY_PORT:
92f5a8d4
TL
553 *node = strtoll(buf, &endptr, 10);
554 if (endptr != buf + strlen(buf)) {
555 r = -EINVAL;
556 goto out;
557 }
558 r = 0;
559 break;
9f95a23c
TL
560 case iface_t::BOND_PORT:
561 int bond_node = -1;
92f5a8d4 562 std::vector<std::string> sv;
9f95a23c
TL
563 std::string ifacestr = buf;
564 get_str_vec(ifacestr, " ", sv);
92f5a8d4
TL
565 for (auto& iter : sv) {
566 int bn = -1;
567 r = get_iface_numa_node(iter, &bn);
568 if (r >= 0) {
569 if (bond_node == -1 || bn == bond_node) {
570 bond_node = bn;
571 } else {
572 *node = -2;
573 goto out;
574 }
575 } else {
576 goto out;
577 }
578 }
579 *node = bond_node;
580 break;
11fdf7f2 581 }
92f5a8d4
TL
582
583 out:
11fdf7f2
TL
584 ::close(fd);
585 return r;
586}
92f5a8d4 587