]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2004-2012 Inktank | |
7 | * | |
8 | * This is free software; you can redistribute it and/or | |
9 | * modify it under the terms of the GNU Lesser General Public | |
10 | * License version 2.1, as published by the Free Software | |
11 | * Foundation. See file COPYING. | |
12 | * | |
13 | */ | |
14 | ||
15 | #include "common/pick_address.h" | |
7c673cae FG |
16 | #include "include/ipaddr.h" |
17 | #include "include/str_list.h" | |
11fdf7f2 TL |
18 | #include "common/ceph_context.h" |
19 | #ifndef WITH_SEASTAR | |
20 | #include "common/config.h" | |
21 | #include "common/config_obs.h" | |
22 | #endif | |
7c673cae FG |
23 | #include "common/debug.h" |
24 | #include "common/errno.h" | |
11fdf7f2 | 25 | #include "common/numa.h" |
7c673cae | 26 | |
31f18b77 FG |
27 | #include <netdb.h> |
28 | ||
7c673cae FG |
29 | #define dout_subsys ceph_subsys_ |
30 | ||
3efd9988 FG |
31 | const struct sockaddr *find_ip_in_subnet_list( |
32 | CephContext *cct, | |
33 | const struct ifaddrs *ifa, | |
11fdf7f2 | 34 | unsigned ipv, |
3efd9988 | 35 | const std::string &networks, |
11fdf7f2 TL |
36 | const std::string &interfaces, |
37 | int numa_node) | |
7c673cae FG |
38 | { |
39 | std::list<string> nets; | |
40 | get_str_list(networks, nets); | |
3efd9988 FG |
41 | std::list<string> ifs; |
42 | get_str_list(interfaces, ifs); | |
43 | ||
44 | // filter interfaces by name | |
11fdf7f2 | 45 | const struct ifaddrs *filtered = nullptr; |
3efd9988 FG |
46 | if (ifs.empty()) { |
47 | filtered = ifa; | |
48 | } else { | |
49 | if (nets.empty()) { | |
50 | lderr(cct) << "interface names specified but not network names" << dendl; | |
51 | exit(1); | |
52 | } | |
53 | const struct ifaddrs *t = ifa; | |
54 | struct ifaddrs *head = 0; | |
11fdf7f2 | 55 | while (t) { |
3efd9988 FG |
56 | bool match = false; |
57 | for (auto& i : ifs) { | |
58 | if (strcmp(i.c_str(), t->ifa_name) == 0) { | |
59 | match = true; | |
60 | break; | |
61 | } | |
62 | } | |
63 | if (match) { | |
64 | struct ifaddrs *n = new ifaddrs; | |
65 | memcpy(n, t, sizeof(*t)); | |
66 | n->ifa_next = head; | |
67 | head = n; | |
68 | } | |
69 | t = t->ifa_next; | |
70 | } | |
11fdf7f2 | 71 | if (!head) { |
3efd9988 FG |
72 | lderr(cct) << "no interfaces matching " << ifs << dendl; |
73 | exit(1); | |
74 | } | |
75 | filtered = head; | |
76 | } | |
7c673cae | 77 | |
11fdf7f2 TL |
78 | struct sockaddr *r = nullptr; |
79 | for (auto& s : nets) { | |
3efd9988 FG |
80 | struct sockaddr_storage net; |
81 | unsigned int prefix_len; | |
7c673cae | 82 | |
11fdf7f2 TL |
83 | if (!parse_network(s.c_str(), &net, &prefix_len)) { |
84 | lderr(cct) << "unable to parse network: " << s << dendl; | |
3efd9988 FG |
85 | exit(1); |
86 | } | |
87 | ||
11fdf7f2 TL |
88 | switch (net.ss_family) { |
89 | case AF_INET: | |
90 | if (!(ipv & CEPH_PICK_ADDRESS_IPV4)) { | |
91 | continue; | |
92 | } | |
93 | break; | |
94 | case AF_INET6: | |
95 | if (!(ipv & CEPH_PICK_ADDRESS_IPV6)) { | |
96 | continue; | |
97 | } | |
98 | break; | |
99 | } | |
100 | ||
3efd9988 FG |
101 | const struct ifaddrs *found = find_ip_in_subnet( |
102 | filtered, | |
11fdf7f2 | 103 | (struct sockaddr *) &net, prefix_len, numa_node); |
3efd9988 FG |
104 | if (found) { |
105 | r = found->ifa_addr; | |
106 | break; | |
107 | } | |
108 | } | |
7c673cae | 109 | |
3efd9988 FG |
110 | if (filtered != ifa) { |
111 | while (filtered) { | |
112 | struct ifaddrs *t = filtered->ifa_next; | |
113 | delete filtered; | |
114 | filtered = t; | |
7c673cae | 115 | } |
3efd9988 | 116 | } |
7c673cae | 117 | |
3efd9988 | 118 | return r; |
7c673cae FG |
119 | } |
120 | ||
11fdf7f2 | 121 | #ifndef WITH_SEASTAR |
7c673cae FG |
122 | // observe this change |
123 | struct Observer : public md_config_obs_t { | |
124 | const char *keys[2]; | |
125 | explicit Observer(const char *c) { | |
126 | keys[0] = c; | |
127 | keys[1] = NULL; | |
128 | } | |
129 | ||
130 | const char** get_tracked_conf_keys() const override { | |
131 | return (const char **)keys; | |
132 | } | |
11fdf7f2 | 133 | void handle_conf_change(const ConfigProxy& conf, |
7c673cae FG |
134 | const std::set <std::string> &changed) override { |
135 | // do nothing. | |
136 | } | |
137 | }; | |
138 | ||
139 | static void fill_in_one_address(CephContext *cct, | |
140 | const struct ifaddrs *ifa, | |
141 | const string networks, | |
3efd9988 | 142 | const string interfaces, |
11fdf7f2 TL |
143 | const char *conf_var, |
144 | int numa_node = -1) | |
7c673cae | 145 | { |
11fdf7f2 TL |
146 | const struct sockaddr *found = find_ip_in_subnet_list( |
147 | cct, | |
148 | ifa, | |
149 | CEPH_PICK_ADDRESS_IPV4|CEPH_PICK_ADDRESS_IPV6, | |
150 | networks, | |
151 | interfaces, | |
152 | numa_node); | |
7c673cae | 153 | if (!found) { |
3efd9988 FG |
154 | lderr(cct) << "unable to find any IP address in networks '" << networks |
155 | << "' interfaces '" << interfaces << "'" << dendl; | |
7c673cae FG |
156 | exit(1); |
157 | } | |
158 | ||
159 | char buf[INET6_ADDRSTRLEN]; | |
160 | int err; | |
161 | ||
162 | err = getnameinfo(found, | |
163 | (found->sa_family == AF_INET) | |
164 | ? sizeof(struct sockaddr_in) | |
165 | : sizeof(struct sockaddr_in6), | |
166 | ||
167 | buf, sizeof(buf), | |
11fdf7f2 | 168 | nullptr, 0, |
7c673cae FG |
169 | NI_NUMERICHOST); |
170 | if (err != 0) { | |
171 | lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl; | |
172 | exit(1); | |
173 | } | |
174 | ||
175 | Observer obs(conf_var); | |
176 | ||
11fdf7f2 | 177 | cct->_conf.add_observer(&obs); |
7c673cae | 178 | |
11fdf7f2 TL |
179 | cct->_conf.set_val_or_die(conf_var, buf); |
180 | cct->_conf.apply_changes(nullptr); | |
7c673cae | 181 | |
11fdf7f2 | 182 | cct->_conf.remove_observer(&obs); |
7c673cae FG |
183 | } |
184 | ||
185 | void pick_addresses(CephContext *cct, int needs) | |
186 | { | |
187 | struct ifaddrs *ifa; | |
188 | int r = getifaddrs(&ifa); | |
11fdf7f2 TL |
189 | auto public_addr = cct->_conf.get_val<entity_addr_t>("public_addr"); |
190 | auto public_network = cct->_conf.get_val<std::string>("public_network"); | |
191 | auto public_network_interface = | |
192 | cct->_conf.get_val<std::string>("public_network_interface"); | |
193 | auto cluster_addr = cct->_conf.get_val<entity_addr_t>("cluster_addr"); | |
194 | auto cluster_network = cct->_conf.get_val<std::string>("cluster_network"); | |
195 | auto cluster_network_interface = | |
196 | cct->_conf.get_val<std::string>("cluster_network_interface"); | |
197 | ||
198 | if (r < 0) { | |
7c673cae FG |
199 | string err = cpp_strerror(errno); |
200 | lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl; | |
201 | exit(1); | |
202 | } | |
203 | ||
11fdf7f2 TL |
204 | if ((needs & CEPH_PICK_ADDRESS_PUBLIC) && |
205 | public_addr.is_blank_ip() && !public_network.empty()) { | |
206 | fill_in_one_address(cct, ifa, public_network, public_network_interface, | |
207 | "public_addr"); | |
7c673cae FG |
208 | } |
209 | ||
11fdf7f2 TL |
210 | if ((needs & CEPH_PICK_ADDRESS_CLUSTER) && cluster_addr.is_blank_ip()) { |
211 | if (!cluster_network.empty()) { | |
212 | fill_in_one_address(cct, ifa, cluster_network, cluster_network_interface, | |
3efd9988 | 213 | "cluster_addr"); |
7c673cae | 214 | } else { |
11fdf7f2 | 215 | if (!public_network.empty()) { |
7c673cae FG |
216 | lderr(cct) << "Public network was set, but cluster network was not set " << dendl; |
217 | lderr(cct) << " Using public network also for cluster network" << dendl; | |
11fdf7f2 TL |
218 | fill_in_one_address(cct, ifa, public_network, public_network_interface, |
219 | "cluster_addr"); | |
7c673cae FG |
220 | } |
221 | } | |
222 | } | |
223 | ||
224 | freeifaddrs(ifa); | |
225 | } | |
11fdf7f2 TL |
226 | #endif // !WITH_SEASTAR |
227 | ||
228 | static int fill_in_one_address( | |
229 | CephContext *cct, | |
230 | const struct ifaddrs *ifa, | |
231 | unsigned ipv, | |
232 | const string networks, | |
233 | const string interfaces, | |
234 | entity_addrvec_t *addrs, | |
235 | int numa_node = -1) | |
236 | { | |
237 | const struct sockaddr *found = find_ip_in_subnet_list(cct, ifa, ipv, networks, | |
238 | interfaces, numa_node); | |
239 | if (!found) { | |
240 | std::string ip_type = ""; | |
241 | if ((ipv & CEPH_PICK_ADDRESS_IPV4) && (ipv & CEPH_PICK_ADDRESS_IPV6)) { | |
242 | ip_type = "IPv4 or IPv6"; | |
243 | } else if (ipv & CEPH_PICK_ADDRESS_IPV4) { | |
244 | ip_type = "IPv4"; | |
245 | } else { | |
246 | ip_type = "IPv6"; | |
247 | } | |
248 | lderr(cct) << "unable to find any " << ip_type << " address in networks '" | |
249 | << networks << "' interfaces '" << interfaces << "'" << dendl; | |
250 | return -1; | |
251 | } | |
252 | ||
253 | char buf[INET6_ADDRSTRLEN]; | |
254 | int err; | |
7c673cae | 255 | |
11fdf7f2 TL |
256 | err = getnameinfo(found, |
257 | (found->sa_family == AF_INET) | |
258 | ? sizeof(struct sockaddr_in) | |
259 | : sizeof(struct sockaddr_in6), | |
260 | ||
261 | buf, sizeof(buf), | |
262 | nullptr, 0, | |
263 | NI_NUMERICHOST); | |
264 | if (err != 0) { | |
265 | lderr(cct) << "unable to convert chosen address to string: " << gai_strerror(err) << dendl; | |
266 | return -1; | |
267 | } | |
268 | ||
269 | entity_addr_t addr; | |
270 | const char *end = 0; | |
271 | bool r = addr.parse(buf, &end); | |
272 | if (!r) { | |
273 | return -1; | |
274 | } | |
275 | addrs->v.push_back(addr); | |
276 | return 0; | |
277 | } | |
278 | ||
279 | int pick_addresses( | |
280 | CephContext *cct, | |
281 | unsigned flags, | |
282 | struct ifaddrs *ifa, | |
283 | entity_addrvec_t *addrs, | |
284 | int preferred_numa_node) | |
285 | { | |
286 | addrs->v.clear(); | |
287 | ||
288 | unsigned addrt = (flags & (CEPH_PICK_ADDRESS_PUBLIC | | |
289 | CEPH_PICK_ADDRESS_CLUSTER)); | |
290 | if (addrt == 0 || | |
291 | addrt == (CEPH_PICK_ADDRESS_PUBLIC | | |
292 | CEPH_PICK_ADDRESS_CLUSTER)) { | |
293 | return -EINVAL; | |
294 | } | |
295 | unsigned msgrv = flags & (CEPH_PICK_ADDRESS_MSGR1 | | |
296 | CEPH_PICK_ADDRESS_MSGR2); | |
297 | if (msgrv == 0) { | |
298 | if (cct->_conf.get_val<bool>("ms_bind_msgr1")) { | |
299 | msgrv |= CEPH_PICK_ADDRESS_MSGR1; | |
300 | } | |
301 | if (cct->_conf.get_val<bool>("ms_bind_msgr2")) { | |
302 | msgrv |= CEPH_PICK_ADDRESS_MSGR2; | |
303 | } | |
304 | if (msgrv == 0) { | |
305 | return -EINVAL; | |
306 | } | |
307 | } | |
308 | unsigned ipv = flags & (CEPH_PICK_ADDRESS_IPV4 | | |
309 | CEPH_PICK_ADDRESS_IPV6); | |
310 | if (ipv == 0) { | |
311 | if (cct->_conf.get_val<bool>("ms_bind_ipv4")) { | |
312 | ipv |= CEPH_PICK_ADDRESS_IPV4; | |
313 | } | |
314 | if (cct->_conf.get_val<bool>("ms_bind_ipv6")) { | |
315 | ipv |= CEPH_PICK_ADDRESS_IPV6; | |
316 | } | |
317 | if (ipv == 0) { | |
318 | return -EINVAL; | |
319 | } | |
320 | if (cct->_conf.get_val<bool>("ms_bind_prefer_ipv4")) { | |
321 | flags |= CEPH_PICK_ADDRESS_PREFER_IPV4; | |
322 | } else { | |
323 | flags &= ~CEPH_PICK_ADDRESS_PREFER_IPV4; | |
324 | } | |
325 | } | |
326 | ||
327 | entity_addr_t addr; | |
328 | string networks; | |
329 | string interfaces; | |
330 | if (addrt & CEPH_PICK_ADDRESS_PUBLIC) { | |
331 | addr = cct->_conf.get_val<entity_addr_t>("public_addr"); | |
332 | networks = cct->_conf.get_val<std::string>("public_network"); | |
333 | interfaces = | |
334 | cct->_conf.get_val<std::string>("public_network_interface"); | |
335 | } else { | |
336 | addr = cct->_conf.get_val<entity_addr_t>("cluster_addr"); | |
337 | networks = cct->_conf.get_val<std::string>("cluster_network"); | |
338 | interfaces = | |
339 | cct->_conf.get_val<std::string>("cluster_network_interface"); | |
340 | if (networks.empty()) { | |
341 | lderr(cct) << "Falling back to public interface" << dendl; | |
342 | // fall back to public_ network and interface if cluster is not set | |
343 | networks = cct->_conf.get_val<std::string>("public_network"); | |
344 | interfaces = | |
345 | cct->_conf.get_val<std::string>("public_network_interface"); | |
346 | } | |
347 | } | |
348 | if (addr.is_blank_ip() && | |
349 | !networks.empty()) { | |
350 | int ipv4_r = !(ipv & CEPH_PICK_ADDRESS_IPV4) ? 0 : -1; | |
351 | int ipv6_r = !(ipv & CEPH_PICK_ADDRESS_IPV6) ? 0 : -1; | |
352 | // first try on preferred numa node (if >= 0), then anywhere. | |
353 | while (true) { | |
354 | // note: pass in ipv to filter the matching addresses | |
355 | if ((ipv & CEPH_PICK_ADDRESS_IPV4) && | |
356 | (flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) { | |
357 | ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4, | |
358 | networks, interfaces, addrs, | |
359 | preferred_numa_node); | |
360 | } | |
361 | if (ipv & CEPH_PICK_ADDRESS_IPV6) { | |
362 | ipv6_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV6, | |
363 | networks, interfaces, addrs, | |
364 | preferred_numa_node); | |
365 | } | |
366 | if ((ipv & CEPH_PICK_ADDRESS_IPV4) && | |
367 | !(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) { | |
368 | ipv4_r = fill_in_one_address(cct, ifa, CEPH_PICK_ADDRESS_IPV4, | |
369 | networks, interfaces, addrs, | |
370 | preferred_numa_node); | |
371 | } | |
372 | if (ipv4_r >= 0 && ipv6_r >= 0) { | |
373 | break; | |
374 | } | |
375 | if (preferred_numa_node < 0) { | |
376 | return ipv4_r >= 0 && ipv6_r >= 0 ? 0 : -1; | |
377 | } | |
378 | preferred_numa_node = -1; // try any numa node | |
379 | } | |
380 | } | |
381 | ||
382 | // note: we may have a blank addr here | |
383 | ||
384 | // ipv4 and/or ipv6? | |
385 | if (addrs->v.empty()) { | |
11fdf7f2 TL |
386 | addr.set_type(entity_addr_t::TYPE_MSGR2); |
387 | if ((ipv & CEPH_PICK_ADDRESS_IPV4) && | |
388 | (flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) { | |
389 | addr.set_family(AF_INET); | |
390 | addrs->v.push_back(addr); | |
391 | } | |
392 | if (ipv & CEPH_PICK_ADDRESS_IPV6) { | |
393 | addr.set_family(AF_INET6); | |
394 | addrs->v.push_back(addr); | |
395 | } | |
396 | if ((ipv & CEPH_PICK_ADDRESS_IPV4) && | |
397 | !(flags & CEPH_PICK_ADDRESS_PREFER_IPV4)) { | |
398 | addr.set_family(AF_INET); | |
399 | addrs->v.push_back(addr); | |
400 | } | |
401 | } | |
402 | ||
403 | // msgr2 or legacy or both? | |
404 | if (msgrv == (CEPH_PICK_ADDRESS_MSGR1 | CEPH_PICK_ADDRESS_MSGR2)) { | |
405 | vector<entity_addr_t> v; | |
406 | v.swap(addrs->v); | |
407 | for (auto a : v) { | |
408 | a.set_type(entity_addr_t::TYPE_MSGR2); | |
409 | if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) { | |
410 | a.set_port(CEPH_MON_PORT_IANA); | |
411 | } | |
412 | addrs->v.push_back(a); | |
413 | a.set_type(entity_addr_t::TYPE_LEGACY); | |
414 | if (flags & CEPH_PICK_ADDRESS_DEFAULT_MON_PORTS) { | |
415 | a.set_port(CEPH_MON_PORT_LEGACY); | |
416 | } | |
417 | addrs->v.push_back(a); | |
418 | } | |
419 | } else if (msgrv == CEPH_PICK_ADDRESS_MSGR1) { | |
420 | for (auto& a : addrs->v) { | |
421 | a.set_type(entity_addr_t::TYPE_LEGACY); | |
422 | } | |
423 | } else { | |
424 | for (auto& a : addrs->v) { | |
425 | a.set_type(entity_addr_t::TYPE_MSGR2); | |
426 | } | |
427 | } | |
428 | ||
429 | return 0; | |
430 | } | |
431 | ||
432 | int pick_addresses( | |
433 | CephContext *cct, | |
434 | unsigned flags, | |
435 | entity_addrvec_t *addrs, | |
436 | int preferred_numa_node) | |
437 | { | |
438 | struct ifaddrs *ifa; | |
439 | int r = getifaddrs(&ifa); | |
440 | if (r < 0) { | |
441 | r = -errno; | |
442 | string err = cpp_strerror(r); | |
443 | lderr(cct) << "unable to fetch interfaces and addresses: " | |
444 | << cpp_strerror(r) << dendl; | |
445 | return r; | |
446 | } | |
447 | r = pick_addresses(cct, flags, ifa, addrs, preferred_numa_node); | |
448 | freeifaddrs(ifa); | |
449 | return r; | |
450 | } | |
b5b8bbf5 FG |
451 | |
452 | std::string pick_iface(CephContext *cct, const struct sockaddr_storage &network) | |
453 | { | |
454 | struct ifaddrs *ifa; | |
455 | int r = getifaddrs(&ifa); | |
456 | if (r < 0) { | |
457 | string err = cpp_strerror(errno); | |
458 | lderr(cct) << "unable to fetch interfaces and addresses: " << err << dendl; | |
459 | return {}; | |
460 | } | |
461 | ||
b32b8144 | 462 | const unsigned int prefix_len = max(sizeof(in_addr::s_addr), sizeof(in6_addr::s6_addr)) * CHAR_BIT; |
11fdf7f2 TL |
463 | const struct ifaddrs *found = find_ip_in_subnet( |
464 | ifa, | |
465 | (const struct sockaddr *) &network, prefix_len); | |
b5b8bbf5 FG |
466 | |
467 | std::string result; | |
468 | if (found) { | |
469 | result = found->ifa_name; | |
470 | } | |
471 | ||
472 | freeifaddrs(ifa); | |
473 | ||
474 | return result; | |
475 | } | |
476 | ||
477 | ||
7c673cae FG |
478 | bool have_local_addr(CephContext *cct, const list<entity_addr_t>& ls, entity_addr_t *match) |
479 | { | |
480 | struct ifaddrs *ifa; | |
481 | int r = getifaddrs(&ifa); | |
482 | if (r < 0) { | |
483 | lderr(cct) << "unable to fetch interfaces and addresses: " << cpp_strerror(errno) << dendl; | |
484 | exit(1); | |
485 | } | |
486 | ||
487 | bool found = false; | |
11fdf7f2 | 488 | for (struct ifaddrs *addrs = ifa; addrs != nullptr; addrs = addrs->ifa_next) { |
7c673cae FG |
489 | if (addrs->ifa_addr) { |
490 | entity_addr_t a; | |
491 | a.set_sockaddr(addrs->ifa_addr); | |
11fdf7f2 TL |
492 | for (auto& p : ls) { |
493 | if (a.is_same_host(p)) { | |
494 | *match = p; | |
7c673cae FG |
495 | found = true; |
496 | goto out; | |
497 | } | |
498 | } | |
499 | } | |
500 | } | |
501 | ||
502 | out: | |
503 | freeifaddrs(ifa); | |
504 | return found; | |
505 | } | |
11fdf7f2 TL |
506 | |
507 | int get_iface_numa_node( | |
508 | const std::string& iface, | |
509 | int *node) | |
510 | { | |
511 | string fn = std::string("/sys/class/net/") + iface + "/device/numa_node"; | |
512 | ||
513 | int r = 0; | |
514 | char buf[1024]; | |
515 | char *endptr = 0; | |
516 | int fd = ::open(fn.c_str(), O_RDONLY); | |
517 | if (fd < 0) { | |
518 | return -errno; | |
519 | } | |
520 | r = safe_read(fd, &buf, sizeof(buf)); | |
521 | if (r < 0) { | |
522 | goto out; | |
523 | } | |
524 | buf[r] = 0; | |
525 | while (r > 0 && ::isspace(buf[--r])) { | |
526 | buf[r] = 0; | |
527 | } | |
528 | *node = strtoll(buf, &endptr, 10); | |
529 | if (endptr != buf + strlen(buf)) { | |
530 | r = -EINVAL; | |
531 | goto out; | |
532 | } | |
533 | r = 0; | |
534 | out: | |
535 | ::close(fd); | |
536 | return r; | |
537 | } |