git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/include/seastar/net/ip.hh
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / seastar / include / seastar / net / ip.hh
1 /*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18 /*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 *
21 */
22
23 #pragma once
24
25 #include <boost/asio/ip/address_v4.hpp>
26 #include <arpa/inet.h>
27 #include <unordered_map>
28 #include <cstdint>
29 #include <array>
30 #include <map>
31 #include <list>
32 #include <chrono>
33 #include <seastar/core/array_map.hh>
34 #include <seastar/net/byteorder.hh>
35 #include <seastar/core/byteorder.hh>
36 #include <seastar/net/arp.hh>
37 #include <seastar/net/ip_checksum.hh>
38 #include <seastar/net/const.hh>
39 #include <seastar/net/packet-util.hh>
40 #include <seastar/core/shared_ptr.hh>
41 #include <seastar/net/toeplitz.hh>
42 #include <seastar/net/udp.hh>
43 #include <seastar/core/metrics_registration.hh>
44
45 namespace seastar {
46
47 namespace net {
48
49 class ipv4;
50 template <ip_protocol_num ProtoNum>
51 class ipv4_l4;
52 struct ipv4_address;
53
54 template <typename InetTraits>
55 class tcp;
56
57 struct ipv4_address {
58 ipv4_address() : ip(0) {}
59 explicit ipv4_address(uint32_t ip) : ip(ip) {}
60 explicit ipv4_address(const std::string& addr) {
61 boost::system::error_code ec;
62 auto ipv4 = boost::asio::ip::address_v4::from_string(addr, ec);
63 if (ec) {
64 throw std::runtime_error(format("Wrong format for IPv4 address {}. Please ensure it's in dotted-decimal format",
65 addr));
66 }
67 ip = static_cast<uint32_t>(std::move(ipv4).to_ulong());
68 }
69 ipv4_address(ipv4_addr addr) {
70 ip = addr.ip;
71 }
72
73 packed<uint32_t> ip;
74
75 template <typename Adjuster>
76 auto adjust_endianness(Adjuster a) { return a(ip); }
77
78 friend bool operator==(ipv4_address x, ipv4_address y) {
79 return x.ip == y.ip;
80 }
81 friend bool operator!=(ipv4_address x, ipv4_address y) {
82 return x.ip != y.ip;
83 }
84
85 static ipv4_address read(const char* p) {
86 ipv4_address ia;
87 ia.ip = read_be<uint32_t>(p);
88 return ia;
89 }
90 static ipv4_address consume(const char*& p) {
91 auto ia = read(p);
92 p += 4;
93 return ia;
94 }
95 void write(char* p) const {
96 write_be<uint32_t>(p, ip);
97 }
98 void produce(char*& p) const {
99 produce_be<uint32_t>(p, ip);
100 }
101 static constexpr size_t size() {
102 return 4;
103 }
104 } __attribute__((packed));
105
106 static inline bool is_unspecified(ipv4_address addr) { return addr.ip == 0; }
107
108 std::ostream& operator<<(std::ostream& os, ipv4_address a);
109
110 }
111
112 }
113
114 namespace std {
115
116 template <>
117 struct hash<seastar::net::ipv4_address> {
118 size_t operator()(seastar::net::ipv4_address a) const { return a.ip; }
119 };
120
121 }
122
123 namespace seastar {
124
125 namespace net {
126
127 struct ipv4_traits {
128 using address_type = ipv4_address;
129 using inet_type = ipv4_l4<ip_protocol_num::tcp>;
130 struct l4packet {
131 ipv4_address to;
132 packet p;
133 ethernet_address e_dst;
134 ip_protocol_num proto_num;
135 };
136 using packet_provider_type = std::function<compat::optional<l4packet> ()>;
137 static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
138 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
139 }
140 static void udp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
141 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::udp), len);
142 }
143 static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
144 };
145
146 template <ip_protocol_num ProtoNum>
147 class ipv4_l4 {
148 public:
149 ipv4& _inet;
150 public:
151 ipv4_l4(ipv4& inet) : _inet(inet) {}
152 void register_packet_provider(ipv4_traits::packet_provider_type func);
153 future<ethernet_address> get_l2_dst_address(ipv4_address to);
154 };
155
156 class ip_protocol {
157 public:
158 virtual ~ip_protocol() {}
159 virtual void received(packet p, ipv4_address from, ipv4_address to) = 0;
160 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) { return true; }
161 };
162
163 template <typename InetTraits>
164 struct l4connid {
165 using ipaddr = typename InetTraits::address_type;
166 using inet_type = typename InetTraits::inet_type;
167 struct connid_hash;
168
169 ipaddr local_ip;
170 ipaddr foreign_ip;
171 uint16_t local_port;
172 uint16_t foreign_port;
173
174 bool operator==(const l4connid& x) const {
175 return local_ip == x.local_ip
176 && foreign_ip == x.foreign_ip
177 && local_port == x.local_port
178 && foreign_port == x.foreign_port;
179 }
180
181 uint32_t hash(rss_key_type rss_key) {
182 forward_hash hash_data;
183 hash_data.push_back(hton(foreign_ip.ip));
184 hash_data.push_back(hton(local_ip.ip));
185 hash_data.push_back(hton(foreign_port));
186 hash_data.push_back(hton(local_port));
187 return toeplitz_hash(rss_key, hash_data);
188 }
189 };
190
191 class ipv4_tcp final : public ip_protocol {
192 ipv4_l4<ip_protocol_num::tcp> _inet_l4;
193 std::unique_ptr<tcp<ipv4_traits>> _tcp;
194 public:
195 ipv4_tcp(ipv4& inet);
196 ~ipv4_tcp();
197 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
198 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
199 friend class ipv4;
200 };
201
202 struct icmp_hdr {
203 enum class msg_type : uint8_t {
204 echo_reply = 0,
205 echo_request = 8,
206 };
207 msg_type type;
208 uint8_t code;
209 packed<uint16_t> csum;
210 packed<uint32_t> rest;
211 template <typename Adjuster>
212 auto adjust_endianness(Adjuster a) {
213 return a(csum);
214 }
215 } __attribute__((packed));
216
217
218 class icmp {
219 public:
220 using ipaddr = ipv4_address;
221 using inet_type = ipv4_l4<ip_protocol_num::icmp>;
222 explicit icmp(inet_type& inet) : _inet(inet) {
223 _inet.register_packet_provider([this] {
224 compat::optional<ipv4_traits::l4packet> l4p;
225 if (!_packetq.empty()) {
226 l4p = std::move(_packetq.front());
227 _packetq.pop_front();
228 _queue_space.signal(l4p.value().p.len());
229 }
230 return l4p;
231 });
232 }
233 void received(packet p, ipaddr from, ipaddr to);
234 private:
235 inet_type& _inet;
236 circular_buffer<ipv4_traits::l4packet> _packetq;
237 semaphore _queue_space = {212992};
238 };
239
240 class ipv4_icmp final : public ip_protocol {
241 ipv4_l4<ip_protocol_num::icmp> _inet_l4;
242 icmp _icmp;
243 public:
244 ipv4_icmp(ipv4& inet) : _inet_l4(inet), _icmp(_inet_l4) {}
245 virtual void received(packet p, ipv4_address from, ipv4_address to) {
246 _icmp.received(std::move(p), from, to);
247 }
248 friend class ipv4;
249 };
250
251 class ipv4_udp : public ip_protocol {
252 using connid = l4connid<ipv4_traits>;
253 using connid_hash = typename connid::connid_hash;
254
255 public:
256 static const int default_queue_size;
257 private:
258 static const uint16_t min_anonymous_port = 32768;
259 ipv4 &_inet;
260 std::unordered_map<uint16_t, lw_shared_ptr<udp_channel_state>> _channels;
261 int _queue_size = default_queue_size;
262 uint16_t _next_anonymous_port = min_anonymous_port;
263 circular_buffer<ipv4_traits::l4packet> _packetq;
264 private:
265 uint16_t next_port(uint16_t port);
266 public:
267 class registration {
268 private:
269 ipv4_udp &_proto;
270 uint16_t _port;
271 public:
272 registration(ipv4_udp &proto, uint16_t port) : _proto(proto), _port(port) {};
273
274 void unregister() {
275 _proto._channels.erase(_proto._channels.find(_port));
276 }
277
278 uint16_t port() const {
279 return _port;
280 }
281 };
282
283 ipv4_udp(ipv4& inet);
284 udp_channel make_channel(ipv4_addr addr);
285 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
286 void send(uint16_t src_port, ipv4_addr dst, packet &&p);
287 bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
288 void set_queue_size(int size) { _queue_size = size; }
289 };
290
291 struct ip_hdr;
292
293 struct ip_packet_filter {
294 virtual ~ip_packet_filter() {};
295 virtual future<> handle(packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
296 };
297
298 struct ipv4_frag_id {
299 struct hash;
300 ipv4_address src_ip;
301 ipv4_address dst_ip;
302 uint16_t identification;
303 uint8_t protocol;
304 bool operator==(const ipv4_frag_id& x) const {
305 return src_ip == x.src_ip &&
306 dst_ip == x.dst_ip &&
307 identification == x.identification &&
308 protocol == x.protocol;
309 }
310 };
311
312 struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
313 private std::hash<uint16_t>, private std::hash<uint8_t> {
314 size_t operator()(const ipv4_frag_id& id) const noexcept {
315 using h1 = std::hash<ipv4_address>;
316 using h2 = std::hash<uint16_t>;
317 using h3 = std::hash<uint8_t>;
318 return h1::operator()(id.src_ip) ^
319 h1::operator()(id.dst_ip) ^
320 h2::operator()(id.identification) ^
321 h3::operator()(id.protocol);
322 }
323 };
324
325 struct ipv4_tag {};
326 using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>;
327
328 class ipv4 {
329 public:
330 using clock_type = lowres_clock;
331 using address_type = ipv4_address;
332 using proto_type = uint16_t;
333 static address_type broadcast_address() { return ipv4_address(0xffffffff); }
334 static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
335 private:
336 interface* _netif;
337 std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
338 arp _global_arp;
339 arp_for<ipv4> _arp;
340 ipv4_address _host_address;
341 ipv4_address _gw_address;
342 ipv4_address _netmask;
343 l3_protocol _l3;
344 subscription<packet, ethernet_address> _rx_packets;
345 ipv4_tcp _tcp;
346 ipv4_icmp _icmp;
347 ipv4_udp _udp;
348 array_map<ip_protocol*, 256> _l4;
349 ip_packet_filter * _packet_filter = nullptr;
350 struct frag {
351 packet header;
352 ipv4_packet_merger data;
353 clock_type::time_point rx_time;
354 uint32_t mem_size = 0;
355 // fragment with MF == 0 inidates it is the last fragment
356 bool last_frag_received = false;
357
358 packet get_assembled_packet(ethernet_address from, ethernet_address to);
359 int32_t merge(ip_hdr &h, uint16_t offset, packet p);
360 bool is_complete();
361 };
362 std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
363 std::list<ipv4_frag_id> _frags_age;
364 static constexpr std::chrono::seconds _frag_timeout{30};
365 static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
366 static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
367 uint32_t _frag_mem{0};
368 timer<lowres_clock> _frag_timer;
369 circular_buffer<l3_protocol::l3packet> _packetq;
370 unsigned _pkt_provider_idx = 0;
371 metrics::metric_groups _metrics;
372 private:
373 future<> handle_received_packet(packet p, ethernet_address from);
374 bool forward(forward_hash& out_hash_data, packet& p, size_t off);
375 compat::optional<l3_protocol::l3packet> get_packet();
376 bool in_my_netmask(ipv4_address a) const;
377 void frag_limit_mem();
378 void frag_timeout();
379 void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size);
380 void frag_arm(clock_type::time_point now) {
381 auto tp = now + _frag_timeout;
382 _frag_timer.arm(tp);
383 }
384 void frag_arm() {
385 auto now = clock_type::now();
386 frag_arm(now);
387 }
388 public:
389 explicit ipv4(interface* netif);
390 void set_host_address(ipv4_address ip);
391 ipv4_address host_address();
392 void set_gw_address(ipv4_address ip);
393 ipv4_address gw_address() const;
394 void set_netmask_address(ipv4_address ip);
395 ipv4_address netmask_address() const;
396 interface * netif() const {
397 return _netif;
398 }
399 // TODO or something. Should perhaps truly be a list
400 // of filters. With ordering. And blackjack. Etc.
401 // But for now, a simple single raw pointer suffices
402 void set_packet_filter(ip_packet_filter *);
403 ip_packet_filter * packet_filter() const;
404 void send(ipv4_address to, ip_protocol_num proto_num, packet p, ethernet_address e_dst);
405 tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
406 ipv4_udp& get_udp() { return _udp; }
407 void register_l4(proto_type id, ip_protocol* handler);
408 const net::hw_features& hw_features() const { return _netif->hw_features(); }
409 static bool needs_frag(packet& p, ip_protocol_num proto_num, net::hw_features hw_features);
410 void learn(ethernet_address l2, ipv4_address l3) {
411 _arp.learn(l2, l3);
412 }
413 void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
414 _pkt_providers.push_back(std::move(func));
415 }
416 future<ethernet_address> get_l2_dst_address(ipv4_address to);
417 };
418
419 template <ip_protocol_num ProtoNum>
420 inline
421 void ipv4_l4<ProtoNum>::register_packet_provider(ipv4_traits::packet_provider_type func) {
422 _inet.register_packet_provider([func = std::move(func)] {
423 auto l4p = func();
424 if (l4p) {
425 l4p.value().proto_num = ProtoNum;
426 }
427 return l4p;
428 });
429 }
430
431 template <ip_protocol_num ProtoNum>
432 inline
433 future<ethernet_address> ipv4_l4<ProtoNum>::get_l2_dst_address(ipv4_address to) {
434 return _inet.get_l2_dst_address(to);
435 }
436
437 struct ip_hdr {
438 uint8_t ihl : 4;
439 uint8_t ver : 4;
440 uint8_t dscp : 6;
441 uint8_t ecn : 2;
442 packed<uint16_t> len;
443 packed<uint16_t> id;
444 packed<uint16_t> frag;
445 enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
446 uint8_t ttl;
447 uint8_t ip_proto;
448 packed<uint16_t> csum;
449 ipv4_address src_ip;
450 ipv4_address dst_ip;
451 uint8_t options[0];
452 template <typename Adjuster>
453 auto adjust_endianness(Adjuster a) {
454 return a(len, id, frag, csum, src_ip, dst_ip);
455 }
456 bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
457 bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
458 uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
459 } __attribute__((packed));
460
461 template <typename InetTraits>
462 struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
463 size_t operator()(const l4connid<InetTraits>& id) const noexcept {
464 using h1 = std::hash<ipaddr>;
465 using h2 = std::hash<uint16_t>;
466 return h1::operator()(id.local_ip)
467 ^ h1::operator()(id.foreign_ip)
468 ^ h2::operator()(id.local_port)
469 ^ h2::operator()(id.foreign_port);
470 }
471 };
472
473 void arp_learn(ethernet_address l2, ipv4_address l3);
474
475 }
476
477 }