]> git.proxmox.com Git - ceph.git/blame - ceph/src/msg/async/dpdk/IP.h
import 15.2.0 Octopus source
[ceph.git] / ceph / src / msg / async / dpdk / IP.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2/*
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
7 *
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19/*
20 * Copyright (C) 2014 Cloudius Systems, Ltd.
21 *
22 */
7c673cae
FG
23
24#ifndef CEPH_MSG_IP_H_
25#define CEPH_MSG_IP_H_
26
27#include <arpa/inet.h>
28#include <unordered_map>
29#include <cstdint>
30#include <array>
31#include <map>
32#include <list>
33#include <chrono>
34
35#include "msg/async/Event.h"
36#include "common/Throttle.h"
37
38#include "array_map.h"
39#include "ARP.h"
40#include "IPChecksum.h"
41#include "ip_types.h"
42#include "const.h"
43#include "net.h"
44#include "PacketUtil.h"
45#include "toeplitz.h"
46
47class ipv4;
48template <ip_protocol_num ProtoNum>
49class ipv4_l4;
50
51template <typename InetTraits>
52class tcp;
53
54struct ipv4_traits {
55 using address_type = ipv4_address;
56 using inet_type = ipv4_l4<ip_protocol_num::tcp>;
57 struct l4packet {
58 ipv4_address to;
59 Packet p;
60 ethernet_address e_dst;
61 ip_protocol_num proto_num;
62 };
63 using packet_provider_type = std::function<Tub<l4packet> ()>;
64 static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
65 csum.sum_many(src.ip, dst.ip, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
66 }
67 static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
68};
69
70template <ip_protocol_num ProtoNum>
71class ipv4_l4 {
72 public:
73 ipv4& _inet;
74 public:
75 ipv4_l4(ipv4& inet) : _inet(inet) {}
76 void register_packet_provider(ipv4_traits::packet_provider_type func);
77 void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
78};
79
80class ip_protocol {
81 public:
82 virtual ~ip_protocol() {}
83 virtual void received(Packet p, ipv4_address from, ipv4_address to) = 0;
84 virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) { return true; }
85};
86
87template <typename InetTraits>
88struct l4connid {
89 using ipaddr = typename InetTraits::address_type;
90 using inet_type = typename InetTraits::inet_type;
91 struct connid_hash;
92
93 ipaddr local_ip;
94 ipaddr foreign_ip;
95 uint16_t local_port;
96 uint16_t foreign_port;
97
98 bool operator==(const l4connid& x) const {
99 return local_ip == x.local_ip
100 && foreign_ip == x.foreign_ip
101 && local_port == x.local_port
102 && foreign_port == x.foreign_port;
103 }
104
105 uint32_t hash(const rss_key_type& rss_key) {
106 forward_hash hash_data;
107 hash_data.push_back(hton(foreign_ip.ip));
108 hash_data.push_back(hton(local_ip.ip));
109 hash_data.push_back(hton(foreign_port));
110 hash_data.push_back(hton(local_port));
111 return toeplitz_hash(rss_key, hash_data);
112 }
113};
114
115class ipv4_tcp final : public ip_protocol {
116 ipv4_l4<ip_protocol_num::tcp> _inet_l4;
117 std::unique_ptr<tcp<ipv4_traits>> _tcp;
118 public:
119 ipv4_tcp(ipv4& inet, EventCenter *c);
120 ~ipv4_tcp();
11fdf7f2 121 virtual void received(Packet p, ipv4_address from, ipv4_address to) override;
7c673cae
FG
122 virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) override;
123 friend class ipv4;
124};
125
7c673cae
FG
126
127class icmp {
128 public:
129 using ipaddr = ipv4_address;
130 using inet_type = ipv4_l4<ip_protocol_num::icmp>;
131 explicit icmp(CephContext *c, inet_type& inet)
132 : cct(c), _inet(inet), _queue_space(c, "DPDK::icmp::_queue_space", 212992) {
133 _inet.register_packet_provider([this] {
134 Tub<ipv4_traits::l4packet> l4p;
135 if (!_packetq.empty()) {
136 l4p = std::move(_packetq.front());
137 _packetq.pop_front();
138 _queue_space.put(l4p->p.len());
139 }
140 return l4p;
141 });
142 }
143 void received(Packet p, ipaddr from, ipaddr to);
144
145 private:
146 CephContext *cct;
147 // ipv4_l4<ip_protocol_num::icmp>
148 inet_type& _inet;
149 circular_buffer<ipv4_traits::l4packet> _packetq;
150 Throttle _queue_space;
151};
152
153class ipv4_icmp final : public ip_protocol {
154 CephContext *cct;
155 ipv4_l4<ip_protocol_num::icmp> _inet_l4;
156 icmp _icmp;
157 public:
158 ipv4_icmp(CephContext *c, ipv4& inet) : cct(c), _inet_l4(inet), _icmp(c, _inet_l4) {}
11fdf7f2 159 virtual void received(Packet p, ipv4_address from, ipv4_address to) override {
7c673cae
FG
160 _icmp.received(std::move(p), from, to);
161 }
162 friend class ipv4;
163};
164
165struct ip_hdr;
166
167struct ip_packet_filter {
168 virtual ~ip_packet_filter() {};
169 virtual void handle(Packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
170};
171
172struct ipv4_frag_id {
173 struct hash;
174 ipv4_address src_ip;
175 ipv4_address dst_ip;
176 uint16_t identification;
177 uint8_t protocol;
178 bool operator==(const ipv4_frag_id& x) const {
179 return src_ip == x.src_ip &&
180 dst_ip == x.dst_ip &&
181 identification == x.identification &&
182 protocol == x.protocol;
183 }
184};
185
186struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
187 private std::hash<uint16_t>, private std::hash<uint8_t> {
188 size_t operator()(const ipv4_frag_id& id) const noexcept {
189 using h1 = std::hash<ipv4_address>;
190 using h2 = std::hash<uint16_t>;
191 using h3 = std::hash<uint8_t>;
192 return h1::operator()(id.src_ip) ^
193 h1::operator()(id.dst_ip) ^
194 h2::operator()(id.identification) ^
195 h3::operator()(id.protocol);
196 }
197};
198
199struct ipv4_tag {};
200using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>;
201
202class interface;
203
204class ipv4 {
205 public:
206 using address_type = ipv4_address;
207 using proto_type = uint16_t;
208 static address_type broadcast_address() { return ipv4_address(0xffffffff); }
209 static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
210 CephContext *cct;
211 EventCenter *center;
212
213 private:
214 interface* _netif;
215 std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
216 Tub<uint64_t> frag_timefd;
217 EventCallbackRef frag_handler;
218 arp _global_arp;
219 arp_for<ipv4> _arp;
220 ipv4_address _host_address;
221 ipv4_address _gw_address;
222 ipv4_address _netmask;
223 l3_protocol _l3;
224 subscription<Packet, ethernet_address> _rx_packets;
225 ipv4_tcp _tcp;
226 ipv4_icmp _icmp;
227 array_map<ip_protocol*, 256> _l4;
228 ip_packet_filter *_packet_filter;
229 struct frag {
230 Packet header;
231 ipv4_packet_merger data;
232 utime_t rx_time;
233 uint32_t mem_size = 0;
234 // fragment with MF == 0 inidates it is the last fragment
235 bool last_frag_received = false;
236
237 Packet get_assembled_packet(ethernet_address from, ethernet_address to);
238 int32_t merge(ip_hdr &h, uint16_t offset, Packet p);
239 bool is_complete();
240 };
241 std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
242 std::list<ipv4_frag_id> _frags_age;
243 static utime_t _frag_timeout;
244 static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
245 static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
246 uint32_t _frag_mem = 0;
247 circular_buffer<l3_protocol::l3packet> _packetq;
248 unsigned _pkt_provider_idx = 0;
249 PerfCounters *perf_logger;
250
251 private:
252 int handle_received_packet(Packet p, ethernet_address from);
253 bool forward(forward_hash& out_hash_data, Packet& p, size_t off);
254 Tub<l3_protocol::l3packet> get_packet();
255 bool in_my_netmask(ipv4_address a) const {
256 return !((a.ip ^ _host_address.ip) & _netmask.ip);
257 }
258 void frag_limit_mem();
259 void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size) {
260 _frags.erase(frag_id);
261 _frag_mem -= dropped_size;
262 }
263 void frag_arm(utime_t now) {
264 auto tp = now + _frag_timeout;
265 frag_timefd.construct(center->create_time_event(tp.to_nsec() / 1000, frag_handler));
266 }
267 void frag_arm() {
268 auto now = ceph_clock_now();
269 frag_timefd.construct(center->create_time_event(now.to_nsec() / 1000, frag_handler));
270 }
271
272 public:
273 void frag_timeout();
274
275 public:
276 explicit ipv4(CephContext *c, EventCenter *cen, interface* netif);
277 ~ipv4() {
278 delete frag_handler;
279 }
280 void set_host_address(ipv4_address ip) {
281 _host_address = ip;
282 _arp.set_self_addr(ip);
283 }
284 ipv4_address host_address() {
285 return _host_address;
286 }
287 void set_gw_address(ipv4_address ip) {
288 _gw_address = ip;
289 }
290 ipv4_address gw_address() const {
291 return _gw_address;
292 }
293 void set_netmask_address(ipv4_address ip) {
294 _netmask = ip;
295 }
296 ipv4_address netmask_address() const {
297 return _netmask;
298 }
299 interface *netif() const {
300 return _netif;
301 }
302 // TODO or something. Should perhaps truly be a list
303 // of filters. With ordering. And blackjack. Etc.
304 // But for now, a simple single raw pointer suffices
305 void set_packet_filter(ip_packet_filter *f) {
306 _packet_filter = f;
307 }
308 ip_packet_filter * packet_filter() const {
309 return _packet_filter;
310 }
311 void send(ipv4_address to, ip_protocol_num proto_num, Packet p, ethernet_address e_dst);
312 tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
313 void register_l4(proto_type id, ip_protocol* handler);
314 const hw_features& get_hw_features() const;
315 static bool needs_frag(Packet& p, ip_protocol_num proto_num, hw_features hw_features) {
316 if (p.len() + ipv4_hdr_len_min <= hw_features.mtu)
317 return false;
318
319 if ((proto_num == ip_protocol_num::tcp && hw_features.tx_tso))
320 return false;
321
322 return true;
323 }
324 void learn(ethernet_address l2, ipv4_address l3) {
325 _arp.learn(l2, l3);
326 }
327 void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
328 _pkt_providers.push_back(std::move(func));
329 }
330 void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb);
331};
332
333template <ip_protocol_num ProtoNum>
334inline void ipv4_l4<ProtoNum>::register_packet_provider(
335 ipv4_traits::packet_provider_type func) {
336 _inet.register_packet_provider([func] {
337 auto l4p = func();
338 if (l4p) {
339 (*l4p).proto_num = ProtoNum;
340 }
341 return l4p;
342 });
343}
344
345template <ip_protocol_num ProtoNum>
346inline void ipv4_l4<ProtoNum>::wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb) {
347 _inet.wait_l2_dst_address(to, std::move(p), std::move(cb));
348}
349
350struct ip_hdr {
351 uint8_t ihl : 4;
352 uint8_t ver : 4;
353 uint8_t dscp : 6;
354 uint8_t ecn : 2;
355 uint16_t len;
356 uint16_t id;
357 uint16_t frag;
358 enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
359 uint8_t ttl;
360 uint8_t ip_proto;
361 uint16_t csum;
362 ipv4_address src_ip;
363 ipv4_address dst_ip;
364 uint8_t options[0];
365 ip_hdr hton() {
366 ip_hdr hdr = *this;
367 hdr.len = ::hton(len);
368 hdr.id = ::hton(id);
369 hdr.frag = ::hton(frag);
370 hdr.csum = ::hton(csum);
371 hdr.src_ip.ip = ::hton(src_ip.ip);
372 hdr.dst_ip.ip = ::hton(dst_ip.ip);
373 return hdr;
374 }
375 ip_hdr ntoh() {
376 ip_hdr hdr = *this;
377 hdr.len = ::ntoh(len);
378 hdr.id = ::ntoh(id);
379 hdr.frag = ::ntoh(frag);
380 hdr.csum = ::ntoh(csum);
381 hdr.src_ip = src_ip.ntoh();
382 hdr.dst_ip = dst_ip.ntoh();
383 return hdr;
384 }
385
386 bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
387 bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
388 uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
389} __attribute__((packed));
390
391template <typename InetTraits>
392struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
393 size_t operator()(const l4connid<InetTraits>& id) const noexcept {
394 using h1 = std::hash<ipaddr>;
395 using h2 = std::hash<uint16_t>;
396 return h1::operator()(id.local_ip)
397 ^ h1::operator()(id.foreign_ip)
398 ^ h2::operator()(id.local_port)
399 ^ h2::operator()(id.foreign_port);
400 }
401};
402
403#endif /* CEPH_MSG_IP_H */