]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | /* | |
3 | * This file is open source software, licensed to you under the terms | |
4 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
5 | * distributed with this work for additional information regarding copyright | |
6 | * ownership. You may not use this file except in compliance with the License. | |
7 | * | |
8 | * You may obtain a copy of the License at | |
9 | * | |
10 | * http://www.apache.org/licenses/LICENSE-2.0 | |
11 | * | |
12 | * Unless required by applicable law or agreed to in writing, | |
13 | * software distributed under the License is distributed on an | |
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
15 | * KIND, either express or implied. See the License for the | |
16 | * specific language governing permissions and limitations | |
17 | * under the License. | |
18 | */ | |
19 | /* | |
20 | * Copyright (C) 2014 Cloudius Systems, Ltd. | |
21 | * | |
22 | */ | |
7c673cae FG |
23 | |
24 | #ifndef CEPH_MSG_IP_H_ | |
25 | #define CEPH_MSG_IP_H_ | |
26 | ||
27 | #include <arpa/inet.h> | |
28 | #include <unordered_map> | |
29 | #include <cstdint> | |
30 | #include <array> | |
31 | #include <map> | |
32 | #include <list> | |
33 | #include <chrono> | |
34 | ||
35 | #include "msg/async/Event.h" | |
36 | #include "common/Throttle.h" | |
37 | ||
38 | #include "array_map.h" | |
39 | #include "ARP.h" | |
40 | #include "IPChecksum.h" | |
41 | #include "ip_types.h" | |
42 | #include "const.h" | |
43 | #include "net.h" | |
44 | #include "PacketUtil.h" | |
45 | #include "toeplitz.h" | |
46 | ||
47 | class ipv4; | |
48 | template <ip_protocol_num ProtoNum> | |
49 | class ipv4_l4; | |
50 | ||
51 | template <typename InetTraits> | |
52 | class tcp; | |
53 | ||
54 | struct ipv4_traits { | |
55 | using address_type = ipv4_address; | |
56 | using inet_type = ipv4_l4<ip_protocol_num::tcp>; | |
57 | struct l4packet { | |
58 | ipv4_address to; | |
59 | Packet p; | |
60 | ethernet_address e_dst; | |
61 | ip_protocol_num proto_num; | |
62 | }; | |
63 | using packet_provider_type = std::function<Tub<l4packet> ()>; | |
64 | static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) { | |
65 | csum.sum_many(src.ip, dst.ip, uint8_t(0), uint8_t(ip_protocol_num::tcp), len); | |
66 | } | |
67 | static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min; | |
68 | }; | |
69 | ||
70 | template <ip_protocol_num ProtoNum> | |
71 | class ipv4_l4 { | |
72 | public: | |
73 | ipv4& _inet; | |
74 | public: | |
75 | ipv4_l4(ipv4& inet) : _inet(inet) {} | |
76 | void register_packet_provider(ipv4_traits::packet_provider_type func); | |
77 | void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb); | |
78 | }; | |
79 | ||
80 | class ip_protocol { | |
81 | public: | |
82 | virtual ~ip_protocol() {} | |
83 | virtual void received(Packet p, ipv4_address from, ipv4_address to) = 0; | |
84 | virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) { return true; } | |
85 | }; | |
86 | ||
87 | template <typename InetTraits> | |
88 | struct l4connid { | |
89 | using ipaddr = typename InetTraits::address_type; | |
90 | using inet_type = typename InetTraits::inet_type; | |
91 | struct connid_hash; | |
92 | ||
93 | ipaddr local_ip; | |
94 | ipaddr foreign_ip; | |
95 | uint16_t local_port; | |
96 | uint16_t foreign_port; | |
97 | ||
98 | bool operator==(const l4connid& x) const { | |
99 | return local_ip == x.local_ip | |
100 | && foreign_ip == x.foreign_ip | |
101 | && local_port == x.local_port | |
102 | && foreign_port == x.foreign_port; | |
103 | } | |
104 | ||
105 | uint32_t hash(const rss_key_type& rss_key) { | |
106 | forward_hash hash_data; | |
107 | hash_data.push_back(hton(foreign_ip.ip)); | |
108 | hash_data.push_back(hton(local_ip.ip)); | |
109 | hash_data.push_back(hton(foreign_port)); | |
110 | hash_data.push_back(hton(local_port)); | |
111 | return toeplitz_hash(rss_key, hash_data); | |
112 | } | |
113 | }; | |
114 | ||
115 | class ipv4_tcp final : public ip_protocol { | |
116 | ipv4_l4<ip_protocol_num::tcp> _inet_l4; | |
117 | std::unique_ptr<tcp<ipv4_traits>> _tcp; | |
118 | public: | |
119 | ipv4_tcp(ipv4& inet, EventCenter *c); | |
120 | ~ipv4_tcp(); | |
11fdf7f2 | 121 | virtual void received(Packet p, ipv4_address from, ipv4_address to) override; |
7c673cae FG |
122 | virtual bool forward(forward_hash& out_hash_data, Packet& p, size_t off) override; |
123 | friend class ipv4; | |
124 | }; | |
125 | ||
7c673cae FG |
126 | |
127 | class icmp { | |
128 | public: | |
129 | using ipaddr = ipv4_address; | |
130 | using inet_type = ipv4_l4<ip_protocol_num::icmp>; | |
131 | explicit icmp(CephContext *c, inet_type& inet) | |
132 | : cct(c), _inet(inet), _queue_space(c, "DPDK::icmp::_queue_space", 212992) { | |
133 | _inet.register_packet_provider([this] { | |
134 | Tub<ipv4_traits::l4packet> l4p; | |
135 | if (!_packetq.empty()) { | |
136 | l4p = std::move(_packetq.front()); | |
137 | _packetq.pop_front(); | |
138 | _queue_space.put(l4p->p.len()); | |
139 | } | |
140 | return l4p; | |
141 | }); | |
142 | } | |
143 | void received(Packet p, ipaddr from, ipaddr to); | |
144 | ||
145 | private: | |
146 | CephContext *cct; | |
147 | // ipv4_l4<ip_protocol_num::icmp> | |
148 | inet_type& _inet; | |
149 | circular_buffer<ipv4_traits::l4packet> _packetq; | |
150 | Throttle _queue_space; | |
151 | }; | |
152 | ||
153 | class ipv4_icmp final : public ip_protocol { | |
154 | CephContext *cct; | |
155 | ipv4_l4<ip_protocol_num::icmp> _inet_l4; | |
156 | icmp _icmp; | |
157 | public: | |
158 | ipv4_icmp(CephContext *c, ipv4& inet) : cct(c), _inet_l4(inet), _icmp(c, _inet_l4) {} | |
11fdf7f2 | 159 | virtual void received(Packet p, ipv4_address from, ipv4_address to) override { |
7c673cae FG |
160 | _icmp.received(std::move(p), from, to); |
161 | } | |
162 | friend class ipv4; | |
163 | }; | |
164 | ||
165 | struct ip_hdr; | |
166 | ||
167 | struct ip_packet_filter { | |
168 | virtual ~ip_packet_filter() {}; | |
169 | virtual void handle(Packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0; | |
170 | }; | |
171 | ||
172 | struct ipv4_frag_id { | |
173 | struct hash; | |
174 | ipv4_address src_ip; | |
175 | ipv4_address dst_ip; | |
176 | uint16_t identification; | |
177 | uint8_t protocol; | |
178 | bool operator==(const ipv4_frag_id& x) const { | |
179 | return src_ip == x.src_ip && | |
180 | dst_ip == x.dst_ip && | |
181 | identification == x.identification && | |
182 | protocol == x.protocol; | |
183 | } | |
184 | }; | |
185 | ||
186 | struct ipv4_frag_id::hash : private std::hash<ipv4_address>, | |
187 | private std::hash<uint16_t>, private std::hash<uint8_t> { | |
188 | size_t operator()(const ipv4_frag_id& id) const noexcept { | |
189 | using h1 = std::hash<ipv4_address>; | |
190 | using h2 = std::hash<uint16_t>; | |
191 | using h3 = std::hash<uint8_t>; | |
192 | return h1::operator()(id.src_ip) ^ | |
193 | h1::operator()(id.dst_ip) ^ | |
194 | h2::operator()(id.identification) ^ | |
195 | h3::operator()(id.protocol); | |
196 | } | |
197 | }; | |
198 | ||
199 | struct ipv4_tag {}; | |
200 | using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>; | |
201 | ||
202 | class interface; | |
203 | ||
204 | class ipv4 { | |
205 | public: | |
206 | using address_type = ipv4_address; | |
207 | using proto_type = uint16_t; | |
208 | static address_type broadcast_address() { return ipv4_address(0xffffffff); } | |
209 | static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); } | |
210 | CephContext *cct; | |
211 | EventCenter *center; | |
212 | ||
213 | private: | |
214 | interface* _netif; | |
215 | std::vector<ipv4_traits::packet_provider_type> _pkt_providers; | |
216 | Tub<uint64_t> frag_timefd; | |
217 | EventCallbackRef frag_handler; | |
218 | arp _global_arp; | |
219 | arp_for<ipv4> _arp; | |
220 | ipv4_address _host_address; | |
221 | ipv4_address _gw_address; | |
222 | ipv4_address _netmask; | |
223 | l3_protocol _l3; | |
224 | subscription<Packet, ethernet_address> _rx_packets; | |
225 | ipv4_tcp _tcp; | |
226 | ipv4_icmp _icmp; | |
227 | array_map<ip_protocol*, 256> _l4; | |
228 | ip_packet_filter *_packet_filter; | |
229 | struct frag { | |
230 | Packet header; | |
231 | ipv4_packet_merger data; | |
232 | utime_t rx_time; | |
233 | uint32_t mem_size = 0; | |
234 | // fragment with MF == 0 inidates it is the last fragment | |
235 | bool last_frag_received = false; | |
236 | ||
237 | Packet get_assembled_packet(ethernet_address from, ethernet_address to); | |
238 | int32_t merge(ip_hdr &h, uint16_t offset, Packet p); | |
239 | bool is_complete(); | |
240 | }; | |
241 | std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags; | |
242 | std::list<ipv4_frag_id> _frags_age; | |
243 | static utime_t _frag_timeout; | |
244 | static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024}; | |
245 | static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024}; | |
246 | uint32_t _frag_mem = 0; | |
247 | circular_buffer<l3_protocol::l3packet> _packetq; | |
248 | unsigned _pkt_provider_idx = 0; | |
249 | PerfCounters *perf_logger; | |
250 | ||
251 | private: | |
252 | int handle_received_packet(Packet p, ethernet_address from); | |
253 | bool forward(forward_hash& out_hash_data, Packet& p, size_t off); | |
254 | Tub<l3_protocol::l3packet> get_packet(); | |
255 | bool in_my_netmask(ipv4_address a) const { | |
256 | return !((a.ip ^ _host_address.ip) & _netmask.ip); | |
257 | } | |
258 | void frag_limit_mem(); | |
259 | void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size) { | |
260 | _frags.erase(frag_id); | |
261 | _frag_mem -= dropped_size; | |
262 | } | |
263 | void frag_arm(utime_t now) { | |
264 | auto tp = now + _frag_timeout; | |
265 | frag_timefd.construct(center->create_time_event(tp.to_nsec() / 1000, frag_handler)); | |
266 | } | |
267 | void frag_arm() { | |
268 | auto now = ceph_clock_now(); | |
269 | frag_timefd.construct(center->create_time_event(now.to_nsec() / 1000, frag_handler)); | |
270 | } | |
271 | ||
272 | public: | |
273 | void frag_timeout(); | |
274 | ||
275 | public: | |
276 | explicit ipv4(CephContext *c, EventCenter *cen, interface* netif); | |
277 | ~ipv4() { | |
278 | delete frag_handler; | |
279 | } | |
280 | void set_host_address(ipv4_address ip) { | |
281 | _host_address = ip; | |
282 | _arp.set_self_addr(ip); | |
283 | } | |
284 | ipv4_address host_address() { | |
285 | return _host_address; | |
286 | } | |
287 | void set_gw_address(ipv4_address ip) { | |
288 | _gw_address = ip; | |
289 | } | |
290 | ipv4_address gw_address() const { | |
291 | return _gw_address; | |
292 | } | |
293 | void set_netmask_address(ipv4_address ip) { | |
294 | _netmask = ip; | |
295 | } | |
296 | ipv4_address netmask_address() const { | |
297 | return _netmask; | |
298 | } | |
299 | interface *netif() const { | |
300 | return _netif; | |
301 | } | |
302 | // TODO or something. Should perhaps truly be a list | |
303 | // of filters. With ordering. And blackjack. Etc. | |
304 | // But for now, a simple single raw pointer suffices | |
305 | void set_packet_filter(ip_packet_filter *f) { | |
306 | _packet_filter = f; | |
307 | } | |
308 | ip_packet_filter * packet_filter() const { | |
309 | return _packet_filter; | |
310 | } | |
311 | void send(ipv4_address to, ip_protocol_num proto_num, Packet p, ethernet_address e_dst); | |
312 | tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; } | |
313 | void register_l4(proto_type id, ip_protocol* handler); | |
314 | const hw_features& get_hw_features() const; | |
315 | static bool needs_frag(Packet& p, ip_protocol_num proto_num, hw_features hw_features) { | |
316 | if (p.len() + ipv4_hdr_len_min <= hw_features.mtu) | |
317 | return false; | |
318 | ||
319 | if ((proto_num == ip_protocol_num::tcp && hw_features.tx_tso)) | |
320 | return false; | |
321 | ||
322 | return true; | |
323 | } | |
324 | void learn(ethernet_address l2, ipv4_address l3) { | |
325 | _arp.learn(l2, l3); | |
326 | } | |
327 | void register_packet_provider(ipv4_traits::packet_provider_type&& func) { | |
328 | _pkt_providers.push_back(std::move(func)); | |
329 | } | |
330 | void wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb); | |
331 | }; | |
332 | ||
333 | template <ip_protocol_num ProtoNum> | |
334 | inline void ipv4_l4<ProtoNum>::register_packet_provider( | |
335 | ipv4_traits::packet_provider_type func) { | |
336 | _inet.register_packet_provider([func] { | |
337 | auto l4p = func(); | |
338 | if (l4p) { | |
339 | (*l4p).proto_num = ProtoNum; | |
340 | } | |
341 | return l4p; | |
342 | }); | |
343 | } | |
344 | ||
345 | template <ip_protocol_num ProtoNum> | |
346 | inline void ipv4_l4<ProtoNum>::wait_l2_dst_address(ipv4_address to, Packet p, resolution_cb cb) { | |
347 | _inet.wait_l2_dst_address(to, std::move(p), std::move(cb)); | |
348 | } | |
349 | ||
350 | struct ip_hdr { | |
351 | uint8_t ihl : 4; | |
352 | uint8_t ver : 4; | |
353 | uint8_t dscp : 6; | |
354 | uint8_t ecn : 2; | |
355 | uint16_t len; | |
356 | uint16_t id; | |
357 | uint16_t frag; | |
358 | enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 }; | |
359 | uint8_t ttl; | |
360 | uint8_t ip_proto; | |
361 | uint16_t csum; | |
362 | ipv4_address src_ip; | |
363 | ipv4_address dst_ip; | |
364 | uint8_t options[0]; | |
365 | ip_hdr hton() { | |
366 | ip_hdr hdr = *this; | |
367 | hdr.len = ::hton(len); | |
368 | hdr.id = ::hton(id); | |
369 | hdr.frag = ::hton(frag); | |
370 | hdr.csum = ::hton(csum); | |
371 | hdr.src_ip.ip = ::hton(src_ip.ip); | |
372 | hdr.dst_ip.ip = ::hton(dst_ip.ip); | |
373 | return hdr; | |
374 | } | |
375 | ip_hdr ntoh() { | |
376 | ip_hdr hdr = *this; | |
377 | hdr.len = ::ntoh(len); | |
378 | hdr.id = ::ntoh(id); | |
379 | hdr.frag = ::ntoh(frag); | |
380 | hdr.csum = ::ntoh(csum); | |
381 | hdr.src_ip = src_ip.ntoh(); | |
382 | hdr.dst_ip = dst_ip.ntoh(); | |
383 | return hdr; | |
384 | } | |
385 | ||
386 | bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); } | |
387 | bool df() { return frag & (1 << uint8_t(frag_bits::df)); } | |
388 | uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); } | |
389 | } __attribute__((packed)); | |
390 | ||
391 | template <typename InetTraits> | |
392 | struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> { | |
393 | size_t operator()(const l4connid<InetTraits>& id) const noexcept { | |
394 | using h1 = std::hash<ipaddr>; | |
395 | using h2 = std::hash<uint16_t>; | |
396 | return h1::operator()(id.local_ip) | |
397 | ^ h1::operator()(id.foreign_ip) | |
398 | ^ h2::operator()(id.local_port) | |
399 | ^ h2::operator()(id.foreign_port); | |
400 | } | |
401 | }; | |
402 | ||
403 | #endif /* CEPH_MSG_IP_H_ */ |