2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
25 #include <boost/asio/ip/address_v4.hpp>
26 #include <arpa/inet.h>
27 #include <unordered_map>
33 #include <seastar/core/array_map.hh>
34 #include <seastar/net/byteorder.hh>
35 #include <seastar/core/byteorder.hh>
36 #include <seastar/net/arp.hh>
37 #include <seastar/net/ip_checksum.hh>
38 #include <seastar/net/const.hh>
39 #include <seastar/net/packet-util.hh>
40 #include <seastar/core/shared_ptr.hh>
41 #include <seastar/net/toeplitz.hh>
42 #include <seastar/net/udp.hh>
43 #include <seastar/core/metrics_registration.hh>
50 template <ip_protocol_num ProtoNum>
54 template <typename InetTraits>
// Default constructor: sets ip to 0, the value is_unspecified() tests for.
58 ipv4_address() : ip(0) {}
// Wraps a raw 32-bit address value; the argument is stored in ip unchanged.
// explicit, so a plain integer never converts to an address implicitly.
59 explicit ipv4_address(uint32_t ip) : ip(ip) {}
60 explicit ipv4_address(const std::string& addr) {
61 boost::system::error_code ec;
62 auto ipv4 = boost::asio::ip::address_v4::from_string(addr, ec);
64 throw std::runtime_error(format("Wrong format for IPv4 address {}. Please ensure it's in dotted-decimal format",
67 ip = static_cast<uint32_t>(std::move(ipv4).to_ulong());
69 ipv4_address(ipv4_addr addr) {
// Applies the caller-supplied adjuster to the single ip field and returns
// whatever the adjuster returns.
75 template <typename Adjuster>
76 auto adjust_endianness(Adjuster a) { return a(ip); }
78 friend bool operator==(ipv4_address x, ipv4_address y) {
81 friend bool operator!=(ipv4_address x, ipv4_address y) {
85 static ipv4_address read(const char* p) {
87 ia.ip = read_be<uint32_t>(p);
90 static ipv4_address consume(const char*& p) {
95 void write(char* p) const {
96 write_be<uint32_t>(p, ip);
98 void produce(char*& p) const {
99 produce_be<uint32_t>(p, ip);
101 static constexpr size_t size() {
104 } __attribute__((packed));
// True when addr.ip is 0, i.e. the value a default-constructed ipv4_address holds.
106 static inline bool is_unspecified(ipv4_address addr) { return addr.ip == 0; }
108 std::ostream& operator<<(std::ostream& os, ipv4_address a);
117 struct hash<seastar::net::ipv4_address> {
// The hash is the address value itself, converted to size_t; no extra mixing is applied.
118 size_t operator()(seastar::net::ipv4_address a) const { return a.ip; }
128 using address_type = ipv4_address;
129 using inet_type = ipv4_l4<ip_protocol_num::tcp>;
133 ethernet_address e_dst;
134 ip_protocol_num proto_num;
136 using packet_provider_type = std::function<compat::optional<l4packet> ()>;
137 static void tcp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
138 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::tcp), len);
140 static void udp_pseudo_header_checksum(checksummer& csum, ipv4_address src, ipv4_address dst, uint16_t len) {
141 csum.sum_many(src.ip.raw, dst.ip.raw, uint8_t(0), uint8_t(ip_protocol_num::udp), len);
143 static constexpr uint8_t ip_hdr_len_min = ipv4_hdr_len_min;
146 template <ip_protocol_num ProtoNum>
// Initialises _inet from the given ipv4 instance.
// NOTE(review): _inet is presumably an ipv4& (its declaration is not visible
// here); if so, `inet` must outlive this object — confirm.
151 ipv4_l4(ipv4& inet) : _inet(inet) {}
152 void register_packet_provider(ipv4_traits::packet_provider_type func);
153 future<ethernet_address> get_l2_dst_address(ipv4_address to);
// Virtual destructor with no work of its own; lets objects be destroyed
// through an ip_protocol pointer.
158 virtual ~ip_protocol() = default;
159 virtual void received(packet p, ipv4_address from, ipv4_address to) = 0;
// Default implementation: ignores all three arguments (out_hash_data is left
// untouched) and returns true. Subclasses may override.
160 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) { return true; }
163 template <typename InetTraits>
165 using ipaddr = typename InetTraits::address_type;
166 using inet_type = typename InetTraits::inet_type;
172 uint16_t foreign_port;
174 bool operator==(const l4connid& x) const {
175 return local_ip == x.local_ip
176 && foreign_ip == x.foreign_ip
177 && local_port == x.local_port
178 && foreign_port == x.foreign_port;
181 uint32_t hash(rss_key_type rss_key) {
182 forward_hash hash_data;
183 hash_data.push_back(hton(foreign_ip.ip));
184 hash_data.push_back(hton(local_ip.ip));
185 hash_data.push_back(hton(foreign_port));
186 hash_data.push_back(hton(local_port));
187 return toeplitz_hash(rss_key, hash_data);
191 class ipv4_tcp final : public ip_protocol {
192 ipv4_l4<ip_protocol_num::tcp> _inet_l4;
193 std::unique_ptr<tcp<ipv4_traits>> _tcp;
195 ipv4_tcp(ipv4& inet);
197 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
198 virtual bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
203 enum class msg_type : uint8_t {
209 packed<uint16_t> csum;
210 packed<uint32_t> rest;
211 template <typename Adjuster>
212 auto adjust_endianness(Adjuster a) {
215 } __attribute__((packed));
220 using ipaddr = ipv4_address;
221 using inet_type = ipv4_l4<ip_protocol_num::icmp>;
222 explicit icmp(inet_type& inet) : _inet(inet) {
223 _inet.register_packet_provider([this] {
224 compat::optional<ipv4_traits::l4packet> l4p;
225 if (!_packetq.empty()) {
226 l4p = std::move(_packetq.front());
227 _packetq.pop_front();
228 _queue_space.signal(l4p.value().p.len());
233 void received(packet p, ipaddr from, ipaddr to);
236 circular_buffer<ipv4_traits::l4packet> _packetq;
237 semaphore _queue_space = {212992};
240 class ipv4_icmp final : public ip_protocol {
241 ipv4_l4<ip_protocol_num::icmp> _inet_l4;
// Builds the L4 adapter from `inet`, then constructs _icmp on top of that adapter.
// NOTE(review): icmp's constructor takes its inet_type by reference, so _inet_l4
// must be declared (and therefore initialised) before _icmp — the _icmp
// declaration is not visible here; confirm the member order.
244 ipv4_icmp(ipv4& inet) : _inet_l4(inet), _icmp(_inet_l4) {}
// Hands the packet, together with its source and destination addresses,
// straight to the wrapped icmp handler.
// Marked override (as ipv4_tcp and ipv4_udp do) so that any drift from the
// ip_protocol::received signature becomes a compile error.
245 virtual void received(packet p, ipv4_address from, ipv4_address to) override {
246 _icmp.received(std::move(p), from, to);
251 class ipv4_udp : public ip_protocol {
252 using connid = l4connid<ipv4_traits>;
253 using connid_hash = typename connid::connid_hash;
256 static const int default_queue_size;
258 static const uint16_t min_anonymous_port = 32768;
260 std::unordered_map<uint16_t, lw_shared_ptr<udp_channel_state>> _channels;
261 int _queue_size = default_queue_size;
262 uint16_t _next_anonymous_port = min_anonymous_port;
263 circular_buffer<ipv4_traits::l4packet> _packetq;
265 uint16_t next_port(uint16_t port);
// Initialises _proto from `proto` and _port from `port`; does nothing else.
272 registration(ipv4_udp &proto, uint16_t port) : _proto(proto), _port(port) {};
// Erase by key rather than erase(find(_port)): the iterator overload has
// undefined behaviour when find() returns end() (port not in the map, e.g.
// on a repeated removal), whereas the key overload is then a harmless no-op.
275 _proto._channels.erase(_port);
278 uint16_t port() const {
283 ipv4_udp(ipv4& inet);
284 udp_channel make_channel(ipv4_addr addr);
285 virtual void received(packet p, ipv4_address from, ipv4_address to) override;
286 void send(uint16_t src_port, ipv4_addr dst, packet &&p);
287 bool forward(forward_hash& out_hash_data, packet& p, size_t off) override;
// Replaces _queue_size (which starts out as default_queue_size) with `size`.
// The value is stored as given; no range check is performed.
288 void set_queue_size(int size) { _queue_size = size; }
293 struct ip_packet_filter {
// Virtual destructor with no work of its own; lets filters be destroyed
// through an ip_packet_filter pointer.
294 virtual ~ip_packet_filter() = default;
295 virtual future<> handle(packet& p, ip_hdr* iph, ethernet_address from, bool & handled) = 0;
298 struct ipv4_frag_id {
302 uint16_t identification;
304 bool operator==(const ipv4_frag_id& x) const {
305 return src_ip == x.src_ip &&
306 dst_ip == x.dst_ip &&
307 identification == x.identification &&
308 protocol == x.protocol;
312 struct ipv4_frag_id::hash : private std::hash<ipv4_address>,
313 private std::hash<uint16_t>, private std::hash<uint8_t> {
314 size_t operator()(const ipv4_frag_id& id) const noexcept {
315 using h1 = std::hash<ipv4_address>;
316 using h2 = std::hash<uint16_t>;
317 using h3 = std::hash<uint8_t>;
318 return h1::operator()(id.src_ip) ^
319 h1::operator()(id.dst_ip) ^
320 h2::operator()(id.identification) ^
321 h3::operator()(id.protocol);
326 using ipv4_packet_merger = packet_merger<uint32_t, ipv4_tag>;
330 using clock_type = lowres_clock;
331 using address_type = ipv4_address;
332 using proto_type = uint16_t;
// Returns the all-ones address (0xffffffff, i.e. 255.255.255.255).
333 static address_type broadcast_address() { return ipv4_address(0xffffffff); }
// Returns eth_protocol_num::ipv4 converted to proto_type (uint16_t).
334 static proto_type arp_protocol_type() { return proto_type(eth_protocol_num::ipv4); }
337 std::vector<ipv4_traits::packet_provider_type> _pkt_providers;
340 ipv4_address _host_address;
341 ipv4_address _gw_address;
342 ipv4_address _netmask;
344 subscription<packet, ethernet_address> _rx_packets;
348 array_map<ip_protocol*, 256> _l4;
349 ip_packet_filter * _packet_filter = nullptr;
352 ipv4_packet_merger data;
353 clock_type::time_point rx_time;
354 uint32_t mem_size = 0;
355 // fragment with MF == 0 indicates it is the last fragment
356 bool last_frag_received = false;
358 packet get_assembled_packet(ethernet_address from, ethernet_address to);
359 int32_t merge(ip_hdr &h, uint16_t offset, packet p);
362 std::unordered_map<ipv4_frag_id, frag, ipv4_frag_id::hash> _frags;
363 std::list<ipv4_frag_id> _frags_age;
364 static constexpr std::chrono::seconds _frag_timeout{30};
365 static constexpr uint32_t _frag_low_thresh{3 * 1024 * 1024};
366 static constexpr uint32_t _frag_high_thresh{4 * 1024 * 1024};
367 uint32_t _frag_mem{0};
368 timer<lowres_clock> _frag_timer;
369 circular_buffer<l3_protocol::l3packet> _packetq;
370 unsigned _pkt_provider_idx = 0;
371 metrics::metric_groups _metrics;
373 future<> handle_received_packet(packet p, ethernet_address from);
374 bool forward(forward_hash& out_hash_data, packet& p, size_t off);
375 compat::optional<l3_protocol::l3packet> get_packet();
376 bool in_my_netmask(ipv4_address a) const;
377 void frag_limit_mem();
379 void frag_drop(ipv4_frag_id frag_id, uint32_t dropped_size);
380 void frag_arm(clock_type::time_point now) {
381 auto tp = now + _frag_timeout;
385 auto now = clock_type::now();
389 explicit ipv4(interface* netif);
390 void set_host_address(ipv4_address ip);
391 ipv4_address host_address();
392 void set_gw_address(ipv4_address ip);
393 ipv4_address gw_address() const;
394 void set_netmask_address(ipv4_address ip);
395 ipv4_address netmask_address() const;
396 interface * netif() const {
399 // TODO or something. Should perhaps truly be a list
400 // of filters. With ordering. And blackjack. Etc.
401 // But for now, a simple single raw pointer suffices
402 void set_packet_filter(ip_packet_filter *);
403 ip_packet_filter * packet_filter() const;
404 void send(ipv4_address to, ip_protocol_num proto_num, packet p, ethernet_address e_dst);
// Returns a reference to the tcp<ipv4_traits> object reached through the
// _tcp member's own _tcp pointer. The pointer is dereferenced without a
// null check.
405 tcp<ipv4_traits>& get_tcp() { return *_tcp._tcp; }
// Returns a mutable reference to the _udp member.
406 ipv4_udp& get_udp() { return _udp; }
407 void register_l4(proto_type id, ip_protocol* handler);
// Forwards to the underlying interface's hw_features(); _netif is
// dereferenced without a null check.
408 const net::hw_features& hw_features() const { return _netif->hw_features(); }
409 static bool needs_frag(packet& p, ip_protocol_num proto_num, net::hw_features hw_features);
410 void learn(ethernet_address l2, ipv4_address l3) {
413 void register_packet_provider(ipv4_traits::packet_provider_type&& func) {
414 _pkt_providers.push_back(std::move(func));
416 future<ethernet_address> get_l2_dst_address(ipv4_address to);
419 template <ip_protocol_num ProtoNum>
421 void ipv4_l4<ProtoNum>::register_packet_provider(ipv4_traits::packet_provider_type func) {
422 _inet.register_packet_provider([func = std::move(func)] {
425 l4p.value().proto_num = ProtoNum;
431 template <ip_protocol_num ProtoNum>
433 future<ethernet_address> ipv4_l4<ProtoNum>::get_l2_dst_address(ipv4_address to) {
434 return _inet.get_l2_dst_address(to);
442 packed<uint16_t> len;
444 packed<uint16_t> frag;
445 enum class frag_bits : uint8_t { mf = 13, df = 14, reserved = 15, offset_shift = 3 };
448 packed<uint16_t> csum;
452 template <typename Adjuster>
453 auto adjust_endianness(Adjuster a) {
454 return a(len, id, frag, csum, src_ip, dst_ip);
// True when bit 13 (frag_bits::mf) of the frag field is set.
456 bool mf() { return frag & (1 << uint8_t(frag_bits::mf)); }
// True when bit 14 (frag_bits::df) of the frag field is set.
457 bool df() { return frag & (1 << uint8_t(frag_bits::df)); }
// Returns the frag field with its flag bits removed: shifting left by
// offset_shift (3) moves bits 13-15 (mf/df/reserved) past bit 15, and the
// uint16_t return type truncates them away. The remaining 13-bit field ends
// up multiplied by 8 — presumably converting 8-byte units into a byte
// offset; confirm against callers.
458 uint16_t offset() { return frag << uint8_t(frag_bits::offset_shift); }
459 } __attribute__((packed));
461 template <typename InetTraits>
462 struct l4connid<InetTraits>::connid_hash : private std::hash<ipaddr>, private std::hash<uint16_t> {
463 size_t operator()(const l4connid<InetTraits>& id) const noexcept {
464 using h1 = std::hash<ipaddr>;
465 using h2 = std::hash<uint16_t>;
466 return h1::operator()(id.local_ip)
467 ^ h1::operator()(id.foreign_ip)
468 ^ h2::operator()(id.local_port)
469 ^ h2::operator()(id.foreign_port);
473 void arp_learn(ethernet_address l2, ipv4_address l3);