]> git.proxmox.com Git - ceph.git/blame - ceph/src/seastar/include/seastar/net/net.hh
import 15.2.0 Octopus source
[ceph.git] / ceph / src / seastar / include / seastar / net / net.hh
CommitLineData
11fdf7f2
TL
1/*
2 * This file is open source software, licensed to you under the terms
3 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
4 * distributed with this work for additional information regarding copyright
5 * ownership. You may not use this file except in compliance with the License.
6 *
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing,
12 * software distributed under the License is distributed on an
13 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 * KIND, either express or implied. See the License for the
15 * specific language governing permissions and limitations
16 * under the License.
17 */
18/*
19 * Copyright (C) 2014 Cloudius Systems, Ltd.
20 */
21
22#pragma once
23
24#include <seastar/core/reactor.hh>
25#include <seastar/core/deleter.hh>
26#include <seastar/core/queue.hh>
27#include <seastar/core/stream.hh>
28#include <seastar/core/metrics_registration.hh>
29#include <seastar/net/toeplitz.hh>
30#include <seastar/net/ethernet.hh>
31#include <seastar/net/packet.hh>
32#include <seastar/net/const.hh>
33#include <unordered_map>
34
35namespace seastar {
36
37namespace net {
38
39class packet;
40class interface;
41class device;
42class qp;
43class l3_protocol;
44
// Fixed-capacity byte buffer: bytes are appended with push_back() and read
// back by index. Multi-byte values are appended least-significant byte first.
class forward_hash {
    // Backing storage; only the first end_idx bytes have been written.
    uint8_t data[64];
    // Number of bytes appended so far (also the index of the next free slot).
    size_t end_idx = 0;
public:
    // Returns the number of bytes appended so far.
    size_t size() const {
        return end_idx;
    }
    // Appends a single byte. Asserts that the buffer is not already full.
    void push_back(uint8_t b) {
        assert(end_idx < sizeof(data));
        data[end_idx++] = b;
    }
    // Appends a 16-bit value as two bytes: low byte, then high byte.
    void push_back(uint16_t b) {
        push_back(uint8_t(b));
        push_back(uint8_t(b >> 8));
    }
    // Appends a 32-bit value as two 16-bit halves: low half, then high half
    // (so the four bytes end up least-significant first).
    void push_back(uint32_t b) {
        push_back(uint16_t(b));
        push_back(uint16_t(b >> 16));
    }
    // Unchecked read of the byte at idx; the caller must keep idx < size().
    const uint8_t& operator[](size_t idx) const {
        return data[idx];
    }
};
68
69struct hw_features {
70 // Enable tx ip header checksum offload
71 bool tx_csum_ip_offload = false;
72 // Enable tx l4 (TCP or UDP) checksum offload
73 bool tx_csum_l4_offload = false;
74 // Enable rx checksum offload
75 bool rx_csum_offload = false;
76 // LRO is enabled
77 bool rx_lro = false;
78 // Enable tx TCP segment offload
79 bool tx_tso = false;
80 // Enable tx UDP fragmentation offload
81 bool tx_ufo = false;
82 // Maximum Transmission Unit
83 uint16_t mtu = 1500;
84 // Maximun packet len when TCP/UDP offload is enabled
85 uint16_t max_packet_len = ip_packet_len_max - eth_hdr_len;
86};
87
88class l3_protocol {
89public:
90 struct l3packet {
91 eth_protocol_num proto_num;
92 ethernet_address to;
93 packet p;
94 };
95 using packet_provider_type = std::function<compat::optional<l3packet> ()>;
96private:
97 interface* _netif;
98 eth_protocol_num _proto_num;
99public:
100 explicit l3_protocol(interface* netif, eth_protocol_num proto_num, packet_provider_type func);
9f95a23c 101 future<> receive(
11fdf7f2
TL
102 std::function<future<> (packet, ethernet_address)> rx_fn,
103 std::function<bool (forward_hash&, packet&, size_t)> forward);
104private:
105 friend class interface;
106};
107
108class interface {
109 struct l3_rx_stream {
110 stream<packet, ethernet_address> packet_stream;
111 future<> ready;
112 std::function<bool (forward_hash&, packet&, size_t)> forward;
113 l3_rx_stream(std::function<bool (forward_hash&, packet&, size_t)>&& fw) : ready(packet_stream.started()), forward(fw) {}
114 };
115 std::unordered_map<uint16_t, l3_rx_stream> _proto_map;
116 std::shared_ptr<device> _dev;
11fdf7f2
TL
117 ethernet_address _hw_address;
118 net::hw_features _hw_features;
119 std::vector<l3_protocol::packet_provider_type> _pkt_providers;
120private:
121 future<> dispatch_packet(packet p);
122public:
123 explicit interface(std::shared_ptr<device> dev);
124 ethernet_address hw_address() { return _hw_address; }
125 const net::hw_features& hw_features() const { return _hw_features; }
9f95a23c 126 future<> register_l3(eth_protocol_num proto_num,
11fdf7f2
TL
127 std::function<future<> (packet p, ethernet_address from)> next,
128 std::function<bool (forward_hash&, packet&, size_t)> forward);
129 void forward(unsigned cpuid, packet p);
130 unsigned hash2cpu(uint32_t hash);
131 void register_packet_provider(l3_protocol::packet_provider_type func) {
132 _pkt_providers.push_back(std::move(func));
133 }
134 uint16_t hw_queues_count();
135 rss_key_type rss_key() const;
136 friend class l3_protocol;
137};
138
/**
 * Counters for successfully processed traffic.
 *
 * Every counter has a default member initializer of 0, so an object that is
 * default-initialized starts from a defined state instead of accumulating
 * onto indeterminate values.
 */
struct qp_stats_good {
    /**
     * Update the packets bunch related statistics.
     *
     * Update the last packets bunch size and the total packets counter.
     *
     * @param count Number of packets in the last packets bunch.
     */
    void update_pkts_bunch(uint64_t count) {
        last_bunch = count;
        packets += count;
    }

    /**
     * Increment the appropriate counters when a few fragments have been
     * processed in a copy-way.
     *
     * @param nr_frags Number of copied fragments
     * @param bytes    Number of copied bytes
     */
    void update_copy_stats(uint64_t nr_frags, uint64_t bytes) {
        // The parameters shadow the members of the same name; only the copy_*
        // counters are modified here.
        copy_frags += nr_frags;
        copy_bytes += bytes;
    }

    /**
     * Increment total fragments and bytes statistics
     *
     * @param nfrags Number of processed fragments
     * @param nbytes Number of bytes in the processed fragments
     */
    void update_frags_stats(uint64_t nfrags, uint64_t nbytes) {
        nr_frags += nfrags;
        bytes += nbytes;
    }

    uint64_t bytes = 0;      // total number of bytes
    uint64_t nr_frags = 0;   // total number of fragments
    uint64_t copy_frags = 0; // fragments that were copied on L2 level
    uint64_t copy_bytes = 0; // bytes that were copied on L2 level
    uint64_t packets = 0;    // total number of packets
    uint64_t last_bunch = 0; // number of packets in the last sent/received bunch
};
182
183struct qp_stats {
184 qp_stats() : rx{}, tx{} {}
185
186 struct {
187 struct qp_stats_good good;
188
189 struct {
190 void inc_csum_err() {
191 ++csum;
192 ++total;
193 }
194
195 void inc_no_mem() {
196 ++no_mem;
197 ++total;
198 }
199
200 uint64_t no_mem; // Packets dropped due to allocation failure
201 uint64_t total; // total number of erroneous packets
202 uint64_t csum; // packets with bad checksum
203 } bad;
204 } rx;
205
206 struct {
207 struct qp_stats_good good;
208 uint64_t linearized; // number of packets that were linearized
209 } tx;
210};
211
212class qp {
213 using packet_provider_type = std::function<compat::optional<packet> ()>;
214 std::vector<packet_provider_type> _pkt_providers;
215 compat::optional<std::array<uint8_t, 128>> _sw_reta;
216 circular_buffer<packet> _proxy_packetq;
217 stream<packet> _rx_stream;
218 reactor::poller _tx_poller;
219 circular_buffer<packet> _tx_packetq;
220
221protected:
222 const std::string _stats_plugin_name;
223 const std::string _queue_name;
224 metrics::metric_groups _metrics;
225 qp_stats _stats;
226
227public:
228 qp(bool register_copy_stats = false,
229 const std::string stats_plugin_name = std::string("network"),
230 uint8_t qid = 0);
231 virtual ~qp();
232 virtual future<> send(packet p) = 0;
233 virtual uint32_t send(circular_buffer<packet>& p) {
234 uint32_t sent = 0;
235 while (!p.empty()) {
9f95a23c
TL
236 // FIXME: future is discarded
237 (void)send(std::move(p.front()));
11fdf7f2
TL
238 p.pop_front();
239 sent++;
240 }
241 return sent;
242 }
243 virtual void rx_start() {};
244 void configure_proxies(const std::map<unsigned, float>& cpu_weights);
245 // build REdirection TAble for cpu_weights map: target cpu -> weight
246 void build_sw_reta(const std::map<unsigned, float>& cpu_weights);
247 void proxy_send(packet p) {
248 _proxy_packetq.push_back(std::move(p));
249 }
250 void register_packet_provider(packet_provider_type func) {
251 _pkt_providers.push_back(std::move(func));
252 }
253 bool poll_tx();
254 friend class device;
255};
256
257class device {
258protected:
259 std::unique_ptr<qp*[]> _queues;
260 size_t _rss_table_bits = 0;
261public:
262 device() {
263 _queues = std::make_unique<qp*[]>(smp::count);
264 }
265 virtual ~device() {};
266 qp& queue_for_cpu(unsigned cpu) { return *_queues[cpu]; }
267 qp& local_queue() { return queue_for_cpu(engine().cpu_id()); }
9f95a23c
TL
268 void l2receive(packet p) {
269 // FIXME: future is discarded
270 (void)_queues[engine().cpu_id()]->_rx_stream.produce(std::move(p));
271 }
272 future<> receive(std::function<future<> (packet)> next_packet);
11fdf7f2
TL
273 virtual ethernet_address hw_address() = 0;
274 virtual net::hw_features hw_features() = 0;
275 virtual rss_key_type rss_key() const { return default_rsskey_40bytes; }
276 virtual uint16_t hw_queues_count() { return 1; }
277 virtual future<> link_ready() { return make_ready_future<>(); }
278 virtual std::unique_ptr<qp> init_local_queue(boost::program_options::variables_map opts, uint16_t qid) = 0;
279 virtual unsigned hash2qid(uint32_t hash) {
280 return hash % hw_queues_count();
281 }
282 void set_local_queue(std::unique_ptr<qp> dev);
283 template <typename Func>
284 unsigned forward_dst(unsigned src_cpuid, Func&& hashfn) {
285 auto& qp = queue_for_cpu(src_cpuid);
286 if (!qp._sw_reta) {
287 return src_cpuid;
288 }
289 auto hash = hashfn() >> _rss_table_bits;
290 auto& reta = *qp._sw_reta;
291 return reta[hash % reta.size()];
292 }
293 virtual unsigned hash2cpu(uint32_t hash) {
294 // there is an assumption here that qid == cpu_id which will
295 // not necessary be true in the future
296 return forward_dst(hash2qid(hash), [hash] { return hash; });
297 }
298};
299
300}
301
302}