]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright (C) 2014 Cloudius Systems, Ltd. | |
20 | */ | |
21 | ||
22 | #pragma once | |
23 | ||
24 | #include <seastar/core/reactor.hh> | |
25 | #include <seastar/core/deleter.hh> | |
26 | #include <seastar/core/queue.hh> | |
27 | #include <seastar/core/stream.hh> | |
28 | #include <seastar/core/metrics_registration.hh> | |
29 | #include <seastar/net/toeplitz.hh> | |
30 | #include <seastar/net/ethernet.hh> | |
31 | #include <seastar/net/packet.hh> | |
32 | #include <seastar/net/const.hh> | |
33 | #include <unordered_map> | |
34 | ||
35 | namespace seastar { | |
36 | ||
37 | namespace net { | |
38 | ||
// Forward declarations of the networking classes named in this header.
class packet;
class interface;
class device;
class qp;
class l3_protocol;
44 | ||
/// Fixed-capacity (64-byte), append-only byte buffer.
///
/// Values are appended with push_back(); multi-byte integers are stored
/// least-significant byte first. Appending to a full buffer trips an assert.
class forward_hash {
    uint8_t _bytes[64];
    size_t _used = 0;
public:
    /// Number of bytes appended so far.
    size_t size() const {
        return _used;
    }
    /// Append one byte; asserts that there is still room in the buffer.
    void push_back(uint8_t b) {
        assert(_used < sizeof(_bytes));
        _bytes[_used] = b;
        ++_used;
    }
    /// Append a 16-bit value as two bytes, low byte first.
    void push_back(uint16_t b) {
        const uint8_t low = uint8_t(b);
        const uint8_t high = uint8_t(b >> 8);
        push_back(low);
        push_back(high);
    }
    /// Append a 32-bit value as two 16-bit halves, low half first.
    void push_back(uint32_t b) {
        const uint16_t low = uint16_t(b);
        const uint16_t high = uint16_t(b >> 16);
        push_back(low);
        push_back(high);
    }
    /// Read-only access to the byte at position idx (not bounds-checked).
    const uint8_t& operator[](size_t idx) const {
        return _bytes[idx];
    }
};
68 | ||
69 | struct hw_features { | |
70 | // Enable tx ip header checksum offload | |
71 | bool tx_csum_ip_offload = false; | |
72 | // Enable tx l4 (TCP or UDP) checksum offload | |
73 | bool tx_csum_l4_offload = false; | |
74 | // Enable rx checksum offload | |
75 | bool rx_csum_offload = false; | |
76 | // LRO is enabled | |
77 | bool rx_lro = false; | |
78 | // Enable tx TCP segment offload | |
79 | bool tx_tso = false; | |
80 | // Enable tx UDP fragmentation offload | |
81 | bool tx_ufo = false; | |
82 | // Maximum Transmission Unit | |
83 | uint16_t mtu = 1500; | |
84 | // Maximun packet len when TCP/UDP offload is enabled | |
85 | uint16_t max_packet_len = ip_packet_len_max - eth_hdr_len; | |
86 | }; | |
87 | ||
88 | class l3_protocol { | |
89 | public: | |
90 | struct l3packet { | |
91 | eth_protocol_num proto_num; | |
92 | ethernet_address to; | |
93 | packet p; | |
94 | }; | |
95 | using packet_provider_type = std::function<compat::optional<l3packet> ()>; | |
96 | private: | |
97 | interface* _netif; | |
98 | eth_protocol_num _proto_num; | |
99 | public: | |
100 | explicit l3_protocol(interface* netif, eth_protocol_num proto_num, packet_provider_type func); | |
9f95a23c | 101 | future<> receive( |
11fdf7f2 TL |
102 | std::function<future<> (packet, ethernet_address)> rx_fn, |
103 | std::function<bool (forward_hash&, packet&, size_t)> forward); | |
104 | private: | |
105 | friend class interface; | |
106 | }; | |
107 | ||
108 | class interface { | |
109 | struct l3_rx_stream { | |
110 | stream<packet, ethernet_address> packet_stream; | |
111 | future<> ready; | |
112 | std::function<bool (forward_hash&, packet&, size_t)> forward; | |
113 | l3_rx_stream(std::function<bool (forward_hash&, packet&, size_t)>&& fw) : ready(packet_stream.started()), forward(fw) {} | |
114 | }; | |
115 | std::unordered_map<uint16_t, l3_rx_stream> _proto_map; | |
116 | std::shared_ptr<device> _dev; | |
11fdf7f2 TL |
117 | ethernet_address _hw_address; |
118 | net::hw_features _hw_features; | |
119 | std::vector<l3_protocol::packet_provider_type> _pkt_providers; | |
120 | private: | |
121 | future<> dispatch_packet(packet p); | |
122 | public: | |
123 | explicit interface(std::shared_ptr<device> dev); | |
124 | ethernet_address hw_address() { return _hw_address; } | |
125 | const net::hw_features& hw_features() const { return _hw_features; } | |
9f95a23c | 126 | future<> register_l3(eth_protocol_num proto_num, |
11fdf7f2 TL |
127 | std::function<future<> (packet p, ethernet_address from)> next, |
128 | std::function<bool (forward_hash&, packet&, size_t)> forward); | |
129 | void forward(unsigned cpuid, packet p); | |
130 | unsigned hash2cpu(uint32_t hash); | |
131 | void register_packet_provider(l3_protocol::packet_provider_type func) { | |
132 | _pkt_providers.push_back(std::move(func)); | |
133 | } | |
134 | uint16_t hw_queues_count(); | |
135 | rss_key_type rss_key() const; | |
136 | friend class l3_protocol; | |
137 | }; | |
138 | ||
/// Counters for successfully processed traffic.
///
/// The counters carry no default initializers; the owner is expected to
/// value-initialize the struct.
struct qp_stats_good {
    /**
     * Record a freshly processed bunch of packets.
     *
     * Adds the bunch to the running packet total and remembers its size as
     * the most recent bunch.
     *
     * @param count Number of packets in the last packets bunch.
     */
    void update_pkts_bunch(uint64_t count) {
        packets += count;
        last_bunch = count;
    }

    /**
     * Account for fragments that were processed by copying.
     *
     * @param nr_frags Number of copied fragments
     * @param bytes Number of copied bytes
     */
    void update_copy_stats(uint64_t nr_frags, uint64_t bytes) {
        // The parameters shadow the members of the same name; only the
        // copy_* counters are touched here.
        copy_bytes += bytes;
        copy_frags += nr_frags;
    }

    /**
     * Add to the total fragment and byte counters.
     *
     * @param nfrags Number of processed fragments
     * @param nbytes Number of bytes in the processed fragments
     */
    void update_frags_stats(uint64_t nfrags, uint64_t nbytes) {
        bytes += nbytes;
        nr_frags += nfrags;
    }

    uint64_t bytes;      // total number of bytes
    uint64_t nr_frags;   // total number of fragments
    uint64_t copy_frags; // fragments that were copied on L2 level
    uint64_t copy_bytes; // bytes that were copied on L2 level
    uint64_t packets;    // total number of packets
    uint64_t last_bunch; // number of packets in the last sent/received bunch
};
182 | ||
183 | struct qp_stats { | |
184 | qp_stats() : rx{}, tx{} {} | |
185 | ||
186 | struct { | |
187 | struct qp_stats_good good; | |
188 | ||
189 | struct { | |
190 | void inc_csum_err() { | |
191 | ++csum; | |
192 | ++total; | |
193 | } | |
194 | ||
195 | void inc_no_mem() { | |
196 | ++no_mem; | |
197 | ++total; | |
198 | } | |
199 | ||
200 | uint64_t no_mem; // Packets dropped due to allocation failure | |
201 | uint64_t total; // total number of erroneous packets | |
202 | uint64_t csum; // packets with bad checksum | |
203 | } bad; | |
204 | } rx; | |
205 | ||
206 | struct { | |
207 | struct qp_stats_good good; | |
208 | uint64_t linearized; // number of packets that were linearized | |
209 | } tx; | |
210 | }; | |
211 | ||
212 | class qp { | |
213 | using packet_provider_type = std::function<compat::optional<packet> ()>; | |
214 | std::vector<packet_provider_type> _pkt_providers; | |
215 | compat::optional<std::array<uint8_t, 128>> _sw_reta; | |
216 | circular_buffer<packet> _proxy_packetq; | |
217 | stream<packet> _rx_stream; | |
218 | reactor::poller _tx_poller; | |
219 | circular_buffer<packet> _tx_packetq; | |
220 | ||
221 | protected: | |
222 | const std::string _stats_plugin_name; | |
223 | const std::string _queue_name; | |
224 | metrics::metric_groups _metrics; | |
225 | qp_stats _stats; | |
226 | ||
227 | public: | |
228 | qp(bool register_copy_stats = false, | |
229 | const std::string stats_plugin_name = std::string("network"), | |
230 | uint8_t qid = 0); | |
231 | virtual ~qp(); | |
232 | virtual future<> send(packet p) = 0; | |
233 | virtual uint32_t send(circular_buffer<packet>& p) { | |
234 | uint32_t sent = 0; | |
235 | while (!p.empty()) { | |
9f95a23c TL |
236 | // FIXME: future is discarded |
237 | (void)send(std::move(p.front())); | |
11fdf7f2 TL |
238 | p.pop_front(); |
239 | sent++; | |
240 | } | |
241 | return sent; | |
242 | } | |
243 | virtual void rx_start() {}; | |
244 | void configure_proxies(const std::map<unsigned, float>& cpu_weights); | |
245 | // build REdirection TAble for cpu_weights map: target cpu -> weight | |
246 | void build_sw_reta(const std::map<unsigned, float>& cpu_weights); | |
247 | void proxy_send(packet p) { | |
248 | _proxy_packetq.push_back(std::move(p)); | |
249 | } | |
250 | void register_packet_provider(packet_provider_type func) { | |
251 | _pkt_providers.push_back(std::move(func)); | |
252 | } | |
253 | bool poll_tx(); | |
254 | friend class device; | |
255 | }; | |
256 | ||
257 | class device { | |
258 | protected: | |
259 | std::unique_ptr<qp*[]> _queues; | |
260 | size_t _rss_table_bits = 0; | |
261 | public: | |
262 | device() { | |
263 | _queues = std::make_unique<qp*[]>(smp::count); | |
264 | } | |
265 | virtual ~device() {}; | |
266 | qp& queue_for_cpu(unsigned cpu) { return *_queues[cpu]; } | |
267 | qp& local_queue() { return queue_for_cpu(engine().cpu_id()); } | |
9f95a23c TL |
268 | void l2receive(packet p) { |
269 | // FIXME: future is discarded | |
270 | (void)_queues[engine().cpu_id()]->_rx_stream.produce(std::move(p)); | |
271 | } | |
272 | future<> receive(std::function<future<> (packet)> next_packet); | |
11fdf7f2 TL |
273 | virtual ethernet_address hw_address() = 0; |
274 | virtual net::hw_features hw_features() = 0; | |
275 | virtual rss_key_type rss_key() const { return default_rsskey_40bytes; } | |
276 | virtual uint16_t hw_queues_count() { return 1; } | |
277 | virtual future<> link_ready() { return make_ready_future<>(); } | |
278 | virtual std::unique_ptr<qp> init_local_queue(boost::program_options::variables_map opts, uint16_t qid) = 0; | |
279 | virtual unsigned hash2qid(uint32_t hash) { | |
280 | return hash % hw_queues_count(); | |
281 | } | |
282 | void set_local_queue(std::unique_ptr<qp> dev); | |
283 | template <typename Func> | |
284 | unsigned forward_dst(unsigned src_cpuid, Func&& hashfn) { | |
285 | auto& qp = queue_for_cpu(src_cpuid); | |
286 | if (!qp._sw_reta) { | |
287 | return src_cpuid; | |
288 | } | |
289 | auto hash = hashfn() >> _rss_table_bits; | |
290 | auto& reta = *qp._sw_reta; | |
291 | return reta[hash % reta.size()]; | |
292 | } | |
293 | virtual unsigned hash2cpu(uint32_t hash) { | |
294 | // there is an assumption here that qid == cpu_id which will | |
295 | // not necessary be true in the future | |
296 | return forward_dst(hash2qid(hash), [hash] { return hash; }); | |
297 | } | |
298 | }; | |
299 | ||
300 | } | |
301 | ||
302 | } |