/*
 * This file is open source software, licensed to you under the terms
 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
 * distributed with this work for additional information regarding copyright
 * ownership. You may not use this file except in compliance with the License.
 *
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Copyright (C) 2014 Cloudius Systems, Ltd.
 */

#pragma once

#include <seastar/core/deleter.hh>
#include <seastar/core/temporary_buffer.hh>
#include <seastar/net/const.hh>
#include <vector>
#include <cassert>
#include <algorithm>
#include <iosfwd>
#include <seastar/util/std-compat.hh>
#include <functional>

namespace seastar {

namespace net {

struct fragment {
    char* base;
    size_t size;
};

struct offload_info {
    ip_protocol_num protocol = ip_protocol_num::unused;
    bool needs_csum = false;
    uint8_t ip_hdr_len = 20;
    uint8_t tcp_hdr_len = 20;
    uint8_t udp_hdr_len = 8;
    bool needs_ip_csum = false;
    bool reassembled = false;
    uint16_t tso_seg_size = 0;
    // HW stripped VLAN header (CPU order)
    std::optional<uint16_t> vlan_tci;
};
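
// A minimal sketch (not part of the original header) of how a transmit path
// might fill in offload_info for a TCP/IPv4 packet before handing it to a
// NIC driver. The enumerator ip_protocol_num::tcp is assumed to come from
// <seastar/net/const.hh>, and the concrete values are illustrative only:
//
//     offload_info oi;
//     oi.protocol = ip_protocol_num::tcp;  // L4 protocol carried in the packet
//     oi.needs_csum = true;                // hardware computes the TCP checksum
//     oi.needs_ip_csum = true;             // hardware computes the IPv4 checksum
//     oi.ip_hdr_len = 20;                  // no IP options
//     oi.tcp_hdr_len = 32;                 // 20 bytes plus 12 bytes of TCP options
//     oi.tso_seg_size = 1448;              // enable TSO with this segment size
//     p.set_offload_info(oi);              // `p` is an assumed net::packet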

// Zero-copy friendly packet class
//
// For implementing zero-copy, we need a flexible destructor that can
// destroy packet data in different ways: decrementing a reference count,
// or calling a free()-like function.
//
// Moreover, we need different destructors for each set of fragments within
// a single packet. For example, a header and trailer might need delete[]
// to be called, while the internal data needs a reference count to be
// released. Matters are complicated in that fragments can be split
// (due to virtual/physical translation).
//
// To implement this, we associate each packet with a single destructor,
// but allow composing a packet from another packet plus a fragment to
// be added, with its own destructor, causing the destructors to be chained.
//
// The downside is that the data needed for the destructor is duplicated,
// if it is already available in the fragment itself.
//
// As an optimization, when we allocate small fragments, we allocate some
// extra space, so prepending to the packet does not require extra
// allocations. This is useful when adding headers.
//
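// A minimal usage sketch (not from the original file): wrap a caller-owned,
// malloc()ed buffer in a packet without copying, then prepend a header
// fragment; the prepend either reuses the packet's internal headroom or
// chains a new deleter onto the existing one. `hdr` and `hdr_len` are
// assumed to describe some header bytes owned by the caller:
//
//     char* buf = static_cast<char*>(::malloc(1500));
//     packet p(fragment{buf, 1500}, make_free_deleter(buf)); // zero-copy payload
//     packet q(fragment{hdr, hdr_len}, std::move(p));        // header copied in front
//     // destroying `q` runs the chained deleters and frees `buf`
//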
class packet final {
    // enough for lots of headers, not quite two cache lines:
    static constexpr size_t internal_data_size = 128 - 16;
    static constexpr size_t default_nr_frags = 4;

    struct pseudo_vector {
        fragment* _start;
        fragment* _finish;
        pseudo_vector(fragment* start, size_t nr)
            : _start(start), _finish(_start + nr) {}
        fragment* begin() { return _start; }
        fragment* end() { return _finish; }
        fragment& operator[](size_t idx) { return _start[idx]; }
    };

    struct impl {
        // when destroyed, virtual destructor will reclaim resources
        deleter _deleter;
        unsigned _len = 0;
        uint16_t _nr_frags = 0;
        uint16_t _allocated_frags;
        offload_info _offload_info;
        std::optional<uint32_t> _rss_hash;
        char _data[internal_data_size]; // only _frags[0] may use
        unsigned _headroom = internal_data_size; // in _data
        // FIXME: share _data/_frags space

        fragment _frags[];

        impl(size_t nr_frags = default_nr_frags) noexcept;
        impl(const impl&) = delete;
        impl(fragment frag, size_t nr_frags = default_nr_frags);

        pseudo_vector fragments() { return { _frags, _nr_frags }; }

        static std::unique_ptr<impl> allocate(size_t nr_frags) {
            nr_frags = std::max(nr_frags, default_nr_frags);
            return std::unique_ptr<impl>(new (nr_frags) impl(nr_frags));
        }

        static std::unique_ptr<impl> copy(impl* old, size_t nr) {
            auto n = allocate(nr);
            n->_deleter = std::move(old->_deleter);
            n->_len = old->_len;
            n->_nr_frags = old->_nr_frags;
            n->_headroom = old->_headroom;
            n->_offload_info = old->_offload_info;
            n->_rss_hash = old->_rss_hash;
            std::copy(old->_frags, old->_frags + old->_nr_frags, n->_frags);
            old->copy_internal_fragment_to(n.get());
            return n;
        }

        static std::unique_ptr<impl> copy(impl* old) {
            return copy(old, old->_nr_frags);
        }

        static std::unique_ptr<impl> allocate_if_needed(std::unique_ptr<impl> old, size_t extra_frags) {
            if (old->_allocated_frags >= old->_nr_frags + extra_frags) {
                return old;
            }
            return copy(old.get(), std::max<size_t>(old->_nr_frags + extra_frags, 2 * old->_nr_frags));
        }
        void* operator new(size_t size, size_t nr_frags = default_nr_frags) {
            assert(nr_frags == uint16_t(nr_frags));
            return ::operator new(size + nr_frags * sizeof(fragment));
        }
        // Matching the operator new above
        void operator delete(void* ptr, size_t nr_frags) {
            return ::operator delete(ptr);
        }
        // Since the above "placement delete" hides the global one, expose it
        void operator delete(void* ptr) {
            return ::operator delete(ptr);
        }

        bool using_internal_data() const {
            return _nr_frags
                    && _frags[0].base >= _data
                    && _frags[0].base < _data + internal_data_size;
        }

        void unuse_internal_data() {
            if (!using_internal_data()) {
                return;
            }
            auto buf = static_cast<char*>(::malloc(_frags[0].size));
            if (!buf) {
                throw std::bad_alloc();
            }
            deleter d = make_free_deleter(buf);
            std::copy(_frags[0].base, _frags[0].base + _frags[0].size, buf);
            _frags[0].base = buf;
            d.append(std::move(_deleter));
            _deleter = std::move(d);
            _headroom = internal_data_size;
        }
        void copy_internal_fragment_to(impl* to) {
            if (!using_internal_data()) {
                return;
            }
            to->_frags[0].base = to->_data + _headroom;
            std::copy(_frags[0].base, _frags[0].base + _frags[0].size,
                    to->_frags[0].base);
        }
    };
    packet(std::unique_ptr<impl>&& impl) noexcept : _impl(std::move(impl)) {}
    std::unique_ptr<impl> _impl;
public:
    static packet from_static_data(const char* data, size_t len) noexcept {
        return {fragment{const_cast<char*>(data), len}, deleter()};
    }

    // build empty packet
    packet();
    // build empty packet with nr_frags allocated
    packet(size_t nr_frags);
    // move existing packet
    packet(packet&& x) noexcept;
    // copy data into packet
    packet(const char* data, size_t len);
    // copy data into packet
    packet(fragment frag);
    // zero-copy single fragment
    packet(fragment frag, deleter del);
    // zero-copy multiple fragments
    packet(std::vector<fragment> frag, deleter del);
    // build packet with iterator
    template <typename Iterator>
    packet(Iterator begin, Iterator end, deleter del);
    // append fragment (copying new fragment)
    packet(packet&& x, fragment frag);
    // prepend fragment (copying new fragment, with header optimization)
    packet(fragment frag, packet&& x);
    // prepend fragment (zero-copy)
    packet(fragment frag, deleter del, packet&& x);
    // append fragment (zero-copy)
    packet(packet&& x, fragment frag, deleter d);
    // append temporary_buffer (zero-copy)
    packet(packet&& x, temporary_buffer<char> buf);
    // create from temporary_buffer (zero-copy)
    packet(temporary_buffer<char> buf);
    // append deleter
    packet(packet&& x, deleter d);

    packet& operator=(packet&& x) noexcept {
        if (this != &x) {
            this->~packet();
            new (this) packet(std::move(x));
        }
        return *this;
    }

    unsigned len() const { return _impl->_len; }
    unsigned memory() const { return len() + sizeof(packet::impl); }

    fragment frag(unsigned idx) const { return _impl->_frags[idx]; }
    fragment& frag(unsigned idx) { return _impl->_frags[idx]; }

    unsigned nr_frags() const { return _impl->_nr_frags; }
    pseudo_vector fragments() const { return { _impl->_frags, _impl->_nr_frags }; }
    fragment* fragment_array() const { return _impl->_frags; }
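
    // Iterating the fragment list is a common pattern, e.g. to build a
    // scatter-gather descriptor for a NIC. A sketch (illustrative only;
    // `emit_sg_entry` is an assumed driver-side helper, not part of Seastar):
    //
    //     for (fragment& f : p.fragments()) {
    //         emit_sg_entry(f.base, f.size);  // each fragment is one contiguous chunk
    //     }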

    // share packet data (reference counted, non COW)
    packet share();
    packet share(size_t offset, size_t len);
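
    // A sketch of the sharing semantics (illustrative only): a shared packet
    // references the same underlying buffers, so nothing is copied and the data
    // is freed only after every sharer has been destroyed; there is no
    // copy-on-write, so writes through one sharer are visible through the other.
    //
    //     packet whole = p.share();     // second reference to all of p's data
    //     packet head = p.share(0, 14); // just the first 14 bytes (e.g. an Ethernet header)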

    void append(packet&& p);

    void trim_front(size_t how_much);
    void trim_back(size_t how_much);

    // get a header pointer, linearizing if necessary
    template <typename Header>
    Header* get_header(size_t offset = 0);

    // get a header pointer, linearizing if necessary
    char* get_header(size_t offset, size_t size);

    // prepend a header (default-initializing it)
    template <typename Header>
    Header* prepend_header(size_t extra_size = 0);

    // prepend a header (uninitialized!)
    char* prepend_uninitialized_header(size_t size);
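
    // A sketch of typical header access (illustrative only; `my_hdr` stands in
    // for a real wire-format struct):
    //
    //     auto* h = p.get_header<my_hdr>(offset);   // linearizes if the header spans fragments
    //     if (!h) { /* packet too short */ }
    //     auto* nh = p.prepend_header<my_hdr>();    // value-initialized header in the headroom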

    packet free_on_cpu(unsigned cpu, std::function<void()> cb = []{});

    void linearize() { return linearize(0, len()); }

    void reset() { _impl.reset(); }

    void reserve(int n_frags) {
        if (n_frags > _impl->_nr_frags) {
            auto extra = n_frags - _impl->_nr_frags;
            _impl = impl::allocate_if_needed(std::move(_impl), extra);
        }
    }
    std::optional<uint32_t> rss_hash() {
        return _impl->_rss_hash;
    }
    std::optional<uint32_t> set_rss_hash(uint32_t hash) {
        return _impl->_rss_hash = hash;
    }
    // Call `func` for each fragment, avoiding data copies when possible
    // `func` is called with a temporary_buffer<char> parameter
    template <typename Func>
    void release_into(Func&& func) {
        unsigned idx = 0;
        if (_impl->using_internal_data()) {
            auto&& f = frag(idx++);
            func(temporary_buffer<char>(f.base, f.size));
        }
        while (idx < nr_frags()) {
            auto&& f = frag(idx++);
            func(temporary_buffer<char>(f.base, f.size, _impl->_deleter.share()));
        }
    }
    std::vector<temporary_buffer<char>> release() {
        std::vector<temporary_buffer<char>> ret;
        ret.reserve(_impl->_nr_frags);
        release_into([&ret] (temporary_buffer<char>&& frag) {
            ret.push_back(std::move(frag));
        });
        return ret;
    }
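    // A sketch of draining a packet into temporary_buffers (illustrative only;
    // `sink` is an assumed consumer, not part of Seastar). Each buffer shares
    // the packet's deleter, so no bytes are copied except for a fragment held
    // in the packet's internal storage, which must be copied out:
    //
    //     p.release_into([&] (temporary_buffer<char> b) {
    //         sink.write(std::move(b));
    //     });
    //     // or: std::vector<temporary_buffer<char>> bufs = p.release();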
    explicit operator bool() {
        return bool(_impl);
    }
    static packet make_null_packet() noexcept {
        return net::packet(nullptr);
    }
private:
    void linearize(size_t at_frag, size_t desired_size);
    bool allocate_headroom(size_t size);
public:
    struct offload_info offload_info() const { return _impl->_offload_info; }
    struct offload_info& offload_info_ref() { return _impl->_offload_info; }
    void set_offload_info(struct offload_info oi) { _impl->_offload_info = oi; }
};

std::ostream& operator<<(std::ostream& os, const packet& p);

inline
packet::packet(packet&& x) noexcept
    : _impl(std::move(x._impl)) {
}

inline
packet::impl::impl(size_t nr_frags) noexcept
    : _len(0), _allocated_frags(nr_frags) {
}

inline
packet::impl::impl(fragment frag, size_t nr_frags)
    : _len(frag.size), _allocated_frags(nr_frags) {
    assert(_allocated_frags > _nr_frags);
    if (frag.size <= internal_data_size) {
        _headroom -= frag.size;
        _frags[0] = { _data + _headroom, frag.size };
    } else {
        auto buf = static_cast<char*>(::malloc(frag.size));
        if (!buf) {
            throw std::bad_alloc();
        }
        deleter d = make_free_deleter(buf);
        _frags[0] = { buf, frag.size };
        _deleter.append(std::move(d));
    }
    std::copy(frag.base, frag.base + frag.size, _frags[0].base);
    ++_nr_frags;
}

inline
packet::packet()
    : _impl(impl::allocate(1)) {
}

inline
packet::packet(size_t nr_frags)
    : _impl(impl::allocate(nr_frags)) {
}

inline
packet::packet(fragment frag) : _impl(new impl(frag)) {
}

inline
packet::packet(const char* data, size_t size) : packet(fragment{const_cast<char*>(data), size}) {
}

inline
packet::packet(fragment frag, deleter d)
    : _impl(impl::allocate(1)) {
    _impl->_deleter = std::move(d);
    _impl->_frags[_impl->_nr_frags++] = frag;
    _impl->_len = frag.size;
}

inline
packet::packet(std::vector<fragment> frag, deleter d)
    : _impl(impl::allocate(frag.size())) {
    _impl->_deleter = std::move(d);
    std::copy(frag.begin(), frag.end(), _impl->_frags);
    _impl->_nr_frags = frag.size();
    _impl->_len = 0;
    for (auto&& f : _impl->fragments()) {
        _impl->_len += f.size;
    }
}

template <typename Iterator>
inline
packet::packet(Iterator begin, Iterator end, deleter del) {
    unsigned nr_frags = 0, len = 0;
    nr_frags = std::distance(begin, end);
    std::for_each(begin, end, [&] (const fragment& frag) { len += frag.size; });
    _impl = impl::allocate(nr_frags);
    _impl->_deleter = std::move(del);
    _impl->_len = len;
    _impl->_nr_frags = nr_frags;
    std::copy(begin, end, _impl->_frags);
}

inline
packet::packet(packet&& x, fragment frag)
    : _impl(impl::allocate_if_needed(std::move(x._impl), 1)) {
    _impl->_len += frag.size;
    std::unique_ptr<char[]> buf(new char[frag.size]);
    std::copy(frag.base, frag.base + frag.size, buf.get());
    _impl->_frags[_impl->_nr_frags++] = {buf.get(), frag.size};
    _impl->_deleter = make_deleter(std::move(_impl->_deleter), [buf = buf.release()] {
        delete[] buf;
    });
}

inline
bool
packet::allocate_headroom(size_t size) {
    if (_impl->_headroom >= size) {
        _impl->_len += size;
        if (!_impl->using_internal_data()) {
            _impl = impl::allocate_if_needed(std::move(_impl), 1);
            std::copy_backward(_impl->_frags, _impl->_frags + _impl->_nr_frags,
                    _impl->_frags + _impl->_nr_frags + 1);
            _impl->_frags[0] = { _impl->_data + internal_data_size, 0 };
            ++_impl->_nr_frags;
        }
        _impl->_headroom -= size;
        _impl->_frags[0].base -= size;
        _impl->_frags[0].size += size;
        return true;
    } else {
        return false;
    }
}


inline
packet::packet(fragment frag, packet&& x)
    : _impl(std::move(x._impl)) {
    // try to prepend into existing internal fragment
    if (allocate_headroom(frag.size)) {
        std::copy(frag.base, frag.base + frag.size, _impl->_frags[0].base);
        return;
    } else {
        // didn't work out, allocate and copy
        _impl->unuse_internal_data();
        _impl = impl::allocate_if_needed(std::move(_impl), 1);
        _impl->_len += frag.size;
        std::unique_ptr<char[]> buf(new char[frag.size]);
        std::copy(frag.base, frag.base + frag.size, buf.get());
        std::copy_backward(_impl->_frags, _impl->_frags + _impl->_nr_frags,
                _impl->_frags + _impl->_nr_frags + 1);
        ++_impl->_nr_frags;
        _impl->_frags[0] = {buf.get(), frag.size};
        _impl->_deleter = make_deleter(std::move(_impl->_deleter),
                [buf = std::move(buf)] {});
    }
}

inline
packet::packet(packet&& x, fragment frag, deleter d)
    : _impl(impl::allocate_if_needed(std::move(x._impl), 1)) {
    _impl->_len += frag.size;
    _impl->_frags[_impl->_nr_frags++] = frag;
    d.append(std::move(_impl->_deleter));
    _impl->_deleter = std::move(d);
}

inline
packet::packet(packet&& x, deleter d)
    : _impl(std::move(x._impl)) {
    _impl->_deleter.append(std::move(d));
}

inline
packet::packet(packet&& x, temporary_buffer<char> buf)
    : packet(std::move(x), fragment{buf.get_write(), buf.size()}, buf.release()) {
}

inline
packet::packet(temporary_buffer<char> buf)
    : packet(fragment{buf.get_write(), buf.size()}, buf.release()) {}

inline
void packet::append(packet&& p) {
    if (!_impl->_len) {
        *this = std::move(p);
        return;
    }
    _impl = impl::allocate_if_needed(std::move(_impl), p._impl->_nr_frags);
    _impl->_len += p._impl->_len;
    p._impl->unuse_internal_data();
    std::copy(p._impl->_frags, p._impl->_frags + p._impl->_nr_frags,
            _impl->_frags + _impl->_nr_frags);
    _impl->_nr_frags += p._impl->_nr_frags;
    p._impl->_deleter.append(std::move(_impl->_deleter));
    _impl->_deleter = std::move(p._impl->_deleter);
}

inline
char* packet::get_header(size_t offset, size_t size) {
    if (offset + size > _impl->_len) {
        return nullptr;
    }
    size_t i = 0;
    while (i != _impl->_nr_frags && offset >= _impl->_frags[i].size) {
        offset -= _impl->_frags[i++].size;
    }
    if (i == _impl->_nr_frags) {
        return nullptr;
    }
    if (offset + size > _impl->_frags[i].size) {
        linearize(i, offset + size);
    }
    return _impl->_frags[i].base + offset;
}

template <typename Header>
inline
Header* packet::get_header(size_t offset) {
    return reinterpret_cast<Header*>(get_header(offset, sizeof(Header)));
}

inline
void packet::trim_front(size_t how_much) {
    assert(how_much <= _impl->_len);
    _impl->_len -= how_much;
    size_t i = 0;
    while (how_much && how_much >= _impl->_frags[i].size) {
        how_much -= _impl->_frags[i++].size;
    }
    std::copy(_impl->_frags + i, _impl->_frags + _impl->_nr_frags, _impl->_frags);
    _impl->_nr_frags -= i;
    if (!_impl->using_internal_data()) {
        _impl->_headroom = internal_data_size;
    }
    if (how_much) {
        if (_impl->using_internal_data()) {
            _impl->_headroom += how_much;
        }
        _impl->_frags[0].base += how_much;
        _impl->_frags[0].size -= how_much;
    }
}

inline
void packet::trim_back(size_t how_much) {
    assert(how_much <= _impl->_len);
    _impl->_len -= how_much;
    size_t i = _impl->_nr_frags - 1;
    while (how_much && how_much >= _impl->_frags[i].size) {
        how_much -= _impl->_frags[i--].size;
    }
    _impl->_nr_frags = i + 1;
    if (how_much) {
        _impl->_frags[i].size -= how_much;
        if (i == 0 && _impl->using_internal_data()) {
            _impl->_headroom += how_much;
        }
    }
}

template <typename Header>
Header*
packet::prepend_header(size_t extra_size) {
    auto h = prepend_uninitialized_header(sizeof(Header) + extra_size);
    return new (h) Header{};
}

// prepend a header (uninitialized!)
inline
char* packet::prepend_uninitialized_header(size_t size) {
    if (!allocate_headroom(size)) {
        // didn't work out, allocate and copy
        _impl->unuse_internal_data();
        // try again, after unuse_internal_data we may have space after all
        if (!allocate_headroom(size)) {
            // failed
            _impl->_len += size;
            _impl = impl::allocate_if_needed(std::move(_impl), 1);
            std::unique_ptr<char[]> buf(new char[size]);
            std::copy_backward(_impl->_frags, _impl->_frags + _impl->_nr_frags,
                    _impl->_frags + _impl->_nr_frags + 1);
            ++_impl->_nr_frags;
            _impl->_frags[0] = {buf.get(), size};
            _impl->_deleter = make_deleter(std::move(_impl->_deleter),
                    [buf = std::move(buf)] {});
        }
    }
    return _impl->_frags[0].base;
}

inline
packet packet::share() {
    return share(0, _impl->_len);
}

inline
packet packet::share(size_t offset, size_t len) {
    _impl->unuse_internal_data(); // FIXME: eliminate?
    packet n;
    n._impl = impl::allocate_if_needed(std::move(n._impl), _impl->_nr_frags);
    size_t idx = 0;
    while (offset > 0 && offset >= _impl->_frags[idx].size) {
        offset -= _impl->_frags[idx++].size;
    }
    while (n._impl->_len < len) {
        auto& f = _impl->_frags[idx++];
        auto fsize = std::min(len - n._impl->_len, f.size - offset);
        n._impl->_frags[n._impl->_nr_frags++] = { f.base + offset, fsize };
        n._impl->_len += fsize;
        offset = 0;
    }
    n._impl->_offload_info = _impl->_offload_info;
    assert(!n._impl->_deleter);
    n._impl->_deleter = _impl->_deleter.share();
    return n;
}

}

}