]> git.proxmox.com Git - ceph.git/blame - ceph/src/msg/async/dpdk/Packet.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / msg / async / dpdk / Packet.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2/*
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
7 *
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19/*
20 * Copyright (C) 2014 Cloudius Systems, Ltd.
21 */
7c673cae
FG
22
23#ifndef CEPH_MSG_PACKET_H_
24#define CEPH_MSG_PACKET_H_
25
26#include <vector>
27#include <algorithm>
28#include <iosfwd>
29
30#include "include/types.h"
7c673cae
FG
31#include "common/deleter.h"
32#include "msg/async/Event.h"
33
34#include "const.h"
35
// A non-owning view of one contiguous piece of packet data.
struct fragment {
  // first byte of the piece
  char* base;
  // number of bytes starting at base
  size_t size;
};
40
41struct offload_info {
42 ip_protocol_num protocol = ip_protocol_num::unused;
43 bool needs_csum = false;
44 uint8_t ip_hdr_len = 20;
45 uint8_t tcp_hdr_len = 20;
46 uint8_t udp_hdr_len = 8;
47 bool needs_ip_csum = false;
48 bool reassembled = false;
49 uint16_t tso_seg_size = 0;
50 // HW stripped VLAN header (CPU order)
20effc67 51 std::optional<uint16_t> vlan_tci;
7c673cae
FG
52};
53
54// Zero-copy friendly packet class
55//
56// For implementing zero-copy, we need a flexible destructor that can
57// destroy packet data in different ways: decrementing a reference count,
58// or calling a free()-like function.
59//
60// Moreover, we need different destructors for each set of fragments within
61// a single fragment. For example, a header and trailer might need delete[]
62// to be called, while the internal data needs a reference count to be
63// released. Matters are complicated in that fragments can be split
64// (due to virtual/physical translation).
65//
66// To implement this, we associate each packet with a single destructor,
67// but allow composing a packet from another packet plus a fragment to
68// be added, with its own destructor, causing the destructors to be chained.
69//
70// The downside is that the data needed for the destructor is duplicated,
71// if it is already available in the fragment itself.
72//
73// As an optimization, when we allocate small fragments, we allocate some
74// extra space, so prepending to the packet does not require extra
75// allocations. This is useful when adding headers.
76//
77class Packet {
78 // enough for lots of headers, not quite two cache lines:
79 static constexpr size_t internal_data_size = 128 - 16;
80 static constexpr size_t default_nr_frags = 4;
81
82 struct pseudo_vector {
83 fragment* _start;
84 fragment* _finish;
85 pseudo_vector(fragment* start, size_t nr)
86 : _start(start), _finish(_start + nr) {}
87 fragment* begin() { return _start; }
88 fragment* end() { return _finish; }
89 fragment& operator[](size_t idx) { return _start[idx]; }
90 };
91
92 struct impl {
93 // when destroyed, virtual destructor will reclaim resources
94 deleter _deleter;
95 unsigned _len = 0;
96 uint16_t _nr_frags = 0;
97 uint16_t _allocated_frags;
98 offload_info _offload_info;
20effc67 99 std::optional<uint32_t> rss_hash;
7c673cae
FG
100 char data[internal_data_size]; // only frags[0] may use
101 unsigned headroom = internal_data_size; // in data
102 // FIXME: share data/frags space
103
104 fragment frags[];
105
11fdf7f2 106 explicit impl(size_t nr_frags = default_nr_frags);
7c673cae
FG
107 impl(const impl&) = delete;
108 impl(fragment frag, size_t nr_frags = default_nr_frags);
109
110 pseudo_vector fragments() { return { frags, _nr_frags }; }
111
112 static std::unique_ptr<impl> allocate(size_t nr_frags) {
11fdf7f2 113 nr_frags = std::max(nr_frags, default_nr_frags);
7c673cae
FG
114 return std::unique_ptr<impl>(new (nr_frags) impl(nr_frags));
115 }
116
117 static std::unique_ptr<impl> copy(impl* old, size_t nr) {
118 auto n = allocate(nr);
119 n->_deleter = std::move(old->_deleter);
120 n->_len = old->_len;
121 n->_nr_frags = old->_nr_frags;
122 n->headroom = old->headroom;
123 n->_offload_info = old->_offload_info;
20effc67 124 n->rss_hash = old->rss_hash;
7c673cae
FG
125 std::copy(old->frags, old->frags + old->_nr_frags, n->frags);
126 old->copy_internal_fragment_to(n.get());
9f95a23c 127 return n;
7c673cae
FG
128 }
129
130 static std::unique_ptr<impl> copy(impl* old) {
131 return copy(old, old->_nr_frags);
132 }
133
134 static std::unique_ptr<impl> allocate_if_needed(std::unique_ptr<impl> old, size_t extra_frags) {
135 if (old->_allocated_frags >= old->_nr_frags + extra_frags) {
9f95a23c 136 return old;
7c673cae
FG
137 }
138 return copy(old.get(), std::max<size_t>(old->_nr_frags + extra_frags, 2 * old->_nr_frags));
139 }
140 void* operator new(size_t size, size_t nr_frags = default_nr_frags) {
11fdf7f2 141 ceph_assert(nr_frags == uint16_t(nr_frags));
7c673cae
FG
142 return ::operator new(size + nr_frags * sizeof(fragment));
143 }
144 // Matching the operator new above
145 void operator delete(void* ptr, size_t nr_frags) {
146 return ::operator delete(ptr);
147 }
148 // Since the above "placement delete" hides the global one, expose it
149 void operator delete(void* ptr) {
150 return ::operator delete(ptr);
151 }
152
153 bool using_internal_data() const {
154 return _nr_frags
155 && frags[0].base >= data
156 && frags[0].base < data + internal_data_size;
157 }
158
159 void unuse_internal_data() {
160 if (!using_internal_data()) {
161 return;
162 }
163 auto buf = static_cast<char*>(::malloc(frags[0].size));
164 if (!buf) {
165 throw std::bad_alloc();
166 }
167 deleter d = make_free_deleter(buf);
168 std::copy(frags[0].base, frags[0].base + frags[0].size, buf);
169 frags[0].base = buf;
170 _deleter.append(std::move(d));
171 headroom = internal_data_size;
172 }
173 void copy_internal_fragment_to(impl* to) {
174 if (!using_internal_data()) {
175 return;
176 }
177 to->frags[0].base = to->data + headroom;
178 std::copy(frags[0].base, frags[0].base + frags[0].size,
179 to->frags[0].base);
180 }
181 };
11fdf7f2 182 explicit Packet(std::unique_ptr<impl>&& impl) : _impl(std::move(impl)) {}
7c673cae
FG
183 std::unique_ptr<impl> _impl;
184public:
185 static Packet from_static_data(const char* data, size_t len) {
186 return {fragment{const_cast<char*>(data), len}, deleter()};
187 }
188
189 // build empty Packet
190 Packet();
191 // build empty Packet with nr_frags allocated
11fdf7f2 192 explicit Packet(size_t nr_frags);
7c673cae
FG
193 // move existing Packet
194 Packet(Packet&& x) noexcept;
195 // copy data into Packet
196 Packet(const char* data, size_t len);
197 // copy data into Packet
11fdf7f2 198 explicit Packet(fragment frag);
7c673cae
FG
199 // zero-copy single fragment
200 Packet(fragment frag, deleter del);
201 // zero-copy multiple fragments
202 Packet(std::vector<fragment> frag, deleter del);
203 // build Packet with iterator
204 template <typename Iterator>
205 Packet(Iterator begin, Iterator end, deleter del);
206 // append fragment (copying new fragment)
207 Packet(Packet&& x, fragment frag);
208 // prepend fragment (copying new fragment, with header optimization)
209 Packet(fragment frag, Packet&& x);
210 // prepend fragment (zero-copy)
211 Packet(fragment frag, deleter del, Packet&& x);
212 // append fragment (zero-copy)
213 Packet(Packet&& x, fragment frag, deleter d);
214 // append deleter
215 Packet(Packet&& x, deleter d);
216
217 Packet& operator=(Packet&& x) {
218 if (this != &x) {
219 this->~Packet();
220 new (this) Packet(std::move(x));
221 }
222 return *this;
223 }
224
225 unsigned len() const { return _impl->_len; }
226 unsigned memory() const { return len() + sizeof(Packet::impl); }
227
228 fragment frag(unsigned idx) const { return _impl->frags[idx]; }
229 fragment& frag(unsigned idx) { return _impl->frags[idx]; }
230
231 unsigned nr_frags() const { return _impl->_nr_frags; }
232 pseudo_vector fragments() const { return { _impl->frags, _impl->_nr_frags }; }
233 fragment* fragment_array() const { return _impl->frags; }
234
235 // share Packet data (reference counted, non COW)
236 Packet share();
237 Packet share(size_t offset, size_t len);
238
239 void append(Packet&& p);
240
241 void trim_front(size_t how_much);
242 void trim_back(size_t how_much);
243
244 // get a header pointer, linearizing if necessary
245 template <typename Header>
246 Header* get_header(size_t offset = 0);
247
248 // get a header pointer, linearizing if necessary
249 char* get_header(size_t offset, size_t size);
250
251 // prepend a header (default-initializing it)
252 template <typename Header>
253 Header* prepend_header(size_t extra_size = 0);
254
255 // prepend a header (uninitialized!)
256 char* prepend_uninitialized_header(size_t size);
257
258 Packet free_on_cpu(EventCenter *c, std::function<void()> cb = []{});
259
260 void linearize() { return linearize(0, len()); }
261
262 void reset() { _impl.reset(); }
263
264 void reserve(int n_frags) {
265 if (n_frags > _impl->_nr_frags) {
266 auto extra = n_frags - _impl->_nr_frags;
267 _impl = impl::allocate_if_needed(std::move(_impl), extra);
268 }
269 }
20effc67 270 std::optional<uint32_t> rss_hash() {
7c673cae
FG
271 return _impl->rss_hash;
272 }
273 void set_rss_hash(uint32_t hash) {
20effc67 274 _impl->rss_hash = hash;
7c673cae
FG
275 }
276private:
277 void linearize(size_t at_frag, size_t desired_size);
278 bool allocate_headroom(size_t size);
279public:
280 class offload_info offload_info() const { return _impl->_offload_info; }
281 class offload_info& offload_info_ref() { return _impl->_offload_info; }
282 void set_offload_info(class offload_info oi) { _impl->_offload_info = oi; }
283};
284
// Stream-insertion operator for Packet; only declared here, the definition
// lives outside this header.
std::ostream& operator<<(std::ostream& os, const Packet& p);
286
287inline Packet::Packet(Packet&& x) noexcept
288 : _impl(std::move(x._impl)) {
289}
290
291inline Packet::impl::impl(size_t nr_frags)
292 : _len(0), _allocated_frags(nr_frags) {
293}
294
295inline Packet::impl::impl(fragment frag, size_t nr_frags)
296 : _len(frag.size), _allocated_frags(nr_frags) {
11fdf7f2 297 ceph_assert(_allocated_frags > _nr_frags);
7c673cae
FG
298 if (frag.size <= internal_data_size) {
299 headroom -= frag.size;
300 frags[0] = { data + headroom, frag.size };
301 } else {
302 auto buf = static_cast<char*>(::malloc(frag.size));
303 if (!buf) {
304 throw std::bad_alloc();
305 }
306 deleter d = make_free_deleter(buf);
307 frags[0] = { buf, frag.size };
308 _deleter.append(std::move(d));
309 }
310 std::copy(frag.base, frag.base + frag.size, frags[0].base);
311 ++_nr_frags;
312}
313
314inline Packet::Packet(): _impl(impl::allocate(1)) {
315}
316
317inline Packet::Packet(size_t nr_frags): _impl(impl::allocate(nr_frags)) {
318}
319
320inline Packet::Packet(fragment frag): _impl(new impl(frag)) {
321}
322
323inline Packet::Packet(const char* data, size_t size):
324 Packet(fragment{const_cast<char*>(data), size}) {
325}
326
327inline Packet::Packet(fragment frag, deleter d)
328 : _impl(impl::allocate(1)) {
329 _impl->_deleter = std::move(d);
330 _impl->frags[_impl->_nr_frags++] = frag;
331 _impl->_len = frag.size;
332}
333
334inline Packet::Packet(std::vector<fragment> frag, deleter d)
335 : _impl(impl::allocate(frag.size())) {
336 _impl->_deleter = std::move(d);
337 std::copy(frag.begin(), frag.end(), _impl->frags);
338 _impl->_nr_frags = frag.size();
339 _impl->_len = 0;
340 for (auto&& f : _impl->fragments()) {
341 _impl->_len += f.size;
342 }
343}
344
345template <typename Iterator>
346inline Packet::Packet(Iterator begin, Iterator end, deleter del) {
347 unsigned nr_frags = 0, len = 0;
348 nr_frags = std::distance(begin, end);
349 std::for_each(begin, end, [&] (fragment& frag) { len += frag.size; });
350 _impl = impl::allocate(nr_frags);
351 _impl->_deleter = std::move(del);
352 _impl->_len = len;
353 _impl->_nr_frags = nr_frags;
354 std::copy(begin, end, _impl->frags);
355}
356
357inline Packet::Packet(Packet&& x, fragment frag)
358 : _impl(impl::allocate_if_needed(std::move(x._impl), 1)) {
359 _impl->_len += frag.size;
360 char* buf = new char[frag.size];
361 std::copy(frag.base, frag.base + frag.size, buf);
362 _impl->frags[_impl->_nr_frags++] = {buf, frag.size};
363 _impl->_deleter = make_deleter(std::move(_impl->_deleter), [buf] {
364 delete[] buf;
365 });
366}
367
368inline bool Packet::allocate_headroom(size_t size) {
369 if (_impl->headroom >= size) {
370 _impl->_len += size;
371 if (!_impl->using_internal_data()) {
372 _impl = impl::allocate_if_needed(std::move(_impl), 1);
373 std::copy_backward(_impl->frags, _impl->frags + _impl->_nr_frags,
374 _impl->frags + _impl->_nr_frags + 1);
375 _impl->frags[0] = { _impl->data + internal_data_size, 0 };
376 ++_impl->_nr_frags;
377 }
378 _impl->headroom -= size;
379 _impl->frags[0].base -= size;
380 _impl->frags[0].size += size;
381 return true;
382 } else {
383 return false;
384 }
385}
386
387
388inline Packet::Packet(fragment frag, Packet&& x)
389 : _impl(std::move(x._impl)) {
390 // try to prepend into existing internal fragment
391 if (allocate_headroom(frag.size)) {
392 std::copy(frag.base, frag.base + frag.size, _impl->frags[0].base);
393 return;
394 } else {
395 // didn't work out, allocate and copy
396 _impl->unuse_internal_data();
397 _impl = impl::allocate_if_needed(std::move(_impl), 1);
398 _impl->_len += frag.size;
399 char *buf = new char[frag.size];
400 std::copy(frag.base, frag.base + frag.size, buf);
401 std::copy_backward(_impl->frags, _impl->frags + _impl->_nr_frags,
402 _impl->frags + _impl->_nr_frags + 1);
403 ++_impl->_nr_frags;
404 _impl->frags[0] = {buf, frag.size};
405 _impl->_deleter = make_deleter(
406 std::move(_impl->_deleter), [buf] { delete []buf; });
407 }
408}
409
410inline Packet::Packet(Packet&& x, fragment frag, deleter d)
411 : _impl(impl::allocate_if_needed(std::move(x._impl), 1)) {
412 _impl->_len += frag.size;
413 _impl->frags[_impl->_nr_frags++] = frag;
414 d.append(std::move(_impl->_deleter));
415 _impl->_deleter = std::move(d);
416}
417
418inline Packet::Packet(Packet&& x, deleter d): _impl(std::move(x._impl)) {
419 _impl->_deleter.append(std::move(d));
420}
421
422inline void Packet::append(Packet&& p) {
423 if (!_impl->_len) {
424 *this = std::move(p);
425 return;
426 }
427 _impl = impl::allocate_if_needed(std::move(_impl), p._impl->_nr_frags);
428 _impl->_len += p._impl->_len;
429 p._impl->unuse_internal_data();
430 std::copy(p._impl->frags, p._impl->frags + p._impl->_nr_frags,
431 _impl->frags + _impl->_nr_frags);
432 _impl->_nr_frags += p._impl->_nr_frags;
433 p._impl->_deleter.append(std::move(_impl->_deleter));
434 _impl->_deleter = std::move(p._impl->_deleter);
435}
436
437inline char* Packet::get_header(size_t offset, size_t size) {
438 if (offset + size > _impl->_len) {
439 return nullptr;
440 }
441 size_t i = 0;
442 while (i != _impl->_nr_frags && offset >= _impl->frags[i].size) {
443 offset -= _impl->frags[i++].size;
444 }
445 if (i == _impl->_nr_frags) {
446 return nullptr;
447 }
448 if (offset + size > _impl->frags[i].size) {
449 linearize(i, offset + size);
450 }
451 return _impl->frags[i].base + offset;
452}
453
454template <typename Header>
455inline Header* Packet::get_header(size_t offset) {
456 return reinterpret_cast<Header*>(get_header(offset, sizeof(Header)));
457}
458
459inline void Packet::trim_front(size_t how_much) {
11fdf7f2 460 ceph_assert(how_much <= _impl->_len);
7c673cae
FG
461 _impl->_len -= how_much;
462 size_t i = 0;
463 while (how_much && how_much >= _impl->frags[i].size) {
464 how_much -= _impl->frags[i++].size;
465 }
466 std::copy(_impl->frags + i, _impl->frags + _impl->_nr_frags, _impl->frags);
467 _impl->_nr_frags -= i;
468 if (!_impl->using_internal_data()) {
469 _impl->headroom = internal_data_size;
470 }
471 if (how_much) {
472 if (_impl->using_internal_data()) {
473 _impl->headroom += how_much;
474 }
475 _impl->frags[0].base += how_much;
476 _impl->frags[0].size -= how_much;
477 }
478}
479
480inline void Packet::trim_back(size_t how_much) {
11fdf7f2 481 ceph_assert(how_much <= _impl->_len);
7c673cae
FG
482 _impl->_len -= how_much;
483 size_t i = _impl->_nr_frags - 1;
484 while (how_much && how_much >= _impl->frags[i].size) {
485 how_much -= _impl->frags[i--].size;
486 }
487 _impl->_nr_frags = i + 1;
488 if (how_much) {
489 _impl->frags[i].size -= how_much;
490 if (i == 0 && _impl->using_internal_data()) {
491 _impl->headroom += how_much;
492 }
493 }
494}
495
496template <typename Header>
497Header* Packet::prepend_header(size_t extra_size) {
498 auto h = prepend_uninitialized_header(sizeof(Header) + extra_size);
499 return new (h) Header{};
500}
501
502// prepend a header (uninitialized!)
503inline char* Packet::prepend_uninitialized_header(size_t size) {
504 if (!allocate_headroom(size)) {
505 // didn't work out, allocate and copy
506 _impl->unuse_internal_data();
507 // try again, after unuse_internal_data we may have space after all
508 if (!allocate_headroom(size)) {
509 // failed
510 _impl->_len += size;
511 _impl = impl::allocate_if_needed(std::move(_impl), 1);
512 char *buf = new char[size];
513 std::copy_backward(_impl->frags, _impl->frags + _impl->_nr_frags,
514 _impl->frags + _impl->_nr_frags + 1);
515 ++_impl->_nr_frags;
516 _impl->frags[0] = {buf, size};
517 _impl->_deleter = make_deleter(std::move(_impl->_deleter),
518 [buf] { delete []buf; });
519 }
520 }
521 return _impl->frags[0].base;
522}
523
524inline Packet Packet::share() {
525 return share(0, _impl->_len);
526}
527
528inline Packet Packet::share(size_t offset, size_t len) {
529 _impl->unuse_internal_data(); // FIXME: eliminate?
530 Packet n;
531 n._impl = impl::allocate_if_needed(std::move(n._impl), _impl->_nr_frags);
532 size_t idx = 0;
533 while (offset > 0 && offset >= _impl->frags[idx].size) {
534 offset -= _impl->frags[idx++].size;
535 }
536 while (n._impl->_len < len) {
537 auto& f = _impl->frags[idx++];
538 auto fsize = std::min(len - n._impl->_len, f.size - offset);
539 n._impl->frags[n._impl->_nr_frags++] = { f.base + offset, fsize };
540 n._impl->_len += fsize;
541 offset = 0;
542 }
543 n._impl->_offload_info = _impl->_offload_info;
11fdf7f2 544 ceph_assert(!n._impl->_deleter);
7c673cae
FG
545 n._impl->_deleter = _impl->_deleter.share();
546 return n;
547}
548
549#endif /* CEPH_MSG_PACKET_H_ */