1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
3 * This file is open source software, licensed to you under the terms
4 * of the Apache License, Version 2.0 (the "License"). See the NOTICE file
5 * distributed with this work for additional information regarding copyright
6 * ownership. You may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
20 * Copyright (C) 2014 Cloudius Systems, Ltd.
23 * Ceph - scalable distributed file system
25 * Copyright (C) 2015 XSky <haomai@xsky.com>
27 * Author: Haomai Wang <haomaiwang@gmail.com>
33 #include "DPDKStack.h"
35 #include "common/dout.h"
36 #include "include/ceph_assert.h"
38 #define dout_subsys ceph_subsys_dpdk
40 #define dout_prefix *_dout << "net "
// Constructor for the network interface bound to one EventCenter.
// Stores cct and the shared DPDK device, caches the device's hardware
// address and hardware features, and registers a packet provider on the
// device queue selected by center->get_id().
// NOTE(review): parts of this definition (some initializers, the declaration
// of `idx`, the declaration of `p`, return statements and closing braces)
// are not visible in this view; comments below cover only the visible code.
42 interface::interface(CephContext
*cct
, std::shared_ptr
<DPDKDevice
> dev
, EventCenter
*center
)
43 : cct(cct
), _dev(dev
),
// Receive callback: hands each incoming Packet to dispatch_packet() together
// with the EventCenter captured here. The member initializer that receives
// this lambda is not visible in this view.
46 [center
, this] (Packet p
) {
47 return dispatch_packet(center
, std::move(p
));
50 _hw_address(_dev
->hw_address()),
51 _hw_features(_dev
->get_hw_features()) {
// qid is captured by the provider lambda below; no use of it is visible here.
53 unsigned qid
= center
->get_id();
// Packet provider for this queue. `idx` is captured by value and advanced
// with idx++ inside a mutable lambda, so it selects which entry of
// _pkt_providers is polled next.
54 dev
->queue_for_cpu(center
->get_id()).register_packet_provider([this, idx
, qid
] () mutable {
// Poll at most _pkt_providers.size() providers per invocation.
56 for (size_t i
= 0; i
< _pkt_providers
.size(); i
++) {
57 auto l3p
= _pkt_providers
[idx
++]();
// NOTE(review): the statement guarded by this check is not visible;
// presumably it wraps idx back to the first provider -- confirm.
58 if (idx
== _pkt_providers
.size())
// Take ownership of the provider's result (the check that l3p holds a
// value is not visible in this view).
61 auto l3pv
= std::move(*l3p
);
// Prepend an Ethernet header and fill it from the L3 result: destination
// from l3pv.to, source from our own hardware address, protocol number from
// l3pv.proto_num.
62 auto eh
= l3pv
.p
.prepend_header
<eth_hdr
>();
63 eh
->dst_mac
= l3pv
.to
;
64 eh
->src_mac
= _hw_address
;
65 eh
->eth_proto
= uint16_t(l3pv
.proto_num
);
// Debug log (level 10) of the outgoing frame: protocol, src -> dst, length.
67 ldout(this->cct
, 10) << "=== tx === proto " << std::hex
<< uint16_t(l3pv
.proto_num
)
68 << " " << _hw_address
<< " -> " << l3pv
.to
69 << " length " << std::dec
<< l3pv
.p
.len() << dendl
;
// Move the framed packet into `p` (declared outside the visible lines).
70 p
= std::move(l3pv
.p
);
// Registers a layer-3 protocol on this interface.
//   proto_num - Ethernet protocol number used (as uint16_t) as the key in
//               _proto_map.
//   next      - receive handler; passed to packet_stream.listen().
//   forward   - callback used to construct the l3_rx_stream entry.
// Returns the subscription produced by listening on the new entry's
// packet_stream. The emplace result is asserted via ceph_assert(i.second),
// so the call aborts if no new entry was inserted.
78 subscription
<Packet
, ethernet_address
> interface::register_l3(
79 eth_protocol_num proto_num
,
80 std::function
<int (Packet p
, ethernet_address from
)> next
,
81 std::function
<bool (forward_hash
&, Packet
& p
, size_t)> forward
)
// Construct the map entry in place: key from proto_num, value from `forward`.
83 auto i
= _proto_map
.emplace(std::piecewise_construct
, std::make_tuple(uint16_t(proto_num
)), std::forward_as_tuple(std::move(forward
)));
84 ceph_assert(i
.second
);
85 l3_rx_stream
& l3_rx
= i
.first
->second
;
86 return l3_rx
.packet_stream
.listen(std::move(next
));
// Maps a hash value to a CPU id by delegating to the underlying device's
// hash2cpu().
89 unsigned interface::hash2cpu(uint32_t hash
) {
90 return _dev
->hash2cpu(hash
);
// Returns a const reference to the RSS key reported by the underlying device.
93 const rss_key_type
& interface::rss_key() const {
94 return _dev
->rss_key();
// Returns the hardware queue count reported by the underlying device.
97 uint16_t interface::hw_queues_count() const {
98 return _dev
->hw_queues_count();
// Event callback that delivers a packet to a destination through
// DPDKDevice::l2receive().
// Holds a shared_ptr to the device and a reference to a caller-owned
// queue-depth counter; the constructor also initializes members `p` (the
// packet) and `dst` (the target).
// NOTE(review): the declarations of `p` and `dst`, the access specifier and
// any statements after the l2receive() call are not visible in this view.
101 class C_handle_l2forward
: public EventCallback
{
102 std::shared_ptr
<DPDKDevice
> sdev
;
// Reference to a counter owned by the caller; it must outlive this object.
103 unsigned &queue_depth
;
// Takes the packet by value and moves it into the member `p`.
108 C_handle_l2forward(std::shared_ptr
<DPDKDevice
> &p
, unsigned &qd
, Packet pkt
, unsigned target
)
109 : sdev(p
), queue_depth(qd
), p(std::move(pkt
)), dst(target
) {}
// Hands the stored packet to the device for `dst`. The `fd` argument is not
// used by the visible code.
110 void do_request(uint64_t fd
) {
111 sdev
->l2receive(dst
, std::move(p
));
// Forwards packet `p` from the EventCenter `source` to the worker with index
// `target` by posting a C_handle_l2forward event to that worker's
// EventCenter.
// Forwarding only happens while the thread-local counter queue_depth is
// below 1000; otherwise the visible code does nothing with the packet.
// NOTE(review): no increment of queue_depth is visible in this view, and the
// code that deletes the heap-allocated callback is not visible -- confirm.
117 void interface::forward(EventCenter
*source
, unsigned target
, Packet p
) {
// One counter per thread (__thread), shared by reference with the callbacks.
118 static __thread
unsigned queue_depth
;
120 if (queue_depth
< 1000) {
122 // FIXME: need ensure this event not be called after EventCenter destruct
123 _dev
->workers
[target
]->center
.dispatch_event_external(
// The packet is passed through free_on_cpu(source) before being handed over.
124 new C_handle_l2forward(_dev
, queue_depth
, std::move(p
.free_on_cpu(source
)), target
));
// Handles one received Ethernet frame on behalf of `center`.
// Visible flow: read the Ethernet header, look up the L3 protocol registered
// for its eth_proto, log the frame, ask the device which CPU should handle
// it, then either forward the packet to that CPU or strip the Ethernet
// header and pass the packet to the L3 packet stream.
// NOTE(review): several conditions, else-branches, the declarations of `h`
// and `data`, and the final return are not visible in this view; the
// comments below describe only the visible statements.
128 int interface::dispatch_packet(EventCenter
*center
, Packet p
) {
129 auto eh
= p
.get_header
<eth_hdr
>();
// The header field is converted with ntoh() before the lookup.
131 auto i
= _proto_map
.find(ntoh(eh
->eth_proto
));
132 auto hwrss
= p
.rss_hash();
// Two level-10 log variants follow: this one also prints the RSS hash.
// The condition that selects between them is not visible here.
134 ldout(cct
, 10) << __func__
<< " === rx === proto " << std::hex
<< ::ntoh(eh
->eth_proto
)
135 << " "<< eh
->src_mac
.ntoh() << " -> " << eh
->dst_mac
.ntoh()
136 << " length " << std::dec
<< p
.len() << " rss_hash " << *p
.rss_hash() << dendl
;
// Same log line without the rss_hash field.
138 ldout(cct
, 10) << __func__
<< " === rx === proto " << std::hex
<< ::ntoh(eh
->eth_proto
)
139 << " "<< eh
->src_mac
.ntoh() << " -> " << eh
->dst_mac
.ntoh()
140 << " length " << std::dec
<< p
.len() << dendl
;
// Only frames whose protocol has a registered L3 handler are processed.
142 if (i
!= _proto_map
.end()) {
143 l3_rx_stream
& l3
= i
->second
;
// Ask the device for the destination CPU; the lambda supplies a hash.
// It captures p and l3 by reference and is passed straight to forward_dst().
144 auto fw
= _dev
->forward_dst(center
->get_id(), [&p
, &l3
, this] () {
145 auto hwrss
= p
.rss_hash();
// Let the L3 layer fill `data` from the packet past the Ethernet header;
// on success the hash is computed from rss_key() and that data.
150 if (l3
.forward(data
, p
, sizeof(eth_hdr
))) {
151 return toeplitz_hash(rss_key(), data
);
// A destination other than this center's id means the packet is forwarded.
156 if (fw
!= center
->get_id()) {
157 ldout(cct
, 1) << __func__
<< " forward to " << fw
<< dendl
;
158 forward(center
, fw
, std::move(p
));
// Local delivery: remember the source MAC, drop the Ethernet header and
// hand the packet to the L3 stream.
161 auto from
= h
.src_mac
;
162 p
.trim_front(sizeof(*eh
));
163 // avoid chaining, since queue length is unlimited
166 return l3
.packet_stream
.produce(std::move(p
), from
);
// Event callback that reports an (ethernet address, IPv4 address) pair to a
// DPDKWorker via worker->arp_learn().
// NOTE(review): the declaration of the `worker` member, the access specifier
// and any statements after the arp_learn() call are not visible in this view.
174 class C_arp_learn
: public EventCallback
{
176 ethernet_address l2_addr
;
177 ipv4_address l3_addr
;
// Stores the worker pointer and copies of both addresses.
180 C_arp_learn(DPDKWorker
*w
, ethernet_address l2
, ipv4_address l3
)
181 : worker(w
), l2_addr(l2
), l3_addr(l3
) {}
// Passes the stored addresses to the worker. The `id` argument is not used
// by the visible code.
182 void do_request(uint64_t id
) {
183 worker
->arp_learn(l2_addr
, l3_addr
);
// Broadcasts an ARP mapping (l2 <-> l3) to every worker of the device by
// posting one heap-allocated C_arp_learn event to each worker's EventCenter.
// NOTE(review): the code that deletes each callback is not visible here.
188 void interface::arp_learn(ethernet_address l2
, ipv4_address l3
)
190 for (auto &&w
: _dev
->workers
) {
191 w
->center
.dispatch_event_external(
192 new C_arp_learn(w
, l2
, l3
));
// Constructs an L3 protocol bound to `netif` for the given Ethernet protocol
// number, and registers `func` with the interface as a packet provider.
196 l3_protocol::l3_protocol(interface
* netif
, eth_protocol_num proto_num
, packet_provider_type func
)
197 : _netif(netif
), _proto_num(proto_num
) {
198 _netif
->register_packet_provider(std::move(func
));
// Subscribes to packets of this protocol.
//   rx_fn   - receive handler, moved into interface::register_l3().
//   forward - forwarding callback, moved into interface::register_l3().
// Returns the subscription produced by register_l3() for _proto_num.
201 subscription
<Packet
, ethernet_address
> l3_protocol::receive(
202 std::function
<int (Packet
, ethernet_address
)> rx_fn
,
203 std::function
<bool (forward_hash
&h
, Packet
&p
, size_t s
)> forward
) {
204 return _netif
->register_l3(_proto_num
, std::move(rx_fn
), std::move(forward
));