]> git.proxmox.com Git - ceph.git/blame - ceph/src/msg/async/rdma/RDMAIWARPConnectedSocketImpl.cc
import 15.2.0 Octopus source
[ceph.git] / ceph / src / msg / async / rdma / RDMAIWARPConnectedSocketImpl.cc
CommitLineData
11fdf7f2
TL
1#include "RDMAStack.h"
2
3#define dout_subsys ceph_subsys_ms
4#undef dout_prefix
5#define dout_prefix *_dout << " RDMAIWARPConnectedSocketImpl "
6
7#define TIMEOUT_MS 3000
8#define RETRY_COUNT 7
9
9f95a23c
TL
10RDMAIWARPConnectedSocketImpl::RDMAIWARPConnectedSocketImpl(CephContext *cct, shared_ptr<Infiniband>& ib,
11 shared_ptr<RDMADispatcher>& rdma_dispatcher,
12 RDMAWorker *w, RDMACMInfo *info)
13 : RDMAConnectedSocketImpl(cct, ib, rdma_dispatcher, w), cm_con_handler(new C_handle_cm_connection(this))
11fdf7f2
TL
14{
15 status = IDLE;
16 notify_fd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
17 if (info) {
18 is_server = true;
19 cm_id = info->cm_id;
20 cm_channel = info->cm_channel;
21 status = RDMA_ID_CREATED;
9f95a23c 22 peer_qpn = info->qp_num;
11fdf7f2
TL
23 if (alloc_resource()) {
24 close_notify();
25 return;
26 }
27 worker->center.submit_to(worker->center.get_id(), [this]() {
28 worker->center.create_file_event(cm_channel->fd, EVENT_READABLE, cm_con_handler);
29 status = CHANNEL_FD_CREATED;
30 }, false);
31 status = RESOURCE_ALLOCATED;
9f95a23c
TL
32 qp->get_local_cm_meta().peer_qpn = peer_qpn;
33 qp->get_peer_cm_meta().local_qpn = peer_qpn;
11fdf7f2
TL
34 } else {
35 is_server = false;
36 cm_channel = rdma_create_event_channel();
37 rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP);
38 status = RDMA_ID_CREATED;
39 ldout(cct, 20) << __func__ << " successfully created cm id: " << cm_id << dendl;
40 }
41}
42
43RDMAIWARPConnectedSocketImpl::~RDMAIWARPConnectedSocketImpl() {
44 ldout(cct, 20) << __func__ << " destruct." << dendl;
45 std::unique_lock l(close_mtx);
46 close_condition.wait(l, [&] { return closed; });
47 if (status >= RDMA_ID_CREATED) {
48 rdma_destroy_id(cm_id);
49 rdma_destroy_event_channel(cm_channel);
50 }
51}
52
53int RDMAIWARPConnectedSocketImpl::try_connect(const entity_addr_t& peer_addr, const SocketOptions &opts) {
54 worker->center.create_file_event(cm_channel->fd, EVENT_READABLE, cm_con_handler);
55 status = CHANNEL_FD_CREATED;
56 if (rdma_resolve_addr(cm_id, NULL, const_cast<struct sockaddr*>(peer_addr.get_sockaddr()), TIMEOUT_MS)) {
57 lderr(cct) << __func__ << " failed to resolve addr" << dendl;
58 return -1;
59 }
60 return 0;
61}
62
63void RDMAIWARPConnectedSocketImpl::close() {
64 error = ECONNRESET;
65 active = false;
66 if (status >= CONNECTED) {
67 rdma_disconnect(cm_id);
68 }
69 close_notify();
70}
71
72void RDMAIWARPConnectedSocketImpl::shutdown() {
73 error = ECONNRESET;
74 active = false;
75}
76
/**
 * Handle one event from the RDMA CM event channel.
 *
 * Fetches a single CM event, advances `status` / `connected` / `error`
 * according to the event type, and acknowledges the event. Failures are
 * reported by setting `connected` to -ECONNREFUSED (or `error` to ECONNRESET
 * on disconnect) and calling notify().
 *
 * If no event can be fetched, the failure is logged and the function returns
 * without touching any state.
 */
void RDMAIWARPConnectedSocketImpl::handle_cm_connection() {
  struct rdma_cm_event *event = nullptr;
  // Do not dereference `event` unless the fetch actually succeeded; the
  // original code ignored the return value and read an uninitialized pointer.
  if (rdma_get_cm_event(cm_channel, &event) || !event) {
    lderr(cct) << __func__ << " failed to get rdma cm event, errno=" << errno
               << " (cm id: " << cm_id << ")" << dendl;
    return;
  }
  ldout(cct, 20) << __func__ << " event name: " << rdma_event_str(event->event)
                 << " (cm id: " << cm_id << ")" << dendl;
  struct rdma_conn_param cm_params;
  switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
      status = ADDR_RESOLVED;
      if (rdma_resolve_route(cm_id, TIMEOUT_MS)) {
        lderr(cct) << __func__ << " failed to resolve rdma addr" << dendl;
        // Report the failure the same way the other failure branches do, so
        // the notified side sees an error instead of "still connecting".
        connected = -ECONNREFUSED;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ROUTE_RESOLVED:
      status = ROUTE_RESOLVED;
      if (alloc_resource()) {
        lderr(cct) << __func__ << " failed to alloc resource while resolving the route" << dendl;
        connected = -ECONNREFUSED;
        notify();
        break;
      }

      // FIPS zeroization audit 20191115: this memset is not security related.
      memset(&cm_params, 0, sizeof(cm_params));
      cm_params.retry_count = RETRY_COUNT;
      cm_params.qp_num = local_qpn;
      if (rdma_connect(cm_id, &cm_params)) {
        lderr(cct) << __func__ << " failed to connect remote rdma port" << dendl;
        connected = -ECONNREFUSED;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ESTABLISHED:
      ldout(cct, 20) << __func__ << " qp_num=" << cm_id->qp->qp_num << dendl;
      status = CONNECTED;
      if (!is_server) {
        // Client side: take the peer QP number from the event, activate the
        // socket and record the peer QP number in both CM metadata records.
        peer_qpn = event->param.conn.qp_num;
        activate();
        qp->get_local_cm_meta().peer_qpn = peer_qpn;
        qp->get_peer_cm_meta().local_qpn = peer_qpn;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ADDR_ERROR:
    case RDMA_CM_EVENT_ROUTE_ERROR:
    case RDMA_CM_EVENT_CONNECT_ERROR:
    case RDMA_CM_EVENT_UNREACHABLE:
    case RDMA_CM_EVENT_REJECTED:
      lderr(cct) << __func__ << " rdma connection rejected" << dendl;
      connected = -ECONNREFUSED;
      notify();
      break;

    case RDMA_CM_EVENT_DISCONNECTED:
      status = DISCONNECTED;
      close_notify();
      // Only raise ECONNRESET if no error has been recorded yet.
      if (!error) {
        error = ECONNRESET;
        notify();
      }
      break;

    case RDMA_CM_EVENT_DEVICE_REMOVAL:
      break;

    default:
      ceph_abort_msg("unhandled event");
      break;
  }
  rdma_ack_cm_event(event);
}
152
153void RDMAIWARPConnectedSocketImpl::activate() {
154 ldout(cct, 30) << __func__ << dendl;
155 active = true;
156 connected = 1;
157}
158
159int RDMAIWARPConnectedSocketImpl::alloc_resource() {
160 ldout(cct, 30) << __func__ << dendl;
9f95a23c 161 qp = ib->create_queue_pair(cct, dispatcher->get_tx_cq(),
11fdf7f2
TL
162 dispatcher->get_rx_cq(), IBV_QPT_RC, cm_id);
163 if (!qp) {
164 return -1;
165 }
9f95a23c 166 local_qpn = qp->get_local_qp_number();
11fdf7f2
TL
167 dispatcher->register_qp(qp, this);
168 dispatcher->perf_logger->inc(l_msgr_rdma_created_queue_pair);
169 dispatcher->perf_logger->inc(l_msgr_rdma_active_queue_pair);
170 return 0;
171}
172
173void RDMAIWARPConnectedSocketImpl::close_notify() {
174 ldout(cct, 30) << __func__ << dendl;
175 if (status >= CHANNEL_FD_CREATED) {
176 worker->center.delete_file_event(cm_channel->fd, EVENT_READABLE);
177 }
178 std::unique_lock l(close_mtx);
179 if (!closed) {
180 closed = true;
181 close_condition.notify_all();
182 }
183}