]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | #include "RDMAStack.h" |
2 | ||
3 | #define dout_subsys ceph_subsys_ms | |
4 | #undef dout_prefix | |
5 | #define dout_prefix *_dout << " RDMAIWARPConnectedSocketImpl " | |
6 | ||
7 | #define TIMEOUT_MS 3000 | |
8 | #define RETRY_COUNT 7 | |
9 | ||
9f95a23c TL |
10 | RDMAIWARPConnectedSocketImpl::RDMAIWARPConnectedSocketImpl(CephContext *cct, shared_ptr<Infiniband>& ib, |
11 | shared_ptr<RDMADispatcher>& rdma_dispatcher, | |
12 | RDMAWorker *w, RDMACMInfo *info) | |
13 | : RDMAConnectedSocketImpl(cct, ib, rdma_dispatcher, w), cm_con_handler(new C_handle_cm_connection(this)) | |
11fdf7f2 TL |
14 | { |
15 | status = IDLE; | |
16 | notify_fd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK); | |
17 | if (info) { | |
18 | is_server = true; | |
19 | cm_id = info->cm_id; | |
20 | cm_channel = info->cm_channel; | |
21 | status = RDMA_ID_CREATED; | |
9f95a23c | 22 | peer_qpn = info->qp_num; |
11fdf7f2 TL |
23 | if (alloc_resource()) { |
24 | close_notify(); | |
25 | return; | |
26 | } | |
27 | worker->center.submit_to(worker->center.get_id(), [this]() { | |
28 | worker->center.create_file_event(cm_channel->fd, EVENT_READABLE, cm_con_handler); | |
29 | status = CHANNEL_FD_CREATED; | |
30 | }, false); | |
31 | status = RESOURCE_ALLOCATED; | |
9f95a23c TL |
32 | qp->get_local_cm_meta().peer_qpn = peer_qpn; |
33 | qp->get_peer_cm_meta().local_qpn = peer_qpn; | |
11fdf7f2 TL |
34 | } else { |
35 | is_server = false; | |
36 | cm_channel = rdma_create_event_channel(); | |
37 | rdma_create_id(cm_channel, &cm_id, NULL, RDMA_PS_TCP); | |
38 | status = RDMA_ID_CREATED; | |
39 | ldout(cct, 20) << __func__ << " successfully created cm id: " << cm_id << dendl; | |
40 | } | |
41 | } | |
42 | ||
43 | RDMAIWARPConnectedSocketImpl::~RDMAIWARPConnectedSocketImpl() { | |
44 | ldout(cct, 20) << __func__ << " destruct." << dendl; | |
45 | std::unique_lock l(close_mtx); | |
46 | close_condition.wait(l, [&] { return closed; }); | |
47 | if (status >= RDMA_ID_CREATED) { | |
48 | rdma_destroy_id(cm_id); | |
49 | rdma_destroy_event_channel(cm_channel); | |
50 | } | |
51 | } | |
52 | ||
53 | int RDMAIWARPConnectedSocketImpl::try_connect(const entity_addr_t& peer_addr, const SocketOptions &opts) { | |
54 | worker->center.create_file_event(cm_channel->fd, EVENT_READABLE, cm_con_handler); | |
55 | status = CHANNEL_FD_CREATED; | |
56 | if (rdma_resolve_addr(cm_id, NULL, const_cast<struct sockaddr*>(peer_addr.get_sockaddr()), TIMEOUT_MS)) { | |
57 | lderr(cct) << __func__ << " failed to resolve addr" << dendl; | |
58 | return -1; | |
59 | } | |
60 | return 0; | |
61 | } | |
62 | ||
63 | void RDMAIWARPConnectedSocketImpl::close() { | |
64 | error = ECONNRESET; | |
65 | active = false; | |
66 | if (status >= CONNECTED) { | |
67 | rdma_disconnect(cm_id); | |
68 | } | |
69 | close_notify(); | |
70 | } | |
71 | ||
72 | void RDMAIWARPConnectedSocketImpl::shutdown() { | |
73 | error = ECONNRESET; | |
74 | active = false; | |
75 | } | |
76 | ||
/**
 * Event-center callback for the CM event channel.
 *
 * Fetches one pending RDMA CM event, advances the connection state
 * (`status`, `connected`, `error`) according to the event type, and
 * acknowledges the event. Failures along the connect path set `connected`
 * to -ECONNREFUSED and call notify().
 */
void RDMAIWARPConnectedSocketImpl::handle_cm_connection() {
  struct rdma_cm_event *event = nullptr;
  // Without a valid event there is nothing to inspect or acknowledge; bail
  // out rather than dereferencing an unset pointer.
  if (rdma_get_cm_event(cm_channel, &event)) {
    lderr(cct) << __func__ << " failed to get cm event" << dendl;
    return;
  }
  ldout(cct, 20) << __func__ << " event name: " << rdma_event_str(event->event)
                 << " (cm id: " << cm_id << ")" << dendl;
  struct rdma_conn_param cm_params;
  switch (event->event) {
    case RDMA_CM_EVENT_ADDR_RESOLVED:
      status = ADDR_RESOLVED;
      if (rdma_resolve_route(cm_id, TIMEOUT_MS)) {
        lderr(cct) << __func__ << " failed to resolve rdma route" << dendl;
        // Record the failure like the other connect-path errors below, so
        // the notify() carries an error state.
        connected = -ECONNREFUSED;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ROUTE_RESOLVED:
      status = ROUTE_RESOLVED;
      if (alloc_resource()) {
        lderr(cct) << __func__ << " failed to alloc resource while resolving the route" << dendl;
        connected = -ECONNREFUSED;
        notify();
        break;
      }

      // FIPS zeroization audit 20191115: this memset is not security related.
      memset(&cm_params, 0, sizeof(cm_params));
      cm_params.retry_count = RETRY_COUNT;
      cm_params.qp_num = local_qpn;
      if (rdma_connect(cm_id, &cm_params)) {
        lderr(cct) << __func__ << " failed to connect remote rdma port" << dendl;
        connected = -ECONNREFUSED;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ESTABLISHED:
      ldout(cct, 20) << __func__ << " qp_num=" << cm_id->qp->qp_num << dendl;
      status = CONNECTED;
      if (!is_server) {
        // Client side learns the peer QP number from the event itself; the
        // server side already took it from RDMACMInfo in the constructor.
        peer_qpn = event->param.conn.qp_num;
        activate();
        qp->get_local_cm_meta().peer_qpn = peer_qpn;
        qp->get_peer_cm_meta().local_qpn = peer_qpn;
        notify();
      }
      break;

    case RDMA_CM_EVENT_ADDR_ERROR:
    case RDMA_CM_EVENT_ROUTE_ERROR:
    case RDMA_CM_EVENT_CONNECT_ERROR:
    case RDMA_CM_EVENT_UNREACHABLE:
    case RDMA_CM_EVENT_REJECTED:
      lderr(cct) << __func__ << " rdma connection rejected" << dendl;
      connected = -ECONNREFUSED;
      notify();
      break;

    case RDMA_CM_EVENT_DISCONNECTED:
      status = DISCONNECTED;
      close_notify();
      // Only report a reset if no error has been recorded yet.
      if (!error) {
        error = ECONNRESET;
        notify();
      }
      break;

    case RDMA_CM_EVENT_DEVICE_REMOVAL:
      break;

    default:
      ceph_abort_msg("unhandled event");
      break;
  }
  rdma_ack_cm_event(event);
}
152 | ||
153 | void RDMAIWARPConnectedSocketImpl::activate() { | |
154 | ldout(cct, 30) << __func__ << dendl; | |
155 | active = true; | |
156 | connected = 1; | |
157 | } | |
158 | ||
159 | int RDMAIWARPConnectedSocketImpl::alloc_resource() { | |
160 | ldout(cct, 30) << __func__ << dendl; | |
9f95a23c | 161 | qp = ib->create_queue_pair(cct, dispatcher->get_tx_cq(), |
11fdf7f2 TL |
162 | dispatcher->get_rx_cq(), IBV_QPT_RC, cm_id); |
163 | if (!qp) { | |
164 | return -1; | |
165 | } | |
9f95a23c | 166 | local_qpn = qp->get_local_qp_number(); |
11fdf7f2 TL |
167 | dispatcher->register_qp(qp, this); |
168 | dispatcher->perf_logger->inc(l_msgr_rdma_created_queue_pair); | |
169 | dispatcher->perf_logger->inc(l_msgr_rdma_active_queue_pair); | |
170 | return 0; | |
171 | } | |
172 | ||
173 | void RDMAIWARPConnectedSocketImpl::close_notify() { | |
174 | ldout(cct, 30) << __func__ << dendl; | |
175 | if (status >= CHANNEL_FD_CREATED) { | |
176 | worker->center.delete_file_event(cm_channel->fd, EVENT_READABLE); | |
177 | } | |
178 | std::unique_lock l(close_mtx); | |
179 | if (!closed) { | |
180 | closed = true; | |
181 | close_condition.notify_all(); | |
182 | } | |
183 | } |