]>
Commit | Line | Data |
---|---|---|
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- | |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 XSKY <haomai@xsky.com> | |
7 | * | |
8 | * Author: Haomai Wang <haomaiwang@gmail.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | ||
17 | #ifndef CEPH_RDMA_DEVICE_H | |
18 | #define CEPH_RDMA_DEVICE_H | |
19 | ||
20 | #include <infiniband/verbs.h> | |
21 | ||
22 | #include <string> | |
23 | #include <vector> | |
24 | ||
25 | #include "include/int_types.h" | |
26 | #include "include/page.h" | |
27 | #include "common/debug.h" | |
28 | #include "common/errno.h" | |
29 | #include "msg/msg_types.h" | |
30 | #include "msg/async/net_handler.h" | |
31 | #include "common/Mutex.h" | |
32 | #include "msg/async/Event.h" | |
33 | ||
34 | typedef Infiniband::QueuePair QueuePair; | |
35 | typedef Infiniband::CompletionChannel CompletionChannel; | |
36 | typedef Infiniband::CompletionQueue CompletionQueue; | |
37 | typedef Infiniband::ProtectionDomain ProtectionDomain; | |
38 | typedef Infiniband::MemoryManager::Cluster Cluster; | |
39 | typedef Infiniband::MemoryManager::Chunk Chunk; | |
40 | typedef Infiniband::MemoryManager MemoryManager; | |
41 | ||
42 | class Port { | |
43 | struct ibv_context* ctxt; | |
44 | int port_num; | |
45 | struct ibv_port_attr* port_attr; | |
46 | uint16_t lid; | |
47 | int gid_idx = 0; | |
48 | union ibv_gid gid; | |
49 | ||
50 | public: | |
51 | explicit Port(CephContext *cct, struct ibv_context* ictxt, uint8_t ipn); | |
52 | ~Port(); | |
53 | uint16_t get_lid() { return lid; } | |
54 | ibv_gid get_gid() { return gid; } | |
55 | int get_port_num() { return port_num; } | |
56 | ibv_port_attr* get_port_attr() { return port_attr; } | |
57 | int get_gid_idx() { return gid_idx; } | |
58 | }; | |
59 | ||
60 | ||
61 | class Device { | |
62 | class C_handle_cq_async : public EventCallback { | |
63 | Device *device; | |
64 | public: | |
65 | C_handle_cq_async(Device *d): device(d) {} | |
66 | void do_request(int fd) { | |
67 | device->handle_async_event(); | |
68 | } | |
69 | }; | |
70 | ||
71 | CephContext *cct; | |
72 | ibv_device *device; | |
73 | const char *name; | |
74 | ||
75 | Port **ports; // Array of Port objects. index is 1 based (IB port #1 is in | |
76 | // index 1). Index 0 is not used | |
77 | ||
78 | int port_cnt; | |
79 | ||
80 | uint32_t max_send_wr; | |
81 | uint32_t max_recv_wr; | |
82 | uint32_t max_sge; | |
83 | ||
84 | Mutex lock; // Protects from concurrent intialization of the device | |
85 | bool initialized = false; | |
86 | EventCallbackRef async_handler; | |
87 | Infiniband *infiniband; | |
88 | ||
89 | void verify_port(int port_num); | |
90 | ||
91 | public: | |
92 | explicit Device(CephContext *c, Infiniband *ib, ibv_device* d); | |
93 | ~Device(); | |
94 | ||
95 | void init(int ibport = -1); | |
96 | void uninit(); | |
97 | ||
98 | void handle_async_event(); | |
99 | ||
100 | const char* get_name() const { return name;} | |
101 | ||
102 | Port *get_port(int ibport); | |
103 | uint16_t get_lid(int p) { return get_port(p)->get_lid(); } | |
104 | ibv_gid get_gid(int p) { return get_port(p)->get_gid(); } | |
105 | int get_gid_idx(int p) { return get_port(p)->get_gid_idx(); } | |
106 | ||
107 | QueuePair *create_queue_pair(int port, | |
108 | ibv_qp_type type); | |
109 | ibv_srq* create_shared_receive_queue(uint32_t max_wr, uint32_t max_sge); | |
110 | CompletionChannel *create_comp_channel(CephContext *c); | |
111 | CompletionQueue *create_comp_queue(CephContext *c, CompletionChannel *cc=NULL); | |
112 | int post_chunk(Chunk* chunk); | |
113 | int post_channel_cluster(); | |
114 | ||
115 | MemoryManager* get_memory_manager() { return memory_manager; } | |
116 | bool is_tx_buffer(const char* c) { return memory_manager->is_tx_buffer(c);} | |
117 | bool is_rx_buffer(const char* c) { return memory_manager->is_rx_buffer(c);} | |
118 | Chunk *get_tx_chunk_by_buffer(const char *c) { return memory_manager->get_tx_chunk_by_buffer(c); } | |
119 | int get_tx_buffers(std::vector<Chunk*> &c, size_t bytes); | |
120 | int poll_tx_cq(int n, ibv_wc *wc); | |
121 | int poll_rx_cq(int n, ibv_wc *wc); | |
122 | void rearm_cqs(); | |
123 | ||
124 | struct ibv_context *ctxt; | |
125 | ibv_device_attr *device_attr; | |
126 | ||
127 | MemoryManager* memory_manager = nullptr; | |
128 | ibv_srq *srq = nullptr; | |
129 | Infiniband::CompletionQueue *rx_cq = nullptr; | |
130 | Infiniband::CompletionChannel *rx_cc = nullptr; | |
131 | Infiniband::CompletionQueue *tx_cq = nullptr; | |
132 | Infiniband::CompletionChannel *tx_cc = nullptr; | |
133 | ProtectionDomain *pd = nullptr; | |
134 | }; | |
135 | ||
136 | inline ostream& operator<<(ostream& out, const Device &d) | |
137 | { | |
138 | return out << d.get_name(); | |
139 | } | |
140 | ||
141 | ||
142 | class DeviceList { | |
143 | CephContext *cct; | |
144 | struct ibv_device ** device_list; | |
145 | int num; | |
146 | Device** devices; | |
147 | ||
148 | unsigned last_poll_dev = 0; | |
149 | struct pollfd *poll_fds; | |
150 | ||
151 | public: | |
152 | DeviceList(CephContext *cct, Infiniband *ib); | |
153 | ~DeviceList(); | |
154 | ||
155 | Device* get_device(const char* device_name); | |
156 | Device* get_device(const struct ibv_context *ctxt); | |
157 | ||
158 | void uninit(); | |
159 | ||
160 | void rearm_notify(); | |
161 | int poll_tx(int n, Device **d, ibv_wc *wc); | |
162 | int poll_rx(int n, Device **d, ibv_wc *wc); | |
163 | int poll_blocking(bool &done); | |
164 | ||
165 | void handle_async_event(); | |
166 | }; | |
167 | ||
168 | #endif |