]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | /* | |
4 | * Ceph - scalable distributed file system | |
5 | * | |
6 | * Copyright (C) 2016 XSky <haomai@xsky.com> | |
7 | * | |
8 | * Author: Haomai Wang <haomaiwang@gmail.com> | |
9 | * | |
10 | * This is free software; you can redistribute it and/or | |
11 | * modify it under the terms of the GNU Lesser General Public | |
12 | * License version 2.1, as published by the Free Software | |
13 | * Foundation. See file COPYING. | |
14 | * | |
15 | */ | |
16 | ||
11fdf7f2 TL |
17 | #include <mutex> |
18 | ||
7c673cae FG |
19 | #include "include/compat.h" |
20 | #include "common/Cond.h" | |
21 | #include "common/errno.h" | |
22 | #include "PosixStack.h" | |
23 | #ifdef HAVE_RDMA | |
24 | #include "rdma/RDMAStack.h" | |
25 | #endif | |
26 | #ifdef HAVE_DPDK | |
27 | #include "dpdk/DPDKStack.h" | |
28 | #endif | |
29 | ||
30 | #include "common/dout.h" | |
11fdf7f2 | 31 | #include "include/ceph_assert.h" |
7c673cae FG |
32 | |
33 | #define dout_subsys ceph_subsys_ms | |
34 | #undef dout_prefix | |
35 | #define dout_prefix *_dout << "stack " | |
36 | ||
37 | std::function<void ()> NetworkStack::add_thread(unsigned i) | |
38 | { | |
39 | Worker *w = workers[i]; | |
40 | return [this, w]() { | |
41 | char tp_name[16]; | |
11fdf7f2 | 42 | sprintf(tp_name, "msgr-worker-%u", w->id); |
7c673cae | 43 | ceph_pthread_setname(pthread_self(), tp_name); |
11fdf7f2 | 44 | const unsigned EventMaxWaitUs = 30000000; |
7c673cae FG |
45 | w->center.set_owner(); |
46 | ldout(cct, 10) << __func__ << " starting" << dendl; | |
47 | w->initialize(); | |
48 | w->init_done(); | |
49 | while (!w->done) { | |
50 | ldout(cct, 30) << __func__ << " calling event process" << dendl; | |
51 | ||
31f18b77 FG |
52 | ceph::timespan dur; |
53 | int r = w->center.process_events(EventMaxWaitUs, &dur); | |
7c673cae FG |
54 | if (r < 0) { |
55 | ldout(cct, 20) << __func__ << " process events failed: " | |
56 | << cpp_strerror(errno) << dendl; | |
57 | // TODO do something? | |
58 | } | |
31f18b77 | 59 | w->perf_logger->tinc(l_msgr_running_total_time, dur); |
7c673cae FG |
60 | } |
61 | w->reset(); | |
62 | w->destroy(); | |
63 | }; | |
64 | } | |
65 | ||
66 | std::shared_ptr<NetworkStack> NetworkStack::create(CephContext *c, const string &t) | |
67 | { | |
68 | if (t == "posix") | |
69 | return std::make_shared<PosixNetworkStack>(c, t); | |
70 | #ifdef HAVE_RDMA | |
71 | else if (t == "rdma") | |
72 | return std::make_shared<RDMAStack>(c, t); | |
73 | #endif | |
74 | #ifdef HAVE_DPDK | |
75 | else if (t == "dpdk") | |
76 | return std::make_shared<DPDKStack>(c, t); | |
77 | #endif | |
78 | ||
79 | lderr(c) << __func__ << " ms_async_transport_type " << t << | |
80 | " is not supported! " << dendl; | |
81 | ceph_abort(); | |
82 | return nullptr; | |
83 | } | |
84 | ||
85 | Worker* NetworkStack::create_worker(CephContext *c, const string &type, unsigned i) | |
86 | { | |
87 | if (type == "posix") | |
88 | return new PosixWorker(c, i); | |
89 | #ifdef HAVE_RDMA | |
90 | else if (type == "rdma") | |
91 | return new RDMAWorker(c, i); | |
92 | #endif | |
93 | #ifdef HAVE_DPDK | |
94 | else if (type == "dpdk") | |
95 | return new DPDKWorker(c, i); | |
96 | #endif | |
97 | ||
98 | lderr(c) << __func__ << " ms_async_transport_type " << type << | |
99 | " is not supported! " << dendl; | |
100 | ceph_abort(); | |
101 | return nullptr; | |
102 | } | |
103 | ||
104 | NetworkStack::NetworkStack(CephContext *c, const string &t): type(t), started(false), cct(c) | |
105 | { | |
11fdf7f2 | 106 | ceph_assert(cct->_conf->ms_async_op_threads > 0); |
31f18b77 | 107 | |
11fdf7f2 | 108 | const int InitEventNumber = 5000; |
7c673cae FG |
109 | num_workers = cct->_conf->ms_async_op_threads; |
110 | if (num_workers >= EventCenter::MAX_EVENTCENTER) { | |
111 | ldout(cct, 0) << __func__ << " max thread limit is " | |
112 | << EventCenter::MAX_EVENTCENTER << ", switching to this now. " | |
113 | << "Higher thread values are unnecessary and currently unsupported." | |
114 | << dendl; | |
115 | num_workers = EventCenter::MAX_EVENTCENTER; | |
116 | } | |
117 | ||
118 | for (unsigned i = 0; i < num_workers; ++i) { | |
119 | Worker *w = create_worker(cct, type, i); | |
120 | w->center.init(InitEventNumber, i, type); | |
121 | workers.push_back(w); | |
122 | } | |
7c673cae FG |
123 | } |
124 | ||
125 | void NetworkStack::start() | |
126 | { | |
11fdf7f2 TL |
127 | std::unique_lock<decltype(pool_spin)> lk(pool_spin); |
128 | ||
7c673cae | 129 | if (started) { |
7c673cae FG |
130 | return ; |
131 | } | |
132 | ||
133 | for (unsigned i = 0; i < num_workers; ++i) { | |
134 | if (workers[i]->is_init()) | |
135 | continue; | |
136 | std::function<void ()> thread = add_thread(i); | |
137 | spawn_worker(i, std::move(thread)); | |
138 | } | |
139 | started = true; | |
11fdf7f2 | 140 | lk.unlock(); |
7c673cae FG |
141 | |
142 | for (unsigned i = 0; i < num_workers; ++i) | |
143 | workers[i]->wait_for_init(); | |
144 | } | |
145 | ||
146 | Worker* NetworkStack::get_worker() | |
147 | { | |
148 | ldout(cct, 30) << __func__ << dendl; | |
149 | ||
150 | // start with some reasonably large number | |
151 | unsigned min_load = std::numeric_limits<int>::max(); | |
152 | Worker* current_best = nullptr; | |
153 | ||
154 | pool_spin.lock(); | |
155 | // find worker with least references | |
156 | // tempting case is returning on references == 0, but in reality | |
157 | // this will happen so rarely that there's no need for special case. | |
158 | for (unsigned i = 0; i < num_workers; ++i) { | |
159 | unsigned worker_load = workers[i]->references.load(); | |
160 | if (worker_load < min_load) { | |
161 | current_best = workers[i]; | |
162 | min_load = worker_load; | |
163 | } | |
164 | } | |
165 | ||
166 | pool_spin.unlock(); | |
11fdf7f2 | 167 | ceph_assert(current_best); |
7c673cae FG |
168 | ++current_best->references; |
169 | return current_best; | |
170 | } | |
171 | ||
172 | void NetworkStack::stop() | |
173 | { | |
11fdf7f2 | 174 | std::lock_guard<decltype(pool_spin)> lk(pool_spin); |
7c673cae FG |
175 | for (unsigned i = 0; i < num_workers; ++i) { |
176 | workers[i]->done = true; | |
177 | workers[i]->center.wakeup(); | |
178 | join_worker(i); | |
179 | } | |
180 | started = false; | |
181 | } | |
182 | ||
183 | class C_drain : public EventCallback { | |
184 | Mutex drain_lock; | |
185 | Cond drain_cond; | |
186 | unsigned drain_count; | |
187 | ||
188 | public: | |
189 | explicit C_drain(size_t c) | |
190 | : drain_lock("C_drain::drain_lock"), | |
191 | drain_count(c) {} | |
11fdf7f2 | 192 | void do_request(uint64_t id) override { |
7c673cae FG |
193 | Mutex::Locker l(drain_lock); |
194 | drain_count--; | |
195 | if (drain_count == 0) drain_cond.Signal(); | |
196 | } | |
197 | void wait() { | |
198 | Mutex::Locker l(drain_lock); | |
199 | while (drain_count) | |
200 | drain_cond.Wait(drain_lock); | |
201 | } | |
202 | }; | |
203 | ||
204 | void NetworkStack::drain() | |
205 | { | |
206 | ldout(cct, 30) << __func__ << " started." << dendl; | |
207 | pthread_t cur = pthread_self(); | |
208 | pool_spin.lock(); | |
209 | C_drain drain(num_workers); | |
210 | for (unsigned i = 0; i < num_workers; ++i) { | |
11fdf7f2 | 211 | ceph_assert(cur != workers[i]->center.get_owner()); |
7c673cae FG |
212 | workers[i]->center.dispatch_event_external(EventCallbackRef(&drain)); |
213 | } | |
214 | pool_spin.unlock(); | |
215 | drain.wait(); | |
216 | ldout(cct, 30) << __func__ << " end." << dendl; | |
217 | } |