]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* |
2 | * This file is open source software, licensed to you under the terms | |
3 | * of the Apache License, Version 2.0 (the "License"). See the NOTICE file | |
4 | * distributed with this work for additional information regarding copyright | |
5 | * ownership. You may not use this file except in compliance with the License. | |
6 | * | |
7 | * You may obtain a copy of the License at | |
8 | * | |
9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
10 | * | |
11 | * Unless required by applicable law or agreed to in writing, | |
12 | * software distributed under the License is distributed on an | |
13 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
14 | * KIND, either express or implied. See the License for the | |
15 | * specific language governing permissions and limitations | |
16 | * under the License. | |
17 | */ | |
18 | /* | |
19 | * Copyright (C) 2018 ScyllaDB | |
20 | * | |
21 | * The goal of this program is to allow a user to properly configure the Seastar I/O | |
22 | * scheduler. | |
23 | */ | |
24 | #include <iostream> | |
25 | #include <chrono> | |
26 | #include <random> | |
27 | #include <memory> | |
28 | #include <vector> | |
29 | #include <cmath> | |
30 | #include <sys/vfs.h> | |
31 | #include <sys/sysmacros.h> | |
11fdf7f2 TL |
32 | #include <boost/range/irange.hpp> |
33 | #include <boost/program_options.hpp> | |
34 | #include <boost/iterator/counting_iterator.hpp> | |
35 | #include <fstream> | |
36 | #include <wordexp.h> | |
37 | #include <yaml-cpp/yaml.h> | |
9f95a23c | 38 | #include <fmt/printf.h> |
11fdf7f2 TL |
39 | #include <seastar/core/thread.hh> |
40 | #include <seastar/core/sstring.hh> | |
41 | #include <seastar/core/posix.hh> | |
42 | #include <seastar/core/resource.hh> | |
43 | #include <seastar/core/aligned_buffer.hh> | |
44 | #include <seastar/core/sharded.hh> | |
45 | #include <seastar/core/app-template.hh> | |
46 | #include <seastar/core/shared_ptr.hh> | |
47 | #include <seastar/core/fsqual.hh> | |
48 | #include <seastar/util/defer.hh> | |
49 | #include <seastar/util/log.hh> | |
50 | #include <seastar/util/std-compat.hh> | |
51 | #include <seastar/util/read_first_line.hh> | |
52 | ||
using namespace seastar;
using namespace std::chrono_literals;
namespace fs = seastar::compat::filesystem;

logger iotune_logger("iotune");

// All measurements use the steady clock so elapsed intervals are immune to
// wall-clock adjustments during a run.
using iotune_clock = std::chrono::steady_clock;
// Per-thread PRNG for the random I/O position generator, seeded from the
// steady clock so each run (and each thread) gets a different sequence.
static thread_local std::default_random_engine random_generator(std::chrono::duration_cast<std::chrono::nanoseconds>(iotune_clock::now().time_since_epoch()).count());
11fdf7f2 TL |
62 | void check_device_properties(fs::path dev_sys_file) { |
63 | auto sched_file = dev_sys_file / "queue" / "scheduler"; | |
64 | auto sched_string = read_first_line(sched_file); | |
65 | auto beg = sched_string.find('['); | |
66 | size_t len = sched_string.size(); | |
67 | if (beg == sstring::npos) { | |
68 | beg = 0; | |
69 | } else { | |
70 | auto end = sched_string.find(']'); | |
71 | if (end != sstring::npos) { | |
72 | len = end - beg - 1; | |
73 | } | |
74 | beg++; | |
75 | } | |
76 | auto scheduler = sched_string.substr(beg, len); | |
77 | if ((scheduler != "noop") && (scheduler != "none")) { | |
78 | iotune_logger.warn("Scheduler for {} set to {}. It is recommend to set it to noop before evaluation so as not to skew the results.", | |
79 | sched_file.string(), scheduler); | |
80 | } | |
81 | ||
82 | auto nomerges_file = dev_sys_file / "queue" / "nomerges"; | |
9f95a23c | 83 | auto nomerges = read_first_line_as<unsigned>(nomerges_file); |
11fdf7f2 TL |
84 | if (nomerges != 2u) { |
85 | iotune_logger.warn("nomerges for {} set to {}. It is recommend to set it to 2 before evaluation so that merges are disabled. Results can be skewed otherwise.", | |
86 | nomerges_file.string(), nomerges); | |
87 | } | |
88 | } | |
89 | ||
// Describes the directory under evaluation: where it lives, how much space is
// available, and the aggregate properties of the block device(s) backing it,
// discovered by walking sysfs.
struct evaluation_directory {
    sstring _name;
    // We know that if we issue more than this, they will be blocked on linux anyway.
    unsigned _max_iodepth = 0;
    uint64_t _available_space;
    // Never report an I/O size below a sector (512 bytes).
    uint64_t _min_data_transfer_size = 512;
    unsigned _disks_per_array = 0;

    void scan_device(unsigned dev_maj, unsigned dev_min) {
        scan_device(fmt::format("{}:{}", dev_maj, dev_min));
    }

    void scan_device(std::string dev_str) {
        scan_device(fs::path("/sys/dev/block") / dev_str);
    }

    // Recursively walks sysfs from a block-device node down through its
    // "slaves" (e.g. RAID/LVM members) and accumulates queue properties of
    // every leaf device found.
    void scan_device(fs::path sys_file) {
        try {
            sys_file = fs::canonical(sys_file);
            bool is_leaf = true;
            if (fs::exists(sys_file / "slaves")) {
                for (auto& dev : fs::directory_iterator(sys_file / "slaves")) {
                    is_leaf = false;
                    scan_device(read_first_line(dev.path() / "dev"));
                }
            }

            // our work is done if not leaf. We'll tune the leaves
            if (!is_leaf) {
                return;
            }

            if (fs::exists(sys_file / "partition")) {
                // A partition: re-scan its parent (the whole-disk node).
                scan_device(sys_file.remove_filename());
            } else {
                check_device_properties(sys_file);
                auto queue_dir = sys_file / "queue";
                auto disk_min_io_size = read_first_line_as<uint64_t>(queue_dir / "minimum_io_size");

                _min_data_transfer_size = std::max(_min_data_transfer_size, disk_min_io_size);
                _max_iodepth += read_first_line_as<uint64_t>(queue_dir / "nr_requests");
                _disks_per_array++;
            }
        } catch (std::system_error& se) {
            // sysfs layout varies by kernel; fall back to a safe iodepth guess.
            iotune_logger.error("Error while parsing sysfs. Will continue with guessed values: {}", se.what());
            _max_iodepth = 128;
        }
        _disks_per_array = std::max(_disks_per_array, 1u);
    }
public:
    evaluation_directory(sstring name)
        : _name(name)
        , _available_space(fs::space(fs::path(_name)).available)
    {}

    unsigned max_iodepth() const {
        return _max_iodepth;
    }

    fs::path path() const {
        return fs::path(_name);
    }

    const sstring& name() const {
        return _name;
    }

    unsigned disks_per_array() const {
        return _disks_per_array;
    }

    uint64_t minimum_io_size() const {
        return _min_data_transfer_size;
    }

    // Resolves the directory to its backing device (via stat) and scans it.
    // Must run inside a seastar thread (uses .get0()).
    future<> discover_directory() {
        return seastar::async([this] {
            auto f = open_directory(_name).get0();
            auto st = f.stat().get0();
            f.close().get();

            scan_device(major(st.st_dev), minor(st.st_dev));
        });
    }

    uint64_t available_space() const {
        return _available_space;
    }
};
179 | ||
// One measurement sample: throughput in bytes per second and request rate in
// operations per second. Addable so per-shard results can be aggregated.
struct io_rates {
    float bytes_per_sec = 0;
    float iops = 0;

    io_rates& operator+=(const io_rates& other) {
        bytes_per_sec = bytes_per_sec + other.bytes_per_sec;
        iops = iops + other.iops;
        return *this;
    }

    io_rates operator+(const io_rates& other) const {
        io_rates sum(*this);
        sum += other;
        return sum;
    }
};
193 | ||
194 | class invalid_position : public std::exception { | |
195 | public: | |
196 | virtual const char* what() const noexcept { | |
197 | return "file access position invalid"; | |
198 | } | |
199 | }; | |
200 | ||
201 | struct position_generator { | |
202 | virtual uint64_t get_pos() = 0; | |
203 | virtual bool is_sequential() const = 0; | |
204 | virtual ~position_generator() {} | |
205 | }; | |
206 | ||
207 | class sequential_issuer : public position_generator { | |
208 | size_t _buffer_size; | |
209 | uint64_t _position = 0; | |
210 | uint64_t _size_limit; | |
211 | public: | |
212 | sequential_issuer(size_t buffer_size, uint64_t size_limit) | |
213 | : _buffer_size(buffer_size) | |
214 | , _size_limit(size_limit) | |
215 | {} | |
216 | ||
217 | virtual bool is_sequential() const { | |
218 | return true; | |
219 | } | |
220 | ||
221 | virtual uint64_t get_pos() { | |
222 | if (_position >= _size_limit) { | |
9f95a23c TL |
223 | // Wrap around if reaching EOF. The write bandwidth is lower, |
224 | // and we also split the write bandwidth among shards, while we | |
225 | // read only from shard 0, so shard 0's file may not be large | |
226 | // enough to read from. | |
227 | _position = 0; | |
11fdf7f2 TL |
228 | } |
229 | auto pos = _position; | |
230 | _position += _buffer_size; | |
231 | return pos; | |
232 | } | |
233 | }; | |
234 | ||
235 | class random_issuer : public position_generator { | |
236 | size_t _buffer_size; | |
237 | uint64_t _last_position; | |
238 | std::uniform_int_distribution<uint64_t> _pos_distribution; | |
239 | public: | |
240 | random_issuer(size_t buffer_size, uint64_t last_position) | |
241 | : _buffer_size(buffer_size) | |
242 | , _last_position(last_position) | |
243 | , _pos_distribution(0, (last_position / buffer_size) - 1) | |
244 | {} | |
245 | ||
246 | virtual bool is_sequential() const { | |
247 | return false; | |
248 | } | |
249 | ||
250 | virtual uint64_t get_pos() { | |
251 | uint64_t pos = _pos_distribution(random_generator) * _buffer_size; | |
252 | if (pos >= _last_position) { | |
253 | throw invalid_position(); | |
254 | } | |
255 | return pos; | |
256 | } | |
257 | }; | |
258 | ||
// Strategy interface abstracting the direction (read vs. write) of one
// benchmark request.
class request_issuer {
public:
    // Issues one DMA request of `size` bytes at file offset `pos` using
    // `buf`; resolves with the number of bytes actually transferred.
    virtual future<size_t> issue_request(uint64_t pos, char* buf, uint64_t size) = 0;
    virtual ~request_issuer() {}
};
264 | ||
265 | ||
// Issues DMA writes against the test file.
class write_request_issuer : public request_issuer {
    file _file;
public:
    explicit write_request_issuer(file f) : _file(f) {}
    future<size_t> issue_request(uint64_t pos, char* buf, uint64_t size) override {
        return _file.dma_write(pos, buf, size);
    }
};
274 | ||
// Issues DMA reads against the test file.
class read_request_issuer : public request_issuer {
    file _file;
public:
    explicit read_request_issuer(file f) : _file(f) {}
    future<size_t> issue_request(uint64_t pos, char* buf, uint64_t size) override {
        return _file.dma_read(pos, buf, size);
    }
};
283 | ||
// Drives one stream of I/O requests and accounts bytes/requests completed
// inside the measurement window [_start_measuring, _end_measuring).
class io_worker {
    uint64_t _bytes = 0;
    unsigned _requests = 0;
    size_t _buffer_size;
    // Measurement starts 10ms after construction (warm-up) and the load keeps
    // running 10ms past the measurement window (cool-down), so ramp effects
    // at either edge do not pollute the measured interval.
    std::chrono::time_point<iotune_clock, std::chrono::duration<double>> _start_measuring;
    std::chrono::time_point<iotune_clock, std::chrono::duration<double>> _end_measuring;
    std::chrono::time_point<iotune_clock, std::chrono::duration<double>> _end_load;
    // track separately because in the sequential case we may exhaust the file before _duration
    std::chrono::time_point<iotune_clock, std::chrono::duration<double>> _last_time_seen;

    std::unique_ptr<position_generator> _pos_impl;
    std::unique_ptr<request_issuer> _req_impl;
public:
    bool is_sequential() const {
        return _pos_impl->is_sequential();
    }

    bool should_stop() const {
        return iotune_clock::now() >= _end_load;
    }

    io_worker(size_t buffer_size, std::chrono::duration<double> duration, std::unique_ptr<request_issuer> reqs, std::unique_ptr<position_generator> pos)
        : _buffer_size(buffer_size)
        , _start_measuring(iotune_clock::now() + std::chrono::duration<double>(10ms))
        , _end_measuring(_start_measuring + duration)
        , _end_load(_end_measuring + 10ms)
        , _last_time_seen(_start_measuring)
        , _pos_impl(std::move(pos))
        , _req_impl(std::move(reqs))
    {}

    // Buffer aligned to its own size, as DMA requires alignment.
    std::unique_ptr<char[], free_deleter> get_buffer() {
        return allocate_aligned_buffer<char>(_buffer_size, _buffer_size);
    }

    future<> issue_request(char* buf) {
        return _req_impl->issue_request(_pos_impl->get_pos(), buf, _buffer_size).then([this] (size_t size) {
            auto now = iotune_clock::now();
            // Only count completions that land inside the measurement window.
            if ((now > _start_measuring) && (now < _end_measuring)) {
                _last_time_seen = now;
                _bytes += size;
                _requests++;
            }
        });
    }

    uint64_t bytes() const {
        return _bytes;
    }

    // Computes rates over the interval actually observed (up to the last
    // accounted completion), not the nominal duration. Throws if no request
    // completed inside the window.
    io_rates get_io_rates() const {
        io_rates rates;
        auto t = _last_time_seen - _start_measuring;
        if (!t.count()) {
            throw std::runtime_error("No data collected");
        }
        rates.bytes_per_sec = _bytes / t.count();
        rates.iops = _requests / t.count();
        return rates;
    }
};
345 | ||
// Per-shard benchmark file, created under its own subdirectory of the
// evaluation directory and unlinked as soon as it is opened.
class test_file {
public:
    enum class pattern { sequential, random };
private:
    fs::path _dirpath;
    uint64_t _file_size;
    file _file;

    std::unique_ptr<position_generator> get_position_generator(size_t buffer_size, pattern access_pattern) {
        if (access_pattern == pattern::sequential) {
            return std::make_unique<sequential_issuer>(buffer_size, _file_size);
        } else {
            return std::make_unique<random_issuer>(buffer_size, _file_size);
        }
    }
public:
    test_file(const ::evaluation_directory& dir, uint64_t maximum_size)
        : _dirpath(dir.path() / fs::path(fmt::format("ioqueue-discovery-{}", engine().cpu_id())))
        , _file_size(maximum_size)
    {}

    future<> create_data_file() {
        // XFS likes access in many directories better.
        return make_directory(_dirpath.string()).then([this] {
            auto testfile = _dirpath / fs::path("testfile");
            file_open_options options;
            options.extent_allocation_size_hint = _file_size;
            return open_file_dma(testfile.string(), open_flags::rw | open_flags::create, std::move(options)).then([this, testfile] (file file) {
                _file = file;
                // Unlink immediately: the open fd keeps the data reachable,
                // and nothing is left behind if the process dies.
                return remove_file(testfile.string()).then([this] {
                    return remove_file(_dirpath.string());
                });
            }).then([this] {
                return _file.truncate(_file_size);
            });
        });
    }

    // Runs `max_os_concurrency` parallel streams of the worker's request type
    // until the worker's time budget expires, then reports the measured
    // rates. With update_file_size, records how many bytes were written so a
    // later read phase knows how much data actually exists.
    future<io_rates> do_workload(std::unique_ptr<io_worker> worker_ptr, unsigned max_os_concurrency, bool update_file_size = false) {
        if (update_file_size) {
            _file_size = 0;
        }

        auto worker = worker_ptr.get();
        auto concurrency = boost::irange<unsigned, unsigned>(0, max_os_concurrency, 1);
        return parallel_for_each(std::move(concurrency), [worker] (unsigned idx) {
            auto bufptr = worker->get_buffer();
            auto buf = bufptr.get();
            // finally() keeps the buffer alive until this stream finishes.
            return do_until([worker] { return worker->should_stop(); }, [buf, worker] {
                return worker->issue_request(buf);
            }).finally([alive = std::move(bufptr)] {});
        }).then_wrapped([this, worker = std::move(worker_ptr), update_file_size] (future<> f) {
            try {
                f.get();
            } catch (invalid_position& ip) {
                // expected if sequential. Example: reading and the file ended.
                if (!worker->is_sequential()) {
                    throw;
                }
            }

            if (update_file_size) {
                _file_size = worker->bytes();
            }
            return make_ready_future<io_rates>(worker->get_io_rates());
        });
    }

    future<io_rates> read_workload(size_t buffer_size, pattern access_pattern, unsigned max_os_concurrency, std::chrono::duration<double> duration) {
        // Buffer must satisfy the device's DMA alignment.
        buffer_size = std::max(buffer_size, _file.disk_read_dma_alignment());
        auto worker = std::make_unique<io_worker>(buffer_size, duration, std::make_unique<read_request_issuer>(_file), get_position_generator(buffer_size, access_pattern));
        return do_workload(std::move(worker), max_os_concurrency);
    }

    future<io_rates> write_workload(size_t buffer_size, pattern access_pattern, unsigned max_os_concurrency, std::chrono::duration<double> duration) {
        buffer_size = std::max(buffer_size, _file.disk_write_dma_alignment());
        auto worker = std::make_unique<io_worker>(buffer_size, duration, std::make_unique<write_request_issuer>(_file), get_position_generator(buffer_size, access_pattern));
        // Only sequential writes define the readable extent of the file.
        bool update_file_size = worker->is_sequential();
        return do_workload(std::move(worker), max_os_concurrency, update_file_size).then([this] (io_rates r) {
            // Flush so subsequent read phases see stable, durable data.
            return _file.flush().then([r = std::move(r)] () mutable {
                return make_ready_future<io_rates>(std::move(r));
            });
        });
    }

    future<> stop() {
        return _file.close();
    }
};
435 | ||
// Fans the benchmark out across all shards via a sharded<test_file> and
// aggregates per-shard rates.
class iotune_multi_shard_context {
    ::evaluation_directory _test_directory;

    // Splits the device's total queue depth evenly across shards (earlier
    // shards absorb the remainder), capped at 128 per shard.
    unsigned per_shard_io_depth() const {
        auto iodepth = _test_directory.max_iodepth() / smp::count;
        if (engine().cpu_id() < _test_directory.max_iodepth() % smp::count) {
            iodepth++;
        }
        return std::min(iodepth, 128u);
    }
    seastar::sharded<test_file> _iotune_test_file;
public:
    future<> stop() {
        return _iotune_test_file.stop();
    }

    // Each shard gets half of its even share of the free space, leaving
    // headroom on the filesystem.
    future<> start() {
        return _iotune_test_file.start(_test_directory, _test_directory.available_space() / (2 * smp::count));
    }

    future<> create_data_file() {
        return _iotune_test_file.invoke_on_all([] (test_file& tf) {
            return tf.create_data_file();
        });
    }

    // Sequential workloads run on one shard at a time, with 4 in-flight
    // requests per disk in the array.
    future<io_rates> write_sequential_data(unsigned shard, size_t buffer_size, std::chrono::duration<double> duration) {
        return _iotune_test_file.invoke_on(shard, [this, buffer_size, duration] (test_file& tf) {
            return tf.write_workload(buffer_size, test_file::pattern::sequential, 4 * _test_directory.disks_per_array(), duration);
        });
    }

    future<io_rates> read_sequential_data(unsigned shard, size_t buffer_size, std::chrono::duration<double> duration) {
        return _iotune_test_file.invoke_on(shard, [this, buffer_size, duration] (test_file& tf) {
            return tf.read_workload(buffer_size, test_file::pattern::sequential, 4 * _test_directory.disks_per_array(), duration);
        });
    }

    // Random workloads run on all shards concurrently; per-shard results are
    // summed into one io_rates.
    future<io_rates> write_random_data(size_t buffer_size, std::chrono::duration<double> duration) {
        return _iotune_test_file.map_reduce0([buffer_size, this, duration] (test_file& tf) {
            return tf.write_workload(buffer_size, test_file::pattern::random, per_shard_io_depth(), duration);
        }, io_rates(), std::plus<io_rates>());
    }

    future<io_rates> read_random_data(size_t buffer_size, std::chrono::duration<double> duration) {
        return _iotune_test_file.map_reduce0([buffer_size, this, duration] (test_file& tf) {
            return tf.read_workload(buffer_size, test_file::pattern::random, per_shard_io_depth(), duration);
        }, io_rates(), std::plus<io_rates>());
    }

    iotune_multi_shard_context(::evaluation_directory dir)
        : _test_directory(dir)
    {}
};
490 | ||
// Final results for one evaluated mountpoint, in the units the YAML output
// expects: bandwidth in bytes/s, IOPS in requests/s.
struct disk_descriptor {
    std::string mountpoint;
    uint64_t read_iops;
    uint64_t read_bw;
    uint64_t write_iops;
    uint64_t write_bw;
};
498 | ||
499 | void string_to_file(sstring conf_file, sstring buf) { | |
500 | auto f = file_desc::open(conf_file, O_WRONLY | O_CLOEXEC | O_CREAT | O_TRUNC, 0664); | |
501 | auto ret = f.write(buf.data(), buf.size()); | |
502 | if (!ret || (*ret != buf.size())) { | |
503 | throw std::runtime_error(fmt::format("Can't write {}: {}", conf_file, *ret)); | |
504 | } | |
505 | } | |
506 | ||
507 | void write_configuration_file(sstring conf_file, std::string format, sstring properties_file) { | |
508 | sstring buf; | |
509 | if (format == "seastar") { | |
510 | buf = fmt::format("io-properties-file={}\n", properties_file); | |
511 | } else { | |
512 | buf = fmt::format("SEASTAR_IO=\"--io-properties-file={}\"\n", properties_file); | |
513 | } | |
514 | string_to_file(conf_file, buf); | |
515 | } | |
516 | ||
9f95a23c | 517 | void write_property_file(sstring conf_file, std::vector<disk_descriptor> disk_descriptors) { |
11fdf7f2 TL |
518 | YAML::Emitter out; |
519 | out << YAML::BeginMap; | |
520 | out << YAML::Key << "disks"; | |
521 | out << YAML::BeginSeq; | |
522 | for (auto& desc : disk_descriptors) { | |
523 | out << YAML::BeginMap; | |
524 | out << YAML::Key << "mountpoint" << YAML::Value << desc.mountpoint; | |
525 | out << YAML::Key << "read_iops" << YAML::Value << desc.read_iops; | |
526 | out << YAML::Key << "read_bandwidth" << YAML::Value << desc.read_bw; | |
527 | out << YAML::Key << "write_iops" << YAML::Value << desc.write_iops; | |
528 | out << YAML::Key << "write_bandwidth" << YAML::Value << desc.write_bw; | |
529 | out << YAML::EndMap; | |
530 | } | |
531 | out << YAML::EndSeq; | |
532 | out << YAML::EndMap; | |
533 | out << YAML::Newline; | |
534 | ||
535 | string_to_file(conf_file, sstring(out.c_str(), out.size())); | |
536 | } | |
537 | ||
538 | // Returns the mountpoint of a path. It works by walking backwards from the canonical path | |
539 | // (absolute, with symlinks resolved), until we find a point that crosses a device ID. | |
540 | fs::path mountpoint_of(sstring filename) { | |
541 | fs::path mnt_candidate = fs::canonical(fs::path(filename)); | |
542 | compat::optional<dev_t> candidate_id = {}; | |
543 | auto current = mnt_candidate; | |
544 | do { | |
545 | auto f = open_directory(current.string()).get0(); | |
546 | auto st = f.stat().get0(); | |
547 | if ((candidate_id) && (*candidate_id != st.st_dev)) { | |
548 | return mnt_candidate; | |
549 | } | |
550 | mnt_candidate = current; | |
551 | candidate_id = st.st_dev; | |
552 | current = current.parent_path(); | |
9f95a23c | 553 | } while (mnt_candidate != current); |
11fdf7f2 TL |
554 | |
555 | return mnt_candidate; | |
556 | } | |
557 | ||
int main(int ac, char** av) {
    namespace bpo = boost::program_options;
    bool fs_check = false;

    app_template::config app_cfg;
    app_cfg.name = "IOTune";

    app_template app(std::move(app_cfg));
    auto opt_add = app.add_options();
    opt_add
        ("evaluation-directory", bpo::value<std::vector<sstring>>()->required(), "directory where to execute the evaluation")
        ("properties-file", bpo::value<sstring>(), "path in which to write the YAML file")
        ("options-file", bpo::value<sstring>(), "path in which to write the legacy conf file")
        ("duration", bpo::value<unsigned>()->default_value(120), "time, in seconds, for which to run the test")
        ("format", bpo::value<sstring>()->default_value("seastar"), "Configuration file format (seastar | envfile)")
        ("fs-check", bpo::bool_switch(&fs_check), "perform FS check only")
    ;

    return app.run(ac, av, [&] {
        return seastar::async([&] {
            auto& configuration = app.configuration();
            auto eval_dirs = configuration["evaluation-directory"].as<std::vector<sstring>>();
            auto format = configuration["format"].as<sstring>();
            auto duration = std::chrono::duration<double>(configuration["duration"].as<unsigned>() * 1s);

            std::vector<disk_descriptor> disk_descriptors;
            std::unordered_map<sstring, sstring> mountpoint_map;
            // We want to evaluate once per mountpoint, but we still want to write in one of the
            // directories that we were provided - we may not have permissions to write into the
            // mountpoint itself. If we are passed more than one directory per mountpoint, we don't
            // really care to which one we write, so this simple hash will do.
            for (auto& eval_dir : eval_dirs) {
                mountpoint_map[mountpoint_of(eval_dir).string()] = eval_dir;
            }
            for (auto eval: mountpoint_map) {
                auto mountpoint = eval.first;
                auto eval_dir = eval.second;

                if (filesystem_has_good_aio_support(eval_dir, false) == false) {
                    iotune_logger.error("Exception when qualifying filesystem at {}", eval_dir);
                    return 1;
                }

                // Warn (but proceed) when less than the recommended 10 GB is free.
                auto rec = 10000000000ULL;
                auto avail = fs_avail(eval_dir).get0();
                if (avail < rec) {
                    uint64_t val;
                    const char* units;
                    if (avail >= 1000000000) {
                        val = (avail + 500000000) / 1000000000;
                        units = "GB";
                    } else if (avail >= 1000000) {
                        val = (avail + 500000) / 1000000;
                        units = "MB";
                    } else {
                        val = avail;
                        units = "bytes";
                    }
                    iotune_logger.warn("Available space on filesystem at {}: {} {}: is less than recommended: {} GB",
                            eval_dir, val, units, rec / 1000000000ULL);
                }

                iotune_logger.info("{} passed sanity checks", eval_dir);
                if (fs_check) {
                    // --fs-check only validates the filesystem; skip the benchmark.
                    return 0;
                }

                // Directory is the same object for all tests.
                ::evaluation_directory test_directory(eval_dir);
                test_directory.discover_directory().get();

                ::iotune_multi_shard_context iotune_tests(test_directory);
                iotune_tests.start().get();
                // Guarantee the sharded context is stopped however this
                // iteration exits; a failed shutdown is fatal.
                auto stop = defer([&iotune_tests] {
                    try {
                        iotune_tests.stop().get();
                    } catch (...) {
                        fmt::print("Error occurred during iotune context shutdown: {}", std::current_exception());
                        abort();
                    }
                });

                iotune_tests.create_data_file().get();

                // Time budget: 70% sequential writes (split across shards),
                // then 10% each for sequential read, random write, random read.
                fmt::print("Starting Evaluation. This may take a while...\n");
                fmt::print("Measuring sequential write bandwidth: ");
                std::cout.flush();
                io_rates write_bw;
                size_t sequential_buffer_size = 1 << 20;
                for (unsigned shard = 0; shard < smp::count; ++shard) {
                    write_bw += iotune_tests.write_sequential_data(shard, sequential_buffer_size, duration * 0.70 / smp::count).get0();
                }
                // Shards ran one at a time, so report the average, not the sum.
                write_bw.bytes_per_sec /= smp::count;
                fmt::print("{} MB/s\n", uint64_t(write_bw.bytes_per_sec / (1024 * 1024)));

                fmt::print("Measuring sequential read bandwidth: ");
                std::cout.flush();
                auto read_bw = iotune_tests.read_sequential_data(0, sequential_buffer_size, duration * 0.1).get0();
                fmt::print("{} MB/s\n", uint64_t(read_bw.bytes_per_sec / (1024 * 1024)));

                fmt::print("Measuring random write IOPS: ");
                std::cout.flush();
                auto write_iops = iotune_tests.write_random_data(test_directory.minimum_io_size(), duration * 0.1).get0();
                fmt::print("{} IOPS\n", uint64_t(write_iops.iops));

                fmt::print("Measuring random read IOPS: ");
                std::cout.flush();
                auto read_iops = iotune_tests.read_random_data(test_directory.minimum_io_size(), duration * 0.1).get0();
                fmt::print("{} IOPS\n", uint64_t(read_iops.iops));

                struct disk_descriptor desc;
                desc.mountpoint = mountpoint;
                desc.read_iops = read_iops.iops;
                desc.read_bw = read_bw.bytes_per_sec;
                desc.write_iops = write_iops.iops;
                desc.write_bw = write_bw.bytes_per_sec;
                disk_descriptors.push_back(std::move(desc));
            }

            // `file` names whichever output we are currently writing, for the
            // error message below.
            auto file = "properties file";
            try {
                if (configuration.count("properties-file")) {
                    fmt::print("Writing result to {}\n", configuration["properties-file"].as<sstring>());
                    write_property_file(configuration["properties-file"].as<sstring>(), disk_descriptors);
                }

                file = "configuration file";
                if (configuration.count("options-file")) {
                    fmt::print("Writing result to {}\n", configuration["options-file"].as<sstring>());
                    write_configuration_file(configuration["options-file"].as<sstring>(), format, configuration["properties-file"].as<sstring>());
                }
            } catch (...) {
                iotune_logger.error("Exception when writing {}: {}.\nPlease add the above values manually to your seastar command line.", file, std::current_exception());
                return 1;
            }
            return 0;
        });
    });
}