]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/osdmaptool.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / tools / osdmaptool.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3/*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15#include <string>
16#include <sys/stat.h>
17
18#include "common/ceph_argparse.h"
19#include "common/errno.h"
20#include "common/safe_io.h"
f67539c2 21#include "include/random.h"
224ce89b 22#include "mon/health_check.h"
92f5a8d4
TL
23#include <time.h>
24#include <algorithm>
7c673cae
FG
25
26#include "global/global_init.h"
27#include "osd/OSDMap.h"
28
20effc67 29using namespace std;
7c673cae
FG
30
/**
 * Print the command-line help for osdmaptool to stdout, one option per
 * line, and terminate the process with exit status 1.
 *
 * This function never returns; callers rely on that after reporting a
 * command-line error.
 */
void usage()
{
  // One entry per output line; each is written followed by std::endl.
  static const char* const help_lines[] = {
    " usage: [--print] <mapfilename>",
    " --create-from-conf creates an osd map with default configurations",
    " --createsimple <numosd> [--clobber] [--pg-bits <bitsperosd>] [--pgp-bits <bits>] creates a relatively generic OSD map with <numosd> devices",
    " --pgp-bits <bits> pgp_num map attribute will be shifted by <bits>",
    " --pg-bits <bits> pg_num map attribute will be shifted by <bits>",
    " --clobber allows osdmaptool to overwrite <mapfilename> if it already exists",
    " --export-crush <file> write osdmap's crush map to <file>",
    " --import-crush <file> replace osdmap's crush map with <file>",
    " --health dump health checks",
    " --test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] [--range-first <first> --range-last <last>] map all pgs",
    " --test-map-pgs-dump [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs",
    " --test-map-pgs-dump-all [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs to osds",
    " --mark-up-in mark osds up and in (but do not persist)",
    " --mark-out <osdid> mark an osd as out (but do not persist)",
    " --mark-up <osdid> mark an osd as up (but do not persist)",
    " --mark-in <osdid> mark an osd as in (but do not persist)",
    " --with-default-pool include default pool when creating map",
    " --clear-temp clear pg_temp and primary_temp",
    " --clean-temps clean pg_temps",
    " --test-random do random placements",
    " --test-map-pg <pgid> map a pgid to osds",
    " --test-map-object <objectname> [--pool <poolid>] map an object to osds",
    " --upmap-cleanup <file> clean up pg_upmap[_items] entries, writing",
    " commands to <file> [default: - for stdout]",
    " --upmap <file> calculate pg upmap entries to balance pg layout",
    " writing commands to <file> [default: - for stdout]",
    " --upmap-max <max-count> set max upmap entries to calculate [default: 10]",
    " --upmap-deviation <max-deviation>",
    " max deviation from target [default: 5]",
    " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools",
    " --upmap-active Act like an active balancer, keep applying changes until balanced",
    " --dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported",
    " --tree displays a tree of the map",
    " --test-crush [--range-first <first> --range-last <last>] map pgs to acting osds",
    " --adjust-crush-weight <osdid:weight>[,<osdid:weight>,<...>] change <osdid> CRUSH <weight> (but do not persist)",
    " --save write modified osdmap with upmap or crush-adjust changes",
    " --read <file> calculate pg upmap entries to balance pg primaries",
    " --read-pool <poolname> specify which pool the read balancer should adjust",
    " --vstart prefix upmap and read output with './bin/'",
  };

  for (const char* text : help_lines) {
    std::cout << text << std::endl;
  }
  exit(1);
}
75
1e59de90 76void print_inc_upmaps(const OSDMap::Incremental& pending_inc, int fd, bool vstart, std::string cmd="ceph")
7c673cae
FG
77{
78 ostringstream ss;
1e59de90 79 std::string prefix = "./bin/";
7c673cae 80 for (auto& i : pending_inc.old_pg_upmap) {
1e59de90
TL
81 if (vstart)
82 ss << prefix;
83 ss << cmd + " osd rm-pg-upmap " << i << std::endl;
7c673cae
FG
84 }
85 for (auto& i : pending_inc.new_pg_upmap) {
1e59de90
TL
86 if (vstart)
87 ss << prefix;
88 ss << cmd + " osd pg-upmap " << i.first;
7c673cae
FG
89 for (auto osd : i.second) {
90 ss << " " << osd;
91 }
92 ss << std::endl;
93 }
94 for (auto& i : pending_inc.old_pg_upmap_items) {
1e59de90
TL
95 if (vstart)
96 ss << prefix;
97 ss << cmd + " osd rm-pg-upmap-items " << i << std::endl;
7c673cae
FG
98 }
99 for (auto& i : pending_inc.new_pg_upmap_items) {
1e59de90
TL
100 if (vstart)
101 ss << prefix;
102 ss << cmd + " osd pg-upmap-items " << i.first;
7c673cae
FG
103 for (auto p : i.second) {
104 ss << " " << p.first << " " << p.second;
105 }
106 ss << std::endl;
107 }
1e59de90
TL
108 for (auto& i : pending_inc.new_pg_upmap_primary) {
109 if (vstart)
110 ss << prefix;
111 ss << cmd + " osd pg-upmap-primary " << i.first << " " << i.second << std::endl;
112 }
7c673cae
FG
113 string s = ss.str();
114 int r = safe_write(fd, s.c_str(), s.size());
115 if (r < 0) {
116 cerr << "error writing output: " << cpp_strerror(r) << std::endl;
117 exit(1);
118 }
119}
120
121int main(int argc, const char **argv)
122{
20effc67 123 auto args = argv_to_vec(argc, argv);
11fdf7f2
TL
124 if (args.empty()) {
125 cerr << argv[0] << ": -h or --help for usage" << std::endl;
126 exit(1);
127 }
128 if (ceph_argparse_need_usage(args)) {
129 usage();
130 exit(0);
131 }
7c673cae
FG
132
133 auto cct = global_init(NULL, args, CEPH_ENTITY_TYPE_CLIENT,
134 CODE_ENVIRONMENT_UTILITY,
135 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
136 common_init_finish(g_ceph_context);
137
138 const char *me = argv[0];
139
140 std::string fn;
141 bool print = false;
142 boost::scoped_ptr<Formatter> print_formatter;
143 bool tree = false;
144 boost::scoped_ptr<Formatter> tree_formatter;
145 bool createsimple = false;
224ce89b 146 bool createpool = false;
7c673cae
FG
147 bool create_from_conf = false;
148 int num_osd = 0;
11fdf7f2
TL
149 int pg_bits = 6;
150 int pgp_bits = 6;
7c673cae
FG
151 bool clobber = false;
152 bool modified = false;
f67539c2 153 std::string export_crush, import_crush, test_map_pg, test_map_object, adjust_crush_weight;
7c673cae
FG
154 bool test_crush = false;
155 int range_first = -1;
156 int range_last = -1;
157 int pool = -1;
158 bool mark_up_in = false;
3efd9988 159 int marked_out = -1;
f67539c2
TL
160 int marked_up = -1;
161 int marked_in = -1;
7c673cae 162 bool clear_temp = false;
f91f0fd5 163 bool clean_temps = false;
7c673cae
FG
164 bool test_map_pgs = false;
165 bool test_map_pgs_dump = false;
166 bool test_random = false;
167 bool upmap_cleanup = false;
168 bool upmap = false;
224ce89b 169 bool health = false;
7c673cae 170 std::string upmap_file = "-";
92f5a8d4
TL
171 int upmap_max = 10;
172 int upmap_deviation = 5;
173 bool upmap_active = false;
7c673cae 174 std::set<std::string> upmap_pools;
20effc67
TL
175 std::random_device::result_type upmap_seed;
176 std::random_device::result_type *upmap_p_seed = nullptr;
1e59de90
TL
177 bool read = false;
178 std::string read_pool;
20effc67 179
7c673cae
FG
180 int64_t pg_num = -1;
181 bool test_map_pgs_dump_all = false;
f67539c2 182 bool save = false;
1e59de90 183 bool vstart = false;
7c673cae
FG
184
185 std::string val;
186 std::ostringstream err;
187 for (std::vector<const char*>::iterator i = args.begin(); i != args.end(); ) {
188 if (ceph_argparse_double_dash(args, i)) {
189 break;
7c673cae
FG
190 } else if (ceph_argparse_flag(args, i, "-p", "--print", (char*)NULL)) {
191 print = true;
192 } else if (ceph_argparse_witharg(args, i, &val, err, "--dump", (char*)NULL)) {
193 print = true;
194 if (!val.empty() && val != "plain") {
195 print_formatter.reset(Formatter::create(val, "", "json"));
196 }
197 } else if (ceph_argparse_witharg(args, i, &val, err, "--tree", (char*)NULL)) {
198 tree = true;
199 if (!val.empty() && val != "plain") {
200 tree_formatter.reset(Formatter::create(val, "", "json"));
201 }
11fdf7f2
TL
202 } else if (ceph_argparse_witharg(args, i, &pg_bits, err, "--osd-pg-bits", (char*)NULL)) {
203 } else if (ceph_argparse_witharg(args, i, &pgp_bits, err, "--osd-pgp-bits", (char*)NULL)) {
7c673cae
FG
204 } else if (ceph_argparse_witharg(args, i, &upmap_file, "--upmap-cleanup", (char*)NULL)) {
205 upmap_cleanup = true;
206 } else if (ceph_argparse_witharg(args, i, &upmap_file, "--upmap", (char*)NULL)) {
207 upmap_cleanup = true;
208 upmap = true;
1e59de90
TL
209 } else if (ceph_argparse_witharg(args, i, &upmap_file, "--read", (char*)NULL)) {
210 read = true;
7c673cae
FG
211 } else if (ceph_argparse_witharg(args, i, &upmap_max, err, "--upmap-max", (char*)NULL)) {
212 } else if (ceph_argparse_witharg(args, i, &upmap_deviation, err, "--upmap-deviation", (char*)NULL)) {
20effc67
TL
213 } else if (ceph_argparse_witharg(args, i, (int *)&upmap_seed, err, "--upmap-seed", (char*)NULL)) {
214 upmap_p_seed = &upmap_seed;
7c673cae
FG
215 } else if (ceph_argparse_witharg(args, i, &val, "--upmap-pool", (char*)NULL)) {
216 upmap_pools.insert(val);
1e59de90
TL
217 } else if (ceph_argparse_witharg(args, i, &val, "--read-pool", (char*)NULL)) {
218 read_pool = val;
7c673cae
FG
219 } else if (ceph_argparse_witharg(args, i, &num_osd, err, "--createsimple", (char*)NULL)) {
220 if (!err.str().empty()) {
221 cerr << err.str() << std::endl;
222 exit(EXIT_FAILURE);
223 }
224 createsimple = true;
92f5a8d4
TL
225 } else if (ceph_argparse_flag(args, i, "--upmap-active", (char*)NULL)) {
226 upmap_active = true;
224ce89b
WB
227 } else if (ceph_argparse_flag(args, i, "--health", (char*)NULL)) {
228 health = true;
229 } else if (ceph_argparse_flag(args, i, "--with-default-pool", (char*)NULL)) {
230 createpool = true;
7c673cae
FG
231 } else if (ceph_argparse_flag(args, i, "--create-from-conf", (char*)NULL)) {
232 create_from_conf = true;
233 } else if (ceph_argparse_flag(args, i, "--mark-up-in", (char*)NULL)) {
234 mark_up_in = true;
3efd9988
FG
235 } else if (ceph_argparse_witharg(args, i, &val, "--mark-out", (char*)NULL)) {
236 marked_out = std::stoi(val);
f67539c2
TL
237 } else if (ceph_argparse_witharg(args, i, &val, "--mark-up", (char*)NULL)) {
238 marked_up = std::stod(val);
239 } else if (ceph_argparse_witharg(args, i, &val, "--mark-in", (char*)NULL)) {
240 marked_in = std::stod(val);
7c673cae
FG
241 } else if (ceph_argparse_flag(args, i, "--clear-temp", (char*)NULL)) {
242 clear_temp = true;
f91f0fd5
TL
243 } else if (ceph_argparse_flag(args, i, "--clean-temps", (char*)NULL)) {
244 clean_temps = true;
7c673cae
FG
245 } else if (ceph_argparse_flag(args, i, "--test-map-pgs", (char*)NULL)) {
246 test_map_pgs = true;
247 } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump", (char*)NULL)) {
248 test_map_pgs_dump = true;
249 } else if (ceph_argparse_flag(args, i, "--test-map-pgs-dump-all", (char*)NULL)) {
250 test_map_pgs_dump_all = true;
251 } else if (ceph_argparse_flag(args, i, "--test-random", (char*)NULL)) {
252 test_random = true;
253 } else if (ceph_argparse_flag(args, i, "--clobber", (char*)NULL)) {
254 clobber = true;
255 } else if (ceph_argparse_witharg(args, i, &pg_bits, err, "--pg_bits", (char*)NULL)) {
256 if (!err.str().empty()) {
257 cerr << err.str() << std::endl;
258 exit(EXIT_FAILURE);
259 }
260 } else if (ceph_argparse_witharg(args, i, &pgp_bits, err, "--pgp_bits", (char*)NULL)) {
261 if (!err.str().empty()) {
262 cerr << err.str() << std::endl;
263 exit(EXIT_FAILURE);
264 }
265 } else if (ceph_argparse_witharg(args, i, &val, "--export_crush", (char*)NULL)) {
266 export_crush = val;
267 } else if (ceph_argparse_witharg(args, i, &val, "--import_crush", (char*)NULL)) {
268 import_crush = val;
269 } else if (ceph_argparse_witharg(args, i, &val, "--test_map_pg", (char*)NULL)) {
270 test_map_pg = val;
271 } else if (ceph_argparse_witharg(args, i, &val, "--test_map_object", (char*)NULL)) {
272 test_map_object = val;
273 } else if (ceph_argparse_flag(args, i, "--test_crush", (char*)NULL)) {
274 test_crush = true;
275 } else if (ceph_argparse_witharg(args, i, &val, err, "--pg_num", (char*)NULL)) {
276 string interr;
277 pg_num = strict_strtoll(val.c_str(), 10, &interr);
278 if (interr.length() > 0) {
279 cerr << "error parsing integer value " << interr << std::endl;
280 exit(EXIT_FAILURE);
281 }
282 } else if (ceph_argparse_witharg(args, i, &range_first, err, "--range_first", (char*)NULL)) {
283 } else if (ceph_argparse_witharg(args, i, &range_last, err, "--range_last", (char*)NULL)) {
284 } else if (ceph_argparse_witharg(args, i, &pool, err, "--pool", (char*)NULL)) {
285 if (!err.str().empty()) {
286 cerr << err.str() << std::endl;
287 exit(EXIT_FAILURE);
288 }
f67539c2
TL
289 } else if (ceph_argparse_witharg(args, i, &val, err, "--adjust-crush-weight", (char*)NULL)) {
290 adjust_crush_weight = val;
291 } else if (ceph_argparse_flag(args, i, "--save", (char*)NULL)) {
292 save = true;
1e59de90
TL
293 } else if (ceph_argparse_flag(args, i, "--vstart", (char*)NULL)) {
294 vstart = true;
7c673cae
FG
295 } else {
296 ++i;
297 }
298 }
299 if (args.empty()) {
300 cerr << me << ": must specify osdmap filename" << std::endl;
301 usage();
302 }
303 else if (args.size() > 1) {
304 cerr << me << ": too many arguments" << std::endl;
305 usage();
306 }
92f5a8d4
TL
307 if (upmap_deviation < 1) {
308 cerr << me << ": upmap-deviation must be >= 1" << std::endl;
309 usage();
310 }
7c673cae
FG
311 fn = args[0];
312
313 if (range_first >= 0 && range_last >= 0) {
314 set<OSDMap*> maps;
315 OSDMap *prev = NULL;
316 for (int i=range_first; i <= range_last; i++) {
317 ostringstream f;
318 f << fn << "/" << i;
319 bufferlist bl;
320 string error, s = f.str();
321 int r = bl.read_file(s.c_str(), &error);
322 if (r < 0) {
323 cerr << "unable to read " << s << ": " << cpp_strerror(r) << std::endl;
324 exit(1);
325 }
326 cout << s << " got " << bl.length() << " bytes" << std::endl;
327 OSDMap *o = new OSDMap;
328 o->decode(bl);
329 maps.insert(o);
330 if (prev)
331 OSDMap::dedup(prev, o);
332 prev = o;
333 }
334 exit(0);
335 }
336
337 OSDMap osdmap;
338 bufferlist bl;
339
340 cerr << me << ": osdmap file '" << fn << "'" << std::endl;
341
342 int r = 0;
343 struct stat st;
344 if (!createsimple && !create_from_conf && !clobber) {
345 std::string error;
346 r = bl.read_file(fn.c_str(), &error);
347 if (r == 0) {
348 try {
349 osdmap.decode(bl);
350 }
351 catch (const buffer::error &e) {
352 cerr << me << ": error decoding osdmap '" << fn << "'" << std::endl;
353 return -1;
354 }
355 }
356 else {
357 cerr << me << ": couldn't open " << fn << ": " << error << std::endl;
358 return -1;
359 }
360 }
361 else if ((createsimple || create_from_conf) && !clobber && ::stat(fn.c_str(), &st) == 0) {
362 cerr << me << ": " << fn << " exists, --clobber to overwrite" << std::endl;
363 return -1;
364 }
365
366 if (createsimple || create_from_conf) {
367 if (createsimple) {
368 if (num_osd < 1) {
369 cerr << me << ": osd count must be > 0" << std::endl;
370 exit(1);
371 }
372 } else {
373 num_osd = -1;
374 }
375 uuid_d fsid;
224ce89b
WB
376 if (createpool) {
377 osdmap.build_simple_with_pool(
378 g_ceph_context, 0, fsid, num_osd, pg_bits, pgp_bits);
379 } else {
380 osdmap.build_simple(g_ceph_context, 0, fsid, num_osd);
381 }
7c673cae
FG
382 modified = true;
383 }
384
385 if (mark_up_in) {
386 cout << "marking all OSDs up and in" << std::endl;
387 int n = osdmap.get_max_osd();
388 for (int i=0; i<n; i++) {
389 osdmap.set_state(i, osdmap.get_state(i) | CEPH_OSD_UP);
390 osdmap.set_weight(i, CEPH_OSD_IN);
f67539c2
TL
391 if (osdmap.crush->get_item_weight(i) == 0 ) {
392 osdmap.crush->adjust_item_weightf(g_ceph_context, i, 1.0);
393 }
7c673cae
FG
394 }
395 }
3efd9988
FG
396
397 if (marked_out >=0 && marked_out < osdmap.get_max_osd()) {
398 cout << "marking OSD@" << marked_out << " as out" << std::endl;
399 int id = marked_out;
400 osdmap.set_state(id, osdmap.get_state(id) | CEPH_OSD_UP);
401 osdmap.set_weight(id, CEPH_OSD_OUT);
3efd9988
FG
402 }
403
f67539c2
TL
404 if (marked_up >=0 && marked_up < osdmap.get_max_osd()) {
405 cout << "marking OSD@" << marked_up << " as up" << std::endl;
406 int id = marked_up;
407 osdmap.set_state(id, osdmap.get_state(id) | CEPH_OSD_UP);
408 }
409
410 if (marked_in >=0 && marked_in < osdmap.get_max_osd()) {
411 cout << "marking OSD@" << marked_up << " as up" << std::endl;
412 int id = marked_up;
413 osdmap.set_weight(id, CEPH_OSD_IN);
414 }
415
416 for_each_substr(adjust_crush_weight, ",", [&](auto osd_to_adjust) {
417 std::string_view osd_to_weight_delimiter{":"};
418 size_t pos = osd_to_adjust.find(osd_to_weight_delimiter);
419 if (pos == osd_to_adjust.npos) {
420 cerr << me << ": use ':' as separator of osd id and its weight"
421 << std::endl;
422 usage();
423 }
424 int osd_id = std::stoi(string(osd_to_adjust.substr(0, pos)));
425 float new_weight = std::stof(string(osd_to_adjust.substr(pos + 1)));
426 osdmap.crush->adjust_item_weightf(g_ceph_context, osd_id, new_weight);
427 std::cout << "Adjusted osd." << osd_id << " CRUSH weight to " << new_weight
428 << std::endl;
429 if (save) {
430 OSDMap::Incremental inc;
431 inc.fsid = osdmap.get_fsid();
432 inc.epoch = osdmap.get_epoch() + 1;
433 osdmap.apply_incremental(inc);
434 modified = true;
435 }
436 });
437
7c673cae
FG
438 if (clear_temp) {
439 cout << "clearing pg/primary temp" << std::endl;
440 osdmap.clear_temp();
441 }
f91f0fd5
TL
442 if (clean_temps) {
443 cout << "cleaning pg temps" << std::endl;
444 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
445 OSDMap tmpmap;
446 tmpmap.deepish_copy_from(osdmap);
447 tmpmap.apply_incremental(pending_inc);
448 OSDMap::clean_temps(g_ceph_context, osdmap, tmpmap, &pending_inc);
449 }
7c673cae 450 int upmap_fd = STDOUT_FILENO;
1e59de90 451 if (upmap || upmap_cleanup || read) {
7c673cae 452 if (upmap_file != "-") {
92f5a8d4 453 upmap_fd = ::open(upmap_file.c_str(), O_CREAT|O_WRONLY|O_TRUNC, 0644);
7c673cae
FG
454 if (upmap_fd < 0) {
455 cerr << "error opening " << upmap_file << ": " << cpp_strerror(errno)
456 << std::endl;
457 exit(1);
458 }
459 cout << "writing upmap command output to: " << upmap_file << std::endl;
460 }
461 }
462 if (upmap_cleanup) {
463 cout << "checking for upmap cleanups" << std::endl;
464 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
465 pending_inc.fsid = osdmap.get_fsid();
466 int r = osdmap.clean_pg_upmaps(g_ceph_context, &pending_inc);
467 if (r > 0) {
1e59de90 468 print_inc_upmaps(pending_inc, upmap_fd, vstart);
7c673cae 469 r = osdmap.apply_incremental(pending_inc);
11fdf7f2 470 ceph_assert(r == 0);
7c673cae
FG
471 }
472 }
1e59de90
TL
473 if (read) {
474 int64_t pid = osdmap.lookup_pg_pool_name(read_pool);
475 if (pid < 0) {
476 cerr << " pool " << read_pool << " does not exist" << std::endl;
477 exit(1);
478 }
479
480 const pg_pool_t* pool = osdmap.get_pg_pool(pid);
481 if (! pool->is_replicated()) {
482 cerr << read_pool << " is an erasure coded pool; "
483 << "please try again with a replicated pool." << std::endl;
484 exit(1);
485 }
486
487 OSDMap tmp_osd_map;
488 tmp_osd_map.deepish_copy_from(osdmap);
489
490 // Gather BEFORE info
491 map<uint64_t,set<pg_t>> pgs_by_osd;
492 map<uint64_t,set<pg_t>> prim_pgs_by_osd;
493 map<uint64_t,set<pg_t>> acting_prims_by_osd;
494 pgs_by_osd = tmp_osd_map.get_pgs_by_osd(g_ceph_context, pid, &prim_pgs_by_osd, &acting_prims_by_osd);
495 OSDMap::read_balance_info_t rb_info;
496 tmp_osd_map.calc_read_balance_score(g_ceph_context, pid, &rb_info);
497 float read_balance_score_before = rb_info.adjusted_score;
498 ceph_assert(read_balance_score_before >= 0);
499
500 // Calculate read balancer
501 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
502 int num_changes = osdmap.balance_primaries(g_ceph_context, pid, &pending_inc, tmp_osd_map);
503
504 if (num_changes < 0) {
505 cerr << "Error balancing primaries. Rerun with at least --debug-osd=10 for more details." << std::endl;
506 exit(1);
507 }
508
509 // Gather AFTER info
510 map<uint64_t,set<pg_t>> pgs_by_osd_2;
511 map<uint64_t,set<pg_t>> prim_pgs_by_osd_2;
512 map<uint64_t,set<pg_t>> acting_prims_by_osd_2;
513 pgs_by_osd_2 = tmp_osd_map.get_pgs_by_osd(g_ceph_context, pid, &prim_pgs_by_osd_2, &acting_prims_by_osd_2);
514 tmp_osd_map.calc_read_balance_score(g_ceph_context, pid, &rb_info);
515 float read_balance_score_after = rb_info.adjusted_score;
516 ceph_assert(read_balance_score_after >= 0);
517
518 if (num_changes > 0) {
519 cout << " \n";
520 cout << "---------- BEFORE ------------ \n";
521 for (auto & [osd, pgs] : prim_pgs_by_osd) {
522 cout << " osd." << osd << " | primary affinity: " << tmp_osd_map.get_primary_affinityf(osd) << " | number of prims: " << pgs.size() << "\n";
523 }
524 cout << " \n";
525 cout << "read_balance_score of '" << read_pool << "': " << read_balance_score_before << "\n\n\n";
526
527 cout << "---------- AFTER ------------ \n";
528 for (auto & [osd, pgs] : prim_pgs_by_osd_2) {
529 cout << " osd." << osd << " | primary affinity: " << tmp_osd_map.get_primary_affinityf(osd) << " | number of prims: " << pgs.size() << "\n";
530 }
531 cout << " \n";
532 cout << "read_balance_score of '" << read_pool << "': " << read_balance_score_after << "\n\n\n";
533 cout << "num changes: " << num_changes << "\n";
534
535 print_inc_upmaps(pending_inc, upmap_fd, vstart);
536 } else {
537 cout << " Unable to find further optimization, or distribution is already perfect\n";
538 }
539 }
7c673cae
FG
540 if (upmap) {
541 cout << "upmap, max-count " << upmap_max
542 << ", max deviation " << upmap_deviation
543 << std::endl;
92f5a8d4
TL
544 vector<int64_t> pools;
545 set<int64_t> upmap_pool_nums;
7c673cae
FG
546 for (auto& s : upmap_pools) {
547 int64_t p = osdmap.lookup_pg_pool_name(s);
548 if (p < 0) {
92f5a8d4 549 cerr << " pool " << s << " does not exist" << std::endl;
7c673cae
FG
550 exit(1);
551 }
92f5a8d4
TL
552 pools.push_back(p);
553 upmap_pool_nums.insert(p);
7c673cae 554 }
92f5a8d4 555 if (!pools.empty()) {
7c673cae
FG
556 cout << " limiting to pools " << upmap_pools << " (" << pools << ")"
557 << std::endl;
7c673cae 558 } else {
92f5a8d4
TL
559 mempool::osdmap::map<int64_t,pg_pool_t> opools = osdmap.get_pools();
560 for (auto& i : opools) {
561 pools.push_back(i.first);
562 }
563 }
564 if (pools.empty()) {
565 cout << "No pools available" << std::endl;
566 goto skip_upmap;
7c673cae 567 }
92f5a8d4
TL
568 int rounds = 0;
569 struct timespec round_start;
20effc67 570 [[maybe_unused]] int r = clock_gettime(CLOCK_MONOTONIC, &round_start);
92f5a8d4
TL
571 assert(r == 0);
572 do {
f67539c2 573 random_device_t rd;
92f5a8d4
TL
574 std::shuffle(pools.begin(), pools.end(), std::mt19937{rd()});
575 cout << "pools ";
576 for (auto& i: pools)
577 cout << osdmap.get_pool_name(i) << " ";
578 cout << std::endl;
579 OSDMap::Incremental pending_inc(osdmap.get_epoch()+1);
580 pending_inc.fsid = osdmap.get_fsid();
581 int total_did = 0;
582 int left = upmap_max;
583 struct timespec begin, end;
584 r = clock_gettime(CLOCK_MONOTONIC, &begin);
585 assert(r == 0);
586 for (auto& i: pools) {
587 set<int64_t> one_pool;
588 one_pool.insert(i);
20effc67 589 //TODO: Josh: Add a function on the seed for multiple iterations.
92f5a8d4
TL
590 int did = osdmap.calc_pg_upmaps(
591 g_ceph_context, upmap_deviation,
592 left, one_pool,
20effc67 593 &pending_inc, upmap_p_seed);
92f5a8d4
TL
594 total_did += did;
595 left -= did;
596 if (left <= 0)
597 break;
20effc67
TL
598 if (upmap_p_seed != nullptr) {
599 *upmap_p_seed += 13;
600 }
92f5a8d4
TL
601 }
602 r = clock_gettime(CLOCK_MONOTONIC, &end);
603 assert(r == 0);
604 cout << "prepared " << total_did << "/" << upmap_max << " changes" << std::endl;
605 float elapsed_time = (end.tv_sec - begin.tv_sec) + 1.0e-9*(end.tv_nsec - begin.tv_nsec);
606 if (upmap_active)
607 cout << "Time elapsed " << elapsed_time << " secs" << std::endl;
608 if (total_did > 0) {
1e59de90 609 print_inc_upmaps(pending_inc, upmap_fd, vstart);
f67539c2 610 if (save || upmap_active) {
92f5a8d4
TL
611 int r = osdmap.apply_incremental(pending_inc);
612 ceph_assert(r == 0);
f67539c2 613 if (save)
92f5a8d4
TL
614 modified = true;
615 }
616 } else {
617 cout << "Unable to find further optimization, "
618 << "or distribution is already perfect"
619 << std::endl;
620 if (upmap_active) {
621 map<int,set<pg_t>> pgs_by_osd;
622 for (auto& i : osdmap.get_pools()) {
623 if (!upmap_pool_nums.empty() && !upmap_pool_nums.count(i.first))
624 continue;
625 for (unsigned ps = 0; ps < i.second.get_pg_num(); ++ps) {
626 pg_t pg(ps, i.first);
627 vector<int> up;
628 osdmap.pg_to_up_acting_osds(pg, &up, nullptr, nullptr, nullptr);
629 //ldout(cct, 20) << __func__ << " " << pg << " up " << up << dendl;
630 for (auto osd : up) {
631 if (osd != CRUSH_ITEM_NONE)
632 pgs_by_osd[osd].insert(pg);
633 }
634 }
635 }
636 for (auto& i : pgs_by_osd)
637 cout << "osd." << i.first << " pgs " << i.second.size() << std::endl;
638 float elapsed_time = (end.tv_sec - round_start.tv_sec) + 1.0e-9*(end.tv_nsec - round_start.tv_nsec);
639 cout << "Total time elapsed " << elapsed_time << " secs, " << rounds << " rounds" << std::endl;
640 }
641 break;
642 }
643 ++rounds;
644 } while(upmap_active);
7c673cae 645 }
92f5a8d4 646skip_upmap:
7c673cae
FG
647 if (upmap_file != "-") {
648 ::close(upmap_fd);
649 }
650
651 if (!import_crush.empty()) {
652 bufferlist cbl;
653 std::string error;
654 r = cbl.read_file(import_crush.c_str(), &error);
655 if (r) {
656 cerr << me << ": error reading crush map from " << import_crush
657 << ": " << error << std::endl;
658 exit(1);
659 }
660
661 // validate
662 CrushWrapper cw;
11fdf7f2 663 auto p = cbl.cbegin();
7c673cae
FG
664 cw.decode(p);
665
666 if (cw.get_max_devices() > osdmap.get_max_osd()) {
667 cerr << me << ": crushmap max_devices " << cw.get_max_devices()
668 << " > osdmap max_osd " << osdmap.get_max_osd() << std::endl;
669 exit(1);
670 }
671
672 // apply
673 OSDMap::Incremental inc;
674 inc.fsid = osdmap.get_fsid();
675 inc.epoch = osdmap.get_epoch()+1;
676 inc.crush = cbl;
677 osdmap.apply_incremental(inc);
678 cout << me << ": imported " << cbl.length() << " byte crush map from " << import_crush << std::endl;
679 modified = true;
680 }
681
682 if (!export_crush.empty()) {
683 bufferlist cbl;
684 osdmap.crush->encode(cbl, CEPH_FEATURES_SUPPORTED_DEFAULT);
685 r = cbl.write_file(export_crush.c_str());
686 if (r < 0) {
687 cerr << me << ": error writing crush map to " << import_crush << std::endl;
688 exit(1);
689 }
690 cout << me << ": exported crush map to " << export_crush << std::endl;
691 }
692
693 if (!test_map_object.empty()) {
694 object_t oid(test_map_object);
695 if (pool == -1) {
224ce89b
WB
696 cout << me << ": assuming pool 1 (use --pool to override)" << std::endl;
697 pool = 1;
7c673cae
FG
698 }
699 if (!osdmap.have_pg_pool(pool)) {
700 cerr << "There is no pool " << pool << std::endl;
701 exit(1);
702 }
703 object_locator_t loc(pool);
704 pg_t raw_pgid = osdmap.object_locator_to_pg(oid, loc);
705 pg_t pgid = osdmap.raw_pg_to_pg(raw_pgid);
706
707 vector<int> acting;
708 osdmap.pg_to_acting_osds(pgid, acting);
709 cout << " object '" << oid
710 << "' -> " << pgid
711 << " -> " << acting
712 << std::endl;
713 }
714 if (!test_map_pg.empty()) {
715 pg_t pgid;
716 if (!pgid.parse(test_map_pg.c_str())) {
717 cerr << me << ": failed to parse pg '" << test_map_pg << std::endl;
718 usage();
719 }
720 cout << " parsed '" << test_map_pg << "' -> " << pgid << std::endl;
721
722 vector<int> raw, up, acting;
723 int raw_primary, up_primary, acting_primary;
724 osdmap.pg_to_raw_osds(pgid, &raw, &raw_primary);
725 osdmap.pg_to_up_acting_osds(pgid, &up, &up_primary,
726 &acting, &acting_primary);
727 cout << pgid << " raw (" << raw << ", p" << raw_primary
728 << ") up (" << up << ", p" << up_primary
729 << ") acting (" << acting << ", p" << acting_primary << ")"
730 << std::endl;
731 }
732 if (test_map_pgs || test_map_pgs_dump || test_map_pgs_dump_all) {
733 if (pool != -1 && !osdmap.have_pg_pool(pool)) {
734 cerr << "There is no pool " << pool << std::endl;
735 exit(1);
736 }
737 int n = osdmap.get_max_osd();
738 vector<int> count(n, 0);
739 vector<int> first_count(n, 0);
740 vector<int> primary_count(n, 0);
741 vector<int> size(30, 0);
11fdf7f2 742 int max_size = 0;
7c673cae
FG
743 if (test_random)
744 srand(getpid());
745 auto& pools = osdmap.get_pools();
746 for (auto p = pools.begin(); p != pools.end(); ++p) {
747 if (pool != -1 && p->first != pool)
748 continue;
749 if (pg_num > 0)
750 p->second.set_pg_num(pg_num);
751
752 cout << "pool " << p->first
753 << " pg_num " << p->second.get_pg_num() << std::endl;
754 for (unsigned i = 0; i < p->second.get_pg_num(); ++i) {
755 pg_t pgid = pg_t(i, p->first);
756
757 vector<int> osds, raw, up, acting;
758 int primary, calced_primary, up_primary, acting_primary;
759 if (test_random) {
760 osds.resize(p->second.size);
761 for (unsigned i=0; i<osds.size(); ++i) {
762 osds[i] = rand() % osdmap.get_max_osd();
763 }
764 primary = osds[0];
765 } else if (test_map_pgs_dump_all) {
20effc67
TL
766 osdmap.pg_to_raw_osds(pgid, &raw, &calced_primary);
767 osdmap.pg_to_up_acting_osds(pgid, &up, &up_primary,
768 &acting, &acting_primary);
769 osds = acting;
770 primary = acting_primary;
771 } else {
7c673cae
FG
772 osdmap.pg_to_acting_osds(pgid, &osds, &primary);
773 }
774 size[osds.size()]++;
11fdf7f2
TL
775 if ((unsigned)max_size < osds.size())
776 max_size = osds.size();
7c673cae
FG
777
778 if (test_map_pgs_dump) {
779 cout << pgid << "\t" << osds << "\t" << primary << std::endl;
20effc67
TL
780 } else if (test_map_pgs_dump_all) {
781 cout << pgid << " raw (" << raw << ", p" << calced_primary
782 << ") up (" << up << ", p" << up_primary
783 << ") acting (" << acting << ", p" << acting_primary << ")"
784 << std::endl;
785 }
7c673cae
FG
786
787 for (unsigned i=0; i<osds.size(); i++) {
788 //cout << " rep " << i << " on " << osds[i] << std::endl;
1e59de90
TL
789 if (osds[i] != CRUSH_ITEM_NONE)
790 count[osds[i]]++;
7c673cae 791 }
1e59de90 792 if (osds.size() && osds[0] != CRUSH_ITEM_NONE)
7c673cae
FG
793 first_count[osds[0]]++;
794 if (primary >= 0)
795 primary_count[primary]++;
796 }
797 }
798
799 uint64_t total = 0;
800 int in = 0;
801 int min_osd = -1;
802 int max_osd = -1;
803 cout << "#osd\tcount\tfirst\tprimary\tc wt\twt\n";
804 for (int i=0; i<n; i++) {
805 if (!osdmap.is_in(i))
806 continue;
807 if (osdmap.crush->get_item_weight(i) <= 0)
808 continue;
809 in++;
810 cout << "osd." << i
811 << "\t" << count[i]
812 << "\t" << first_count[i]
813 << "\t" << primary_count[i]
814 << "\t" << osdmap.crush->get_item_weightf(i)
815 << "\t" << osdmap.get_weightf(i)
816 << std::endl;
817 total += count[i];
818 if (count[i] &&
819 (min_osd < 0 ||
820 count[i] < count[min_osd]))
821 min_osd = i;
822 if (count[i] &&
823 (max_osd < 0 ||
824 count[i] > count[max_osd]))
825 max_osd = i;
826
827 }
828 uint64_t avg = in ? (total / in) : 0;
829 double dev = 0;
830 for (int i=0; i<n; i++) {
831 if (!osdmap.is_in(i))
832 continue;
833 if (osdmap.crush->get_item_weight(i) <= 0)
834 continue;
835 dev += (avg - count[i]) * (avg - count[i]);
836 }
837 dev /= in;
838 dev = sqrt(dev);
839
840 //double edev = sqrt(pgavg) * (double)avg / pgavg;
841 double edev = sqrt((double)total / (double)in * (1.0 - (1.0 / (double)in)));
842 cout << " in " << in << std::endl;
843 cout << " avg " << avg
844 << " stddev " << dev
845 << " (" << (dev/avg) << "x)"
846 << " (expected " << edev << " " << (edev/avg) << "x))"
847 << std::endl;
848
849 if (min_osd >= 0)
850 cout << " min osd." << min_osd << " " << count[min_osd] << std::endl;
851 if (max_osd >= 0)
852 cout << " max osd." << max_osd << " " << count[max_osd] << std::endl;
853
11fdf7f2
TL
854 for (int i=0; i<=max_size; i++) {
855 if (size[i])
856 cout << "size " << i << "\t" << size[i] << std::endl;
7c673cae
FG
857 }
858 }
859 if (test_crush) {
860 int pass = 0;
861 while (1) {
862 cout << "pass " << ++pass << std::endl;
863
864 ceph::unordered_map<pg_t,vector<int> > m;
865 for (map<int64_t,pg_pool_t>::const_iterator p = osdmap.get_pools().begin();
866 p != osdmap.get_pools().end();
867 ++p) {
868 const pg_pool_t *pool = osdmap.get_pg_pool(p->first);
869 for (ps_t ps = 0; ps < pool->get_pg_num(); ps++) {
11fdf7f2 870 pg_t pgid(ps, p->first);
7c673cae
FG
871 for (int i=0; i<100; i++) {
872 cout << pgid << " attempt " << i << std::endl;
873
874 vector<int> r;
875 osdmap.pg_to_acting_osds(pgid, r);
876 //cout << pgid << " " << r << std::endl;
877 if (m.count(pgid)) {
878 if (m[pgid] != r) {
879 cout << pgid << " had " << m[pgid] << " now " << r << std::endl;
880 ceph_abort();
881 }
882 } else
883 m[pgid] = r;
884 }
885 }
886 }
887 }
888 }
889
224ce89b 890 if (!print && !health && !tree && !modified &&
7c673cae
FG
891 export_crush.empty() && import_crush.empty() &&
892 test_map_pg.empty() && test_map_object.empty() &&
893 !test_map_pgs && !test_map_pgs_dump && !test_map_pgs_dump_all &&
1e59de90 894 adjust_crush_weight.empty() && !upmap && !upmap_cleanup && !read) {
7c673cae
FG
895 cerr << me << ": no action specified?" << std::endl;
896 usage();
897 }
898
899 if (modified)
900 osdmap.inc_epoch();
901
224ce89b
WB
902 if (health) {
903 health_check_map_t checks;
92f5a8d4 904 osdmap.check_health(cct.get(), &checks);
224ce89b
WB
905 JSONFormatter jf(true);
906 jf.dump_object("checks", checks);
907 jf.flush(cout);
908 }
7c673cae
FG
909 if (print) {
910 if (print_formatter) {
911 print_formatter->open_object_section("osdmap");
912 osdmap.dump(print_formatter.get());
913 print_formatter->close_section();
914 print_formatter->flush(cout);
915 } else {
1e59de90 916 osdmap.print(cct.get(), cout);
7c673cae
FG
917 }
918 }
919
920 if (tree) {
921 if (tree_formatter) {
922 tree_formatter->open_object_section("tree");
923 osdmap.print_tree(tree_formatter.get(), NULL);
924 tree_formatter->close_section();
925 tree_formatter->flush(cout);
926 cout << std::endl;
927 } else {
928 osdmap.print_tree(NULL, &cout);
929 }
930 }
931 if (modified) {
932 bl.clear();
933 osdmap.encode(bl, CEPH_FEATURES_SUPPORTED_DEFAULT | CEPH_FEATURE_RESERVED);
934
935 // write it out
936 cout << me << ": writing epoch " << osdmap.get_epoch()
937 << " to " << fn
938 << std::endl;
939 int r = bl.write_file(fn.c_str());
940 if (r) {
941 cerr << "osdmaptool: error writing to '" << fn << "': "
942 << cpp_strerror(r) << std::endl;
943 return 1;
944 }
945 }
946
947
948 return 0;
949}