1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include "common/ceph_argparse.h"
19 #include "common/errno.h"
20 #include "common/safe_io.h"
21 #include "include/random.h"
22 #include "mon/health_check.h"
26 #include "global/global_init.h"
27 #include "osd/OSDMap.h"
33 cout
<< " usage: [--print] <mapfilename>" << std::endl
;
34 cout
<< " --create-from-conf creates an osd map with default configurations" << std::endl
;
35 cout
<< " --createsimple <numosd> [--clobber] [--pg-bits <bitsperosd>] [--pgp-bits <bits>] creates a relatively generic OSD map with <numosd> devices" << std::endl
;
36 cout
<< " --pgp-bits <bits> pgp_num map attribute will be shifted by <bits>" << std::endl
;
37 cout
<< " --pg-bits <bits> pg_num map attribute will be shifted by <bits>" << std::endl
;
38 cout
<< " --clobber allows osdmaptool to overwrite <mapfilename> if it already exists" << std::endl
;
39 cout
<< " --export-crush <file> write osdmap's crush map to <file>" << std::endl
;
40 cout
<< " --import-crush <file> replace osdmap's crush map with <file>" << std::endl
;
41 cout
<< " --health dump health checks" << std::endl
;
42 cout
<< " --test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] [--range-first <first> --range-last <last>] map all pgs" << std::endl
;
43 cout
<< " --test-map-pgs-dump [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs" << std::endl
;
44 cout
<< " --test-map-pgs-dump-all [--pool <poolid>] [--range-first <first> --range-last <last>] map all pgs to osds" << std::endl
;
45 cout
<< " --mark-up-in mark osds up and in (but do not persist)" << std::endl
;
46 cout
<< " --mark-out <osdid> mark an osd as out (but do not persist)" << std::endl
;
47 cout
<< " --mark-up <osdid> mark an osd as up (but do not persist)" << std::endl
;
48 cout
<< " --mark-in <osdid> mark an osd as in (but do not persist)" << std::endl
;
49 cout
<< " --with-default-pool include default pool when creating map" << std::endl
;
50 cout
<< " --clear-temp clear pg_temp and primary_temp" << std::endl
;
51 cout
<< " --clean-temps clean pg_temps" << std::endl
;
52 cout
<< " --test-random do random placements" << std::endl
;
53 cout
<< " --test-map-pg <pgid> map a pgid to osds" << std::endl
;
54 cout
<< " --test-map-object <objectname> [--pool <poolid>] map an object to osds"
56 cout
<< " --upmap-cleanup <file> clean up pg_upmap[_items] entries, writing" << std::endl
;
57 cout
<< " commands to <file> [default: - for stdout]" << std::endl
;
58 cout
<< " --upmap <file> calculate pg upmap entries to balance pg layout" << std::endl
;
59 cout
<< " writing commands to <file> [default: - for stdout]" << std::endl
;
60 cout
<< " --upmap-max <max-count> set max upmap entries to calculate [default: 10]" << std::endl
;
61 cout
<< " --upmap-deviation <max-deviation>" << std::endl
;
62 cout
<< " max deviation from target [default: 5]" << std::endl
;
63 cout
<< " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl
;
64 cout
<< " --upmap-active Act like an active balancer, keep applying changes until balanced" << std::endl
;
65 cout
<< " --dump <format> displays the map in plain text when <format> is 'plain', 'json' if specified format is not supported" << std::endl
;
66 cout
<< " --tree displays a tree of the map" << std::endl
;
67 cout
<< " --test-crush [--range-first <first> --range-last <last>] map pgs to acting osds" << std::endl
;
68 cout
<< " --adjust-crush-weight <osdid:weight>[,<osdid:weight>,<...>] change <osdid> CRUSH <weight> (but do not persist)" << std::endl
;
69 cout
<< " --save write modified osdmap with upmap or crush-adjust changes" << std::endl
;
70 cout
<< " --read <file> calculate pg upmap entries to balance pg primaries" << std::endl
;
71 cout
<< " --read-pool <poolname> specify which pool the read balancer should adjust" << std::endl
;
72 cout
<< " --vstart prefix upmap and read output with './bin/'" << std::endl
;
76 void print_inc_upmaps(const OSDMap::Incremental
& pending_inc
, int fd
, bool vstart
, std::string cmd
="ceph")
79 std::string prefix
= "./bin/";
80 for (auto& i
: pending_inc
.old_pg_upmap
) {
83 ss
<< cmd
+ " osd rm-pg-upmap " << i
<< std::endl
;
85 for (auto& i
: pending_inc
.new_pg_upmap
) {
88 ss
<< cmd
+ " osd pg-upmap " << i
.first
;
89 for (auto osd
: i
.second
) {
94 for (auto& i
: pending_inc
.old_pg_upmap_items
) {
97 ss
<< cmd
+ " osd rm-pg-upmap-items " << i
<< std::endl
;
99 for (auto& i
: pending_inc
.new_pg_upmap_items
) {
102 ss
<< cmd
+ " osd pg-upmap-items " << i
.first
;
103 for (auto p
: i
.second
) {
104 ss
<< " " << p
.first
<< " " << p
.second
;
108 for (auto& i
: pending_inc
.new_pg_upmap_primary
) {
111 ss
<< cmd
+ " osd pg-upmap-primary " << i
.first
<< " " << i
.second
<< std::endl
;
114 int r
= safe_write(fd
, s
.c_str(), s
.size());
116 cerr
<< "error writing output: " << cpp_strerror(r
) << std::endl
;
121 int main(int argc
, const char **argv
)
123 auto args
= argv_to_vec(argc
, argv
);
125 cerr
<< argv
[0] << ": -h or --help for usage" << std::endl
;
128 if (ceph_argparse_need_usage(args
)) {
133 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
134 CODE_ENVIRONMENT_UTILITY
,
135 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
136 common_init_finish(g_ceph_context
);
138 const char *me
= argv
[0];
142 boost::scoped_ptr
<Formatter
> print_formatter
;
144 boost::scoped_ptr
<Formatter
> tree_formatter
;
145 bool createsimple
= false;
146 bool createpool
= false;
147 bool create_from_conf
= false;
151 bool clobber
= false;
152 bool modified
= false;
153 std::string export_crush
, import_crush
, test_map_pg
, test_map_object
, adjust_crush_weight
;
154 bool test_crush
= false;
155 int range_first
= -1;
158 bool mark_up_in
= false;
162 bool clear_temp
= false;
163 bool clean_temps
= false;
164 bool test_map_pgs
= false;
165 bool test_map_pgs_dump
= false;
166 bool test_random
= false;
167 bool upmap_cleanup
= false;
170 std::string upmap_file
= "-";
172 int upmap_deviation
= 5;
173 bool upmap_active
= false;
174 std::set
<std::string
> upmap_pools
;
175 std::random_device::result_type upmap_seed
;
176 std::random_device::result_type
*upmap_p_seed
= nullptr;
178 std::string read_pool
;
181 bool test_map_pgs_dump_all
= false;
186 std::ostringstream err
;
187 for (std::vector
<const char*>::iterator i
= args
.begin(); i
!= args
.end(); ) {
188 if (ceph_argparse_double_dash(args
, i
)) {
190 } else if (ceph_argparse_flag(args
, i
, "-p", "--print", (char*)NULL
)) {
192 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--dump", (char*)NULL
)) {
194 if (!val
.empty() && val
!= "plain") {
195 print_formatter
.reset(Formatter::create(val
, "", "json"));
197 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--tree", (char*)NULL
)) {
199 if (!val
.empty() && val
!= "plain") {
200 tree_formatter
.reset(Formatter::create(val
, "", "json"));
202 } else if (ceph_argparse_witharg(args
, i
, &pg_bits
, err
, "--osd-pg-bits", (char*)NULL
)) {
203 } else if (ceph_argparse_witharg(args
, i
, &pgp_bits
, err
, "--osd-pgp-bits", (char*)NULL
)) {
204 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap-cleanup", (char*)NULL
)) {
205 upmap_cleanup
= true;
206 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap", (char*)NULL
)) {
207 upmap_cleanup
= true;
209 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--read", (char*)NULL
)) {
211 } else if (ceph_argparse_witharg(args
, i
, &upmap_max
, err
, "--upmap-max", (char*)NULL
)) {
212 } else if (ceph_argparse_witharg(args
, i
, &upmap_deviation
, err
, "--upmap-deviation", (char*)NULL
)) {
213 } else if (ceph_argparse_witharg(args
, i
, (int *)&upmap_seed
, err
, "--upmap-seed", (char*)NULL
)) {
214 upmap_p_seed
= &upmap_seed
;
215 } else if (ceph_argparse_witharg(args
, i
, &val
, "--upmap-pool", (char*)NULL
)) {
216 upmap_pools
.insert(val
);
217 } else if (ceph_argparse_witharg(args
, i
, &val
, "--read-pool", (char*)NULL
)) {
219 } else if (ceph_argparse_witharg(args
, i
, &num_osd
, err
, "--createsimple", (char*)NULL
)) {
220 if (!err
.str().empty()) {
221 cerr
<< err
.str() << std::endl
;
225 } else if (ceph_argparse_flag(args
, i
, "--upmap-active", (char*)NULL
)) {
227 } else if (ceph_argparse_flag(args
, i
, "--health", (char*)NULL
)) {
229 } else if (ceph_argparse_flag(args
, i
, "--with-default-pool", (char*)NULL
)) {
231 } else if (ceph_argparse_flag(args
, i
, "--create-from-conf", (char*)NULL
)) {
232 create_from_conf
= true;
233 } else if (ceph_argparse_flag(args
, i
, "--mark-up-in", (char*)NULL
)) {
235 } else if (ceph_argparse_witharg(args
, i
, &val
, "--mark-out", (char*)NULL
)) {
236 marked_out
= std::stoi(val
);
237 } else if (ceph_argparse_witharg(args
, i
, &val
, "--mark-up", (char*)NULL
)) {
238 marked_up
= std::stod(val
);
239 } else if (ceph_argparse_witharg(args
, i
, &val
, "--mark-in", (char*)NULL
)) {
240 marked_in
= std::stod(val
);
241 } else if (ceph_argparse_flag(args
, i
, "--clear-temp", (char*)NULL
)) {
243 } else if (ceph_argparse_flag(args
, i
, "--clean-temps", (char*)NULL
)) {
245 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs", (char*)NULL
)) {
247 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump", (char*)NULL
)) {
248 test_map_pgs_dump
= true;
249 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump-all", (char*)NULL
)) {
250 test_map_pgs_dump_all
= true;
251 } else if (ceph_argparse_flag(args
, i
, "--test-random", (char*)NULL
)) {
253 } else if (ceph_argparse_flag(args
, i
, "--clobber", (char*)NULL
)) {
255 } else if (ceph_argparse_witharg(args
, i
, &pg_bits
, err
, "--pg_bits", (char*)NULL
)) {
256 if (!err
.str().empty()) {
257 cerr
<< err
.str() << std::endl
;
260 } else if (ceph_argparse_witharg(args
, i
, &pgp_bits
, err
, "--pgp_bits", (char*)NULL
)) {
261 if (!err
.str().empty()) {
262 cerr
<< err
.str() << std::endl
;
265 } else if (ceph_argparse_witharg(args
, i
, &val
, "--export_crush", (char*)NULL
)) {
267 } else if (ceph_argparse_witharg(args
, i
, &val
, "--import_crush", (char*)NULL
)) {
269 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_pg", (char*)NULL
)) {
271 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_object", (char*)NULL
)) {
272 test_map_object
= val
;
273 } else if (ceph_argparse_flag(args
, i
, "--test_crush", (char*)NULL
)) {
275 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--pg_num", (char*)NULL
)) {
277 pg_num
= strict_strtoll(val
.c_str(), 10, &interr
);
278 if (interr
.length() > 0) {
279 cerr
<< "error parsing integer value " << interr
<< std::endl
;
282 } else if (ceph_argparse_witharg(args
, i
, &range_first
, err
, "--range_first", (char*)NULL
)) {
283 } else if (ceph_argparse_witharg(args
, i
, &range_last
, err
, "--range_last", (char*)NULL
)) {
284 } else if (ceph_argparse_witharg(args
, i
, &pool
, err
, "--pool", (char*)NULL
)) {
285 if (!err
.str().empty()) {
286 cerr
<< err
.str() << std::endl
;
289 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--adjust-crush-weight", (char*)NULL
)) {
290 adjust_crush_weight
= val
;
291 } else if (ceph_argparse_flag(args
, i
, "--save", (char*)NULL
)) {
293 } else if (ceph_argparse_flag(args
, i
, "--vstart", (char*)NULL
)) {
300 cerr
<< me
<< ": must specify osdmap filename" << std::endl
;
303 else if (args
.size() > 1) {
304 cerr
<< me
<< ": too many arguments" << std::endl
;
307 if (upmap_deviation
< 1) {
308 cerr
<< me
<< ": upmap-deviation must be >= 1" << std::endl
;
313 if (range_first
>= 0 && range_last
>= 0) {
316 for (int i
=range_first
; i
<= range_last
; i
++) {
320 string error
, s
= f
.str();
321 int r
= bl
.read_file(s
.c_str(), &error
);
323 cerr
<< "unable to read " << s
<< ": " << cpp_strerror(r
) << std::endl
;
326 cout
<< s
<< " got " << bl
.length() << " bytes" << std::endl
;
327 OSDMap
*o
= new OSDMap
;
331 OSDMap::dedup(prev
, o
);
340 cerr
<< me
<< ": osdmap file '" << fn
<< "'" << std::endl
;
344 if (!createsimple
&& !create_from_conf
&& !clobber
) {
346 r
= bl
.read_file(fn
.c_str(), &error
);
351 catch (const buffer::error
&e
) {
352 cerr
<< me
<< ": error decoding osdmap '" << fn
<< "'" << std::endl
;
357 cerr
<< me
<< ": couldn't open " << fn
<< ": " << error
<< std::endl
;
361 else if ((createsimple
|| create_from_conf
) && !clobber
&& ::stat(fn
.c_str(), &st
) == 0) {
362 cerr
<< me
<< ": " << fn
<< " exists, --clobber to overwrite" << std::endl
;
366 if (createsimple
|| create_from_conf
) {
369 cerr
<< me
<< ": osd count must be > 0" << std::endl
;
377 osdmap
.build_simple_with_pool(
378 g_ceph_context
, 0, fsid
, num_osd
, pg_bits
, pgp_bits
);
380 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osd
);
386 cout
<< "marking all OSDs up and in" << std::endl
;
387 int n
= osdmap
.get_max_osd();
388 for (int i
=0; i
<n
; i
++) {
389 osdmap
.set_state(i
, osdmap
.get_state(i
) | CEPH_OSD_UP
);
390 osdmap
.set_weight(i
, CEPH_OSD_IN
);
391 if (osdmap
.crush
->get_item_weight(i
) == 0 ) {
392 osdmap
.crush
->adjust_item_weightf(g_ceph_context
, i
, 1.0);
397 if (marked_out
>=0 && marked_out
< osdmap
.get_max_osd()) {
398 cout
<< "marking OSD@" << marked_out
<< " as out" << std::endl
;
400 osdmap
.set_state(id
, osdmap
.get_state(id
) | CEPH_OSD_UP
);
401 osdmap
.set_weight(id
, CEPH_OSD_OUT
);
404 if (marked_up
>=0 && marked_up
< osdmap
.get_max_osd()) {
405 cout
<< "marking OSD@" << marked_up
<< " as up" << std::endl
;
407 osdmap
.set_state(id
, osdmap
.get_state(id
) | CEPH_OSD_UP
);
410 if (marked_in
>=0 && marked_in
< osdmap
.get_max_osd()) {
411 cout
<< "marking OSD@" << marked_up
<< " as up" << std::endl
;
413 osdmap
.set_weight(id
, CEPH_OSD_IN
);
416 for_each_substr(adjust_crush_weight
, ",", [&](auto osd_to_adjust
) {
417 std::string_view osd_to_weight_delimiter
{":"};
418 size_t pos
= osd_to_adjust
.find(osd_to_weight_delimiter
);
419 if (pos
== osd_to_adjust
.npos
) {
420 cerr
<< me
<< ": use ':' as separator of osd id and its weight"
424 int osd_id
= std::stoi(string(osd_to_adjust
.substr(0, pos
)));
425 float new_weight
= std::stof(string(osd_to_adjust
.substr(pos
+ 1)));
426 osdmap
.crush
->adjust_item_weightf(g_ceph_context
, osd_id
, new_weight
);
427 std::cout
<< "Adjusted osd." << osd_id
<< " CRUSH weight to " << new_weight
430 OSDMap::Incremental inc
;
431 inc
.fsid
= osdmap
.get_fsid();
432 inc
.epoch
= osdmap
.get_epoch() + 1;
433 osdmap
.apply_incremental(inc
);
439 cout
<< "clearing pg/primary temp" << std::endl
;
443 cout
<< "cleaning pg temps" << std::endl
;
444 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
446 tmpmap
.deepish_copy_from(osdmap
);
447 tmpmap
.apply_incremental(pending_inc
);
448 OSDMap::clean_temps(g_ceph_context
, osdmap
, tmpmap
, &pending_inc
);
450 int upmap_fd
= STDOUT_FILENO
;
451 if (upmap
|| upmap_cleanup
|| read
) {
452 if (upmap_file
!= "-") {
453 upmap_fd
= ::open(upmap_file
.c_str(), O_CREAT
|O_WRONLY
|O_TRUNC
, 0644);
455 cerr
<< "error opening " << upmap_file
<< ": " << cpp_strerror(errno
)
459 cout
<< "writing upmap command output to: " << upmap_file
<< std::endl
;
463 cout
<< "checking for upmap cleanups" << std::endl
;
464 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
465 pending_inc
.fsid
= osdmap
.get_fsid();
466 int r
= osdmap
.clean_pg_upmaps(g_ceph_context
, &pending_inc
);
468 print_inc_upmaps(pending_inc
, upmap_fd
, vstart
);
469 r
= osdmap
.apply_incremental(pending_inc
);
474 int64_t pid
= osdmap
.lookup_pg_pool_name(read_pool
);
476 cerr
<< " pool " << read_pool
<< " does not exist" << std::endl
;
480 const pg_pool_t
* pool
= osdmap
.get_pg_pool(pid
);
481 if (! pool
->is_replicated()) {
482 cerr
<< read_pool
<< " is an erasure coded pool; "
483 << "please try again with a replicated pool." << std::endl
;
488 tmp_osd_map
.deepish_copy_from(osdmap
);
490 // Gather BEFORE info
491 map
<uint64_t,set
<pg_t
>> pgs_by_osd
;
492 map
<uint64_t,set
<pg_t
>> prim_pgs_by_osd
;
493 map
<uint64_t,set
<pg_t
>> acting_prims_by_osd
;
494 pgs_by_osd
= tmp_osd_map
.get_pgs_by_osd(g_ceph_context
, pid
, &prim_pgs_by_osd
, &acting_prims_by_osd
);
495 OSDMap::read_balance_info_t rb_info
;
496 tmp_osd_map
.calc_read_balance_score(g_ceph_context
, pid
, &rb_info
);
497 float read_balance_score_before
= rb_info
.adjusted_score
;
498 ceph_assert(read_balance_score_before
>= 0);
500 // Calculate read balancer
501 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
502 int num_changes
= osdmap
.balance_primaries(g_ceph_context
, pid
, &pending_inc
, tmp_osd_map
);
504 if (num_changes
< 0) {
505 cerr
<< "Error balancing primaries. Rerun with at least --debug-osd=10 for more details." << std::endl
;
510 map
<uint64_t,set
<pg_t
>> pgs_by_osd_2
;
511 map
<uint64_t,set
<pg_t
>> prim_pgs_by_osd_2
;
512 map
<uint64_t,set
<pg_t
>> acting_prims_by_osd_2
;
513 pgs_by_osd_2
= tmp_osd_map
.get_pgs_by_osd(g_ceph_context
, pid
, &prim_pgs_by_osd_2
, &acting_prims_by_osd_2
);
514 tmp_osd_map
.calc_read_balance_score(g_ceph_context
, pid
, &rb_info
);
515 float read_balance_score_after
= rb_info
.adjusted_score
;
516 ceph_assert(read_balance_score_after
>= 0);
518 if (num_changes
> 0) {
520 cout
<< "---------- BEFORE ------------ \n";
521 for (auto & [osd
, pgs
] : prim_pgs_by_osd
) {
522 cout
<< " osd." << osd
<< " | primary affinity: " << tmp_osd_map
.get_primary_affinityf(osd
) << " | number of prims: " << pgs
.size() << "\n";
525 cout
<< "read_balance_score of '" << read_pool
<< "': " << read_balance_score_before
<< "\n\n\n";
527 cout
<< "---------- AFTER ------------ \n";
528 for (auto & [osd
, pgs
] : prim_pgs_by_osd_2
) {
529 cout
<< " osd." << osd
<< " | primary affinity: " << tmp_osd_map
.get_primary_affinityf(osd
) << " | number of prims: " << pgs
.size() << "\n";
532 cout
<< "read_balance_score of '" << read_pool
<< "': " << read_balance_score_after
<< "\n\n\n";
533 cout
<< "num changes: " << num_changes
<< "\n";
535 print_inc_upmaps(pending_inc
, upmap_fd
, vstart
);
537 cout
<< " Unable to find further optimization, or distribution is already perfect\n";
541 cout
<< "upmap, max-count " << upmap_max
542 << ", max deviation " << upmap_deviation
544 vector
<int64_t> pools
;
545 set
<int64_t> upmap_pool_nums
;
546 for (auto& s
: upmap_pools
) {
547 int64_t p
= osdmap
.lookup_pg_pool_name(s
);
549 cerr
<< " pool " << s
<< " does not exist" << std::endl
;
553 upmap_pool_nums
.insert(p
);
555 if (!pools
.empty()) {
556 cout
<< " limiting to pools " << upmap_pools
<< " (" << pools
<< ")"
559 mempool::osdmap::map
<int64_t,pg_pool_t
> opools
= osdmap
.get_pools();
560 for (auto& i
: opools
) {
561 pools
.push_back(i
.first
);
565 cout
<< "No pools available" << std::endl
;
569 struct timespec round_start
;
570 [[maybe_unused
]] int r
= clock_gettime(CLOCK_MONOTONIC
, &round_start
);
574 std::shuffle(pools
.begin(), pools
.end(), std::mt19937
{rd()});
577 cout
<< osdmap
.get_pool_name(i
) << " ";
579 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
580 pending_inc
.fsid
= osdmap
.get_fsid();
582 int left
= upmap_max
;
583 struct timespec begin
, end
;
584 r
= clock_gettime(CLOCK_MONOTONIC
, &begin
);
586 for (auto& i
: pools
) {
587 set
<int64_t> one_pool
;
589 //TODO: Josh: Add a function on the seed for multiple iterations.
590 int did
= osdmap
.calc_pg_upmaps(
591 g_ceph_context
, upmap_deviation
,
593 &pending_inc
, upmap_p_seed
);
598 if (upmap_p_seed
!= nullptr) {
602 r
= clock_gettime(CLOCK_MONOTONIC
, &end
);
604 cout
<< "prepared " << total_did
<< "/" << upmap_max
<< " changes" << std::endl
;
605 float elapsed_time
= (end
.tv_sec
- begin
.tv_sec
) + 1.0e-9*(end
.tv_nsec
- begin
.tv_nsec
);
607 cout
<< "Time elapsed " << elapsed_time
<< " secs" << std::endl
;
609 print_inc_upmaps(pending_inc
, upmap_fd
, vstart
);
610 if (save
|| upmap_active
) {
611 int r
= osdmap
.apply_incremental(pending_inc
);
617 cout
<< "Unable to find further optimization, "
618 << "or distribution is already perfect"
621 map
<int,set
<pg_t
>> pgs_by_osd
;
622 for (auto& i
: osdmap
.get_pools()) {
623 if (!upmap_pool_nums
.empty() && !upmap_pool_nums
.count(i
.first
))
625 for (unsigned ps
= 0; ps
< i
.second
.get_pg_num(); ++ps
) {
626 pg_t
pg(ps
, i
.first
);
628 osdmap
.pg_to_up_acting_osds(pg
, &up
, nullptr, nullptr, nullptr);
629 //ldout(cct, 20) << __func__ << " " << pg << " up " << up << dendl;
630 for (auto osd
: up
) {
631 if (osd
!= CRUSH_ITEM_NONE
)
632 pgs_by_osd
[osd
].insert(pg
);
636 for (auto& i
: pgs_by_osd
)
637 cout
<< "osd." << i
.first
<< " pgs " << i
.second
.size() << std::endl
;
638 float elapsed_time
= (end
.tv_sec
- round_start
.tv_sec
) + 1.0e-9*(end
.tv_nsec
- round_start
.tv_nsec
);
639 cout
<< "Total time elapsed " << elapsed_time
<< " secs, " << rounds
<< " rounds" << std::endl
;
644 } while(upmap_active
);
647 if (upmap_file
!= "-") {
651 if (!import_crush
.empty()) {
654 r
= cbl
.read_file(import_crush
.c_str(), &error
);
656 cerr
<< me
<< ": error reading crush map from " << import_crush
657 << ": " << error
<< std::endl
;
663 auto p
= cbl
.cbegin();
666 if (cw
.get_max_devices() > osdmap
.get_max_osd()) {
667 cerr
<< me
<< ": crushmap max_devices " << cw
.get_max_devices()
668 << " > osdmap max_osd " << osdmap
.get_max_osd() << std::endl
;
673 OSDMap::Incremental inc
;
674 inc
.fsid
= osdmap
.get_fsid();
675 inc
.epoch
= osdmap
.get_epoch()+1;
677 osdmap
.apply_incremental(inc
);
678 cout
<< me
<< ": imported " << cbl
.length() << " byte crush map from " << import_crush
<< std::endl
;
682 if (!export_crush
.empty()) {
684 osdmap
.crush
->encode(cbl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
685 r
= cbl
.write_file(export_crush
.c_str());
687 cerr
<< me
<< ": error writing crush map to " << import_crush
<< std::endl
;
690 cout
<< me
<< ": exported crush map to " << export_crush
<< std::endl
;
693 if (!test_map_object
.empty()) {
694 object_t
oid(test_map_object
);
696 cout
<< me
<< ": assuming pool 1 (use --pool to override)" << std::endl
;
699 if (!osdmap
.have_pg_pool(pool
)) {
700 cerr
<< "There is no pool " << pool
<< std::endl
;
703 object_locator_t
loc(pool
);
704 pg_t raw_pgid
= osdmap
.object_locator_to_pg(oid
, loc
);
705 pg_t pgid
= osdmap
.raw_pg_to_pg(raw_pgid
);
708 osdmap
.pg_to_acting_osds(pgid
, acting
);
709 cout
<< " object '" << oid
714 if (!test_map_pg
.empty()) {
716 if (!pgid
.parse(test_map_pg
.c_str())) {
717 cerr
<< me
<< ": failed to parse pg '" << test_map_pg
<< std::endl
;
720 cout
<< " parsed '" << test_map_pg
<< "' -> " << pgid
<< std::endl
;
722 vector
<int> raw
, up
, acting
;
723 int raw_primary
, up_primary
, acting_primary
;
724 osdmap
.pg_to_raw_osds(pgid
, &raw
, &raw_primary
);
725 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
726 &acting
, &acting_primary
);
727 cout
<< pgid
<< " raw (" << raw
<< ", p" << raw_primary
728 << ") up (" << up
<< ", p" << up_primary
729 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
732 if (test_map_pgs
|| test_map_pgs_dump
|| test_map_pgs_dump_all
) {
733 if (pool
!= -1 && !osdmap
.have_pg_pool(pool
)) {
734 cerr
<< "There is no pool " << pool
<< std::endl
;
737 int n
= osdmap
.get_max_osd();
738 vector
<int> count(n
, 0);
739 vector
<int> first_count(n
, 0);
740 vector
<int> primary_count(n
, 0);
741 vector
<int> size(30, 0);
745 auto& pools
= osdmap
.get_pools();
746 for (auto p
= pools
.begin(); p
!= pools
.end(); ++p
) {
747 if (pool
!= -1 && p
->first
!= pool
)
750 p
->second
.set_pg_num(pg_num
);
752 cout
<< "pool " << p
->first
753 << " pg_num " << p
->second
.get_pg_num() << std::endl
;
754 for (unsigned i
= 0; i
< p
->second
.get_pg_num(); ++i
) {
755 pg_t pgid
= pg_t(i
, p
->first
);
757 vector
<int> osds
, raw
, up
, acting
;
758 int primary
, calced_primary
, up_primary
, acting_primary
;
760 osds
.resize(p
->second
.size
);
761 for (unsigned i
=0; i
<osds
.size(); ++i
) {
762 osds
[i
] = rand() % osdmap
.get_max_osd();
765 } else if (test_map_pgs_dump_all
) {
766 osdmap
.pg_to_raw_osds(pgid
, &raw
, &calced_primary
);
767 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
768 &acting
, &acting_primary
);
770 primary
= acting_primary
;
772 osdmap
.pg_to_acting_osds(pgid
, &osds
, &primary
);
775 if ((unsigned)max_size
< osds
.size())
776 max_size
= osds
.size();
778 if (test_map_pgs_dump
) {
779 cout
<< pgid
<< "\t" << osds
<< "\t" << primary
<< std::endl
;
780 } else if (test_map_pgs_dump_all
) {
781 cout
<< pgid
<< " raw (" << raw
<< ", p" << calced_primary
782 << ") up (" << up
<< ", p" << up_primary
783 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
787 for (unsigned i
=0; i
<osds
.size(); i
++) {
788 //cout << " rep " << i << " on " << osds[i] << std::endl;
789 if (osds
[i
] != CRUSH_ITEM_NONE
)
792 if (osds
.size() && osds
[0] != CRUSH_ITEM_NONE
)
793 first_count
[osds
[0]]++;
795 primary_count
[primary
]++;
803 cout
<< "#osd\tcount\tfirst\tprimary\tc wt\twt\n";
804 for (int i
=0; i
<n
; i
++) {
805 if (!osdmap
.is_in(i
))
807 if (osdmap
.crush
->get_item_weight(i
) <= 0)
812 << "\t" << first_count
[i
]
813 << "\t" << primary_count
[i
]
814 << "\t" << osdmap
.crush
->get_item_weightf(i
)
815 << "\t" << osdmap
.get_weightf(i
)
820 count
[i
] < count
[min_osd
]))
824 count
[i
] > count
[max_osd
]))
828 uint64_t avg
= in
? (total
/ in
) : 0;
830 for (int i
=0; i
<n
; i
++) {
831 if (!osdmap
.is_in(i
))
833 if (osdmap
.crush
->get_item_weight(i
) <= 0)
835 dev
+= (avg
- count
[i
]) * (avg
- count
[i
]);
840 //double edev = sqrt(pgavg) * (double)avg / pgavg;
841 double edev
= sqrt((double)total
/ (double)in
* (1.0 - (1.0 / (double)in
)));
842 cout
<< " in " << in
<< std::endl
;
843 cout
<< " avg " << avg
845 << " (" << (dev
/avg
) << "x)"
846 << " (expected " << edev
<< " " << (edev
/avg
) << "x))"
850 cout
<< " min osd." << min_osd
<< " " << count
[min_osd
] << std::endl
;
852 cout
<< " max osd." << max_osd
<< " " << count
[max_osd
] << std::endl
;
854 for (int i
=0; i
<=max_size
; i
++) {
856 cout
<< "size " << i
<< "\t" << size
[i
] << std::endl
;
862 cout
<< "pass " << ++pass
<< std::endl
;
864 ceph::unordered_map
<pg_t
,vector
<int> > m
;
865 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
866 p
!= osdmap
.get_pools().end();
868 const pg_pool_t
*pool
= osdmap
.get_pg_pool(p
->first
);
869 for (ps_t ps
= 0; ps
< pool
->get_pg_num(); ps
++) {
870 pg_t
pgid(ps
, p
->first
);
871 for (int i
=0; i
<100; i
++) {
872 cout
<< pgid
<< " attempt " << i
<< std::endl
;
875 osdmap
.pg_to_acting_osds(pgid
, r
);
876 //cout << pgid << " " << r << std::endl;
879 cout
<< pgid
<< " had " << m
[pgid
] << " now " << r
<< std::endl
;
890 if (!print
&& !health
&& !tree
&& !modified
&&
891 export_crush
.empty() && import_crush
.empty() &&
892 test_map_pg
.empty() && test_map_object
.empty() &&
893 !test_map_pgs
&& !test_map_pgs_dump
&& !test_map_pgs_dump_all
&&
894 adjust_crush_weight
.empty() && !upmap
&& !upmap_cleanup
&& !read
) {
895 cerr
<< me
<< ": no action specified?" << std::endl
;
903 health_check_map_t checks
;
904 osdmap
.check_health(cct
.get(), &checks
);
905 JSONFormatter
jf(true);
906 jf
.dump_object("checks", checks
);
910 if (print_formatter
) {
911 print_formatter
->open_object_section("osdmap");
912 osdmap
.dump(print_formatter
.get());
913 print_formatter
->close_section();
914 print_formatter
->flush(cout
);
916 osdmap
.print(cct
.get(), cout
);
921 if (tree_formatter
) {
922 tree_formatter
->open_object_section("tree");
923 osdmap
.print_tree(tree_formatter
.get(), NULL
);
924 tree_formatter
->close_section();
925 tree_formatter
->flush(cout
);
928 osdmap
.print_tree(NULL
, &cout
);
933 osdmap
.encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
| CEPH_FEATURE_RESERVED
);
936 cout
<< me
<< ": writing epoch " << osdmap
.get_epoch()
939 int r
= bl
.write_file(fn
.c_str());
941 cerr
<< "osdmaptool: error writing to '" << fn
<< "': "
942 << cpp_strerror(r
) << std::endl
;