1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include "common/ceph_argparse.h"
19 #include "common/errno.h"
20 #include "common/safe_io.h"
21 #include "mon/health_check.h"
23 #include "global/global_init.h"
24 #include "osd/OSDMap.h"
30 cout
<< " usage: [--print] [--createsimple <numosd> [--clobber] [--pg_bits <bitsperosd>]] <mapfilename>" << std::endl
;
31 cout
<< " --export-crush <file> write osdmap's crush map to <file>" << std::endl
;
32 cout
<< " --import-crush <file> replace osdmap's crush map with <file>" << std::endl
;
33 cout
<< " --test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] map all pgs" << std::endl
;
34 cout
<< " --test-map-pgs-dump [--pool <poolid>] map all pgs" << std::endl
;
35 cout
<< " --test-map-pgs-dump-all [--pool <poolid>] map all pgs to osds" << std::endl
;
36 cout
<< " --health dump health checks" << std::endl
;
37 cout
<< " --mark-up-in mark osds up and in (but do not persist)" << std::endl
;
38 cout
<< " --with-default-pool include default pool when creating map" << std::endl
;
39 cout
<< " --clear-temp clear pg_temp and primary_temp" << std::endl
;
40 cout
<< " --test-random do random placements" << std::endl
;
41 cout
<< " --test-map-pg <pgid> map a pgid to osds" << std::endl
;
42 cout
<< " --test-map-object <objectname> [--pool <poolid>] map an object to osds"
44 cout
<< " --upmap-cleanup <file> clean up pg_upmap[_items] entries, writing" << std::endl
;
45 cout
<< " commands to <file> [default: - for stdout]" << std::endl
;
46 cout
<< " --upmap <file> calculate pg upmap entries to balance pg layout" << std::endl
;
47 cout
<< " writing commands to <file> [default: - for stdout]" << std::endl
;
48 cout
<< " --upmap-max <max-count> set max upmap entries to calculate [default: 100]" << std::endl
;
49 cout
<< " --upmap-deviation <max-deviation>" << std::endl
;
50 cout
<< " max deviation from target [default: .01]" << std::endl
;
51 cout
<< " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl
;
52 cout
<< " --upmap-save write modified OSDMap with upmap changes" << std::endl
;
// Render the upmap changes carried by pending_inc as "ceph osd ..." command
// text and write that text to the file descriptor fd.
//
// Removal commands (rm-pg-upmap, rm-pg-upmap-items) are produced from the
// old_pg_upmap / old_pg_upmap_items members; set commands (pg-upmap,
// pg-upmap-items) are produced from new_pg_upmap / new_pg_upmap_items.
// NOTE(review): the declarations of `ss` and `s` are not visible in this
// excerpt; presumably `ss` is a string stream and `s` holds its contents --
// confirm against the full file.
56 void print_inc_upmaps(const OSDMap::Incremental
& pending_inc
, int fd
)
// One "rm-pg-upmap" line per element of old_pg_upmap.
59 for (auto& i
: pending_inc
.old_pg_upmap
) {
60 ss
<< "ceph osd rm-pg-upmap " << i
<< std::endl
;
// One "pg-upmap" command per element of new_pg_upmap, starting with the
// element's key (i.first).
62 for (auto& i
: pending_inc
.new_pg_upmap
) {
63 ss
<< "ceph osd pg-upmap " << i
.first
;
// Walks the values in i.second; the loop body is not visible in this excerpt.
64 for (auto osd
: i
.second
) {
// One "rm-pg-upmap-items" line per element of old_pg_upmap_items.
69 for (auto& i
: pending_inc
.old_pg_upmap_items
) {
70 ss
<< "ceph osd rm-pg-upmap-items " << i
<< std::endl
;
// One "pg-upmap-items" command per element of new_pg_upmap_items, starting
// with the element's key (i.first).
72 for (auto& i
: pending_inc
.new_pg_upmap_items
) {
73 ss
<< "ceph osd pg-upmap-items " << i
.first
;
// Each element p of i.second is appended as " <p.first> <p.second>".
74 for (auto p
: i
.second
) {
75 ss
<< " " << p
.first
<< " " << p
.second
;
// Write s (s.size() bytes) to fd through safe_write(); r receives its result.
80 int r
= safe_write(fd
, s
.c_str(), s
.size());
// Failure report on stderr, with r rendered by cpp_strerror().
// NOTE(review): the condition guarding this message is not visible here.
82 cerr
<< "error writing output: " << cpp_strerror(r
) << std::endl
;
87 int main(int argc
, const char **argv
)
89 vector
<const char*> args
;
90 argv_to_vec(argc
, argv
, args
);
93 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
94 CODE_ENVIRONMENT_UTILITY
,
95 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
96 common_init_finish(g_ceph_context
);
98 const char *me
= argv
[0];
102 boost::scoped_ptr
<Formatter
> print_formatter
;
104 boost::scoped_ptr
<Formatter
> tree_formatter
;
105 bool createsimple
= false;
106 bool createpool
= false;
107 bool create_from_conf
= false;
109 int pg_bits
= g_conf
->osd_pg_bits
;
110 int pgp_bits
= g_conf
->osd_pgp_bits
;
111 bool clobber
= false;
112 bool modified
= false;
113 std::string export_crush
, import_crush
, test_map_pg
, test_map_object
;
114 bool test_crush
= false;
115 int range_first
= -1;
118 bool mark_up_in
= false;
119 bool clear_temp
= false;
120 bool test_map_pgs
= false;
121 bool test_map_pgs_dump
= false;
122 bool test_random
= false;
123 bool upmap_cleanup
= false;
125 bool upmap_save
= false;
127 std::string upmap_file
= "-";
129 float upmap_deviation
= .01;
130 std::set
<std::string
> upmap_pools
;
132 bool test_map_pgs_dump_all
= false;
135 std::ostringstream err
;
136 for (std::vector
<const char*>::iterator i
= args
.begin(); i
!= args
.end(); ) {
137 if (ceph_argparse_double_dash(args
, i
)) {
139 } else if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
141 } else if (ceph_argparse_flag(args
, i
, "-p", "--print", (char*)NULL
)) {
143 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--dump", (char*)NULL
)) {
145 if (!val
.empty() && val
!= "plain") {
146 print_formatter
.reset(Formatter::create(val
, "", "json"));
148 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--tree", (char*)NULL
)) {
150 if (!val
.empty() && val
!= "plain") {
151 tree_formatter
.reset(Formatter::create(val
, "", "json"));
153 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap-cleanup", (char*)NULL
)) {
154 upmap_cleanup
= true;
} else if (ceph_argparse_flag(args, i, "--upmap-save", (char*)NULL)) {
  // --upmap-save is documented in the usage text as a bare switch. Parsing it
  // as an option-with-argument made it demand a value and overwrite
  // upmap_file, i.e. the output path chosen via --upmap / --upmap-cleanup.
157 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap", (char*)NULL
)) {
158 upmap_cleanup
= true;
160 } else if (ceph_argparse_witharg(args
, i
, &upmap_max
, err
, "--upmap-max", (char*)NULL
)) {
161 } else if (ceph_argparse_witharg(args
, i
, &upmap_deviation
, err
, "--upmap-deviation", (char*)NULL
)) {
162 } else if (ceph_argparse_witharg(args
, i
, &val
, "--upmap-pool", (char*)NULL
)) {
163 upmap_pools
.insert(val
);
164 } else if (ceph_argparse_witharg(args
, i
, &num_osd
, err
, "--createsimple", (char*)NULL
)) {
165 if (!err
.str().empty()) {
166 cerr
<< err
.str() << std::endl
;
170 } else if (ceph_argparse_flag(args
, i
, "--health", (char*)NULL
)) {
172 } else if (ceph_argparse_flag(args
, i
, "--with-default-pool", (char*)NULL
)) {
174 } else if (ceph_argparse_flag(args
, i
, "--create-from-conf", (char*)NULL
)) {
175 create_from_conf
= true;
176 } else if (ceph_argparse_flag(args
, i
, "--mark-up-in", (char*)NULL
)) {
178 } else if (ceph_argparse_flag(args
, i
, "--clear-temp", (char*)NULL
)) {
180 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs", (char*)NULL
)) {
182 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump", (char*)NULL
)) {
183 test_map_pgs_dump
= true;
184 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump-all", (char*)NULL
)) {
185 test_map_pgs_dump_all
= true;
186 } else if (ceph_argparse_flag(args
, i
, "--test-random", (char*)NULL
)) {
188 } else if (ceph_argparse_flag(args
, i
, "--clobber", (char*)NULL
)) {
190 } else if (ceph_argparse_witharg(args
, i
, &pg_bits
, err
, "--pg_bits", (char*)NULL
)) {
191 if (!err
.str().empty()) {
192 cerr
<< err
.str() << std::endl
;
195 } else if (ceph_argparse_witharg(args
, i
, &pgp_bits
, err
, "--pgp_bits", (char*)NULL
)) {
196 if (!err
.str().empty()) {
197 cerr
<< err
.str() << std::endl
;
200 } else if (ceph_argparse_witharg(args
, i
, &val
, "--export_crush", (char*)NULL
)) {
202 } else if (ceph_argparse_witharg(args
, i
, &val
, "--import_crush", (char*)NULL
)) {
204 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_pg", (char*)NULL
)) {
206 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_object", (char*)NULL
)) {
207 test_map_object
= val
;
208 } else if (ceph_argparse_flag(args
, i
, "--test_crush", (char*)NULL
)) {
210 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--pg_num", (char*)NULL
)) {
212 pg_num
= strict_strtoll(val
.c_str(), 10, &interr
);
213 if (interr
.length() > 0) {
214 cerr
<< "error parsing integer value " << interr
<< std::endl
;
217 } else if (ceph_argparse_witharg(args
, i
, &range_first
, err
, "--range_first", (char*)NULL
)) {
218 } else if (ceph_argparse_witharg(args
, i
, &range_last
, err
, "--range_last", (char*)NULL
)) {
219 } else if (ceph_argparse_witharg(args
, i
, &pool
, err
, "--pool", (char*)NULL
)) {
220 if (!err
.str().empty()) {
221 cerr
<< err
.str() << std::endl
;
229 cerr
<< me
<< ": must specify osdmap filename" << std::endl
;
232 else if (args
.size() > 1) {
233 cerr
<< me
<< ": too many arguments" << std::endl
;
238 if (range_first
>= 0 && range_last
>= 0) {
241 for (int i
=range_first
; i
<= range_last
; i
++) {
245 string error
, s
= f
.str();
246 int r
= bl
.read_file(s
.c_str(), &error
);
248 cerr
<< "unable to read " << s
<< ": " << cpp_strerror(r
) << std::endl
;
251 cout
<< s
<< " got " << bl
.length() << " bytes" << std::endl
;
252 OSDMap
*o
= new OSDMap
;
256 OSDMap::dedup(prev
, o
);
265 cerr
<< me
<< ": osdmap file '" << fn
<< "'" << std::endl
;
269 if (!createsimple
&& !create_from_conf
&& !clobber
) {
271 r
= bl
.read_file(fn
.c_str(), &error
);
276 catch (const buffer::error
&e
) {
277 cerr
<< me
<< ": error decoding osdmap '" << fn
<< "'" << std::endl
;
282 cerr
<< me
<< ": couldn't open " << fn
<< ": " << error
<< std::endl
;
286 else if ((createsimple
|| create_from_conf
) && !clobber
&& ::stat(fn
.c_str(), &st
) == 0) {
287 cerr
<< me
<< ": " << fn
<< " exists, --clobber to overwrite" << std::endl
;
291 if (createsimple
|| create_from_conf
) {
294 cerr
<< me
<< ": osd count must be > 0" << std::endl
;
301 memset(&fsid
, 0, sizeof(uuid_d
));
303 osdmap
.build_simple_with_pool(
304 g_ceph_context
, 0, fsid
, num_osd
, pg_bits
, pgp_bits
);
306 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osd
);
312 cout
<< "marking all OSDs up and in" << std::endl
;
313 int n
= osdmap
.get_max_osd();
314 for (int i
=0; i
<n
; i
++) {
315 osdmap
.set_state(i
, osdmap
.get_state(i
) | CEPH_OSD_UP
);
316 osdmap
.set_weight(i
, CEPH_OSD_IN
);
317 osdmap
.crush
->adjust_item_weightf(g_ceph_context
, i
, 1.0);
321 cout
<< "clearing pg/primary temp" << std::endl
;
324 int upmap_fd
= STDOUT_FILENO
;
325 if (upmap
|| upmap_cleanup
) {
326 if (upmap_file
!= "-") {
327 upmap_fd
= ::open(upmap_file
.c_str(), O_CREAT
|O_WRONLY
, 0644);
329 cerr
<< "error opening " << upmap_file
<< ": " << cpp_strerror(errno
)
333 cout
<< "writing upmap command output to: " << upmap_file
<< std::endl
;
337 cout
<< "checking for upmap cleanups" << std::endl
;
338 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
339 pending_inc
.fsid
= osdmap
.get_fsid();
340 int r
= osdmap
.clean_pg_upmaps(g_ceph_context
, &pending_inc
);
342 print_inc_upmaps(pending_inc
, upmap_fd
);
343 r
= osdmap
.apply_incremental(pending_inc
);
348 cout
<< "upmap, max-count " << upmap_max
349 << ", max deviation " << upmap_deviation
351 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
352 pending_inc
.fsid
= osdmap
.get_fsid();
354 for (auto& s
: upmap_pools
) {
355 int64_t p
= osdmap
.lookup_pg_pool_name(s
);
357 cerr
<< " pool '" << s
<< "' does not exist" << std::endl
;
363 cout
<< " limiting to pools " << upmap_pools
<< " (" << pools
<< ")"
365 int changed
= osdmap
.calc_pg_upmaps(
366 g_ceph_context
, upmap_deviation
,
370 print_inc_upmaps(pending_inc
, upmap_fd
);
372 int r
= osdmap
.apply_incremental(pending_inc
);
377 cout
<< "no upmaps proposed" << std::endl
;
380 if (upmap_file
!= "-") {
384 if (!import_crush
.empty()) {
387 r
= cbl
.read_file(import_crush
.c_str(), &error
);
389 cerr
<< me
<< ": error reading crush map from " << import_crush
390 << ": " << error
<< std::endl
;
396 bufferlist::iterator p
= cbl
.begin();
399 if (cw
.get_max_devices() > osdmap
.get_max_osd()) {
400 cerr
<< me
<< ": crushmap max_devices " << cw
.get_max_devices()
401 << " > osdmap max_osd " << osdmap
.get_max_osd() << std::endl
;
406 OSDMap::Incremental inc
;
407 inc
.fsid
= osdmap
.get_fsid();
408 inc
.epoch
= osdmap
.get_epoch()+1;
410 osdmap
.apply_incremental(inc
);
411 cout
<< me
<< ": imported " << cbl
.length() << " byte crush map from " << import_crush
<< std::endl
;
415 if (!export_crush
.empty()) {
417 osdmap
.crush
->encode(cbl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
418 r
= cbl
.write_file(export_crush
.c_str());
420 cerr
<< me
<< ": error writing crush map to " << import_crush
<< std::endl
;
423 cout
<< me
<< ": exported crush map to " << export_crush
<< std::endl
;
426 if (!test_map_object
.empty()) {
427 object_t
oid(test_map_object
);
429 cout
<< me
<< ": assuming pool 1 (use --pool to override)" << std::endl
;
432 if (!osdmap
.have_pg_pool(pool
)) {
433 cerr
<< "There is no pool " << pool
<< std::endl
;
436 object_locator_t
loc(pool
);
437 pg_t raw_pgid
= osdmap
.object_locator_to_pg(oid
, loc
);
438 pg_t pgid
= osdmap
.raw_pg_to_pg(raw_pgid
);
441 osdmap
.pg_to_acting_osds(pgid
, acting
);
442 cout
<< " object '" << oid
447 if (!test_map_pg
.empty()) {
449 if (!pgid
.parse(test_map_pg
.c_str())) {
450 cerr
<< me
<< ": failed to parse pg '" << test_map_pg
<< std::endl
;
453 cout
<< " parsed '" << test_map_pg
<< "' -> " << pgid
<< std::endl
;
455 vector
<int> raw
, up
, acting
;
456 int raw_primary
, up_primary
, acting_primary
;
457 osdmap
.pg_to_raw_osds(pgid
, &raw
, &raw_primary
);
458 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
459 &acting
, &acting_primary
);
460 cout
<< pgid
<< " raw (" << raw
<< ", p" << raw_primary
461 << ") up (" << up
<< ", p" << up_primary
462 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
465 if (test_map_pgs
|| test_map_pgs_dump
|| test_map_pgs_dump_all
) {
466 if (pool
!= -1 && !osdmap
.have_pg_pool(pool
)) {
467 cerr
<< "There is no pool " << pool
<< std::endl
;
470 int n
= osdmap
.get_max_osd();
471 vector
<int> count(n
, 0);
472 vector
<int> first_count(n
, 0);
473 vector
<int> primary_count(n
, 0);
474 vector
<int> size(30, 0);
477 auto& pools
= osdmap
.get_pools();
478 for (auto p
= pools
.begin(); p
!= pools
.end(); ++p
) {
479 if (pool
!= -1 && p
->first
!= pool
)
482 p
->second
.set_pg_num(pg_num
);
484 cout
<< "pool " << p
->first
485 << " pg_num " << p
->second
.get_pg_num() << std::endl
;
486 for (unsigned i
= 0; i
< p
->second
.get_pg_num(); ++i
) {
487 pg_t pgid
= pg_t(i
, p
->first
);
489 vector
<int> osds
, raw
, up
, acting
;
490 int primary
, calced_primary
, up_primary
, acting_primary
;
492 osds
.resize(p
->second
.size
);
493 for (unsigned i
=0; i
<osds
.size(); ++i
) {
494 osds
[i
] = rand() % osdmap
.get_max_osd();
497 } else if (test_map_pgs_dump_all
) {
498 osdmap
.pg_to_raw_osds(pgid
, &raw
, &calced_primary
);
499 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
500 &acting
, &acting_primary
);
502 osdmap
.pg_to_acting_osds(pgid
, &osds
, &primary
);
506 if (test_map_pgs_dump
) {
507 cout
<< pgid
<< "\t" << osds
<< "\t" << primary
<< std::endl
;
508 } else if (test_map_pgs_dump_all
) {
509 cout
<< pgid
<< " raw (" << raw
<< ", p" << calced_primary
510 << ") up (" << up
<< ", p" << up_primary
511 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
515 for (unsigned i
=0; i
<osds
.size(); i
++) {
516 //cout << " rep " << i << " on " << osds[i] << std::endl;
520 first_count
[osds
[0]]++;
522 primary_count
[primary
]++;
530 cout
<< "#osd\tcount\tfirst\tprimary\tc wt\twt\n";
531 for (int i
=0; i
<n
; i
++) {
532 if (!osdmap
.is_in(i
))
534 if (osdmap
.crush
->get_item_weight(i
) <= 0)
539 << "\t" << first_count
[i
]
540 << "\t" << primary_count
[i
]
541 << "\t" << osdmap
.crush
->get_item_weightf(i
)
542 << "\t" << osdmap
.get_weightf(i
)
547 count
[i
] < count
[min_osd
]))
551 count
[i
] > count
[max_osd
]))
555 uint64_t avg
= in
? (total
/ in
) : 0;
557 for (int i
=0; i
<n
; i
++) {
558 if (!osdmap
.is_in(i
))
560 if (osdmap
.crush
->get_item_weight(i
) <= 0)
562 dev
+= (avg
- count
[i
]) * (avg
- count
[i
]);
567 //double edev = sqrt(pgavg) * (double)avg / pgavg;
568 double edev
= sqrt((double)total
/ (double)in
* (1.0 - (1.0 / (double)in
)));
569 cout
<< " in " << in
<< std::endl
;
570 cout
<< " avg " << avg
572 << " (" << (dev
/avg
) << "x)"
573 << " (expected " << edev
<< " " << (edev
/avg
) << "x))"
577 cout
<< " min osd." << min_osd
<< " " << count
[min_osd
] << std::endl
;
579 cout
<< " max osd." << max_osd
<< " " << count
[max_osd
] << std::endl
;
581 for (int i
=0; i
<4; i
++) {
582 cout
<< "size " << i
<< "\t" << size
[i
] << std::endl
;
588 cout
<< "pass " << ++pass
<< std::endl
;
590 ceph::unordered_map
<pg_t
,vector
<int> > m
;
591 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
592 p
!= osdmap
.get_pools().end();
594 const pg_pool_t
*pool
= osdmap
.get_pg_pool(p
->first
);
595 for (ps_t ps
= 0; ps
< pool
->get_pg_num(); ps
++) {
596 pg_t
pgid(ps
, p
->first
, -1);
597 for (int i
=0; i
<100; i
++) {
598 cout
<< pgid
<< " attempt " << i
<< std::endl
;
601 osdmap
.pg_to_acting_osds(pgid
, r
);
602 //cout << pgid << " " << r << std::endl;
605 cout
<< pgid
<< " had " << m
[pgid
] << " now " << r
<< std::endl
;
616 if (!print
&& !health
&& !tree
&& !modified
&&
617 export_crush
.empty() && import_crush
.empty() &&
618 test_map_pg
.empty() && test_map_object
.empty() &&
619 !test_map_pgs
&& !test_map_pgs_dump
&& !test_map_pgs_dump_all
&&
620 !upmap
&& !upmap_cleanup
) {
621 cerr
<< me
<< ": no action specified?" << std::endl
;
629 health_check_map_t checks
;
630 osdmap
.check_health(&checks
);
631 JSONFormatter
jf(true);
632 jf
.dump_object("checks", checks
);
636 if (print_formatter
) {
637 print_formatter
->open_object_section("osdmap");
638 osdmap
.dump(print_formatter
.get());
639 print_formatter
->close_section();
640 print_formatter
->flush(cout
);
647 if (tree_formatter
) {
648 tree_formatter
->open_object_section("tree");
649 osdmap
.print_tree(tree_formatter
.get(), NULL
);
650 tree_formatter
->close_section();
651 tree_formatter
->flush(cout
);
654 osdmap
.print_tree(NULL
, &cout
);
659 osdmap
.encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
| CEPH_FEATURE_RESERVED
);
662 cout
<< me
<< ": writing epoch " << osdmap
.get_epoch()
665 int r
= bl
.write_file(fn
.c_str());
667 cerr
<< "osdmaptool: error writing to '" << fn
<< "': "
668 << cpp_strerror(r
) << std::endl
;