1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
18 #include "common/ceph_argparse.h"
19 #include "common/errno.h"
20 #include "common/safe_io.h"
22 #include "global/global_init.h"
23 #include "osd/OSDMap.h"
// Usage text: each statement below writes one line of the command-line help
// to cout. The first line gives the overall invocation form; the remaining
// lines describe one option (or the continuation of an option's description).
// NOTE(review): the enclosing function header is not visible in this view.
29 cout
<< " usage: [--print] [--createsimple <numosd> [--clobber] [--pg_bits <bitsperosd>]] <mapfilename>" << std::endl
;
30 cout
<< " --export-crush <file> write osdmap's crush map to <file>" << std::endl
;
31 cout
<< " --import-crush <file> replace osdmap's crush map with <file>" << std::endl
;
32 cout
<< " --test-map-pgs [--pool <poolid>] [--pg_num <pg_num>] map all pgs" << std::endl
;
33 cout
<< " --test-map-pgs-dump [--pool <poolid>] map all pgs" << std::endl
;
34 cout
<< " --test-map-pgs-dump-all [--pool <poolid>] map all pgs to osds" << std::endl
;
35 cout
<< " --mark-up-in mark osds up and in (but do not persist)" << std::endl
;
36 cout
<< " --clear-temp clear pg_temp and primary_temp" << std::endl
;
37 cout
<< " --test-random do random placements" << std::endl
;
38 cout
<< " --test-map-pg <pgid> map a pgid to osds" << std::endl
;
// NOTE(review): unlike every other statement in this list, the next one has
// no visible `<< std::endl ;` terminator before the following `cout` --
// presumably lost from this view; confirm against the real file.
39 cout
<< " --test-map-object <objectname> [--pool <poolid>] map an object to osds"
41 cout
<< " --upmap-cleanup <file> clean up pg_upmap[_items] entries, writing" << std::endl
;
42 cout
<< " commands to <file> [default: - for stdout]" << std::endl
;
43 cout
<< " --upmap <file> calculate pg upmap entries to balance pg layout" << std::endl
;
44 cout
<< " writing commands to <file> [default: - for stdout]" << std::endl
;
45 cout
<< " --upmap-max <max-count> set max upmap entries to calculate [default: 100]" << std::endl
;
46 cout
<< " --upmap-deviation <max-deviation>" << std::endl
;
47 cout
<< " max deviation from target [default: .01]" << std::endl
;
48 cout
<< " --upmap-pool <poolname> restrict upmap balancing to 1 or more pools" << std::endl
;
// Documented here as a bare flag (no <file> argument).
49 cout
<< " --upmap-save write modified OSDMap with upmap changes" << std::endl
;
// Renders the pg-upmap changes carried by pending_inc as "ceph osd ..."
// command lines and writes the accumulated text to fd with safe_write().
//   pending_inc: incremental whose old_pg_upmap, new_pg_upmap,
//                old_pg_upmap_items and new_pg_upmap_items are iterated.
//   fd:          file descriptor passed to safe_write().
// NOTE(review): the declarations of `ss` and `s`, the body of the inner
// new_pg_upmap loop, the condition guarding the error message and the
// closing braces are not visible in this view; presumably `ss` is a string
// stream and `s` holds its contents -- confirm.
53 void print_inc_upmaps(const OSDMap::Incremental
& pending_inc
, int fd
)
// One "rm-pg-upmap" command per entry of old_pg_upmap.
56 for (auto& i
: pending_inc
.old_pg_upmap
) {
57 ss
<< "ceph osd rm-pg-upmap " << i
<< std::endl
;
// One "pg-upmap" command per new mapping: the key first, then a loop over
// the mapped values in i.second.
59 for (auto& i
: pending_inc
.new_pg_upmap
) {
60 ss
<< "ceph osd pg-upmap " << i
.first
;
61 for (auto osd
: i
.second
) {
// One "rm-pg-upmap-items" command per entry of old_pg_upmap_items.
66 for (auto& i
: pending_inc
.old_pg_upmap_items
) {
67 ss
<< "ceph osd rm-pg-upmap-items " << i
<< std::endl
;
// One "pg-upmap-items" command per new item mapping: the key, followed by
// each pair in i.second printed as " <first> <second>".
69 for (auto& i
: pending_inc
.new_pg_upmap_items
) {
70 ss
<< "ceph osd pg-upmap-items " << i
.first
;
71 for (auto p
: i
.second
) {
72 ss
<< " " << p
.first
<< " " << p
.second
;
// Write the whole text in one safe_write() call; its result `r` is what the
// error message below reports through cpp_strerror().
77 int r
= safe_write(fd
, s
.c_str(), s
.size());
79 cerr
<< "error writing output: " << cpp_strerror(r
) << std::endl
;
// Entry point of the osdmap tool. From the statements visible here it:
// parses the command line into local option variables, reads an existing
// osdmap file or builds a simple new map, applies the requested actions
// (mark up/in, upmap cleanup and calculation, crush import/export, test
// mappings), prints the map or its tree, and encodes the map and writes it
// to the map file.
// NOTE(review): many lines of this function (declarations, branch
// conditions, closing braces, exit/return statements) are not visible in
// this view; the comments below describe only what the visible lines show.
84 int main(int argc
, const char **argv
)
// Copy the raw arguments into `args`, which the ceph_argparse_* helpers
// below operate on.
86 vector
<const char*> args
;
87 argv_to_vec(argc
, argv
, args
);
// Global initialisation with the client entity type, the utility code
// environment and the no-default-config-file flag.
90 auto cct
= global_init(NULL
, args
, CEPH_ENTITY_TYPE_CLIENT
,
91 CODE_ENVIRONMENT_UTILITY
,
92 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
93 common_init_finish(g_ceph_context
);
// Program name, used as a prefix in the diagnostics below.
95 const char *me
= argv
[0];
// Formatters for --dump / --tree; they are only set when a non-empty format
// other than "plain" is given (see the argument loop).
99 boost::scoped_ptr
<Formatter
> print_formatter
;
101 boost::scoped_ptr
<Formatter
> tree_formatter
;
// Option state filled in by the argument loop below.
102 bool createsimple
= false;
103 bool create_from_conf
= false;
// pg_bits / pgp_bits default to the configured osd_pg_bits / osd_pgp_bits.
105 int pg_bits
= g_conf
->osd_pg_bits
;
106 int pgp_bits
= g_conf
->osd_pgp_bits
;
107 bool clobber
= false;
108 bool modified
= false;
109 std::string export_crush
, import_crush
, test_map_pg
, test_map_object
;
110 bool test_crush
= false;
111 int range_first
= -1;
114 bool mark_up_in
= false;
115 bool clear_temp
= false;
116 bool test_map_pgs
= false;
117 bool test_map_pgs_dump
= false;
118 bool test_random
= false;
119 bool upmap_cleanup
= false;
121 bool upmap_save
= false;
// Destination for generated upmap commands; "-" keeps the stdout default
// (see the upmap_fd setup further down).
122 std::string upmap_file
= "-";
124 float upmap_deviation
= .01;
125 std::set
<std::string
> upmap_pools
;
127 bool test_map_pgs_dump_all
= false;
// Receives error text from the ceph_argparse_witharg calls that take `err`.
130 std::ostringstream err
;
// Walk the arguments; each branch of the chain handles one option.
131 for (std::vector
<const char*>::iterator i
= args
.begin(); i
!= args
.end(); ) {
132 if (ceph_argparse_double_dash(args
, i
)) {
134 } else if (ceph_argparse_flag(args
, i
, "-h", "--help", (char*)NULL
)) {
136 } else if (ceph_argparse_flag(args
, i
, "-p", "--print", (char*)NULL
)) {
// --dump <format>: create a formatter unless the format is empty or "plain".
138 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--dump", (char*)NULL
)) {
140 if (!val
.empty() && val
!= "plain") {
141 print_formatter
.reset(Formatter::create(val
, "", "json"));
// --tree <format>: same formatter rule as --dump.
143 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--tree", (char*)NULL
)) {
145 if (!val
.empty() && val
!= "plain") {
146 tree_formatter
.reset(Formatter::create(val
, "", "json"));
// --upmap-cleanup <file>: the value is stored in upmap_file.
148 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap-cleanup", (char*)NULL
)) {
149 upmap_cleanup
= true;
// NOTE(review): --upmap-save is parsed with ceph_argparse_witharg and its
// value is stored into upmap_file, so it appears to take an argument and to
// overwrite the upmap output path. The usage text lists --upmap-save with no
// argument; this looks like it should be a ceph_argparse_flag -- confirm.
150 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap-save", (char*)NULL
)) {
// --upmap <file>: also enables upmap_cleanup.
152 } else if (ceph_argparse_witharg(args
, i
, &upmap_file
, "--upmap", (char*)NULL
)) {
153 upmap_cleanup
= true;
155 } else if (ceph_argparse_witharg(args
, i
, &upmap_max
, err
, "--upmap-max", (char*)NULL
)) {
156 } else if (ceph_argparse_witharg(args
, i
, &upmap_deviation
, err
, "--upmap-deviation", (char*)NULL
)) {
// --upmap-pool may be given repeatedly; each name is added to upmap_pools.
157 } else if (ceph_argparse_witharg(args
, i
, &val
, "--upmap-pool", (char*)NULL
)) {
158 upmap_pools
.insert(val
);
// --createsimple <numosd>: report any parse error collected in `err`.
159 } else if (ceph_argparse_witharg(args
, i
, &num_osd
, err
, "--createsimple", (char*)NULL
)) {
160 if (!err
.str().empty()) {
161 cerr
<< err
.str() << std::endl
;
165 } else if (ceph_argparse_flag(args
, i
, "--create-from-conf", (char*)NULL
)) {
166 create_from_conf
= true;
167 } else if (ceph_argparse_flag(args
, i
, "--mark-up-in", (char*)NULL
)) {
169 } else if (ceph_argparse_flag(args
, i
, "--clear-temp", (char*)NULL
)) {
171 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs", (char*)NULL
)) {
173 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump", (char*)NULL
)) {
174 test_map_pgs_dump
= true;
175 } else if (ceph_argparse_flag(args
, i
, "--test-map-pgs-dump-all", (char*)NULL
)) {
176 test_map_pgs_dump_all
= true;
177 } else if (ceph_argparse_flag(args
, i
, "--test-random", (char*)NULL
)) {
179 } else if (ceph_argparse_flag(args
, i
, "--clobber", (char*)NULL
)) {
181 } else if (ceph_argparse_witharg(args
, i
, &pg_bits
, err
, "--pg_bits", (char*)NULL
)) {
182 if (!err
.str().empty()) {
183 cerr
<< err
.str() << std::endl
;
186 } else if (ceph_argparse_witharg(args
, i
, &pgp_bits
, err
, "--pgp_bits", (char*)NULL
)) {
187 if (!err
.str().empty()) {
188 cerr
<< err
.str() << std::endl
;
191 } else if (ceph_argparse_witharg(args
, i
, &val
, "--export_crush", (char*)NULL
)) {
193 } else if (ceph_argparse_witharg(args
, i
, &val
, "--import_crush", (char*)NULL
)) {
195 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_pg", (char*)NULL
)) {
197 } else if (ceph_argparse_witharg(args
, i
, &val
, "--test_map_object", (char*)NULL
)) {
198 test_map_object
= val
;
199 } else if (ceph_argparse_flag(args
, i
, "--test_crush", (char*)NULL
)) {
// --pg_num is read as a string and converted with strict_strtoll (base 10);
// a non-empty `interr` is reported as a parse failure.
201 } else if (ceph_argparse_witharg(args
, i
, &val
, err
, "--pg_num", (char*)NULL
)) {
203 pg_num
= strict_strtoll(val
.c_str(), 10, &interr
);
204 if (interr
.length() > 0) {
205 cerr
<< "error parsing integer value " << interr
<< std::endl
;
208 } else if (ceph_argparse_witharg(args
, i
, &range_first
, err
, "--range_first", (char*)NULL
)) {
209 } else if (ceph_argparse_witharg(args
, i
, &range_last
, err
, "--range_last", (char*)NULL
)) {
210 } else if (ceph_argparse_witharg(args
, i
, &pool
, err
, "--pool", (char*)NULL
)) {
211 if (!err
.str().empty()) {
212 cerr
<< err
.str() << std::endl
;
// After option parsing the remaining arguments must be the map file name:
// a missing name and more than one leftover argument are both reported.
220 cerr
<< me
<< ": must specify osdmap filename" << std::endl
;
223 else if (args
.size() > 1) {
224 cerr
<< me
<< ": too many arguments" << std::endl
;
// Range mode: when both range bounds are >= 0, iterate i over
// [range_first, range_last]. Each iteration reads the file named by `s` into
// `bl`, reports its length, allocates an OSDMap and hands it to
// OSDMap::dedup together with `prev`.
229 if (range_first
>= 0 && range_last
>= 0) {
232 for (int i
=range_first
; i
<= range_last
; i
++) {
236 string error
, s
= f
.str();
237 int r
= bl
.read_file(s
.c_str(), &error
);
239 cerr
<< "unable to read " << s
<< ": " << cpp_strerror(r
) << std::endl
;
242 cout
<< s
<< " got " << bl
.length() << " bytes" << std::endl
;
// NOTE(review): raw `new`; where (or whether) this object is released is not
// visible in this view -- confirm it is not leaked.
243 OSDMap
*o
= new OSDMap
;
247 OSDMap::dedup(prev
, o
);
256 cerr
<< me
<< ": osdmap file '" << fn
<< "'" << std::endl
;
// Unless a map is being created or clobbered, read the existing file;
// decode failures (buffer::error) and read failures are reported.
260 if (!createsimple
&& !create_from_conf
&& !clobber
) {
262 r
= bl
.read_file(fn
.c_str(), &error
);
267 catch (const buffer::error
&e
) {
268 cerr
<< me
<< ": error decoding osdmap '" << fn
<< "'" << std::endl
;
273 cerr
<< me
<< ": couldn't open " << fn
<< ": " << error
<< std::endl
;
// When creating, refuse to overwrite an existing file unless --clobber.
277 else if ((createsimple
|| create_from_conf
) && !clobber
&& ::stat(fn
.c_str(), &st
) == 0) {
278 cerr
<< me
<< ": " << fn
<< " exists, --clobber to overwrite" << std::endl
;
// Creation path: validate the osd count, zero the fsid and build a simple
// map from num_osd, pg_bits and pgp_bits.
282 if (createsimple
|| create_from_conf
) {
285 cerr
<< me
<< ": osd count must be > 0" << std::endl
;
292 memset(&fsid
, 0, sizeof(uuid_d
));
293 osdmap
.build_simple(g_ceph_context
, 0, fsid
, num_osd
, pg_bits
, pgp_bits
);
// Mark every osd id below max_osd up (CEPH_OSD_UP bit), set its weight to
// CEPH_OSD_IN and its crush item weight to 1.0.
298 cout
<< "marking all OSDs up and in" << std::endl
;
299 int n
= osdmap
.get_max_osd();
300 for (int i
=0; i
<n
; i
++) {
301 osdmap
.set_state(i
, osdmap
.get_state(i
) | CEPH_OSD_UP
);
302 osdmap
.set_weight(i
, CEPH_OSD_IN
);
303 osdmap
.crush
->adjust_item_weightf(g_ceph_context
, i
, 1.0);
307 cout
<< "clearing pg/primary temp" << std::endl
;
// Output descriptor for upmap commands: stdout by default, or upmap_file
// opened write-only (created with mode 0644 if missing) when it is not "-".
// NOTE(review): the open flags are O_CREAT|O_WRONLY without O_TRUNC, so an
// existing, longer file would keep its old tail after the new commands --
// confirm whether truncation is intended.
310 int upmap_fd
= STDOUT_FILENO
;
311 if (upmap
|| upmap_cleanup
) {
312 if (upmap_file
!= "-") {
313 upmap_fd
= ::open(upmap_file
.c_str(), O_CREAT
|O_WRONLY
, 0644);
315 cerr
<< "error opening " << upmap_file
<< ": " << cpp_strerror(errno
)
319 cout
<< "writing upmap command output to: " << upmap_file
<< std::endl
;
// Upmap cleanup: build an incremental for epoch+1, let clean_pg_upmaps fill
// it, print the resulting commands and apply the incremental to the map.
323 cout
<< "checking for upmap cleanups" << std::endl
;
324 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
325 pending_inc
.fsid
= osdmap
.get_fsid();
326 int r
= osdmap
.clean_pg_upmaps(g_ceph_context
, &pending_inc
);
328 print_inc_upmaps(pending_inc
, upmap_fd
);
329 r
= osdmap
.apply_incremental(pending_inc
);
// Upmap calculation: again uses a fresh incremental for epoch+1.
334 cout
<< "upmap, max-count " << upmap_max
335 << ", max deviation " << upmap_deviation
337 OSDMap::Incremental
pending_inc(osdmap
.get_epoch()+1);
338 pending_inc
.fsid
= osdmap
.get_fsid();
// Look up each --upmap-pool name; unknown names are reported.
340 for (auto& s
: upmap_pools
) {
341 int64_t p
= osdmap
.lookup_pg_pool_name(s
);
343 cerr
<< " pool '" << s
<< "' does not exist" << std::endl
;
349 cout
<< " limiting to pools " << upmap_pools
<< " (" << pools
<< ")"
351 int changed
= osdmap
.calc_pg_upmaps(
352 g_ceph_context
, upmap_deviation
,
356 print_inc_upmaps(pending_inc
, upmap_fd
);
358 int r
= osdmap
.apply_incremental(pending_inc
);
363 cout
<< "no upmaps proposed" << std::endl
;
366 if (upmap_file
!= "-") {
// Crush import: read the crush map file into `cbl`, reject it if it has
// more devices than the osdmap's max_osd, then apply an incremental for
// epoch+1.
370 if (!import_crush
.empty()) {
373 r
= cbl
.read_file(import_crush
.c_str(), &error
);
375 cerr
<< me
<< ": error reading crush map from " << import_crush
376 << ": " << error
<< std::endl
;
382 bufferlist::iterator p
= cbl
.begin();
385 if (cw
.get_max_devices() > osdmap
.get_max_osd()) {
386 cerr
<< me
<< ": crushmap max_devices " << cw
.get_max_devices()
387 << " > osdmap max_osd " << osdmap
.get_max_osd() << std::endl
;
392 OSDMap::Incremental inc
;
393 inc
.fsid
= osdmap
.get_fsid();
394 inc
.epoch
= osdmap
.get_epoch()+1;
396 osdmap
.apply_incremental(inc
);
397 cout
<< me
<< ": imported " << cbl
.length() << " byte crush map from " << import_crush
<< std::endl
;
// Crush export: encode the map's crush into `cbl` and write it to
// export_crush.
401 if (!export_crush
.empty()) {
403 osdmap
.crush
->encode(cbl
, CEPH_FEATURES_SUPPORTED_DEFAULT
);
404 r
= cbl
.write_file(export_crush
.c_str());
// NOTE(review): this failure message prints import_crush although the file
// being written is export_crush -- looks like a copy/paste slip; confirm.
406 cerr
<< me
<< ": error writing crush map to " << import_crush
<< std::endl
;
409 cout
<< me
<< ": exported crush map to " << export_crush
<< std::endl
;
// --test-map-object: map the named object through the chosen pool to a pg
// and then to its acting osds.
412 if (!test_map_object
.empty()) {
413 object_t
oid(test_map_object
);
415 cout
<< me
<< ": assuming pool 0 (use --pool to override)" << std::endl
;
418 if (!osdmap
.have_pg_pool(pool
)) {
419 cerr
<< "There is no pool " << pool
<< std::endl
;
422 object_locator_t
loc(pool
);
423 pg_t raw_pgid
= osdmap
.object_locator_to_pg(oid
, loc
);
424 pg_t pgid
= osdmap
.raw_pg_to_pg(raw_pgid
);
427 osdmap
.pg_to_acting_osds(pgid
, acting
);
428 cout
<< " object '" << oid
// --test-map-pg: parse the pg id, then print its raw, up and acting osd
// sets with their primaries.
433 if (!test_map_pg
.empty()) {
435 if (!pgid
.parse(test_map_pg
.c_str())) {
// NOTE(review): the message opens a single quote before the pg string but
// never closes it.
436 cerr
<< me
<< ": failed to parse pg '" << test_map_pg
<< std::endl
;
439 cout
<< " parsed '" << test_map_pg
<< "' -> " << pgid
<< std::endl
;
441 vector
<int> raw
, up
, acting
;
442 int raw_primary
, up_primary
, acting_primary
;
443 osdmap
.pg_to_raw_osds(pgid
, &raw
, &raw_primary
);
444 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
445 &acting
, &acting_primary
);
446 cout
<< pgid
<< " raw (" << raw
<< ", p" << raw_primary
447 << ") up (" << up
<< ", p" << up_primary
448 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
// --test-map-pgs / -dump / -dump-all: map every pg of the selected pool(s)
// and gather per-osd statistics.
451 if (test_map_pgs
|| test_map_pgs_dump
|| test_map_pgs_dump_all
) {
452 if (pool
!= -1 && !osdmap
.have_pg_pool(pool
)) {
453 cerr
<< "There is no pool " << pool
<< std::endl
;
// Per-osd counters are sized by max_osd; `size` has 30 zeroed buckets.
456 int n
= osdmap
.get_max_osd();
457 vector
<int> count(n
, 0);
458 vector
<int> first_count(n
, 0);
459 vector
<int> primary_count(n
, 0);
460 vector
<int> size(30, 0);
// Iterate all pools; the test below compares against --pool when one was
// given (pool != -1).
463 auto& pools
= osdmap
.get_pools();
464 for (auto p
= pools
.begin(); p
!= pools
.end(); ++p
) {
465 if (pool
!= -1 && p
->first
!= pool
)
468 p
->second
.set_pg_num(pg_num
);
470 cout
<< "pool " << p
->first
471 << " pg_num " << p
->second
.get_pg_num() << std::endl
;
472 for (unsigned i
= 0; i
< p
->second
.get_pg_num(); ++i
) {
473 pg_t pgid
= pg_t(i
, p
->first
);
475 vector
<int> osds
, raw
, up
, acting
;
476 int primary
, calced_primary
, up_primary
, acting_primary
;
// Random placement: fill `osds` (pool size entries) with rand() % max_osd.
// The condition selecting this branch is not visible in this view.
478 osds
.resize(p
->second
.size
);
479 for (unsigned i
=0; i
<osds
.size(); ++i
) {
480 osds
[i
] = rand() % osdmap
.get_max_osd();
483 } else if (test_map_pgs_dump_all
) {
484 osdmap
.pg_to_raw_osds(pgid
, &raw
, &calced_primary
);
485 osdmap
.pg_to_up_acting_osds(pgid
, &up
, &up_primary
,
486 &acting
, &acting_primary
);
488 osdmap
.pg_to_acting_osds(pgid
, &osds
, &primary
);
492 if (test_map_pgs_dump
) {
493 cout
<< pgid
<< "\t" << osds
<< "\t" << primary
<< std::endl
;
494 } else if (test_map_pgs_dump_all
) {
495 cout
<< pgid
<< " raw (" << raw
<< ", p" << calced_primary
496 << ") up (" << up
<< ", p" << up_primary
497 << ") acting (" << acting
<< ", p" << acting_primary
<< ")"
501 for (unsigned i
=0; i
<osds
.size(); i
++) {
502 //cout << " rep " << i << " on " << osds[i] << std::endl;
// NOTE(review): osds[0] and `primary` are used as indexes here; guards for
// an empty `osds` or a negative `primary` are not visible in this view --
// confirm they exist.
506 first_count
[osds
[0]]++;
508 primary_count
[primary
]++;
// Summary table: one row per osd, filtered by is_in() and by a positive
// crush item weight.
516 cout
<< "#osd\tcount\tfirst\tprimary\tc wt\twt\n";
517 for (int i
=0; i
<n
; i
++) {
518 if (!osdmap
.is_in(i
))
520 if (osdmap
.crush
->get_item_weight(i
) <= 0)
525 << "\t" << first_count
[i
]
526 << "\t" << primary_count
[i
]
527 << "\t" << osdmap
.crush
->get_item_weightf(i
)
528 << "\t" << osdmap
.get_weightf(i
)
533 count
[i
] < count
[min_osd
]))
537 count
[i
] > count
[max_osd
]))
// Average count per `in` osd; 0 when `in` is 0.
541 uint64_t avg
= in
? (total
/ in
) : 0;
// Accumulate the squared difference between avg and each osd's count.
543 for (int i
=0; i
<n
; i
++) {
544 if (!osdmap
.is_in(i
))
546 if (osdmap
.crush
->get_item_weight(i
) <= 0)
548 dev
+= (avg
- count
[i
]) * (avg
- count
[i
]);
553 //double edev = sqrt(pgavg) * (double)avg / pgavg;
// Expected deviation: sqrt(total/in * (1 - 1/in)).
// NOTE(review): `in` and `avg` are divisors below and avg can be 0 per the
// assignment above -- confirm the zero case is acceptable.
554 double edev
= sqrt((double)total
/ (double)in
* (1.0 - (1.0 / (double)in
)));
555 cout
<< " in " << in
<< std::endl
;
556 cout
<< " avg " << avg
558 << " (" << (dev
/avg
) << "x)"
559 << " (expected " << edev
<< " " << (edev
/avg
) << "x))"
563 cout
<< " min osd." << min_osd
<< " " << count
[min_osd
] << std::endl
;
565 cout
<< " max osd." << max_osd
<< " " << count
[max_osd
] << std::endl
;
// Print the first four buckets of the `size` histogram.
567 for (int i
=0; i
<4; i
++) {
568 cout
<< "size " << i
<< "\t" << size
[i
] << std::endl
;
// Repeated-mapping test: for every pg of every pool, map it 100 times and
// print the stored (`m[pgid]`) and new (`r`) results.
574 cout
<< "pass " << ++pass
<< std::endl
;
576 ceph::unordered_map
<pg_t
,vector
<int> > m
;
577 for (map
<int64_t,pg_pool_t
>::const_iterator p
= osdmap
.get_pools().begin();
578 p
!= osdmap
.get_pools().end();
580 const pg_pool_t
*pool
= osdmap
.get_pg_pool(p
->first
);
581 for (ps_t ps
= 0; ps
< pool
->get_pg_num(); ps
++) {
582 pg_t
pgid(ps
, p
->first
, -1);
583 for (int i
=0; i
<100; i
++) {
584 cout
<< pgid
<< " attempt " << i
<< std::endl
;
587 osdmap
.pg_to_acting_osds(pgid
, r
);
588 //cout << pgid << " " << r << std::endl;
591 cout
<< pgid
<< " had " << m
[pgid
] << " now " << r
<< std::endl
;
// If none of the actions was requested, complain.
602 if (!print
&& !tree
&& !modified
&&
603 export_crush
.empty() && import_crush
.empty() &&
604 test_map_pg
.empty() && test_map_object
.empty() &&
605 !test_map_pgs
&& !test_map_pgs_dump
&& !test_map_pgs_dump_all
&&
606 !upmap
&& !upmap_cleanup
) {
607 cerr
<< me
<< ": no action specified?" << std::endl
;
// Dump the map through the formatter when one was created for --dump.
615 if (print_formatter
) {
616 print_formatter
->open_object_section("osdmap");
617 osdmap
.dump(print_formatter
.get());
618 print_formatter
->close_section();
619 print_formatter
->flush(cout
);
// Tree output: through the formatter when one was created for --tree,
// otherwise print_tree writes directly to cout.
626 if (tree_formatter
) {
627 tree_formatter
->open_object_section("tree");
628 osdmap
.print_tree(tree_formatter
.get(), NULL
);
629 tree_formatter
->close_section();
630 tree_formatter
->flush(cout
);
633 osdmap
.print_tree(NULL
, &cout
);
// Encode the map with the default supported features plus
// CEPH_FEATURE_RESERVED and write the buffer to the map file.
638 osdmap
.encode(bl
, CEPH_FEATURES_SUPPORTED_DEFAULT
| CEPH_FEATURE_RESERVED
);
641 cout
<< me
<< ": writing epoch " << osdmap
.get_epoch()
644 int r
= bl
.write_file(fn
.c_str());
646 cerr
<< "osdmaptool: error writing to '" << fn
<< "': "
647 << cpp_strerror(r
) << std::endl
;