#include "global/signal_handler.h"
#include "common/CDC.h"
+using namespace std;
+
struct EstimateResult {
std::unique_ptr<CDC> cdc;
/**
 * Print the ceph-dedup-tool command-line help to stdout and terminate the
 * process with exit status 1. This function does not return.
 *
 * The synopsis lists one bracketed line per supported --op value together
 * with the options that operation takes.
 */
void usage()
{
  std::cout <<
"usage: \n"
" ceph-dedup-tool \n"
" [--op estimate --pool POOL --chunk-size CHUNK_SIZE --chunk-algorithm ALGO --fingerprint-algorithm FP_ALGO] \n"
" [--op chunk-scrub --chunk-pool POOL] \n"
" [--op chunk-get-ref --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n"
" [--op chunk-put-ref --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n"
" [--op chunk-repair --chunk-pool POOL --object OID --target-ref OID --target-ref-pool-id POOL_ID] \n"
" [--op dump-chunk-refs --chunk-pool POOL --object OID] \n"
" [--op chunk-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --source-off OFFSET --source-length LENGTH] \n"
" [--op object-dedup --pool POOL --object OID --chunk-pool POOL --fingerprint-algorithm FP --dedup-cdc-chunk-size CHUNK_SIZE] \n"
  << std::endl;
  std::cout << "optional arguments: " << std::endl;
  std::cout << " --object <object_name> " << std::endl;
  std::cout << " --chunk-size <size> chunk-size (byte) " << std::endl;
  std::cout << " --chunk-algorithm <fixed|fastcdc> " << std::endl;
  std::cout << " --report-period <seconds> " << std::endl;
  std::cout << " --max-seconds <seconds>" << std::endl;
  std::cout << " --max-read-size <bytes> " << std::endl;
  std::cout << "explanations: " << std::endl;
  std::cout << " chunk-dedup performs deduplication using a chunk generated by given source" << std::endl;
  std::cout << " offset and length. object-dedup deduplicates the entire object, not a chunk" << std::endl;
  // Help is only shown on request or on bad arguments; either way the tool
  // stops here with a non-zero status.
  exit(1);
}
template <typename I, typename T>
static int rados_sistrtoll(I &i, T *val) {
std::string err;
- *val = strict_iecstrtoll(i->second.c_str(), &err);
+ *val = strict_iecstrtoll(i->second, &err);
if (err != "") {
cerr << "Invalid value for " << i->first << ": " << err << std::endl;
return -EINVAL;
}
if (op_name == "chunk-get-ref" ||
- op_name == "chunk-put-ref") {
+ op_name == "chunk-put-ref" ||
+ op_name == "chunk-repair") {
string target_object_name;
uint64_t pool_id;
i = opts.find("object");
}
hobject_t oid(sobject_t(target_object_name, CEPH_NOSNAP), "", hash, pool_id, "");
+ auto run_op = [] (ObjectWriteOperation& op, hobject_t& oid,
+ string& object_name, IoCtx& chunk_io_ctx) -> int {
+ int ret = chunk_io_ctx.operate(object_name, &op);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ }
+ return ret;
+ };
+
ObjectWriteOperation op;
if (op_name == "chunk-get-ref") {
cls_cas_chunk_get_ref(op, oid);
- } else {
+ ret = run_op(op, oid, object_name, chunk_io_ctx);
+ } else if (op_name == "chunk-put-ref") {
cls_cas_chunk_put_ref(op, oid);
+ ret = run_op(op, oid, object_name, chunk_io_ctx);
+ } else if (op_name == "chunk-repair") {
+ ret = rados.ioctx_create2(pool_id, io_ctx);
+ if (ret < 0) {
+ cerr << oid << " ref " << pool_id
+ << ": referencing pool does not exist" << std::endl;
+ return ret;
+ }
+ int chunk_ref = -1, base_ref = -1;
+ // read object on chunk pool to know how many reference the object has
+ bufferlist t;
+ ret = chunk_io_ctx.getxattr(object_name, CHUNK_REFCOUNT_ATTR, t);
+ if (ret < 0) {
+ return ret;
+ }
+ chunk_refs_t refs;
+ auto p = t.cbegin();
+ decode(refs, p);
+ if (refs.get_type() != chunk_refs_t::TYPE_BY_OBJECT) {
+ cerr << " does not supported chunk type " << std::endl;
+ return -1;
+ }
+ chunk_ref =
+ static_cast<chunk_refs_by_object_t*>(refs.r.get())->by_object.count(oid);
+ if (chunk_ref < 0) {
+ cerr << object_name << " has no reference of " << target_object_name
+ << std::endl;
+ return chunk_ref;
+ }
+ cout << object_name << " has " << chunk_ref << " references for "
+ << target_object_name << std::endl;
+
+ // read object on base pool to know the number of chunk object's references
+ base_ref = cls_cas_references_chunk(io_ctx, target_object_name, object_name);
+ if (base_ref < 0) {
+ if (base_ref == -ENOENT || base_ref == -ENOLINK) {
+ base_ref = 0;
+ } else {
+ return base_ref;
+ }
+ }
+ cout << target_object_name << " has " << base_ref << " references for "
+ << object_name << std::endl;
+ if (chunk_ref != base_ref) {
+ if (base_ref > chunk_ref) {
+ cerr << "error : " << target_object_name << "'s ref. < " << object_name
+ << "' ref. " << std::endl;
+ return -EINVAL;
+ }
+ cout << " fix dangling reference from " << chunk_ref << " to " << base_ref
+ << std::endl;
+ while (base_ref != chunk_ref) {
+ ObjectWriteOperation op;
+ cls_cas_chunk_put_ref(op, oid);
+ chunk_ref--;
+ ret = run_op(op, oid, object_name, chunk_io_ctx);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
}
- ret = chunk_io_ctx.operate(object_name, &op);
- if (ret < 0) {
- cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
- }
-
return ret;
} else if (op_name == "dump-chunk-refs") {
return (ret < 0) ? 1 : 0;
}
/// Escape @p raw so it can be placed between the double quotes of a JSON
/// string: `"` and `\` get a leading backslash, and bytes below 0x20 are
/// written as a six-character unicode escape (backslash, 'u', four hex digits).
/// All other bytes are copied through unchanged.
static std::string make_pool_str_escape(const std::string& raw)
{
  static constexpr char hex_digits[] = "0123456789abcdef";
  std::string escaped;
  escaped.reserve(raw.size());
  for (const char c : raw) {
    const auto byte = static_cast<unsigned char>(c);
    if (c == '"' || c == '\\') {
      escaped += '\\';
      escaped += c;
    } else if (byte < 0x20) {
      escaped += '\\';
      escaped += "u00";
      escaped += hex_digits[byte >> 4];
      escaped += hex_digits[byte & 0x0f];
    } else {
      escaped += c;
    }
  }
  return escaped;
}

/**
 * Build the JSON text of an "osd pool set" command.
 *
 * @param pool  name of the pool whose option is being set
 * @param var   option name
 * @param val   option value, sent as a JSON string
 * @return      the command as a JSON object with the keys "prefix", "pool",
 *              "var" and "val"
 *
 * The three inputs are escaped before being embedded, so a quote or
 * backslash in any of them cannot break or alter the generated command.
 * For inputs without such characters the output is unchanged.
 */
std::string make_pool_str(const std::string& pool, const std::string& var,
                          const std::string& val)
{
  std::string cmd = "{\"prefix\": \"osd pool set\",\"pool\":\"";
  cmd += make_pool_str_escape(pool);
  cmd += "\",\"var\": \"";
  cmd += make_pool_str_escape(var);
  cmd += "\",\"val\": \"";
  cmd += make_pool_str_escape(val);
  cmd += "\"}";
  return cmd;
}
+
+string make_pool_str(string pool, string var, int val)
+{
+ return make_pool_str(pool, var, stringify(val));
+}
+
+int make_dedup_object(const std::map < std::string, std::string > &opts,
+ std::vector<const char*> &nargs)
+{
+ Rados rados;
+ IoCtx io_ctx, chunk_io_ctx;
+ std::string object_name, chunk_pool_name, op_name, pool_name, fp_algo;
+ int ret;
+ std::map<std::string, std::string>::const_iterator i;
+
+ i = opts.find("op_name");
+ if (i != opts.end()) {
+ op_name = i->second;
+ } else {
+ cerr << "must specify op" << std::endl;
+ exit(1);
+ }
+ i = opts.find("pool");
+ if (i != opts.end()) {
+ pool_name = i->second;
+ } else {
+ cerr << "must specify --pool" << std::endl;
+ exit(1);
+ }
+ i = opts.find("object");
+ if (i != opts.end()) {
+ object_name = i->second;
+ } else {
+ cerr << "must specify object" << std::endl;
+ exit(1);
+ }
+
+ i = opts.find("chunk-pool");
+ if (i != opts.end()) {
+ chunk_pool_name = i->second;
+ } else {
+ cerr << "must specify --chunk-pool" << std::endl;
+ exit(1);
+ }
+ i = opts.find("pgid");
+ boost::optional<pg_t> pgid(i != opts.end(), pg_t());
+
+ ret = rados.init_with_context(g_ceph_context);
+ if (ret < 0) {
+ cerr << "couldn't initialize rados: " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ ret = rados.connect();
+ if (ret) {
+ cerr << "couldn't connect to cluster: " << cpp_strerror(ret) << std::endl;
+ ret = -1;
+ goto out;
+ }
+ ret = rados.ioctx_create(pool_name.c_str(), io_ctx);
+ if (ret < 0) {
+ cerr << "error opening pool "
+ << chunk_pool_name << ": "
+ << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ ret = rados.ioctx_create(chunk_pool_name.c_str(), chunk_io_ctx);
+ if (ret < 0) {
+ cerr << "error opening pool "
+ << chunk_pool_name << ": "
+ << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ i = opts.find("fingerprint-algorithm");
+ if (i != opts.end()) {
+ fp_algo = i->second.c_str();
+ if (fp_algo != "sha1"
+ && fp_algo != "sha256" && fp_algo != "sha512") {
+ cerr << "unrecognized fingerprint-algorithm " << fp_algo << std::endl;
+ exit(1);
+ }
+ }
+
+ if (op_name == "chunk-dedup") {
+ uint64_t offset, length;
+ string chunk_object;
+ i = opts.find("source-off");
+ if (i != opts.end()) {
+ if (rados_sistrtoll(i, &offset)) {
+ return -EINVAL;
+ }
+ } else {
+ cerr << "must specify --source-off" << std::endl;
+ exit(1);
+ }
+ i = opts.find("source-length");
+ if (i != opts.end()) {
+ if (rados_sistrtoll(i, &length)) {
+ return -EINVAL;
+ }
+ } else {
+ cerr << "must specify --source-off" << std::endl;
+ exit(1);
+ }
+ // 1. make a copy from manifest object to chunk object
+ bufferlist bl;
+ ret = io_ctx.read(object_name, bl, length, offset);
+ if (ret < 0) {
+ cerr << " reading object in base pool fails : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ chunk_object = [&fp_algo, &bl]() -> string {
+ if (fp_algo == "sha1") {
+ return ceph::crypto::digest<ceph::crypto::SHA1>(bl).to_str();
+ } else if (fp_algo == "sha256") {
+ return ceph::crypto::digest<ceph::crypto::SHA256>(bl).to_str();
+ } else if (fp_algo == "sha512") {
+ return ceph::crypto::digest<ceph::crypto::SHA512>(bl).to_str();
+ } else {
+ assert(0 == "unrecognized fingerprint type");
+ return {};
+ }
+ }();
+ ret = chunk_io_ctx.write(chunk_object, bl, length, offset);
+ if (ret < 0) {
+ cerr << " writing object in chunk pool fails : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ // 2. call set_chunk
+ ObjectReadOperation op;
+ op.set_chunk(offset, length, chunk_io_ctx, chunk_object, 0,
+ CEPH_OSD_OP_FLAG_WITH_REFERENCE);
+ ret = io_ctx.operate(object_name, &op, NULL);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ } else if (op_name == "object-dedup") {
+ unsigned chunk_size;
+ i = opts.find("dedup-cdc-chunk-size");
+ if (i != opts.end()) {
+ if (rados_sistrtoll(i, &chunk_size)) {
+ cerr << "unrecognized dedup_cdc_chunk_size " << chunk_size << std::endl;
+ return -EINVAL;
+ }
+ }
+
+ bufferlist inbl;
+ ret = rados.mon_command(
+ make_pool_str(pool_name, "fingerprint_algorithm", fp_algo),
+ inbl, NULL, NULL);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ return ret;
+ }
+ ret = rados.mon_command(
+ make_pool_str(pool_name, "dedup_tier", chunk_pool_name),
+ inbl, NULL, NULL);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ return ret;
+ }
+ ret = rados.mon_command(
+ make_pool_str(pool_name, "dedup_chunk_algorithm", "fastcdc"),
+ inbl, NULL, NULL);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ return ret;
+ }
+ ret = rados.mon_command(
+ make_pool_str(pool_name, "dedup_cdc_chunk_size", chunk_size),
+ inbl, NULL, NULL);
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ return ret;
+ }
+
+ /*
+ * TODO: add a better way to make an object a manifest object.
+ * We're using set_chunk with an incorrect object here simply to make
+ * the object a manifest object, the tier_flush() will remove
+ * it and replace it with the real contents.
+ */
+ // convert object to manifest object
+ ObjectWriteOperation op;
+ bufferlist temp;
+ temp.append("temp");
+ op.write_full(temp);
+
+ auto gen_r_num = [] () -> string {
+ std::random_device rd;
+ std::mt19937 gen(rd());
+ std::uniform_int_distribution<uint64_t> dist;
+ uint64_t r_num = dist(gen);
+ return to_string(r_num);
+ };
+ string temp_oid = gen_r_num();
+ // create temp chunk object for set-chunk
+ ret = chunk_io_ctx.operate(temp_oid, &op);
+ if (ret == -EEXIST) {
+ // one more try
+ temp_oid = gen_r_num();
+ ret = chunk_io_ctx.operate(temp_oid, &op);
+ }
+ if (ret < 0) {
+ cerr << " operate fail : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+
+ // set-chunk to make manifest object
+ ObjectReadOperation chunk_op;
+ chunk_op.set_chunk(0, 4, chunk_io_ctx, temp_oid, 0,
+ CEPH_OSD_OP_FLAG_WITH_REFERENCE);
+ ret = io_ctx.operate(object_name, &chunk_op, NULL);
+ if (ret < 0) {
+ cerr << " set_chunk fail : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+
+ // tier-flush to perform deduplication
+ ObjectReadOperation flush_op;
+ flush_op.tier_flush();
+ ret = io_ctx.operate(object_name, &flush_op, NULL);
+ if (ret < 0) {
+ cerr << " tier_flush fail : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+
+ // tier-evict
+ ObjectReadOperation evict_op;
+ evict_op.tier_evict();
+ ret = io_ctx.operate(object_name, &evict_op, NULL);
+ if (ret < 0) {
+ cerr << " tier_evict fail : " << cpp_strerror(ret) << std::endl;
+ goto out;
+ }
+ }
+
+out:
+ return (ret < 0) ? 1 : 0;
+}
+
int main(int argc, const char **argv)
{
- vector<const char*> args;
- argv_to_vec(argc, argv, args);
+ auto args = argv_to_vec(argc, argv);
if (args.empty()) {
cerr << argv[0] << ": -h or --help for usage" << std::endl;
exit(1);
opts["min-chunk-size"] = val;
} else if (ceph_argparse_witharg(args, i, &val, "--max-chunk-size", (char*)NULL)) {
opts["max-chunk-size"] = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--chunk-object", (char*)NULL)) {
+ opts["chunk-object"] = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--source-off", (char*)NULL)) {
+ opts["source-off"] = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--source-length", (char*)NULL)) {
+ opts["source-length"] = val;
+ } else if (ceph_argparse_witharg(args, i, &val, "--dedup-cdc-chunk-size", (char*)NULL)) {
+ opts["dedup-cdc-chunk-size"] = val;
} else if (ceph_argparse_flag(args, i, "--debug", (char*)NULL)) {
opts["debug"] = "true";
} else {
if (op_name == "estimate") {
return estimate_dedup_ratio(opts, args);
- } else if (op_name == "chunk-scrub") {
- return chunk_scrub_common(opts, args);
- } else if (op_name == "chunk-get-ref" ||
- op_name == "chunk-put-ref") {
- return chunk_scrub_common(opts, args);
- } else if (op_name == "dump-chunk-refs") {
+ } else if (op_name == "chunk-scrub" ||
+ op_name == "chunk-get-ref" ||
+ op_name == "chunk-put-ref" ||
+ op_name == "chunk-repair" ||
+ op_name == "dump-chunk-refs") {
return chunk_scrub_common(opts, args);
+ } else if (op_name == "chunk-dedup" ||
+ op_name == "object-dedup") {
+ /*
+ * chunk-dedup:
+ * using a chunk generated by given source,
+ * create a new object in the chunk pool or increase the reference
+ * if the object exists
+ *
+ * object-dedup:
+ * perform deduplication on the entire object, not a chunk.
+ *
+ */
+ return make_dedup_object(opts, args);
} else {
cerr << "unrecognized op " << op_name << std::endl;
exit(1);