]> git.proxmox.com Git - mirror_qemu.git/blame - block/rbd.c
target/i386: fix feature check in hyperv-stub.c
[mirror_qemu.git] / block / rbd.c
CommitLineData
f27aaf4b
CB
1/*
2 * QEMU Block driver for RADOS (Ceph)
3 *
ad32e9c0
JD
4 * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
5 * Josh Durgin <josh.durgin@dreamhost.com>
f27aaf4b
CB
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2. See
8 * the COPYING file in the top-level directory.
9 *
6b620ca3
PB
10 * Contributions after 2012-01-13 are licensed under the terms of the
11 * GNU GPL, version 2 or (at your option) any later version.
f27aaf4b
CB
12 */
13
80c71a24 14#include "qemu/osdep.h"
ad32e9c0 15
2836284d 16#include <rbd/librbd.h>
da34e65c 17#include "qapi/error.h"
1de7afc9 18#include "qemu/error-report.h"
0b8fa32f 19#include "qemu/module.h"
922a01a0 20#include "qemu/option.h"
737e150e 21#include "block/block_int.h"
609f45ea 22#include "block/qdict.h"
60390a21 23#include "crypto/secret.h"
f348b6d1 24#include "qemu/cutils.h"
c7cacb3e 25#include "qapi/qmp/qstring.h"
452fcdbc 26#include "qapi/qmp/qdict.h"
e98c6961 27#include "qapi/qmp/qjson.h"
47e6b297 28#include "qapi/qmp/qlist.h"
4bfb2741
KW
29#include "qapi/qobject-input-visitor.h"
30#include "qapi/qapi-visit-block-core.h"
f27aaf4b 31
f27aaf4b
CB
32/*
33 * When specifying the image filename use:
34 *
fab5cf59 35 * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
f27aaf4b 36 *
9e1fbcde 37 * poolname must be the name of an existing rados pool.
f27aaf4b 38 *
9e1fbcde 39 * devicename is the name of the rbd image.
f27aaf4b 40 *
9e1fbcde
SW
41 * Each option given is used to configure rados, and may be any valid
42 * Ceph option, "id", or "conf".
fab5cf59 43 *
9e1fbcde
SW
44 * The "id" option indicates what user we should authenticate as to
45 * the Ceph cluster. If it is excluded we will use the Ceph default
46 * (normally 'admin').
f27aaf4b 47 *
9e1fbcde
SW
48 * The "conf" option specifies a Ceph configuration file to read. If
49 * it is not specified, we will read from the default Ceph locations
50 * (e.g., /etc/ceph/ceph.conf). To avoid reading _any_ configuration
51 * file, specify conf=/dev/null.
f27aaf4b 52 *
9e1fbcde
SW
53 * Configuration values containing :, @, or = can be escaped with a
54 * leading "\".
f27aaf4b
CB
55 */
56
787f3133
JD
57/* rbd_aio_discard added in 0.1.2 */
58#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 2)
59#define LIBRBD_SUPPORTS_DISCARD
60#else
61#undef LIBRBD_SUPPORTS_DISCARD
62#endif
63
f27aaf4b
CB
64#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)
65
ad32e9c0
JD
66#define RBD_MAX_SNAPS 100
67
1d393bde 68/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
69#ifdef LIBRBD_SUPPORTS_IOVEC
70#define LIBRBD_USE_IOVEC 1
71#else
72#define LIBRBD_USE_IOVEC 0
73#endif
74
787f3133
JD
/* Kind of asynchronous request carried by an RBDAIOCB. */
typedef enum {
    RBD_AIO_READ,       /* read into the caller's I/O vector */
    RBD_AIO_WRITE,      /* write from the caller's I/O vector */
    RBD_AIO_DISCARD,    /* discard a byte range */
    RBD_AIO_FLUSH       /* flush the image */
} RBDAIOCmd;
81
f27aaf4b 82typedef struct RBDAIOCB {
7c84b1b8 83 BlockAIOCB common;
08448d51 84 int64_t ret;
f27aaf4b
CB
85 QEMUIOVector *qiov;
86 char *bounce;
787f3133 87 RBDAIOCmd cmd;
f27aaf4b
CB
88 int error;
89 struct BDRVRBDState *s;
f27aaf4b
CB
90} RBDAIOCB;
91
92typedef struct RADOSCB {
f27aaf4b
CB
93 RBDAIOCB *acb;
94 struct BDRVRBDState *s;
ad32e9c0 95 int64_t size;
f27aaf4b 96 char *buf;
08448d51 97 int64_t ret;
f27aaf4b
CB
98} RADOSCB;
99
f27aaf4b 100typedef struct BDRVRBDState {
ad32e9c0
JD
101 rados_t cluster;
102 rados_ioctx_t io_ctx;
103 rbd_image_t image;
80b61a27 104 char *image_name;
ad32e9c0 105 char *snap;
f27aaf4b
CB
106} BDRVRBDState;
107
aa045c2d
KW
108static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
109 BlockdevOptionsRbd *opts, bool cache,
110 const char *keypairs, const char *secretid,
111 Error **errp);
112
/*
 * Split off the leading token of @src, in place.
 *
 * Scans @src for the first @delim that is not preceded by a backslash.
 * If one is found it is overwritten with '\0' and *@p is set to the
 * character following it; otherwise *@p is set to NULL and @src is left
 * untouched.  Escape sequences are skipped but not removed (see
 * qemu_rbd_unescape()).
 *
 * Returns @src, i.e. the start of the token.
 */
static char *qemu_rbd_next_tok(char *src, char delim, char **p)
{
    char *cur = src;

    *p = NULL;

    while (*cur != '\0' && *cur != delim) {
        /* A backslash protects the next character, if there is one */
        if (cur[0] == '\\' && cur[1] != '\0') {
            cur++;
        }
        cur++;
    }

    if (*cur == delim) {
        *cur = '\0';
        *p = cur + 1;
    }
    return src;
}
133
16a06b24
SW
/*
 * Remove backslash escapes from @src in place: every "\x" pair becomes
 * "x".  A backslash that is the very last character is kept as is.
 */
static void qemu_rbd_unescape(char *src)
{
    const char *in = src;
    char *out = src;

    while (*in != '\0') {
        if (in[0] == '\\' && in[1] != '\0') {
            in++;               /* drop the backslash, keep what follows */
        }
        *out++ = *in++;
    }
    *out = '\0';
}
146
c7cacb3e
JC
147static void qemu_rbd_parse_filename(const char *filename, QDict *options,
148 Error **errp)
f27aaf4b
CB
149{
150 const char *start;
e98c6961
EB
151 char *p, *buf;
152 QList *keypairs = NULL;
7830f909 153 char *found_str;
f27aaf4b
CB
154
155 if (!strstart(filename, "rbd:", &start)) {
d61563b2 156 error_setg(errp, "File name must start with 'rbd:'");
c7cacb3e 157 return;
f27aaf4b
CB
158 }
159
7267c094 160 buf = g_strdup(start);
f27aaf4b
CB
161 p = buf;
162
730b00bb 163 found_str = qemu_rbd_next_tok(p, '/', &p);
7830f909 164 if (!p) {
7830f909 165 error_setg(errp, "Pool name is required");
f27aaf4b
CB
166 goto done;
167 }
7830f909 168 qemu_rbd_unescape(found_str);
46f5ac20 169 qdict_put_str(options, "pool", found_str);
fab5cf59
JD
170
171 if (strchr(p, '@')) {
730b00bb 172 found_str = qemu_rbd_next_tok(p, '@', &p);
7830f909 173 qemu_rbd_unescape(found_str);
46f5ac20 174 qdict_put_str(options, "image", found_str);
7830f909 175
730b00bb 176 found_str = qemu_rbd_next_tok(p, ':', &p);
7830f909 177 qemu_rbd_unescape(found_str);
46f5ac20 178 qdict_put_str(options, "snapshot", found_str);
fab5cf59 179 } else {
730b00bb 180 found_str = qemu_rbd_next_tok(p, ':', &p);
7830f909 181 qemu_rbd_unescape(found_str);
46f5ac20 182 qdict_put_str(options, "image", found_str);
f27aaf4b 183 }
7830f909 184 if (!p) {
f27aaf4b
CB
185 goto done;
186 }
187
c7cacb3e
JC
188 /* The following are essentially all key/value pairs, and we treat
189 * 'id' and 'conf' a bit special. Key/value pairs may be in any order. */
190 while (p) {
191 char *name, *value;
730b00bb 192 name = qemu_rbd_next_tok(p, '=', &p);
c7cacb3e
JC
193 if (!p) {
194 error_setg(errp, "conf option %s has no value", name);
195 break;
7c7e9df0 196 }
c7cacb3e
JC
197
198 qemu_rbd_unescape(name);
199
730b00bb 200 value = qemu_rbd_next_tok(p, ':', &p);
c7cacb3e
JC
201 qemu_rbd_unescape(value);
202
203 if (!strcmp(name, "conf")) {
46f5ac20 204 qdict_put_str(options, "conf", value);
c7cacb3e 205 } else if (!strcmp(name, "id")) {
46f5ac20 206 qdict_put_str(options, "user", value);
c7cacb3e 207 } else {
e98c6961
EB
208 /*
209 * We pass these internally to qemu_rbd_set_keypairs(), so
210 * we can get away with the simpler list of [ "key1",
211 * "value1", "key2", "value2" ] rather than a raw dict
212 * { "key1": "value1", "key2": "value2" } where we can't
213 * guarantee order, or even a more correct but complex
214 * [ { "key1": "value1" }, { "key2": "value2" } ]
215 */
216 if (!keypairs) {
217 keypairs = qlist_new();
c7cacb3e 218 }
46f5ac20
EB
219 qlist_append_str(keypairs, name);
220 qlist_append_str(keypairs, value);
c7cacb3e 221 }
7c7e9df0 222 }
c7cacb3e 223
e98c6961
EB
224 if (keypairs) {
225 qdict_put(options, "=keyvalue-pairs",
226 qobject_to_json(QOBJECT(keypairs)));
c7cacb3e
JC
227 }
228
c7cacb3e 229done:
c7cacb3e 230 g_free(buf);
cb3e7f08 231 qobject_unref(keypairs);
c7cacb3e 232 return;
7c7e9df0
SW
233}
234
60390a21 235
e8e16d4b
EB
236static void qemu_rbd_refresh_limits(BlockDriverState *bs, Error **errp)
237{
238 /* XXX Does RBD support AIO on less than 512-byte alignment? */
239 bs->bl.request_alignment = 512;
240}
241
242
d083f954 243static int qemu_rbd_set_auth(rados_t cluster, BlockdevOptionsRbd *opts,
60390a21
DB
244 Error **errp)
245{
d083f954 246 char *key, *acr;
a3699de4
MA
247 int r;
248 GString *accu;
249 RbdAuthModeList *auth;
250
d083f954
MA
251 if (opts->key_secret) {
252 key = qcrypto_secret_lookup_as_base64(opts->key_secret, errp);
253 if (!key) {
254 return -EIO;
255 }
256 r = rados_conf_set(cluster, "key", key);
257 g_free(key);
258 if (r < 0) {
259 error_setg_errno(errp, -r, "Could not set 'key'");
260 return r;
a3699de4 261 }
60390a21
DB
262 }
263
a3699de4
MA
264 if (opts->has_auth_client_required) {
265 accu = g_string_new("");
266 for (auth = opts->auth_client_required; auth; auth = auth->next) {
267 if (accu->str[0]) {
268 g_string_append_c(accu, ';');
269 }
270 g_string_append(accu, RbdAuthMode_str(auth->value));
271 }
272 acr = g_string_free(accu, FALSE);
273 r = rados_conf_set(cluster, "auth_client_required", acr);
274 g_free(acr);
275 if (r < 0) {
276 error_setg_errno(errp, -r,
277 "Could not set 'auth_client_required'");
278 return r;
279 }
280 }
60390a21
DB
281
282 return 0;
283}
284
e98c6961 285static int qemu_rbd_set_keypairs(rados_t cluster, const char *keypairs_json,
c7cacb3e 286 Error **errp)
fab5cf59 287{
e98c6961
EB
288 QList *keypairs;
289 QString *name;
290 QString *value;
291 const char *key;
292 size_t remaining;
fab5cf59
JD
293 int ret = 0;
294
e98c6961
EB
295 if (!keypairs_json) {
296 return ret;
297 }
7dc847eb
HR
298 keypairs = qobject_to(QList,
299 qobject_from_json(keypairs_json, &error_abort));
e98c6961
EB
300 remaining = qlist_size(keypairs) / 2;
301 assert(remaining);
302
303 while (remaining--) {
7dc847eb
HR
304 name = qobject_to(QString, qlist_pop(keypairs));
305 value = qobject_to(QString, qlist_pop(keypairs));
e98c6961
EB
306 assert(name && value);
307 key = qstring_get_str(name);
308
309 ret = rados_conf_set(cluster, key, qstring_get_str(value));
cb3e7f08 310 qobject_unref(value);
c7cacb3e 311 if (ret < 0) {
e98c6961 312 error_setg_errno(errp, -ret, "invalid conf option %s", key);
cb3e7f08 313 qobject_unref(name);
c7cacb3e
JC
314 ret = -EINVAL;
315 break;
fab5cf59 316 }
cb3e7f08 317 qobject_unref(name);
fab5cf59
JD
318 }
319
cb3e7f08 320 qobject_unref(keypairs);
fab5cf59
JD
321 return ret;
322}
323
1d393bde 324static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
325{
326 if (LIBRBD_USE_IOVEC) {
327 RBDAIOCB *acb = rcb->acb;
328 iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
329 acb->qiov->size - offs);
330 } else {
331 memset(rcb->buf + offs, 0, rcb->size - offs);
332 }
333}
334
0f9d252d
JC
335static QemuOptsList runtime_opts = {
336 .name = "rbd",
337 .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
338 .desc = {
339 {
cbf036b4 340 .name = "pool",
0f9d252d 341 .type = QEMU_OPT_STRING,
cbf036b4 342 .help = "Rados pool name",
0f9d252d
JC
343 },
344 {
cbf036b4 345 .name = "image",
0f9d252d 346 .type = QEMU_OPT_STRING,
cbf036b4 347 .help = "Image name in the pool",
0f9d252d
JC
348 },
349 {
350 .name = "conf",
351 .type = QEMU_OPT_STRING,
352 .help = "Rados config file location",
353 },
0f9d252d
JC
354 {
355 .name = "snapshot",
356 .type = QEMU_OPT_STRING,
357 .help = "Ceph snapshot name",
358 },
359 {
360 /* maps to 'id' in rados_create() */
361 .name = "user",
362 .type = QEMU_OPT_STRING,
363 .help = "Rados id name",
364 },
cbf036b4 365 /*
2836284d 366 * server.* extracted manually, see qemu_rbd_mon_host()
cbf036b4 367 */
0f9d252d
JC
368 { /* end of list */ }
369 },
370};
371
d083f954 372/* FIXME Deprecate and remove keypairs or make it available in QMP. */
1bebea37
KW
373static int qemu_rbd_do_create(BlockdevCreateOptions *options,
374 const char *keypairs, const char *password_secret,
375 Error **errp)
f27aaf4b 376{
1bebea37 377 BlockdevCreateOptionsRbd *opts = &options->u.rbd;
ad32e9c0
JD
378 rados_t cluster;
379 rados_ioctx_t io_ctx;
1bebea37
KW
380 int obj_order = 0;
381 int ret;
382
383 assert(options->driver == BLOCKDEV_DRIVER_RBD);
384 if (opts->location->has_snapshot) {
385 error_setg(errp, "Can't use snapshot name for image creation");
386 return -EINVAL;
387 }
f27aaf4b 388
1bebea37
KW
389 if (opts->has_cluster_size) {
390 int64_t objsize = opts->cluster_size;
bd0cf596
CL
391 if ((objsize - 1) & objsize) { /* not a power of 2? */
392 error_setg(errp, "obj size needs to be power of 2");
1bebea37 393 return -EINVAL;
bd0cf596
CL
394 }
395 if (objsize < 4096) {
396 error_setg(errp, "obj size too small");
1bebea37 397 return -EINVAL;
f27aaf4b 398 }
786a4ea8 399 obj_order = ctz32(objsize);
f27aaf4b
CB
400 }
401
aa045c2d
KW
402 ret = qemu_rbd_connect(&cluster, &io_ctx, opts->location, false, keypairs,
403 password_secret, errp);
87cd3d20 404 if (ret < 0) {
1bebea37 405 return ret;
f27aaf4b
CB
406 }
407
1bebea37 408 ret = rbd_create(io_ctx, opts->location->image, opts->size, &obj_order);
87cd3d20
VU
409 if (ret < 0) {
410 error_setg_errno(errp, -ret, "error rbd create");
aa045c2d 411 goto out;
87cd3d20 412 }
f27aaf4b 413
1bebea37 414 ret = 0;
aa045c2d
KW
415out:
416 rados_ioctx_destroy(io_ctx);
e38f643a 417 rados_shutdown(cluster);
1bebea37
KW
418 return ret;
419}
420
421static int qemu_rbd_co_create(BlockdevCreateOptions *options, Error **errp)
422{
423 return qemu_rbd_do_create(options, NULL, NULL, errp);
424}
425
426static int coroutine_fn qemu_rbd_co_create_opts(const char *filename,
427 QemuOpts *opts,
428 Error **errp)
429{
430 BlockdevCreateOptions *create_options;
431 BlockdevCreateOptionsRbd *rbd_opts;
432 BlockdevOptionsRbd *loc;
433 Error *local_err = NULL;
434 const char *keypairs, *password_secret;
435 QDict *options = NULL;
436 int ret = 0;
437
438 create_options = g_new0(BlockdevCreateOptions, 1);
439 create_options->driver = BLOCKDEV_DRIVER_RBD;
440 rbd_opts = &create_options->u.rbd;
441
442 rbd_opts->location = g_new0(BlockdevOptionsRbd, 1);
443
444 password_secret = qemu_opt_get(opts, "password-secret");
445
446 /* Read out options */
447 rbd_opts->size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
448 BDRV_SECTOR_SIZE);
449 rbd_opts->cluster_size = qemu_opt_get_size_del(opts,
450 BLOCK_OPT_CLUSTER_SIZE, 0);
451 rbd_opts->has_cluster_size = (rbd_opts->cluster_size != 0);
452
453 options = qdict_new();
454 qemu_rbd_parse_filename(filename, options, &local_err);
455 if (local_err) {
456 ret = -EINVAL;
457 error_propagate(errp, local_err);
458 goto exit;
459 }
460
461 /*
462 * Caution: while qdict_get_try_str() is fine, getting non-string
463 * types would require more care. When @options come from -blockdev
464 * or blockdev_add, its members are typed according to the QAPI
465 * schema, but when they come from -drive, they're all QString.
466 */
467 loc = rbd_opts->location;
468 loc->pool = g_strdup(qdict_get_try_str(options, "pool"));
469 loc->conf = g_strdup(qdict_get_try_str(options, "conf"));
470 loc->has_conf = !!loc->conf;
471 loc->user = g_strdup(qdict_get_try_str(options, "user"));
472 loc->has_user = !!loc->user;
473 loc->image = g_strdup(qdict_get_try_str(options, "image"));
474 keypairs = qdict_get_try_str(options, "=keyvalue-pairs");
475
476 ret = qemu_rbd_do_create(create_options, keypairs, password_secret, errp);
477 if (ret < 0) {
478 goto exit;
479 }
c7cacb3e
JC
480
481exit:
cb3e7f08 482 qobject_unref(options);
1bebea37 483 qapi_free_BlockdevCreateOptions(create_options);
f27aaf4b
CB
484 return ret;
485}
486
487/*
e04fb07f
SH
488 * This aio completion is being called from rbd_finish_bh() and runs in qemu
489 * BH context.
f27aaf4b 490 */
ad32e9c0 491static void qemu_rbd_complete_aio(RADOSCB *rcb)
f27aaf4b
CB
492{
493 RBDAIOCB *acb = rcb->acb;
494 int64_t r;
495
f27aaf4b
CB
496 r = rcb->ret;
497
dc7588c1 498 if (acb->cmd != RBD_AIO_READ) {
f27aaf4b
CB
499 if (r < 0) {
500 acb->ret = r;
501 acb->error = 1;
502 } else if (!acb->error) {
ad32e9c0 503 acb->ret = rcb->size;
f27aaf4b
CB
504 }
505 } else {
ad32e9c0 506 if (r < 0) {
1d393bde 507 qemu_rbd_memset(rcb, 0);
f27aaf4b
CB
508 acb->ret = r;
509 acb->error = 1;
ad32e9c0 510 } else if (r < rcb->size) {
1d393bde 511 qemu_rbd_memset(rcb, r);
f27aaf4b 512 if (!acb->error) {
ad32e9c0 513 acb->ret = rcb->size;
f27aaf4b
CB
514 }
515 } else if (!acb->error) {
ad32e9c0 516 acb->ret = r;
f27aaf4b
CB
517 }
518 }
f27aaf4b 519
e04fb07f 520 g_free(rcb);
f27aaf4b 521
1d393bde 522 if (!LIBRBD_USE_IOVEC) {
523 if (acb->cmd == RBD_AIO_READ) {
524 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
525 }
526 qemu_vfree(acb->bounce);
e04fb07f 527 }
1d393bde 528
e04fb07f 529 acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
f27aaf4b 530
8007429a 531 qemu_aio_unref(acb);
f27aaf4b
CB
532}
533
4bfb2741 534static char *qemu_rbd_mon_host(BlockdevOptionsRbd *opts, Error **errp)
0a55679b 535{
4bfb2741 536 const char **vals;
2836284d
MA
537 const char *host, *port;
538 char *rados_str;
4bfb2741
KW
539 InetSocketAddressBaseList *p;
540 int i, cnt;
541
542 if (!opts->has_server) {
543 return NULL;
544 }
545
546 for (cnt = 0, p = opts->server; p; p = p->next) {
547 cnt++;
548 }
549
550 vals = g_new(const char *, cnt + 1);
551
552 for (i = 0, p = opts->server; p; p = p->next, i++) {
553 host = p->value->host;
554 port = p->value->port;
0a55679b 555
2836284d 556 if (strchr(host, ':')) {
4bfb2741 557 vals[i] = g_strdup_printf("[%s]:%s", host, port);
0a55679b 558 } else {
4bfb2741 559 vals[i] = g_strdup_printf("%s:%s", host, port);
0a55679b 560 }
0a55679b 561 }
2836284d 562 vals[i] = NULL;
0a55679b 563
2836284d 564 rados_str = i ? g_strjoinv(";", (char **)vals) : NULL;
2836284d 565 g_strfreev((char **)vals);
0a55679b
JC
566 return rados_str;
567}
568
3d9136f9 569static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
4bfb2741 570 BlockdevOptionsRbd *opts, bool cache,
4ff45049
KW
571 const char *keypairs, const char *secretid,
572 Error **errp)
f27aaf4b 573{
0a55679b 574 char *mon_host = NULL;
3d9136f9 575 Error *local_err = NULL;
f27aaf4b
CB
576 int r;
577
d083f954
MA
578 if (secretid) {
579 if (opts->key_secret) {
580 error_setg(errp,
581 "Legacy 'password-secret' clashes with 'key-secret'");
582 return -EINVAL;
583 }
584 opts->key_secret = g_strdup(secretid);
585 opts->has_key_secret = true;
586 }
587
4bfb2741 588 mon_host = qemu_rbd_mon_host(opts, &local_err);
0a55679b
JC
589 if (local_err) {
590 error_propagate(errp, local_err);
591 r = -EINVAL;
592 goto failed_opts;
593 }
594
4bfb2741 595 r = rados_create(cluster, opts->user);
ad32e9c0 596 if (r < 0) {
87cd3d20 597 error_setg_errno(errp, -r, "error initializing");
c3ca988d 598 goto failed_opts;
f27aaf4b
CB
599 }
600
c7cacb3e 601 /* try default location when conf=NULL, but ignore failure */
4bfb2741
KW
602 r = rados_conf_read_file(*cluster, opts->conf);
603 if (opts->has_conf && r < 0) {
604 error_setg_errno(errp, -r, "error reading conf file %s", opts->conf);
c7cacb3e 605 goto failed_shutdown;
99a3c89d
JD
606 }
607
3d9136f9 608 r = qemu_rbd_set_keypairs(*cluster, keypairs, errp);
c7cacb3e
JC
609 if (r < 0) {
610 goto failed_shutdown;
99a3c89d
JD
611 }
612
0a55679b 613 if (mon_host) {
3d9136f9 614 r = rados_conf_set(*cluster, "mon_host", mon_host);
0a55679b
JC
615 if (r < 0) {
616 goto failed_shutdown;
617 }
618 }
619
d083f954
MA
620 r = qemu_rbd_set_auth(*cluster, opts, errp);
621 if (r < 0) {
60390a21
DB
622 goto failed_shutdown;
623 }
624
b11f38fc
JD
625 /*
626 * Fallback to more conservative semantics if setting cache
627 * options fails. Ignore errors from setting rbd_cache because the
628 * only possible error is that the option does not exist, and
629 * librbd defaults to no caching. If write through caching cannot
630 * be set up, fall back to no caching.
631 */
3d9136f9
KW
632 if (cache) {
633 rados_conf_set(*cluster, "rbd_cache", "true");
b11f38fc 634 } else {
3d9136f9 635 rados_conf_set(*cluster, "rbd_cache", "false");
b11f38fc
JD
636 }
637
3d9136f9 638 r = rados_connect(*cluster);
ad32e9c0 639 if (r < 0) {
87cd3d20 640 error_setg_errno(errp, -r, "error connecting");
eb93d5d9 641 goto failed_shutdown;
f27aaf4b
CB
642 }
643
4bfb2741 644 r = rados_ioctx_create(*cluster, opts->pool, io_ctx);
ad32e9c0 645 if (r < 0) {
4bfb2741 646 error_setg_errno(errp, -r, "error opening pool %s", opts->pool);
eb93d5d9 647 goto failed_shutdown;
f27aaf4b
CB
648 }
649
3d9136f9
KW
650 return 0;
651
652failed_shutdown:
653 rados_shutdown(*cluster);
3d9136f9 654failed_opts:
3d9136f9
KW
655 g_free(mon_host);
656 return r;
657}
658
f24b03b5
JC
659static int qemu_rbd_convert_options(QDict *options, BlockdevOptionsRbd **opts,
660 Error **errp)
661{
662 Visitor *v;
663 Error *local_err = NULL;
664
665 /* Convert the remaining options into a QAPI object */
666 v = qobject_input_visitor_new_flat_confused(options, errp);
667 if (!v) {
668 return -EINVAL;
669 }
670
671 visit_type_BlockdevOptionsRbd(v, NULL, opts, &local_err);
672 visit_free(v);
673
674 if (local_err) {
675 error_propagate(errp, local_err);
676 return -EINVAL;
677 }
678
679 return 0;
680}
681
084d1d13
JC
682static int qemu_rbd_attempt_legacy_options(QDict *options,
683 BlockdevOptionsRbd **opts,
684 char **keypairs)
685{
686 char *filename;
687 int r;
688
689 filename = g_strdup(qdict_get_try_str(options, "filename"));
690 if (!filename) {
691 return -EINVAL;
692 }
693 qdict_del(options, "filename");
694
695 qemu_rbd_parse_filename(filename, options, NULL);
696
697 /* keypairs freed by caller */
698 *keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
699 if (*keypairs) {
700 qdict_del(options, "=keyvalue-pairs");
701 }
702
703 r = qemu_rbd_convert_options(options, opts, NULL);
704
705 g_free(filename);
706 return r;
707}
708
3d9136f9
KW
709static int qemu_rbd_open(BlockDriverState *bs, QDict *options, int flags,
710 Error **errp)
711{
712 BDRVRBDState *s = bs->opaque;
4bfb2741 713 BlockdevOptionsRbd *opts = NULL;
bfb15b4b 714 const QDictEntry *e;
3d9136f9 715 Error *local_err = NULL;
4ff45049 716 char *keypairs, *secretid;
3d9136f9
KW
717 int r;
718
4ff45049
KW
719 keypairs = g_strdup(qdict_get_try_str(options, "=keyvalue-pairs"));
720 if (keypairs) {
721 qdict_del(options, "=keyvalue-pairs");
722 }
723
724 secretid = g_strdup(qdict_get_try_str(options, "password-secret"));
725 if (secretid) {
726 qdict_del(options, "password-secret");
727 }
728
f24b03b5 729 r = qemu_rbd_convert_options(options, &opts, &local_err);
4bfb2741 730 if (local_err) {
084d1d13
JC
731 /* If keypairs are present, that means some options are present in
732 * the modern option format. Don't attempt to parse legacy option
733 * formats, as we won't support mixed usage. */
734 if (keypairs) {
735 error_propagate(errp, local_err);
736 goto out;
737 }
738
739 /* If the initial attempt to convert and process the options failed,
740 * we may be attempting to open an image file that has the rbd options
741 * specified in the older format consisting of all key/value pairs
742 * encoded in the filename. Go ahead and attempt to parse the
743 * filename, and see if we can pull out the required options. */
744 r = qemu_rbd_attempt_legacy_options(options, &opts, &keypairs);
745 if (r < 0) {
746 /* Propagate the original error, not the legacy parsing fallback
747 * error, as the latter was just a best-effort attempt. */
748 error_propagate(errp, local_err);
749 goto out;
750 }
751 /* Take care whenever deciding to actually deprecate; once this ability
752 * is removed, we will not be able to open any images with legacy-styled
753 * backing image strings. */
5197f445
MA
754 warn_report("RBD options encoded in the filename as keyvalue pairs "
755 "is deprecated");
4bfb2741
KW
756 }
757
bfb15b4b
JC
758 /* Remove the processed options from the QDict (the visitor processes
759 * _all_ options in the QDict) */
760 while ((e = qdict_first(options))) {
761 qdict_del(options, e->key);
762 }
763
d41a5588
KW
764 r = qemu_rbd_connect(&s->cluster, &s->io_ctx, opts,
765 !(flags & BDRV_O_NOCACHE), keypairs, secretid, errp);
3d9136f9 766 if (r < 0) {
4ff45049 767 goto out;
3d9136f9
KW
768 }
769
d41a5588
KW
770 s->snap = g_strdup(opts->snapshot);
771 s->image_name = g_strdup(opts->image);
772
e2b8247a 773 /* rbd_open is always r/w */
80b61a27 774 r = rbd_open(s->io_ctx, s->image_name, &s->image, s->snap);
f27aaf4b 775 if (r < 0) {
80b61a27
JC
776 error_setg_errno(errp, -r, "error reading header from %s",
777 s->image_name);
eb93d5d9 778 goto failed_open;
f27aaf4b
CB
779 }
780
e2b8247a
JC
781 /* If we are using an rbd snapshot, we must be r/o, otherwise
782 * leave as-is */
783 if (s->snap != NULL) {
eaa2410f
KW
784 r = bdrv_apply_auto_read_only(bs, "rbd snapshots are read-only", errp);
785 if (r < 0) {
786 rbd_close(s->image);
787 goto failed_open;
e2b8247a
JC
788 }
789 }
f27aaf4b 790
4ff45049
KW
791 r = 0;
792 goto out;
f27aaf4b 793
eb93d5d9 794failed_open:
ad32e9c0 795 rados_ioctx_destroy(s->io_ctx);
eb93d5d9 796 g_free(s->snap);
80b61a27 797 g_free(s->image_name);
3d9136f9 798 rados_shutdown(s->cluster);
4ff45049 799out:
4bfb2741 800 qapi_free_BlockdevOptionsRbd(opts);
4ff45049
KW
801 g_free(keypairs);
802 g_free(secretid);
f27aaf4b
CB
803 return r;
804}
805
56e7cf8d
JC
806
807/* Since RBD is currently always opened R/W via the API,
808 * we just need to check if we are using a snapshot or not, in
809 * order to determine if we will allow it to be R/W */
810static int qemu_rbd_reopen_prepare(BDRVReopenState *state,
811 BlockReopenQueue *queue, Error **errp)
812{
813 BDRVRBDState *s = state->bs->opaque;
814 int ret = 0;
815
816 if (s->snap && state->flags & BDRV_O_RDWR) {
817 error_setg(errp,
818 "Cannot change node '%s' to r/w when using RBD snapshot",
819 bdrv_get_device_or_node_name(state->bs));
820 ret = -EINVAL;
821 }
822
823 return ret;
824}
825
ad32e9c0 826static void qemu_rbd_close(BlockDriverState *bs)
f27aaf4b
CB
827{
828 BDRVRBDState *s = bs->opaque;
829
ad32e9c0
JD
830 rbd_close(s->image);
831 rados_ioctx_destroy(s->io_ctx);
7267c094 832 g_free(s->snap);
80b61a27 833 g_free(s->image_name);
ad32e9c0 834 rados_shutdown(s->cluster);
f27aaf4b
CB
835}
836
d7331bed 837static const AIOCBInfo rbd_aiocb_info = {
f27aaf4b 838 .aiocb_size = sizeof(RBDAIOCB),
f27aaf4b
CB
839};
840
e04fb07f 841static void rbd_finish_bh(void *opaque)
f27aaf4b 842{
e04fb07f 843 RADOSCB *rcb = opaque;
e04fb07f 844 qemu_rbd_complete_aio(rcb);
ad32e9c0
JD
845}
846
847/*
848 * This is the callback function for rbd_aio_read and _write
849 *
850 * Note: this function is being called from a non qemu thread so
851 * we need to be careful about what we do here. Generally we only
e04fb07f
SH
852 * schedule a BH, and do the rest of the io completion handling
853 * from rbd_finish_bh() which runs in a qemu context.
ad32e9c0
JD
854 */
855static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
856{
e04fb07f
SH
857 RBDAIOCB *acb = rcb->acb;
858
ad32e9c0
JD
859 rcb->ret = rbd_aio_get_return_value(c);
860 rbd_aio_release(c);
f27aaf4b 861
fffb6e12
PB
862 aio_bh_schedule_oneshot(bdrv_get_aio_context(acb->common.bs),
863 rbd_finish_bh, rcb);
f27aaf4b
CB
864}
865
787f3133
JD
866static int rbd_aio_discard_wrapper(rbd_image_t image,
867 uint64_t off,
868 uint64_t len,
869 rbd_completion_t comp)
870{
871#ifdef LIBRBD_SUPPORTS_DISCARD
872 return rbd_aio_discard(image, off, len, comp);
873#else
874 return -ENOTSUP;
875#endif
876}
877
dc7588c1
JD
878static int rbd_aio_flush_wrapper(rbd_image_t image,
879 rbd_completion_t comp)
880{
881#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
882 return rbd_aio_flush(image, comp);
883#else
884 return -ENOTSUP;
885#endif
886}
887
7c84b1b8 888static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
7bbca9e2 889 int64_t off,
7c84b1b8 890 QEMUIOVector *qiov,
7bbca9e2 891 int64_t size,
097310b5 892 BlockCompletionFunc *cb,
7c84b1b8
MA
893 void *opaque,
894 RBDAIOCmd cmd)
f27aaf4b
CB
895{
896 RBDAIOCB *acb;
0f7a0237 897 RADOSCB *rcb = NULL;
ad32e9c0 898 rbd_completion_t c;
51a13528 899 int r;
f27aaf4b
CB
900
901 BDRVRBDState *s = bs->opaque;
902
d7331bed 903 acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
787f3133 904 acb->cmd = cmd;
f27aaf4b 905 acb->qiov = qiov;
7bbca9e2 906 assert(!qiov || qiov->size == size);
1d393bde 907
908 rcb = g_new(RADOSCB, 1);
909
910 if (!LIBRBD_USE_IOVEC) {
911 if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
912 acb->bounce = NULL;
913 } else {
914 acb->bounce = qemu_try_blockalign(bs, qiov->size);
915 if (acb->bounce == NULL) {
916 goto failed;
917 }
0f7a0237 918 }
1d393bde 919 if (cmd == RBD_AIO_WRITE) {
920 qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
921 }
922 rcb->buf = acb->bounce;
787f3133 923 }
1d393bde 924
f27aaf4b
CB
925 acb->ret = 0;
926 acb->error = 0;
927 acb->s = s;
f27aaf4b 928
ad32e9c0 929 rcb->acb = acb;
ad32e9c0
JD
930 rcb->s = acb->s;
931 rcb->size = size;
51a13528
JD
932 r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
933 if (r < 0) {
934 goto failed;
935 }
f27aaf4b 936
787f3133
JD
937 switch (cmd) {
938 case RBD_AIO_WRITE:
1d393bde 939#ifdef LIBRBD_SUPPORTS_IOVEC
940 r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
941#else
942 r = rbd_aio_write(s->image, off, size, rcb->buf, c);
943#endif
787f3133
JD
944 break;
945 case RBD_AIO_READ:
1d393bde 946#ifdef LIBRBD_SUPPORTS_IOVEC
947 r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
948#else
949 r = rbd_aio_read(s->image, off, size, rcb->buf, c);
950#endif
787f3133
JD
951 break;
952 case RBD_AIO_DISCARD:
953 r = rbd_aio_discard_wrapper(s->image, off, size, c);
954 break;
dc7588c1
JD
955 case RBD_AIO_FLUSH:
956 r = rbd_aio_flush_wrapper(s->image, c);
957 break;
787f3133
JD
958 default:
959 r = -EINVAL;
51a13528
JD
960 }
961
962 if (r < 0) {
405a2764 963 goto failed_completion;
f27aaf4b 964 }
f27aaf4b 965 return &acb->common;
51a13528 966
405a2764
KW
967failed_completion:
968 rbd_aio_release(c);
51a13528 969failed:
7267c094 970 g_free(rcb);
1d393bde 971 if (!LIBRBD_USE_IOVEC) {
972 qemu_vfree(acb->bounce);
973 }
974
8007429a 975 qemu_aio_unref(acb);
51a13528 976 return NULL;
f27aaf4b
CB
977}
978
e8e16d4b
EB
979static BlockAIOCB *qemu_rbd_aio_preadv(BlockDriverState *bs,
980 uint64_t offset, uint64_t bytes,
981 QEMUIOVector *qiov, int flags,
982 BlockCompletionFunc *cb,
983 void *opaque)
f27aaf4b 984{
e8e16d4b 985 return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
787f3133 986 RBD_AIO_READ);
f27aaf4b
CB
987}
988
e8e16d4b
EB
989static BlockAIOCB *qemu_rbd_aio_pwritev(BlockDriverState *bs,
990 uint64_t offset, uint64_t bytes,
991 QEMUIOVector *qiov, int flags,
992 BlockCompletionFunc *cb,
993 void *opaque)
f27aaf4b 994{
e8e16d4b 995 return rbd_start_aio(bs, offset, qiov, bytes, cb, opaque,
787f3133 996 RBD_AIO_WRITE);
f27aaf4b
CB
997}
998
dc7588c1 999#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
7c84b1b8 1000static BlockAIOCB *qemu_rbd_aio_flush(BlockDriverState *bs,
097310b5 1001 BlockCompletionFunc *cb,
7c84b1b8 1002 void *opaque)
dc7588c1
JD
1003{
1004 return rbd_start_aio(bs, 0, NULL, 0, cb, opaque, RBD_AIO_FLUSH);
1005}
1006
1007#else
1008
8b94ff85 1009static int qemu_rbd_co_flush(BlockDriverState *bs)
7a3f5fe9
SW
1010{
1011#if LIBRBD_VERSION_CODE >= LIBRBD_VERSION(0, 1, 1)
1012 /* rbd_flush added in 0.1.1 */
1013 BDRVRBDState *s = bs->opaque;
1014 return rbd_flush(s->image);
1015#else
1016 return 0;
1017#endif
1018}
dc7588c1 1019#endif
7a3f5fe9 1020
ad32e9c0 1021static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
f27aaf4b
CB
1022{
1023 BDRVRBDState *s = bs->opaque;
ad32e9c0
JD
1024 rbd_image_info_t info;
1025 int r;
1026
1027 r = rbd_stat(s->image, &info, sizeof(info));
1028 if (r < 0) {
1029 return r;
1030 }
1031
1032 bdi->cluster_size = info.obj_size;
f27aaf4b
CB
1033 return 0;
1034}
1035
ad32e9c0 1036static int64_t qemu_rbd_getlength(BlockDriverState *bs)
f27aaf4b
CB
1037{
1038 BDRVRBDState *s = bs->opaque;
ad32e9c0
JD
1039 rbd_image_info_t info;
1040 int r;
f27aaf4b 1041
ad32e9c0
JD
1042 r = rbd_stat(s->image, &info, sizeof(info));
1043 if (r < 0) {
1044 return r;
1045 }
1046
1047 return info.size;
f27aaf4b
CB
1048}
1049
061ca8a3
KW
1050static int coroutine_fn qemu_rbd_co_truncate(BlockDriverState *bs,
1051 int64_t offset,
1052 PreallocMode prealloc,
1053 Error **errp)
30cdc48c
JD
1054{
1055 BDRVRBDState *s = bs->opaque;
1056 int r;
1057
8243ccb7
HR
1058 if (prealloc != PREALLOC_MODE_OFF) {
1059 error_setg(errp, "Unsupported preallocation mode '%s'",
977c736f 1060 PreallocMode_str(prealloc));
8243ccb7
HR
1061 return -ENOTSUP;
1062 }
1063
30cdc48c
JD
1064 r = rbd_resize(s->image, offset);
1065 if (r < 0) {
f59adb32 1066 error_setg_errno(errp, -r, "Failed to resize file");
30cdc48c
JD
1067 return r;
1068 }
1069
1070 return 0;
1071}
1072
ad32e9c0
JD
1073static int qemu_rbd_snap_create(BlockDriverState *bs,
1074 QEMUSnapshotInfo *sn_info)
f27aaf4b
CB
1075{
1076 BDRVRBDState *s = bs->opaque;
f27aaf4b 1077 int r;
f27aaf4b
CB
1078
1079 if (sn_info->name[0] == '\0') {
1080 return -EINVAL; /* we need a name for rbd snapshots */
1081 }
1082
1083 /*
1084 * rbd snapshots are using the name as the user controlled unique identifier
1085 * we can't use the rbd snapid for that purpose, as it can't be set
1086 */
1087 if (sn_info->id_str[0] != '\0' &&
1088 strcmp(sn_info->id_str, sn_info->name) != 0) {
1089 return -EINVAL;
1090 }
1091
1092 if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) {
1093 return -ERANGE;
1094 }
1095
ad32e9c0 1096 r = rbd_snap_create(s->image, sn_info->name);
f27aaf4b 1097 if (r < 0) {
ad32e9c0 1098 error_report("failed to create snap: %s", strerror(-r));
f27aaf4b
CB
1099 return r;
1100 }
1101
f27aaf4b
CB
1102 return 0;
1103}
1104
bd603247 1105static int qemu_rbd_snap_remove(BlockDriverState *bs,
a89d89d3
WX
1106 const char *snapshot_id,
1107 const char *snapshot_name,
1108 Error **errp)
bd603247
GF
1109{
1110 BDRVRBDState *s = bs->opaque;
1111 int r;
1112
a89d89d3
WX
1113 if (!snapshot_name) {
1114 error_setg(errp, "rbd need a valid snapshot name");
1115 return -EINVAL;
1116 }
1117
1118 /* If snapshot_id is specified, it must be equal to name, see
1119 qemu_rbd_snap_list() */
1120 if (snapshot_id && strcmp(snapshot_id, snapshot_name)) {
1121 error_setg(errp,
1122 "rbd do not support snapshot id, it should be NULL or "
1123 "equal to snapshot name");
1124 return -EINVAL;
1125 }
1126
bd603247 1127 r = rbd_snap_remove(s->image, snapshot_name);
a89d89d3
WX
1128 if (r < 0) {
1129 error_setg_errno(errp, -r, "Failed to remove the snapshot");
1130 }
bd603247
GF
1131 return r;
1132}
1133
1134static int qemu_rbd_snap_rollback(BlockDriverState *bs,
1135 const char *snapshot_name)
1136{
1137 BDRVRBDState *s = bs->opaque;
bd603247 1138
9be38598 1139 return rbd_snap_rollback(s->image, snapshot_name);
bd603247
GF
1140}
1141
ad32e9c0
JD
1142static int qemu_rbd_snap_list(BlockDriverState *bs,
1143 QEMUSnapshotInfo **psn_tab)
f27aaf4b
CB
1144{
1145 BDRVRBDState *s = bs->opaque;
f27aaf4b 1146 QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
ad32e9c0
JD
1147 int i, snap_count;
1148 rbd_snap_info_t *snaps;
1149 int max_snaps = RBD_MAX_SNAPS;
f27aaf4b 1150
ad32e9c0 1151 do {
02c4f26b 1152 snaps = g_new(rbd_snap_info_t, max_snaps);
ad32e9c0 1153 snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
9e6337d0 1154 if (snap_count <= 0) {
7267c094 1155 g_free(snaps);
f27aaf4b 1156 }
ad32e9c0 1157 } while (snap_count == -ERANGE);
f27aaf4b 1158
ad32e9c0 1159 if (snap_count <= 0) {
b9c53290 1160 goto done;
f27aaf4b
CB
1161 }
1162
5839e53b 1163 sn_tab = g_new0(QEMUSnapshotInfo, snap_count);
f27aaf4b 1164
ad32e9c0
JD
1165 for (i = 0; i < snap_count; i++) {
1166 const char *snap_name = snaps[i].name;
f27aaf4b
CB
1167
1168 sn_info = sn_tab + i;
1169 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
1170 pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);
f27aaf4b 1171
ad32e9c0 1172 sn_info->vm_state_size = snaps[i].size;
f27aaf4b
CB
1173 sn_info->date_sec = 0;
1174 sn_info->date_nsec = 0;
1175 sn_info->vm_clock_nsec = 0;
1176 }
ad32e9c0 1177 rbd_snap_list_end(snaps);
9e6337d0 1178 g_free(snaps);
ad32e9c0 1179
b9c53290 1180 done:
f27aaf4b 1181 *psn_tab = sn_tab;
f27aaf4b 1182 return snap_count;
f27aaf4b
CB
1183}
1184
787f3133 1185#ifdef LIBRBD_SUPPORTS_DISCARD
4da444a0
EB
1186static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
1187 int64_t offset,
f5a5ca79 1188 int bytes,
4da444a0
EB
1189 BlockCompletionFunc *cb,
1190 void *opaque)
787f3133 1191{
f5a5ca79 1192 return rbd_start_aio(bs, offset, NULL, bytes, cb, opaque,
787f3133
JD
1193 RBD_AIO_DISCARD);
1194}
1195#endif
1196
be217884 1197#ifdef LIBRBD_SUPPORTS_INVALIDATE
2b148f39
PB
1198static void coroutine_fn qemu_rbd_co_invalidate_cache(BlockDriverState *bs,
1199 Error **errp)
be217884
AC
1200{
1201 BDRVRBDState *s = bs->opaque;
1202 int r = rbd_invalidate_cache(s->image);
1203 if (r < 0) {
1204 error_setg_errno(errp, -r, "Failed to invalidate the cache");
1205 }
1206}
1207#endif
1208
bd0cf596
CL
1209static QemuOptsList qemu_rbd_create_opts = {
1210 .name = "rbd-create-opts",
1211 .head = QTAILQ_HEAD_INITIALIZER(qemu_rbd_create_opts.head),
1212 .desc = {
1213 {
1214 .name = BLOCK_OPT_SIZE,
1215 .type = QEMU_OPT_SIZE,
1216 .help = "Virtual disk size"
1217 },
1218 {
1219 .name = BLOCK_OPT_CLUSTER_SIZE,
1220 .type = QEMU_OPT_SIZE,
1221 .help = "RBD object size"
1222 },
60390a21
DB
1223 {
1224 .name = "password-secret",
1225 .type = QEMU_OPT_STRING,
1226 .help = "ID of secret providing the password",
1227 },
bd0cf596
CL
1228 { /* end of list */ }
1229 }
f27aaf4b
CB
1230};
1231
2654267c
HR
/*
 * NULL-terminated list of runtime option names installed as the driver's
 * strong_runtime_opts. "server." ends in a dot; NOTE(review): presumably
 * it is matched as a prefix covering every "server.*" key - confirm
 * against the block layer's handling of strong runtime options.
 */
static const char *const qemu_rbd_strong_runtime_opts[] = {
    "pool",
    "image",
    "conf",
    "snapshot",
    "user",
    "server.",
    "password-secret",

    NULL,
};
1243
f27aaf4b 1244static BlockDriver bdrv_rbd = {
c7cacb3e
JC
1245 .format_name = "rbd",
1246 .instance_size = sizeof(BDRVRBDState),
1247 .bdrv_parse_filename = qemu_rbd_parse_filename,
e8e16d4b 1248 .bdrv_refresh_limits = qemu_rbd_refresh_limits,
c7cacb3e
JC
1249 .bdrv_file_open = qemu_rbd_open,
1250 .bdrv_close = qemu_rbd_close,
56e7cf8d 1251 .bdrv_reopen_prepare = qemu_rbd_reopen_prepare,
1bebea37 1252 .bdrv_co_create = qemu_rbd_co_create,
efc75e2a 1253 .bdrv_co_create_opts = qemu_rbd_co_create_opts,
c7cacb3e
JC
1254 .bdrv_has_zero_init = bdrv_has_zero_init_1,
1255 .bdrv_get_info = qemu_rbd_getinfo,
1256 .create_opts = &qemu_rbd_create_opts,
1257 .bdrv_getlength = qemu_rbd_getlength,
061ca8a3 1258 .bdrv_co_truncate = qemu_rbd_co_truncate,
c7cacb3e 1259 .protocol_name = "rbd",
f27aaf4b 1260
e8e16d4b
EB
1261 .bdrv_aio_preadv = qemu_rbd_aio_preadv,
1262 .bdrv_aio_pwritev = qemu_rbd_aio_pwritev,
dc7588c1
JD
1263
1264#ifdef LIBRBD_SUPPORTS_AIO_FLUSH
1265 .bdrv_aio_flush = qemu_rbd_aio_flush,
1266#else
c68b89ac 1267 .bdrv_co_flush_to_disk = qemu_rbd_co_flush,
dc7588c1 1268#endif
f27aaf4b 1269
787f3133 1270#ifdef LIBRBD_SUPPORTS_DISCARD
4da444a0 1271 .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
787f3133
JD
1272#endif
1273
c68b89ac 1274 .bdrv_snapshot_create = qemu_rbd_snap_create,
bd603247 1275 .bdrv_snapshot_delete = qemu_rbd_snap_remove,
c68b89ac 1276 .bdrv_snapshot_list = qemu_rbd_snap_list,
bd603247 1277 .bdrv_snapshot_goto = qemu_rbd_snap_rollback,
be217884 1278#ifdef LIBRBD_SUPPORTS_INVALIDATE
2b148f39 1279 .bdrv_co_invalidate_cache = qemu_rbd_co_invalidate_cache,
be217884 1280#endif
2654267c
HR
1281
1282 .strong_runtime_opts = qemu_rbd_strong_runtime_opts,
f27aaf4b
CB
1283};
1284
1285static void bdrv_rbd_init(void)
1286{
1287 bdrv_register(&bdrv_rbd);
1288}
1289
1290block_init(bdrv_rbd_init);