#include "qemu/osdep.h"
#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qint.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
} while (0)
typedef struct SheepdogAIOCB SheepdogAIOCB;
+typedef struct BDRVSheepdogState BDRVSheepdogState;
typedef struct AIOReq {
SheepdogAIOCB *aiocb;
|| y->max_affect_data_idx < x->min_affect_data_idx))
struct SheepdogAIOCB {
- BlockAIOCB common;
+ BDRVSheepdogState *s;
QEMUIOVector *qiov;
enum AIOCBState aiocb_type;
Coroutine *coroutine;
- void (*aio_done_func)(SheepdogAIOCB *);
-
- bool cancelable;
int nr_pending;
uint32_t min_affect_data_idx;
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
-typedef struct BDRVSheepdogState {
+struct BDRVSheepdogState {
BlockDriverState *bs;
AioContext *aio_context;
uint32_t cache_flags;
bool discard_supported;
- char *host_spec;
- bool is_unix;
+ SocketAddress *addr;
int fd;
CoMutex lock;
CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
-} BDRVSheepdogState;
+};
typedef struct BDRVSheepdogReopenState {
int fd;
*
* 1. In sd_co_rw_vector, we send the I/O requests to the server and
* link the requests to the inflight_list in the
- * BDRVSheepdogState. The function exits without waiting for
+ * BDRVSheepdogState. The function yields while waiting for
* receiving the response.
*
* 2. We receive the response in aio_read_response, the fd handler to
- * the sheepdog connection. If metadata update is needed, we send
- * the write request to the vdi object in sd_write_done, the write
- * completion function. We switch back to sd_co_readv/writev after
- * all the requests belonging to the AIOCB are finished.
+ * the sheepdog connection. We switch back to sd_co_readv/sd_co_writev
+ * after all the requests belonging to the AIOCB are finished. If
+ * needed, sd_co_writev will send another request for the vdi object.
*/
static inline AIOReq *alloc_aio_req(BDRVSheepdogState *s, SheepdogAIOCB *acb,
return aio_req;
}
-static inline void free_aio_req(BDRVSheepdogState *s, AIOReq *aio_req)
-{
- SheepdogAIOCB *acb = aio_req->aiocb;
-
- acb->cancelable = false;
- QLIST_REMOVE(aio_req, aio_siblings);
- g_free(aio_req);
-
- acb->nr_pending--;
-}
-
-static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
-{
- qemu_coroutine_enter(acb->coroutine);
- qemu_aio_unref(acb);
-}
-
-/*
- * Check whether the specified acb can be canceled
- *
- * We can cancel aio when any request belonging to the acb is:
- * - Not processed by the sheepdog server.
- * - Not linked to the inflight queue.
- */
-static bool sd_acb_cancelable(const SheepdogAIOCB *acb)
+static void wait_for_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *acb)
{
- BDRVSheepdogState *s = acb->common.bs->opaque;
- AIOReq *aioreq;
-
- if (!acb->cancelable) {
- return false;
- }
-
- QLIST_FOREACH(aioreq, &s->inflight_aio_head, aio_siblings) {
- if (aioreq->aiocb == acb) {
- return false;
- }
- }
-
- return true;
-}
-
-static void sd_aio_cancel(BlockAIOCB *blockacb)
-{
- SheepdogAIOCB *acb = (SheepdogAIOCB *)blockacb;
- BDRVSheepdogState *s = acb->common.bs->opaque;
- AIOReq *aioreq, *next;
-
- if (sd_acb_cancelable(acb)) {
- /* Remove outstanding requests from failed queue. */
- QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
- next) {
- if (aioreq->aiocb == acb) {
- free_aio_req(s, aioreq);
- }
- }
+ SheepdogAIOCB *cb;
- assert(acb->nr_pending == 0);
- if (acb->common.cb) {
- acb->common.cb(acb->common.opaque, -ECANCELED);
+retry:
+ QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
+ if (AIOCBOverlapping(acb, cb)) {
+ qemu_co_queue_wait(&s->overlapping_queue, NULL);
+ goto retry;
}
- sd_finish_aiocb(acb);
}
}
-static const AIOCBInfo sd_aiocb_info = {
- .aiocb_size = sizeof(SheepdogAIOCB),
- .cancel_async = sd_aio_cancel,
-};
-
-static SheepdogAIOCB *sd_aio_setup(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
+static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
+ QEMUIOVector *qiov, int64_t sector_num, int nb_sectors,
+ int type)
{
- SheepdogAIOCB *acb;
uint32_t object_size;
- BDRVSheepdogState *s = bs->opaque;
object_size = (UINT32_C(1) << s->inode.block_size_shift);
- acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
+ acb->s = s;
acb->qiov = qiov;
acb->sector_num = sector_num;
acb->nb_sectors = nb_sectors;
- acb->aio_done_func = NULL;
- acb->cancelable = true;
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
acb->nr_pending = 0;
acb->min_dirty_data_idx = UINT32_MAX;
acb->max_dirty_data_idx = 0;
+ acb->aiocb_type = type;
- return acb;
+ if (type == AIOCB_FLUSH_CACHE) {
+ return;
+ }
+
+ wait_for_overlapping_aiocb(s, acb);
+ QLIST_INSERT_HEAD(&s->inflight_aiocb_head, acb, aiocb_siblings);
+}
+
+/*
+ * Build a newly allocated SocketAddress for connecting to the server.
+ *
+ * If @path is non-null, the result is a Unix socket address for @path
+ * and @host / @port are not looked at.  Otherwise the result is an inet
+ * address, where a null @host falls back to SD_DEFAULT_ADDR and a null
+ * @port falls back to SD_DEFAULT_PORT.
+ *
+ * All strings are duplicated, so the arguments need not outlive the
+ * returned address.
+ */
+static SocketAddress *sd_socket_address(const char *path,
+ const char *host, const char *port)
+{
+ SocketAddress *addr = g_new0(SocketAddress, 1);
+
+ if (path) {
+ addr->type = SOCKET_ADDRESS_KIND_UNIX;
+ addr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+ addr->u.q_unix.data->path = g_strdup(path);
+ } else {
+ addr->type = SOCKET_ADDRESS_KIND_INET;
+ addr->u.inet.data = g_new0(InetSocketAddress, 1);
+ addr->u.inet.data->host = g_strdup(host ?: SD_DEFAULT_ADDR);
+ addr->u.inet.data->port = g_strdup(port ?: stringify(SD_DEFAULT_PORT));
+ }
+
+ return addr;
}
/* Return -EIO in case of error, file descriptor on success */
{
int fd;
- if (s->is_unix) {
- fd = unix_connect(s->host_spec, errp);
- } else {
- fd = inet_connect(s->host_spec, errp);
+ fd = socket_connect(s->addr, errp, NULL, NULL);
- if (fd >= 0) {
- int ret = socket_set_nodelay(fd);
- if (ret < 0) {
- error_report("%s", strerror(errno));
- }
+ if (s->addr->type == SOCKET_ADDRESS_KIND_INET && fd >= 0) {
+ int ret = socket_set_nodelay(fd);
+ if (ret < 0) {
+ error_report("%s", strerror(errno));
}
}
return ret;
}
-static void restart_co_req(void *opaque)
-{
- Coroutine *co = opaque;
-
- qemu_coroutine_enter(co);
-}
-
typedef struct SheepdogReqCo {
int sockfd;
BlockDriverState *bs;
unsigned int *rlen;
int ret;
bool finished;
+ Coroutine *co;
} SheepdogReqCo;
+static void restart_co_req(void *opaque)
+{
+ SheepdogReqCo *srco = opaque;
+
+ aio_co_wake(srco->co);
+}
+
static coroutine_fn void do_co_req(void *opaque)
{
int ret;
- Coroutine *co;
SheepdogReqCo *srco = opaque;
int sockfd = srco->sockfd;
SheepdogReq *hdr = srco->hdr;
unsigned int *wlen = srco->wlen;
unsigned int *rlen = srco->rlen;
- co = qemu_coroutine_self();
+ srco->co = qemu_coroutine_self();
aio_set_fd_handler(srco->aio_context, sockfd, false,
- NULL, restart_co_req, co);
+ NULL, restart_co_req, NULL, srco);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
}
aio_set_fd_handler(srco->aio_context, sockfd, false,
- restart_co_req, NULL, co);
+ restart_co_req, NULL, NULL, srco);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
aio_set_fd_handler(srco->aio_context, sockfd, false,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
+ srco->co = NULL;
srco->ret = ret;
srco->finished = true;
if (srco->bs) {
AIOReq *aio_req, *next;
aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
close(s->fd);
s->fd = -1;
while (!QLIST_EMPTY(&s->failed_aio_head)) {
aio_req = QLIST_FIRST(&s->failed_aio_head);
QLIST_REMOVE(aio_req, aio_siblings);
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
resend_aioreq(s, aio_req);
}
}
switch (acb->aiocb_type) {
case AIOCB_WRITE_UDATA:
- /* this coroutine context is no longer suitable for co_recv
- * because we may send data to update vdi objects */
- s->co_recv = NULL;
if (!is_data_obj(aio_req->oid)) {
break;
}
case AIOCB_DISCARD_OBJ:
switch (rsp.result) {
case SD_RES_INVALID_PARMS:
- error_report("sheep(%s) doesn't support discard command",
- s->host_spec);
+ error_report("server doesn't support discard command");
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
}
}
+ /* No more data for this aio_req (reload_inode below uses its own file
+ * descriptor handler which doesn't use co_recv).
+ */
+ s->co_recv = NULL;
+
+ QLIST_REMOVE(aio_req, aio_siblings);
switch (rsp.result) {
case SD_RES_SUCCESS:
break;
aio_req->oid = vid_to_vdi_oid(s->inode.vdi_id);
}
resend_aioreq(s, aio_req);
- goto out;
+ return;
default:
acb->ret = -EIO;
error_report("%s", sd_strerror(rsp.result));
break;
}
- free_aio_req(s, aio_req);
- if (!acb->nr_pending) {
+ g_free(aio_req);
+
+ if (!--acb->nr_pending) {
/*
* We've finished all requests which belong to the AIOCB, so
* we can switch back to sd_co_readv/writev now.
*/
- acb->aio_done_func(acb);
+ aio_co_wake(acb->coroutine);
}
-out:
- s->co_recv = NULL;
+
return;
+
err:
- s->co_recv = NULL;
reconnect_to_sdog(opaque);
}
s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
- qemu_coroutine_enter(s->co_recv);
+ aio_co_wake(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
- qemu_coroutine_enter(s->co_send);
+ aio_co_wake(s->co_send);
}
/*
}
aio_set_fd_handler(s->aio_context, fd, false,
- co_read_response, NULL, s);
+ co_read_response, NULL, NULL, s);
return fd;
}
-static int sd_parse_uri(BDRVSheepdogState *s, const char *filename,
- char *vdi, uint32_t *snapid, char *tag)
+/*
+ * Parse numeric snapshot ID in @str
+ * If @str can't be parsed as number, return false.
+ * Else, if the number is zero or too large, set *@snapid to zero and
+ * return true.
+ * Else, set *@snapid to the number and return true.
+ * *@snapid is left untouched when false is returned.
+ */
+static bool sd_parse_snapid(const char *str, uint32_t *snapid)
+{
+ unsigned long ul;
+ int ret;
+
+ ret = qemu_strtoul(str, NULL, 10, &ul);
+ /* -ERANGE is folded into the "too large" case: zero, not a failure */
+ if (ret == -ERANGE) {
+ ul = ret = 0;
+ }
+ if (ret) {
+ return false;
+ }
+ /* Same "too large" treatment for values that don't fit uint32_t */
+ if (ul > UINT32_MAX) {
+ ul = 0;
+ }
+
+ *snapid = ul;
+ return true;
+}
+
+/*
+ * Interpret @str as either a numeric snapshot ID or a snapshot tag.
+ *
+ * If @str parses as a number, it must be a usable ID: a zero or
+ * too-large number makes this function return false.  Otherwise
+ * *@snapid is set to the number, @tag becomes the empty string, and
+ * true is returned.
+ *
+ * If @str does not parse as a number, it is taken as a tag: *@snapid
+ * is set to zero and @str is copied into @tag, which must have room
+ * for SD_MAX_VDI_TAG_LEN bytes.  Return false if @str doesn't fit,
+ * true otherwise.
+ */
+static bool sd_parse_snapid_or_tag(const char *str,
+ uint32_t *snapid, char tag[])
{
+ if (!sd_parse_snapid(str, snapid)) {
+ *snapid = 0;
+ if (g_strlcpy(tag, str, SD_MAX_VDI_TAG_LEN) >= SD_MAX_VDI_TAG_LEN) {
+ return false;
+ }
+ } else if (!*snapid) {
+ return false;
+ } else {
+ tag[0] = 0;
+ }
+ return true;
+}
+
+/*
+ * Result of parsing a filename with sd_parse_uri() / parse_vdiname().
+ * @path and @host are not copies: they point into @qp and @uri, so
+ * they are only usable until sd_config_done() is called.
+ */
+typedef struct {
+ const char *path; /* non-null iff transport is unix */
+ const char *host; /* valid when transport is tcp */
+ int port; /* valid when transport is tcp */
+ char vdi[SD_MAX_VDI_LEN];
+ char tag[SD_MAX_VDI_TAG_LEN];
+ uint32_t snap_id;
+ /* Remainder is only for sd_config_done() */
URI *uri;
+ QueryParams *qp;
+} SheepdogConfig;
+
+/*
+ * Release the parsed URI and query parameters held by @cfg.
+ * Pointers in @cfg that refer into them (path, host) must not be
+ * used afterwards.  @cfg itself is not freed.
+ */
+static void sd_config_done(SheepdogConfig *cfg)
+{
+ if (cfg->qp) {
+ query_params_free(cfg->qp);
+ }
+ uri_free(cfg->uri);
+}
+
+static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
+ Error **errp)
+{
+ Error *err = NULL;
QueryParams *qp = NULL;
- int ret = 0;
+ bool is_unix;
+ URI *uri;
+
+ memset(cfg, 0, sizeof(*cfg));
- uri = uri_parse(filename);
+ cfg->uri = uri = uri_parse(filename);
if (!uri) {
- return -EINVAL;
+ error_setg(&err, "invalid URI");
+ goto out;
}
/* transport */
if (!strcmp(uri->scheme, "sheepdog")) {
- s->is_unix = false;
+ is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+tcp")) {
- s->is_unix = false;
+ is_unix = false;
} else if (!strcmp(uri->scheme, "sheepdog+unix")) {
- s->is_unix = true;
+ is_unix = true;
} else {
- ret = -EINVAL;
+ error_setg(&err, "URI scheme must be 'sheepdog', 'sheepdog+tcp',"
+ " or 'sheepdog+unix'");
goto out;
}
if (uri->path == NULL || !strcmp(uri->path, "/")) {
- ret = -EINVAL;
+ error_setg(&err, "missing file path in URI");
goto out;
}
- pstrcpy(vdi, SD_MAX_VDI_LEN, uri->path + 1);
-
- qp = query_params_parse(uri->query);
- if (qp->n > 1 || (s->is_unix && !qp->n) || (!s->is_unix && qp->n)) {
- ret = -EINVAL;
+ if (g_strlcpy(cfg->vdi, uri->path + 1, SD_MAX_VDI_LEN)
+ >= SD_MAX_VDI_LEN) {
+ error_setg(&err, "VDI name is too long");
goto out;
}
- if (s->is_unix) {
+ cfg->qp = qp = query_params_parse(uri->query);
+
+ if (is_unix) {
/* sheepdog+unix:///vdiname?socket=path */
- if (uri->server || uri->port || strcmp(qp->p[0].name, "socket")) {
- ret = -EINVAL;
+ if (uri->server || uri->port) {
+ error_setg(&err, "URI scheme %s doesn't accept a server address",
+ uri->scheme);
goto out;
}
- s->host_spec = g_strdup(qp->p[0].value);
+ if (!qp->n) {
+ error_setg(&err,
+ "URI scheme %s requires query parameter 'socket'",
+ uri->scheme);
+ goto out;
+ }
+ if (qp->n != 1 || strcmp(qp->p[0].name, "socket")) {
+ error_setg(&err, "unexpected query parameters");
+ goto out;
+ }
+ cfg->path = qp->p[0].value;
} else {
/* sheepdog[+tcp]://[host:port]/vdiname */
- s->host_spec = g_strdup_printf("%s:%d", uri->server ?: SD_DEFAULT_ADDR,
- uri->port ?: SD_DEFAULT_PORT);
+ if (qp->n) {
+ error_setg(&err, "unexpected query parameters");
+ goto out;
+ }
+ cfg->host = uri->server;
+ cfg->port = uri->port;
}
/* snapshot tag */
if (uri->fragment) {
- *snapid = strtoul(uri->fragment, NULL, 10);
- if (*snapid == 0) {
- pstrcpy(tag, SD_MAX_VDI_TAG_LEN, uri->fragment);
+ if (!sd_parse_snapid_or_tag(uri->fragment,
+ &cfg->snap_id, cfg->tag)) {
+ error_setg(&err, "'%s' is not a valid snapshot ID",
+ uri->fragment);
+ goto out;
}
} else {
- *snapid = CURRENT_VDI_ID; /* search current vdi */
+ cfg->snap_id = CURRENT_VDI_ID; /* search current vdi */
}
out:
- if (qp) {
- query_params_free(qp);
+ if (err) {
+ error_propagate(errp, err);
+ sd_config_done(cfg);
}
- uri_free(uri);
- return ret;
}
/*
* You can run VMs outside the Sheepdog cluster by specifying
* `hostname' and `port' (experimental).
*/
-static int parse_vdiname(BDRVSheepdogState *s, const char *filename,
- char *vdi, uint32_t *snapid, char *tag)
+static void parse_vdiname(SheepdogConfig *cfg, const char *filename,
+ Error **errp)
{
+ Error *err = NULL;
char *p, *q, *uri;
const char *host_spec, *vdi_spec;
- int nr_sep, ret;
+ int nr_sep;
strstart(filename, "sheepdog:", &filename);
p = q = g_strdup(filename);
uri = g_strdup_printf("sheepdog://%s/%s", host_spec, vdi_spec);
- ret = sd_parse_uri(s, uri, vdi, snapid, tag);
+ /*
+ * FIXME We need to escape URI meta-characters, e.g. "x?y=z"
+ * produces "sheepdog://x?y=z". Because of that ...
+ */
+ sd_parse_uri(cfg, uri, &err);
+ if (err) {
+ /*
+ * ... this can fail, but the error message is misleading.
+ * Replace it by the traditional useless one until the
+ * escaping is fixed.
+ */
+ error_free(err);
+ error_setg(errp, "Can't parse filename");
+ }
g_free(q);
g_free(uri);
+}
- return ret;
+/*
+ * Split @filename into individual driver options.
+ *
+ * @filename is parsed as a URI if it contains "://", else with the
+ * legacy parse_vdiname() syntax.  The pieces found are stored in
+ * @options with qdict_set_default_str(): "host", "port" and "path"
+ * only when present, "vdi" and "tag" always, and "snap-id" only when
+ * non-zero.
+ *
+ * On parse failure, the error is stored in @errp and @options is left
+ * alone.  No cleanup is needed on that path: sd_parse_uri() already
+ * calls sd_config_done() itself when it fails.
+ */
+static void sd_parse_filename(const char *filename, QDict *options,
+                              Error **errp)
+{
+    Error *err = NULL;
+    SheepdogConfig cfg;
+    char buf[32];
+
+    if (strstr(filename, "://")) {
+        sd_parse_uri(&cfg, filename, &err);
+    } else {
+        parse_vdiname(&cfg, filename, &err);
+    }
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    if (cfg.host) {
+        qdict_set_default_str(options, "host", cfg.host);
+    }
+    if (cfg.port) {
+        snprintf(buf, sizeof(buf), "%d", cfg.port);
+        qdict_set_default_str(options, "port", buf);
+    }
+    if (cfg.path) {
+        qdict_set_default_str(options, "path", cfg.path);
+    }
+    qdict_set_default_str(options, "vdi", cfg.vdi);
+    qdict_set_default_str(options, "tag", cfg.tag);
+    if (cfg.snap_id) {
+        /*
+         * snap_id is uint32_t; format it as unsigned so that IDs above
+         * INT_MAX are not turned into negative numbers.
+         */
+        snprintf(buf, sizeof(buf), "%" PRIu32, cfg.snap_id);
+        qdict_set_default_str(options, "snap-id", buf);
+    }
+
+    /* cfg.host / cfg.path point into cfg.uri / cfg.qp: free them last */
+    sd_config_done(&cfg);
}
static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
uint64_t old_oid = aio_req->base_oid;
bool create = aio_req->create;
+ QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
+
if (!nr_copies) {
error_report("bug");
}
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
aio_set_fd_handler(s->aio_context, s->fd, false,
- co_read_response, co_write_request, s);
+ co_read_response, co_write_request, NULL, s);
socket_set_cork(s->fd, 1);
/* send a header */
out:
socket_set_cork(s->fd, 0);
aio_set_fd_handler(s->aio_context, s->fd, false,
- co_read_response, NULL, s);
+ co_read_response, NULL, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
BDRVSheepdogState *s = bs->opaque;
aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
- NULL, NULL);
+ NULL, NULL, NULL);
}
static void sd_attach_aio_context(BlockDriverState *bs,
s->aio_context = new_context;
aio_set_fd_handler(new_context, s->fd, false,
- co_read_response, NULL, s);
+ co_read_response, NULL, NULL, s);
}
-/* TODO Convert to fine grained options */
static QemuOptsList runtime_opts = {
.name = "sheepdog",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
.desc = {
{
- .name = "filename",
+ .name = "host",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "port",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "vdi",
+ .type = QEMU_OPT_STRING,
+ },
+ {
+ .name = "snap-id",
+ .type = QEMU_OPT_NUMBER,
+ },
+ {
+ .name = "tag",
.type = QEMU_OPT_STRING,
- .help = "URL to the sheepdog image",
},
{ /* end of list */ }
},
int ret, fd;
uint32_t vid = 0;
BDRVSheepdogState *s = bs->opaque;
- char vdi[SD_MAX_VDI_LEN], tag[SD_MAX_VDI_TAG_LEN];
- uint32_t snapid;
+ const char *host, *port, *path, *vdi, *snap_id_str, *tag;
+ uint64_t snap_id;
char *buf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
- const char *filename;
s->bs = bs;
s->aio_context = bdrv_get_aio_context(bs);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
- goto out;
+ goto err_no_fd;
}
- filename = qemu_opt_get(opts, "filename");
+ host = qemu_opt_get(opts, "host");
+ port = qemu_opt_get(opts, "port");
+ path = qemu_opt_get(opts, "path");
+ vdi = qemu_opt_get(opts, "vdi");
+ snap_id_str = qemu_opt_get(opts, "snap-id");
+ snap_id = qemu_opt_get_number(opts, "snap-id", CURRENT_VDI_ID);
+ tag = qemu_opt_get(opts, "tag");
- QLIST_INIT(&s->inflight_aio_head);
- QLIST_INIT(&s->failed_aio_head);
- QLIST_INIT(&s->inflight_aiocb_head);
- s->fd = -1;
+ if ((host || port) && path) {
+ error_setg(errp, "can't use 'path' together with 'host' or 'port'");
+ ret = -EINVAL;
+ goto err_no_fd;
+ }
- memset(vdi, 0, sizeof(vdi));
- memset(tag, 0, sizeof(tag));
+ if (!vdi) {
+ error_setg(errp, "parameter 'vdi' is missing");
+ ret = -EINVAL;
+ goto err_no_fd;
+ }
+ if (strlen(vdi) >= SD_MAX_VDI_LEN) {
+ error_setg(errp, "value of parameter 'vdi' is too long");
+ ret = -EINVAL;
+ goto err_no_fd;
+ }
- if (strstr(filename, "://")) {
- ret = sd_parse_uri(s, filename, vdi, &snapid, tag);
- } else {
- ret = parse_vdiname(s, filename, vdi, &snapid, tag);
+ if (snap_id > UINT32_MAX) {
+ snap_id = 0;
}
- if (ret < 0) {
- error_setg(errp, "Can't parse filename");
- goto out;
+ if (snap_id_str && !snap_id) {
+ error_setg(errp, "'snap-id=%s' is not a valid snapshot ID",
+ snap_id_str);
+ ret = -EINVAL;
+ goto err_no_fd;
+ }
+
+ if (!tag) {
+ tag = "";
}
+ if (tag && strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
+ error_setg(errp, "value of parameter 'tag' is too long");
+ ret = -EINVAL;
+ goto err_no_fd;
+ }
+
+ s->addr = sd_socket_address(path, host, port);
+
+ QLIST_INIT(&s->inflight_aio_head);
+ QLIST_INIT(&s->failed_aio_head);
+ QLIST_INIT(&s->inflight_aiocb_head);
+
s->fd = get_sheep_fd(s, errp);
if (s->fd < 0) {
ret = s->fd;
- goto out;
+ goto err_no_fd;
}
- ret = find_vdi_name(s, vdi, snapid, tag, &vid, true, errp);
+ ret = find_vdi_name(s, vdi, (uint32_t)snap_id, tag, &vid, true, errp);
if (ret) {
- goto out;
+ goto err;
}
/*
}
s->discard_supported = true;
- if (snapid || tag[0] != '\0') {
+ if (snap_id || tag[0]) {
DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
s->is_snapshot = true;
}
fd = connect_to_sdog(s, errp);
if (fd < 0) {
ret = fd;
- goto out;
+ goto err;
}
buf = g_malloc(SD_INODE_SIZE);
if (ret) {
error_setg(errp, "Can't read snapshot inode");
- goto out;
+ goto err;
}
memcpy(&s->inode, buf, sizeof(s->inode));
qemu_opts_del(opts);
g_free(buf);
return 0;
-out:
+
+err:
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
- false, NULL, NULL, NULL);
- if (s->fd >= 0) {
- closesocket(s->fd);
- }
+ false, NULL, NULL, NULL, NULL);
+ closesocket(s->fd);
+err_no_fd:
qemu_opts_del(opts);
g_free(buf);
return ret;
if (s->fd) {
aio_set_fd_handler(s->aio_context, s->fd, false,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
closesocket(s->fd);
}
if (re_s->fd) {
aio_set_fd_handler(s->aio_context, re_s->fd, false,
- NULL, NULL, NULL);
+ NULL, NULL, NULL, NULL);
closesocket(re_s->fd);
}
int ret;
blk = blk_new_open(filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
if (blk == NULL) {
ret = -EIO;
goto out_with_err_set;
}
copy = strtol(n1, NULL, 10);
+ /* FIXME fix error checking by switching to qemu_strtol() */
if (copy > SD_MAX_COPIES || copy < 1) {
return -EINVAL;
}
}
parity = strtol(n2, NULL, 10);
+ /* FIXME fix error checking by switching to qemu_strtol() */
if (parity >= SD_EC_MAX_STRIP || parity < 1) {
return -EINVAL;
}
static int sd_create(const char *filename, QemuOpts *opts,
Error **errp)
{
+ Error *err = NULL;
int ret = 0;
uint32_t vid = 0;
char *backing_file = NULL;
char *buf = NULL;
BDRVSheepdogState *s;
- char tag[SD_MAX_VDI_TAG_LEN];
- uint32_t snapid;
+ SheepdogConfig cfg;
uint64_t max_vdi_size;
bool prealloc = false;
s = g_new0(BDRVSheepdogState, 1);
- memset(tag, 0, sizeof(tag));
if (strstr(filename, "://")) {
- ret = sd_parse_uri(s, filename, s->name, &snapid, tag);
+ sd_parse_uri(&cfg, filename, &err);
} else {
- ret = parse_vdiname(s, filename, s->name, &snapid, tag);
+ parse_vdiname(&cfg, filename, &err);
}
- if (ret < 0) {
- error_setg(errp, "Can't parse filename");
+ if (err) {
+ error_propagate(errp, err);
goto out;
}
+ buf = cfg.port ? g_strdup_printf("%d", cfg.port) : NULL;
+ s->addr = sd_socket_address(cfg.path, cfg.host, buf);
+ g_free(buf);
+ strcpy(s->name, cfg.vdi);
+ sd_config_done(&cfg);
+
s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
if (s->inode.block_size_shift == 0) {
SheepdogVdiReq hdr;
SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
- Error *local_err = NULL;
int fd;
unsigned int wlen = 0, rlen = 0;
- fd = connect_to_sdog(s, &local_err);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
- error_report_err(local_err);
- ret = -EIO;
+ ret = fd;
goto out;
}
}
aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
- false, NULL, NULL, NULL);
+ false, NULL, NULL, NULL, NULL);
closesocket(s->fd);
- g_free(s->host_spec);
+ qapi_free_SocketAddress(s->addr);
}
static int64_t sd_getlength(BlockDriverState *bs)
/*
* This function is called after writing data objects. If we need to
* update metadata, this sends a write request to the vdi object.
- * Otherwise, this switches back to sd_co_readv/writev.
*/
static void coroutine_fn sd_write_done(SheepdogAIOCB *acb)
{
- BDRVSheepdogState *s = acb->common.bs->opaque;
+ BDRVSheepdogState *s = acb->s;
struct iovec iov;
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
+ ++acb->nr_pending;
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
iov.iov_len = sizeof(s->inode);
aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
data_len, offset, 0, false, 0, offset);
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
add_aio_request(s, aio_req, &iov, 1, AIOCB_WRITE_UDATA);
-
- acb->aio_done_func = sd_finish_aiocb;
- acb->aiocb_type = AIOCB_WRITE_UDATA;
- return;
+ if (--acb->nr_pending) {
+ qemu_coroutine_yield();
+ }
}
-
- sd_finish_aiocb(acb);
}
/* Delete current working VDI on the snapshot chain */
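- * Returns 1 when we need to wait a response, 0 when there is no sent
- * request and -errno in error cases.
+ * Returns nothing: errors are recorded in acb->ret.  Yields until the
+ * requests sent for @acb are complete, if any are still pending.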
*/
-static int coroutine_fn sd_co_rw_vector(void *p)
+static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
{
- SheepdogAIOCB *acb = p;
int ret = 0;
unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
unsigned long idx;
uint32_t object_size;
uint64_t oid;
uint64_t offset;
- BDRVSheepdogState *s = acb->common.bs->opaque;
+ BDRVSheepdogState *s = acb->s;
SheepdogInode *inode = &s->inode;
AIOReq *aio_req;
ret = sd_create_branch(s);
if (ret) {
acb->ret = -EIO;
- goto out;
+ return;
}
}
old_oid,
acb->aiocb_type == AIOCB_DISCARD_OBJ ?
0 : done);
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
-
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
idx++;
done += len;
}
-out:
- if (!--acb->nr_pending) {
- return acb->ret;
+ if (--acb->nr_pending) {
+ qemu_coroutine_yield();
}
- return 1;
}
-static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static void sd_aio_complete(SheepdogAIOCB *acb)
{
- SheepdogAIOCB *cb;
-
- QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
- if (AIOCBOverlapping(aiocb, cb)) {
- return true;
- }
+ if (acb->aiocb_type == AIOCB_FLUSH_CACHE) {
+ return;
}
- QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
- return false;
+ QLIST_REMOVE(acb, aiocb_siblings);
+ qemu_co_queue_restart_all(&acb->s->overlapping_queue);
}
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- SheepdogAIOCB *acb;
+ SheepdogAIOCB acb;
int ret;
int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
BDRVSheepdogState *s = bs->opaque;
}
}
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
- acb->aio_done_func = sd_write_done;
- acb->aiocb_type = AIOCB_WRITE_UDATA;
-
-retry:
- if (check_overlapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overlapping_queue);
- goto retry;
- }
+ sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_WRITE_UDATA);
+ sd_co_rw_vector(&acb);
+ sd_write_done(&acb);
+ sd_aio_complete(&acb);
- ret = sd_co_rw_vector(acb);
- if (ret <= 0) {
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
- qemu_aio_unref(acb);
- return ret;
- }
-
- qemu_coroutine_yield();
-
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
-
- return acb->ret;
+ return acb.ret;
}
static coroutine_fn int sd_co_readv(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
- SheepdogAIOCB *acb;
- int ret;
+ SheepdogAIOCB acb;
BDRVSheepdogState *s = bs->opaque;
- acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
- acb->aiocb_type = AIOCB_READ_UDATA;
- acb->aio_done_func = sd_finish_aiocb;
-
-retry:
- if (check_overlapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overlapping_queue);
- goto retry;
- }
-
- ret = sd_co_rw_vector(acb);
- if (ret <= 0) {
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
- qemu_aio_unref(acb);
- return ret;
- }
+ sd_aio_setup(&acb, s, qiov, sector_num, nb_sectors, AIOCB_READ_UDATA);
+ sd_co_rw_vector(&acb);
+ sd_aio_complete(&acb);
- qemu_coroutine_yield();
-
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
- return acb->ret;
+ return acb.ret;
}
static int coroutine_fn sd_co_flush_to_disk(BlockDriverState *bs)
{
BDRVSheepdogState *s = bs->opaque;
- SheepdogAIOCB *acb;
+ SheepdogAIOCB acb;
AIOReq *aio_req;
if (s->cache_flags != SD_FLAG_CMD_CACHE) {
return 0;
}
- acb = sd_aio_setup(bs, NULL, 0, 0);
- acb->aiocb_type = AIOCB_FLUSH_CACHE;
- acb->aio_done_func = sd_finish_aiocb;
+ sd_aio_setup(&acb, s, NULL, 0, 0, AIOCB_FLUSH_CACHE);
- aio_req = alloc_aio_req(s, acb, vid_to_vdi_oid(s->inode.vdi_id),
+ acb.nr_pending++;
+ aio_req = alloc_aio_req(s, &acb, vid_to_vdi_oid(s->inode.vdi_id),
0, 0, 0, false, 0, 0);
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, NULL, 0, acb->aiocb_type);
+ add_aio_request(s, aio_req, NULL, 0, acb.aiocb_type);
- qemu_coroutine_yield();
- return acb->ret;
+ if (--acb.nr_pending) {
+ qemu_coroutine_yield();
+ }
+
+ sd_aio_complete(&acb);
+ return acb.ret;
}
static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
BDRVSheepdogState *old_s;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid = 0;
- int ret = 0;
+ int ret;
+
+ if (!sd_parse_snapid_or_tag(snapshot_id, &snapid, tag)) {
+ return -EINVAL;
+ }
old_s = g_new(BDRVSheepdogState, 1);
memcpy(old_s, s, sizeof(BDRVSheepdogState));
- snapid = strtoul(snapshot_id, NULL, 10);
- if (snapid) {
- tag[0] = 0;
- } else {
- pstrcpy(tag, sizeof(tag), snapshot_id);
- }
-
ret = reload_inode(s, snapid, tag);
if (ret) {
goto out;
#define NR_BATCHED_DISCARD 128
-static bool remove_objects(BDRVSheepdogState *s)
+static int remove_objects(BDRVSheepdogState *s, Error **errp)
{
int fd, i = 0, nr_objs = 0;
- Error *local_err = NULL;
- int ret = 0;
- bool result = true;
+ int ret;
SheepdogInode *inode = &s->inode;
- fd = connect_to_sdog(s, &local_err);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
- error_report_err(local_err);
- return false;
+ return fd;
}
nr_objs = count_data_objs(inode);
data_vdi_id[start_idx]),
false, s->cache_flags);
if (ret < 0) {
- error_report("failed to discard snapshot inode.");
- result = false;
+ error_setg(errp, "Failed to discard snapshot inode");
goto out;
}
}
+ ret = 0;
out:
closesocket(fd);
- return result;
+ return ret;
}
static int sd_snapshot_delete(BlockDriverState *bs,
const char *name,
Error **errp)
{
+ /*
+ * FIXME should delete the snapshot matching both @snapshot_id and
+ * @name, but @name is not used here
+ */
unsigned long snap_id = 0;
char snap_tag[SD_MAX_VDI_TAG_LEN];
- Error *local_err = NULL;
int fd, ret;
char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
BDRVSheepdogState *s = bs->opaque;
};
SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
- if (!remove_objects(s)) {
- return -1;
+ ret = remove_objects(s, errp);
+ if (ret) {
+ return ret;
}
memset(buf, 0, sizeof(buf));
memset(snap_tag, 0, sizeof(snap_tag));
pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+ /* TODO Use sd_parse_snapid() once this mess is cleaned up */
ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
if (ret || snap_id > UINT32_MAX) {
+ /*
+ * FIXME Since qemu_strtoul() returns -EINVAL when
+ * @snapshot_id is null, @snapshot_id is mandatory. Correct
+ * would be to require at least one of @snapshot_id and @name.
+ */
error_setg(errp, "Invalid snapshot ID: %s",
snapshot_id ? snapshot_id : "<null>");
return -EINVAL;
if (snap_id) {
hdr.snapid = (uint32_t) snap_id;
} else {
+ /* FIXME I suspect we should use @name here */
+ /* FIXME don't truncate silently */
pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
}
- ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
- &local_err);
+ ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true, errp);
if (ret) {
return ret;
}
- fd = connect_to_sdog(s, &local_err);
+ fd = connect_to_sdog(s, errp);
if (fd < 0) {
- error_report_err(local_err);
- return -1;
+ return fd;
}
ret = do_req(fd, s->bs, (SheepdogReq *)&hdr,
buf, &wlen, &rlen);
closesocket(fd);
if (ret) {
+ error_setg_errno(errp, -ret, "Couldn't send request to server");
return ret;
}
switch (rsp->result) {
case SD_RES_NO_VDI:
- error_report("%s was already deleted", s->name);
+ error_setg(errp, "Can't find the snapshot");
+ return -ENOENT;
case SD_RES_SUCCESS:
break;
default:
- error_report("%s, %s", sd_strerror(rsp->result), s->name);
- return -1;
+ error_setg(errp, "%s", sd_strerror(rsp->result));
+ return -EIO;
}
- return ret;
+ return 0;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
int count)
{
- SheepdogAIOCB *acb;
+ SheepdogAIOCB acb;
BDRVSheepdogState *s = bs->opaque;
- int ret;
QEMUIOVector discard_iov;
struct iovec iov;
uint32_t zero = 0;
if (!QEMU_IS_ALIGNED(offset | count, BDRV_SECTOR_SIZE)) {
return -ENOTSUP;
}
- acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
- count >> BDRV_SECTOR_BITS);
- acb->aiocb_type = AIOCB_DISCARD_OBJ;
- acb->aio_done_func = sd_finish_aiocb;
-
-retry:
- if (check_overlapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overlapping_queue);
- goto retry;
- }
-
- ret = sd_co_rw_vector(acb);
- if (ret <= 0) {
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
- qemu_aio_unref(acb);
- return ret;
- }
-
- qemu_coroutine_yield();
-
- QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overlapping_queue);
+ sd_aio_setup(&acb, s, &discard_iov, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS, AIOCB_DISCARD_OBJ);
+ sd_co_rw_vector(&acb);
+ sd_aio_complete(&acb);
- return acb->ret;
+ return acb.ret;
}
static coroutine_fn int64_t
.format_name = "sheepdog",
.protocol_name = "sheepdog",
.instance_size = sizeof(BDRVSheepdogState),
- .bdrv_needs_filename = true,
+ .bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,
.format_name = "sheepdog",
.protocol_name = "sheepdog+tcp",
.instance_size = sizeof(BDRVSheepdogState),
- .bdrv_needs_filename = true,
+ .bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,
.format_name = "sheepdog",
.protocol_name = "sheepdog+unix",
.instance_size = sizeof(BDRVSheepdogState),
- .bdrv_needs_filename = true,
+ .bdrv_parse_filename = sd_parse_filename,
.bdrv_file_open = sd_open,
.bdrv_reopen_prepare = sd_reopen_prepare,
.bdrv_reopen_commit = sd_reopen_commit,