#include "qapi/error.h"
#include "trace.h"
#include "nbd-internal.h"
+#include "qemu/units.h"
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_DIRTY_BITMAP 1
-/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
* constant. If an increase is needed, note that the NBD protocol
* recommends no larger than 32 mb, so that the client won't consider
- * the reply as a denial of service attack. */
-#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8)
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
static int system_errno_to_nbd_errno(int err)
{
BlockBackend *blk;
char *name;
char *description;
- off_t dev_offset;
- off_t size;
+ uint64_t dev_offset;
+ uint64_t size;
uint16_t nbdflags;
QTAILQ_HEAD(, NBDClient) clients;
QTAILQ_ENTRY(NBDExport) next;
NBDExport *exp;
QCryptoTLSCreds *tlscreds;
- char *tlsaclname;
+ char *tlsauthz;
QIOChannelSocket *sioc; /* The underlying data channel */
QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
int nb_requests;
bool closing;
+ uint32_t check_align; /* If non-zero, check for aligned client requests */
+
bool structured_reply;
NBDExportMetaContexts export_meta;
error_setg(errp, "Bad length received");
return -EINVAL;
}
- if (nbd_read(client->ioc, name, client->optlen, errp) < 0) {
- error_prepend(errp, "read failed: ");
+ if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
return -EIO;
}
name[client->optlen] = '\0';
bool blocksize = false;
uint32_t sizes[3];
char buf[sizeof(uint64_t) + sizeof(uint16_t)];
+ uint32_t check_align = 0;
/* Client sends:
4 bytes: L, name length (can be 0)
/* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
* according to whether the client requested it, and according to
* whether this is OPT_INFO or OPT_GO. */
- /* minimum - 1 for back-compat, or 512 if client is new enough.
- * TODO: consult blk_bs(blk)->bl.request_alignment? */
- sizes[0] =
- (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
+ /* minimum - 1 for back-compat, or actual if client will obey it. */
+ if (client->opt == NBD_OPT_INFO || blocksize) {
+ check_align = sizes[0] = blk_get_request_alignment(exp->blk);
+ } else {
+ sizes[0] = 1;
+ }
+ assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
/* preferred - Hard-code to 4096 for now.
* TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
- sizes[1] = 4096;
+ sizes[1] = MAX(4096, sizes[0]);
/* maximum - At most 32M, but smaller as appropriate. */
sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
return rc;
}
- /* If the client is just asking for NBD_OPT_INFO, but forgot to
- * request block sizes, return an error.
- * TODO: consult blk_bs(blk)->request_align, and only error if it
- * is not 1? */
- if (client->opt == NBD_OPT_INFO && !blocksize) {
+ /*
+ * If the client is just asking for NBD_OPT_INFO, but forgot to
+ * request block sizes in a situation that would impact
+ * performance, then return an error. But for NBD_OPT_GO, we
+ * tolerate all clients, regardless of alignments.
+ */
+ if (client->opt == NBD_OPT_INFO && !blocksize &&
+ blk_get_request_alignment(exp->blk) > 1) {
return nbd_negotiate_send_rep_err(client,
NBD_REP_ERR_BLOCK_SIZE_REQD,
errp,
if (client->opt == NBD_OPT_GO) {
client->exp = exp;
+ client->check_align = check_align;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
nbd_check_meta_export(client);
tioc = qio_channel_tls_new_server(ioc,
client->tlscreds,
- client->tlsaclname,
+ client->tlsauthz,
errp);
if (!tioc) {
return NULL;
... Rest of request
*/
- if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
- error_prepend(errp, "read failed: ");
+ if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
return -EIO;
}
- flags = be32_to_cpu(flags);
trace_nbd_negotiate_options_flags(flags);
if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
fixedNewstyle = true;
uint32_t option, length;
uint64_t magic;
- if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
- error_prepend(errp, "read failed: ");
+ if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
return -EINVAL;
}
- magic = be64_to_cpu(magic);
trace_nbd_negotiate_options_check_magic(magic);
if (magic != NBD_OPTS_MAGIC) {
error_setg(errp, "Bad magic received");
return -EINVAL;
}
- if (nbd_read(client->ioc, &option,
- sizeof(option), errp) < 0) {
- error_prepend(errp, "read failed: ");
+ if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
return -EINVAL;
}
- option = be32_to_cpu(option);
client->opt = option;
- if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
- error_prepend(errp, "read failed: ");
+ if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
return -EINVAL;
}
- length = be32_to_cpu(length);
assert(!client->optlen);
client->optlen = length;
uint32_t magic;
int ret;
- ret = nbd_read(ioc, buf, sizeof(buf), errp);
+ ret = nbd_read(ioc, buf, sizeof(buf), "request", errp);
if (ret < 0) {
return ret;
}
if (client->tlscreds) {
object_unref(OBJECT(client->tlscreds));
}
- g_free(client->tlsaclname);
+ g_free(client->tlsauthz);
if (client->exp) {
QTAILQ_REMOVE(&client->exp->clients, client, next);
nbd_export_put(client->exp);
nbd_export_close(exp);
}
-NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
- const char *name, const char *description,
+NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
+ uint64_t size, const char *name, const char *desc,
const char *bitmap, uint16_t nbdflags,
void (*close)(NBDExport *), bool writethrough,
BlockBackend *on_eject_blk, Error **errp)
if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
perm |= BLK_PERM_WRITE;
}
- blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+ blk = blk_new(bdrv_get_aio_context(bs), perm,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto fail;
}
blk_set_enable_write_cache(blk, !writethrough);
+ blk_set_allow_aio_context_change(blk, true);
exp->refcount = 1;
QTAILQ_INIT(&exp->clients);
exp->blk = blk;
+ assert(dev_offset <= INT64_MAX);
exp->dev_offset = dev_offset;
exp->name = g_strdup(name);
- exp->description = g_strdup(description);
+ exp->description = g_strdup(desc);
exp->nbdflags = nbdflags;
- exp->size = size < 0 ? blk_getlength(blk) : size;
- if (exp->size < 0) {
- error_setg_errno(errp, -exp->size,
- "Failed to determine the NBD export's length");
- goto fail;
- }
- exp->size -= exp->size % BDRV_SECTOR_SIZE;
+ assert(size <= INT64_MAX - dev_offset);
+ exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
if (bitmap) {
BdrvDirtyBitmap *bm = NULL;
- BlockDriverState *bs = blk_bs(blk);
while (true) {
bm = bdrv_find_dirty_bitmap(bs, bitmap);
goto fail;
}
+ if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
+ goto fail;
+ }
+
if ((nbdflags & NBD_FLAG_READ_ONLY) && bdrv_is_writable(bs) &&
bdrv_dirty_bitmap_enabled(bm)) {
error_setg(errp,
goto fail;
}
- if (bdrv_dirty_bitmap_user_locked(bm)) {
- error_setg(errp, "Bitmap '%s' is in use", bitmap);
- goto fail;
- }
-
- bdrv_dirty_bitmap_set_qmp_locked(bm, true);
+ bdrv_dirty_bitmap_set_busy(bm, true);
exp->export_bitmap = bm;
exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
bitmap);
}
if (exp->export_bitmap) {
- bdrv_dirty_bitmap_set_qmp_locked(exp->export_bitmap, false);
+ bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
g_free(exp->export_bitmap_context);
}
flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
(ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
- offset += num;
- remaining_bytes -= num;
if (first_extent) {
extent->flags = flags;
extent->length = num;
first_extent = false;
- continue;
- }
-
- if (flags == extent->flags) {
+ } else if (flags == extent->flags) {
/* extend current extent */
extent->length += num;
} else {
extent->flags = flags;
extent->length = num;
}
+ offset += num;
+ remaining_bytes -= num;
}
extents_end = extent + 1;
Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
uint32_t context_id, Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
}
}
if (request->type == NBD_CMD_WRITE) {
- if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
- error_prepend(errp, "reading from socket failed: ");
+ if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
+ errp) < 0)
+ {
return -EIO;
}
req->complete = true;
return -EROFS;
}
if (request->from > client->exp->size ||
- request->from + request->len > client->exp->size) {
+ request->len > client->exp->size - request->from) {
error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
", Size: %" PRIu64, request->from, request->len,
- (uint64_t)client->exp->size);
+ client->exp->size);
return (request->type == NBD_CMD_WRITE ||
request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
}
+ if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
+ client->check_align)) {
+ /*
+ * The block layer gracefully handles unaligned requests, but
+ * it's still worth tracing client non-compliance
+ */
+ trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
+ request->from,
+ request->len,
+ client->check_align);
+ }
valid_flags = NBD_CMD_FLAG_FUA;
if (request->type == NBD_CMD_READ && client->structured_reply) {
valid_flags |= NBD_CMD_FLAG_DF;
*/
void nbd_client_new(QIOChannelSocket *sioc,
QCryptoTLSCreds *tlscreds,
- const char *tlsaclname,
+ const char *tlsauthz,
void (*close_fn)(NBDClient *, bool))
{
NBDClient *client;
if (tlscreds) {
object_ref(OBJECT(client->tlscreds));
}
- client->tlsaclname = g_strdup(tlsaclname);
+ client->tlsauthz = g_strdup(tlsauthz);
client->sioc = sioc;
object_ref(OBJECT(client->sioc));
client->ioc = QIO_CHANNEL(sioc);