#include "qapi/error.h"
#include "trace.h"
#include "nbd-internal.h"
+#include "qemu/units.h"
#define NBD_META_ID_BASE_ALLOCATION 0
#define NBD_META_ID_DIRTY_BITMAP 1
-/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
* constant. If an increase is needed, note that the NBD protocol
* recommends no larger than 32 MiB, so that the client won't consider
- * the reply as a denial of service attack. */
-#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8)
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
static int system_errno_to_nbd_errno(int err)
{
int nb_requests;
bool closing;
+ uint32_t check_align; /* If non-zero, check for aligned client requests */
+
bool structured_reply;
NBDExportMetaContexts export_meta;
bool blocksize = false;
uint32_t sizes[3];
char buf[sizeof(uint64_t) + sizeof(uint16_t)];
+ uint32_t check_align = 0;
/* Client sends:
4 bytes: L, name length (can be 0)
/* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
* according to whether the client requested it, and according to
* whether this is OPT_INFO or OPT_GO. */
- /* minimum - 1 for back-compat, or 512 if client is new enough.
- * TODO: consult blk_bs(blk)->bl.request_alignment? */
- sizes[0] =
- (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
+ /* minimum - 1 for back-compat, or actual if client will obey it. */
+ if (client->opt == NBD_OPT_INFO || blocksize) {
+ check_align = sizes[0] = blk_get_request_alignment(exp->blk);
+ } else {
+ sizes[0] = 1;
+ }
+ assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
/* preferred - 4096 for now, or the minimum size if that is larger.
* TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
- sizes[1] = 4096;
+ sizes[1] = MAX(4096, sizes[0]);
/* maximum - At most 32M, but smaller as appropriate. */
sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
return rc;
}
- /* If the client is just asking for NBD_OPT_INFO, but forgot to
- * request block sizes, return an error.
- * TODO: consult blk_bs(blk)->request_align, and only error if it
- * is not 1? */
- if (client->opt == NBD_OPT_INFO && !blocksize) {
+ /*
+ * If the client is just asking for NBD_OPT_INFO, but forgot to
+ * request block sizes in a situation that would impact
+ * performance, then return an error. But for NBD_OPT_GO, we
+ * tolerate all clients, regardless of alignments.
+ */
+ if (client->opt == NBD_OPT_INFO && !blocksize &&
+ blk_get_request_alignment(exp->blk) > 1) {
return nbd_negotiate_send_rep_err(client,
NBD_REP_ERR_BLOCK_SIZE_REQD,
errp,
if (client->opt == NBD_OPT_GO) {
client->exp = exp;
+ client->check_align = check_align;
QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
nbd_export_get(client->exp);
nbd_check_meta_export(client);
if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
perm |= BLK_PERM_WRITE;
}
- blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+ blk = blk_new(bdrv_get_aio_context(bs), perm,
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
ret = blk_insert_bs(blk, bs, errp);
if (ret < 0) {
goto fail;
}
blk_set_enable_write_cache(blk, !writethrough);
+ blk_set_allow_aio_context_change(blk, true);
exp->refcount = 1;
QTAILQ_INIT(&exp->clients);
goto fail;
}
- if (bdrv_dirty_bitmap_busy(bm)) {
- error_setg(errp, "Bitmap '%s' is in use", bitmap);
+ if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
goto fail;
}
flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
(ret & BDRV_BLOCK_ZERO ? NBD_STATE_ZERO : 0);
- offset += num;
- remaining_bytes -= num;
if (first_extent) {
extent->flags = flags;
extent->length = num;
first_extent = false;
- continue;
- }
-
- if (flags == extent->flags) {
+ } else if (flags == extent->flags) {
/* extend current extent */
extent->length += num;
} else {
extent->flags = flags;
extent->length = num;
}
+ offset += num;
+ remaining_bytes -= num;
}
extents_end = extent + 1;
Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
uint32_t context_id, Error **errp)
{
int ret;
- unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+ unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
NBDExtent *extents = g_new(NBDExtent, nb_extents);
uint64_t final_length = length;
return (request->type == NBD_CMD_WRITE ||
request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
}
+ if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
+ client->check_align)) {
+ /*
+ * The block layer gracefully handles unaligned requests, but
+ * it's still worth tracing client non-compliance
+ */
+ trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
+ request->from,
+ request->len,
+ client->check_align);
+ }
valid_flags = NBD_CMD_FLAG_FUA;
if (request->type == NBD_CMD_READ && client->structured_reply) {
valid_flags |= NBD_CMD_FLAG_DF;