]> git.proxmox.com Git - mirror_qemu.git/blobdiff - nbd/server.c
i386: Update new x86_apicid parsing rules with die_offset support
[mirror_qemu.git] / nbd / server.c
index 51ee8094e0258e8b8ef5dc6dc4fcf853b65c9dfa..10faedcfc55de9eba89fa0c123fe689b6f600b98 100644 (file)
 #include "qapi/error.h"
 #include "trace.h"
 #include "nbd-internal.h"
+#include "qemu/units.h"
 
 #define NBD_META_ID_BASE_ALLOCATION 0
 #define NBD_META_ID_DIRTY_BITMAP 1
 
-/* NBD_MAX_BITMAP_EXTENTS: 1 mb of extents data. An empirical
+/*
+ * NBD_MAX_BLOCK_STATUS_EXTENTS: 1 MiB of extents data. An empirical
  * constant. If an increase is needed, note that the NBD protocol
  * recommends no larger than 32 mb, so that the client won't consider
- * the reply as a denial of service attack. */
-#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8)
+ * the reply as a denial of service attack.
+ */
+#define NBD_MAX_BLOCK_STATUS_EXTENTS (1 * MiB / 8)
 
 static int system_errno_to_nbd_errno(int err)
 {
@@ -77,8 +80,8 @@ struct NBDExport {
     BlockBackend *blk;
     char *name;
     char *description;
-    off_t dev_offset;
-    off_t size;
+    uint64_t dev_offset;
+    uint64_t size;
     uint16_t nbdflags;
     QTAILQ_HEAD(, NBDClient) clients;
     QTAILQ_ENTRY(NBDExport) next;
@@ -111,7 +114,7 @@ struct NBDClient {
 
     NBDExport *exp;
     QCryptoTLSCreds *tlscreds;
-    char *tlsaclname;
+    char *tlsauthz;
     QIOChannelSocket *sioc; /* The underlying data channel */
     QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
 
@@ -124,6 +127,8 @@ struct NBDClient {
     int nb_requests;
     bool closing;
 
+    uint32_t check_align; /* If non-zero, check for aligned client requests */
+
     bool structured_reply;
     NBDExportMetaContexts export_meta;
 
@@ -438,8 +443,7 @@ static int nbd_negotiate_handle_export_name(NBDClient *client,
         error_setg(errp, "Bad length received");
         return -EINVAL;
     }
-    if (nbd_read(client->ioc, name, client->optlen, errp) < 0) {
-        error_prepend(errp, "read failed: ");
+    if (nbd_read(client->ioc, name, client->optlen, "export name", errp) < 0) {
         return -EIO;
     }
     name[client->optlen] = '\0';
@@ -534,6 +538,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
     bool blocksize = false;
     uint32_t sizes[3];
     char buf[sizeof(uint64_t) + sizeof(uint16_t)];
+    uint32_t check_align = 0;
 
     /* Client sends:
         4 bytes: L, name length (can be 0)
@@ -608,13 +613,16 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
     /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
      * according to whether the client requested it, and according to
      * whether this is OPT_INFO or OPT_GO. */
-    /* minimum - 1 for back-compat, or 512 if client is new enough.
-     * TODO: consult blk_bs(blk)->bl.request_alignment? */
-    sizes[0] =
-            (client->opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
+    /* minimum - 1 for back-compat, or actual if client will obey it. */
+    if (client->opt == NBD_OPT_INFO || blocksize) {
+        check_align = sizes[0] = blk_get_request_alignment(exp->blk);
+    } else {
+        sizes[0] = 1;
+    }
+    assert(sizes[0] <= NBD_MAX_BUFFER_SIZE);
     /* preferred - Hard-code to 4096 for now.
      * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
-    sizes[1] = 4096;
+    sizes[1] = MAX(4096, sizes[0]);
     /* maximum - At most 32M, but smaller as appropriate. */
     sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
     trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
@@ -638,11 +646,14 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
         return rc;
     }
 
-    /* If the client is just asking for NBD_OPT_INFO, but forgot to
-     * request block sizes, return an error.
-     * TODO: consult blk_bs(blk)->request_align, and only error if it
-     * is not 1? */
-    if (client->opt == NBD_OPT_INFO && !blocksize) {
+    /*
+     * If the client is just asking for NBD_OPT_INFO, but forgot to
+     * request block sizes in a situation that would impact
+     * performance, then return an error. But for NBD_OPT_GO, we
+     * tolerate all clients, regardless of alignments.
+     */
+    if (client->opt == NBD_OPT_INFO && !blocksize &&
+        blk_get_request_alignment(exp->blk) > 1) {
         return nbd_negotiate_send_rep_err(client,
                                           NBD_REP_ERR_BLOCK_SIZE_REQD,
                                           errp,
@@ -658,6 +669,7 @@ static int nbd_negotiate_handle_info(NBDClient *client, uint16_t myflags,
 
     if (client->opt == NBD_OPT_GO) {
         client->exp = exp;
+        client->check_align = check_align;
         QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
         nbd_export_get(client->exp);
         nbd_check_meta_export(client);
@@ -687,7 +699,7 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
 
     tioc = qio_channel_tls_new_server(ioc,
                                       client->tlscreds,
-                                      client->tlsaclname,
+                                      client->tlsauthz,
                                       errp);
     if (!tioc) {
         return NULL;
@@ -1046,11 +1058,9 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
         ...           Rest of request
     */
 
-    if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
-        error_prepend(errp, "read failed: ");
+    if (nbd_read32(client->ioc, &flags, "flags", errp) < 0) {
         return -EIO;
     }
-    flags = be32_to_cpu(flags);
     trace_nbd_negotiate_options_flags(flags);
     if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
         fixedNewstyle = true;
@@ -1070,30 +1080,23 @@ static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
         uint32_t option, length;
         uint64_t magic;
 
-        if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read64(client->ioc, &magic, "opts magic", errp) < 0) {
             return -EINVAL;
         }
-        magic = be64_to_cpu(magic);
         trace_nbd_negotiate_options_check_magic(magic);
         if (magic != NBD_OPTS_MAGIC) {
             error_setg(errp, "Bad magic received");
             return -EINVAL;
         }
 
-        if (nbd_read(client->ioc, &option,
-                     sizeof(option), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read32(client->ioc, &option, "option", errp) < 0) {
             return -EINVAL;
         }
-        option = be32_to_cpu(option);
         client->opt = option;
 
-        if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
-            error_prepend(errp, "read failed: ");
+        if (nbd_read32(client->ioc, &length, "option length", errp) < 0) {
             return -EINVAL;
         }
-        length = be32_to_cpu(length);
         assert(!client->optlen);
         client->optlen = length;
 
@@ -1306,7 +1309,7 @@ static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
     uint32_t magic;
     int ret;
 
-    ret = nbd_read(ioc, buf, sizeof(buf), errp);
+    ret = nbd_read(ioc, buf, sizeof(buf), "request", errp);
     if (ret < 0) {
         return ret;
     }
@@ -1358,7 +1361,7 @@ void nbd_client_put(NBDClient *client)
         if (client->tlscreds) {
             object_unref(OBJECT(client->tlscreds));
         }
-        g_free(client->tlsaclname);
+        g_free(client->tlsauthz);
         if (client->exp) {
             QTAILQ_REMOVE(&client->exp->clients, client, next);
             nbd_export_put(client->exp);
@@ -1455,8 +1458,8 @@ static void nbd_eject_notifier(Notifier *n, void *data)
     nbd_export_close(exp);
 }
 
-NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
-                          const char *name, const char *description,
+NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
+                          uint64_t size, const char *name, const char *desc,
                           const char *bitmap, uint16_t nbdflags,
                           void (*close)(NBDExport *), bool writethrough,
                           BlockBackend *on_eject_blk, Error **errp)
@@ -1484,28 +1487,29 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
     if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
         perm |= BLK_PERM_WRITE;
     }
-    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
-                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
+    blk = blk_new(bdrv_get_aio_context(bs), perm,
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
+                  BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
     ret = blk_insert_bs(blk, bs, errp);
     if (ret < 0) {
         goto fail;
     }
     blk_set_enable_write_cache(blk, !writethrough);
+    blk_set_allow_aio_context_change(blk, true);
 
     exp->refcount = 1;
     QTAILQ_INIT(&exp->clients);
     exp->blk = blk;
-    assert(dev_offset >= 0 && dev_offset <= INT64_MAX);
+    assert(dev_offset <= INT64_MAX);
     exp->dev_offset = dev_offset;
     exp->name = g_strdup(name);
-    exp->description = g_strdup(description);
+    exp->description = g_strdup(desc);
     exp->nbdflags = nbdflags;
-    assert(size >= 0 && size <= INT64_MAX - dev_offset);
+    assert(size <= INT64_MAX - dev_offset);
     exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
 
     if (bitmap) {
         BdrvDirtyBitmap *bm = NULL;
-        BlockDriverState *bs = blk_bs(blk);
 
         while (true) {
             bm = bdrv_find_dirty_bitmap(bs, bitmap);
@@ -1521,6 +1525,10 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
             goto fail;
         }
 
+        if (bdrv_dirty_bitmap_check(bm, BDRV_BITMAP_ALLOW_RO, errp)) {
+            goto fail;
+        }
+
         if ((nbdflags & NBD_FLAG_READ_ONLY) && bdrv_is_writable(bs) &&
             bdrv_dirty_bitmap_enabled(bm)) {
             error_setg(errp,
@@ -1529,12 +1537,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
             goto fail;
         }
 
-        if (bdrv_dirty_bitmap_user_locked(bm)) {
-            error_setg(errp, "Bitmap '%s' is in use", bitmap);
-            goto fail;
-        }
-
-        bdrv_dirty_bitmap_set_qmp_locked(bm, true);
+        bdrv_dirty_bitmap_set_busy(bm, true);
         exp->export_bitmap = bm;
         exp->export_bitmap_context = g_strdup_printf("qemu:dirty-bitmap:%s",
                                                      bitmap);
@@ -1652,7 +1655,7 @@ void nbd_export_put(NBDExport *exp)
         }
 
         if (exp->export_bitmap) {
-            bdrv_dirty_bitmap_set_qmp_locked(exp->export_bitmap, false);
+            bdrv_dirty_bitmap_set_busy(exp->export_bitmap, false);
             g_free(exp->export_bitmap_context);
         }
 
@@ -1889,17 +1892,12 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
 
         flags = (ret & BDRV_BLOCK_ALLOCATED ? 0 : NBD_STATE_HOLE) |
                 (ret & BDRV_BLOCK_ZERO      ? NBD_STATE_ZERO : 0);
-        offset += num;
-        remaining_bytes -= num;
 
         if (first_extent) {
             extent->flags = flags;
             extent->length = num;
             first_extent = false;
-            continue;
-        }
-
-        if (flags == extent->flags) {
+        } else if (flags == extent->flags) {
             /* extend current extent */
             extent->length += num;
         } else {
@@ -1912,6 +1910,8 @@ static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
             extent->flags = flags;
             extent->length = num;
         }
+        offset += num;
+        remaining_bytes -= num;
     }
 
     extents_end = extent + 1;
@@ -1963,7 +1963,7 @@ static int nbd_co_send_block_status(NBDClient *client, uint64_t handle,
                                     Error **errp)
 {
     int ret;
-    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     NBDExtent *extents = g_new(NBDExtent, nb_extents);
     uint64_t final_length = length;
 
@@ -2048,7 +2048,7 @@ static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
                               uint32_t context_id, Error **errp)
 {
     int ret;
-    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BITMAP_EXTENTS;
+    unsigned int nb_extents = dont_fragment ? 1 : NBD_MAX_BLOCK_STATUS_EXTENTS;
     NBDExtent *extents = g_new(NBDExtent, nb_extents);
     uint64_t final_length = length;
 
@@ -2111,8 +2111,9 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
         }
     }
     if (request->type == NBD_CMD_WRITE) {
-        if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
-            error_prepend(errp, "reading from socket failed: ");
+        if (nbd_read(client->ioc, req->data, request->len, "CMD_WRITE data",
+                     errp) < 0)
+        {
             return -EIO;
         }
         req->complete = true;
@@ -2130,13 +2131,24 @@ static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
         return -EROFS;
     }
     if (request->from > client->exp->size ||
-        request->from + request->len > client->exp->size) {
+        request->len > client->exp->size - request->from) {
         error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                    ", Size: %" PRIu64, request->from, request->len,
-                   (uint64_t)client->exp->size);
+                   client->exp->size);
         return (request->type == NBD_CMD_WRITE ||
                 request->type == NBD_CMD_WRITE_ZEROES) ? -ENOSPC : -EINVAL;
     }
+    if (client->check_align && !QEMU_IS_ALIGNED(request->from | request->len,
+                                                client->check_align)) {
+        /*
+         * The block layer gracefully handles unaligned requests, but
+         * it's still worth tracing client non-compliance
+         */
+        trace_nbd_co_receive_align_compliance(nbd_cmd_lookup(request->type),
+                                              request->from,
+                                              request->len,
+                                              client->check_align);
+    }
     valid_flags = NBD_CMD_FLAG_FUA;
     if (request->type == NBD_CMD_READ && client->structured_reply) {
         valid_flags |= NBD_CMD_FLAG_DF;
@@ -2435,7 +2447,7 @@ static coroutine_fn void nbd_co_client_start(void *opaque)
  */
 void nbd_client_new(QIOChannelSocket *sioc,
                     QCryptoTLSCreds *tlscreds,
-                    const char *tlsaclname,
+                    const char *tlsauthz,
                     void (*close_fn)(NBDClient *, bool))
 {
     NBDClient *client;
@@ -2447,7 +2459,7 @@ void nbd_client_new(QIOChannelSocket *sioc,
     if (tlscreds) {
         object_ref(OBJECT(client->tlscreds));
     }
-    client->tlsaclname = g_strdup(tlsaclname);
+    client->tlsauthz = g_strdup(tlsauthz);
     client->sioc = sioc;
     object_ref(OBJECT(client->sioc));
     client->ioc = QIO_CHANNEL(sioc);