]> git.proxmox.com Git - mirror_qemu.git/commitdiff
Merge remote-tracking branch 'remotes/vsementsov/tags/pull-nbd-2022-02-09-v2' into...
authorPeter Maydell <peter.maydell@linaro.org>
Sat, 12 Feb 2022 22:04:07 +0000 (22:04 +0000)
committerPeter Maydell <peter.maydell@linaro.org>
Sat, 12 Feb 2022 22:04:07 +0000 (22:04 +0000)
nbd: handle AioContext change correctly

v2: add my s-o-b marks to each commit

# gpg: Signature made Fri 11 Feb 2022 13:14:55 GMT
# gpg:                using RSA key 8B9C26CDB2FD147C880E86A1561F24C1F19F79FB
# gpg: Good signature from "Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>" [unknown]
# gpg: WARNING: This key is not certified with a trusted signature!
# gpg:          There is no indication that the signature belongs to the owner.
# Primary key fingerprint: 8B9C 26CD B2FD 147C 880E  86A1 561F 24C1 F19F 79FB

* remotes/vsementsov/tags/pull-nbd-2022-02-09-v2:
  iotests/281: Let NBD connection yield in iothread
  block/nbd: Move s->ioc on AioContext change
  iotests/281: Test lingering timers
  iotests.py: Add QemuStorageDaemon class
  block/nbd: Assert there are no timers when closed
  block/nbd: Delete open timer when done
  block/nbd: Delete reconnect delay timer when done

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
block/nbd.c
tests/qemu-iotests/281
tests/qemu-iotests/281.out
tests/qemu-iotests/iotests.py

index 63dbfa807d023000894c78cac3e93703c4d684d3..5853d85d60840c63d42b19559ed3c3ee86f09120 100644 (file)
@@ -110,6 +110,10 @@ static void nbd_clear_bdrvstate(BlockDriverState *bs)
 
     yank_unregister_instance(BLOCKDEV_YANK_INSTANCE(bs->node_name));
 
+    /* Must not leave timers behind that would access freed data */
+    assert(!s->reconnect_delay_timer);
+    assert(!s->open_timer);
+
     object_unref(OBJECT(s->tlscreds));
     qapi_free_SocketAddress(s->saddr);
     s->saddr = NULL;
@@ -381,6 +385,13 @@ static coroutine_fn void nbd_reconnect_attempt(BDRVNBDState *s)
     }
 
     nbd_co_do_establish_connection(s->bs, NULL);
+
+    /*
+     * The reconnect attempt is done (maybe successfully, maybe not), so
+     * we no longer need this timer.  Delete it so it will not outlive
+     * this I/O request (so draining removes all timers).
+     */
+    reconnect_delay_timer_del(s);
 }
 
 static coroutine_fn int nbd_receive_replies(BDRVNBDState *s, uint64_t handle)
@@ -1878,11 +1889,19 @@ static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
         goto fail;
     }
 
+    /*
+     * The connect attempt is done, so we no longer need this timer.
+     * Delete it, because we do not want it to be around when this node
+     * is drained or closed.
+     */
+    open_timer_del(s);
+
     nbd_client_connection_enable_retry(s->conn);
 
     return 0;
 
 fail:
+    open_timer_del(s);
     nbd_clear_bdrvstate(bs);
     return ret;
 }
@@ -2036,6 +2055,42 @@ static void nbd_cancel_in_flight(BlockDriverState *bs)
     nbd_co_establish_connection_cancel(s->conn);
 }
 
+static void nbd_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    /* The open_timer is used only during nbd_open() */
+    assert(!s->open_timer);
+
+    /*
+     * The reconnect_delay_timer is scheduled in I/O paths when the
+     * connection is lost, to cancel the reconnection attempt after a
+     * given time.  Once this attempt is done (successfully or not),
+     * nbd_reconnect_attempt() ensures the timer is deleted before the
+     * respective I/O request is resumed.
+     * Since the AioContext can only be changed when a node is drained,
+     * the reconnect_delay_timer cannot be active here.
+     */
+    assert(!s->reconnect_delay_timer);
+
+    if (s->ioc) {
+        qio_channel_attach_aio_context(s->ioc, new_context);
+    }
+}
+
+static void nbd_detach_aio_context(BlockDriverState *bs)
+{
+    BDRVNBDState *s = bs->opaque;
+
+    assert(!s->open_timer);
+    assert(!s->reconnect_delay_timer);
+
+    if (s->ioc) {
+        qio_channel_detach_aio_context(s->ioc);
+    }
+}
+
 static BlockDriver bdrv_nbd = {
     .format_name                = "nbd",
     .protocol_name              = "nbd",
@@ -2059,6 +2114,9 @@ static BlockDriver bdrv_nbd = {
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_tcp = {
@@ -2084,6 +2142,9 @@ static BlockDriver bdrv_nbd_tcp = {
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static BlockDriver bdrv_nbd_unix = {
@@ -2109,6 +2170,9 @@ static BlockDriver bdrv_nbd_unix = {
     .bdrv_dirname               = nbd_dirname,
     .strong_runtime_opts        = nbd_strong_runtime_opts,
     .bdrv_cancel_in_flight      = nbd_cancel_in_flight,
+
+    .bdrv_attach_aio_context    = nbd_attach_aio_context,
+    .bdrv_detach_aio_context    = nbd_detach_aio_context,
 };
 
 static void bdrv_nbd_init(void)
index 318e33393919f2b89295700bbf0568c29a11c58e..5e1339bd756c32a458c3977a9f8c02ccc2c351b7 100755 (executable)
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# group: rw quick
+# group: rw
 #
 # Test cases for blockdev + IOThread interactions
 #
@@ -20,8 +20,9 @@
 #
 
 import os
+import time
 import iotests
-from iotests import qemu_img
+from iotests import qemu_img, QemuStorageDaemon
 
 image_len = 64 * 1024 * 1024
 
@@ -243,6 +244,102 @@ class TestBlockdevBackupAbort(iotests.QMPTestCase):
         # Hangs on failure, we expect this error.
         self.assert_qmp(result, 'error/class', 'GenericError')
 
+# Test for RHBZ#2033626
+class TestYieldingAndTimers(iotests.QMPTestCase):
+    sock = os.path.join(iotests.sock_dir, 'nbd.sock')
+    qsd = None
+
+    def setUp(self):
+        self.create_nbd_export()
+
+        # Simple VM with an NBD block device connected to the NBD export
+        # provided by the QSD, and an (initially unused) iothread
+        self.vm = iotests.VM()
+        self.vm.add_object('iothread,id=iothr')
+        self.vm.add_blockdev('nbd,node-name=nbd,server.type=unix,' +
+                             f'server.path={self.sock},export=exp,' +
+                             'reconnect-delay=1,open-timeout=1')
+
+        self.vm.launch()
+
+    def tearDown(self):
+        self.stop_nbd_export()
+        self.vm.shutdown()
+
+    def test_timers_with_blockdev_del(self):
+        # The NBD BDS will have had an active open timer, because setUp() gave
+        # a positive value for @open-timeout.  It should be gone once the BDS
+        # has been opened.
+        # (But there used to be a bug where it remained active, which will
+        # become important below.)
+
+        # Stop and restart the NBD server, and do some I/O on the client to
+        # trigger a reconnect and start the reconnect delay timer
+        self.stop_nbd_export()
+        self.create_nbd_export()
+
+        result = self.vm.qmp('human-monitor-command',
+                             command_line='qemu-io nbd "write 0 512"')
+        self.assert_qmp(result, 'return', '')
+
+        # Reconnect is done, so the reconnect delay timer should be gone.
+        # (This is similar to how the open timer should be gone after open,
+        # and similarly there used to be a bug where it was not gone.)
+
+        # Delete the BDS to see whether both timers are gone.  If they are not,
+        # they will remain active, fire later, and then access freed data.
+        # (Or, with "block/nbd: Assert there are no timers when closed"
+        # applied, the assertions added in that patch will fail.)
+        result = self.vm.qmp('blockdev-del', node_name='nbd')
+        self.assert_qmp(result, 'return', {})
+
+        # Give the timers some time to fire (both have a timeout of 1 s).
+        # (Sleeping in an iotest may ring some alarm bells, but note that if
+        # the timing is off here, the test will just always pass.  If we kill
+        # the VM too early, then we just kill the timers before they can fire,
+        # thus not see the error, and so the test will pass.)
+        time.sleep(2)
+
+    def test_yield_in_iothread(self):
+        # Move the NBD node to the I/O thread; the NBD block driver should
+        # attach the connection's QIOChannel to that thread's AioContext, too
+        result = self.vm.qmp('x-blockdev-set-iothread',
+                             node_name='nbd', iothread='iothr')
+        self.assert_qmp(result, 'return', {})
+
+        # Do some I/O that will be throttled by the QSD, so that the network
+        # connection hopefully will yield here.  When it is resumed, it must
+        # then be resumed in the I/O thread's AioContext.
+        result = self.vm.qmp('human-monitor-command',
+                             command_line='qemu-io nbd "read 0 128K"')
+        self.assert_qmp(result, 'return', '')
+
+    def create_nbd_export(self):
+        assert self.qsd is None
+
+        # Export a throttled null-co BDS: Reads are throttled (max 64 kB/s),
+        # writes are not.
+        self.qsd = QemuStorageDaemon(
+            '--object',
+            'throttle-group,id=thrgr,x-bps-read=65536,x-bps-read-max=65536',
+
+            '--blockdev',
+            'null-co,node-name=null,read-zeroes=true',
+
+            '--blockdev',
+            'throttle,node-name=thr,file=null,throttle-group=thrgr',
+
+            '--nbd-server',
+            f'addr.type=unix,addr.path={self.sock}',
+
+            '--export',
+            'nbd,id=exp,node-name=thr,name=exp,writable=true'
+        )
+
+    def stop_nbd_export(self):
+        self.qsd.stop()
+        self.qsd = None
+
 if __name__ == '__main__':
     iotests.main(supported_fmts=['qcow2'],
                  supported_protocols=['file'],
index 89968f35d78392e0adcdec1e1c97460e92d3e8e4..3f8a935a082d8dde81eafb46b644fdff9d1a0cce 100644 (file)
@@ -1,5 +1,5 @@
-....
+......
 ----------------------------------------------------------------------
-Ran 4 tests
+Ran 6 tests
 
 OK
index 8cdb381f2aa65f7ba88b4c3aaf473e7ee51a60ff..6ba65eb1ffec74384c073206b956ad810ec88621 100644 (file)
@@ -73,6 +73,8 @@ if os.environ.get('QEMU_NBD_OPTIONS'):
 qemu_prog = os.environ.get('QEMU_PROG', 'qemu')
 qemu_opts = os.environ.get('QEMU_OPTIONS', '').strip().split(' ')
 
+qsd_prog = os.environ.get('QSD_PROG', 'qemu-storage-daemon')
+
 gdb_qemu_env = os.environ.get('GDB_OPTIONS')
 qemu_gdb = []
 if gdb_qemu_env:
@@ -345,6 +347,44 @@ class QemuIoInteractive:
         return self._read_output()
 
 
+class QemuStorageDaemon:
+    def __init__(self, *args: str, instance_id: str = 'a'):
+        assert '--pidfile' not in args
+        self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
+        all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
+
+        # Cannot use with here, we want the subprocess to stay around
+        # pylint: disable=consider-using-with
+        self._p = subprocess.Popen(all_args)
+        while not os.path.exists(self.pidfile):
+            if self._p.poll() is not None:
+                cmd = ' '.join(all_args)
+                raise RuntimeError(
+                    'qemu-storage-daemon terminated with exit code ' +
+                    f'{self._p.returncode}: {cmd}')
+
+            time.sleep(0.01)
+
+        with open(self.pidfile, encoding='utf-8') as f:
+            self._pid = int(f.read().strip())
+
+        assert self._pid == self._p.pid
+
+    def stop(self, kill_signal=15):
+        self._p.send_signal(kill_signal)
+        self._p.wait()
+        self._p = None
+
+        try:
+            os.remove(self.pidfile)
+        except OSError:
+            pass
+
+    def __del__(self):
+        if self._p is not None:
+            self.stop(kill_signal=9)
+
+
 def qemu_nbd(*args):
     '''Run qemu-nbd in daemon mode and return the parent's exit code'''
     return subprocess.call(qemu_nbd_args + ['--fork'] + list(args))