From 26eee146bc4ca89ec0d70e9265e90e680994ad58 Mon Sep 17 00:00:00 2001 From: Stefan Reiter Date: Wed, 1 Sep 2021 18:10:12 +0200 Subject: [PATCH] add temporary QMP race fix same as the initial version sent to qemu-devel, it won't be the final fix we plan to upstream but it should be enough band-aid to workaround how PVE uses the QMP. Signed-off-by: Stefan Reiter [ Thomas: add a bit reasoning to commit message body ] Signed-off-by: Thomas Lamprecht --- ...race-with-clients-disconnecting-earl.patch | 205 ++++++++++++++++++ debian/patches/series | 1 + 2 files changed, 206 insertions(+) create mode 100644 debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch diff --git a/debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch b/debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch new file mode 100644 index 0000000..ffa0120 --- /dev/null +++ b/debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch @@ -0,0 +1,205 @@ +From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 +From: Stefan Reiter +Date: Mon, 23 Aug 2021 11:28:32 +0200 +Subject: [PATCH] monitor/qmp: fix race with clients disconnecting early + +The following sequence can produce a race condition that results in +responses meant for different clients being sent to the wrong one: + +(QMP, no OOB) +1) client A connects +2) client A sends 'qmp_capabilities' +3) 'qmp_dispatch' runs in coroutine, schedules out to + 'do_qmp_dispatch_bh' and yields +4) client A disconnects (i.e. aborts, crashes, etc...) +5) client B connects +6) 'do_qmp_dispatch_bh' runs 'qmp_capabilities' and wakes calling coroutine +7) capabilities are now set and 'mon->commands' is set to '&qmp_commands' +8) 'qmp_dispatch' returns to 'monitor_qmp_dispatch' +9) success message is sent to client B *without it ever having sent + 'qmp_capabilities' itself* +9a) even if client B ignores it, it will now presumably send it's own + greeting, which will error because caps are already set + +The fix proposed here uses an atomic, sequential connection number +stored in the MonitorQMP struct, which is incremented everytime a new +client connects. Since it is not changed on CHR_EVENT_CLOSED, the +behaviour of allowing a client to disconnect only one side of the +connection is retained. + +The connection_nr needs to be exposed outside of the monitor subsystem, +since qmp_dispatch lives in qapi code. It needs to be checked twice, +once for actually running the command in the BH (fixes 7), and once for +sending back a response (fixes 9). + +This satisfies my local reproducer - using multiple clients constantly +looping to open a connection, send the greeting, then exiting no longer +crashes other, normally behaving clients with unrelated responses. + +Signed-off-by: Stefan Reiter +--- + include/monitor/monitor.h | 1 + + monitor/monitor-internal.h | 7 +++++++ + monitor/monitor.c | 15 +++++++++++++++ + monitor/qmp.c | 15 ++++++++++++++- + qapi/qmp-dispatch.c | 21 +++++++++++++++++---- + stubs/monitor-core.c | 5 +++++ + 6 files changed, 59 insertions(+), 5 deletions(-) + +diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h +index af3887bb71..ff6db1448b 100644 +--- a/include/monitor/monitor.h ++++ b/include/monitor/monitor.h +@@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts; + Monitor *monitor_cur(void); + Monitor *monitor_set_cur(Coroutine *co, Monitor *mon); + bool monitor_cur_is_qmp(void); ++int monitor_get_connection_nr(const Monitor *mon); + + void monitor_init_globals(void); + void monitor_init_globals_core(void); +diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h +index 9c3a09cb01..a92be8c3f7 100644 +--- a/monitor/monitor-internal.h ++++ b/monitor/monitor-internal.h +@@ -144,6 +144,13 @@ typedef struct { + QemuMutex qmp_queue_lock; + /* Input queue that holds all the parsed QMP requests */ + GQueue *qmp_requests; ++ ++ /* ++ * A sequential number that gets incremented on every new CHR_EVENT_OPENED. ++ * Used to avoid leftover responses in BHs from being sent to the wrong ++ * client. Access with atomics. ++ */ ++ int connection_nr; + } MonitorQMP; + + /** +diff --git a/monitor/monitor.c b/monitor/monitor.c +index 636bcc81c5..ee5ac26c37 100644 +--- a/monitor/monitor.c ++++ b/monitor/monitor.c +@@ -136,6 +136,21 @@ bool monitor_cur_is_qmp(void) + return cur_mon && monitor_is_qmp(cur_mon); + } + ++/** ++ * If @mon is a QMP monitor, return the connection_nr, otherwise -1. ++ */ ++int monitor_get_connection_nr(const Monitor *mon) ++{ ++ MonitorQMP *qmp_mon; ++ ++ if (!monitor_is_qmp(mon)) { ++ return -1; ++ } ++ ++ qmp_mon = container_of(mon, MonitorQMP, common); ++ return qatomic_read(&qmp_mon->connection_nr); ++} ++ + /** + * Is @mon is using readline? + * Note: not all HMP monitors use readline, e.g., gdbserver has a +diff --git a/monitor/qmp.c b/monitor/qmp.c +index 0ef7cebb78..3ec67e32d3 100644 +--- a/monitor/qmp.c ++++ b/monitor/qmp.c +@@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) + QDict *rsp; + QDict *error; + ++ int conn_nr_before = qatomic_read(&mon->connection_nr); ++ + rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon), + &mon->common); + +@@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req) + } + } + +- monitor_qmp_respond(mon, rsp); ++ /* ++ * qmp_dispatch might have yielded and waited for a BH, in which case there ++ * is a chance a new client connected in the meantime - if this happened, ++ * the command will not have been executed, but we also need to ensure that ++ * we don't send back a corresponding response on a line that no longer ++ * belongs to this request. ++ */ ++ if (conn_nr_before == qatomic_read(&mon->connection_nr)) { ++ monitor_qmp_respond(mon, rsp); ++ } ++ + qobject_unref(rsp); + } + +@@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event) + + switch (event) { + case CHR_EVENT_OPENED: ++ qatomic_inc_fetch(&mon->connection_nr); + mon->commands = &qmp_cap_negotiation_commands; + monitor_qmp_caps_reset(mon); + data = qmp_greeting(mon); +diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c +index 59600210ce..95602446eb 100644 +--- a/qapi/qmp-dispatch.c ++++ b/qapi/qmp-dispatch.c +@@ -120,16 +120,28 @@ typedef struct QmpDispatchBH { + QObject **ret; + Error **errp; + Coroutine *co; ++ int conn_nr; + } QmpDispatchBH; + + static void do_qmp_dispatch_bh(void *opaque) + { + QmpDispatchBH *data = opaque; + +- assert(monitor_cur() == NULL); +- monitor_set_cur(qemu_coroutine_self(), data->cur_mon); +- data->cmd->fn(data->args, data->ret, data->errp); +- monitor_set_cur(qemu_coroutine_self(), NULL); ++ /* ++ * A QMP monitor tracks it's client with a connection number, if this ++ * changes during the scheduling delay of this BH, we must not execute the ++ * command. Otherwise a badly placed 'qmp_capabilities' might affect the ++ * connection state of a client it was never meant for. ++ */ ++ if (data->conn_nr == monitor_get_connection_nr(data->cur_mon)) { ++ assert(monitor_cur() == NULL); ++ monitor_set_cur(qemu_coroutine_self(), data->cur_mon); ++ data->cmd->fn(data->args, data->ret, data->errp); ++ monitor_set_cur(qemu_coroutine_self(), NULL); ++ } else { ++ error_setg(data->errp, "active monitor connection changed"); ++ } ++ + aio_co_wake(data->co); + } + +@@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request, + .ret = &ret, + .errp = &err, + .co = qemu_coroutine_self(), ++ .conn_nr = monitor_get_connection_nr(cur_mon), + }; + aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh, + &data); +diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c +index d058a2a00d..3290b58120 100644 +--- a/stubs/monitor-core.c ++++ b/stubs/monitor-core.c +@@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon) + return NULL; + } + ++int monitor_get_connection_nr(const Monitor *mon) ++{ ++ return -1; ++} ++ + void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp) + { + } diff --git a/debian/patches/series b/debian/patches/series index 2289085..b3fe14f 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,5 +1,6 @@ extra/0001-monitor-qmp-fix-race-on-CHR_EVENT_CLOSED-without-OOB.patch extra/0002-block-io_uring-resubmit-when-result-is-EAGAIN.patch +extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch -- 2.39.2