]> git.proxmox.com Git - pve-qemu.git/blob - debian/patches/extra/0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
add temporary QMP race fix
[pve-qemu.git] / debian / patches / extra / 0003-monitor-qmp-fix-race-with-clients-disconnecting-earl.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Stefan Reiter <s.reiter@proxmox.com>
3 Date: Mon, 23 Aug 2021 11:28:32 +0200
4 Subject: [PATCH] monitor/qmp: fix race with clients disconnecting early
5
6 The following sequence can produce a race condition that results in
7 responses meant for different clients being sent to the wrong one:
8
9 (QMP, no OOB)
10 1) client A connects
11 2) client A sends 'qmp_capabilities'
12 3) 'qmp_dispatch' runs in coroutine, schedules out to
13 'do_qmp_dispatch_bh' and yields
14 4) client A disconnects (i.e. aborts, crashes, etc...)
15 5) client B connects
16 6) 'do_qmp_dispatch_bh' runs 'qmp_capabilities' and wakes calling coroutine
17 7) capabilities are now set and 'mon->commands' is set to '&qmp_commands'
18 8) 'qmp_dispatch' returns to 'monitor_qmp_dispatch'
19 9) success message is sent to client B *without it ever having sent
20 'qmp_capabilities' itself*
21 9a) even if client B ignores it, it will now presumably send it's own
22 greeting, which will error because caps are already set
23
24 The fix proposed here uses an atomic, sequential connection number
25 stored in the MonitorQMP struct, which is incremented everytime a new
26 client connects. Since it is not changed on CHR_EVENT_CLOSED, the
27 behaviour of allowing a client to disconnect only one side of the
28 connection is retained.
29
30 The connection_nr needs to be exposed outside of the monitor subsystem,
31 since qmp_dispatch lives in qapi code. It needs to be checked twice,
32 once for actually running the command in the BH (fixes 7), and once for
33 sending back a response (fixes 9).
34
35 This satisfies my local reproducer - using multiple clients constantly
36 looping to open a connection, send the greeting, then exiting no longer
37 crashes other, normally behaving clients with unrelated responses.
38
39 Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
40 ---
41 include/monitor/monitor.h | 1 +
42 monitor/monitor-internal.h | 7 +++++++
43 monitor/monitor.c | 15 +++++++++++++++
44 monitor/qmp.c | 15 ++++++++++++++-
45 qapi/qmp-dispatch.c | 21 +++++++++++++++++----
46 stubs/monitor-core.c | 5 +++++
47 6 files changed, 59 insertions(+), 5 deletions(-)
48
49 diff --git a/include/monitor/monitor.h b/include/monitor/monitor.h
50 index af3887bb71..ff6db1448b 100644
51 --- a/include/monitor/monitor.h
52 +++ b/include/monitor/monitor.h
53 @@ -16,6 +16,7 @@ extern QemuOptsList qemu_mon_opts;
54 Monitor *monitor_cur(void);
55 Monitor *monitor_set_cur(Coroutine *co, Monitor *mon);
56 bool monitor_cur_is_qmp(void);
57 +int monitor_get_connection_nr(const Monitor *mon);
58
59 void monitor_init_globals(void);
60 void monitor_init_globals_core(void);
61 diff --git a/monitor/monitor-internal.h b/monitor/monitor-internal.h
62 index 9c3a09cb01..a92be8c3f7 100644
63 --- a/monitor/monitor-internal.h
64 +++ b/monitor/monitor-internal.h
65 @@ -144,6 +144,13 @@ typedef struct {
66 QemuMutex qmp_queue_lock;
67 /* Input queue that holds all the parsed QMP requests */
68 GQueue *qmp_requests;
69 +
70 + /*
71 + * A sequential number that gets incremented on every new CHR_EVENT_OPENED.
72 + * Used to avoid leftover responses in BHs from being sent to the wrong
73 + * client. Access with atomics.
74 + */
75 + int connection_nr;
76 } MonitorQMP;
77
78 /**
79 diff --git a/monitor/monitor.c b/monitor/monitor.c
80 index 636bcc81c5..ee5ac26c37 100644
81 --- a/monitor/monitor.c
82 +++ b/monitor/monitor.c
83 @@ -136,6 +136,21 @@ bool monitor_cur_is_qmp(void)
84 return cur_mon && monitor_is_qmp(cur_mon);
85 }
86
87 +/**
88 + * If @mon is a QMP monitor, return the connection_nr, otherwise -1.
89 + */
90 +int monitor_get_connection_nr(const Monitor *mon)
91 +{
92 + MonitorQMP *qmp_mon;
93 +
94 + if (!monitor_is_qmp(mon)) {
95 + return -1;
96 + }
97 +
98 + qmp_mon = container_of(mon, MonitorQMP, common);
99 + return qatomic_read(&qmp_mon->connection_nr);
100 +}
101 +
102 /**
103 * Is @mon is using readline?
104 * Note: not all HMP monitors use readline, e.g., gdbserver has a
105 diff --git a/monitor/qmp.c b/monitor/qmp.c
106 index 0ef7cebb78..3ec67e32d3 100644
107 --- a/monitor/qmp.c
108 +++ b/monitor/qmp.c
109 @@ -141,6 +141,8 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
110 QDict *rsp;
111 QDict *error;
112
113 + int conn_nr_before = qatomic_read(&mon->connection_nr);
114 +
115 rsp = qmp_dispatch(mon->commands, req, qmp_oob_enabled(mon),
116 &mon->common);
117
118 @@ -156,7 +158,17 @@ static void monitor_qmp_dispatch(MonitorQMP *mon, QObject *req)
119 }
120 }
121
122 - monitor_qmp_respond(mon, rsp);
123 + /*
124 + * qmp_dispatch might have yielded and waited for a BH, in which case there
125 + * is a chance a new client connected in the meantime - if this happened,
126 + * the command will not have been executed, but we also need to ensure that
127 + * we don't send back a corresponding response on a line that no longer
128 + * belongs to this request.
129 + */
130 + if (conn_nr_before == qatomic_read(&mon->connection_nr)) {
131 + monitor_qmp_respond(mon, rsp);
132 + }
133 +
134 qobject_unref(rsp);
135 }
136
137 @@ -444,6 +456,7 @@ static void monitor_qmp_event(void *opaque, QEMUChrEvent event)
138
139 switch (event) {
140 case CHR_EVENT_OPENED:
141 + qatomic_inc_fetch(&mon->connection_nr);
142 mon->commands = &qmp_cap_negotiation_commands;
143 monitor_qmp_caps_reset(mon);
144 data = qmp_greeting(mon);
145 diff --git a/qapi/qmp-dispatch.c b/qapi/qmp-dispatch.c
146 index 59600210ce..95602446eb 100644
147 --- a/qapi/qmp-dispatch.c
148 +++ b/qapi/qmp-dispatch.c
149 @@ -120,16 +120,28 @@ typedef struct QmpDispatchBH {
150 QObject **ret;
151 Error **errp;
152 Coroutine *co;
153 + int conn_nr;
154 } QmpDispatchBH;
155
156 static void do_qmp_dispatch_bh(void *opaque)
157 {
158 QmpDispatchBH *data = opaque;
159
160 - assert(monitor_cur() == NULL);
161 - monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
162 - data->cmd->fn(data->args, data->ret, data->errp);
163 - monitor_set_cur(qemu_coroutine_self(), NULL);
164 + /*
165 + * A QMP monitor tracks it's client with a connection number, if this
166 + * changes during the scheduling delay of this BH, we must not execute the
167 + * command. Otherwise a badly placed 'qmp_capabilities' might affect the
168 + * connection state of a client it was never meant for.
169 + */
170 + if (data->conn_nr == monitor_get_connection_nr(data->cur_mon)) {
171 + assert(monitor_cur() == NULL);
172 + monitor_set_cur(qemu_coroutine_self(), data->cur_mon);
173 + data->cmd->fn(data->args, data->ret, data->errp);
174 + monitor_set_cur(qemu_coroutine_self(), NULL);
175 + } else {
176 + error_setg(data->errp, "active monitor connection changed");
177 + }
178 +
179 aio_co_wake(data->co);
180 }
181
182 @@ -243,6 +255,7 @@ QDict *qmp_dispatch(const QmpCommandList *cmds, QObject *request,
183 .ret = &ret,
184 .errp = &err,
185 .co = qemu_coroutine_self(),
186 + .conn_nr = monitor_get_connection_nr(cur_mon),
187 };
188 aio_bh_schedule_oneshot(qemu_get_aio_context(), do_qmp_dispatch_bh,
189 &data);
190 diff --git a/stubs/monitor-core.c b/stubs/monitor-core.c
191 index d058a2a00d..3290b58120 100644
192 --- a/stubs/monitor-core.c
193 +++ b/stubs/monitor-core.c
194 @@ -13,6 +13,11 @@ Monitor *monitor_set_cur(Coroutine *co, Monitor *mon)
195 return NULL;
196 }
197
198 +int monitor_get_connection_nr(const Monitor *mon)
199 +{
200 + return -1;
201 +}
202 +
203 void monitor_init_qmp(Chardev *chr, bool pretty, Error **errp)
204 {
205 }