]> git.proxmox.com Git - mirror_qemu.git/blame - nbd/server.c
nbd/server: refactor nbd_co_send_reply
[mirror_qemu.git] / nbd / server.c
CommitLineData
75818250 1/*
b626b51a 2 * Copyright (C) 2016 Red Hat, Inc.
7a5ca864
FB
3 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
4 *
798bfe00 5 * Network Block Device Server Side
7a5ca864
FB
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; under version 2 of the License.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
8167ee88 17 * along with this program; if not, see <http://www.gnu.org/licenses/>.
75818250 18 */
7a5ca864 19
d38ea87a 20#include "qemu/osdep.h"
da34e65c 21#include "qapi/error.h"
798bfe00 22#include "nbd-internal.h"
ca441480
PB
23
24static int system_errno_to_nbd_errno(int err)
25{
26 switch (err) {
27 case 0:
28 return NBD_SUCCESS;
29 case EPERM:
c0301fcc 30 case EROFS:
ca441480
PB
31 return NBD_EPERM;
32 case EIO:
33 return NBD_EIO;
34 case ENOMEM:
35 return NBD_ENOMEM;
36#ifdef EDQUOT
37 case EDQUOT:
38#endif
39 case EFBIG:
40 case ENOSPC:
41 return NBD_ENOSPC;
b6f5d3b5
EB
42 case ESHUTDOWN:
43 return NBD_ESHUTDOWN;
ca441480
PB
44 case EINVAL:
45 default:
46 return NBD_EINVAL;
47 }
48}
49
9a304d29
PB
50/* Definitions for opaque data types */
51
315f78ab 52typedef struct NBDRequestData NBDRequestData;
9a304d29 53
315f78ab
EB
54struct NBDRequestData {
55 QSIMPLEQ_ENTRY(NBDRequestData) entry;
9a304d29
PB
56 NBDClient *client;
57 uint8_t *data;
29b6c3b3 58 bool complete;
9a304d29
PB
59};
60
61struct NBDExport {
2c8d9f06 62 int refcount;
0ddf08db
PB
63 void (*close)(NBDExport *exp);
64
aadf99a7 65 BlockBackend *blk;
ee0a19ec 66 char *name;
b1a75b33 67 char *description;
9a304d29
PB
68 off_t dev_offset;
69 off_t size;
7423f417 70 uint16_t nbdflags;
4b9441f6 71 QTAILQ_HEAD(, NBDClient) clients;
ee0a19ec 72 QTAILQ_ENTRY(NBDExport) next;
958c717d
HR
73
74 AioContext *ctx;
741cc431 75
cd7fca95 76 BlockBackend *eject_notifier_blk;
741cc431 77 Notifier eject_notifier;
9a304d29
PB
78};
79
ee0a19ec
PB
80static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
81
9a304d29
PB
82struct NBDClient {
83 int refcount;
0c9390d9 84 void (*close_fn)(NBDClient *client, bool negotiated);
9a304d29 85
c203c59a 86 bool no_zeroes;
9a304d29 87 NBDExport *exp;
f95910fe
DB
88 QCryptoTLSCreds *tlscreds;
89 char *tlsaclname;
1c778ef7
DB
90 QIOChannelSocket *sioc; /* The underlying data channel */
91 QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
9a304d29
PB
92
93 Coroutine *recv_coroutine;
94
95 CoMutex send_lock;
96 Coroutine *send_coroutine;
97
4b9441f6 98 QTAILQ_ENTRY(NBDClient) next;
9a304d29 99 int nb_requests;
ff2b68aa 100 bool closing;
9a304d29
PB
101};
102
7a5ca864
FB
103/* That's all folks */
104
ff82911c 105static void nbd_client_receive_next_request(NBDClient *client);
958c717d 106
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
   ...
   ...
                  Request (type == 2)

*/
133
526e5c65
EB
134/* Send a reply header, including length, but no payload.
135 * Return -errno on error, 0 on success. */
136static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
137 uint32_t opt, uint32_t len)
6b8c01e7 138{
6b8c01e7 139 uint64_t magic;
6b8c01e7 140
526e5c65
EB
141 TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
142 type, opt, len);
f95910fe 143
f5076b5a 144 magic = cpu_to_be64(NBD_REP_MAGIC);
2b0bbc4f 145 if (nbd_write(ioc, &magic, sizeof(magic), NULL) < 0) {
f5076b5a
HB
146 LOG("write failed (rep magic)");
147 return -EINVAL;
6b8c01e7 148 }
f5076b5a 149 opt = cpu_to_be32(opt);
2b0bbc4f 150 if (nbd_write(ioc, &opt, sizeof(opt), NULL) < 0) {
f5076b5a
HB
151 LOG("write failed (rep opt)");
152 return -EINVAL;
6b8c01e7 153 }
f5076b5a 154 type = cpu_to_be32(type);
2b0bbc4f 155 if (nbd_write(ioc, &type, sizeof(type), NULL) < 0) {
f5076b5a
HB
156 LOG("write failed (rep type)");
157 return -EINVAL;
6b8c01e7 158 }
526e5c65 159 len = cpu_to_be32(len);
2b0bbc4f 160 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
f5076b5a
HB
161 LOG("write failed (rep data length)");
162 return -EINVAL;
6b8c01e7 163 }
f5076b5a
HB
164 return 0;
165}
6b8c01e7 166
526e5c65
EB
167/* Send a reply header with default 0 length.
168 * Return -errno on error, 0 on success. */
169static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
170{
171 return nbd_negotiate_send_rep_len(ioc, type, opt, 0);
172}
173
36683283
EB
174/* Send an error reply.
175 * Return -errno on error, 0 on success. */
176static int GCC_FMT_ATTR(4, 5)
177nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
178 uint32_t opt, const char *fmt, ...)
179{
180 va_list va;
181 char *msg;
182 int ret;
183 size_t len;
184
185 va_start(va, fmt);
186 msg = g_strdup_vprintf(fmt, va);
187 va_end(va);
188 len = strlen(msg);
189 assert(len < 4096);
190 TRACE("sending error message \"%s\"", msg);
191 ret = nbd_negotiate_send_rep_len(ioc, type, opt, len);
192 if (ret < 0) {
193 goto out;
194 }
2b0bbc4f 195 if (nbd_write(ioc, msg, len, NULL) < 0) {
36683283
EB
196 LOG("write failed (error message)");
197 ret = -EIO;
198 } else {
199 ret = 0;
200 }
201out:
202 g_free(msg);
203 return ret;
204}
205
526e5c65
EB
206/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
207 * Return -errno on error, 0 on success. */
1c778ef7 208static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
32d7d2e0 209{
b1a75b33 210 size_t name_len, desc_len;
526e5c65 211 uint32_t len;
b1a75b33
EB
212 const char *name = exp->name ? exp->name : "";
213 const char *desc = exp->description ? exp->description : "";
526e5c65 214 int rc;
32d7d2e0 215
b1a75b33
EB
216 TRACE("Advertising export name '%s' description '%s'", name, desc);
217 name_len = strlen(name);
218 desc_len = strlen(desc);
526e5c65
EB
219 len = name_len + desc_len + sizeof(len);
220 rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
221 if (rc < 0) {
222 return rc;
32d7d2e0 223 }
526e5c65 224
32d7d2e0 225 len = cpu_to_be32(name_len);
2b0bbc4f 226 if (nbd_write(ioc, &len, sizeof(len), NULL) < 0) {
b1a75b33
EB
227 LOG("write failed (name length)");
228 return -EINVAL;
229 }
2b0bbc4f 230 if (nbd_write(ioc, name, name_len, NULL) < 0) {
b1a75b33 231 LOG("write failed (name buffer)");
32d7d2e0
HB
232 return -EINVAL;
233 }
2b0bbc4f 234 if (nbd_write(ioc, desc, desc_len, NULL) < 0) {
b1a75b33 235 LOG("write failed (description buffer)");
32d7d2e0
HB
236 return -EINVAL;
237 }
238 return 0;
239}
240
526e5c65
EB
241/* Process the NBD_OPT_LIST command, with a potential series of replies.
242 * Return -errno on error, 0 on success. */
1a6245a5 243static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
32d7d2e0 244{
32d7d2e0
HB
245 NBDExport *exp;
246
32d7d2e0 247 if (length) {
2b0bbc4f 248 if (nbd_drop(client->ioc, length, NULL) < 0) {
0379f474
HR
249 return -EIO;
250 }
36683283
EB
251 return nbd_negotiate_send_rep_err(client->ioc,
252 NBD_REP_ERR_INVALID, NBD_OPT_LIST,
253 "OPT_LIST should not have length");
32d7d2e0
HB
254 }
255
256 /* For each export, send a NBD_REP_SERVER reply. */
257 QTAILQ_FOREACH(exp, &exports, next) {
1c778ef7 258 if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
32d7d2e0
HB
259 return -EINVAL;
260 }
261 }
262 /* Finish with a NBD_REP_ACK. */
1c778ef7 263 return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
32d7d2e0
HB
264}
265
1a6245a5 266static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
f5076b5a 267{
1c778ef7 268 int rc = -EINVAL;
943cec86 269 char name[NBD_MAX_NAME_SIZE + 1];
6b8c01e7 270
f5076b5a
HB
271 /* Client sends:
272 [20 .. xx] export name (length bytes)
273 */
6b8c01e7 274 TRACE("Checking length");
943cec86 275 if (length >= sizeof(name)) {
6b8c01e7
PB
276 LOG("Bad length received");
277 goto fail;
278 }
2b0bbc4f 279 if (nbd_read(client->ioc, name, length, NULL) < 0) {
6b8c01e7
PB
280 LOG("read failed");
281 goto fail;
282 }
283 name[length] = '\0';
284
9344e5f5
DB
285 TRACE("Client requested export '%s'", name);
286
6b8c01e7
PB
287 client->exp = nbd_export_find(name);
288 if (!client->exp) {
289 LOG("export not found");
290 goto fail;
291 }
292
293 QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
294 nbd_export_get(client->exp);
6b8c01e7
PB
295 rc = 0;
296fail:
297 return rc;
298}
299
36683283
EB
300/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
301 * new channel for all further (now-encrypted) communication. */
f95910fe
DB
302static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
303 uint32_t length)
304{
305 QIOChannel *ioc;
306 QIOChannelTLS *tioc;
307 struct NBDTLSHandshakeData data = { 0 };
308
309 TRACE("Setting up TLS");
310 ioc = client->ioc;
311 if (length) {
2b0bbc4f 312 if (nbd_drop(ioc, length, NULL) < 0) {
f95910fe
DB
313 return NULL;
314 }
36683283
EB
315 nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
316 "OPT_STARTTLS should not have length");
f95910fe
DB
317 return NULL;
318 }
319
63d5ef86
EB
320 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
321 NBD_OPT_STARTTLS) < 0) {
322 return NULL;
323 }
f95910fe
DB
324
325 tioc = qio_channel_tls_new_server(ioc,
326 client->tlscreds,
327 client->tlsaclname,
328 NULL);
329 if (!tioc) {
330 return NULL;
331 }
332
0d73f725 333 qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
f95910fe
DB
334 TRACE("Starting TLS handshake");
335 data.loop = g_main_loop_new(g_main_context_default(), FALSE);
336 qio_channel_tls_handshake(tioc,
337 nbd_tls_handshake,
338 &data,
339 NULL);
340
341 if (!data.complete) {
342 g_main_loop_run(data.loop);
343 }
344 g_main_loop_unref(data.loop);
345 if (data.error) {
346 object_unref(OBJECT(tioc));
347 error_free(data.error);
348 return NULL;
349 }
350
351 return QIO_CHANNEL(tioc);
352}
353
354
526e5c65
EB
355/* Process all NBD_OPT_* client option commands.
356 * Return -errno on error, 0 on success. */
1a6245a5 357static int nbd_negotiate_options(NBDClient *client)
f5076b5a 358{
9c122ada 359 uint32_t flags;
26afa868 360 bool fixedNewstyle = false;
9c122ada
HR
361
362 /* Client sends:
363 [ 0 .. 3] client flags
364
365 [ 0 .. 7] NBD_OPTS_MAGIC
366 [ 8 .. 11] NBD option
367 [12 .. 15] Data length
368 ... Rest of request
369
370 [ 0 .. 7] NBD_OPTS_MAGIC
371 [ 8 .. 11] Second NBD option
372 [12 .. 15] Data length
373 ... Rest of request
374 */
375
2b0bbc4f 376 if (nbd_read(client->ioc, &flags, sizeof(flags), NULL) < 0) {
9c122ada
HR
377 LOG("read failed");
378 return -EIO;
379 }
380 TRACE("Checking client flags");
381 be32_to_cpus(&flags);
26afa868 382 if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
2cb34749 383 TRACE("Client supports fixed newstyle handshake");
26afa868
DB
384 fixedNewstyle = true;
385 flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
386 }
c203c59a
EB
387 if (flags & NBD_FLAG_C_NO_ZEROES) {
388 TRACE("Client supports no zeroes at handshake end");
389 client->no_zeroes = true;
390 flags &= ~NBD_FLAG_C_NO_ZEROES;
391 }
26afa868 392 if (flags != 0) {
2cb34749 393 TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
9c122ada
HR
394 return -EIO;
395 }
396
f5076b5a 397 while (1) {
9c122ada 398 int ret;
26afa868 399 uint32_t clientflags, length;
f5076b5a
HB
400 uint64_t magic;
401
2b0bbc4f 402 if (nbd_read(client->ioc, &magic, sizeof(magic), NULL) < 0) {
f5076b5a
HB
403 LOG("read failed");
404 return -EINVAL;
405 }
406 TRACE("Checking opts magic");
407 if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
408 LOG("Bad magic received");
409 return -EINVAL;
410 }
411
2b0bbc4f
VSO
412 if (nbd_read(client->ioc, &clientflags,
413 sizeof(clientflags), NULL) < 0)
f5d406fe 414 {
f5076b5a
HB
415 LOG("read failed");
416 return -EINVAL;
417 }
26afa868 418 clientflags = be32_to_cpu(clientflags);
f5076b5a 419
2b0bbc4f 420 if (nbd_read(client->ioc, &length, sizeof(length), NULL) < 0) {
f5076b5a
HB
421 LOG("read failed");
422 return -EINVAL;
423 }
424 length = be32_to_cpu(length);
425
2cb34749 426 TRACE("Checking option 0x%" PRIx32, clientflags);
f95910fe
DB
427 if (client->tlscreds &&
428 client->ioc == (QIOChannel *)client->sioc) {
429 QIOChannel *tioc;
430 if (!fixedNewstyle) {
2cb34749 431 TRACE("Unsupported option 0x%" PRIx32, clientflags);
f95910fe
DB
432 return -EINVAL;
433 }
434 switch (clientflags) {
435 case NBD_OPT_STARTTLS:
436 tioc = nbd_negotiate_handle_starttls(client, length);
437 if (!tioc) {
438 return -EIO;
439 }
440 object_unref(OBJECT(client->ioc));
441 client->ioc = QIO_CHANNEL(tioc);
442 break;
443
d1129a8a
EB
444 case NBD_OPT_EXPORT_NAME:
445 /* No way to return an error to client, so drop connection */
446 TRACE("Option 0x%x not permitted before TLS", clientflags);
447 return -EINVAL;
448
f95910fe 449 default:
2b0bbc4f 450 if (nbd_drop(client->ioc, length, NULL) < 0) {
d1129a8a
EB
451 return -EIO;
452 }
36683283
EB
453 ret = nbd_negotiate_send_rep_err(client->ioc,
454 NBD_REP_ERR_TLS_REQD,
455 clientflags,
456 "Option 0x%" PRIx32
457 "not permitted before TLS",
458 clientflags);
63d5ef86
EB
459 if (ret < 0) {
460 return ret;
461 }
b6f5d3b5
EB
462 /* Let the client keep trying, unless they asked to quit */
463 if (clientflags == NBD_OPT_ABORT) {
464 return -EINVAL;
465 }
d1129a8a 466 break;
f95910fe
DB
467 }
468 } else if (fixedNewstyle) {
26afa868
DB
469 switch (clientflags) {
470 case NBD_OPT_LIST:
471 ret = nbd_negotiate_handle_list(client, length);
472 if (ret < 0) {
473 return ret;
474 }
475 break;
476
477 case NBD_OPT_ABORT:
b6f5d3b5
EB
478 /* NBD spec says we must try to reply before
479 * disconnecting, but that we must also tolerate
480 * guests that don't wait for our reply. */
481 nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
26afa868
DB
482 return -EINVAL;
483
484 case NBD_OPT_EXPORT_NAME:
485 return nbd_negotiate_handle_export_name(client, length);
486
f95910fe 487 case NBD_OPT_STARTTLS:
2b0bbc4f 488 if (nbd_drop(client->ioc, length, NULL) < 0) {
d1129a8a
EB
489 return -EIO;
490 }
f95910fe 491 if (client->tlscreds) {
36683283
EB
492 ret = nbd_negotiate_send_rep_err(client->ioc,
493 NBD_REP_ERR_INVALID,
494 clientflags,
495 "TLS already enabled");
f95910fe 496 } else {
36683283
EB
497 ret = nbd_negotiate_send_rep_err(client->ioc,
498 NBD_REP_ERR_POLICY,
499 clientflags,
500 "TLS not configured");
63d5ef86
EB
501 }
502 if (ret < 0) {
503 return ret;
f95910fe 504 }
d1129a8a 505 break;
26afa868 506 default:
2b0bbc4f 507 if (nbd_drop(client->ioc, length, NULL) < 0) {
156f6a10
EB
508 return -EIO;
509 }
36683283
EB
510 ret = nbd_negotiate_send_rep_err(client->ioc,
511 NBD_REP_ERR_UNSUP,
512 clientflags,
513 "Unsupported option 0x%"
514 PRIx32,
515 clientflags);
63d5ef86
EB
516 if (ret < 0) {
517 return ret;
518 }
156f6a10 519 break;
26afa868
DB
520 }
521 } else {
522 /*
523 * If broken new-style we should drop the connection
524 * for anything except NBD_OPT_EXPORT_NAME
525 */
526 switch (clientflags) {
527 case NBD_OPT_EXPORT_NAME:
528 return nbd_negotiate_handle_export_name(client, length);
529
530 default:
2cb34749 531 TRACE("Unsupported option 0x%" PRIx32, clientflags);
26afa868 532 return -EINVAL;
32d7d2e0 533 }
f5076b5a
HB
534 }
535 }
536}
537
1a6245a5
FZ
538typedef struct {
539 NBDClient *client;
540 Coroutine *co;
541} NBDClientNewData;
542
543static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
7a5ca864 544{
1a6245a5 545 NBDClient *client = data->client;
b2e3d87f 546 char buf[8 + 8 + 8 + 128];
185b4338 547 int rc;
7423f417 548 const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
1f4d6d18
EB
549 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
550 NBD_FLAG_SEND_WRITE_ZEROES);
f95910fe 551 bool oldStyle;
c203c59a 552 size_t len;
b2e3d87f 553
f95910fe 554 /* Old style negotiation header without options
6b8c01e7
PB
555 [ 0 .. 7] passwd ("NBDMAGIC")
556 [ 8 .. 15] magic (NBD_CLIENT_MAGIC)
b2e3d87f 557 [16 .. 23] size
6b8c01e7 558 [24 .. 25] server flags (0)
5672ee54 559 [26 .. 27] export flags
6b8c01e7
PB
560 [28 .. 151] reserved (0)
561
f95910fe 562 New style negotiation header with options
6b8c01e7
PB
563 [ 0 .. 7] passwd ("NBDMAGIC")
564 [ 8 .. 15] magic (NBD_OPTS_MAGIC)
565 [16 .. 17] server flags (0)
f95910fe 566 ....options sent....
6b8c01e7
PB
567 [18 .. 25] size
568 [26 .. 27] export flags
c203c59a 569 [28 .. 151] reserved (0, omit if no_zeroes)
b2e3d87f
NT
570 */
571
1c778ef7 572 qio_channel_set_blocking(client->ioc, false, NULL);
185b4338
PB
573 rc = -EINVAL;
574
b2e3d87f 575 TRACE("Beginning negotiation.");
8ffaaba0 576 memset(buf, 0, sizeof(buf));
b2e3d87f 577 memcpy(buf, "NBDMAGIC", 8);
f95910fe
DB
578
579 oldStyle = client->exp != NULL && !client->tlscreds;
580 if (oldStyle) {
2cb34749
EB
581 TRACE("advertising size %" PRIu64 " and flags %x",
582 client->exp->size, client->exp->nbdflags | myflags);
667ad26f
JS
583 stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
584 stq_be_p(buf + 16, client->exp->size);
585 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
6b8c01e7 586 } else {
667ad26f 587 stq_be_p(buf + 8, NBD_OPTS_MAGIC);
c203c59a 588 stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
6b8c01e7 589 }
b2e3d87f 590
f95910fe
DB
591 if (oldStyle) {
592 if (client->tlscreds) {
593 TRACE("TLS cannot be enabled with oldstyle protocol");
594 goto fail;
595 }
2b0bbc4f 596 if (nbd_write(client->ioc, buf, sizeof(buf), NULL) < 0) {
6b8c01e7
PB
597 LOG("write failed");
598 goto fail;
599 }
600 } else {
2b0bbc4f 601 if (nbd_write(client->ioc, buf, 18, NULL) < 0) {
6b8c01e7
PB
602 LOG("write failed");
603 goto fail;
604 }
1a6245a5 605 rc = nbd_negotiate_options(client);
f5076b5a 606 if (rc != 0) {
6b8c01e7
PB
607 LOG("option negotiation failed");
608 goto fail;
609 }
610
2cb34749
EB
611 TRACE("advertising size %" PRIu64 " and flags %x",
612 client->exp->size, client->exp->nbdflags | myflags);
667ad26f
JS
613 stq_be_p(buf + 18, client->exp->size);
614 stw_be_p(buf + 26, client->exp->nbdflags | myflags);
c203c59a 615 len = client->no_zeroes ? 10 : sizeof(buf) - 18;
2b0bbc4f 616 if (nbd_write(client->ioc, buf + 18, len, NULL) < 0) {
6b8c01e7
PB
617 LOG("write failed");
618 goto fail;
619 }
b2e3d87f
NT
620 }
621
07f35073 622 TRACE("Negotiation succeeded.");
185b4338
PB
623 rc = 0;
624fail:
625 return rc;
7a5ca864
FB
626}
627
a0dc63a6 628static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
75818250 629{
fa26c26b 630 uint8_t buf[NBD_REQUEST_SIZE];
b2e3d87f 631 uint32_t magic;
a0dc63a6 632 int ret;
b2e3d87f 633
d1fdf257 634 ret = nbd_read(ioc, buf, sizeof(buf), NULL);
185b4338
PB
635 if (ret < 0) {
636 return ret;
637 }
638
b2e3d87f
NT
639 /* Request
640 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
b626b51a
EB
641 [ 4 .. 5] flags (NBD_CMD_FLAG_FUA, ...)
642 [ 6 .. 7] type (NBD_CMD_READ, ...)
b2e3d87f
NT
643 [ 8 .. 15] handle
644 [16 .. 23] from
645 [24 .. 27] len
646 */
647
773dce3c 648 magic = ldl_be_p(buf);
b626b51a
EB
649 request->flags = lduw_be_p(buf + 4);
650 request->type = lduw_be_p(buf + 6);
773dce3c
PM
651 request->handle = ldq_be_p(buf + 8);
652 request->from = ldq_be_p(buf + 16);
653 request->len = ldl_be_p(buf + 24);
b2e3d87f 654
b626b51a
EB
655 TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
656 ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
657 magic, request->flags, request->type, request->from, request->len);
b2e3d87f
NT
658
659 if (magic != NBD_REQUEST_MAGIC) {
2cb34749 660 LOG("invalid magic (got 0x%" PRIx32 ")", magic);
185b4338 661 return -EINVAL;
b2e3d87f
NT
662 }
663 return 0;
75818250
TS
664}
665
a0dc63a6 666static int nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
75818250 667{
fa26c26b 668 uint8_t buf[NBD_REPLY_SIZE];
b2e3d87f 669
ca441480
PB
670 reply->error = system_errno_to_nbd_errno(reply->error);
671
2cb34749
EB
672 TRACE("Sending response to client: { .error = %" PRId32
673 ", handle = %" PRIu64 " }",
7548fe31
EB
674 reply->error, reply->handle);
675
b2e3d87f
NT
676 /* Reply
677 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
678 [ 4 .. 7] error (0 == no error)
679 [ 7 .. 15] handle
680 */
667ad26f
JS
681 stl_be_p(buf, NBD_REPLY_MAGIC);
682 stl_be_p(buf + 4, reply->error);
683 stq_be_p(buf + 8, reply->handle);
b2e3d87f 684
d1fdf257 685 return nbd_write(ioc, buf, sizeof(buf), NULL);
75818250 686}
7a5ca864 687
41996e38
PB
688#define MAX_NBD_REQUESTS 16
689
ce33967a 690void nbd_client_get(NBDClient *client)
1743b515
PB
691{
692 client->refcount++;
693}
694
ce33967a 695void nbd_client_put(NBDClient *client)
1743b515
PB
696{
697 if (--client->refcount == 0) {
ff2b68aa 698 /* The last reference should be dropped by client->close,
f53a829b 699 * which is called by client_close.
ff2b68aa
PB
700 */
701 assert(client->closing);
702
ff82911c 703 qio_channel_detach_aio_context(client->ioc);
1c778ef7
DB
704 object_unref(OBJECT(client->sioc));
705 object_unref(OBJECT(client->ioc));
f95910fe
DB
706 if (client->tlscreds) {
707 object_unref(OBJECT(client->tlscreds));
708 }
709 g_free(client->tlsaclname);
6b8c01e7
PB
710 if (client->exp) {
711 QTAILQ_REMOVE(&client->exp->clients, client, next);
712 nbd_export_put(client->exp);
713 }
1743b515
PB
714 g_free(client);
715 }
716}
717
0c9390d9 718static void client_close(NBDClient *client, bool negotiated)
1743b515 719{
ff2b68aa
PB
720 if (client->closing) {
721 return;
722 }
723
724 client->closing = true;
725
726 /* Force requests to finish. They will drop their own references,
727 * then we'll close the socket and free the NBDClient.
728 */
1c778ef7
DB
729 qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
730 NULL);
ff2b68aa
PB
731
732 /* Also tell the client, so that they release their reference. */
0c9390d9
EB
733 if (client->close_fn) {
734 client->close_fn(client, negotiated);
1743b515 735 }
1743b515
PB
736}
737
315f78ab 738static NBDRequestData *nbd_request_get(NBDClient *client)
d9a73806 739{
315f78ab 740 NBDRequestData *req;
72deddc5 741
41996e38
PB
742 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
743 client->nb_requests++;
744
315f78ab 745 req = g_new0(NBDRequestData, 1);
72deddc5
PB
746 nbd_client_get(client);
747 req->client = client;
d9a73806
PB
748 return req;
749}
750
315f78ab 751static void nbd_request_put(NBDRequestData *req)
d9a73806 752{
72deddc5 753 NBDClient *client = req->client;
e1adb27a 754
2d821488
SH
755 if (req->data) {
756 qemu_vfree(req->data);
757 }
1729404c 758 g_free(req);
e1adb27a 759
958c717d 760 client->nb_requests--;
ff82911c
PB
761 nbd_client_receive_next_request(client);
762
72deddc5 763 nbd_client_put(client);
d9a73806
PB
764}
765
aadf99a7 766static void blk_aio_attached(AioContext *ctx, void *opaque)
f2149281
HR
767{
768 NBDExport *exp = opaque;
769 NBDClient *client;
770
771 TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);
772
773 exp->ctx = ctx;
774
775 QTAILQ_FOREACH(client, &exp->clients, next) {
ff82911c
PB
776 qio_channel_attach_aio_context(client->ioc, ctx);
777 if (client->recv_coroutine) {
778 aio_co_schedule(ctx, client->recv_coroutine);
779 }
780 if (client->send_coroutine) {
781 aio_co_schedule(ctx, client->send_coroutine);
782 }
f2149281
HR
783 }
784}
785
aadf99a7 786static void blk_aio_detach(void *opaque)
f2149281
HR
787{
788 NBDExport *exp = opaque;
789 NBDClient *client;
790
791 TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);
792
793 QTAILQ_FOREACH(client, &exp->clients, next) {
ff82911c 794 qio_channel_detach_aio_context(client->ioc);
f2149281
HR
795 }
796
797 exp->ctx = NULL;
798}
799
741cc431
HR
800static void nbd_eject_notifier(Notifier *n, void *data)
801{
802 NBDExport *exp = container_of(n, NBDExport, eject_notifier);
803 nbd_export_close(exp);
804}
805
cd7fca95 806NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
7423f417 807 uint16_t nbdflags, void (*close)(NBDExport *),
cd7fca95 808 bool writethrough, BlockBackend *on_eject_blk,
98f44bbe 809 Error **errp)
af49bbbe 810{
cd7fca95 811 BlockBackend *blk;
af49bbbe 812 NBDExport *exp = g_malloc0(sizeof(NBDExport));
8a7ce4f9 813 uint64_t perm;
d7086422 814 int ret;
cd7fca95 815
8a7ce4f9
KW
816 /* Don't allow resize while the NBD server is running, otherwise we don't
817 * care what happens with the node. */
818 perm = BLK_PERM_CONSISTENT_READ;
819 if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
820 perm |= BLK_PERM_WRITE;
821 }
822 blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
823 BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
d7086422
KW
824 ret = blk_insert_bs(blk, bs, errp);
825 if (ret < 0) {
826 goto fail;
827 }
cd7fca95
KW
828 blk_set_enable_write_cache(blk, !writethrough);
829
2c8d9f06 830 exp->refcount = 1;
4b9441f6 831 QTAILQ_INIT(&exp->clients);
aadf99a7 832 exp->blk = blk;
af49bbbe
PB
833 exp->dev_offset = dev_offset;
834 exp->nbdflags = nbdflags;
98f44bbe
HR
835 exp->size = size < 0 ? blk_getlength(blk) : size;
836 if (exp->size < 0) {
837 error_setg_errno(errp, -exp->size,
838 "Failed to determine the NBD export's length");
839 goto fail;
840 }
841 exp->size -= exp->size % BDRV_SECTOR_SIZE;
842
0ddf08db 843 exp->close = close;
aadf99a7 844 exp->ctx = blk_get_aio_context(blk);
aadf99a7 845 blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
741cc431 846
cd7fca95
KW
847 if (on_eject_blk) {
848 blk_ref(on_eject_blk);
849 exp->eject_notifier_blk = on_eject_blk;
850 exp->eject_notifier.notify = nbd_eject_notifier;
851 blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
852 }
741cc431 853
7ea2d269
AK
854 /*
855 * NBD exports are used for non-shared storage migration. Make sure
04c01a5c 856 * that BDRV_O_INACTIVE is cleared and the image is ready for write
7ea2d269
AK
857 * access since the export could be available before migration handover.
858 */
e5f3e12e 859 aio_context_acquire(exp->ctx);
aadf99a7 860 blk_invalidate_cache(blk, NULL);
e5f3e12e 861 aio_context_release(exp->ctx);
af49bbbe 862 return exp;
98f44bbe
HR
863
864fail:
cd7fca95 865 blk_unref(blk);
98f44bbe
HR
866 g_free(exp);
867 return NULL;
af49bbbe
PB
868}
869
ee0a19ec
PB
870NBDExport *nbd_export_find(const char *name)
871{
872 NBDExport *exp;
873 QTAILQ_FOREACH(exp, &exports, next) {
874 if (strcmp(name, exp->name) == 0) {
875 return exp;
876 }
877 }
878
879 return NULL;
880}
881
882void nbd_export_set_name(NBDExport *exp, const char *name)
883{
884 if (exp->name == name) {
885 return;
886 }
887
888 nbd_export_get(exp);
889 if (exp->name != NULL) {
890 g_free(exp->name);
891 exp->name = NULL;
892 QTAILQ_REMOVE(&exports, exp, next);
893 nbd_export_put(exp);
894 }
895 if (name != NULL) {
896 nbd_export_get(exp);
897 exp->name = g_strdup(name);
898 QTAILQ_INSERT_TAIL(&exports, exp, next);
899 }
900 nbd_export_put(exp);
901}
902
b1a75b33
EB
903void nbd_export_set_description(NBDExport *exp, const char *description)
904{
905 g_free(exp->description);
906 exp->description = g_strdup(description);
907}
908
af49bbbe
PB
909void nbd_export_close(NBDExport *exp)
910{
4b9441f6 911 NBDClient *client, *next;
2c8d9f06 912
4b9441f6
PB
913 nbd_export_get(exp);
914 QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
0c9390d9 915 client_close(client, true);
4b9441f6 916 }
125afda8 917 nbd_export_set_name(exp, NULL);
b1a75b33 918 nbd_export_set_description(exp, NULL);
4b9441f6 919 nbd_export_put(exp);
2c8d9f06
PB
920}
921
922void nbd_export_get(NBDExport *exp)
923{
924 assert(exp->refcount > 0);
925 exp->refcount++;
926}
927
928void nbd_export_put(NBDExport *exp)
929{
930 assert(exp->refcount > 0);
931 if (exp->refcount == 1) {
932 nbd_export_close(exp);
d9a73806
PB
933 }
934
2c8d9f06 935 if (--exp->refcount == 0) {
ee0a19ec 936 assert(exp->name == NULL);
b1a75b33 937 assert(exp->description == NULL);
ee0a19ec 938
0ddf08db
PB
939 if (exp->close) {
940 exp->close(exp);
941 }
942
d6268348 943 if (exp->blk) {
cd7fca95
KW
944 if (exp->eject_notifier_blk) {
945 notifier_remove(&exp->eject_notifier);
946 blk_unref(exp->eject_notifier_blk);
947 }
d6268348
WC
948 blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
949 blk_aio_detach, exp);
950 blk_unref(exp->blk);
951 exp->blk = NULL;
952 }
953
2c8d9f06
PB
954 g_free(exp);
955 }
af49bbbe
PB
956}
957
e140177d 958BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
125afda8 959{
aadf99a7 960 return exp->blk;
125afda8
PB
961}
962
ee0a19ec
PB
963void nbd_export_close_all(void)
964{
965 NBDExport *exp, *next;
966
967 QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
968 nbd_export_close(exp);
ee0a19ec
PB
969 }
970}
971
a0dc63a6 972static int nbd_co_send_reply(NBDRequestData *req, NBDReply *reply, int len)
22045592 973{
72deddc5 974 NBDClient *client = req->client;
572b97e7 975 int rc;
22045592 976
1c778ef7 977 g_assert(qemu_in_coroutine());
262db388 978 qemu_co_mutex_lock(&client->send_lock);
262db388
PB
979 client->send_coroutine = qemu_coroutine_self();
980
22045592 981 if (!len) {
1c778ef7 982 rc = nbd_send_reply(client->ioc, reply);
22045592 983 } else {
1c778ef7
DB
984 qio_channel_set_cork(client->ioc, true);
985 rc = nbd_send_reply(client->ioc, reply);
572b97e7
VSO
986 if (rc == 0) {
987 rc = nbd_write(client->ioc, req->data, len, NULL);
988 if (rc < 0) {
185b4338 989 rc = -EIO;
22045592
PB
990 }
991 }
1c778ef7 992 qio_channel_set_cork(client->ioc, false);
22045592 993 }
262db388
PB
994
995 client->send_coroutine = NULL;
262db388 996 qemu_co_mutex_unlock(&client->send_lock);
22045592
PB
997 return rc;
998}
999
29b6c3b3
EB
1000/* Collect a client request. Return 0 if request looks valid, -EAGAIN
1001 * to keep trying the collection, -EIO to drop connection right away,
1002 * and any other negative value to report an error to the client
1003 * (although the caller may still need to disconnect after reporting
1004 * the error). */
a0dc63a6 1005static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request)
a030b347 1006{
72deddc5 1007 NBDClient *client = req->client;
a0dc63a6 1008 int rc;
a030b347 1009
1c778ef7 1010 g_assert(qemu_in_coroutine());
ff82911c 1011 assert(client->recv_coroutine == qemu_coroutine_self());
1c778ef7 1012 rc = nbd_receive_request(client->ioc, request);
7fe7b68b
PB
1013 if (rc < 0) {
1014 if (rc != -EAGAIN) {
1015 rc = -EIO;
1016 }
a030b347
PB
1017 goto out;
1018 }
1019
29b6c3b3
EB
1020 TRACE("Decoding type");
1021
b626b51a 1022 if (request->type != NBD_CMD_WRITE) {
29b6c3b3
EB
1023 /* No payload, we are ready to read the next request. */
1024 req->complete = true;
1025 }
1026
b626b51a 1027 if (request->type == NBD_CMD_DISC) {
29b6c3b3
EB
1028 /* Special case: we're going to disconnect without a reply,
1029 * whether or not flags, from, or len are bogus */
1030 TRACE("Request type is DISCONNECT");
1031 rc = -EIO;
1032 goto out;
1033 }
1034
1035 /* Check for sanity in the parameters, part 1. Defer as many
1036 * checks as possible until after reading any NBD_CMD_WRITE
1037 * payload, so we can try and keep the connection alive. */
a030b347 1038 if ((request->from + request->len) < request->from) {
29b6c3b3 1039 LOG("integer overflow detected, you're probably being attacked");
a030b347
PB
1040 rc = -EINVAL;
1041 goto out;
1042 }
1043
b626b51a 1044 if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
eb38c3b6 1045 if (request->len > NBD_MAX_BUFFER_SIZE) {
2cb34749 1046 LOG("len (%" PRIu32" ) is larger than max len (%u)",
eb38c3b6
PB
1047 request->len, NBD_MAX_BUFFER_SIZE);
1048 rc = -EINVAL;
1049 goto out;
1050 }
1051
f1c17521
PB
1052 req->data = blk_try_blockalign(client->exp->blk, request->len);
1053 if (req->data == NULL) {
1054 rc = -ENOMEM;
1055 goto out;
1056 }
2d821488 1057 }
b626b51a 1058 if (request->type == NBD_CMD_WRITE) {
2cb34749 1059 TRACE("Reading %" PRIu32 " byte(s)", request->len);
a030b347 1060
d1fdf257 1061 if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) {
a030b347
PB
1062 LOG("reading from socket failed");
1063 rc = -EIO;
1064 goto out;
1065 }
29b6c3b3 1066 req->complete = true;
a030b347 1067 }
29b6c3b3
EB
1068
1069 /* Sanity checks, part 2. */
1070 if (request->from + request->len > client->exp->size) {
1071 LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
1072 ", Size: %" PRIu64, request->from, request->len,
1073 (uint64_t)client->exp->size);
b626b51a 1074 rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
29b6c3b3
EB
1075 goto out;
1076 }
1f4d6d18 1077 if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
b626b51a 1078 LOG("unsupported flags (got 0x%x)", request->flags);
5bee0f47
EB
1079 rc = -EINVAL;
1080 goto out;
ab7c548e 1081 }
1f4d6d18
EB
1082 if (request->type != NBD_CMD_WRITE_ZEROES &&
1083 (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
1084 LOG("unexpected flags (got 0x%x)", request->flags);
1085 rc = -EINVAL;
1086 goto out;
1087 }
29b6c3b3 1088
a030b347
PB
1089 rc = 0;
1090
1091out:
262db388 1092 client->recv_coroutine = NULL;
ff82911c 1093 nbd_client_receive_next_request(client);
958c717d 1094
a030b347
PB
1095 return rc;
1096}
1097
ff82911c
PB
1098/* Owns a reference to the NBDClient passed as opaque. */
1099static coroutine_fn void nbd_trip(void *opaque)
75818250 1100{
262db388 1101 NBDClient *client = opaque;
1743b515 1102 NBDExport *exp = client->exp;
315f78ab 1103 NBDRequestData *req;
ff82911c 1104 NBDRequest request = { 0 }; /* GCC thinks it can be used uninitialized */
ed2dd912 1105 NBDReply reply;
a0dc63a6 1106 int ret;
a0c30369 1107 int flags;
b2e3d87f
NT
1108
1109 TRACE("Reading request.");
ff2b68aa 1110 if (client->closing) {
ff82911c 1111 nbd_client_put(client);
ff2b68aa
PB
1112 return;
1113 }
b2e3d87f 1114
ff2b68aa 1115 req = nbd_request_get(client);
262db388 1116 ret = nbd_co_receive_request(req, &request);
7fe7b68b
PB
1117 if (ret == -EAGAIN) {
1118 goto done;
1119 }
a030b347 1120 if (ret == -EIO) {
d9a73806 1121 goto out;
a030b347 1122 }
b2e3d87f 1123
fae69416
PB
1124 reply.handle = request.handle;
1125 reply.error = 0;
1126
a030b347
PB
1127 if (ret < 0) {
1128 reply.error = -ret;
1129 goto error_reply;
b2e3d87f 1130 }
b2e3d87f 1131
d6268348
WC
1132 if (client->closing) {
1133 /*
1134 * The client may be closed when we are blocked in
1135 * nbd_co_receive_request()
1136 */
1137 goto done;
1138 }
1139
b626b51a 1140 switch (request.type) {
b2e3d87f
NT
1141 case NBD_CMD_READ:
1142 TRACE("Request type is READ");
1143
b626b51a
EB
1144 /* XXX: NBD Protocol only documents use of FUA with WRITE */
1145 if (request.flags & NBD_CMD_FLAG_FUA) {
aadf99a7 1146 ret = blk_co_flush(exp->blk);
e25ceb76
PB
1147 if (ret < 0) {
1148 LOG("flush failed");
1149 reply.error = -ret;
1150 goto error_reply;
1151 }
1152 }
1153
df7b97ff
EB
1154 ret = blk_pread(exp->blk, request.from + exp->dev_offset,
1155 req->data, request.len);
adcf6302 1156 if (ret < 0) {
b2e3d87f 1157 LOG("reading from file failed");
adcf6302 1158 reply.error = -ret;
fae69416 1159 goto error_reply;
b2e3d87f 1160 }
b2e3d87f 1161
2cb34749 1162 TRACE("Read %" PRIu32" byte(s)", request.len);
262db388 1163 if (nbd_co_send_reply(req, &reply, request.len) < 0)
d9a73806 1164 goto out;
b2e3d87f
NT
1165 break;
1166 case NBD_CMD_WRITE:
1167 TRACE("Request type is WRITE");
1168
af49bbbe 1169 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
b2e3d87f 1170 TRACE("Server is read-only, return error");
fae69416
PB
1171 reply.error = EROFS;
1172 goto error_reply;
1173 }
1174
1175 TRACE("Writing to device");
1176
a0c30369 1177 flags = 0;
b626b51a 1178 if (request.flags & NBD_CMD_FLAG_FUA) {
a0c30369
EB
1179 flags |= BDRV_REQ_FUA;
1180 }
df7b97ff 1181 ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
a0c30369 1182 req->data, request.len, flags);
fae69416
PB
1183 if (ret < 0) {
1184 LOG("writing to file failed");
1185 reply.error = -ret;
1186 goto error_reply;
1187 }
b2e3d87f 1188
1f4d6d18
EB
1189 if (nbd_co_send_reply(req, &reply, 0) < 0) {
1190 goto out;
1191 }
1192 break;
1193
1194 case NBD_CMD_WRITE_ZEROES:
1195 TRACE("Request type is WRITE_ZEROES");
1196
1197 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1198 TRACE("Server is read-only, return error");
1199 reply.error = EROFS;
1200 goto error_reply;
1201 }
1202
1203 TRACE("Writing to device");
1204
1205 flags = 0;
1206 if (request.flags & NBD_CMD_FLAG_FUA) {
1207 flags |= BDRV_REQ_FUA;
1208 }
1209 if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
1210 flags |= BDRV_REQ_MAY_UNMAP;
1211 }
1212 ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
1213 request.len, flags);
1214 if (ret < 0) {
1215 LOG("writing to file failed");
1216 reply.error = -ret;
1217 goto error_reply;
1218 }
1219
fc19f8a0 1220 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 1221 goto out;
fc19f8a0 1222 }
b2e3d87f 1223 break;
29b6c3b3 1224
b2e3d87f 1225 case NBD_CMD_DISC:
29b6c3b3
EB
1226 /* unreachable, thanks to special case in nbd_co_receive_request() */
1227 abort();
1228
1486d04a
PB
1229 case NBD_CMD_FLUSH:
1230 TRACE("Request type is FLUSH");
1231
aadf99a7 1232 ret = blk_co_flush(exp->blk);
1486d04a
PB
1233 if (ret < 0) {
1234 LOG("flush failed");
1235 reply.error = -ret;
1236 }
fc19f8a0 1237 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 1238 goto out;
fc19f8a0 1239 }
7a706633
PB
1240 break;
1241 case NBD_CMD_TRIM:
1242 TRACE("Request type is TRIM");
1c6c4bb7
EB
1243 ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
1244 request.len);
1245 if (ret < 0) {
1246 LOG("discard failed");
1247 reply.error = -ret;
7a706633 1248 }
fc19f8a0 1249 if (nbd_co_send_reply(req, &reply, 0) < 0) {
d9a73806 1250 goto out;
fc19f8a0 1251 }
1486d04a 1252 break;
b2e3d87f 1253 default:
2cb34749 1254 LOG("invalid request type (%" PRIu32 ") received", request.type);
8b2f0abf 1255 reply.error = EINVAL;
fae69416 1256 error_reply:
29b6c3b3
EB
1257 /* We must disconnect after NBD_CMD_WRITE if we did not
1258 * read the payload.
1259 */
1260 if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
d9a73806 1261 goto out;
fc19f8a0 1262 }
fae69416 1263 break;
b2e3d87f
NT
1264 }
1265
1266 TRACE("Request/Reply complete");
1267
7fe7b68b 1268done:
262db388 1269 nbd_request_put(req);
ff82911c 1270 nbd_client_put(client);
262db388
PB
1271 return;
1272
d9a73806 1273out:
72deddc5 1274 nbd_request_put(req);
0c9390d9 1275 client_close(client, true);
ff82911c 1276 nbd_client_put(client);
7a5ca864 1277}
af49bbbe 1278
ff82911c 1279static void nbd_client_receive_next_request(NBDClient *client)
958c717d 1280{
ff82911c
PB
1281 if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
1282 nbd_client_get(client);
1283 client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
1284 aio_co_schedule(client->exp->ctx, client->recv_coroutine);
958c717d
HR
1285 }
1286}
1287
1a6245a5
FZ
1288static coroutine_fn void nbd_co_client_start(void *opaque)
1289{
1290 NBDClientNewData *data = opaque;
1291 NBDClient *client = data->client;
1292 NBDExport *exp = client->exp;
1293
1294 if (exp) {
1295 nbd_export_get(exp);
df8ad9f1 1296 QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1a6245a5 1297 }
df8ad9f1
EB
1298 qemu_co_mutex_init(&client->send_lock);
1299
1a6245a5 1300 if (nbd_negotiate(data)) {
0c9390d9 1301 client_close(client, false);
1a6245a5
FZ
1302 goto out;
1303 }
ff82911c
PB
1304
1305 nbd_client_receive_next_request(client);
1306
1a6245a5
FZ
1307out:
1308 g_free(data);
1309}
1310
0c9390d9
EB
1311/*
1312 * Create a new client listener on the given export @exp, using the
1313 * given channel @sioc. Begin servicing it in a coroutine. When the
1314 * connection closes, call @close_fn with an indication of whether the
1315 * client completed negotiation.
1316 */
1c778ef7
DB
1317void nbd_client_new(NBDExport *exp,
1318 QIOChannelSocket *sioc,
f95910fe
DB
1319 QCryptoTLSCreds *tlscreds,
1320 const char *tlsaclname,
0c9390d9 1321 void (*close_fn)(NBDClient *, bool))
af49bbbe 1322{
1743b515 1323 NBDClient *client;
1a6245a5
FZ
1324 NBDClientNewData *data = g_new(NBDClientNewData, 1);
1325
1743b515
PB
1326 client = g_malloc0(sizeof(NBDClient));
1327 client->refcount = 1;
1328 client->exp = exp;
f95910fe
DB
1329 client->tlscreds = tlscreds;
1330 if (tlscreds) {
1331 object_ref(OBJECT(client->tlscreds));
1332 }
1333 client->tlsaclname = g_strdup(tlsaclname);
1c778ef7
DB
1334 client->sioc = sioc;
1335 object_ref(OBJECT(client->sioc));
1336 client->ioc = QIO_CHANNEL(sioc);
1337 object_ref(OBJECT(client->ioc));
0c9390d9 1338 client->close_fn = close_fn;
2c8d9f06 1339
1a6245a5 1340 data->client = client;
0b8b8753
PB
1341 data->co = qemu_coroutine_create(nbd_co_client_start, data);
1342 qemu_coroutine_enter(data->co);
af49bbbe 1343}