]> git.proxmox.com Git - qemu.git/blob - nbd.c
nbd: consistently check for <0 or >=0
[qemu.git] / nbd.c
1 /*
2 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
3 *
4 * Network Block Device
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "nbd.h"
20 #include "block.h"
21 #include "block_int.h"
22
23 #include "qemu-coroutine.h"
24
25 #include <errno.h>
26 #include <string.h>
27 #ifndef _WIN32
28 #include <sys/ioctl.h>
29 #endif
30 #if defined(__sun__) || defined(__HAIKU__)
31 #include <sys/ioccom.h>
32 #endif
33 #include <ctype.h>
34 #include <inttypes.h>
35
36 #ifdef __linux__
37 #include <linux/fs.h>
38 #endif
39
40 #include "qemu_socket.h"
41 #include "qemu-queue.h"
42
/* Define DEBUG_NBD to make TRACE() forward to LOG(); it is off by default. */
/* #define DEBUG_NBD */

/*
 * LOG: print a message to stderr, prefixed with the source file, function
 * name and line number of the call site; a newline is appended.
 */
#define LOG(msg, ...) do { \
    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
} while (0)

/*
 * TRACE: same as LOG when DEBUG_NBD is defined, otherwise expands to an
 * empty statement (the arguments are not evaluated).
 */
#ifdef DEBUG_NBD
#define TRACE(msg, ...) do { \
    LOG(msg, ## __VA_ARGS__); \
} while (0)
#else
#define TRACE(msg, ...) do { } while (0)
#endif
58
/* This is all part of the "official" NBD API */

/* Size of a reply header: magic (4) + error (4) + handle (8). */
#define NBD_REPLY_SIZE          (4 + 4 + 8)
/* Magic numbers that open every request and every reply header. */
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* ioctl request numbers used on the NBD device node. */
#define NBD_SET_SOCK            _IO(0xab, 0)
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
#define NBD_SET_SIZE            _IO(0xab, 2)
#define NBD_DO_IT               _IO(0xab, 3)
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
#define NBD_CLEAR_QUE           _IO(0xab, 5)
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
#define NBD_DISCONNECT          _IO(0xab, 8)
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
#define NBD_SET_FLAGS           _IO(0xab, 10)

/* Option code sent by the client to select an export by name. */
#define NBD_OPT_EXPORT_NAME     (1 << 0)

/* That's all folks */

/* Shorthands for nbd_wr_sync() in its read and write directions. */
#define read_sync(fd, buffer, size)  nbd_wr_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) nbd_wr_sync(fd, buffer, size, false)
83
/*
 * Transfer up to `size` bytes between `fd` and `buffer`.
 *
 * do_read selects the direction: true receives into buffer, false sends
 * from it. Inside a coroutine the work is delegated to qemu_co_recv() /
 * qemu_co_send(). Otherwise the call loops until everything has been
 * transferred, retrying on EINTR/EAGAIN.
 *
 * Returns the number of bytes transferred; this is short when the peer
 * closes the connection, and 0 on an unrecoverable socket error.
 */
size_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    size_t done = 0;

    if (qemu_in_coroutine()) {
        if (!do_read) {
            return qemu_co_send(fd, buffer, size);
        }
        return qemu_co_recv(fd, buffer, size);
    }

    while (done < size) {
        char *cursor = (char *)buffer + done;
        size_t remaining = size - done;
        ssize_t n;

        if (do_read) {
            n = qemu_recv(fd, cursor, remaining, 0);
        } else {
            n = send(fd, cursor, remaining, 0);
        }

        if (n > 0) {
            done += n;
            continue;
        }

        /* eof */
        if (n == 0) {
            break;
        }

        errno = socket_error();

        /* recoverable error: try again */
        if (errno == EINTR || errno == EAGAIN) {
            continue;
        }

        /* unrecoverable error */
        return 0;
    }

    return done;
}
127
/*
 * Format "address:port" into buf (at most len bytes, NUL-terminated by
 * snprintf). An address containing a colon is taken to be an IPv6 literal
 * and is wrapped in square brackets.
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    if (strchr(address, ':') == NULL) {
        snprintf(buf, len, "%s:%u", address, port);
        return;
    }

    /* IPv6 literal: needs [] so the port separator stays unambiguous */
    snprintf(buf, len, "[%s]:%u", address, port);
}
138
/*
 * Connect to address:port over TCP. Returns whatever
 * tcp_socket_outgoing_spec() returns for the combined string.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_outgoing_spec(spec);
}
145
/*
 * Connect a stream socket to an "address:port" string; thin wrapper that
 * returns the result of inet_connect().
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int fd = inet_connect(address_and_port, SOCK_STREAM);

    return fd;
}
150
/*
 * Listen on address:port over TCP. Returns whatever
 * tcp_socket_incoming_spec() returns for the combined string.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_incoming_spec(spec);
}
157
/*
 * Create a listening stream socket for an "address:port" string. No output
 * string buffer is supplied to inet_listen() (NULL / length 0) and the
 * final argument is 0.
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0);
}
164
/*
 * Create a listening UNIX socket at `path`. No output string buffer is
 * supplied to unix_listen() (NULL / length 0).
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
172
/* Connect to the UNIX socket at `path`; returns the result of unix_connect(). */
int unix_socket_outgoing(const char *path)
{
    int fd = unix_connect(path);

    return fd;
}
177
178 /* Basic flow
179
180 Server Client
181
182 Negotiate
183 Request
184 Response
185 Request
186 Response
187 ...
188 ...
189 Request (type == 2)
190 */
191
192 static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
193 {
194 char buf[8 + 8 + 8 + 128];
195
196 /* Negotiate
197 [ 0 .. 7] passwd ("NBDMAGIC")
198 [ 8 .. 15] magic (0x00420281861253)
199 [16 .. 23] size
200 [24 .. 27] flags
201 [28 .. 151] reserved (0)
202 */
203
204 TRACE("Beginning negotiation.");
205 memcpy(buf, "NBDMAGIC", 8);
206 cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
207 cpu_to_be64w((uint64_t*)(buf + 16), size);
208 cpu_to_be32w((uint32_t*)(buf + 24),
209 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
210 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
211 memset(buf + 28, 0, 124);
212
213 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
214 LOG("write failed");
215 errno = EINVAL;
216 return -1;
217 }
218
219 TRACE("Negotiation succeeded.");
220
221 return 0;
222 }
223
224 int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
225 off_t *size, size_t *blocksize)
226 {
227 char buf[256];
228 uint64_t magic, s;
229 uint16_t tmp;
230
231 TRACE("Receiving negotiation.");
232
233 if (read_sync(csock, buf, 8) != 8) {
234 LOG("read failed");
235 errno = EINVAL;
236 return -1;
237 }
238
239 buf[8] = '\0';
240 if (strlen(buf) == 0) {
241 LOG("server connection closed");
242 errno = EINVAL;
243 return -1;
244 }
245
246 TRACE("Magic is %c%c%c%c%c%c%c%c",
247 qemu_isprint(buf[0]) ? buf[0] : '.',
248 qemu_isprint(buf[1]) ? buf[1] : '.',
249 qemu_isprint(buf[2]) ? buf[2] : '.',
250 qemu_isprint(buf[3]) ? buf[3] : '.',
251 qemu_isprint(buf[4]) ? buf[4] : '.',
252 qemu_isprint(buf[5]) ? buf[5] : '.',
253 qemu_isprint(buf[6]) ? buf[6] : '.',
254 qemu_isprint(buf[7]) ? buf[7] : '.');
255
256 if (memcmp(buf, "NBDMAGIC", 8) != 0) {
257 LOG("Invalid magic received");
258 errno = EINVAL;
259 return -1;
260 }
261
262 if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
263 LOG("read failed");
264 errno = EINVAL;
265 return -1;
266 }
267 magic = be64_to_cpu(magic);
268 TRACE("Magic is 0x%" PRIx64, magic);
269
270 if (name) {
271 uint32_t reserved = 0;
272 uint32_t opt;
273 uint32_t namesize;
274
275 TRACE("Checking magic (opts_magic)");
276 if (magic != 0x49484156454F5054LL) {
277 LOG("Bad magic received");
278 errno = EINVAL;
279 return -1;
280 }
281 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
282 LOG("flags read failed");
283 errno = EINVAL;
284 return -1;
285 }
286 *flags = be16_to_cpu(tmp) << 16;
287 /* reserved for future use */
288 if (write_sync(csock, &reserved, sizeof(reserved)) !=
289 sizeof(reserved)) {
290 LOG("write failed (reserved)");
291 errno = EINVAL;
292 return -1;
293 }
294 /* write the export name */
295 magic = cpu_to_be64(magic);
296 if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
297 LOG("write failed (magic)");
298 errno = EINVAL;
299 return -1;
300 }
301 opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
302 if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
303 LOG("write failed (opt)");
304 errno = EINVAL;
305 return -1;
306 }
307 namesize = cpu_to_be32(strlen(name));
308 if (write_sync(csock, &namesize, sizeof(namesize)) !=
309 sizeof(namesize)) {
310 LOG("write failed (namesize)");
311 errno = EINVAL;
312 return -1;
313 }
314 if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
315 LOG("write failed (name)");
316 errno = EINVAL;
317 return -1;
318 }
319 } else {
320 TRACE("Checking magic (cli_magic)");
321
322 if (magic != 0x00420281861253LL) {
323 LOG("Bad magic received");
324 errno = EINVAL;
325 return -1;
326 }
327 }
328
329 if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
330 LOG("read failed");
331 errno = EINVAL;
332 return -1;
333 }
334 *size = be64_to_cpu(s);
335 *blocksize = 1024;
336 TRACE("Size is %" PRIu64, *size);
337
338 if (!name) {
339 if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
340 LOG("read failed (flags)");
341 errno = EINVAL;
342 return -1;
343 }
344 *flags = be32_to_cpup(flags);
345 } else {
346 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
347 LOG("read failed (tmp)");
348 errno = EINVAL;
349 return -1;
350 }
351 *flags |= be32_to_cpu(tmp);
352 }
353 if (read_sync(csock, &buf, 124) != 124) {
354 LOG("read failed (buf)");
355 errno = EINVAL;
356 return -1;
357 }
358 return 0;
359 }
360
361 #ifdef __linux__
362 int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
363 {
364 TRACE("Setting NBD socket");
365
366 if (ioctl(fd, NBD_SET_SOCK, csock) < 0) {
367 int serrno = errno;
368 LOG("Failed to set NBD socket");
369 errno = serrno;
370 return -1;
371 }
372
373 TRACE("Setting block size to %lu", (unsigned long)blocksize);
374
375 if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) {
376 int serrno = errno;
377 LOG("Failed setting NBD block size");
378 errno = serrno;
379 return -1;
380 }
381
382 TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
383
384 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) {
385 int serrno = errno;
386 LOG("Failed setting size (in blocks)");
387 errno = serrno;
388 return -1;
389 }
390
391 if (flags & NBD_FLAG_READ_ONLY) {
392 int read_only = 1;
393 TRACE("Setting readonly attribute");
394
395 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
396 int serrno = errno;
397 LOG("Failed setting read-only attribute");
398 errno = serrno;
399 return -1;
400 }
401 }
402
403 if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
404 && errno != ENOTTY) {
405 int serrno = errno;
406 LOG("Failed setting flags");
407 errno = serrno;
408 return -1;
409 }
410
411 TRACE("Negotiation ended");
412
413 return 0;
414 }
415
416 int nbd_disconnect(int fd)
417 {
418 ioctl(fd, NBD_CLEAR_QUE);
419 ioctl(fd, NBD_DISCONNECT);
420 ioctl(fd, NBD_CLEAR_SOCK);
421 return 0;
422 }
423
424 int nbd_client(int fd)
425 {
426 int ret;
427 int serrno;
428
429 TRACE("Doing NBD loop");
430
431 ret = ioctl(fd, NBD_DO_IT);
432 if (ret < 0 && errno == EPIPE) {
433 /* NBD_DO_IT normally returns EPIPE when someone has disconnected
434 * the socket via NBD_DISCONNECT. We do not want to return 1 in
435 * that case.
436 */
437 ret = 0;
438 }
439 serrno = errno;
440
441 TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
442
443 TRACE("Clearing NBD queue");
444 ioctl(fd, NBD_CLEAR_QUE);
445
446 TRACE("Clearing NBD socket");
447 ioctl(fd, NBD_CLEAR_SOCK);
448
449 errno = serrno;
450 return ret;
451 }
452 #else
/*
 * Stub for platforms without the Linux NBD device: the arguments are
 * ignored, errno is set to ENOTSUP and -1 is returned.
 */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    (void)fd;
    (void)csock;
    (void)flags;
    (void)size;
    (void)blocksize;

    errno = ENOTSUP;
    return -1;
}
458
/*
 * Stub for platforms without the Linux NBD device: sets errno to ENOTSUP
 * and returns -1.
 */
int nbd_disconnect(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
464
/*
 * Stub for platforms without the Linux NBD device: sets errno to ENOTSUP
 * and returns -1.
 */
int nbd_client(int fd)
{
    (void)fd;

    errno = ENOTSUP;
    return -1;
}
470 #endif
471
472 ssize_t nbd_send_request(int csock, struct nbd_request *request)
473 {
474 uint8_t buf[4 + 4 + 8 + 8 + 4];
475
476 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
477 cpu_to_be32w((uint32_t*)(buf + 4), request->type);
478 cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
479 cpu_to_be64w((uint64_t*)(buf + 16), request->from);
480 cpu_to_be32w((uint32_t*)(buf + 24), request->len);
481
482 TRACE("Sending request to client: "
483 "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
484 request->from, request->len, request->handle, request->type);
485
486 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
487 LOG("writing to socket failed");
488 errno = EINVAL;
489 return -1;
490 }
491 return 0;
492 }
493
494 static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
495 {
496 uint8_t buf[4 + 4 + 8 + 8 + 4];
497 uint32_t magic;
498
499 if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
500 LOG("read failed");
501 errno = EINVAL;
502 return -1;
503 }
504
505 /* Request
506 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
507 [ 4 .. 7] type (0 == READ, 1 == WRITE)
508 [ 8 .. 15] handle
509 [16 .. 23] from
510 [24 .. 27] len
511 */
512
513 magic = be32_to_cpup((uint32_t*)buf);
514 request->type = be32_to_cpup((uint32_t*)(buf + 4));
515 request->handle = be64_to_cpup((uint64_t*)(buf + 8));
516 request->from = be64_to_cpup((uint64_t*)(buf + 16));
517 request->len = be32_to_cpup((uint32_t*)(buf + 24));
518
519 TRACE("Got request: "
520 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
521 magic, request->type, request->from, request->len);
522
523 if (magic != NBD_REQUEST_MAGIC) {
524 LOG("invalid magic (got 0x%x)", magic);
525 errno = EINVAL;
526 return -1;
527 }
528 return 0;
529 }
530
531 ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply)
532 {
533 uint8_t buf[NBD_REPLY_SIZE];
534 uint32_t magic;
535
536 if (read_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
537 LOG("read failed");
538 errno = EINVAL;
539 return -1;
540 }
541
542 /* Reply
543 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
544 [ 4 .. 7] error (0 == no error)
545 [ 7 .. 15] handle
546 */
547
548 magic = be32_to_cpup((uint32_t*)buf);
549 reply->error = be32_to_cpup((uint32_t*)(buf + 4));
550 reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
551
552 TRACE("Got reply: "
553 "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
554 magic, reply->error, reply->handle);
555
556 if (magic != NBD_REPLY_MAGIC) {
557 LOG("invalid magic (got 0x%x)", magic);
558 errno = EINVAL;
559 return -1;
560 }
561 return 0;
562 }
563
564 static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
565 {
566 uint8_t buf[4 + 4 + 8];
567
568 /* Reply
569 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
570 [ 4 .. 7] error (0 == no error)
571 [ 7 .. 15] handle
572 */
573 cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
574 cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
575 cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
576
577 TRACE("Sending response to client");
578
579 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
580 LOG("writing to socket failed");
581 errno = EINVAL;
582 return -1;
583 }
584 return 0;
585 }
586
587 #define MAX_NBD_REQUESTS 16
588
589 typedef struct NBDRequest NBDRequest;
590
591 struct NBDRequest {
592 QSIMPLEQ_ENTRY(NBDRequest) entry;
593 NBDClient *client;
594 uint8_t *data;
595 };
596
597 struct NBDExport {
598 BlockDriverState *bs;
599 off_t dev_offset;
600 off_t size;
601 uint32_t nbdflags;
602 QSIMPLEQ_HEAD(, NBDRequest) requests;
603 };
604
605 struct NBDClient {
606 int refcount;
607 void (*close)(NBDClient *client);
608
609 NBDExport *exp;
610 int sock;
611
612 Coroutine *recv_coroutine;
613
614 CoMutex send_lock;
615 Coroutine *send_coroutine;
616
617 int nb_requests;
618 };
619
620 static void nbd_client_get(NBDClient *client)
621 {
622 client->refcount++;
623 }
624
625 static void nbd_client_put(NBDClient *client)
626 {
627 if (--client->refcount == 0) {
628 g_free(client);
629 }
630 }
631
632 static void nbd_client_close(NBDClient *client)
633 {
634 qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
635 close(client->sock);
636 client->sock = -1;
637 if (client->close) {
638 client->close(client);
639 }
640 nbd_client_put(client);
641 }
642
643 static NBDRequest *nbd_request_get(NBDClient *client)
644 {
645 NBDRequest *req;
646 NBDExport *exp = client->exp;
647
648 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
649 client->nb_requests++;
650
651 if (QSIMPLEQ_EMPTY(&exp->requests)) {
652 req = g_malloc0(sizeof(NBDRequest));
653 req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
654 } else {
655 req = QSIMPLEQ_FIRST(&exp->requests);
656 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
657 }
658 nbd_client_get(client);
659 req->client = client;
660 return req;
661 }
662
663 static void nbd_request_put(NBDRequest *req)
664 {
665 NBDClient *client = req->client;
666 QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry);
667 if (client->nb_requests-- == MAX_NBD_REQUESTS) {
668 qemu_notify_event();
669 }
670 nbd_client_put(client);
671 }
672
673 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
674 off_t size, uint32_t nbdflags)
675 {
676 NBDExport *exp = g_malloc0(sizeof(NBDExport));
677 QSIMPLEQ_INIT(&exp->requests);
678 exp->bs = bs;
679 exp->dev_offset = dev_offset;
680 exp->nbdflags = nbdflags;
681 exp->size = size == -1 ? exp->bs->total_sectors * 512 : size;
682 return exp;
683 }
684
685 void nbd_export_close(NBDExport *exp)
686 {
687 while (!QSIMPLEQ_EMPTY(&exp->requests)) {
688 NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
689 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
690 qemu_vfree(first->data);
691 g_free(first);
692 }
693
694 bdrv_close(exp->bs);
695 g_free(exp);
696 }
697
/*
 * fd handler callbacks, declared ahead of their definitions because
 * nbd_co_send_reply() registers them.
 */
static int nbd_can_read(void *opaque);
static void nbd_read(void *opaque);
static void nbd_restart_write(void *opaque);
701
702 static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
703 int len)
704 {
705 NBDClient *client = req->client;
706 int csock = client->sock;
707 ssize_t rc, ret;
708
709 qemu_co_mutex_lock(&client->send_lock);
710 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
711 nbd_restart_write, client);
712 client->send_coroutine = qemu_coroutine_self();
713
714 if (!len) {
715 rc = nbd_send_reply(csock, reply);
716 if (rc < 0) {
717 rc = -errno;
718 }
719 } else {
720 socket_set_cork(csock, 1);
721 rc = nbd_send_reply(csock, reply);
722 if (rc >= 0) {
723 ret = qemu_co_send(csock, req->data, len);
724 if (ret != len) {
725 errno = EIO;
726 rc = -1;
727 }
728 }
729 if (rc < 0) {
730 rc = -errno;
731 }
732 socket_set_cork(csock, 0);
733 }
734
735 client->send_coroutine = NULL;
736 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
737 qemu_co_mutex_unlock(&client->send_lock);
738 return rc;
739 }
740
741 static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
742 {
743 NBDClient *client = req->client;
744 int csock = client->sock;
745 ssize_t rc;
746
747 client->recv_coroutine = qemu_coroutine_self();
748 if (nbd_receive_request(csock, request) < 0) {
749 rc = -EIO;
750 goto out;
751 }
752
753 if (request->len > NBD_BUFFER_SIZE) {
754 LOG("len (%u) is larger than max len (%u)",
755 request->len, NBD_BUFFER_SIZE);
756 rc = -EINVAL;
757 goto out;
758 }
759
760 if ((request->from + request->len) < request->from) {
761 LOG("integer overflow detected! "
762 "you're probably being attacked");
763 rc = -EINVAL;
764 goto out;
765 }
766
767 TRACE("Decoding type");
768
769 if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
770 TRACE("Reading %u byte(s)", request->len);
771
772 if (qemu_co_recv(csock, req->data, request->len) != request->len) {
773 LOG("reading from socket failed");
774 rc = -EIO;
775 goto out;
776 }
777 }
778 rc = 0;
779
780 out:
781 client->recv_coroutine = NULL;
782 return rc;
783 }
784
785 static void nbd_trip(void *opaque)
786 {
787 NBDClient *client = opaque;
788 NBDRequest *req = nbd_request_get(client);
789 NBDExport *exp = client->exp;
790 struct nbd_request request;
791 struct nbd_reply reply;
792 ssize_t ret;
793
794 TRACE("Reading request.");
795
796 ret = nbd_co_receive_request(req, &request);
797 if (ret == -EIO) {
798 goto out;
799 }
800
801 reply.handle = request.handle;
802 reply.error = 0;
803
804 if (ret < 0) {
805 reply.error = -ret;
806 goto error_reply;
807 }
808
809 if ((request.from + request.len) > exp->size) {
810 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
811 ", Offset: %" PRIu64 "\n",
812 request.from, request.len,
813 (uint64_t)exp->size, (uint64_t)exp->dev_offset);
814 LOG("requested operation past EOF--bad client?");
815 goto invalid_request;
816 }
817
818 switch (request.type & NBD_CMD_MASK_COMMAND) {
819 case NBD_CMD_READ:
820 TRACE("Request type is READ");
821
822 ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512,
823 req->data, request.len / 512);
824 if (ret < 0) {
825 LOG("reading from file failed");
826 reply.error = -ret;
827 goto error_reply;
828 }
829
830 TRACE("Read %u byte(s)", request.len);
831 if (nbd_co_send_reply(req, &reply, request.len) < 0)
832 goto out;
833 break;
834 case NBD_CMD_WRITE:
835 TRACE("Request type is WRITE");
836
837 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
838 TRACE("Server is read-only, return error");
839 reply.error = EROFS;
840 goto error_reply;
841 }
842
843 TRACE("Writing to device");
844
845 ret = bdrv_write(exp->bs, (request.from + exp->dev_offset) / 512,
846 req->data, request.len / 512);
847 if (ret < 0) {
848 LOG("writing to file failed");
849 reply.error = -ret;
850 goto error_reply;
851 }
852
853 if (request.type & NBD_CMD_FLAG_FUA) {
854 ret = bdrv_co_flush(exp->bs);
855 if (ret < 0) {
856 LOG("flush failed");
857 reply.error = -ret;
858 goto error_reply;
859 }
860 }
861
862 if (nbd_co_send_reply(req, &reply, 0) < 0) {
863 goto out;
864 }
865 break;
866 case NBD_CMD_DISC:
867 TRACE("Request type is DISCONNECT");
868 errno = 0;
869 goto out;
870 case NBD_CMD_FLUSH:
871 TRACE("Request type is FLUSH");
872
873 ret = bdrv_co_flush(exp->bs);
874 if (ret < 0) {
875 LOG("flush failed");
876 reply.error = -ret;
877 }
878 if (nbd_co_send_reply(req, &reply, 0) < 0) {
879 goto out;
880 }
881 break;
882 case NBD_CMD_TRIM:
883 TRACE("Request type is TRIM");
884 ret = bdrv_co_discard(exp->bs, (request.from + exp->dev_offset) / 512,
885 request.len / 512);
886 if (ret < 0) {
887 LOG("discard failed");
888 reply.error = -ret;
889 }
890 if (nbd_co_send_reply(req, &reply, 0) < 0) {
891 goto out;
892 }
893 break;
894 default:
895 LOG("invalid request type (%u) received", request.type);
896 invalid_request:
897 reply.error = -EINVAL;
898 error_reply:
899 if (nbd_co_send_reply(req, &reply, 0) < 0) {
900 goto out;
901 }
902 break;
903 }
904
905 TRACE("Request/Reply complete");
906
907 nbd_request_put(req);
908 return;
909
910 out:
911 nbd_request_put(req);
912 nbd_client_close(client);
913 }
914
915 static int nbd_can_read(void *opaque)
916 {
917 NBDClient *client = opaque;
918
919 return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
920 }
921
922 static void nbd_read(void *opaque)
923 {
924 NBDClient *client = opaque;
925
926 if (client->recv_coroutine) {
927 qemu_coroutine_enter(client->recv_coroutine, NULL);
928 } else {
929 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
930 }
931 }
932
933 static void nbd_restart_write(void *opaque)
934 {
935 NBDClient *client = opaque;
936
937 qemu_coroutine_enter(client->send_coroutine, NULL);
938 }
939
940 NBDClient *nbd_client_new(NBDExport *exp, int csock,
941 void (*close)(NBDClient *))
942 {
943 NBDClient *client;
944 if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
945 return NULL;
946 }
947 client = g_malloc0(sizeof(NBDClient));
948 client->refcount = 1;
949 client->exp = exp;
950 client->sock = csock;
951 client->close = close;
952 qemu_co_mutex_init(&client->send_lock);
953 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
954 return client;
955 }