blob: dc0adf90ed1e77dff41ea1b221cb63b967d0c973 [file] [log] [blame]
ths75818252008-07-03 13:41:03 +00001/*
bellard7a5ca862008-05-27 21:13:40 +00002 * Copyright (C) 2005 Anthony Liguori <anthony@codemonkey.ws>
3 *
4 * Network Block Device
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; under version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
Blue Swirl8167ee82009-07-16 20:47:01 +000016 * along with this program; if not, see <http://www.gnu.org/licenses/>.
ths75818252008-07-03 13:41:03 +000017 */
bellard7a5ca862008-05-27 21:13:40 +000018
19#include "nbd.h"
Markus Armbrusterab359cd2011-09-06 18:58:58 +020020#include "block.h"
bellard7a5ca862008-05-27 21:13:40 +000021
Paolo Bonzini262db382011-09-19 15:19:27 +020022#include "qemu-coroutine.h"
23
bellard7a5ca862008-05-27 21:13:40 +000024#include <errno.h>
25#include <string.h>
aliguori03ff3ca2008-09-15 15:51:35 +000026#ifndef _WIN32
bellard7a5ca862008-05-27 21:13:40 +000027#include <sys/ioctl.h>
aliguori03ff3ca2008-09-15 15:51:35 +000028#endif
Andreas Färber5dc2eec2010-09-20 00:50:46 +020029#if defined(__sun__) || defined(__HAIKU__)
aliguori7e00eb92008-08-02 01:57:02 +000030#include <sys/ioccom.h>
31#endif
bellard7a5ca862008-05-27 21:13:40 +000032#include <ctype.h>
33#include <inttypes.h>
bellard7a5ca862008-05-27 21:13:40 +000034
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +020035#ifdef __linux__
36#include <linux/fs.h>
37#endif
38
aliguori03ff3ca2008-09-15 15:51:35 +000039#include "qemu_socket.h"
Paolo Bonzinid9a73802011-09-19 14:18:33 +020040#include "qemu-queue.h"
ths75818252008-07-03 13:41:03 +000041
aliguori03ff3ca2008-09-15 15:51:35 +000042//#define DEBUG_NBD
43
/* Unconditional diagnostic: prefixes the message with file, function, line. */
#define LOG(msg, ...) do { \
    fprintf(stderr, "%s:%s():L%d: " msg "\n", \
            __FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
} while(0)

/* Debug-only diagnostic: forwards to LOG when DEBUG_NBD is defined,
 * otherwise expands to an empty statement.
 */
#ifdef DEBUG_NBD
#define TRACE(msg, ...) do { \
    LOG(msg, ## __VA_ARGS__); \
} while(0)
#else
#define TRACE(msg, ...) \
    do { } while (0)
#endif
57
bellard7a5ca862008-05-27 21:13:40 +000058/* This is all part of the "official" NBD API */
59
/* Wire format: reply header is magic (4) + error (4) + handle (8) bytes. */
#define NBD_REPLY_SIZE          (4 + 4 + 8)
#define NBD_REQUEST_MAGIC       0x25609513
#define NBD_REPLY_MAGIC         0x67446698

/* ioctl requests understood by the NBD block device. */
#define NBD_SET_SOCK            _IO(0xab, 0)
#define NBD_SET_BLKSIZE         _IO(0xab, 1)
#define NBD_SET_SIZE            _IO(0xab, 2)
#define NBD_DO_IT               _IO(0xab, 3)
#define NBD_CLEAR_SOCK          _IO(0xab, 4)
#define NBD_CLEAR_QUE           _IO(0xab, 5)
#define NBD_PRINT_DEBUG         _IO(0xab, 6)
#define NBD_SET_SIZE_BLOCKS     _IO(0xab, 7)
#define NBD_DISCONNECT          _IO(0xab, 8)
#define NBD_SET_TIMEOUT         _IO(0xab, 9)
#define NBD_SET_FLAGS           _IO(0xab, 10)

/* Option code sent by the client to select an export by name. */
#define NBD_OPT_EXPORT_NAME     (1 << 0)
Laurent Vivier1d45f8b2010-08-25 22:48:33 +020077
bellard7a5ca862008-05-27 21:13:40 +000078/* That's all folks */
79
/*
 * Transfer up to @size bytes between @fd and @buffer.
 *
 * @do_read selects the direction: true receives into @buffer, false sends
 * from it.  When called from a coroutine the transfer is delegated to
 * qemu_co_recv()/qemu_co_send().  Otherwise the function loops until the
 * whole buffer has been transferred, end-of-file is seen, or an
 * unrecoverable error occurs.
 *
 * Returns the number of bytes transferred (possibly short on EOF), or a
 * negative errno value on error.
 */
ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
{
    char *bytes = buffer;
    size_t done = 0;
    ssize_t len;
    int err;

    if (qemu_in_coroutine()) {
        if (!do_read) {
            return qemu_co_send(fd, buffer, size);
        }
        return qemu_co_recv(fd, buffer, size);
    }

    while (done < size) {
        if (do_read) {
            len = qemu_recv(fd, bytes + done, size - done, 0);
        } else {
            len = send(fd, bytes + done, size - done, 0);
        }

        if (len > 0) {
            done += len;
            continue;
        }

        /* eof */
        if (len == 0) {
            break;
        }

        err = socket_error();

        /* EINTR is always retried; EAGAIN only once some data has moved,
         * so that a transfer is not abandoned half-way.
         */
        if (err == EINTR || (done > 0 && err == EAGAIN)) {
            continue;
        }

        /* unrecoverable error */
        return -err;
    }

    return done;
}
124
/*
 * Receive @size bytes from @fd into @buffer via nbd_wr_sync().
 *
 * Sockets are kept in blocking mode in the negotiation phase.  After
 * that, a non-readable socket simply means that another thread stole
 * our request/reply.  Synchronization is done with recv_coroutine, so
 * that this is coroutine-safe.
 */
static ssize_t read_sync(int fd, void *buffer, size_t size)
{
    const bool do_read = true;

    return nbd_wr_sync(fd, buffer, size, do_read);
}
134
/*
 * Send @size bytes from @buffer to @fd via nbd_wr_sync(), retrying for as
 * long as it reports -EAGAIN.
 *
 * Returns the value of the last nbd_wr_sync() call: the number of bytes
 * written, or a negative errno value.  The result is kept in an ssize_t so
 * that it is not truncated before being returned.
 */
static ssize_t write_sync(int fd, void *buffer, size_t size)
{
    ssize_t ret;

    do {
        /* For writes, we do expect the socket to be writable. */
        ret = nbd_wr_sync(fd, buffer, size, false);
    } while (ret == -EAGAIN);

    return ret;
}
144
/*
 * Format @address and @port as "address:port" into @buf (at most @len
 * bytes, including the terminator).  An address containing a colon is
 * taken to be an IPv6 literal and is wrapped in square brackets.
 */
static void combine_addr(char *buf, size_t len, const char* address,
                         uint16_t port)
{
    if (strchr(address, ':') == NULL) {
        snprintf(buf, len, "%s:%u", address, port);
        return;
    }

    /* IPv6 literal: needs [] around the address part */
    snprintf(buf, len, "[%s]:%u", address, port);
}
155
/*
 * Connect to @address:@port by building an "address:port" string and
 * handing it to tcp_socket_outgoing_spec(); returns that call's result.
 */
int tcp_socket_outgoing(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_outgoing_spec(spec);
}
bellard7a5ca862008-05-27 21:13:40 +0000162
/*
 * Connect to the endpoint described by @address_and_port using
 * inet_connect() and return its result.
 */
int tcp_socket_outgoing_spec(const char *address_and_port)
{
    int fd = inet_connect(address_and_port, true, NULL);

    return fd;
}
167
/*
 * Listen on @address:@port by building an "address:port" string and
 * handing it to tcp_socket_incoming_spec(); returns that call's result.
 */
int tcp_socket_incoming(const char *address, uint16_t port)
{
    char spec[128];

    combine_addr(spec, sizeof(spec), address, port);
    return tcp_socket_incoming_spec(spec);
}
174
/*
 * Create a listening stream socket for @address_and_port via inet_listen().
 * No output string buffer is supplied (NULL, length 0).
 */
int tcp_socket_incoming_spec(const char *address_and_port)
{
    return inet_listen(address_and_port, NULL, 0, SOCK_STREAM, 0, NULL);
}
181
/*
 * Create a listening UNIX socket at @path via unix_listen().
 * No output string buffer is supplied (NULL, length 0).
 */
int unix_socket_incoming(const char *path)
{
    return unix_listen(path, NULL, 0);
}
189
/* Connect to the UNIX socket at @path; returns unix_connect()'s result. */
int unix_socket_outgoing(const char *path)
{
    int fd = unix_connect(path);

    return fd;
}
thscd831bd2008-07-03 10:23:51 +0000194
bellard7a5ca862008-05-27 21:13:40 +0000195/* Basic flow
196
197 Server Client
198
199 Negotiate
200 Request
201 Response
202 Request
203 Response
204 ...
205 ...
206 Request (type == 2)
207*/
208
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200209static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
bellard7a5ca862008-05-27 21:13:40 +0000210{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000211 char buf[8 + 8 + 8 + 128];
Paolo Bonzini185b4332012-03-05 08:56:10 +0100212 int rc;
bellard7a5ca862008-05-27 21:13:40 +0000213
Nick Thomasb2e3d872011-02-22 15:44:51 +0000214 /* Negotiate
215 [ 0 .. 7] passwd ("NBDMAGIC")
216 [ 8 .. 15] magic (0x00420281861253)
217 [16 .. 23] size
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +0200218 [24 .. 27] flags
219 [28 .. 151] reserved (0)
Nick Thomasb2e3d872011-02-22 15:44:51 +0000220 */
bellard7a5ca862008-05-27 21:13:40 +0000221
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100222 socket_set_block(csock);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100223 rc = -EINVAL;
224
Nick Thomasb2e3d872011-02-22 15:44:51 +0000225 TRACE("Beginning negotiation.");
226 memcpy(buf, "NBDMAGIC", 8);
227 cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
228 cpu_to_be64w((uint64_t*)(buf + 16), size);
Paolo Bonzini2c7989a2011-10-21 13:16:28 +0200229 cpu_to_be32w((uint32_t*)(buf + 24),
Paolo Bonzini7a706632011-10-21 13:17:14 +0200230 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
231 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +0200232 memset(buf + 28, 0, 124);
bellard7a5ca862008-05-27 21:13:40 +0000233
Nick Thomasb2e3d872011-02-22 15:44:51 +0000234 if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
235 LOG("write failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100236 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000237 }
bellard7a5ca862008-05-27 21:13:40 +0000238
Dong Xu Wang07f35072011-11-22 18:06:26 +0800239 TRACE("Negotiation succeeded.");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100240 rc = 0;
241fail:
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100242 socket_set_nonblock(csock);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100243 return rc;
bellard7a5ca862008-05-27 21:13:40 +0000244}
245
Laurent Vivier1d45f8b2010-08-25 22:48:33 +0200246int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags,
247 off_t *size, size_t *blocksize)
bellard7a5ca862008-05-27 21:13:40 +0000248{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000249 char buf[256];
250 uint64_t magic, s;
251 uint16_t tmp;
Paolo Bonzini185b4332012-03-05 08:56:10 +0100252 int rc;
bellard7a5ca862008-05-27 21:13:40 +0000253
Dong Xu Wang07f35072011-11-22 18:06:26 +0800254 TRACE("Receiving negotiation.");
bellard7a5ca862008-05-27 21:13:40 +0000255
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100256 socket_set_block(csock);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100257 rc = -EINVAL;
258
Nick Thomasb2e3d872011-02-22 15:44:51 +0000259 if (read_sync(csock, buf, 8) != 8) {
260 LOG("read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100261 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000262 }
bellard7a5ca862008-05-27 21:13:40 +0000263
Nick Thomasb2e3d872011-02-22 15:44:51 +0000264 buf[8] = '\0';
265 if (strlen(buf) == 0) {
266 LOG("server connection closed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100267 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000268 }
bellard7a5ca862008-05-27 21:13:40 +0000269
Nick Thomasb2e3d872011-02-22 15:44:51 +0000270 TRACE("Magic is %c%c%c%c%c%c%c%c",
271 qemu_isprint(buf[0]) ? buf[0] : '.',
272 qemu_isprint(buf[1]) ? buf[1] : '.',
273 qemu_isprint(buf[2]) ? buf[2] : '.',
274 qemu_isprint(buf[3]) ? buf[3] : '.',
275 qemu_isprint(buf[4]) ? buf[4] : '.',
276 qemu_isprint(buf[5]) ? buf[5] : '.',
277 qemu_isprint(buf[6]) ? buf[6] : '.',
278 qemu_isprint(buf[7]) ? buf[7] : '.');
bellard7a5ca862008-05-27 21:13:40 +0000279
Nick Thomasb2e3d872011-02-22 15:44:51 +0000280 if (memcmp(buf, "NBDMAGIC", 8) != 0) {
281 LOG("Invalid magic received");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100282 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000283 }
bellard7a5ca862008-05-27 21:13:40 +0000284
Nick Thomasb2e3d872011-02-22 15:44:51 +0000285 if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
286 LOG("read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100287 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000288 }
289 magic = be64_to_cpu(magic);
290 TRACE("Magic is 0x%" PRIx64, magic);
bellard7a5ca862008-05-27 21:13:40 +0000291
Nick Thomasb2e3d872011-02-22 15:44:51 +0000292 if (name) {
293 uint32_t reserved = 0;
294 uint32_t opt;
295 uint32_t namesize;
Laurent Vivier1d45f8b2010-08-25 22:48:33 +0200296
Nick Thomasb2e3d872011-02-22 15:44:51 +0000297 TRACE("Checking magic (opts_magic)");
298 if (magic != 0x49484156454F5054LL) {
299 LOG("Bad magic received");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100300 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000301 }
302 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
303 LOG("flags read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100304 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000305 }
306 *flags = be16_to_cpu(tmp) << 16;
307 /* reserved for future use */
308 if (write_sync(csock, &reserved, sizeof(reserved)) !=
309 sizeof(reserved)) {
310 LOG("write failed (reserved)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100311 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000312 }
313 /* write the export name */
314 magic = cpu_to_be64(magic);
315 if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
316 LOG("write failed (magic)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100317 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000318 }
319 opt = cpu_to_be32(NBD_OPT_EXPORT_NAME);
320 if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
321 LOG("write failed (opt)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100322 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000323 }
324 namesize = cpu_to_be32(strlen(name));
325 if (write_sync(csock, &namesize, sizeof(namesize)) !=
326 sizeof(namesize)) {
327 LOG("write failed (namesize)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100328 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000329 }
330 if (write_sync(csock, (char*)name, strlen(name)) != strlen(name)) {
331 LOG("write failed (name)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100332 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000333 }
334 } else {
335 TRACE("Checking magic (cli_magic)");
Laurent Vivier1d45f8b2010-08-25 22:48:33 +0200336
Nick Thomasb2e3d872011-02-22 15:44:51 +0000337 if (magic != 0x00420281861253LL) {
338 LOG("Bad magic received");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100339 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000340 }
341 }
Laurent Vivier1d45f8b2010-08-25 22:48:33 +0200342
Nick Thomasb2e3d872011-02-22 15:44:51 +0000343 if (read_sync(csock, &s, sizeof(s)) != sizeof(s)) {
344 LOG("read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100345 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000346 }
347 *size = be64_to_cpu(s);
348 *blocksize = 1024;
349 TRACE("Size is %" PRIu64, *size);
Laurent Vivier1d45f8b2010-08-25 22:48:33 +0200350
Nick Thomasb2e3d872011-02-22 15:44:51 +0000351 if (!name) {
352 if (read_sync(csock, flags, sizeof(*flags)) != sizeof(*flags)) {
353 LOG("read failed (flags)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100354 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000355 }
356 *flags = be32_to_cpup(flags);
357 } else {
358 if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
359 LOG("read failed (tmp)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100360 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000361 }
362 *flags |= be32_to_cpu(tmp);
363 }
364 if (read_sync(csock, &buf, 124) != 124) {
365 LOG("read failed (buf)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100366 goto fail;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000367 }
Paolo Bonzini185b4332012-03-05 08:56:10 +0100368 rc = 0;
369
370fail:
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100371 socket_set_nonblock(csock);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100372 return rc;
thscd831bd2008-07-03 10:23:51 +0000373}
bellard7a5ca862008-05-27 21:13:40 +0000374
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +0200375#ifdef __linux__
376int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
thscd831bd2008-07-03 10:23:51 +0000377{
Chunyan Liu3e05c782011-12-02 23:27:54 +0800378 TRACE("Setting NBD socket");
379
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100380 if (ioctl(fd, NBD_SET_SOCK, csock) < 0) {
Chunyan Liu3e05c782011-12-02 23:27:54 +0800381 int serrno = errno;
382 LOG("Failed to set NBD socket");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100383 return -serrno;
Chunyan Liu3e05c782011-12-02 23:27:54 +0800384 }
385
Nick Thomasb2e3d872011-02-22 15:44:51 +0000386 TRACE("Setting block size to %lu", (unsigned long)blocksize);
bellard7a5ca862008-05-27 21:13:40 +0000387
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100388 if (ioctl(fd, NBD_SET_BLKSIZE, blocksize) < 0) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000389 int serrno = errno;
390 LOG("Failed setting NBD block size");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100391 return -serrno;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000392 }
bellard7a5ca862008-05-27 21:13:40 +0000393
Blue Swirl0bfcd592010-05-22 08:02:12 +0000394 TRACE("Setting size to %zd block(s)", (size_t)(size / blocksize));
bellard7a5ca862008-05-27 21:13:40 +0000395
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100396 if (ioctl(fd, NBD_SET_SIZE_BLOCKS, size / blocksize) < 0) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000397 int serrno = errno;
398 LOG("Failed setting size (in blocks)");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100399 return -serrno;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000400 }
bellard7a5ca862008-05-27 21:13:40 +0000401
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +0200402 if (flags & NBD_FLAG_READ_ONLY) {
403 int read_only = 1;
404 TRACE("Setting readonly attribute");
405
406 if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
407 int serrno = errno;
408 LOG("Failed setting read-only attribute");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100409 return -serrno;
Paolo Bonzinib90fb4b2011-09-08 17:24:54 +0200410 }
411 }
412
Paolo Bonzini973b3d02011-09-08 17:24:56 +0200413 if (ioctl(fd, NBD_SET_FLAGS, flags) < 0
414 && errno != ENOTTY) {
415 int serrno = errno;
416 LOG("Failed setting flags");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100417 return -serrno;
Paolo Bonzini973b3d02011-09-08 17:24:56 +0200418 }
419
Nick Thomasb2e3d872011-02-22 15:44:51 +0000420 TRACE("Negotiation ended");
bellard7a5ca862008-05-27 21:13:40 +0000421
Nick Thomasb2e3d872011-02-22 15:44:51 +0000422 return 0;
bellard7a5ca862008-05-27 21:13:40 +0000423}
424
425int nbd_disconnect(int fd)
426{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000427 ioctl(fd, NBD_CLEAR_QUE);
428 ioctl(fd, NBD_DISCONNECT);
429 ioctl(fd, NBD_CLEAR_SOCK);
430 return 0;
bellard7a5ca862008-05-27 21:13:40 +0000431}
432
Jes Sorensen0a4eb862010-08-31 09:30:33 +0200433int nbd_client(int fd)
bellard7a5ca862008-05-27 21:13:40 +0000434{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000435 int ret;
436 int serrno;
bellard7a5ca862008-05-27 21:13:40 +0000437
Nick Thomasb2e3d872011-02-22 15:44:51 +0000438 TRACE("Doing NBD loop");
bellard7a5ca862008-05-27 21:13:40 +0000439
Nick Thomasb2e3d872011-02-22 15:44:51 +0000440 ret = ioctl(fd, NBD_DO_IT);
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100441 if (ret < 0 && errno == EPIPE) {
Paolo Bonzini74624682011-11-04 15:51:18 +0100442 /* NBD_DO_IT normally returns EPIPE when someone has disconnected
443 * the socket via NBD_DISCONNECT. We do not want to return 1 in
444 * that case.
445 */
446 ret = 0;
447 }
Nick Thomasb2e3d872011-02-22 15:44:51 +0000448 serrno = errno;
bellard7a5ca862008-05-27 21:13:40 +0000449
Nick Thomasb2e3d872011-02-22 15:44:51 +0000450 TRACE("NBD loop returned %d: %s", ret, strerror(serrno));
bellard7a5ca862008-05-27 21:13:40 +0000451
Nick Thomasb2e3d872011-02-22 15:44:51 +0000452 TRACE("Clearing NBD queue");
453 ioctl(fd, NBD_CLEAR_QUE);
bellard7a5ca862008-05-27 21:13:40 +0000454
Nick Thomasb2e3d872011-02-22 15:44:51 +0000455 TRACE("Clearing NBD socket");
456 ioctl(fd, NBD_CLEAR_SOCK);
bellard7a5ca862008-05-27 21:13:40 +0000457
Nick Thomasb2e3d872011-02-22 15:44:51 +0000458 errno = serrno;
459 return ret;
bellard7a5ca862008-05-27 21:13:40 +0000460}
aliguori03ff3ca2008-09-15 15:51:35 +0000461#else
/* Non-Linux stub: configuring an NBD device is not supported. */
int nbd_init(int fd, int csock, uint32_t flags, off_t size, size_t blocksize)
{
    (void)fd;
    (void)csock;
    (void)flags;
    (void)size;
    (void)blocksize;

    return -ENOTSUP;
}
466
/* Non-Linux stub: disconnecting an NBD device is not supported. */
int nbd_disconnect(int fd)
{
    (void)fd;

    return -ENOTSUP;
}
471
/* Non-Linux stub: running the NBD client loop is not supported. */
int nbd_client(int fd)
{
    (void)fd;

    return -ENOTSUP;
}
476#endif
bellard7a5ca862008-05-27 21:13:40 +0000477
Paolo Bonzini94e73402012-03-07 11:25:01 +0100478ssize_t nbd_send_request(int csock, struct nbd_request *request)
ths75818252008-07-03 13:41:03 +0000479{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000480 uint8_t buf[4 + 4 + 8 + 8 + 4];
Paolo Bonzini185b4332012-03-05 08:56:10 +0100481 ssize_t ret;
ths75818252008-07-03 13:41:03 +0000482
Nick Thomasb2e3d872011-02-22 15:44:51 +0000483 cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
484 cpu_to_be32w((uint32_t*)(buf + 4), request->type);
485 cpu_to_be64w((uint64_t*)(buf + 8), request->handle);
486 cpu_to_be64w((uint64_t*)(buf + 16), request->from);
487 cpu_to_be32w((uint32_t*)(buf + 24), request->len);
ths75818252008-07-03 13:41:03 +0000488
Nick Thomasb2e3d872011-02-22 15:44:51 +0000489 TRACE("Sending request to client: "
490 "{ .from = %" PRIu64", .len = %u, .handle = %" PRIu64", .type=%i}",
491 request->from, request->len, request->handle, request->type);
ths75818252008-07-03 13:41:03 +0000492
Paolo Bonzini185b4332012-03-05 08:56:10 +0100493 ret = write_sync(csock, buf, sizeof(buf));
494 if (ret < 0) {
495 return ret;
496 }
497
498 if (ret != sizeof(buf)) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000499 LOG("writing to socket failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100500 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000501 }
502 return 0;
ths75818252008-07-03 13:41:03 +0000503}
504
Paolo Bonzini94e73402012-03-07 11:25:01 +0100505static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
bellard7a5ca862008-05-27 21:13:40 +0000506{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000507 uint8_t buf[4 + 4 + 8 + 8 + 4];
508 uint32_t magic;
Paolo Bonzini185b4332012-03-05 08:56:10 +0100509 ssize_t ret;
bellard7a5ca862008-05-27 21:13:40 +0000510
Paolo Bonzini185b4332012-03-05 08:56:10 +0100511 ret = read_sync(csock, buf, sizeof(buf));
512 if (ret < 0) {
513 return ret;
514 }
515
516 if (ret != sizeof(buf)) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000517 LOG("read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100518 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000519 }
bellard7a5ca862008-05-27 21:13:40 +0000520
Nick Thomasb2e3d872011-02-22 15:44:51 +0000521 /* Request
522 [ 0 .. 3] magic (NBD_REQUEST_MAGIC)
523 [ 4 .. 7] type (0 == READ, 1 == WRITE)
524 [ 8 .. 15] handle
525 [16 .. 23] from
526 [24 .. 27] len
527 */
bellard7a5ca862008-05-27 21:13:40 +0000528
Nick Thomasb2e3d872011-02-22 15:44:51 +0000529 magic = be32_to_cpup((uint32_t*)buf);
530 request->type = be32_to_cpup((uint32_t*)(buf + 4));
531 request->handle = be64_to_cpup((uint64_t*)(buf + 8));
532 request->from = be64_to_cpup((uint64_t*)(buf + 16));
533 request->len = be32_to_cpup((uint32_t*)(buf + 24));
bellard7a5ca862008-05-27 21:13:40 +0000534
Nick Thomasb2e3d872011-02-22 15:44:51 +0000535 TRACE("Got request: "
536 "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
537 magic, request->type, request->from, request->len);
bellard7a5ca862008-05-27 21:13:40 +0000538
Nick Thomasb2e3d872011-02-22 15:44:51 +0000539 if (magic != NBD_REQUEST_MAGIC) {
540 LOG("invalid magic (got 0x%x)", magic);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100541 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000542 }
543 return 0;
ths75818252008-07-03 13:41:03 +0000544}
bellard7a5ca862008-05-27 21:13:40 +0000545
Paolo Bonzini94e73402012-03-07 11:25:01 +0100546ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply)
ths75818252008-07-03 13:41:03 +0000547{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000548 uint8_t buf[NBD_REPLY_SIZE];
549 uint32_t magic;
Paolo Bonzini185b4332012-03-05 08:56:10 +0100550 ssize_t ret;
ths75818252008-07-03 13:41:03 +0000551
Paolo Bonzini185b4332012-03-05 08:56:10 +0100552 ret = read_sync(csock, buf, sizeof(buf));
553 if (ret < 0) {
554 return ret;
555 }
556
557 if (ret != sizeof(buf)) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000558 LOG("read failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100559 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000560 }
bellard7a5ca862008-05-27 21:13:40 +0000561
Nick Thomasb2e3d872011-02-22 15:44:51 +0000562 /* Reply
563 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
564 [ 4 .. 7] error (0 == no error)
565 [ 7 .. 15] handle
566 */
ths75818252008-07-03 13:41:03 +0000567
Nick Thomasb2e3d872011-02-22 15:44:51 +0000568 magic = be32_to_cpup((uint32_t*)buf);
569 reply->error = be32_to_cpup((uint32_t*)(buf + 4));
570 reply->handle = be64_to_cpup((uint64_t*)(buf + 8));
ths75818252008-07-03 13:41:03 +0000571
Nick Thomasb2e3d872011-02-22 15:44:51 +0000572 TRACE("Got reply: "
573 "{ magic = 0x%x, .error = %d, handle = %" PRIu64" }",
574 magic, reply->error, reply->handle);
ths75818252008-07-03 13:41:03 +0000575
Nick Thomasb2e3d872011-02-22 15:44:51 +0000576 if (magic != NBD_REPLY_MAGIC) {
577 LOG("invalid magic (got 0x%x)", magic);
Paolo Bonzini185b4332012-03-05 08:56:10 +0100578 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000579 }
580 return 0;
ths75818252008-07-03 13:41:03 +0000581}
582
Paolo Bonzini94e73402012-03-07 11:25:01 +0100583static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
ths75818252008-07-03 13:41:03 +0000584{
Nick Thomasb2e3d872011-02-22 15:44:51 +0000585 uint8_t buf[4 + 4 + 8];
Paolo Bonzini185b4332012-03-05 08:56:10 +0100586 ssize_t ret;
ths75818252008-07-03 13:41:03 +0000587
Nick Thomasb2e3d872011-02-22 15:44:51 +0000588 /* Reply
589 [ 0 .. 3] magic (NBD_REPLY_MAGIC)
590 [ 4 .. 7] error (0 == no error)
591 [ 7 .. 15] handle
592 */
593 cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
594 cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
595 cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);
ths75818252008-07-03 13:41:03 +0000596
Nick Thomasb2e3d872011-02-22 15:44:51 +0000597 TRACE("Sending response to client");
ths75818252008-07-03 13:41:03 +0000598
Paolo Bonzini185b4332012-03-05 08:56:10 +0100599 ret = write_sync(csock, buf, sizeof(buf));
600 if (ret < 0) {
601 return ret;
602 }
603
604 if (ret != sizeof(buf)) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000605 LOG("writing to socket failed");
Paolo Bonzini185b4332012-03-05 08:56:10 +0100606 return -EINVAL;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000607 }
608 return 0;
ths75818252008-07-03 13:41:03 +0000609}
610
/* Upper bound on the number of requests a single client may have in flight. */
#define MAX_NBD_REQUESTS        16
612
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200613typedef struct NBDRequest NBDRequest;
614
615struct NBDRequest {
616 QSIMPLEQ_ENTRY(NBDRequest) entry;
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200617 NBDClient *client;
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200618 uint8_t *data;
619};
620
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200621struct NBDExport {
622 BlockDriverState *bs;
623 off_t dev_offset;
624 off_t size;
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200625 uint32_t nbdflags;
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200626 QSIMPLEQ_HEAD(, NBDRequest) requests;
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200627};
628
Paolo Bonzini1743b512011-09-19 14:33:23 +0200629struct NBDClient {
630 int refcount;
631 void (*close)(NBDClient *client);
632
633 NBDExport *exp;
634 int sock;
Paolo Bonzini262db382011-09-19 15:19:27 +0200635
636 Coroutine *recv_coroutine;
637
638 CoMutex send_lock;
639 Coroutine *send_coroutine;
Paolo Bonzini41996e32011-09-19 15:25:40 +0200640
641 int nb_requests;
Paolo Bonzini1743b512011-09-19 14:33:23 +0200642};
643
644static void nbd_client_get(NBDClient *client)
645{
646 client->refcount++;
647}
648
649static void nbd_client_put(NBDClient *client)
650{
651 if (--client->refcount == 0) {
652 g_free(client);
653 }
654}
655
656static void nbd_client_close(NBDClient *client)
657{
658 qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
659 close(client->sock);
660 client->sock = -1;
661 if (client->close) {
662 client->close(client);
663 }
664 nbd_client_put(client);
665}
666
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200667static NBDRequest *nbd_request_get(NBDClient *client)
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200668{
669 NBDRequest *req;
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200670 NBDExport *exp = client->exp;
671
Paolo Bonzini41996e32011-09-19 15:25:40 +0200672 assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
673 client->nb_requests++;
674
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200675 if (QSIMPLEQ_EMPTY(&exp->requests)) {
676 req = g_malloc0(sizeof(NBDRequest));
677 req->data = qemu_blockalign(exp->bs, NBD_BUFFER_SIZE);
678 } else {
679 req = QSIMPLEQ_FIRST(&exp->requests);
680 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
681 }
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200682 nbd_client_get(client);
683 req->client = client;
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200684 return req;
685}
686
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200687static void nbd_request_put(NBDRequest *req)
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200688{
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200689 NBDClient *client = req->client;
690 QSIMPLEQ_INSERT_HEAD(&client->exp->requests, req, entry);
Paolo Bonzini41996e32011-09-19 15:25:40 +0200691 if (client->nb_requests-- == MAX_NBD_REQUESTS) {
692 qemu_notify_event();
693 }
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200694 nbd_client_put(client);
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200695}
696
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200697NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
698 off_t size, uint32_t nbdflags)
699{
700 NBDExport *exp = g_malloc0(sizeof(NBDExport));
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200701 QSIMPLEQ_INIT(&exp->requests);
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200702 exp->bs = bs;
703 exp->dev_offset = dev_offset;
704 exp->nbdflags = nbdflags;
Paolo Bonzini38ceff02012-03-12 16:17:27 +0100705 exp->size = size == -1 ? bdrv_getlength(bs) : size;
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200706 return exp;
707}
708
709void nbd_export_close(NBDExport *exp)
710{
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200711 while (!QSIMPLEQ_EMPTY(&exp->requests)) {
712 NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
713 QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
714 qemu_vfree(first->data);
715 g_free(first);
716 }
717
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200718 bdrv_close(exp->bs);
719 g_free(exp);
720}
721
Paolo Bonzini41996e32011-09-19 15:25:40 +0200722static int nbd_can_read(void *opaque);
Paolo Bonzini262db382011-09-19 15:19:27 +0200723static void nbd_read(void *opaque);
724static void nbd_restart_write(void *opaque);
725
Paolo Bonzini94e73402012-03-07 11:25:01 +0100726static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
727 int len)
Paolo Bonzini22045592011-09-19 14:25:30 +0200728{
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200729 NBDClient *client = req->client;
730 int csock = client->sock;
Paolo Bonzini94e73402012-03-07 11:25:01 +0100731 ssize_t rc, ret;
Paolo Bonzini22045592011-09-19 14:25:30 +0200732
Paolo Bonzini262db382011-09-19 15:19:27 +0200733 qemu_co_mutex_lock(&client->send_lock);
Paolo Bonzini41996e32011-09-19 15:25:40 +0200734 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read,
735 nbd_restart_write, client);
Paolo Bonzini262db382011-09-19 15:19:27 +0200736 client->send_coroutine = qemu_coroutine_self();
737
Paolo Bonzini22045592011-09-19 14:25:30 +0200738 if (!len) {
739 rc = nbd_send_reply(csock, reply);
Paolo Bonzini22045592011-09-19 14:25:30 +0200740 } else {
741 socket_set_cork(csock, 1);
742 rc = nbd_send_reply(csock, reply);
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100743 if (rc >= 0) {
Paolo Bonzini262db382011-09-19 15:19:27 +0200744 ret = qemu_co_send(csock, req->data, len);
Paolo Bonzini22045592011-09-19 14:25:30 +0200745 if (ret != len) {
Paolo Bonzini185b4332012-03-05 08:56:10 +0100746 rc = -EIO;
Paolo Bonzini22045592011-09-19 14:25:30 +0200747 }
748 }
Paolo Bonzini22045592011-09-19 14:25:30 +0200749 socket_set_cork(csock, 0);
750 }
Paolo Bonzini262db382011-09-19 15:19:27 +0200751
752 client->send_coroutine = NULL;
Paolo Bonzini41996e32011-09-19 15:25:40 +0200753 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
Paolo Bonzini262db382011-09-19 15:19:27 +0200754 qemu_co_mutex_unlock(&client->send_lock);
Paolo Bonzini22045592011-09-19 14:25:30 +0200755 return rc;
756}
757
Paolo Bonzini94e73402012-03-07 11:25:01 +0100758static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
Paolo Bonzinia030b342011-09-19 15:07:54 +0200759{
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200760 NBDClient *client = req->client;
761 int csock = client->sock;
Paolo Bonzini94e73402012-03-07 11:25:01 +0100762 ssize_t rc;
Paolo Bonzinia030b342011-09-19 15:07:54 +0200763
Paolo Bonzini262db382011-09-19 15:19:27 +0200764 client->recv_coroutine = qemu_coroutine_self();
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100765 rc = nbd_receive_request(csock, request);
766 if (rc < 0) {
767 if (rc != -EAGAIN) {
768 rc = -EIO;
769 }
Paolo Bonzinia030b342011-09-19 15:07:54 +0200770 goto out;
771 }
772
773 if (request->len > NBD_BUFFER_SIZE) {
774 LOG("len (%u) is larger than max len (%u)",
775 request->len, NBD_BUFFER_SIZE);
776 rc = -EINVAL;
777 goto out;
778 }
779
780 if ((request->from + request->len) < request->from) {
781 LOG("integer overflow detected! "
782 "you're probably being attacked");
783 rc = -EINVAL;
784 goto out;
785 }
786
787 TRACE("Decoding type");
788
789 if ((request->type & NBD_CMD_MASK_COMMAND) == NBD_CMD_WRITE) {
790 TRACE("Reading %u byte(s)", request->len);
791
Paolo Bonzini262db382011-09-19 15:19:27 +0200792 if (qemu_co_recv(csock, req->data, request->len) != request->len) {
Paolo Bonzinia030b342011-09-19 15:07:54 +0200793 LOG("reading from socket failed");
794 rc = -EIO;
795 goto out;
796 }
797 }
798 rc = 0;
799
800out:
Paolo Bonzini262db382011-09-19 15:19:27 +0200801 client->recv_coroutine = NULL;
Paolo Bonzinia030b342011-09-19 15:07:54 +0200802 return rc;
803}
804
Paolo Bonzini262db382011-09-19 15:19:27 +0200805static void nbd_trip(void *opaque)
ths75818252008-07-03 13:41:03 +0000806{
Paolo Bonzini262db382011-09-19 15:19:27 +0200807 NBDClient *client = opaque;
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200808 NBDRequest *req = nbd_request_get(client);
Paolo Bonzini1743b512011-09-19 14:33:23 +0200809 NBDExport *exp = client->exp;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000810 struct nbd_request request;
811 struct nbd_reply reply;
Paolo Bonzini94e73402012-03-07 11:25:01 +0100812 ssize_t ret;
ths75818252008-07-03 13:41:03 +0000813
Nick Thomasb2e3d872011-02-22 15:44:51 +0000814 TRACE("Reading request.");
ths75818252008-07-03 13:41:03 +0000815
Paolo Bonzini262db382011-09-19 15:19:27 +0200816 ret = nbd_co_receive_request(req, &request);
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100817 if (ret == -EAGAIN) {
818 goto done;
819 }
Paolo Bonzinia030b342011-09-19 15:07:54 +0200820 if (ret == -EIO) {
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200821 goto out;
Paolo Bonzinia030b342011-09-19 15:07:54 +0200822 }
ths75818252008-07-03 13:41:03 +0000823
Paolo Bonzinifae69412011-09-19 16:04:36 +0200824 reply.handle = request.handle;
825 reply.error = 0;
826
Paolo Bonzinia030b342011-09-19 15:07:54 +0200827 if (ret < 0) {
828 reply.error = -ret;
829 goto error_reply;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000830 }
bellard7a5ca862008-05-27 21:13:40 +0000831
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200832 if ((request.from + request.len) > exp->size) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000833 LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
834 ", Offset: %" PRIu64 "\n",
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200835 request.from, request.len,
Stefan Weil0fee8f32012-04-12 22:30:16 +0200836 (uint64_t)exp->size, (uint64_t)exp->dev_offset);
Nick Thomasb2e3d872011-02-22 15:44:51 +0000837 LOG("requested operation past EOF--bad client?");
Paolo Bonzinifae69412011-09-19 16:04:36 +0200838 goto invalid_request;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000839 }
bellard7a5ca862008-05-27 21:13:40 +0000840
Paolo Bonzini2c7989a2011-10-21 13:16:28 +0200841 switch (request.type & NBD_CMD_MASK_COMMAND) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000842 case NBD_CMD_READ:
843 TRACE("Request type is READ");
bellard7a5ca862008-05-27 21:13:40 +0000844
Paolo Bonzinie25ceb72012-04-19 11:59:11 +0200845 if (request.type & NBD_CMD_FLAG_FUA) {
846 ret = bdrv_co_flush(exp->bs);
847 if (ret < 0) {
848 LOG("flush failed");
849 reply.error = -ret;
850 goto error_reply;
851 }
852 }
853
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200854 ret = bdrv_read(exp->bs, (request.from + exp->dev_offset) / 512,
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200855 req->data, request.len / 512);
Paolo Bonziniadcf6302011-09-13 17:27:45 +0200856 if (ret < 0) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000857 LOG("reading from file failed");
Paolo Bonziniadcf6302011-09-13 17:27:45 +0200858 reply.error = -ret;
Paolo Bonzinifae69412011-09-19 16:04:36 +0200859 goto error_reply;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000860 }
bellard7a5ca862008-05-27 21:13:40 +0000861
Nick Thomasb2e3d872011-02-22 15:44:51 +0000862 TRACE("Read %u byte(s)", request.len);
Paolo Bonzini262db382011-09-19 15:19:27 +0200863 if (nbd_co_send_reply(req, &reply, request.len) < 0)
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200864 goto out;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000865 break;
866 case NBD_CMD_WRITE:
867 TRACE("Request type is WRITE");
bellard7a5ca862008-05-27 21:13:40 +0000868
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200869 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
Nick Thomasb2e3d872011-02-22 15:44:51 +0000870 TRACE("Server is read-only, return error");
Paolo Bonzinifae69412011-09-19 16:04:36 +0200871 reply.error = EROFS;
872 goto error_reply;
873 }
bellard7a5ca862008-05-27 21:13:40 +0000874
Paolo Bonzinifae69412011-09-19 16:04:36 +0200875 TRACE("Writing to device");
876
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200877 ret = bdrv_write(exp->bs, (request.from + exp->dev_offset) / 512,
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200878 req->data, request.len / 512);
Paolo Bonzinifae69412011-09-19 16:04:36 +0200879 if (ret < 0) {
880 LOG("writing to file failed");
881 reply.error = -ret;
882 goto error_reply;
883 }
884
885 if (request.type & NBD_CMD_FLAG_FUA) {
Paolo Bonzini262db382011-09-19 15:19:27 +0200886 ret = bdrv_co_flush(exp->bs);
Paolo Bonziniadcf6302011-09-13 17:27:45 +0200887 if (ret < 0) {
Paolo Bonzinifae69412011-09-19 16:04:36 +0200888 LOG("flush failed");
Paolo Bonziniadcf6302011-09-13 17:27:45 +0200889 reply.error = -ret;
Paolo Bonzinifae69412011-09-19 16:04:36 +0200890 goto error_reply;
Paolo Bonzini2c7989a2011-10-21 13:16:28 +0200891 }
Nick Thomasb2e3d872011-02-22 15:44:51 +0000892 }
bellard7a5ca862008-05-27 21:13:40 +0000893
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100894 if (nbd_co_send_reply(req, &reply, 0) < 0) {
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200895 goto out;
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100896 }
Nick Thomasb2e3d872011-02-22 15:44:51 +0000897 break;
898 case NBD_CMD_DISC:
899 TRACE("Request type is DISCONNECT");
900 errno = 0;
Paolo Bonzini262db382011-09-19 15:19:27 +0200901 goto out;
Paolo Bonzini1486d042011-10-21 13:17:14 +0200902 case NBD_CMD_FLUSH:
903 TRACE("Request type is FLUSH");
904
Paolo Bonzini262db382011-09-19 15:19:27 +0200905 ret = bdrv_co_flush(exp->bs);
Paolo Bonzini1486d042011-10-21 13:17:14 +0200906 if (ret < 0) {
907 LOG("flush failed");
908 reply.error = -ret;
909 }
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100910 if (nbd_co_send_reply(req, &reply, 0) < 0) {
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200911 goto out;
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100912 }
Paolo Bonzini1486d042011-10-21 13:17:14 +0200913 break;
Paolo Bonzini7a706632011-10-21 13:17:14 +0200914 case NBD_CMD_TRIM:
915 TRACE("Request type is TRIM");
Paolo Bonzini262db382011-09-19 15:19:27 +0200916 ret = bdrv_co_discard(exp->bs, (request.from + exp->dev_offset) / 512,
917 request.len / 512);
Paolo Bonzini7a706632011-10-21 13:17:14 +0200918 if (ret < 0) {
919 LOG("discard failed");
920 reply.error = -ret;
921 }
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100922 if (nbd_co_send_reply(req, &reply, 0) < 0) {
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200923 goto out;
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100924 }
Paolo Bonzini7a706632011-10-21 13:17:14 +0200925 break;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000926 default:
927 LOG("invalid request type (%u) received", request.type);
Paolo Bonzinifae69412011-09-19 16:04:36 +0200928 invalid_request:
929 reply.error = -EINVAL;
930 error_reply:
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100931 if (nbd_co_send_reply(req, &reply, 0) < 0) {
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200932 goto out;
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100933 }
Paolo Bonzinifae69412011-09-19 16:04:36 +0200934 break;
Nick Thomasb2e3d872011-02-22 15:44:51 +0000935 }
bellard7a5ca862008-05-27 21:13:40 +0000936
Nick Thomasb2e3d872011-02-22 15:44:51 +0000937 TRACE("Request/Reply complete");
bellard7a5ca862008-05-27 21:13:40 +0000938
Paolo Bonzini7fe7b682012-03-05 09:10:35 +0100939done:
Paolo Bonzini262db382011-09-19 15:19:27 +0200940 nbd_request_put(req);
941 return;
942
Paolo Bonzinid9a73802011-09-19 14:18:33 +0200943out:
Paolo Bonzini72deddc2011-10-07 16:47:56 +0200944 nbd_request_put(req);
Paolo Bonzini262db382011-09-19 15:19:27 +0200945 nbd_client_close(client);
bellard7a5ca862008-05-27 21:13:40 +0000946}
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200947
Paolo Bonzini41996e32011-09-19 15:25:40 +0200948static int nbd_can_read(void *opaque)
949{
950 NBDClient *client = opaque;
951
952 return client->recv_coroutine || client->nb_requests < MAX_NBD_REQUESTS;
953}
954
Paolo Bonzini1743b512011-09-19 14:33:23 +0200955static void nbd_read(void *opaque)
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200956{
Paolo Bonzini1743b512011-09-19 14:33:23 +0200957 NBDClient *client = opaque;
958
Paolo Bonzini262db382011-09-19 15:19:27 +0200959 if (client->recv_coroutine) {
960 qemu_coroutine_enter(client->recv_coroutine, NULL);
961 } else {
962 qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
Paolo Bonzini1743b512011-09-19 14:33:23 +0200963 }
Paolo Bonzini1743b512011-09-19 14:33:23 +0200964}
965
Paolo Bonzini262db382011-09-19 15:19:27 +0200966static void nbd_restart_write(void *opaque)
967{
968 NBDClient *client = opaque;
969
970 qemu_coroutine_enter(client->send_coroutine, NULL);
971}
972
Paolo Bonzini1743b512011-09-19 14:33:23 +0200973NBDClient *nbd_client_new(NBDExport *exp, int csock,
974 void (*close)(NBDClient *))
975{
976 NBDClient *client;
Paolo Bonzinifc19f8a2012-03-07 11:05:34 +0100977 if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
Paolo Bonzini1743b512011-09-19 14:33:23 +0200978 return NULL;
979 }
980 client = g_malloc0(sizeof(NBDClient));
981 client->refcount = 1;
982 client->exp = exp;
983 client->sock = csock;
984 client->close = close;
Paolo Bonzini262db382011-09-19 15:19:27 +0200985 qemu_co_mutex_init(&client->send_lock);
Paolo Bonzini41996e32011-09-19 15:25:40 +0200986 qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
Paolo Bonzini1743b512011-09-19 14:33:23 +0200987 return client;
Paolo Bonziniaf49bbb2011-09-19 14:03:37 +0200988}