Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Linux native AIO support. |
| 3 | * |
| 4 | * Copyright (C) 2009 IBM, Corp. |
| 5 | * Copyright (C) 2009 Red Hat, Inc. |
| 6 | * |
| 7 | * This work is licensed under the terms of the GNU GPL, version 2 or later. |
| 8 | * See the COPYING file in the top-level directory. |
| 9 | */ |
Peter Maydell | 80c71a2 | 2016-01-18 18:01:42 +0000 | [diff] [blame] | 10 | #include "qemu/osdep.h" |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 11 | #include "qemu-common.h" |
Paolo Bonzini | 737e150 | 2012-12-17 18:19:44 +0100 | [diff] [blame] | 12 | #include "block/aio.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame] | 13 | #include "qemu/queue.h" |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 14 | #include "block/block.h" |
Paolo Bonzini | 9f8540e | 2012-06-09 10:57:37 +0200 | [diff] [blame] | 15 | #include "block/raw-aio.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame] | 16 | #include "qemu/event_notifier.h" |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 17 | #include "qemu/coroutine.h" |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 18 | |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 19 | #include <libaio.h> |
| 20 | |
| 21 | /* |
| 22 | * Queue size (per-device). |
| 23 | * |
| 24 | * XXX: eventually we need to communicate this to the guest and/or make it |
| 25 | * tunable by the guest. If we get more outstanding requests at a time |
| 26 | * than this we will get EAGAIN from io_submit which is communicated to |
| 27 | * the guest as an I/O error. |
| 28 | */ |
| 29 | #define MAX_EVENTS 128 |
| 30 | |
/*
 * Per-request state for one Linux AIO operation.
 *
 * A request is driven either by a coroutine (co != NULL; the coroutine is
 * re-entered on completion and reads ret) or by the callback-based AIO path
 * (co == NULL; common.cb is invoked and the ACB unref'd on completion).
 */
struct qemu_laiocb {
    BlockAIOCB common;          /* must stay first: laio_cancel() casts the
                                 * BlockAIOCB pointer to qemu_laiocb */
    Coroutine *co;              /* coroutine to resume on completion, or NULL */
    LinuxAioState *ctx;         /* owning state: io context, queues, notifier */
    struct iocb iocb;           /* kernel AIO control block for this request */
    ssize_t ret;                /* result: byte count or negative errno;
                                 * -EINPROGRESS while pending (callback path) */
    size_t nbytes;              /* expected transfer length in bytes */
    QEMUIOVector *qiov;         /* scatter/gather vector for the transfer */
    bool is_read;               /* reads get short-transfer zero padding */
    QSIMPLEQ_ENTRY(qemu_laiocb) next;   /* link in LaioQueue.pending */
};
| 42 | |
/*
 * Submission queue: requests are collected here and pushed to the kernel
 * in batches via io_submit() (see ioq_submit()).
 */
typedef struct {
    int plugged;                /* nesting depth of laio_io_plug(); >0 defers
                                 * submission until the matching unplug */
    unsigned int in_queue;      /* requests queued, not yet given to kernel */
    unsigned int in_flight;     /* requests currently owned by the kernel */
    bool blocked;               /* submission stalled with requests pending
                                 * (e.g. kernel returned EAGAIN) */
    QSIMPLEQ_HEAD(, qemu_laiocb) pending;   /* FIFO of queued requests */
} LaioQueue;
| 50 | |
/*
 * State of one Linux AIO instance: the kernel io context, the batching
 * submission queue, and the completion machinery (event notifier + BH).
 */
struct LinuxAioState {
    AioContext *aio_context;    /* event loop this instance is attached to */

    io_context_t ctx;           /* kernel AIO context from io_setup() */
    EventNotifier e;            /* signalled by the kernel (eventfd) when
                                 * completions are available */

    /* io queue for submit at batch */
    LaioQueue io_q;

    /* I/O completion processing */
    QEMUBH *completion_bh;      /* bottom half that drains completions; kept
                                 * in the state so nested event loops share it */
    struct io_event events[MAX_EVENTS]; /* batch fetched by io_getevents() */
    int event_idx;              /* next entry of events[] to process */
    int event_max;              /* number of valid entries in events[] */
};
| 66 | |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 67 | static void ioq_submit(LinuxAioState *s); |
Paolo Bonzini | 28b2408 | 2014-12-11 14:52:26 +0100 | [diff] [blame] | 68 | |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 69 | static inline ssize_t io_event_ret(struct io_event *ev) |
| 70 | { |
| 71 | return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res); |
| 72 | } |
| 73 | |
/*
 * Completes an AIO request (calls the callback and frees the ACB).
 *
 * Translates the raw kernel result in laiocb->ret (byte count or negative
 * errno) into the 0/-errno convention callers expect, then hands the result
 * to either the submitting coroutine or the completion callback.
 */
static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
{
    int ret;

    ret = laiocb->ret;
    if (ret != -ECANCELED) {
        if (ret == laiocb->nbytes) {
            /* Full transfer: report success. */
            ret = 0;
        } else if (ret >= 0) {
            /* Short reads mean EOF, pad with zeros. */
            if (laiocb->is_read) {
                qemu_iovec_memset(laiocb->qiov, ret, 0,
                                  laiocb->qiov->size - ret);
            } else {
                /* A short write is reported as running out of space. */
                ret = -ENOSPC;
            }
        }
        /* Negative errno values other than -ECANCELED pass through as-is. */
    }

    laiocb->ret = ret;
    if (laiocb->co) {
        /* Coroutine path: resume the submitter, which reads laiocb->ret. */
        qemu_coroutine_enter(laiocb->co);
    } else {
        /* Callback path: deliver the result and drop our ACB reference. */
        laiocb->common.cb(laiocb->common.opaque, ret);
        qemu_aio_unref(laiocb);
    }
}
| 104 | |
/* The completion BH fetches completed I/O requests and invokes their
 * callbacks.
 *
 * The function is somewhat tricky because it supports nested event loops, for
 * example when a request callback invokes aio_poll().  In order to do this,
 * the completion events array and index are kept in LinuxAioState.  The BH
 * reschedules itself as long as there are completions pending so it will
 * either be called again in a nested event loop or will be called after all
 * events have been completed.  When there are no events left to complete, the
 * BH returns without rescheduling.
 */
static void qemu_laio_completion_bh(void *opaque)
{
    LinuxAioState *s = opaque;

    /* Fetch more completion events when empty */
    if (s->event_idx == s->event_max) {
        do {
            /* Zero timeout: poll for already-completed events, never block */
            struct timespec ts = { 0 };
            s->event_max = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS,
                                        s->events, &ts);
        } while (s->event_max == -EINTR);

        s->event_idx = 0;
        if (s->event_max <= 0) {
            s->event_max = 0;
            return; /* no more events */
        }
        /* These requests are done as far as the kernel is concerned */
        s->io_q.in_flight -= s->event_max;
    }

    /* Reschedule so nested event loops see currently pending completions */
    qemu_bh_schedule(s->completion_bh);

    /* Process completion events */
    while (s->event_idx < s->event_max) {
        struct iocb *iocb = s->events[s->event_idx].obj;
        struct qemu_laiocb *laiocb =
                container_of(iocb, struct qemu_laiocb, iocb);

        /* Record the result and advance the index BEFORE invoking the
         * completion, since the callback may recurse into this BH via a
         * nested event loop. */
        laiocb->ret = io_event_ret(&s->events[s->event_idx]);
        s->event_idx++;

        qemu_laio_process_completion(laiocb);
    }

    /* Completions freed up in-flight slots; push queued requests now */
    if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
        ioq_submit(s);
    }

    /* Everything drained in this invocation; the reschedule above is no
     * longer needed */
    qemu_bh_cancel(s->completion_bh);
}
| 157 | |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 158 | static void qemu_laio_completion_cb(EventNotifier *e) |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 159 | { |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 160 | LinuxAioState *s = container_of(e, LinuxAioState, e); |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 161 | |
Stefan Hajnoczi | 2cdff7f | 2014-08-04 16:56:33 +0100 | [diff] [blame] | 162 | if (event_notifier_test_and_clear(&s->e)) { |
Kevin Wolf | ccb9dc1 | 2014-11-28 15:23:12 +0100 | [diff] [blame] | 163 | qemu_laio_completion_bh(s); |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 164 | } |
| 165 | } |
| 166 | |
/*
 * Asynchronous-cancel hook for the callback-based AIO path.
 *
 * Attempts io_cancel() on a still-pending request.  If the kernel accepts
 * the cancellation, the callback is invoked here with -ECANCELED; otherwise
 * the request will complete normally through the event loop, which sees
 * ret == -ECANCELED and skips result translation.
 */
static void laio_cancel(BlockAIOCB *blockacb)
{
    /* Safe because BlockAIOCB is the first member of qemu_laiocb */
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
    int ret;

    if (laiocb->ret != -EINPROGRESS) {
        /* Already completed; nothing to cancel */
        return;
    }
    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
    /* Mark cancelled regardless of outcome so the completion path reports
     * -ECANCELED either way */
    laiocb->ret = -ECANCELED;
    if (ret != 0) {
        /* iocb is not cancelled, cb will be called by the event loop later */
        return;
    }

    laiocb->common.cb(laiocb->common.opaque, laiocb->ret);
}
| 185 | |
/* ACB descriptor for the callback-based path: size for qemu_aio_get()
 * allocation and the asynchronous-cancel hook. */
static const AIOCBInfo laio_aiocb_info = {
    .aiocb_size         = sizeof(struct qemu_laiocb),
    .cancel_async       = laio_cancel,
};
| 190 | |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 191 | static void ioq_init(LaioQueue *io_q) |
| 192 | { |
Paolo Bonzini | 28b2408 | 2014-12-11 14:52:26 +0100 | [diff] [blame] | 193 | QSIMPLEQ_INIT(&io_q->pending); |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 194 | io_q->plugged = 0; |
Roman Pen | 5e1b34a | 2016-07-13 15:03:24 +0200 | [diff] [blame] | 195 | io_q->in_queue = 0; |
| 196 | io_q->in_flight = 0; |
Paolo Bonzini | 43f2376 | 2014-12-11 14:52:27 +0100 | [diff] [blame] | 197 | io_q->blocked = false; |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 198 | } |
| 199 | |
/*
 * Push pending requests to the kernel with io_submit(), in batches sized so
 * that at most MAX_EVENTS requests are ever in flight.
 *
 * On a hard io_submit() error the first pending request is failed and the
 * rest are retried; on EAGAIN (or a partial submit) the loop stops and
 * io_q.blocked is set so later completions re-trigger submission.
 */
static void ioq_submit(LinuxAioState *s)
{
    int ret, len;
    struct qemu_laiocb *aiocb;
    struct iocb *iocbs[MAX_EVENTS];
    QSIMPLEQ_HEAD(, qemu_laiocb) completed;

    do {
        if (s->io_q.in_flight >= MAX_EVENTS) {
            /* No free in-flight slots; completions will resubmit */
            break;
        }
        /* Gather as many pending iocbs as fit under the in-flight cap */
        len = 0;
        QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
            iocbs[len++] = &aiocb->iocb;
            if (s->io_q.in_flight + len >= MAX_EVENTS) {
                break;
            }
        }

        ret = io_submit(s->ctx, len, iocbs);
        if (ret == -EAGAIN) {
            /* Kernel is out of resources; keep requests queued */
            break;
        }
        if (ret < 0) {
            /* Fail the first request, retry the rest */
            aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
            QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
            s->io_q.in_queue--;
            aiocb->ret = ret;
            qemu_laio_process_completion(aiocb);
            continue;
        }

        /* ret requests were accepted (possibly fewer than len);
         * detach exactly those from the pending list */
        s->io_q.in_flight += ret;
        s->io_q.in_queue  -= ret;
        aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
        QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
    } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
    /* Anything left queued means we are throttled; completions unblock us */
    s->io_q.blocked = (s->io_q.in_queue > 0);
}
| 240 | |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 241 | void laio_io_plug(BlockDriverState *bs, LinuxAioState *s) |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 242 | { |
Paolo Bonzini | 0187f5c | 2016-07-04 18:33:20 +0200 | [diff] [blame] | 243 | s->io_q.plugged++; |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 244 | } |
| 245 | |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 246 | void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s) |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 247 | { |
Paolo Bonzini | 6b98bd6 | 2016-04-07 18:33:34 +0200 | [diff] [blame] | 248 | assert(s->io_q.plugged); |
Paolo Bonzini | 0187f5c | 2016-07-04 18:33:20 +0200 | [diff] [blame] | 249 | if (--s->io_q.plugged == 0 && |
| 250 | !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) { |
Paolo Bonzini | de35464 | 2014-12-11 14:52:29 +0100 | [diff] [blame] | 251 | ioq_submit(s); |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 252 | } |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 253 | } |
| 254 | |
/*
 * Prepare a read or write iocb for an already-initialised qemu_laiocb and
 * queue it for submission.
 *
 * @fd:     file descriptor to operate on
 * @laiocb: request state; ctx and qiov must be set by the caller
 * @offset: byte offset of the transfer
 * @type:   QEMU_AIO_READ or QEMU_AIO_WRITE
 *
 * Returns 0 on success (the request is queued and possibly submitted), or
 * -EIO if @type is not a supported operation.
 */
static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
                          int type)
{
    LinuxAioState *s = laiocb->ctx;
    struct iocb *iocbs = &laiocb->iocb;
    QEMUIOVector *qiov = laiocb->qiov;

    switch (type) {
    case QEMU_AIO_WRITE:
        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    case QEMU_AIO_READ:
        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    /* Currently Linux kernel does not support other operations */
    default:
        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                __func__, type);
        return -EIO;
    }
    /* Completion will be signalled through our eventfd-backed notifier */
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

    QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
    s->io_q.in_queue++;
    /* Submit immediately unless blocked, or plugged with room to keep
     * batching below the MAX_EVENTS in-flight cap */
    if (!s->io_q.blocked &&
        (!s->io_q.plugged ||
         s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
        ioq_submit(s);
    }

    return 0;
}
| 287 | |
| 288 | int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
Kevin Wolf | 9d52aa3 | 2016-06-03 17:36:27 +0200 | [diff] [blame] | 289 | uint64_t offset, QEMUIOVector *qiov, int type) |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 290 | { |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 291 | int ret; |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 292 | struct qemu_laiocb laiocb = { |
| 293 | .co = qemu_coroutine_self(), |
Kevin Wolf | 9d52aa3 | 2016-06-03 17:36:27 +0200 | [diff] [blame] | 294 | .nbytes = qiov->size, |
Kevin Wolf | 2174f12 | 2014-08-06 17:18:07 +0200 | [diff] [blame] | 295 | .ctx = s, |
| 296 | .is_read = (type == QEMU_AIO_READ), |
| 297 | .qiov = qiov, |
| 298 | }; |
| 299 | |
| 300 | ret = laio_do_submit(fd, &laiocb, offset, type); |
| 301 | if (ret < 0) { |
| 302 | return ret; |
| 303 | } |
| 304 | |
| 305 | qemu_coroutine_yield(); |
| 306 | return laiocb.ret; |
| 307 | } |
| 308 | |
| 309 | BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd, |
| 310 | int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, |
| 311 | BlockCompletionFunc *cb, void *opaque, int type) |
| 312 | { |
| 313 | struct qemu_laiocb *laiocb; |
| 314 | off_t offset = sector_num * BDRV_SECTOR_SIZE; |
| 315 | int ret; |
| 316 | |
| 317 | laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque); |
| 318 | laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE; |
| 319 | laiocb->ctx = s; |
| 320 | laiocb->ret = -EINPROGRESS; |
| 321 | laiocb->is_read = (type == QEMU_AIO_READ); |
| 322 | laiocb->qiov = qiov; |
| 323 | |
| 324 | ret = laio_do_submit(fd, laiocb, offset, type); |
| 325 | if (ret < 0) { |
| 326 | qemu_aio_unref(laiocb); |
| 327 | return NULL; |
| 328 | } |
| 329 | |
| 330 | return &laiocb->common; |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 331 | } |
| 332 | |
/*
 * Detach from an AioContext before a context switch or teardown.
 *
 * Unregisters the event notifier first so no completion callback can fire,
 * then deletes the completion bottom half.  Pair with
 * laio_attach_aio_context().
 */
void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
{
    aio_set_event_notifier(old_context, &s->e, false, NULL);
    qemu_bh_delete(s->completion_bh);
}
| 338 | |
/*
 * Attach to an AioContext: create the completion bottom half in the new
 * context, then start watching the eventfd so kernel completion signals
 * invoke qemu_laio_completion_cb() there.
 */
void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
    s->aio_context = new_context;
    s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
    aio_set_event_notifier(new_context, &s->e, false,
                           qemu_laio_completion_cb);
}
| 346 | |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 347 | LinuxAioState *laio_init(void) |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 348 | { |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 349 | LinuxAioState *s; |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 350 | |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 351 | s = g_malloc0(sizeof(*s)); |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 352 | if (event_notifier_init(&s->e, false) < 0) { |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 353 | goto out_free_state; |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 354 | } |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 355 | |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 356 | if (io_setup(MAX_EVENTS, &s->ctx) != 0) { |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 357 | goto out_close_efd; |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 358 | } |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 359 | |
Ming Lei | 1b3abdc | 2014-07-04 18:04:34 +0800 | [diff] [blame] | 360 | ioq_init(&s->io_q); |
| 361 | |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 362 | return s; |
| 363 | |
| 364 | out_close_efd: |
Paolo Bonzini | c90caf2 | 2012-02-24 08:39:02 +0100 | [diff] [blame] | 365 | event_notifier_cleanup(&s->e); |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 366 | out_free_state: |
Anthony Liguori | 7267c09 | 2011-08-20 22:09:37 -0500 | [diff] [blame] | 367 | g_free(s); |
Christoph Hellwig | 5c6c3a6 | 2009-08-20 16:58:35 +0200 | [diff] [blame] | 368 | return NULL; |
| 369 | } |
Stefan Hajnoczi | abd269b | 2014-05-08 16:34:48 +0200 | [diff] [blame] | 370 | |
Paolo Bonzini | dd7f7ed | 2016-04-07 18:33:35 +0200 | [diff] [blame] | 371 | void laio_cleanup(LinuxAioState *s) |
Stefan Hajnoczi | abd269b | 2014-05-08 16:34:48 +0200 | [diff] [blame] | 372 | { |
Stefan Hajnoczi | abd269b | 2014-05-08 16:34:48 +0200 | [diff] [blame] | 373 | event_notifier_cleanup(&s->e); |
Gonglei | a1abf40 | 2014-07-12 11:43:37 +0800 | [diff] [blame] | 374 | |
| 375 | if (io_destroy(s->ctx) != 0) { |
| 376 | fprintf(stderr, "%s: destroy AIO context %p failed\n", |
| 377 | __func__, &s->ctx); |
| 378 | } |
Stefan Hajnoczi | abd269b | 2014-05-08 16:34:48 +0200 | [diff] [blame] | 379 | g_free(s); |
| 380 | } |