Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 1 | /* |
| 2 | * QEMU block layer thread pool |
| 3 | * |
| 4 | * Copyright IBM, Corp. 2008 |
| 5 | * Copyright Red Hat, Inc. 2012 |
| 6 | * |
| 7 | * Authors: |
| 8 | * Anthony Liguori <aliguori@us.ibm.com> |
| 9 | * Paolo Bonzini <pbonzini@redhat.com> |
| 10 | * |
| 11 | * This work is licensed under the terms of the GNU GPL, version 2. See |
| 12 | * the COPYING file in the top-level directory. |
| 13 | * |
| 14 | * Contributions after 2012-01-13 are licensed under the terms of the |
| 15 | * GNU GPL, version 2 or (at your option) any later version. |
| 16 | */ |
| 17 | #include "qemu-common.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame^] | 18 | #include "qemu/queue.h" |
| 19 | #include "qemu/thread.h" |
| 20 | #include "qemu/osdep.h" |
Paolo Bonzini | 737e150 | 2012-12-17 18:19:44 +0100 | [diff] [blame] | 21 | #include "block/coroutine.h" |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 22 | #include "trace.h" |
Paolo Bonzini | 737e150 | 2012-12-17 18:19:44 +0100 | [diff] [blame] | 23 | #include "block/block_int.h" |
Paolo Bonzini | 1de7afc | 2012-12-17 18:20:00 +0100 | [diff] [blame^] | 24 | #include "qemu/event_notifier.h" |
Paolo Bonzini | 737e150 | 2012-12-17 18:19:44 +0100 | [diff] [blame] | 25 | #include "block/thread-pool.h" |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 26 | |
/* One unit of work handed to the pool; the full layout follows below. */
typedef struct ThreadPoolElement ThreadPoolElement;

/* Declared early because worker_thread() calls it before its definition. */
static void do_spawn_thread(void);
| 30 | |
/* Life cycle of a ThreadPoolElement. */
enum ThreadState {
    THREAD_QUEUED,      /* submitted, still linked on request_list */
    THREAD_ACTIVE,      /* dequeued by a worker that is running func */
    THREAD_DONE,        /* func has returned and ret has been stored */
    THREAD_CANCELED,    /* taken off request_list by thread_pool_cancel() */
};
| 37 | |
| 38 | struct ThreadPoolElement { |
| 39 | BlockDriverAIOCB common; |
| 40 | ThreadPoolFunc *func; |
| 41 | void *arg; |
Paolo Bonzini | 19d092c | 2012-10-31 10:09:11 +0100 | [diff] [blame] | 42 | |
| 43 | /* Moving state out of THREAD_QUEUED is protected by lock. After |
| 44 | * that, only the worker thread can write to it. Reads and writes |
| 45 | * of state and ret are ordered with memory barriers. |
| 46 | */ |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 47 | enum ThreadState state; |
| 48 | int ret; |
| 49 | |
| 50 | /* Access to this list is protected by lock. */ |
| 51 | QTAILQ_ENTRY(ThreadPoolElement) reqs; |
| 52 | |
| 53 | /* Access to this list is protected by the global mutex. */ |
| 54 | QLIST_ENTRY(ThreadPoolElement) all; |
| 55 | }; |
| 56 | |
| 57 | static EventNotifier notifier; |
| 58 | static QemuMutex lock; |
| 59 | static QemuCond check_cancel; |
| 60 | static QemuSemaphore sem; |
| 61 | static int max_threads = 64; |
| 62 | static QEMUBH *new_thread_bh; |
| 63 | |
| 64 | /* The following variables are protected by the global mutex. */ |
| 65 | static QLIST_HEAD(, ThreadPoolElement) head; |
| 66 | |
| 67 | /* The following variables are protected by lock. */ |
| 68 | static QTAILQ_HEAD(, ThreadPoolElement) request_list; |
| 69 | static int cur_threads; |
| 70 | static int idle_threads; |
| 71 | static int new_threads; /* backlog of threads we need to create */ |
| 72 | static int pending_threads; /* threads created but not running yet */ |
| 73 | static int pending_cancellations; /* whether we need a cond_broadcast */ |
| 74 | |
| 75 | static void *worker_thread(void *unused) |
| 76 | { |
| 77 | qemu_mutex_lock(&lock); |
| 78 | pending_threads--; |
| 79 | do_spawn_thread(); |
| 80 | |
| 81 | while (1) { |
| 82 | ThreadPoolElement *req; |
| 83 | int ret; |
| 84 | |
| 85 | do { |
| 86 | idle_threads++; |
| 87 | qemu_mutex_unlock(&lock); |
| 88 | ret = qemu_sem_timedwait(&sem, 10000); |
| 89 | qemu_mutex_lock(&lock); |
| 90 | idle_threads--; |
| 91 | } while (ret == -1 && !QTAILQ_EMPTY(&request_list)); |
| 92 | if (ret == -1) { |
| 93 | break; |
| 94 | } |
| 95 | |
| 96 | req = QTAILQ_FIRST(&request_list); |
| 97 | QTAILQ_REMOVE(&request_list, req, reqs); |
| 98 | req->state = THREAD_ACTIVE; |
| 99 | qemu_mutex_unlock(&lock); |
| 100 | |
| 101 | ret = req->func(req->arg); |
| 102 | |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 103 | req->ret = ret; |
Paolo Bonzini | 19d092c | 2012-10-31 10:09:11 +0100 | [diff] [blame] | 104 | /* Write ret before state. */ |
| 105 | smp_wmb(); |
| 106 | req->state = THREAD_DONE; |
| 107 | |
| 108 | qemu_mutex_lock(&lock); |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 109 | if (pending_cancellations) { |
| 110 | qemu_cond_broadcast(&check_cancel); |
| 111 | } |
| 112 | |
| 113 | event_notifier_set(¬ifier); |
| 114 | } |
| 115 | |
| 116 | cur_threads--; |
| 117 | qemu_mutex_unlock(&lock); |
| 118 | return NULL; |
| 119 | } |
| 120 | |
| 121 | static void do_spawn_thread(void) |
| 122 | { |
| 123 | QemuThread t; |
| 124 | |
| 125 | /* Runs with lock taken. */ |
| 126 | if (!new_threads) { |
| 127 | return; |
| 128 | } |
| 129 | |
| 130 | new_threads--; |
| 131 | pending_threads++; |
| 132 | |
| 133 | qemu_thread_create(&t, worker_thread, NULL, QEMU_THREAD_DETACHED); |
| 134 | } |
| 135 | |
| 136 | static void spawn_thread_bh_fn(void *opaque) |
| 137 | { |
| 138 | qemu_mutex_lock(&lock); |
| 139 | do_spawn_thread(); |
| 140 | qemu_mutex_unlock(&lock); |
| 141 | } |
| 142 | |
| 143 | static void spawn_thread(void) |
| 144 | { |
| 145 | cur_threads++; |
| 146 | new_threads++; |
| 147 | /* If there are threads being created, they will spawn new workers, so |
| 148 | * we don't spend time creating many threads in a loop holding a mutex or |
| 149 | * starving the current vcpu. |
| 150 | * |
| 151 | * If there are no idle threads, ask the main thread to create one, so we |
| 152 | * inherit the correct affinity instead of the vcpu affinity. |
| 153 | */ |
| 154 | if (!pending_threads) { |
| 155 | qemu_bh_schedule(new_thread_bh); |
| 156 | } |
| 157 | } |
| 158 | |
| 159 | static void event_notifier_ready(EventNotifier *notifier) |
| 160 | { |
| 161 | ThreadPoolElement *elem, *next; |
| 162 | |
| 163 | event_notifier_test_and_clear(notifier); |
| 164 | restart: |
| 165 | QLIST_FOREACH_SAFE(elem, &head, all, next) { |
| 166 | if (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { |
| 167 | continue; |
| 168 | } |
| 169 | if (elem->state == THREAD_DONE) { |
| 170 | trace_thread_pool_complete(elem, elem->common.opaque, elem->ret); |
| 171 | } |
| 172 | if (elem->state == THREAD_DONE && elem->common.cb) { |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 173 | QLIST_REMOVE(elem, all); |
Paolo Bonzini | 19d092c | 2012-10-31 10:09:11 +0100 | [diff] [blame] | 174 | /* Read state before ret. */ |
| 175 | smp_rmb(); |
| 176 | elem->common.cb(elem->common.opaque, elem->ret); |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 177 | qemu_aio_release(elem); |
| 178 | goto restart; |
| 179 | } else { |
| 180 | /* remove the request */ |
| 181 | QLIST_REMOVE(elem, all); |
| 182 | qemu_aio_release(elem); |
| 183 | } |
| 184 | } |
| 185 | } |
| 186 | |
| 187 | static int thread_pool_active(EventNotifier *notifier) |
| 188 | { |
| 189 | return !QLIST_EMPTY(&head); |
| 190 | } |
| 191 | |
| 192 | static void thread_pool_cancel(BlockDriverAIOCB *acb) |
| 193 | { |
| 194 | ThreadPoolElement *elem = (ThreadPoolElement *)acb; |
| 195 | |
| 196 | trace_thread_pool_cancel(elem, elem->common.opaque); |
| 197 | |
| 198 | qemu_mutex_lock(&lock); |
| 199 | if (elem->state == THREAD_QUEUED && |
| 200 | /* No thread has yet started working on elem. we can try to "steal" |
| 201 | * the item from the worker if we can get a signal from the |
| 202 | * semaphore. Because this is non-blocking, we can do it with |
| 203 | * the lock taken and ensure that elem will remain THREAD_QUEUED. |
| 204 | */ |
| 205 | qemu_sem_timedwait(&sem, 0) == 0) { |
| 206 | QTAILQ_REMOVE(&request_list, elem, reqs); |
| 207 | elem->state = THREAD_CANCELED; |
| 208 | event_notifier_set(¬ifier); |
| 209 | } else { |
| 210 | pending_cancellations++; |
| 211 | while (elem->state != THREAD_CANCELED && elem->state != THREAD_DONE) { |
| 212 | qemu_cond_wait(&check_cancel, &lock); |
| 213 | } |
| 214 | pending_cancellations--; |
| 215 | } |
| 216 | qemu_mutex_unlock(&lock); |
| 217 | } |
| 218 | |
Stefan Hajnoczi | d7331be | 2012-10-31 16:34:37 +0100 | [diff] [blame] | 219 | static const AIOCBInfo thread_pool_aiocb_info = { |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 220 | .aiocb_size = sizeof(ThreadPoolElement), |
| 221 | .cancel = thread_pool_cancel, |
| 222 | }; |
| 223 | |
| 224 | BlockDriverAIOCB *thread_pool_submit_aio(ThreadPoolFunc *func, void *arg, |
| 225 | BlockDriverCompletionFunc *cb, void *opaque) |
| 226 | { |
| 227 | ThreadPoolElement *req; |
| 228 | |
Stefan Hajnoczi | d7331be | 2012-10-31 16:34:37 +0100 | [diff] [blame] | 229 | req = qemu_aio_get(&thread_pool_aiocb_info, NULL, cb, opaque); |
Paolo Bonzini | d354c7e | 2012-02-23 13:23:34 +0100 | [diff] [blame] | 230 | req->func = func; |
| 231 | req->arg = arg; |
| 232 | req->state = THREAD_QUEUED; |
| 233 | |
| 234 | QLIST_INSERT_HEAD(&head, req, all); |
| 235 | |
| 236 | trace_thread_pool_submit(req, arg); |
| 237 | |
| 238 | qemu_mutex_lock(&lock); |
| 239 | if (idle_threads == 0 && cur_threads < max_threads) { |
| 240 | spawn_thread(); |
| 241 | } |
| 242 | QTAILQ_INSERT_TAIL(&request_list, req, reqs); |
| 243 | qemu_mutex_unlock(&lock); |
| 244 | qemu_sem_post(&sem); |
| 245 | return &req->common; |
| 246 | } |
| 247 | |
| 248 | typedef struct ThreadPoolCo { |
| 249 | Coroutine *co; |
| 250 | int ret; |
| 251 | } ThreadPoolCo; |
| 252 | |
| 253 | static void thread_pool_co_cb(void *opaque, int ret) |
| 254 | { |
| 255 | ThreadPoolCo *co = opaque; |
| 256 | |
| 257 | co->ret = ret; |
| 258 | qemu_coroutine_enter(co->co, NULL); |
| 259 | } |
| 260 | |
| 261 | int coroutine_fn thread_pool_submit_co(ThreadPoolFunc *func, void *arg) |
| 262 | { |
| 263 | ThreadPoolCo tpc = { .co = qemu_coroutine_self(), .ret = -EINPROGRESS }; |
| 264 | assert(qemu_in_coroutine()); |
| 265 | thread_pool_submit_aio(func, arg, thread_pool_co_cb, &tpc); |
| 266 | qemu_coroutine_yield(); |
| 267 | return tpc.ret; |
| 268 | } |
| 269 | |
| 270 | void thread_pool_submit(ThreadPoolFunc *func, void *arg) |
| 271 | { |
| 272 | thread_pool_submit_aio(func, arg, NULL, NULL); |
| 273 | } |
| 274 | |
| 275 | static void thread_pool_init(void) |
| 276 | { |
| 277 | QLIST_INIT(&head); |
| 278 | event_notifier_init(¬ifier, false); |
| 279 | qemu_mutex_init(&lock); |
| 280 | qemu_cond_init(&check_cancel); |
| 281 | qemu_sem_init(&sem, 0); |
| 282 | qemu_aio_set_event_notifier(¬ifier, event_notifier_ready, |
| 283 | thread_pool_active); |
| 284 | |
| 285 | QTAILQ_INIT(&request_list); |
| 286 | new_thread_bh = qemu_bh_new(spawn_thread_bh_fn, NULL); |
| 287 | } |
| 288 | |
| 289 | block_init(thread_pool_init) |