sheepdog: use per-AIOCB dirty indexes for non-overlapping requests In commit 96b14ff85acf, requests for overlapping areas are serialized. However, that commit does not handle the case of non-overlapping requests. In such a case, min_dirty_data_idx and max_dirty_data_idx can be overwritten by concurrent requests, and an invalid inode update can happen, e.g. when create(1, 2) and create(3, 4) are issued in parallel. This patch moves the dirty data indexes from BDRVSheepdogState to SheepdogAIOCB to avoid the above situation. This patch also does a trivial rename for clarity: overwrapping -> overlapping Cc: Teruaki Ishizaki <ishizaki.teruaki@lab.ntt.co.jp> Cc: Vasiliy Tolstov <v.tolstov@selfip.ru> Cc: Jeff Cody <jcody@redhat.com> Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp> Tested-by: Vasiliy Tolstov <v.tolstov@selfip.ru> Message-id: 1441076590-8015-2-git-send-email-mitake.hitoshi@lab.ntt.co.jp Signed-off-by: Jeff Cody <jcody@redhat.com>
diff --git a/block/sheepdog.c b/block/sheepdog.c index 255372e..08a09e9 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c
@@ -318,7 +318,7 @@ AIOCB_DISCARD_OBJ, }; -#define AIOCBOverwrapping(x, y) \ +#define AIOCBOverlapping(x, y) \ (!(x->max_affect_data_idx < y->min_affect_data_idx \ || y->max_affect_data_idx < x->min_affect_data_idx)) @@ -342,6 +342,15 @@ uint32_t min_affect_data_idx; uint32_t max_affect_data_idx; + /* + * The difference between affect_data_idx and dirty_data_idx: + * affect_data_idx represents range of index of all request types. + * dirty_data_idx represents range of index updated by COW requests. + * dirty_data_idx is used for updating an inode object. + */ + uint32_t min_dirty_data_idx; + uint32_t max_dirty_data_idx; + QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings; }; @@ -351,9 +360,6 @@ SheepdogInode inode; - uint32_t min_dirty_data_idx; - uint32_t max_dirty_data_idx; - char name[SD_MAX_VDI_LEN]; bool is_snapshot; uint32_t cache_flags; @@ -373,7 +379,7 @@ QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head; QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head; - CoQueue overwrapping_queue; + CoQueue overlapping_queue; QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head; } BDRVSheepdogState; @@ -561,6 +567,9 @@ acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE + acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; + return acb; } @@ -824,8 +833,8 @@ */ if (rsp.result == SD_RES_SUCCESS) { s->inode.data_vdi_id[idx] = s->inode.vdi_id; - s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx); - s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx); + acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx); + acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx); } } break; @@ -1471,13 +1480,11 @@ } memcpy(&s->inode, buf, sizeof(s->inode)); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE; pstrcpy(s->name, sizeof(s->name), vdi); qemu_co_mutex_init(&s->lock); - 
qemu_co_queue_init(&s->overwrapping_queue); + qemu_co_queue_init(&s->overlapping_queue); qemu_opts_del(opts); g_free(buf); return 0; @@ -1989,16 +1996,16 @@ AIOReq *aio_req; uint32_t offset, data_len, mn, mx; - mn = s->min_dirty_data_idx; - mx = s->max_dirty_data_idx; + mn = acb->min_dirty_data_idx; + mx = acb->max_dirty_data_idx; if (mn <= mx) { /* we need to update the vdi object. */ offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) + mn * sizeof(s->inode.data_vdi_id[0]); data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]); - s->min_dirty_data_idx = UINT32_MAX; - s->max_dirty_data_idx = 0; + acb->min_dirty_data_idx = UINT32_MAX; + acb->max_dirty_data_idx = 0; iov.iov_base = &s->inode; iov.iov_len = sizeof(s->inode); @@ -2224,12 +2231,12 @@ return 1; } -static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) +static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb) { SheepdogAIOCB *cb; QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) { - if (AIOCBOverwrapping(aiocb, cb)) { + if (AIOCBOverlapping(aiocb, cb)) { return true; } } @@ -2258,15 +2265,15 @@ acb->aiocb_type = AIOCB_WRITE_UDATA; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2274,7 +2281,7 @@ qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2291,15 +2298,15 @@ acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + 
qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2307,7 +2314,7 @@ qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; } @@ -2656,15 +2663,15 @@ acb->aio_done_func = sd_finish_aiocb; retry: - if (check_overwrapping_aiocb(s, acb)) { - qemu_co_queue_wait(&s->overwrapping_queue); + if (check_overlapping_aiocb(s, acb)) { + qemu_co_queue_wait(&s->overlapping_queue); goto retry; } ret = sd_co_rw_vector(acb); if (ret <= 0) { QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); qemu_aio_unref(acb); return ret; } @@ -2672,7 +2679,7 @@ qemu_coroutine_yield(); QLIST_REMOVE(acb, aiocb_siblings); - qemu_co_queue_restart_all(&s->overwrapping_queue); + qemu_co_queue_restart_all(&s->overlapping_queue); return acb->ret; }