sheepdog: use per-AIOCB dirty indexes for non-overlapping requests
In commit 96b14ff85acf, requests for overlapping areas are serialized.
However, that cannot handle the case of non-overlapping requests: the
shared min_dirty_data_idx and max_dirty_data_idx can be overwritten by
such requests, and an invalid inode update can happen, e.g. when
create(1, 2) and create(3, 4) are issued in parallel.
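
A minimal standalone sketch of the problem (plain C, not sheepdog code;
the names and the flow are illustrative only):

    /* Two parallel requests touch disjoint index ranges but share a single
     * pair of min/max bounds, as the old code kept in BDRVSheepdogState. */
    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t shared_min = UINT32_MAX, shared_max = 0;

    static void mark_dirty(uint32_t idx)
    {
        if (idx < shared_min) shared_min = idx;
        if (idx > shared_max) shared_max = idx;
    }

    int main(void)
    {
        mark_dirty(1); mark_dirty(2); /* request A: create(1, 2) */
        mark_dirty(3); mark_dirty(4); /* request B: create(3, 4), in flight */

        /* A completes first and flushes [shared_min, shared_max] to the
         * inode, i.e. 1..4, although B's entries 3..4 are not settled yet. */
        printf("A updates inode entries %" PRIu32 "..%" PRIu32 "\n",
               shared_min, shared_max);
        shared_min = UINT32_MAX;
        shared_max = 0;

        /* B completes, but the shared bounds were already reset:
         * min > max, so B performs no inode update for its own entries. */
        printf("B updates %s\n",
               shared_min <= shared_max ? "something" : "nothing");
        return 0;
    }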
To avoid the above situation, this patch moves the dirty data indexes
from BDRVSheepdogState into SheepdogAIOCB.
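Roughly, each in-flight request now carries its own bounds (hypothetical
minimal struct, mirroring the fields added in the diff below):

    #include <stdint.h>

    /* Hypothetical per-request bounds, mirroring the SheepdogAIOCB fields
     * added by this patch; each request flushes only its own range. */
    struct dirty_range {
        uint32_t min_dirty_data_idx;   /* initialised to UINT32_MAX */
        uint32_t max_dirty_data_idx;   /* initialised to 0 */
    };

With this, request A ends up with 1..2 and request B with 3..4 regardless
of completion order, so each inode update covers exactly the entries that
the request actually created.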
This patch also does a trivial rename for clarity:
overwrapping -> overlapping
Cc: Teruaki Ishizaki <ishizaki.teruaki@lab.ntt.co.jp>
Cc: Vasiliy Tolstov <v.tolstov@selfip.ru>
Cc: Jeff Cody <jcody@redhat.com>
Signed-off-by: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
Tested-by: Vasiliy Tolstov <v.tolstov@selfip.ru>
Message-id: 1441076590-8015-2-git-send-email-mitake.hitoshi@lab.ntt.co.jp
Signed-off-by: Jeff Cody <jcody@redhat.com>
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 255372e..08a09e9 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -318,7 +318,7 @@
AIOCB_DISCARD_OBJ,
};
-#define AIOCBOverwrapping(x, y) \
+#define AIOCBOverlapping(x, y) \
(!(x->max_affect_data_idx < y->min_affect_data_idx \
|| y->max_affect_data_idx < x->min_affect_data_idx))
@@ -342,6 +342,15 @@
uint32_t min_affect_data_idx;
uint32_t max_affect_data_idx;
+ /*
+ * The difference between affect_data_idx and dirty_data_idx:
+ * affect_data_idx represents the index range touched by requests of any type.
+ * dirty_data_idx represents the index range updated by COW requests.
+ * dirty_data_idx is used when updating the inode object.
+ */
+ uint32_t min_dirty_data_idx;
+ uint32_t max_dirty_data_idx;
+
QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
@@ -351,9 +360,6 @@
SheepdogInode inode;
- uint32_t min_dirty_data_idx;
- uint32_t max_dirty_data_idx;
-
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
uint32_t cache_flags;
@@ -373,7 +379,7 @@
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
- CoQueue overwrapping_queue;
+ CoQueue overlapping_queue;
QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;
@@ -561,6 +567,9 @@
acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
+
return acb;
}
@@ -824,8 +833,8 @@
*/
if (rsp.result == SD_RES_SUCCESS) {
s->inode.data_vdi_id[idx] = s->inode.vdi_id;
- s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
- s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+ acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+ acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
}
}
break;
@@ -1471,13 +1480,11 @@
}
memcpy(&s->inode, buf, sizeof(s->inode));
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
- qemu_co_queue_init(&s->overwrapping_queue);
+ qemu_co_queue_init(&s->overlapping_queue);
qemu_opts_del(opts);
g_free(buf);
return 0;
@@ -1989,16 +1996,16 @@
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
- mn = s->min_dirty_data_idx;
- mx = s->max_dirty_data_idx;
+ mn = acb->min_dirty_data_idx;
+ mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
@@ -2224,12 +2231,12 @@
return 1;
}
-static bool check_overwrapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
{
SheepdogAIOCB *cb;
QLIST_FOREACH(cb, &s->inflight_aiocb_head, aiocb_siblings) {
- if (AIOCBOverwrapping(aiocb, cb)) {
+ if (AIOCBOverlapping(aiocb, cb)) {
return true;
}
}
@@ -2258,15 +2265,15 @@
acb->aiocb_type = AIOCB_WRITE_UDATA;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2274,7 +2281,7 @@
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2291,15 +2298,15 @@
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2307,7 +2314,7 @@
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2656,15 +2663,15 @@
acb->aio_done_func = sd_finish_aiocb;
retry:
- if (check_overwrapping_aiocb(s, acb)) {
- qemu_co_queue_wait(&s->overwrapping_queue);
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
goto retry;
}
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
@@ -2672,7 +2679,7 @@
qemu_coroutine_yield();
QLIST_REMOVE(acb, aiocb_siblings);
- qemu_co_queue_restart_all(&s->overwrapping_queue);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
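
For reference, a standalone sketch (not QEMU code; the struct layout is a
stand-in, only the fact that data_vdi_id is the trailing member matters) of
the inode-update range computed from the per-AIOCB bounds in the hunk above:

    #include <stdint.h>
    #include <stdio.h>

    #define NR_ENTRIES 8                   /* stand-in for the real array size */

    struct fake_inode {
        uint64_t header_fields;            /* placeholder for the real header */
        uint32_t data_vdi_id[NR_ENTRIES];  /* object index -> vdi id mapping */
    };

    int main(void)
    {
        uint32_t mn = 1, mx = 2;           /* e.g. the create(1, 2) request */
        struct fake_inode inode;

        /* Same arithmetic as the code above: offset of data_vdi_id[mn] from
         * the start of the inode, and a length covering entries mn..mx. */
        size_t offset = sizeof(inode) - sizeof(inode.data_vdi_id) +
                        mn * sizeof(inode.data_vdi_id[0]);
        size_t data_len = (mx - mn + 1) * sizeof(inode.data_vdi_id[0]);

        printf("update %zu bytes at inode offset %zu\n", data_len, offset);
        return 0;
    }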