|  | /* | 
|  | * QEMU Block driver for Archipelago | 
|  | * | 
|  | * Copyright (C) 2014 Chrysostomos Nanakos <cnanakos@grnet.gr> | 
|  | * | 
|  | * This work is licensed under the terms of the GNU GPL, version 2 or later. | 
|  | * See the COPYING file in the top-level directory. | 
|  | * | 
|  | */ | 
|  |  | 
|  | /* | 
|  | * VM Image on Archipelago volume is specified like this: | 
|  | * | 
|  | * file.driver=archipelago,file.volume=<volumename> | 
|  | * [,file.mport=<mapperd_port>[,file.vport=<vlmcd_port>] | 
|  | * [,file.segment=<segment_name>]] | 
|  | * | 
|  | * or | 
|  | * | 
|  | * file=archipelago:<volumename>[/mport=<mapperd_port>[:vport=<vlmcd_port>][: | 
|  | * segment=<segment_name>]] | 
|  | * | 
|  | * 'archipelago' is the protocol. | 
|  | * | 
|  | * 'mport' is the port number on which mapperd is listening. This is optional | 
|  | * and if not specified, QEMU will make Archipelago to use the default port. | 
|  | * | 
|  | * 'vport' is the port number on which vlmcd is listening. This is optional | 
|  | * and if not specified, QEMU will make Archipelago to use the default port. | 
|  | * | 
|  | * 'segment' is the name of the shared memory segment Archipelago stack | 
|  | * is using. This is optional and if not specified, QEMU will make Archipelago | 
|  | * to use the default value, 'archipelago'. | 
|  | * | 
|  | * Examples: | 
|  | * | 
|  | * file.driver=archipelago,file.volume=my_vm_volume | 
|  | * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123 | 
|  | * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, | 
|  | *  file.vport=1234 | 
|  | * file.driver=archipelago,file.volume=my_vm_volume,file.mport=123, | 
|  | *  file.vport=1234,file.segment=my_segment | 
|  | * | 
|  | * or | 
|  | * | 
|  | * file=archipelago:my_vm_volume | 
|  | * file=archipelago:my_vm_volume/mport=123 | 
|  | * file=archipelago:my_vm_volume/mport=123:vport=1234 | 
|  | * file=archipelago:my_vm_volume/mport=123:vport=1234:segment=my_segment | 
|  | * | 
|  | */ | 
|  |  | 
|  | #include "qemu-common.h" | 
|  | #include "block/block_int.h" | 
|  | #include "qemu/error-report.h" | 
|  | #include "qemu/thread.h" | 
|  | #include "qapi/qmp/qint.h" | 
|  | #include "qapi/qmp/qstring.h" | 
|  | #include "qapi/qmp/qjson.h" | 
|  | #include "qemu/atomic.h" | 
|  |  | 
|  | #include <inttypes.h> | 
|  | #include <xseg/xseg.h> | 
|  | #include <xseg/protocol.h> | 
|  |  | 
|  | #define MAX_REQUEST_SIZE    524288 | 
|  |  | 
|  | #define ARCHIPELAGO_OPT_VOLUME      "volume" | 
|  | #define ARCHIPELAGO_OPT_SEGMENT     "segment" | 
|  | #define ARCHIPELAGO_OPT_MPORT       "mport" | 
|  | #define ARCHIPELAGO_OPT_VPORT       "vport" | 
|  | #define ARCHIPELAGO_DFL_MPORT       1001 | 
|  | #define ARCHIPELAGO_DFL_VPORT       501 | 
|  |  | 
|  | #define archipelagolog(fmt, ...) \ | 
|  | do {                         \ | 
|  | fprintf(stderr, "archipelago\t%-24s: " fmt, __func__, ##__VA_ARGS__); \ | 
|  | } while (0) | 
|  |  | 
|  | typedef enum { | 
|  | ARCHIP_OP_READ, | 
|  | ARCHIP_OP_WRITE, | 
|  | ARCHIP_OP_FLUSH, | 
|  | ARCHIP_OP_VOLINFO, | 
|  | ARCHIP_OP_TRUNCATE, | 
|  | } ARCHIPCmd; | 
|  |  | 
|  | typedef struct ArchipelagoAIOCB { | 
|  | BlockAIOCB common; | 
|  | QEMUBH *bh; | 
|  | struct BDRVArchipelagoState *s; | 
|  | QEMUIOVector *qiov; | 
|  | ARCHIPCmd cmd; | 
|  | int status; | 
|  | int64_t size; | 
|  | int64_t ret; | 
|  | } ArchipelagoAIOCB; | 
|  |  | 
|  | typedef struct BDRVArchipelagoState { | 
|  | ArchipelagoAIOCB *event_acb; | 
|  | char *volname; | 
|  | char *segment_name; | 
|  | uint64_t size; | 
|  | /* Archipelago specific */ | 
|  | struct xseg *xseg; | 
|  | struct xseg_port *port; | 
|  | xport srcport; | 
|  | xport sport; | 
|  | xport mportno; | 
|  | xport vportno; | 
|  | QemuMutex archip_mutex; | 
|  | QemuCond archip_cond; | 
|  | bool is_signaled; | 
|  | /* Request handler specific */ | 
|  | QemuThread request_th; | 
|  | QemuCond request_cond; | 
|  | QemuMutex request_mutex; | 
|  | bool th_is_signaled; | 
|  | bool stopping; | 
|  | } BDRVArchipelagoState; | 
|  |  | 
|  | typedef struct ArchipelagoSegmentedRequest { | 
|  | size_t count; | 
|  | size_t total; | 
|  | int ref; | 
|  | int failed; | 
|  | } ArchipelagoSegmentedRequest; | 
|  |  | 
|  | typedef struct AIORequestData { | 
|  | const char *volname; | 
|  | off_t offset; | 
|  | size_t size; | 
|  | uint64_t bufidx; | 
|  | int ret; | 
|  | int op; | 
|  | ArchipelagoAIOCB *aio_cb; | 
|  | ArchipelagoSegmentedRequest *segreq; | 
|  | } AIORequestData; | 
|  |  | 
|  | static void qemu_archipelago_complete_aio(void *opaque); | 
|  |  | 
|  | static void init_local_signal(struct xseg *xseg, xport sport, xport srcport) | 
|  | { | 
|  | if (xseg && (sport != srcport)) { | 
|  | xseg_init_local_signal(xseg, srcport); | 
|  | sport = srcport; | 
|  | } | 
|  | } | 
|  |  | 
|  | static void archipelago_finish_aiocb(AIORequestData *reqdata) | 
|  | { | 
|  | if (reqdata->aio_cb->ret != reqdata->segreq->total) { | 
|  | reqdata->aio_cb->ret = -EIO; | 
|  | } else if (reqdata->aio_cb->ret == reqdata->segreq->total) { | 
|  | reqdata->aio_cb->ret = 0; | 
|  | } | 
|  | reqdata->aio_cb->bh = aio_bh_new( | 
|  | bdrv_get_aio_context(reqdata->aio_cb->common.bs), | 
|  | qemu_archipelago_complete_aio, reqdata | 
|  | ); | 
|  | qemu_bh_schedule(reqdata->aio_cb->bh); | 
|  | } | 
|  |  | 
|  | static int wait_reply(struct xseg *xseg, xport srcport, struct xseg_port *port, | 
|  | struct xseg_request *expected_req) | 
|  | { | 
|  | struct xseg_request *req; | 
|  | xseg_prepare_wait(xseg, srcport); | 
|  | void *psd = xseg_get_signal_desc(xseg, port); | 
|  | while (1) { | 
|  | req = xseg_receive(xseg, srcport, X_NONBLOCK); | 
|  | if (req) { | 
|  | if (req != expected_req) { | 
|  | archipelagolog("Unknown received request\n"); | 
|  | xseg_put_request(xseg, req, srcport); | 
|  | } else if (!(req->state & XS_SERVED)) { | 
|  | return -1; | 
|  | } else { | 
|  | break; | 
|  | } | 
|  | } | 
|  | xseg_wait_signal(xseg, psd, 100000UL); | 
|  | } | 
|  | xseg_cancel_wait(xseg, srcport); | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void xseg_request_handler(void *state) | 
|  | { | 
|  | BDRVArchipelagoState *s = (BDRVArchipelagoState *) state; | 
|  | void *psd = xseg_get_signal_desc(s->xseg, s->port); | 
|  | qemu_mutex_lock(&s->request_mutex); | 
|  |  | 
|  | while (!s->stopping) { | 
|  | struct xseg_request *req; | 
|  | void *data; | 
|  | xseg_prepare_wait(s->xseg, s->srcport); | 
|  | req = xseg_receive(s->xseg, s->srcport, X_NONBLOCK); | 
|  | if (req) { | 
|  | AIORequestData *reqdata; | 
|  | ArchipelagoSegmentedRequest *segreq; | 
|  | xseg_get_req_data(s->xseg, req, (void **)&reqdata); | 
|  |  | 
|  | switch (reqdata->op) { | 
|  | case ARCHIP_OP_READ: | 
|  | data = xseg_get_data(s->xseg, req); | 
|  | segreq = reqdata->segreq; | 
|  | segreq->count += req->serviced; | 
|  |  | 
|  | qemu_iovec_from_buf(reqdata->aio_cb->qiov, reqdata->bufidx, | 
|  | data, | 
|  | req->serviced); | 
|  |  | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  |  | 
|  | if (atomic_fetch_dec(&segreq->ref) == 1) { | 
|  | if (!segreq->failed) { | 
|  | reqdata->aio_cb->ret = segreq->count; | 
|  | archipelago_finish_aiocb(reqdata); | 
|  | g_free(segreq); | 
|  | } else { | 
|  | g_free(segreq); | 
|  | g_free(reqdata); | 
|  | } | 
|  | } else { | 
|  | g_free(reqdata); | 
|  | } | 
|  | break; | 
|  | case ARCHIP_OP_WRITE: | 
|  | case ARCHIP_OP_FLUSH: | 
|  | segreq = reqdata->segreq; | 
|  | segreq->count += req->serviced; | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  |  | 
|  | if (atomic_fetch_dec(&segreq->ref) == 1) { | 
|  | if (!segreq->failed) { | 
|  | reqdata->aio_cb->ret = segreq->count; | 
|  | archipelago_finish_aiocb(reqdata); | 
|  | g_free(segreq); | 
|  | } else { | 
|  | g_free(segreq); | 
|  | g_free(reqdata); | 
|  | } | 
|  | } else { | 
|  | g_free(reqdata); | 
|  | } | 
|  | break; | 
|  | case ARCHIP_OP_VOLINFO: | 
|  | case ARCHIP_OP_TRUNCATE: | 
|  | s->is_signaled = true; | 
|  | qemu_cond_signal(&s->archip_cond); | 
|  | break; | 
|  | } | 
|  | } else { | 
|  | xseg_wait_signal(s->xseg, psd, 100000UL); | 
|  | } | 
|  | xseg_cancel_wait(s->xseg, s->srcport); | 
|  | } | 
|  |  | 
|  | s->th_is_signaled = true; | 
|  | qemu_cond_signal(&s->request_cond); | 
|  | qemu_mutex_unlock(&s->request_mutex); | 
|  | qemu_thread_exit(NULL); | 
|  | } | 
|  |  | 
|  | static int qemu_archipelago_xseg_init(BDRVArchipelagoState *s) | 
|  | { | 
|  | if (xseg_initialize()) { | 
|  | archipelagolog("Cannot initialize XSEG\n"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | s->xseg = xseg_join("posix", s->segment_name, | 
|  | "posixfd", NULL); | 
|  | if (!s->xseg) { | 
|  | archipelagolog("Cannot join XSEG shared memory segment\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | s->port = xseg_bind_dynport(s->xseg); | 
|  | s->srcport = s->port->portno; | 
|  | init_local_signal(s->xseg, s->sport, s->srcport); | 
|  | return 0; | 
|  |  | 
|  | err_exit: | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static int qemu_archipelago_init(BDRVArchipelagoState *s) | 
|  | { | 
|  | int ret; | 
|  |  | 
|  | ret = qemu_archipelago_xseg_init(s); | 
|  | if (ret < 0) { | 
|  | error_report("Cannot initialize XSEG. Aborting..."); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | qemu_cond_init(&s->archip_cond); | 
|  | qemu_mutex_init(&s->archip_mutex); | 
|  | qemu_cond_init(&s->request_cond); | 
|  | qemu_mutex_init(&s->request_mutex); | 
|  | s->th_is_signaled = false; | 
|  | qemu_thread_create(&s->request_th, "xseg_io_th", | 
|  | (void *) xseg_request_handler, | 
|  | (void *) s, QEMU_THREAD_JOINABLE); | 
|  |  | 
|  | err_exit: | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void qemu_archipelago_complete_aio(void *opaque) | 
|  | { | 
|  | AIORequestData *reqdata = (AIORequestData *) opaque; | 
|  | ArchipelagoAIOCB *aio_cb = (ArchipelagoAIOCB *) reqdata->aio_cb; | 
|  |  | 
|  | qemu_bh_delete(aio_cb->bh); | 
|  | aio_cb->common.cb(aio_cb->common.opaque, aio_cb->ret); | 
|  | aio_cb->status = 0; | 
|  |  | 
|  | qemu_aio_unref(aio_cb); | 
|  | g_free(reqdata); | 
|  | } | 
|  |  | 
|  | static void xseg_find_port(char *pstr, const char *needle, xport *aport) | 
|  | { | 
|  | const char *a; | 
|  | char *endptr = NULL; | 
|  | unsigned long port; | 
|  | if (strstart(pstr, needle, &a)) { | 
|  | if (strlen(a) > 0) { | 
|  | port = strtoul(a, &endptr, 10); | 
|  | if (strlen(endptr)) { | 
|  | *aport = -2; | 
|  | return; | 
|  | } | 
|  | *aport = (xport) port; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static void xseg_find_segment(char *pstr, const char *needle, | 
|  | char **segment_name) | 
|  | { | 
|  | const char *a; | 
|  | if (strstart(pstr, needle, &a)) { | 
|  | if (strlen(a) > 0) { | 
|  | *segment_name = g_strdup(a); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | static void parse_filename_opts(const char *filename, Error **errp, | 
|  | char **volume, char **segment_name, | 
|  | xport *mport, xport *vport) | 
|  | { | 
|  | const char *start; | 
|  | char *tokens[4], *ds; | 
|  | int idx; | 
|  | xport lmport = NoPort, lvport = NoPort; | 
|  |  | 
|  | strstart(filename, "archipelago:", &start); | 
|  |  | 
|  | ds = g_strdup(start); | 
|  | tokens[0] = strtok(ds, "/"); | 
|  | tokens[1] = strtok(NULL, ":"); | 
|  | tokens[2] = strtok(NULL, ":"); | 
|  | tokens[3] = strtok(NULL, "\0"); | 
|  |  | 
|  | if (!strlen(tokens[0])) { | 
|  | error_setg(errp, "volume name must be specified first"); | 
|  | g_free(ds); | 
|  | return; | 
|  | } | 
|  |  | 
|  | for (idx = 1; idx < 4; idx++) { | 
|  | if (tokens[idx] != NULL) { | 
|  | if (strstart(tokens[idx], "mport=", NULL)) { | 
|  | xseg_find_port(tokens[idx], "mport=", &lmport); | 
|  | } | 
|  | if (strstart(tokens[idx], "vport=", NULL)) { | 
|  | xseg_find_port(tokens[idx], "vport=", &lvport); | 
|  | } | 
|  | if (strstart(tokens[idx], "segment=", NULL)) { | 
|  | xseg_find_segment(tokens[idx], "segment=", segment_name); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if ((lmport == -2) || (lvport == -2)) { | 
|  | error_setg(errp, "mport and/or vport must be set"); | 
|  | g_free(ds); | 
|  | return; | 
|  | } | 
|  | *volume = g_strdup(tokens[0]); | 
|  | *mport = lmport; | 
|  | *vport = lvport; | 
|  | g_free(ds); | 
|  | } | 
|  |  | 
|  | static void archipelago_parse_filename(const char *filename, QDict *options, | 
|  | Error **errp) | 
|  | { | 
|  | const char *start; | 
|  | char *volume = NULL, *segment_name = NULL; | 
|  | xport mport = NoPort, vport = NoPort; | 
|  |  | 
|  | if (qdict_haskey(options, ARCHIPELAGO_OPT_VOLUME) | 
|  | || qdict_haskey(options, ARCHIPELAGO_OPT_SEGMENT) | 
|  | || qdict_haskey(options, ARCHIPELAGO_OPT_MPORT) | 
|  | || qdict_haskey(options, ARCHIPELAGO_OPT_VPORT)) { | 
|  | error_setg(errp, "volume/mport/vport/segment and a file name may not" | 
|  | " be specified at the same time"); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (!strstart(filename, "archipelago:", &start)) { | 
|  | error_setg(errp, "File name must start with 'archipelago:'"); | 
|  | return; | 
|  | } | 
|  |  | 
|  | if (!strlen(start) || strstart(start, "/", NULL)) { | 
|  | error_setg(errp, "volume name must be specified"); | 
|  | return; | 
|  | } | 
|  |  | 
|  | parse_filename_opts(filename, errp, &volume, &segment_name, &mport, &vport); | 
|  |  | 
|  | if (volume) { | 
|  | qdict_put(options, ARCHIPELAGO_OPT_VOLUME, qstring_from_str(volume)); | 
|  | g_free(volume); | 
|  | } | 
|  | if (segment_name) { | 
|  | qdict_put(options, ARCHIPELAGO_OPT_SEGMENT, | 
|  | qstring_from_str(segment_name)); | 
|  | g_free(segment_name); | 
|  | } | 
|  | if (mport != NoPort) { | 
|  | qdict_put(options, ARCHIPELAGO_OPT_MPORT, qint_from_int(mport)); | 
|  | } | 
|  | if (vport != NoPort) { | 
|  | qdict_put(options, ARCHIPELAGO_OPT_VPORT, qint_from_int(vport)); | 
|  | } | 
|  | } | 
|  |  | 
|  | static QemuOptsList archipelago_runtime_opts = { | 
|  | .name = "archipelago", | 
|  | .head = QTAILQ_HEAD_INITIALIZER(archipelago_runtime_opts.head), | 
|  | .desc = { | 
|  | { | 
|  | .name = ARCHIPELAGO_OPT_VOLUME, | 
|  | .type = QEMU_OPT_STRING, | 
|  | .help = "Name of the volume image", | 
|  | }, | 
|  | { | 
|  | .name = ARCHIPELAGO_OPT_SEGMENT, | 
|  | .type = QEMU_OPT_STRING, | 
|  | .help = "Name of the Archipelago shared memory segment", | 
|  | }, | 
|  | { | 
|  | .name = ARCHIPELAGO_OPT_MPORT, | 
|  | .type = QEMU_OPT_NUMBER, | 
|  | .help = "Archipelago mapperd port number" | 
|  | }, | 
|  | { | 
|  | .name = ARCHIPELAGO_OPT_VPORT, | 
|  | .type = QEMU_OPT_NUMBER, | 
|  | .help = "Archipelago vlmcd port number" | 
|  |  | 
|  | }, | 
|  | { /* end of list */ } | 
|  | }, | 
|  | }; | 
|  |  | 
|  | static int qemu_archipelago_open(BlockDriverState *bs, | 
|  | QDict *options, | 
|  | int bdrv_flags, | 
|  | Error **errp) | 
|  | { | 
|  | int ret = 0; | 
|  | const char *volume, *segment_name; | 
|  | QemuOpts *opts; | 
|  | Error *local_err = NULL; | 
|  | BDRVArchipelagoState *s = bs->opaque; | 
|  |  | 
|  | opts = qemu_opts_create(&archipelago_runtime_opts, NULL, 0, &error_abort); | 
|  | qemu_opts_absorb_qdict(opts, options, &local_err); | 
|  | if (local_err) { | 
|  | error_propagate(errp, local_err); | 
|  | ret = -EINVAL; | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | s->mportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_MPORT, | 
|  | ARCHIPELAGO_DFL_MPORT); | 
|  | s->vportno = qemu_opt_get_number(opts, ARCHIPELAGO_OPT_VPORT, | 
|  | ARCHIPELAGO_DFL_VPORT); | 
|  |  | 
|  | segment_name = qemu_opt_get(opts, ARCHIPELAGO_OPT_SEGMENT); | 
|  | if (segment_name == NULL) { | 
|  | s->segment_name = g_strdup("archipelago"); | 
|  | } else { | 
|  | s->segment_name = g_strdup(segment_name); | 
|  | } | 
|  |  | 
|  | volume = qemu_opt_get(opts, ARCHIPELAGO_OPT_VOLUME); | 
|  | if (volume == NULL) { | 
|  | error_setg(errp, "archipelago block driver requires the 'volume'" | 
|  | " option"); | 
|  | ret = -EINVAL; | 
|  | goto err_exit; | 
|  | } | 
|  | s->volname = g_strdup(volume); | 
|  |  | 
|  | /* Initialize XSEG, join shared memory segment */ | 
|  | ret = qemu_archipelago_init(s); | 
|  | if (ret < 0) { | 
|  | error_setg(errp, "cannot initialize XSEG and join shared " | 
|  | "memory segment"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | qemu_opts_del(opts); | 
|  | return 0; | 
|  |  | 
|  | err_exit: | 
|  | g_free(s->volname); | 
|  | g_free(s->segment_name); | 
|  | qemu_opts_del(opts); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static void qemu_archipelago_close(BlockDriverState *bs) | 
|  | { | 
|  | int r, targetlen; | 
|  | char *target; | 
|  | struct xseg_request *req; | 
|  | BDRVArchipelagoState *s = bs->opaque; | 
|  |  | 
|  | s->stopping = true; | 
|  |  | 
|  | qemu_mutex_lock(&s->request_mutex); | 
|  | while (!s->th_is_signaled) { | 
|  | qemu_cond_wait(&s->request_cond, | 
|  | &s->request_mutex); | 
|  | } | 
|  | qemu_mutex_unlock(&s->request_mutex); | 
|  | qemu_thread_join(&s->request_th); | 
|  | qemu_cond_destroy(&s->request_cond); | 
|  | qemu_mutex_destroy(&s->request_mutex); | 
|  |  | 
|  | qemu_cond_destroy(&s->archip_cond); | 
|  | qemu_mutex_destroy(&s->archip_mutex); | 
|  |  | 
|  | targetlen = strlen(s->volname); | 
|  | req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); | 
|  | if (!req) { | 
|  | archipelagolog("Cannot get XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | r = xseg_prep_request(s->xseg, req, targetlen, 0); | 
|  | if (r < 0) { | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | archipelagolog("Cannot prepare XSEG close request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | target = xseg_get_target(s->xseg, req); | 
|  | memcpy(target, s->volname, targetlen); | 
|  | req->size = req->datalen; | 
|  | req->offset = 0; | 
|  | req->op = X_CLOSE; | 
|  |  | 
|  | xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); | 
|  | if (p == NoPort) { | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | archipelagolog("Cannot submit XSEG close request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | xseg_signal(s->xseg, p); | 
|  | wait_reply(s->xseg, s->srcport, s->port, req); | 
|  |  | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  |  | 
|  | err_exit: | 
|  | g_free(s->volname); | 
|  | g_free(s->segment_name); | 
|  | xseg_quit_local_signal(s->xseg, s->srcport); | 
|  | xseg_leave_dynport(s->xseg, s->port); | 
|  | xseg_leave(s->xseg); | 
|  | } | 
|  |  | 
|  | static int qemu_archipelago_create_volume(Error **errp, const char *volname, | 
|  | char *segment_name, | 
|  | uint64_t size, xport mportno, | 
|  | xport vportno) | 
|  | { | 
|  | int ret, targetlen; | 
|  | struct xseg *xseg = NULL; | 
|  | struct xseg_request *req; | 
|  | struct xseg_request_clone *xclone; | 
|  | struct xseg_port *port; | 
|  | xport srcport = NoPort, sport = NoPort; | 
|  | char *target; | 
|  |  | 
|  | /* Try default values if none has been set */ | 
|  | if (mportno == (xport) -1) { | 
|  | mportno = ARCHIPELAGO_DFL_MPORT; | 
|  | } | 
|  |  | 
|  | if (vportno == (xport) -1) { | 
|  | vportno = ARCHIPELAGO_DFL_VPORT; | 
|  | } | 
|  |  | 
|  | if (xseg_initialize()) { | 
|  | error_setg(errp, "Cannot initialize XSEG"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | xseg = xseg_join("posix", segment_name, | 
|  | "posixfd", NULL); | 
|  |  | 
|  | if (!xseg) { | 
|  | error_setg(errp, "Cannot join XSEG shared memory segment"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | port = xseg_bind_dynport(xseg); | 
|  | srcport = port->portno; | 
|  | init_local_signal(xseg, sport, srcport); | 
|  |  | 
|  | req = xseg_get_request(xseg, srcport, mportno, X_ALLOC); | 
|  | if (!req) { | 
|  | error_setg(errp, "Cannot get XSEG request"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | targetlen = strlen(volname); | 
|  | ret = xseg_prep_request(xseg, req, targetlen, | 
|  | sizeof(struct xseg_request_clone)); | 
|  | if (ret < 0) { | 
|  | error_setg(errp, "Cannot prepare XSEG request"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | target = xseg_get_target(xseg, req); | 
|  | if (!target) { | 
|  | error_setg(errp, "Cannot get XSEG target."); | 
|  | goto err_exit; | 
|  | } | 
|  | memcpy(target, volname, targetlen); | 
|  | xclone = (struct xseg_request_clone *) xseg_get_data(xseg, req); | 
|  | memset(xclone->target, 0 , XSEG_MAX_TARGETLEN); | 
|  | xclone->targetlen = 0; | 
|  | xclone->size = size; | 
|  | req->offset = 0; | 
|  | req->size = req->datalen; | 
|  | req->op = X_CLONE; | 
|  |  | 
|  | xport p = xseg_submit(xseg, req, srcport, X_ALLOC); | 
|  | if (p == NoPort) { | 
|  | error_setg(errp, "Could not submit XSEG request"); | 
|  | goto err_exit; | 
|  | } | 
|  | xseg_signal(xseg, p); | 
|  |  | 
|  | ret = wait_reply(xseg, srcport, port, req); | 
|  | if (ret < 0) { | 
|  | error_setg(errp, "wait_reply() error."); | 
|  | } | 
|  |  | 
|  | xseg_put_request(xseg, req, srcport); | 
|  | xseg_quit_local_signal(xseg, srcport); | 
|  | xseg_leave_dynport(xseg, port); | 
|  | xseg_leave(xseg); | 
|  | return ret; | 
|  |  | 
|  | err_exit: | 
|  | xseg_put_request(xseg, req, srcport); | 
|  | xseg_quit_local_signal(xseg, srcport); | 
|  | xseg_leave_dynport(xseg, port); | 
|  | xseg_leave(xseg); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | static int qemu_archipelago_create(const char *filename, | 
|  | QemuOpts *options, | 
|  | Error **errp) | 
|  | { | 
|  | int ret = 0; | 
|  | uint64_t total_size = 0; | 
|  | char *volname = NULL, *segment_name = NULL; | 
|  | const char *start; | 
|  | xport mport = NoPort, vport = NoPort; | 
|  |  | 
|  | if (!strstart(filename, "archipelago:", &start)) { | 
|  | error_setg(errp, "File name must start with 'archipelago:'"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | if (!strlen(start) || strstart(start, "/", NULL)) { | 
|  | error_setg(errp, "volume name must be specified"); | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | parse_filename_opts(filename, errp, &volname, &segment_name, &mport, | 
|  | &vport); | 
|  | total_size = ROUND_UP(qemu_opt_get_size_del(options, BLOCK_OPT_SIZE, 0), | 
|  | BDRV_SECTOR_SIZE); | 
|  |  | 
|  | if (segment_name == NULL) { | 
|  | segment_name = g_strdup("archipelago"); | 
|  | } | 
|  |  | 
|  | /* Create an Archipelago volume */ | 
|  | ret = qemu_archipelago_create_volume(errp, volname, segment_name, | 
|  | total_size, mport, | 
|  | vport); | 
|  |  | 
|  | g_free(volname); | 
|  | g_free(segment_name); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static const AIOCBInfo archipelago_aiocb_info = { | 
|  | .aiocb_size = sizeof(ArchipelagoAIOCB), | 
|  | }; | 
|  |  | 
|  | static int archipelago_submit_request(BDRVArchipelagoState *s, | 
|  | uint64_t bufidx, | 
|  | size_t count, | 
|  | off_t offset, | 
|  | ArchipelagoAIOCB *aio_cb, | 
|  | ArchipelagoSegmentedRequest *segreq, | 
|  | int op) | 
|  | { | 
|  | int ret, targetlen; | 
|  | char *target; | 
|  | void *data = NULL; | 
|  | struct xseg_request *req; | 
|  | AIORequestData *reqdata = g_new(AIORequestData, 1); | 
|  |  | 
|  | targetlen = strlen(s->volname); | 
|  | req = xseg_get_request(s->xseg, s->srcport, s->vportno, X_ALLOC); | 
|  | if (!req) { | 
|  | archipelagolog("Cannot get XSEG request\n"); | 
|  | goto err_exit2; | 
|  | } | 
|  | ret = xseg_prep_request(s->xseg, req, targetlen, count); | 
|  | if (ret < 0) { | 
|  | archipelagolog("Cannot prepare XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | target = xseg_get_target(s->xseg, req); | 
|  | if (!target) { | 
|  | archipelagolog("Cannot get XSEG target\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | memcpy(target, s->volname, targetlen); | 
|  | req->size = count; | 
|  | req->offset = offset; | 
|  |  | 
|  | switch (op) { | 
|  | case ARCHIP_OP_READ: | 
|  | req->op = X_READ; | 
|  | break; | 
|  | case ARCHIP_OP_WRITE: | 
|  | req->op = X_WRITE; | 
|  | break; | 
|  | case ARCHIP_OP_FLUSH: | 
|  | req->op = X_FLUSH; | 
|  | break; | 
|  | } | 
|  | reqdata->volname = s->volname; | 
|  | reqdata->offset = offset; | 
|  | reqdata->size = count; | 
|  | reqdata->bufidx = bufidx; | 
|  | reqdata->aio_cb = aio_cb; | 
|  | reqdata->segreq = segreq; | 
|  | reqdata->op = op; | 
|  |  | 
|  | xseg_set_req_data(s->xseg, req, reqdata); | 
|  | if (op == ARCHIP_OP_WRITE) { | 
|  | data = xseg_get_data(s->xseg, req); | 
|  | if (!data) { | 
|  | archipelagolog("Cannot get XSEG data\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | qemu_iovec_to_buf(aio_cb->qiov, bufidx, data, count); | 
|  | } | 
|  |  | 
|  | xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); | 
|  | if (p == NoPort) { | 
|  | archipelagolog("Could not submit XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | xseg_signal(s->xseg, p); | 
|  | return 0; | 
|  |  | 
|  | err_exit: | 
|  | g_free(reqdata); | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | return -EIO; | 
|  | err_exit2: | 
|  | g_free(reqdata); | 
|  | return -EIO; | 
|  | } | 
|  |  | 
|  | static int archipelago_aio_segmented_rw(BDRVArchipelagoState *s, | 
|  | size_t count, | 
|  | off_t offset, | 
|  | ArchipelagoAIOCB *aio_cb, | 
|  | int op) | 
|  | { | 
|  | int ret, segments_nr; | 
|  | size_t pos = 0; | 
|  | ArchipelagoSegmentedRequest *segreq; | 
|  |  | 
|  | segreq = g_new0(ArchipelagoSegmentedRequest, 1); | 
|  |  | 
|  | if (op == ARCHIP_OP_FLUSH) { | 
|  | segments_nr = 1; | 
|  | } else { | 
|  | segments_nr = (int)(count / MAX_REQUEST_SIZE) + \ | 
|  | ((count % MAX_REQUEST_SIZE) ? 1 : 0); | 
|  | } | 
|  | segreq->total = count; | 
|  | atomic_mb_set(&segreq->ref, segments_nr); | 
|  |  | 
|  | while (segments_nr > 1) { | 
|  | ret = archipelago_submit_request(s, pos, | 
|  | MAX_REQUEST_SIZE, | 
|  | offset + pos, | 
|  | aio_cb, segreq, op); | 
|  |  | 
|  | if (ret < 0) { | 
|  | goto err_exit; | 
|  | } | 
|  | count -= MAX_REQUEST_SIZE; | 
|  | pos += MAX_REQUEST_SIZE; | 
|  | segments_nr--; | 
|  | } | 
|  | ret = archipelago_submit_request(s, pos, count, offset + pos, | 
|  | aio_cb, segreq, op); | 
|  |  | 
|  | if (ret < 0) { | 
|  | goto err_exit; | 
|  | } | 
|  | return 0; | 
|  |  | 
|  | err_exit: | 
|  | segreq->failed = 1; | 
|  | if (atomic_fetch_sub(&segreq->ref, segments_nr) == segments_nr) { | 
|  | g_free(segreq); | 
|  | } | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static BlockAIOCB *qemu_archipelago_aio_rw(BlockDriverState *bs, | 
|  | int64_t sector_num, | 
|  | QEMUIOVector *qiov, | 
|  | int nb_sectors, | 
|  | BlockCompletionFunc *cb, | 
|  | void *opaque, | 
|  | int op) | 
|  | { | 
|  | ArchipelagoAIOCB *aio_cb; | 
|  | BDRVArchipelagoState *s = bs->opaque; | 
|  | int64_t size, off; | 
|  | int ret; | 
|  |  | 
|  | aio_cb = qemu_aio_get(&archipelago_aiocb_info, bs, cb, opaque); | 
|  | aio_cb->cmd = op; | 
|  | aio_cb->qiov = qiov; | 
|  |  | 
|  | aio_cb->ret = 0; | 
|  | aio_cb->s = s; | 
|  | aio_cb->status = -EINPROGRESS; | 
|  |  | 
|  | off = sector_num * BDRV_SECTOR_SIZE; | 
|  | size = nb_sectors * BDRV_SECTOR_SIZE; | 
|  | aio_cb->size = size; | 
|  |  | 
|  | ret = archipelago_aio_segmented_rw(s, size, off, | 
|  | aio_cb, op); | 
|  | if (ret < 0) { | 
|  | goto err_exit; | 
|  | } | 
|  | return &aio_cb->common; | 
|  |  | 
|  | err_exit: | 
|  | error_report("qemu_archipelago_aio_rw(): I/O Error"); | 
|  | qemu_aio_unref(aio_cb); | 
|  | return NULL; | 
|  | } | 
|  |  | 
|  | static BlockAIOCB *qemu_archipelago_aio_readv(BlockDriverState *bs, | 
|  | int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | 
|  | BlockCompletionFunc *cb, void *opaque) | 
|  | { | 
|  | return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, | 
|  | opaque, ARCHIP_OP_READ); | 
|  | } | 
|  |  | 
|  | static BlockAIOCB *qemu_archipelago_aio_writev(BlockDriverState *bs, | 
|  | int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, | 
|  | BlockCompletionFunc *cb, void *opaque) | 
|  | { | 
|  | return qemu_archipelago_aio_rw(bs, sector_num, qiov, nb_sectors, cb, | 
|  | opaque, ARCHIP_OP_WRITE); | 
|  | } | 
|  |  | 
|  | static int64_t archipelago_volume_info(BDRVArchipelagoState *s) | 
|  | { | 
|  | uint64_t size; | 
|  | int ret, targetlen; | 
|  | struct xseg_request *req; | 
|  | struct xseg_reply_info *xinfo; | 
|  | AIORequestData *reqdata = g_new(AIORequestData, 1); | 
|  |  | 
|  | const char *volname = s->volname; | 
|  | targetlen = strlen(volname); | 
|  | req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); | 
|  | if (!req) { | 
|  | archipelagolog("Cannot get XSEG request\n"); | 
|  | goto err_exit2; | 
|  | } | 
|  | ret = xseg_prep_request(s->xseg, req, targetlen, | 
|  | sizeof(struct xseg_reply_info)); | 
|  | if (ret < 0) { | 
|  | archipelagolog("Cannot prepare XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | char *target = xseg_get_target(s->xseg, req); | 
|  | if (!target) { | 
|  | archipelagolog("Cannot get XSEG target\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | memcpy(target, volname, targetlen); | 
|  | req->size = req->datalen; | 
|  | req->offset = 0; | 
|  | req->op = X_INFO; | 
|  |  | 
|  | reqdata->op = ARCHIP_OP_VOLINFO; | 
|  | reqdata->volname = volname; | 
|  | xseg_set_req_data(s->xseg, req, reqdata); | 
|  |  | 
|  | xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); | 
|  | if (p == NoPort) { | 
|  | archipelagolog("Cannot submit XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | xseg_signal(s->xseg, p); | 
|  | qemu_mutex_lock(&s->archip_mutex); | 
|  | while (!s->is_signaled) { | 
|  | qemu_cond_wait(&s->archip_cond, &s->archip_mutex); | 
|  | } | 
|  | s->is_signaled = false; | 
|  | qemu_mutex_unlock(&s->archip_mutex); | 
|  |  | 
|  | xinfo = (struct xseg_reply_info *) xseg_get_data(s->xseg, req); | 
|  | size = xinfo->size; | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | g_free(reqdata); | 
|  | s->size = size; | 
|  | return size; | 
|  |  | 
|  | err_exit: | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | err_exit2: | 
|  | g_free(reqdata); | 
|  | return -EIO; | 
|  | } | 
|  |  | 
|  | static int64_t qemu_archipelago_getlength(BlockDriverState *bs) | 
|  | { | 
|  | int64_t ret; | 
|  | BDRVArchipelagoState *s = bs->opaque; | 
|  |  | 
|  | ret = archipelago_volume_info(s); | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset) | 
|  | { | 
|  | int ret, targetlen; | 
|  | struct xseg_request *req; | 
|  | BDRVArchipelagoState *s = bs->opaque; | 
|  | AIORequestData *reqdata = g_new(AIORequestData, 1); | 
|  |  | 
|  | const char *volname = s->volname; | 
|  | targetlen = strlen(volname); | 
|  | req = xseg_get_request(s->xseg, s->srcport, s->mportno, X_ALLOC); | 
|  | if (!req) { | 
|  | archipelagolog("Cannot get XSEG request\n"); | 
|  | goto err_exit2; | 
|  | } | 
|  |  | 
|  | ret = xseg_prep_request(s->xseg, req, targetlen, 0); | 
|  | if (ret < 0) { | 
|  | archipelagolog("Cannot prepare XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | char *target = xseg_get_target(s->xseg, req); | 
|  | if (!target) { | 
|  | archipelagolog("Cannot get XSEG target\n"); | 
|  | goto err_exit; | 
|  | } | 
|  | memcpy(target, volname, targetlen); | 
|  | req->offset = offset; | 
|  | req->op = X_TRUNCATE; | 
|  |  | 
|  | reqdata->op = ARCHIP_OP_TRUNCATE; | 
|  | reqdata->volname = volname; | 
|  |  | 
|  | xseg_set_req_data(s->xseg, req, reqdata); | 
|  |  | 
|  | xport p = xseg_submit(s->xseg, req, s->srcport, X_ALLOC); | 
|  | if (p == NoPort) { | 
|  | archipelagolog("Cannot submit XSEG request\n"); | 
|  | goto err_exit; | 
|  | } | 
|  |  | 
|  | xseg_signal(s->xseg, p); | 
|  | qemu_mutex_lock(&s->archip_mutex); | 
|  | while (!s->is_signaled) { | 
|  | qemu_cond_wait(&s->archip_cond, &s->archip_mutex); | 
|  | } | 
|  | s->is_signaled = false; | 
|  | qemu_mutex_unlock(&s->archip_mutex); | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | g_free(reqdata); | 
|  | return 0; | 
|  |  | 
|  | err_exit: | 
|  | xseg_put_request(s->xseg, req, s->srcport); | 
|  | err_exit2: | 
|  | g_free(reqdata); | 
|  | return -EIO; | 
|  | } | 
|  |  | 
|  | static QemuOptsList qemu_archipelago_create_opts = { | 
|  | .name = "archipelago-create-opts", | 
|  | .head = QTAILQ_HEAD_INITIALIZER(qemu_archipelago_create_opts.head), | 
|  | .desc = { | 
|  | { | 
|  | .name = BLOCK_OPT_SIZE, | 
|  | .type = QEMU_OPT_SIZE, | 
|  | .help = "Virtual disk size" | 
|  | }, | 
|  | { /* end of list */ } | 
|  | } | 
|  | }; | 
|  |  | 
|  | static BlockAIOCB *qemu_archipelago_aio_flush(BlockDriverState *bs, | 
|  | BlockCompletionFunc *cb, void *opaque) | 
|  | { | 
|  | return qemu_archipelago_aio_rw(bs, 0, NULL, 0, cb, opaque, | 
|  | ARCHIP_OP_FLUSH); | 
|  | } | 
|  |  | 
|  | static BlockDriver bdrv_archipelago = { | 
|  | .format_name         = "archipelago", | 
|  | .protocol_name       = "archipelago", | 
|  | .instance_size       = sizeof(BDRVArchipelagoState), | 
|  | .bdrv_parse_filename = archipelago_parse_filename, | 
|  | .bdrv_file_open      = qemu_archipelago_open, | 
|  | .bdrv_close          = qemu_archipelago_close, | 
|  | .bdrv_create         = qemu_archipelago_create, | 
|  | .bdrv_getlength      = qemu_archipelago_getlength, | 
|  | .bdrv_truncate       = qemu_archipelago_truncate, | 
|  | .bdrv_aio_readv      = qemu_archipelago_aio_readv, | 
|  | .bdrv_aio_writev     = qemu_archipelago_aio_writev, | 
|  | .bdrv_aio_flush      = qemu_archipelago_aio_flush, | 
|  | .bdrv_has_zero_init  = bdrv_has_zero_init_1, | 
|  | .create_opts         = &qemu_archipelago_create_opts, | 
|  | }; | 
|  |  | 
|  | static void bdrv_archipelago_init(void) | 
|  | { | 
|  | bdrv_register(&bdrv_archipelago); | 
|  | } | 
|  |  | 
|  | block_init(bdrv_archipelago_init); |