Merge remote-tracking branch 'remotes/ehabkost/tags/x86-pull-request' into staging
X86 queue, 2015-07-07
Patch "target-i386: emulate CPUID level of real hardware" was removed after the
2015-07-03 pull request.
# gpg: Signature made Tue Jul 7 15:46:23 2015 BST using RSA key ID 984DC5A6
# gpg: Good signature from "Eduardo Habkost <ehabkost@redhat.com>"
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg: It is not certain that the signature belongs to the owner.
# Primary key fingerprint: 5A32 2FD5 ABC4 D3DB ACCF D1AA 2807 936F 984D C5A6
* remotes/ehabkost/tags/x86-pull-request:
target-i386: avoid overflow in the tsc-frequency property
i386: Introduce ARAT CPU feature
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
diff --git a/block.c b/block.c
index 7e130cc..5e80336 100644
--- a/block.c
+++ b/block.c
@@ -1271,7 +1271,7 @@
QemuOpts *opts = NULL;
QDict *snapshot_options;
BlockDriverState *bs_snapshot;
- Error *local_err;
+ Error *local_err = NULL;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1841,9 +1841,9 @@
if (bs->job) {
block_job_cancel_sync(bs->job);
}
- bdrv_drain_all(); /* complete I/O */
+ bdrv_drain(bs); /* complete I/O */
bdrv_flush(bs);
- bdrv_drain_all(); /* in case flush left pending I/O */
+ bdrv_drain(bs); /* in case flush left pending I/O */
notifier_list_notify(&bs->close_notifiers, bs);
if (bs->drv) {
@@ -3906,7 +3906,7 @@
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
- bdrv_drain_all(); /* ensure there are no in-flight requests */
+ bdrv_drain(bs); /* ensure there are no in-flight requests */
bdrv_detach_aio_context(bs);
diff --git a/block/io.c b/block/io.c
index 305e0d9..d4bc83b 100644
--- a/block/io.c
+++ b/block/io.c
@@ -236,12 +236,12 @@
/*
* Wait for pending requests to complete on a single BlockDriverState subtree
*
- * See the warning in bdrv_drain_all(). This function can only be called if
- * you are sure nothing can generate I/O because you have op blockers
- * installed.
- *
* Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
* AioContext.
+ *
+ * Only this BlockDriverState's AioContext is run, so in-flight requests must
+ * not depend on events in other AioContexts. In that case, use
+ * bdrv_drain_all() instead.
*/
void bdrv_drain(BlockDriverState *bs)
{
@@ -260,12 +260,6 @@
*
* This function does not flush data to disk, use bdrv_flush_all() for that
* after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete. Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
*/
void bdrv_drain_all(void)
{
@@ -288,6 +282,12 @@
}
}
+ /* Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a
+ * coroutine can submit an I/O request to another device in response to
+ * request completion. Therefore we must keep looping until there was no
+ * more activity rather than simply draining each device independently.
+ */
while (busy) {
busy = false;
diff --git a/block/mirror.c b/block/mirror.c
index 8888cea..d409337 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -708,6 +708,8 @@
s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
+ g_free(s->replaces);
+ block_job_release(bs);
return;
}
bdrv_set_enable_write_cache(s->target, true);
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index ed92a09..53b8afc 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -281,9 +281,6 @@
i = min_lru_index;
trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index cbe6574..855febe 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -2430,7 +2430,8 @@
struct stat st;
if (strstart(filename, "/dev/fd", NULL) &&
- !strstart(filename, "/dev/fdset/", NULL)) {
+ !strstart(filename, "/dev/fdset/", NULL) &&
+ !strstart(filename, "/dev/fd/", NULL)) {
prio = 50;
}
diff --git a/block/snapshot.c b/block/snapshot.c
index 19395ae..49e143e 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -239,7 +239,7 @@
}
/* drain all pending i/o before deleting snapshot */
- bdrv_drain_all();
+ bdrv_drain(bs);
if (drv->bdrv_snapshot_delete) {
return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
diff --git a/blockjob.c b/blockjob.c
index ec46fad..62bb906 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -66,10 +66,7 @@
block_job_set_speed(job, speed, &local_err);
if (local_err) {
- bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
- error_free(job->blocker);
- g_free(job);
+ block_job_release(bs);
error_propagate(errp, local_err);
return NULL;
}
@@ -77,16 +74,23 @@
return job;
}
+void block_job_release(BlockDriverState *bs)
+{
+ BlockJob *job = bs->job;
+
+ bs->job = NULL;
+ bdrv_op_unblock_all(bs, job->blocker);
+ error_free(job->blocker);
+ g_free(job);
+}
+
void block_job_completed(BlockJob *job, int ret)
{
BlockDriverState *bs = job->bs;
assert(bs->job == job);
job->cb(job->opaque, ret);
- bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
- error_free(job->blocker);
- g_free(job);
+ block_job_release(bs);
}
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt
index 4c13d48..d92cc48 100644
--- a/docs/qmp/qmp-events.txt
+++ b/docs/qmp/qmp-events.txt
@@ -473,6 +473,20 @@
{ "timestamp": {"seconds": 1290688046, "microseconds": 417172},
"event": "SPICE_MIGRATE_COMPLETED" }
+MIGRATION
+---------
+
+Emitted when a migration event happens
+
+Data: None.
+
+ - "status": migration status
+ See MigrationStatus in ~/qapi-schema.json for possible values
+
+Example:
+
+{"timestamp": {"seconds": 1432121972, "microseconds": 744001},
+ "event": "MIGRATION", "data": {"status": "completed"}}
STOP
----
diff --git a/docs/specs/rocker.txt b/docs/specs/rocker.txt
index 0af5c61..1c74351 100644
--- a/docs/specs/rocker.txt
+++ b/docs/specs/rocker.txt
@@ -637,6 +637,7 @@
(1 << 5): TCP packet
(1 << 6): UDP packet
(1 << 7): TCP/UDP csum good
+ (1 << 8): Offload forward
RX_CSUM 2 IP calculated checksum:
IPv4: IP payload csum
IPv6: header and payload csum
@@ -645,6 +646,9 @@
RX_FRAG_MAX_LEN 2 Packet maximum fragment length
RX_FRAG_LEN 2 Actual packet fragment length after receive
+Offload forward RX_FLAG indicates the device has already forwarded the packet
+so the host CPU should not also forward the packet.
+
Possible status return codes in descriptor on completion are:
DESC_COMP_ERR reason
diff --git a/exec.c b/exec.c
index 251dc79..b7f7f98 100644
--- a/exec.c
+++ b/exec.c
@@ -1414,6 +1414,11 @@
}
}
+ new_ram_size = MAX(old_ram_size,
+ (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
+ if (new_ram_size > old_ram_size) {
+ migration_bitmap_extend(old_ram_size, new_ram_size);
+ }
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
* tail, so save the last element in last_block.
diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index f0f25c7..5bc62cf 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -17,7 +17,6 @@
#include "hw/virtio/virtio-gpu.h"
static Property virtio_gpu_pci_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPUPCI, vdev.conf),
DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
DEFINE_PROP_END_OF_LIST(),
};
@@ -25,13 +24,21 @@
static void virtio_gpu_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp)
{
VirtIOGPUPCI *vgpu = VIRTIO_GPU_PCI(vpci_dev);
+ VirtIOGPU *g = &vgpu->vdev;
DeviceState *vdev = DEVICE(&vgpu->vdev);
+ int i;
qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus));
/* force virtio-1.0 */
vpci_dev->flags &= ~VIRTIO_PCI_FLAG_DISABLE_MODERN;
vpci_dev->flags |= VIRTIO_PCI_FLAG_DISABLE_LEGACY;
object_property_set_bool(OBJECT(vdev), true, "realized", errp);
+
+ for (i = 0; i < g->conf.max_outputs; i++) {
+ object_property_set_link(OBJECT(g->scanout[i].con),
+ OBJECT(vpci_dev),
+ "device", errp);
+ }
}
static void virtio_gpu_pci_class_init(ObjectClass *klass, void *data)
@@ -49,8 +56,9 @@
static void virtio_gpu_initfn(Object *obj)
{
VirtIOGPUPCI *dev = VIRTIO_GPU_PCI(obj);
- object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
- object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VIRTIO_GPU);
}
static const TypeInfo virtio_gpu_pci_info = {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 8c109b7..990a26b 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -871,7 +871,7 @@
}
static Property virtio_gpu_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOGPU, conf),
+ DEFINE_PROP_UINT32("max_outputs", VirtIOGPU, conf.max_outputs, 1),
DEFINE_PROP_END_OF_LIST(),
};
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index 94f9d0e..f7e539f 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -79,6 +79,7 @@
VirtIOGPU *g = &vvga->vdev;
VGACommonState *vga = &vvga->vga;
uint32_t offset;
+ int i;
/* init vga compat bits */
vga->vram_size_mb = 8;
@@ -120,6 +121,12 @@
vga->con = g->scanout[0].con;
graphic_console_set_hwops(vga->con, &virtio_vga_ops, vvga);
+
+ for (i = 0; i < g->conf.max_outputs; i++) {
+ object_property_set_link(OBJECT(g->scanout[i].con),
+ OBJECT(vpci_dev),
+ "device", errp);
+ }
}
static void virtio_vga_reset(DeviceState *dev)
@@ -131,7 +138,6 @@
}
static Property virtio_vga_properties[] = {
- DEFINE_VIRTIO_GPU_PROPERTIES(VirtIOVGA, vdev.conf),
DEFINE_VIRTIO_GPU_PCI_PROPERTIES(VirtIOPCIProxy),
DEFINE_PROP_END_OF_LIST(),
};
@@ -155,8 +161,9 @@
static void virtio_vga_inst_initfn(Object *obj)
{
VirtIOVGA *dev = VIRTIO_VGA(obj);
- object_initialize(&dev->vdev, sizeof(dev->vdev), TYPE_VIRTIO_GPU);
- object_property_add_child(obj, "virtio-backend", OBJECT(&dev->vdev), NULL);
+
+ virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev),
+ TYPE_VIRTIO_GPU);
}
static TypeInfo virtio_vga_info = {
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 56cdcb9..4c3cb40 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -312,6 +312,8 @@
if (kvm_enabled()) {
pcms->smm = ON_OFF_AUTO_OFF;
}
+ global_state_set_optional();
+ savevm_skip_configuration();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c
index 8aa3a67..43e6c18 100644
--- a/hw/i386/pc_q35.c
+++ b/hw/i386/pc_q35.c
@@ -295,6 +295,8 @@
if (kvm_enabled()) {
pcms->smm = ON_OFF_AUTO_OFF;
}
+ global_state_set_optional();
+ savevm_skip_configuration();
}
static void pc_compat_2_2(MachineState *machine)
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
index bab8e2a..5c6bcd0 100644
--- a/hw/net/e1000.c
+++ b/hw/net/e1000.c
@@ -185,6 +185,9 @@
{
s->mac_reg[STATUS] |= E1000_STATUS_LU;
s->phy_reg[PHY_STATUS] |= MII_SR_LINK_STATUS;
+
+ /* E1000_STATUS_LU is tested by e1000_can_receive() */
+ qemu_flush_queued_packets(qemu_get_queue(s->nic));
}
static bool
diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 4d25842..47d080f 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -96,7 +96,7 @@
RockerSwitch *qmp_query_rocker(const char *name, Error **errp)
{
- RockerSwitch *rocker = g_malloc0(sizeof(*rocker));
+ RockerSwitch *rocker;
Rocker *r;
r = rocker_find(name);
@@ -106,6 +106,7 @@
return NULL;
}
+ rocker = g_new0(RockerSwitch, 1);
rocker->name = g_strdup(r->name);
rocker->id = r->switch_id;
rocker->ports = r->fp_ports;
@@ -192,11 +193,13 @@
if (!tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
return -ROCKER_EINVAL;
}
+ break;
case ROCKER_TX_OFFLOAD_TSO:
if (!tlvs[ROCKER_TLV_TX_TSO_MSS] ||
!tlvs[ROCKER_TLV_TX_TSO_HDR_LEN]) {
return -ROCKER_EINVAL;
}
+ break;
}
if (tlvs[ROCKER_TLV_TX_L3_CSUM_OFF]) {
@@ -600,7 +603,7 @@
}
int rx_produce(World *world, uint32_t pport,
- const struct iovec *iov, int iovcnt)
+ const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu)
{
Rocker *r = world_rocker(world);
PCIDevice *dev = (PCIDevice *)r;
@@ -643,6 +646,10 @@
goto out;
}
+ if (copy_to_cpu) {
+ rx_flags |= ROCKER_RX_FLAGS_FWD_OFFLOAD;
+ }
+
/* XXX calc rx flags/csum */
tlv_size = rocker_tlv_total_size(sizeof(uint16_t)) + /* flags */
diff --git a/hw/net/rocker/rocker.h b/hw/net/rocker/rocker.h
index b3310b6..f9c80f8 100644
--- a/hw/net/rocker/rocker.h
+++ b/hw/net/rocker/rocker.h
@@ -77,7 +77,7 @@
int rocker_event_mac_vlan_seen(Rocker *r, uint32_t pport, uint8_t *addr,
uint16_t vlan_id);
int rx_produce(World *world, uint32_t pport,
- const struct iovec *iov, int iovcnt);
+ const struct iovec *iov, int iovcnt, uint8_t copy_to_cpu);
int rocker_port_eg(Rocker *r, uint32_t pport,
const struct iovec *iov, int iovcnt);
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index d8d934c..c693ae5 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -125,18 +125,21 @@
return ROCKER_OK;
}
-static int fp_port_can_receive(NetClientState *nc)
-{
- FpPort *port = qemu_get_nic_opaque(nc);
-
- return port->enabled;
-}
-
static ssize_t fp_port_receive_iov(NetClientState *nc, const struct iovec *iov,
int iovcnt)
{
FpPort *port = qemu_get_nic_opaque(nc);
+ /* If the port is disabled, we want to drop this pkt
+ * now rather than queing it for later. We don't want
+ * any stale pkts getting into the device when the port
+ * transitions to enabled.
+ */
+
+ if (!port->enabled) {
+ return -1;
+ }
+
return world_ingress(port->world, port->pport, iov, iovcnt);
}
@@ -165,7 +168,6 @@
static NetClientInfo fp_port_info = {
.type = NET_CLIENT_OPTIONS_KIND_NIC,
.size = sizeof(NICState),
- .can_receive = fp_port_can_receive,
.receive = fp_port_receive,
.receive_iov = fp_port_receive_iov,
.cleanup = fp_port_cleanup,
diff --git a/hw/net/rocker/rocker_hw.h b/hw/net/rocker/rocker_hw.h
index fe639ba..8c50830 100644
--- a/hw/net/rocker/rocker_hw.h
+++ b/hw/net/rocker/rocker_hw.h
@@ -250,6 +250,7 @@
#define ROCKER_RX_FLAGS_TCP (1 << 5)
#define ROCKER_RX_FLAGS_UDP (1 << 6)
#define ROCKER_RX_FLAGS_TCP_UDP_CSUM_GOOD (1 << 7)
+#define ROCKER_RX_FLAGS_FWD_OFFLOAD (1 << 8)
/* Tx msg */
enum {
diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index b25a17d..874fb01 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -825,6 +825,8 @@
static void of_dpa_output_l2_interface(OfDpaFlowContext *fc,
OfDpaGroup *group)
{
+ uint8_t copy_to_cpu = fc->action_set.apply.copy_to_cpu;
+
if (group->l2_interface.pop_vlan) {
of_dpa_flow_pkt_strip_vlan(fc);
}
@@ -837,7 +839,8 @@
*/
if (group->l2_interface.out_pport == 0) {
- rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt);
+ rx_produce(fc->of_dpa->world, fc->in_pport, fc->iov, fc->iovcnt,
+ copy_to_cpu);
} else if (group->l2_interface.out_pport != fc->in_pport) {
rocker_port_eg(world_rocker(fc->of_dpa->world),
group->l2_interface.out_pport,
@@ -2525,7 +2528,6 @@
ngroup->has_set_vlan_id = true;
ngroup->set_vlan_id = ntohs(group->l2_rewrite.vlan_id);
}
- break;
if (memcmp(group->l2_rewrite.src_mac.a, zero_mac.a, ETH_ALEN)) {
ngroup->has_set_eth_src = true;
ngroup->set_eth_src =
@@ -2536,6 +2538,7 @@
ngroup->set_eth_dst =
qemu_mac_strdup_printf(group->l2_rewrite.dst_mac.a);
}
+ break;
case ROCKER_OF_DPA_GROUP_TYPE_L2_FLOOD:
case ROCKER_OF_DPA_GROUP_TYPE_L2_MCAST:
ngroup->has_vlan_id = true;
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
index b991e87..a6b18f1 100644
--- a/hw/net/rocker/rocker_world.c
+++ b/hw/net/rocker/rocker_world.c
@@ -32,7 +32,7 @@
return world->ops->ig(world, pport, iov, iovcnt);
}
- return iov_size(iov, iovcnt);
+ return -1;
}
int world_do_cmd(World *world, DescInfo *info,
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index 104a0f5..706e060 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -1879,6 +1879,12 @@
return -1;
}
+ if (s->peer_has_vhdr) {
+ vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
+ buf += sizeof(struct virtio_net_hdr);
+ size -= sizeof(struct virtio_net_hdr);
+ }
+
/* Pad to minimum Ethernet frame length */
if (size < sizeof(min_buf)) {
memcpy(min_buf, buf, size);
@@ -1887,12 +1893,6 @@
size = sizeof(min_buf);
}
- if (s->peer_has_vhdr) {
- vmxnet_rx_pkt_set_vhdr(s->rx_pkt, (struct virtio_net_hdr *)buf);
- buf += sizeof(struct virtio_net_hdr);
- size -= sizeof(struct virtio_net_hdr);
- }
-
vmxnet_rx_pkt_set_packet_type(s->rx_pkt,
get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index f174e5a..bcf5f0f 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -34,6 +34,7 @@
#include "sysemu/cpus.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
+#include "migration/migration.h"
#include "mmu-hash64.h"
#include "qom/cpu.h"
@@ -1851,6 +1852,8 @@
static void spapr_compat_2_3(Object *obj)
{
+ savevm_skip_section_footers();
+ global_state_set_optional();
}
static void spapr_compat_2_2(Object *obj)
diff --git a/include/block/blockjob.h b/include/block/blockjob.h
index 57d8ef1..dd9d5e6 100644
--- a/include/block/blockjob.h
+++ b/include/block/blockjob.h
@@ -166,6 +166,14 @@
void block_job_yield(BlockJob *job);
/**
+ * block_job_release:
+ * @bs: The block device.
+ *
+ * Release job resources when an error occurred or job completed.
+ */
+void block_job_release(BlockDriverState *bs);
+
+/**
* block_job_completed:
* @job: The job being completed.
* @ret: The status code.
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d678114..2e74760 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -365,4 +365,7 @@
return cpu->can_do_io != 0;
}
+#if !defined(CONFIG_USER_ONLY)
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new);
+#endif
#endif
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index b8c9244..8896761 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -112,9 +112,6 @@
VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, false), \
DEFINE_PROP_UINT32("vectors", _state, nvectors, 3)
-#define DEFINE_VIRTIO_GPU_PROPERTIES(_state, _conf_field) \
- DEFINE_PROP_UINT32("max_outputs", _state, _conf_field.max_outputs, 1)
-
#define VIRTIO_GPU_FILL_CMD(out) do { \
size_t s; \
s = iov_to_buf(cmd->elem.out_sg, cmd->elem.out_num, 0, \
diff --git a/include/migration/migration.h b/include/migration/migration.h
index 9387c8c..b2711ef 100644
--- a/include/migration/migration.h
+++ b/include/migration/migration.h
@@ -34,6 +34,7 @@
#define QEMU_VM_SECTION_FULL 0x04
#define QEMU_VM_SUBSECTION 0x05
#define QEMU_VM_VMDESCRIPTION 0x06
+#define QEMU_VM_CONFIGURATION 0x07
#define QEMU_VM_SECTION_FOOTER 0x7e
struct MigrationParams {
@@ -176,10 +177,11 @@
int migrate_compress_level(void);
int migrate_compress_threads(void);
int migrate_decompress_threads(void);
+bool migrate_use_events(void);
void ram_control_before_iterate(QEMUFile *f, uint64_t flags);
void ram_control_after_iterate(QEMUFile *f, uint64_t flags);
-void ram_control_load_hook(QEMUFile *f, uint64_t flags);
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data);
/* Whenever this is found in the data stream, the flags
* will be passed to ram_control_load_hook in the incoming-migration
@@ -197,4 +199,7 @@
void ram_mig_init(void);
void savevm_skip_section_footers(void);
+void register_global_state(void);
+void global_state_set_optional(void);
+void savevm_skip_configuration(void);
#endif
diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h
index 4f67d79..ea49f33 100644
--- a/include/migration/qemu-file.h
+++ b/include/migration/qemu-file.h
@@ -63,16 +63,20 @@
/*
* This function provides hooks around different
* stages of RAM migration.
+ * 'opaque' is the backend specific data in QEMUFile
+ * 'data' is call specific data associated with the 'flags' value
*/
-typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags);
+typedef int (QEMURamHookFunc)(QEMUFile *f, void *opaque, uint64_t flags,
+ void *data);
/*
* Constants used by ram_control_* hooks
*/
-#define RAM_CONTROL_SETUP 0
-#define RAM_CONTROL_ROUND 1
-#define RAM_CONTROL_HOOK 2
-#define RAM_CONTROL_FINISH 3
+#define RAM_CONTROL_SETUP 0
+#define RAM_CONTROL_ROUND 1
+#define RAM_CONTROL_HOOK 2
+#define RAM_CONTROL_FINISH 3
+#define RAM_CONTROL_BLOCK_REG 4
/*
* This function allows override of where the RAM page
diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 0695d7c..f51ff69 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -820,6 +820,8 @@
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, QJSON *vmdesc);
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque);
+
int vmstate_register_with_alias_id(DeviceState *dev, int instance_id,
const VMStateDescription *vmsd,
void *base, int alias_id,
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index df80951..44570d1 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -28,6 +28,7 @@
void runstate_set(RunState new_state);
int runstate_is_running(void);
bool runstate_needs_reset(void);
+bool runstate_store(char *str, size_t size);
typedef struct vm_change_state_entry VMChangeStateEntry;
typedef void VMChangeStateHandler(void *opaque, int running, RunState state);
diff --git a/migration/block.c b/migration/block.c
index ddb59cc..ed865ed 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -457,7 +457,7 @@
blk_mig_lock();
if (bmds_aio_inflight(bmds, sector)) {
blk_mig_unlock();
- bdrv_drain_all();
+ bdrv_drain(bmds->bs);
} else {
blk_mig_unlock();
}
diff --git a/migration/migration.c b/migration/migration.c
index c6ac08a..45719a0 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -26,6 +26,8 @@
#include "qemu/thread.h"
#include "qmp-commands.h"
#include "trace.h"
+#include "qapi/util.h"
+#include "qapi-event.h"
#define MAX_THROTTLE (32 << 20) /* Migration speed throttling */
@@ -97,6 +99,120 @@
mis_current = NULL;
}
+
+typedef struct {
+ bool optional;
+ uint32_t size;
+ uint8_t runstate[100];
+} GlobalState;
+
+static GlobalState global_state;
+
+static int global_state_store(void)
+{
+ if (!runstate_store((char *)global_state.runstate,
+ sizeof(global_state.runstate))) {
+ error_report("runstate name too big: %s", global_state.runstate);
+ trace_migrate_state_too_big();
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static char *global_state_get_runstate(void)
+{
+ return (char *)global_state.runstate;
+}
+
+void global_state_set_optional(void)
+{
+ global_state.optional = true;
+}
+
+static bool global_state_needed(void *opaque)
+{
+ GlobalState *s = opaque;
+ char *runstate = (char *)s->runstate;
+
+ /* If it is not optional, it is mandatory */
+
+ if (s->optional == false) {
+ return true;
+ }
+
+ /* If state is running or paused, it is not needed */
+
+ if (strcmp(runstate, "running") == 0 ||
+ strcmp(runstate, "paused") == 0) {
+ return false;
+ }
+
+ /* for any other state it is needed */
+ return true;
+}
+
+static int global_state_post_load(void *opaque, int version_id)
+{
+ GlobalState *s = opaque;
+ int ret = 0;
+ char *runstate = (char *)s->runstate;
+
+ trace_migrate_global_state_post_load(runstate);
+
+ if (strcmp(runstate, "running") != 0) {
+ Error *local_err = NULL;
+ int r = qapi_enum_parse(RunState_lookup, runstate, RUN_STATE_MAX,
+ -1, &local_err);
+
+ if (r == -1) {
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return -EINVAL;
+ }
+ ret = vm_stop_force_state(r);
+ }
+
+ return ret;
+}
+
+static void global_state_pre_save(void *opaque)
+{
+ GlobalState *s = opaque;
+
+ trace_migrate_global_state_pre_save((char *)s->runstate);
+ s->size = strlen((char *)s->runstate) + 1;
+}
+
+static const VMStateDescription vmstate_globalstate = {
+ .name = "globalstate",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .post_load = global_state_post_load,
+ .pre_save = global_state_pre_save,
+ .needed = global_state_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(size, GlobalState),
+ VMSTATE_BUFFER(runstate, GlobalState),
+ VMSTATE_END_OF_LIST()
+ },
+};
+
+void register_global_state(void)
+{
+ /* We would use it independently that we receive it */
+ strcpy((char *)&global_state.runstate, "");
+ vmstate_register(NULL, 0, &vmstate_globalstate, &global_state);
+}
+
+static void migrate_generate_event(int new_state)
+{
+ if (migrate_use_events()) {
+ qapi_event_send_migration(new_state, &error_abort);
+ trace_migrate_set_state(new_state);
+ }
+}
+
/*
* Called on -incoming with a defer: uri.
* The migration can be started later after any parameters have been
@@ -114,6 +230,7 @@
{
const char *p;
+ qapi_event_send_migration(MIGRATION_STATUS_SETUP, &error_abort);
if (!strcmp(uri, "defer")) {
deferred_incoming_migration(errp);
} else if (strstart(uri, "tcp:", &p)) {
@@ -142,7 +259,7 @@
int ret;
migration_incoming_state_new(f);
-
+ migrate_generate_event(MIGRATION_STATUS_ACTIVE);
ret = qemu_loadvm_state(f);
qemu_fclose(f);
@@ -150,10 +267,12 @@
migration_incoming_state_destroy();
if (ret < 0) {
+ migrate_generate_event(MIGRATION_STATUS_FAILED);
error_report("load of migration failed: %s", strerror(-ret));
migrate_decompress_threads_join();
exit(EXIT_FAILURE);
}
+ migrate_generate_event(MIGRATION_STATUS_COMPLETED);
qemu_announce_self();
/* Make sure all file formats flush their mutable metadata */
@@ -164,10 +283,20 @@
exit(EXIT_FAILURE);
}
- if (autostart) {
+ /* runstate == "" means that we haven't received it through the
+ * wire, so we obey autostart. runstate == runing means that we
+ * need to run it, we need to make sure that we do it after
+ * everything else has finished. Every other state change is done
+ * at the post_load function */
+
+ if (strcmp(global_state_get_runstate(), "running") == 0) {
vm_start();
- } else {
- runstate_set(RUN_STATE_PAUSED);
+ } else if (strcmp(global_state_get_runstate(), "") == 0) {
+ if (autostart) {
+ vm_start();
+ } else {
+ runstate_set(RUN_STATE_PAUSED);
+ }
}
migrate_decompress_threads_join();
}
@@ -392,8 +521,8 @@
static void migrate_set_state(MigrationState *s, int old_state, int new_state)
{
- if (atomic_cmpxchg(&s->state, old_state, new_state) == new_state) {
- trace_migrate_set_state(new_state);
+ if (atomic_cmpxchg(&s->state, old_state, new_state) == old_state) {
+ migrate_generate_event(new_state);
}
}
@@ -432,8 +561,7 @@
{
trace_migrate_fd_error();
assert(s->file == NULL);
- s->state = MIGRATION_STATUS_FAILED;
- trace_migrate_set_state(MIGRATION_STATUS_FAILED);
+ migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
notifier_list_notify(&migration_state_notifiers, s);
}
@@ -517,8 +645,7 @@
s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS] =
decompress_thread_count;
s->bandwidth_limit = bandwidth_limit;
- s->state = MIGRATION_STATUS_SETUP;
- trace_migrate_set_state(MIGRATION_STATUS_SETUP);
+ migrate_set_state(s, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP);
s->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
return s;
@@ -577,7 +704,6 @@
error_setg(errp, QERR_MIGRATION_ACTIVE);
return;
}
-
if (runstate_check(RUN_STATE_INMIGRATE)) {
error_setg(errp, "Guest is waiting for an incoming migration");
return;
@@ -592,6 +718,12 @@
return;
}
+ /* We are starting a new migration, so we want to start in a clean
+ state. This change is only needed if previous migration
+ failed/was cancelled. We don't use migrate_set_state() because
+ we are setting the initial state, not changing it. */
+ s->state = MIGRATION_STATUS_NONE;
+
s = migrate_init(¶ms);
if (strstart(uri, "tcp:", &p)) {
@@ -611,7 +743,7 @@
} else {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "uri",
"a valid migration protocol");
- s->state = MIGRATION_STATUS_FAILED;
+ migrate_set_state(s, MIGRATION_STATUS_SETUP, MIGRATION_STATUS_FAILED);
return;
}
@@ -740,6 +872,15 @@
return s->parameters[MIGRATION_PARAMETER_DECOMPRESS_THREADS];
}
+bool migrate_use_events(void)
+{
+ MigrationState *s;
+
+ s = migrate_get_current();
+
+ return s->enabled_capabilities[MIGRATION_CAPABILITY_EVENTS];
+}
+
int migrate_use_xbzrle(void)
{
MigrationState *s;
@@ -793,10 +934,13 @@
qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
old_vm_running = runstate_is_running();
- ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
- if (ret >= 0) {
- qemu_file_set_rate_limit(s->file, INT64_MAX);
- qemu_savevm_state_complete(s->file);
+ ret = global_state_store();
+ if (!ret) {
+ ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+ if (ret >= 0) {
+ qemu_file_set_rate_limit(s->file, INT64_MAX);
+ qemu_savevm_state_complete(s->file);
+ }
}
qemu_mutex_unlock_iothread();
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 557c1c1..6bb3dc1 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -129,7 +129,7 @@
int ret = 0;
if (f->ops->before_ram_iterate) {
- ret = f->ops->before_ram_iterate(f, f->opaque, flags);
+ ret = f->ops->before_ram_iterate(f, f->opaque, flags, NULL);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
@@ -141,24 +141,30 @@
int ret = 0;
if (f->ops->after_ram_iterate) {
- ret = f->ops->after_ram_iterate(f, f->opaque, flags);
+ ret = f->ops->after_ram_iterate(f, f->opaque, flags, NULL);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
}
}
-void ram_control_load_hook(QEMUFile *f, uint64_t flags)
+void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data)
{
int ret = -EINVAL;
if (f->ops->hook_ram_load) {
- ret = f->ops->hook_ram_load(f, f->opaque, flags);
+ ret = f->ops->hook_ram_load(f, f->opaque, flags, data);
if (ret < 0) {
qemu_file_set_error(f, ret);
}
} else {
- qemu_file_set_error(f, ret);
+ /*
+ * Hook is a hook specifically requested by the source sending a flag
+ * that expects there to be a hook on the destination.
+ */
+ if (flags == RAM_CONTROL_HOOK) {
+ qemu_file_set_error(f, ret);
+ }
}
}
diff --git a/migration/ram.c b/migration/ram.c
index 57368e1..c696814 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -222,6 +222,7 @@
static RAMBlock *last_sent_block;
static ram_addr_t last_offset;
static unsigned long *migration_bitmap;
+static QemuMutex migration_bitmap_mutex;
static uint64_t migration_dirty_pages;
static uint32_t last_version;
static bool ram_bulk_stage;
@@ -494,6 +495,7 @@
return 1;
}
+/* Called with rcu_read_lock() to protect migration_bitmap */
static inline
ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
ram_addr_t start)
@@ -502,26 +504,31 @@
unsigned long nr = base + (start >> TARGET_PAGE_BITS);
uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
+ unsigned long *bitmap;
unsigned long next;
+ bitmap = atomic_rcu_read(&migration_bitmap);
if (ram_bulk_stage && nr > base) {
next = nr + 1;
} else {
- next = find_next_bit(migration_bitmap, size, nr);
+ next = find_next_bit(bitmap, size, nr);
}
if (next < size) {
- clear_bit(next, migration_bitmap);
+ clear_bit(next, bitmap);
migration_dirty_pages--;
}
return (next - base) << TARGET_PAGE_BITS;
}
+/* Called with rcu_read_lock() to protect migration_bitmap */
static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
{
+ unsigned long *bitmap;
+ bitmap = atomic_rcu_read(&migration_bitmap);
migration_dirty_pages +=
- cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length);
+ cpu_physical_memory_sync_dirty_bitmap(bitmap, start, length);
}
@@ -563,11 +570,13 @@
trace_migration_bitmap_sync_start();
address_space_sync_dirty_bitmap(&address_space_memory);
+ qemu_mutex_lock(&migration_bitmap_mutex);
rcu_read_lock();
QLIST_FOREACH_RCU(block, &ram_list.blocks, next) {
migration_bitmap_sync_range(block->mr->ram_addr, block->used_length);
}
rcu_read_unlock();
+ qemu_mutex_unlock(&migration_bitmap_mutex);
trace_migration_bitmap_sync_end(migration_dirty_pages
- num_dirty_pages_init);
@@ -1017,10 +1026,15 @@
static void migration_end(void)
{
- if (migration_bitmap) {
+ /* caller have hold iothread lock or is in a bh, so there is
+ * no writing race against this migration_bitmap
+ */
+ unsigned long *bitmap = migration_bitmap;
+ atomic_rcu_set(&migration_bitmap, NULL);
+ if (bitmap) {
memory_global_dirty_log_stop();
- g_free(migration_bitmap);
- migration_bitmap = NULL;
+ synchronize_rcu();
+ g_free(bitmap);
}
XBZRLE_cache_lock();
@@ -1051,6 +1065,30 @@
#define MAX_WAIT 50 /* ms, half buffered_file limit */
+void migration_bitmap_extend(ram_addr_t old, ram_addr_t new)
+{
+ /* called in qemu main thread, so there is
+ * no writing race against this migration_bitmap
+ */
+ if (migration_bitmap) {
+ unsigned long *old_bitmap = migration_bitmap, *bitmap;
+ bitmap = bitmap_new(new);
+
+ /* prevent migration_bitmap content from being set bit
+ * by migration_bitmap_sync_range() at the same time.
+ * it is safe to migration if migration_bitmap is cleared bit
+ * at the same time.
+ */
+ qemu_mutex_lock(&migration_bitmap_mutex);
+ bitmap_copy(bitmap, old_bitmap, old);
+ bitmap_set(bitmap, old, new - old);
+ atomic_rcu_set(&migration_bitmap, bitmap);
+ qemu_mutex_unlock(&migration_bitmap_mutex);
+ migration_dirty_pages += new - old;
+ synchronize_rcu();
+ g_free(old_bitmap);
+ }
+}
/* Each of ram_save_setup, ram_save_iterate and ram_save_complete has
* long-running RCU critical section. When rcu-reclaims in the code
@@ -1067,6 +1105,7 @@
dirty_rate_high_cnt = 0;
bitmap_sync_count = 0;
migration_bitmap_sync_init();
+ qemu_mutex_init(&migration_bitmap_mutex);
if (migrate_use_xbzrle()) {
XBZRLE_cache_lock();
@@ -1477,6 +1516,8 @@
error_report_err(local_err);
}
}
+ ram_control_load_hook(f, RAM_CONTROL_BLOCK_REG,
+ block->idstr);
break;
}
}
@@ -1545,7 +1586,7 @@
break;
default:
if (flags & RAM_SAVE_FLAG_HOOK) {
- ram_control_load_hook(f, flags);
+ ram_control_load_hook(f, RAM_CONTROL_HOOK, NULL);
} else {
error_report("Unknown combination of migration flags: %#x",
flags);
diff --git a/migration/rdma.c b/migration/rdma.c
index b777273..f106b2a 100644
--- a/migration/rdma.c
+++ b/migration/rdma.c
@@ -215,17 +215,19 @@
* the information. It's small anyway, so a list is overkill.
*/
typedef struct RDMALocalBlock {
- uint8_t *local_host_addr; /* local virtual address */
- uint64_t remote_host_addr; /* remote virtual address */
- uint64_t offset;
- uint64_t length;
- struct ibv_mr **pmr; /* MRs for chunk-level registration */
- struct ibv_mr *mr; /* MR for non-chunk-level registration */
- uint32_t *remote_keys; /* rkeys for chunk-level registration */
- uint32_t remote_rkey; /* rkeys for non-chunk-level registration */
- int index; /* which block are we */
- bool is_ram_block;
- int nb_chunks;
+ char *block_name;
+ uint8_t *local_host_addr; /* local virtual address */
+ uint64_t remote_host_addr; /* remote virtual address */
+ uint64_t offset;
+ uint64_t length;
+ struct ibv_mr **pmr; /* MRs for chunk-level registration */
+ struct ibv_mr *mr; /* MR for non-chunk-level registration */
+ uint32_t *remote_keys; /* rkeys for chunk-level registration */
+ uint32_t remote_rkey; /* rkeys for non-chunk-level registration */
+ int index; /* which block are we */
+ unsigned int src_index; /* (Only used on dest) */
+ bool is_ram_block;
+ int nb_chunks;
unsigned long *transit_bitmap;
unsigned long *unregister_bitmap;
} RDMALocalBlock;
@@ -353,6 +355,9 @@
RDMALocalBlocks local_ram_blocks;
RDMADestBlock *dest_blocks;
+ /* Index of the next RAMBlock received during block registration */
+ unsigned int next_src_index;
+
/*
* Migration on *destination* started.
* Then use coroutine yield function.
@@ -411,7 +416,7 @@
*/
typedef struct QEMU_PACKED {
union QEMU_PACKED {
- uint64_t current_addr; /* offset into the ramblock of the chunk */
+ uint64_t current_addr; /* offset into the ram_addr_t space */
uint64_t chunk; /* chunk to lookup if unregistering */
} key;
uint32_t current_index; /* which ramblock the chunk belongs to */
@@ -419,8 +424,19 @@
uint64_t chunks; /* how many sequential chunks to register */
} RDMARegister;
-static void register_to_network(RDMARegister *reg)
+static void register_to_network(RDMAContext *rdma, RDMARegister *reg)
{
+ RDMALocalBlock *local_block;
+ local_block = &rdma->local_ram_blocks.block[reg->current_index];
+
+ if (local_block->is_ram_block) {
+ /*
+ * current_addr as passed in is an address in the local ram_addr_t
+ * space, we need to translate this for the destination
+ */
+ reg->key.current_addr -= local_block->offset;
+ reg->key.current_addr += rdma->dest_blocks[reg->current_index].offset;
+ }
reg->key.current_addr = htonll(reg->key.current_addr);
reg->current_index = htonl(reg->current_index);
reg->chunks = htonll(reg->chunks);
@@ -436,13 +452,19 @@
typedef struct QEMU_PACKED {
uint32_t value; /* if zero, we will madvise() */
uint32_t block_idx; /* which ram block index */
- uint64_t offset; /* where in the remote ramblock this chunk */
+ uint64_t offset; /* Address in remote ram_addr_t space */
uint64_t length; /* length of the chunk */
} RDMACompress;
-static void compress_to_network(RDMACompress *comp)
+static void compress_to_network(RDMAContext *rdma, RDMACompress *comp)
{
comp->value = htonl(comp->value);
+ /*
+ * comp->offset as passed in is an address in the local ram_addr_t
+ * space, we need to translate this for the destination
+ */
+ comp->offset -= rdma->local_ram_blocks.block[comp->block_idx].offset;
+ comp->offset += rdma->dest_blocks[comp->block_idx].offset;
comp->block_idx = htonl(comp->block_idx);
comp->offset = htonll(comp->offset);
comp->length = htonll(comp->length);
@@ -511,27 +533,27 @@
return result;
}
-static int rdma_add_block(RDMAContext *rdma, void *host_addr,
+static int rdma_add_block(RDMAContext *rdma, const char *block_name,
+ void *host_addr,
ram_addr_t block_offset, uint64_t length)
{
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
- (void *)(uintptr_t)block_offset);
+ RDMALocalBlock *block;
RDMALocalBlock *old = local->block;
- assert(block == NULL);
-
local->block = g_malloc0(sizeof(RDMALocalBlock) * (local->nb_blocks + 1));
if (local->nb_blocks) {
int x;
- for (x = 0; x < local->nb_blocks; x++) {
- g_hash_table_remove(rdma->blockmap,
- (void *)(uintptr_t)old[x].offset);
- g_hash_table_insert(rdma->blockmap,
- (void *)(uintptr_t)old[x].offset,
- &local->block[x]);
+ if (rdma->blockmap) {
+ for (x = 0; x < local->nb_blocks; x++) {
+ g_hash_table_remove(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset);
+ g_hash_table_insert(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset,
+ &local->block[x]);
+ }
}
memcpy(local->block, old, sizeof(RDMALocalBlock) * local->nb_blocks);
g_free(old);
@@ -539,10 +561,12 @@
block = &local->block[local->nb_blocks];
+ block->block_name = g_strdup(block_name);
block->local_host_addr = host_addr;
block->offset = block_offset;
block->length = length;
block->index = local->nb_blocks;
+ block->src_index = ~0U; /* Filled in by the receipt of the block list */
block->nb_chunks = ram_chunk_index(host_addr, host_addr + length) + 1UL;
block->transit_bitmap = bitmap_new(block->nb_chunks);
bitmap_clear(block->transit_bitmap, 0, block->nb_chunks);
@@ -552,9 +576,12 @@
block->is_ram_block = local->init ? false : true;
- g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+ if (rdma->blockmap) {
+ g_hash_table_insert(rdma->blockmap, (void *) block_offset, block);
+ }
- trace_rdma_add_block(local->nb_blocks, (uintptr_t) block->local_host_addr,
+ trace_rdma_add_block(block_name, local->nb_blocks,
+ (uintptr_t) block->local_host_addr,
block->offset, block->length,
(uintptr_t) (block->local_host_addr + block->length),
BITS_TO_LONGS(block->nb_chunks) *
@@ -574,7 +601,7 @@
static int qemu_rdma_init_one_block(const char *block_name, void *host_addr,
ram_addr_t block_offset, ram_addr_t length, void *opaque)
{
- return rdma_add_block(opaque, host_addr, block_offset, length);
+ return rdma_add_block(opaque, block_name, host_addr, block_offset, length);
}
/*
@@ -587,7 +614,6 @@
RDMALocalBlocks *local = &rdma->local_ram_blocks;
assert(rdma->blockmap == NULL);
- rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
memset(local, 0, sizeof *local);
qemu_ram_foreach_block(qemu_rdma_init_one_block, rdma);
trace_qemu_rdma_init_ram_blocks(local->nb_blocks);
@@ -597,16 +623,19 @@
return 0;
}
-static int rdma_delete_block(RDMAContext *rdma, ram_addr_t block_offset)
+/*
+ * Note: If used outside of cleanup, the caller must ensure that the destination
+ * block structures are also updated
+ */
+static int rdma_delete_block(RDMAContext *rdma, RDMALocalBlock *block)
{
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- RDMALocalBlock *block = g_hash_table_lookup(rdma->blockmap,
- (void *) block_offset);
RDMALocalBlock *old = local->block;
int x;
- assert(block);
-
+ if (rdma->blockmap) {
+ g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)block->offset);
+ }
if (block->pmr) {
int j;
@@ -636,8 +665,14 @@
g_free(block->remote_keys);
block->remote_keys = NULL;
- for (x = 0; x < local->nb_blocks; x++) {
- g_hash_table_remove(rdma->blockmap, (void *)(uintptr_t)old[x].offset);
+ g_free(block->block_name);
+ block->block_name = NULL;
+
+ if (rdma->blockmap) {
+ for (x = 0; x < local->nb_blocks; x++) {
+ g_hash_table_remove(rdma->blockmap,
+ (void *)(uintptr_t)old[x].offset);
+ }
}
if (local->nb_blocks > 1) {
@@ -659,8 +694,7 @@
local->block = NULL;
}
- trace_rdma_delete_block(local->nb_blocks,
- (uintptr_t)block->local_host_addr,
+ trace_rdma_delete_block(block, (uintptr_t)block->local_host_addr,
block->offset, block->length,
(uintptr_t)(block->local_host_addr + block->length),
BITS_TO_LONGS(block->nb_chunks) *
@@ -670,7 +704,7 @@
local->nb_blocks--;
- if (local->nb_blocks) {
+ if (local->nb_blocks && rdma->blockmap) {
for (x = 0; x < local->nb_blocks; x++) {
g_hash_table_insert(rdma->blockmap,
(void *)(uintptr_t)local->block[x].offset,
@@ -1223,7 +1257,7 @@
/*
* Perform a non-optimized memory unregistration after every transfer
- * for demonsration purposes, only if pin-all is not requested.
+ * for demonstration purposes, only if pin-all is not requested.
*
* Potential optimizations:
* 1. Start a new thread to run this function continuously
@@ -1289,7 +1323,7 @@
rdma->total_registrations--;
reg.key.chunk = chunk;
- register_to_network(®);
+ register_to_network(rdma, ®);
ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®,
&resp, NULL, NULL);
if (ret < 0) {
@@ -1910,7 +1944,7 @@
trace_qemu_rdma_write_one_zero(chunk, sge.length,
current_index, current_addr);
- compress_to_network(&comp);
+ compress_to_network(rdma, &comp);
ret = qemu_rdma_exchange_send(rdma, &head,
(uint8_t *) &comp, NULL, NULL, NULL);
@@ -1937,7 +1971,7 @@
trace_qemu_rdma_write_one_sendreg(chunk, sge.length, current_index,
current_addr);
- register_to_network(®);
+ register_to_network(rdma, ®);
ret = qemu_rdma_exchange_send(rdma, &head, (uint8_t *) ®,
&resp, ®_result_idx, NULL);
if (ret < 0) {
@@ -2198,7 +2232,7 @@
if (rdma->local_ram_blocks.block) {
while (rdma->local_ram_blocks.nb_blocks) {
- rdma_delete_block(rdma, rdma->local_ram_blocks.block->offset);
+ rdma_delete_block(rdma, &rdma->local_ram_blocks.block[0]);
}
}
@@ -2271,6 +2305,14 @@
goto err_rdma_source_init;
}
+ /* Build the hash that maps from offset to RAMBlock */
+ rdma->blockmap = g_hash_table_new(g_direct_hash, g_direct_equal);
+ for (idx = 0; idx < rdma->local_ram_blocks.nb_blocks; idx++) {
+ g_hash_table_insert(rdma->blockmap,
+ (void *)(uintptr_t)rdma->local_ram_blocks.block[idx].offset,
+ &rdma->local_ram_blocks.block[idx]);
+ }
+
for (idx = 0; idx < RDMA_WRID_MAX; idx++) {
ret = qemu_rdma_reg_control(rdma, idx);
if (ret) {
@@ -2880,6 +2922,14 @@
return ret;
}
+static int dest_ram_sort_func(const void *a, const void *b)
+{
+ unsigned int a_index = ((const RDMALocalBlock *)a)->src_index;
+ unsigned int b_index = ((const RDMALocalBlock *)b)->src_index;
+
+ return (a_index < b_index) ? -1 : (a_index != b_index);
+}
+
/*
* During each iteration of the migration, we listen for instructions
* by the source VM to perform dynamic page registrations before they
@@ -2889,8 +2939,7 @@
*
* Keep doing this until the source tells us to stop.
*/
-static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque,
- uint64_t flags)
+static int qemu_rdma_registration_handle(QEMUFile *f, void *opaque)
{
RDMAControlHeader reg_resp = { .len = sizeof(RDMARegisterResult),
.type = RDMA_CONTROL_REGISTER_RESULT,
@@ -2920,7 +2969,7 @@
CHECK_ERROR_STATE();
do {
- trace_qemu_rdma_registration_handle_wait(flags);
+ trace_qemu_rdma_registration_handle_wait();
ret = qemu_rdma_exchange_recv(rdma, &head, RDMA_CONTROL_NONE);
@@ -2943,6 +2992,13 @@
trace_qemu_rdma_registration_handle_compress(comp->length,
comp->block_idx,
comp->offset);
+ if (comp->block_idx >= rdma->local_ram_blocks.nb_blocks) {
+ error_report("rdma: 'compress' bad block index %u (vs %d)",
+ (unsigned int)comp->block_idx,
+ rdma->local_ram_blocks.nb_blocks);
+ ret = -EIO;
+ break;
+ }
block = &(rdma->local_ram_blocks.block[comp->block_idx]);
host_addr = block->local_host_addr +
@@ -2958,6 +3014,13 @@
case RDMA_CONTROL_RAM_BLOCKS_REQUEST:
trace_qemu_rdma_registration_handle_ram_blocks();
+ /* Sort our local RAM Block list so it's the same as the source,
+ * we can do this since we've filled in a src_index in the list
+ * as we received the RAMBlock list earlier.
+ */
+ qsort(rdma->local_ram_blocks.block,
+ rdma->local_ram_blocks.nb_blocks,
+ sizeof(RDMALocalBlock), dest_ram_sort_func);
if (rdma->pin_all) {
ret = qemu_rdma_reg_whole_ram_blocks(rdma);
if (ret) {
@@ -2985,6 +3048,12 @@
rdma->dest_blocks[i].length = local->block[i].length;
dest_block_to_network(&rdma->dest_blocks[i]);
+ trace_qemu_rdma_registration_handle_ram_blocks_loop(
+ local->block[i].block_name,
+ local->block[i].offset,
+ local->block[i].length,
+ local->block[i].local_host_addr,
+ local->block[i].src_index);
}
blocks.len = rdma->local_ram_blocks.nb_blocks
@@ -3018,8 +3087,23 @@
trace_qemu_rdma_registration_handle_register_loop(count,
reg->current_index, reg->key.current_addr, reg->chunks);
+ if (reg->current_index >= rdma->local_ram_blocks.nb_blocks) {
+ error_report("rdma: 'register' bad block index %u (vs %d)",
+ (unsigned int)reg->current_index,
+ rdma->local_ram_blocks.nb_blocks);
+ ret = -ENOENT;
+ break;
+ }
block = &(rdma->local_ram_blocks.block[reg->current_index]);
if (block->is_ram_block) {
+ if (block->offset > reg->key.current_addr) {
+ error_report("rdma: bad register address for block %s"
+ " offset: %" PRIx64 " current_addr: %" PRIx64,
+ block->block_name, block->offset,
+ reg->key.current_addr);
+ ret = -ERANGE;
+ break;
+ }
host_addr = (block->local_host_addr +
(reg->key.current_addr - block->offset));
chunk = ram_chunk_index(block->local_host_addr,
@@ -3028,6 +3112,14 @@
chunk = reg->key.chunk;
host_addr = block->local_host_addr +
(reg->key.chunk * (1UL << RDMA_REG_CHUNK_SHIFT));
+ /* Check for particularly bad chunk value */
+ if (host_addr < (void *)block->local_host_addr) {
+ error_report("rdma: bad chunk for block %s"
+ " chunk: %" PRIx64,
+ block->block_name, reg->key.chunk);
+ ret = -ERANGE;
+ break;
+ }
}
chunk_start = ram_chunk_start(block, chunk);
chunk_end = ram_chunk_end(block, chunk + reg->chunks);
@@ -3108,8 +3200,56 @@
return ret;
}
+/* Destination:
+ * Called via a ram_control_load_hook during the initial RAM load section which
+ * lists the RAMBlocks by name. This lets us know the order of the RAMBlocks
+ * on the source.
+ * We've already built our local RAMBlock list, but not yet sent the list to
+ * the source.
+ */
+static int rdma_block_notification_handle(QEMUFileRDMA *rfile, const char *name)
+{
+ RDMAContext *rdma = rfile->rdma;
+ int curr;
+ int found = -1;
+
+ /* Find the matching RAMBlock in our local list */
+ for (curr = 0; curr < rdma->local_ram_blocks.nb_blocks; curr++) {
+ if (!strcmp(rdma->local_ram_blocks.block[curr].block_name, name)) {
+ found = curr;
+ break;
+ }
+ }
+
+ if (found == -1) {
+ error_report("RAMBlock '%s' not found on destination", name);
+ return -ENOENT;
+ }
+
+ rdma->local_ram_blocks.block[curr].src_index = rdma->next_src_index;
+ trace_rdma_block_notification_handle(name, rdma->next_src_index);
+ rdma->next_src_index++;
+
+ return 0;
+}
+
+static int rdma_load_hook(QEMUFile *f, void *opaque, uint64_t flags, void *data)
+{
+ switch (flags) {
+ case RAM_CONTROL_BLOCK_REG:
+ return rdma_block_notification_handle(opaque, data);
+
+ case RAM_CONTROL_HOOK:
+ return qemu_rdma_registration_handle(f, opaque);
+
+ default:
+ /* Shouldn't be called with any other values */
+ abort();
+ }
+}
+
static int qemu_rdma_registration_start(QEMUFile *f, void *opaque,
- uint64_t flags)
+ uint64_t flags, void *data)
{
QEMUFileRDMA *rfile = opaque;
RDMAContext *rdma = rfile->rdma;
@@ -3128,7 +3268,7 @@
* First, flush writes, if any.
*/
static int qemu_rdma_registration_stop(QEMUFile *f, void *opaque,
- uint64_t flags)
+ uint64_t flags, void *data)
{
Error *local_err = NULL, **errp = &local_err;
QEMUFileRDMA *rfile = opaque;
@@ -3148,7 +3288,7 @@
if (flags == RAM_CONTROL_SETUP) {
RDMAControlHeader resp = {.type = RDMA_CONTROL_RAM_BLOCKS_RESULT };
RDMALocalBlocks *local = &rdma->local_ram_blocks;
- int reg_result_idx, i, j, nb_dest_blocks;
+ int reg_result_idx, i, nb_dest_blocks;
head.type = RDMA_CONTROL_RAM_BLOCKS_REQUEST;
trace_qemu_rdma_registration_stop_ram();
@@ -3184,9 +3324,11 @@
*/
if (local->nb_blocks != nb_dest_blocks) {
- ERROR(errp, "ram blocks mismatch #1! "
+ ERROR(errp, "ram blocks mismatch (Number of blocks %d vs %d) "
"Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
+ "not identical on both the source and destination.",
+ local->nb_blocks, nb_dest_blocks);
+ rdma->error_state = -EINVAL;
return -EINVAL;
}
@@ -3196,30 +3338,18 @@
for (i = 0; i < nb_dest_blocks; i++) {
network_to_dest_block(&rdma->dest_blocks[i]);
- /* search local ram blocks */
- for (j = 0; j < local->nb_blocks; j++) {
- if (rdma->dest_blocks[i].offset != local->block[j].offset) {
- continue;
- }
-
- if (rdma->dest_blocks[i].length != local->block[j].length) {
- ERROR(errp, "ram blocks mismatch #2! "
- "Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
- return -EINVAL;
- }
- local->block[j].remote_host_addr =
- rdma->dest_blocks[i].remote_host_addr;
- local->block[j].remote_rkey = rdma->dest_blocks[i].remote_rkey;
- break;
- }
-
- if (j >= local->nb_blocks) {
- ERROR(errp, "ram blocks mismatch #3! "
- "Your QEMU command line parameters are probably "
- "not identical on both the source and destination.");
+ /* We require that the blocks are in the same order */
+ if (rdma->dest_blocks[i].length != local->block[i].length) {
+ ERROR(errp, "Block %s/%d has a different length %" PRIu64
+ "vs %" PRIu64, local->block[i].block_name, i,
+ local->block[i].length,
+ rdma->dest_blocks[i].length);
+ rdma->error_state = -EINVAL;
return -EINVAL;
}
+ local->block[i].remote_host_addr =
+ rdma->dest_blocks[i].remote_host_addr;
+ local->block[i].remote_rkey = rdma->dest_blocks[i].remote_rkey;
}
}
@@ -3250,7 +3380,7 @@
.get_buffer = qemu_rdma_get_buffer,
.get_fd = qemu_rdma_get_fd,
.close = qemu_rdma_close,
- .hook_ram_load = qemu_rdma_registration_handle,
+ .hook_ram_load = rdma_load_hook,
};
static const QEMUFileOps rdma_write_ops = {
@@ -3263,12 +3393,13 @@
static void *qemu_fopen_rdma(RDMAContext *rdma, const char *mode)
{
- QEMUFileRDMA *r = g_malloc0(sizeof(QEMUFileRDMA));
+ QEMUFileRDMA *r;
if (qemu_file_mode_is_not_valid(mode)) {
return NULL;
}
+ r = g_malloc0(sizeof(QEMUFileRDMA));
r->rdma = rdma;
if (mode[0] == 'w') {
@@ -3287,7 +3418,7 @@
QEMUFile *f;
Error *local_err = NULL, **errp = &local_err;
- trace_qemu_dma_accept_incoming_migration();
+ trace_qemu_rdma_accept_incoming_migration();
ret = qemu_rdma_accept(rdma);
if (ret) {
@@ -3295,7 +3426,7 @@
return;
}
- trace_qemu_dma_accept_incoming_migration_accepted();
+ trace_qemu_rdma_accept_incoming_migration_accepted();
f = qemu_fopen_rdma(rdma, "rb");
if (f == NULL) {
diff --git a/migration/savevm.c b/migration/savevm.c
index 9e0e286..86735fc 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -246,11 +246,55 @@
typedef struct SaveState {
QTAILQ_HEAD(, SaveStateEntry) handlers;
int global_section_id;
+ bool skip_configuration;
+ uint32_t len;
+ const char *name;
} SaveState;
static SaveState savevm_state = {
.handlers = QTAILQ_HEAD_INITIALIZER(savevm_state.handlers),
.global_section_id = 0,
+ .skip_configuration = false,
+};
+
+void savevm_skip_configuration(void)
+{
+ savevm_state.skip_configuration = true;
+}
+
+
+static void configuration_pre_save(void *opaque)
+{
+ SaveState *state = opaque;
+ const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+ state->len = strlen(current_name);
+ state->name = current_name;
+}
+
+static int configuration_post_load(void *opaque, int version_id)
+{
+ SaveState *state = opaque;
+ const char *current_name = MACHINE_GET_CLASS(current_machine)->name;
+
+ if (strncmp(state->name, current_name, state->len) != 0) {
+ error_report("Machine type received is '%s' and local is '%s'",
+ state->name, current_name);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static const VMStateDescription vmstate_configuration = {
+ .name = "configuration",
+ .version_id = 1,
+ .post_load = configuration_post_load,
+ .pre_save = configuration_pre_save,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT32(len, SaveState),
+ VMSTATE_VBUFFER_ALLOC_UINT32(name, SaveState, 0, NULL, 0, len),
+ VMSTATE_END_OF_LIST()
+ },
};
static void dump_vmstate_vmsd(FILE *out_file,
@@ -653,41 +697,6 @@
}
}
-/*
- * Read a footer off the wire and check that it matches the expected section
- *
- * Returns: true if the footer was good
- * false if there is a problem (and calls error_report to say why)
- */
-static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
-{
- uint8_t read_mark;
- uint32_t read_section_id;
-
- if (skip_section_footers) {
- /* No footer to check */
- return true;
- }
-
- read_mark = qemu_get_byte(f);
-
- if (read_mark != QEMU_VM_SECTION_FOOTER) {
- error_report("Missing section footer for %s", se->idstr);
- return false;
- }
-
- read_section_id = qemu_get_be32(f);
- if (read_section_id != se->section_id) {
- error_report("Mismatched section id in footer for %s -"
- " read 0x%x expected 0x%x",
- se->idstr, read_section_id, se->section_id);
- return false;
- }
-
- /* All good */
- return true;
-}
-
bool qemu_savevm_state_blocked(Error **errp)
{
SaveStateEntry *se;
@@ -723,6 +732,11 @@
se->ops->set_params(params, se->opaque);
}
+ if (!savevm_state.skip_configuration) {
+ qemu_put_byte(f, QEMU_VM_CONFIGURATION);
+ vmstate_save_state(f, &vmstate_configuration, &savevm_state, 0);
+ }
+
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_setup) {
continue;
@@ -836,6 +850,11 @@
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
}
+ if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+ trace_savevm_section_skip(se->idstr, se->section_id);
+ continue;
+ }
+
trace_savevm_section_start(se->idstr, se->section_id);
json_start_object(vmdesc, NULL);
@@ -949,6 +968,9 @@
if ((!se->ops || !se->ops->save_state) && !se->vmsd) {
continue;
}
+ if (se->vmsd && !vmstate_save_needed(se->vmsd, se->opaque)) {
+ continue;
+ }
save_section_header(f, se, QEMU_VM_SECTION_FULL);
@@ -989,6 +1011,41 @@
int version_id;
};
+/*
+ * Read a footer off the wire and check that it matches the expected section
+ *
+ * Returns: true if the footer was good
+ * false if there is a problem (and calls error_report to say why)
+ */
+static bool check_section_footer(QEMUFile *f, LoadStateEntry *le)
+{
+ uint8_t read_mark;
+ uint32_t read_section_id;
+
+ if (skip_section_footers) {
+ /* No footer to check */
+ return true;
+ }
+
+ read_mark = qemu_get_byte(f);
+
+ if (read_mark != QEMU_VM_SECTION_FOOTER) {
+ error_report("Missing section footer for %s", le->se->idstr);
+ return false;
+ }
+
+ read_section_id = qemu_get_be32(f);
+ if (read_section_id != le->section_id) {
+ error_report("Mismatched section id in footer for %s -"
+ " read 0x%x expected 0x%x",
+ le->se->idstr, read_section_id, le->section_id);
+ return false;
+ }
+
+ /* All good */
+ return true;
+}
+
void loadvm_free_handlers(MigrationIncomingState *mis)
{
LoadStateEntry *le, *new_le;
@@ -1029,6 +1086,18 @@
return -ENOTSUP;
}
+ if (!savevm_state.skip_configuration) {
+ if (qemu_get_byte(f) != QEMU_VM_CONFIGURATION) {
+ error_report("Configuration section missing");
+ return -EINVAL;
+ }
+ ret = vmstate_load_state(f, &vmstate_configuration, &savevm_state, 0);
+
+ if (ret) {
+ return ret;
+ }
+ }
+
while ((section_type = qemu_get_byte(f)) != QEMU_VM_EOF) {
uint32_t instance_id, version_id, section_id;
SaveStateEntry *se;
@@ -1082,7 +1151,7 @@
" device '%s'", instance_id, idstr);
goto out;
}
- if (!check_section_footer(f, le->se)) {
+ if (!check_section_footer(f, le)) {
ret = -EINVAL;
goto out;
}
@@ -1109,7 +1178,7 @@
section_id, le->se->idstr);
goto out;
}
- if (!check_section_footer(f, le->se)) {
+ if (!check_section_footer(f, le)) {
ret = -EINVAL;
goto out;
}
@@ -1127,16 +1196,35 @@
* Try to read in the VMDESC section as well, so that dumping tools that
* intercept our migration stream have the chance to see it.
*/
- if (qemu_get_byte(f) == QEMU_VM_VMDESCRIPTION) {
- uint32_t size = qemu_get_be32(f);
- uint8_t *buf = g_malloc(0x1000);
- while (size > 0) {
- uint32_t read_chunk = MIN(size, 0x1000);
- qemu_get_buffer(f, buf, read_chunk);
- size -= read_chunk;
+ /* We've got to be careful; if we don't read the data and just shut the fd
+ * then the sender can error if we close while it's still sending.
+ * We also mustn't read data that isn't there; some transports (RDMA)
+ * will stall waiting for that data when the source has already closed.
+ */
+ if (should_send_vmdesc()) {
+ uint8_t *buf;
+ uint32_t size;
+ section_type = qemu_get_byte(f);
+
+ if (section_type != QEMU_VM_VMDESCRIPTION) {
+ error_report("Expected vmdescription section, but got %d",
+ section_type);
+ /*
+ * It doesn't seem worth failing at this point since
+ * we apparently have an otherwise valid VM state
+ */
+ } else {
+ buf = g_malloc(0x1000);
+ size = qemu_get_be32(f);
+
+ while (size > 0) {
+ uint32_t read_chunk = MIN(size, 0x1000);
+ qemu_get_buffer(f, buf, read_chunk);
+ size -= read_chunk;
+ }
+ g_free(buf);
}
- g_free(buf);
}
cpu_synchronize_all_post_init();
diff --git a/migration/vmstate.c b/migration/vmstate.c
index 6138d1a..e8ccf22 100644
--- a/migration/vmstate.c
+++ b/migration/vmstate.c
@@ -276,6 +276,17 @@
json_end_object(vmdesc);
}
+
+bool vmstate_save_needed(const VMStateDescription *vmsd, void *opaque)
+{
+ if (vmsd->needed && !vmsd->needed(opaque)) {
+ /* optional section not needed */
+ return false;
+ }
+ return true;
+}
+
+
void vmstate_save_state(QEMUFile *f, const VMStateDescription *vmsd,
void *opaque, QJSON *vmdesc)
{
diff --git a/qapi-schema.json b/qapi-schema.json
index 106008c..1285b8c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -523,6 +523,9 @@
# minimize migration traffic. The feature is disabled by default.
# (since 2.4 )
#
+# @events: generate events for each migration state change
+# (since 2.4 )
+#
# @auto-converge: If enabled, QEMU will automatically throttle down the guest
# to speed up convergence of RAM migration. (since 1.6)
#
@@ -530,7 +533,7 @@
##
{ 'enum': 'MigrationCapability',
'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
- 'compress'] }
+ 'compress', 'events'] }
##
# @MigrationCapabilityStatus
diff --git a/qapi/event.json b/qapi/event.json
index 378dda5..f0cef01 100644
--- a/qapi/event.json
+++ b/qapi/event.json
@@ -243,6 +243,18 @@
{ 'event': 'SPICE_MIGRATE_COMPLETED' }
##
+# @MIGRATION
+#
+# Emitted when a migration event happens
+#
+# @status: @MigrationStatus describing the current migration status.
+#
+# Since: 2.4
+##
+{ 'event': 'MIGRATION',
+ 'data': {'status': 'MigrationStatus'}}
+
+##
# @ACPI_DEVICE_OST
#
# Emitted when guest executes ACPI _OST method.
diff --git a/tests/Makefile b/tests/Makefile
index eff5e11..2cd1195 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -135,6 +135,9 @@
gcov-files-pci-y += hw/display/vga.c
gcov-files-pci-y += hw/display/cirrus_vga.c
gcov-files-pci-y += hw/display/vga-pci.c
+gcov-files-pci-y += hw/display/virtio-gpu.c
+gcov-files-pci-y += hw/display/virtio-gpu-pci.c
+gcov-files-pci-$(CONFIG_VIRTIO_VGA) += hw/display/virtio-vga.c
check-qtest-pci-y += tests/intel-hda-test$(EXESUF)
gcov-files-pci-y += hw/audio/intel-hda.c hw/audio/hda-codec.c
diff --git a/tests/display-vga-test.c b/tests/display-vga-test.c
index 17f5910..7694344 100644
--- a/tests/display-vga-test.c
+++ b/tests/display-vga-test.c
@@ -36,6 +36,20 @@
qtest_end();
}
+static void pci_virtio_gpu(void)
+{
+ qtest_start("-vga none -device virtio-gpu-pci");
+ qtest_end();
+}
+
+#ifdef CONFIG_VIRTIO_VGA
+static void pci_virtio_vga(void)
+{
+ qtest_start("-vga none -device virtio-vga");
+ qtest_end();
+}
+#endif
+
int main(int argc, char **argv)
{
int ret;
@@ -46,6 +60,10 @@
qtest_add_func("/display/pci/stdvga", pci_stdvga);
qtest_add_func("/display/pci/secondary", pci_secondary);
qtest_add_func("/display/pci/multihead", pci_multihead);
+ qtest_add_func("/display/pci/virtio-gpu", pci_virtio_gpu);
+#ifdef CONFIG_VIRTIO_VGA
+ qtest_add_func("/display/pci/virtio-vga", pci_virtio_vga);
+#endif
ret = g_test_run();
return ret;
diff --git a/tests/rocker/bridge-vlan b/tests/rocker/bridge-vlan
index ef9e5f5..897d82c 100755
--- a/tests/rocker/bridge-vlan
+++ b/tests/rocker/bridge-vlan
@@ -20,8 +20,8 @@
# add both ports to VLAN 57
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
# turn off learning and flooding in SW
diff --git a/tests/rocker/bridge-vlan-stp b/tests/rocker/bridge-vlan-stp
index c660312..85d2646 100755
--- a/tests/rocker/bridge-vlan-stp
+++ b/tests/rocker/bridge-vlan-stp
@@ -21,8 +21,8 @@
# add both ports to VLAN 57
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1 master self"
-simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2 master self"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p1"
+simp ssh tut sw1 --cmd "sudo /sbin/bridge vlan add vid 57 dev sw1p2"
# turn off learning and flooding in SW
diff --git a/trace-events b/trace-events
index d24d80a..2d395c5 100644
--- a/trace-events
+++ b/trace-events
@@ -1191,6 +1191,7 @@
qemu_loadvm_state_section_startfull(uint32_t section_id, const char *idstr, uint32_t instance_id, uint32_t version_id) "%u(%s) %u %u"
savevm_section_start(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_section_end(const char *id, unsigned int section_id, int ret) "%s, section_id %u -> %d"
+savevm_section_skip(const char *id, unsigned int section_id) "%s, section_id %u"
savevm_state_begin(void) ""
savevm_state_header(void) ""
savevm_state_iterate(void) ""
@@ -1403,10 +1404,13 @@
migrate_fd_cancel(void) ""
migrate_pending(uint64_t size, uint64_t max) "pending size %" PRIu64 " max %" PRIu64
migrate_transferred(uint64_t tranferred, uint64_t time_spent, double bandwidth, uint64_t size) "transferred %" PRIu64 " time_spent %" PRIu64 " bandwidth %g max_size %" PRId64
+migrate_state_too_big(void) ""
+migrate_global_state_post_load(const char *state) "loaded state: %s"
+migrate_global_state_pre_save(const char *state) "saved state: %s"
# migration/rdma.c
-qemu_dma_accept_incoming_migration(void) ""
-qemu_dma_accept_incoming_migration_accepted(void) ""
+qemu_rdma_accept_incoming_migration(void) ""
+qemu_rdma_accept_incoming_migration_accepted(void) ""
qemu_rdma_accept_pin_state(bool pin) "%d"
qemu_rdma_accept_pin_verbsc(void *verbs) "Verbs context after listen: %p"
qemu_rdma_block_for_wrid_miss(const char *wcompstr, int wcomp, const char *gcompstr, uint64_t req) "A Wanted wrid %s (%d) but got %s (%" PRIu64 ")"
@@ -1433,13 +1437,14 @@
qemu_rdma_registration_handle_compress(int64_t length, int index, int64_t offset) "Zapping zero chunk: %" PRId64 " bytes, index %d, offset %" PRId64
qemu_rdma_registration_handle_finished(void) ""
qemu_rdma_registration_handle_ram_blocks(void) ""
+qemu_rdma_registration_handle_ram_blocks_loop(const char *name, uint64_t offset, uint64_t length, void *local_host_addr, unsigned int src_index) "%s: @%" PRIx64 "/%" PRIu64 " host:@%p src_index: %u"
qemu_rdma_registration_handle_register(int requests) "%d requests"
qemu_rdma_registration_handle_register_loop(int req, int index, uint64_t addr, uint64_t chunks) "Registration request (%d): index %d, current_addr %" PRIu64 " chunks: %" PRIu64
qemu_rdma_registration_handle_register_rkey(int rkey) "%x"
qemu_rdma_registration_handle_unregister(int requests) "%d requests"
qemu_rdma_registration_handle_unregister_loop(int count, int index, uint64_t chunk) "Unregistration request (%d): index %d, chunk %" PRIu64
qemu_rdma_registration_handle_unregister_success(uint64_t chunk) "%" PRIu64
-qemu_rdma_registration_handle_wait(uint64_t flags) "Waiting for next request %" PRIu64
+qemu_rdma_registration_handle_wait(void) ""
qemu_rdma_registration_start(uint64_t flags) "%" PRIu64
qemu_rdma_registration_stop(uint64_t flags) "%" PRIu64
qemu_rdma_registration_stop_ram(void) ""
@@ -1458,8 +1463,9 @@
qemu_rdma_write_one_sendreg(uint64_t chunk, int len, int index, int64_t offset) "Sending registration request chunk %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
qemu_rdma_write_one_top(uint64_t chunks, uint64_t size) "Writing %" PRIu64 " chunks, (%" PRIu64 " MB)"
qemu_rdma_write_one_zero(uint64_t chunk, int len, int index, int64_t offset) "Entire chunk is zero, sending compress: %" PRIu64 " for %d bytes, index: %d, offset: %" PRId64
-rdma_add_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
-rdma_delete_block(int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_add_block(const char *block_name, int block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Added Block: '%s':%d, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
+rdma_block_notification_handle(const char *name, int index) "%s at %d"
+rdma_delete_block(void *block, uint64_t addr, uint64_t offset, uint64_t len, uint64_t end, uint64_t bits, int chunks) "Deleted Block: %p, addr: %" PRIu64 ", offset: %" PRIu64 " length: %" PRIu64 " end: %" PRIu64 " bits %" PRIu64 " chunks %d"
rdma_start_incoming_migration(void) ""
rdma_start_incoming_migration_after_dest_init(void) ""
rdma_start_incoming_migration_after_rdma_listen(void) ""
diff --git a/vl.c b/vl.c
index 69ad90c..cfa6133 100644
--- a/vl.c
+++ b/vl.c
@@ -573,8 +573,14 @@
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
- { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
+ { RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
+ { RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
+ { RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
+ { RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
@@ -634,6 +640,18 @@
return current_run_state == state;
}
+bool runstate_store(char *str, size_t size)
+{
+ const char *state = RunState_lookup[current_run_state];
+ size_t len = strlen(state) + 1;
+
+ if (len > size) {
+ return false;
+ }
+ memcpy(str, state, len);
+ return true;
+}
+
static void runstate_init(void)
{
const RunStateTransition *p;
@@ -4606,6 +4624,7 @@
return 0;
}
+ register_global_state();
if (incoming) {
Error *local_err = NULL;
qemu_start_incoming_migration(incoming, &local_err);