Merge remote-tracking branch 'agraf/xen-next' into staging
diff --git a/configure b/configure
index e80a1e9..100be89 100755
--- a/configure
+++ b/configure
@@ -3295,10 +3295,12 @@
     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
       target_phys_bits=64
       echo "CONFIG_XEN=y" >> $config_target_mak
-      if test "$cpu" = "i386" -o "$cpu" = "x86_64"; then
-          echo "CONFIG_XEN_MAPCACHE=y" >> $config_target_mak
-      fi
+    else
+      echo "CONFIG_NO_XEN=y" >> $config_target_mak
     fi
+    ;;
+  *)
+    echo "CONFIG_NO_XEN=y" >> $config_target_mak
 esac
 case "$target_arch2" in
   i386|x86_64|ppcemb|ppc|ppc64|s390x)
diff --git a/cpu-common.h b/cpu-common.h
index 16c9f4f..c9878ba 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -23,7 +23,15 @@
 };
 
 /* address in the RAM (different from a physical address) */
+#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64
+typedef uint64_t ram_addr_t;
+#  define RAM_ADDR_MAX UINT64_MAX
+#  define RAM_ADDR_FMT "%" PRIx64
+#else
 typedef unsigned long ram_addr_t;
+#  define RAM_ADDR_MAX ULONG_MAX
+#  define RAM_ADDR_FMT "%lx"
+#endif
 
 /* memory API */
 
diff --git a/exec.c b/exec.c
index d51502f..476b507 100644
--- a/exec.c
+++ b/exec.c
@@ -2870,13 +2870,13 @@
 static ram_addr_t find_ram_offset(ram_addr_t size)
 {
     RAMBlock *block, *next_block;
-    ram_addr_t offset = 0, mingap = ULONG_MAX;
+    ram_addr_t offset = 0, mingap = RAM_ADDR_MAX;
 
     if (QLIST_EMPTY(&ram_list.blocks))
         return 0;
 
     QLIST_FOREACH(block, &ram_list.blocks, next) {
-        ram_addr_t end, next = ULONG_MAX;
+        ram_addr_t end, next = RAM_ADDR_MAX;
 
         end = block->offset + block->length;
 
@@ -3088,7 +3088,8 @@
 #endif
                 }
                 if (area != vaddr) {
-                    fprintf(stderr, "Could not remap addr: %lx@%lx\n",
+                    fprintf(stderr, "Could not remap addr: "
+                            RAM_ADDR_FMT "@" RAM_ADDR_FMT "\n",
                             length, addr);
                     exit(1);
                 }
@@ -3877,7 +3878,7 @@
     uint8_t *ptr;
     uint32_t val;
     target_phys_addr_t page;
-    unsigned long pd;
+    ram_addr_t pd;
     PhysPageDesc *p;
 
     while (len > 0) {
@@ -3917,7 +3918,7 @@
                     l = 1;
                 }
             } else {
-                unsigned long addr1;
+                ram_addr_t addr1;
                 addr1 = (pd & TARGET_PAGE_MASK) + (addr & ~TARGET_PAGE_MASK);
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
@@ -4071,7 +4072,7 @@
     target_phys_addr_t page;
     unsigned long pd;
     PhysPageDesc *p;
-    ram_addr_t raddr = ULONG_MAX;
+    ram_addr_t raddr = RAM_ADDR_MAX;
     ram_addr_t rlen;
     void *ret;
 
diff --git a/hw/ide.h b/hw/ide.h
index 34d9394..a490cbb 100644
--- a/hw/ide.h
+++ b/hw/ide.h
@@ -13,6 +13,7 @@
 /* ide-pci.c */
 void pci_cmd646_ide_init(PCIBus *bus, DriveInfo **hd_table,
                          int secondary_ide_enabled);
+PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 PCIDevice *pci_piix4_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
 void vt82c686b_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn);
diff --git a/hw/ide/piix.c b/hw/ide/piix.c
index 84f72b0..f527dbd 100644
--- a/hw/ide/piix.c
+++ b/hw/ide/piix.c
@@ -149,6 +149,42 @@
     return 0;
 }
 
+static int pci_piix3_xen_ide_unplug(DeviceState *dev)
+{
+    PCIDevice *pci_dev;
+    PCIIDEState *pci_ide;
+    DriveInfo *di;
+    int i = 0;
+
+    pci_dev = DO_UPCAST(PCIDevice, qdev, dev);
+    pci_ide = DO_UPCAST(PCIIDEState, dev, pci_dev);
+
+    for (; i < 3; i++) {
+        di = drive_get_by_index(IF_IDE, i);
+        if (di != NULL && di->bdrv != NULL && !di->bdrv->removable) {
+            DeviceState *ds = bdrv_get_attached(di->bdrv);
+            if (ds) {
+                bdrv_detach(di->bdrv, ds);
+            }
+            bdrv_close(di->bdrv);
+            pci_ide->bus[di->bus].ifs[di->unit].bs = NULL;
+            drive_put_ref(di);
+        }
+    }
+    qdev_reset_all(&(pci_ide->dev.qdev));
+    return 0;
+}
+
+PCIDevice *pci_piix3_xen_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
+{
+    PCIDevice *dev;
+
+    dev = pci_create_simple(bus, devfn, "piix3-ide-xen");
+    dev->qdev.info->unplug = pci_piix3_xen_ide_unplug;
+    pci_ide_create_devs(dev, hd_table);
+    return dev;
+}
+
 /* hd_table must contain 4 block drivers */
 /* NOTE: for the PIIX3, the IRQs and IOports are hardcoded */
 PCIDevice *pci_piix3_ide_init(PCIBus *bus, DriveInfo **hd_table, int devfn)
@@ -182,6 +218,14 @@
         .device_id    = PCI_DEVICE_ID_INTEL_82371SB_1,
         .class_id     = PCI_CLASS_STORAGE_IDE,
     },{
+        .qdev.name    = "piix3-ide-xen",
+        .qdev.size    = sizeof(PCIIDEState),
+        .qdev.no_user = 1,
+        .init         = pci_piix_ide_initfn,
+        .vendor_id    = PCI_VENDOR_ID_INTEL,
+        .device_id    = PCI_DEVICE_ID_INTEL_82371SB_1,
+        .class_id     = PCI_CLASS_STORAGE_IDE,
+    },{
         .qdev.name    = "piix4-ide",
         .qdev.size    = sizeof(PCIIDEState),
         .qdev.no_user = 1,
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 2b9c2b1..c0a2abe 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -160,7 +160,11 @@
     ide_drive_get(hd, MAX_IDE_BUS);
     if (pci_enabled) {
         PCIDevice *dev;
-        dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1);
+        if (xen_enabled()) {
+            dev = pci_piix3_xen_ide_init(pci_bus, hd, piix3_devfn + 1);
+        } else {
+            dev = pci_piix3_ide_init(pci_bus, hd, piix3_devfn + 1);
+        }
         idebus[0] = qdev_get_child_bus(&dev->qdev, "ide.0");
         idebus[1] = qdev_get_child_bus(&dev->qdev, "ide.1");
     } else {
diff --git a/hw/xen.h b/hw/xen.h
index e432705..2162111 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -24,7 +24,7 @@
 
 static inline int xen_enabled(void)
 {
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN_BACKEND) && !defined(CONFIG_NO_XEN)
     return xen_allowed;
 #else
     return 0;
diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index f43e175..fb6be6a 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -76,6 +76,35 @@
 }
 
 /* Xen Platform, Fixed IOPort */
+#define UNPLUG_ALL_IDE_DISKS 1
+#define UNPLUG_ALL_NICS 2
+#define UNPLUG_AUX_IDE_DISKS 4
+
+static void unplug_nic(PCIBus *b, PCIDevice *d)
+{
+    if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
+            PCI_CLASS_NETWORK_ETHERNET) {
+        qdev_unplug(&(d->qdev));
+    }
+}
+
+static void pci_unplug_nics(PCIBus *bus)
+{
+    pci_for_each_device(bus, 0, unplug_nic);
+}
+
+static void unplug_disks(PCIBus *b, PCIDevice *d)
+{
+    if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
+            PCI_CLASS_STORAGE_IDE) {
+        qdev_unplug(&(d->qdev));
+    }
+}
+
+static void pci_unplug_disks(PCIBus *bus)
+{
+    pci_for_each_device(bus, 0, unplug_disks);
+}
 
 static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t val)
 {
@@ -83,10 +112,22 @@
 
     switch (addr - XEN_PLATFORM_IOPORT) {
     case 0:
-        /* TODO: */
         /* Unplug devices.  Value is a bitmask of which devices to
            unplug, with bit 0 the IDE devices, bit 1 the network
            devices, and bit 2 the non-primary-master IDE devices. */
+        if (val & UNPLUG_ALL_IDE_DISKS) {
+            DPRINTF("unplug disks\n");
+            qemu_aio_flush();
+            bdrv_flush_all();
+            pci_unplug_disks(s->pci_dev.bus);
+        }
+        if (val & UNPLUG_ALL_NICS) {
+            DPRINTF("unplug nics\n");
+            pci_unplug_nics(s->pci_dev.bus);
+        }
+        if (val & UNPLUG_AUX_IDE_DISKS) {
+            DPRINTF("unplug auxiliary disks not supported\n");
+        }
         break;
     case 2:
         switch (val) {
diff --git a/vl.c b/vl.c
index 73316cf..426cea7 100644
--- a/vl.c
+++ b/vl.c
@@ -2454,11 +2454,6 @@
                     exit(1);
                 }
 
-                /* On 32-bit hosts, QEMU is limited by virtual address space */
-                if (value > (2047 << 20) && HOST_LONG_BITS == 32) {
-                    fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
-                    exit(1);
-                }
                 if (value != (uint64_t)(ram_addr_t)value) {
                     fprintf(stderr, "qemu: ram size too large\n");
                     exit(1);
@@ -3116,8 +3111,17 @@
         exit(1);
 
     /* init the memory */
-    if (ram_size == 0)
+    if (ram_size == 0) {
         ram_size = DEFAULT_RAM_SIZE * 1024 * 1024;
+    }
+
+    if (!xen_enabled()) {
+        /* On 32-bit hosts, QEMU is limited by virtual address space */
+        if (ram_size > (2047 << 20) && HOST_LONG_BITS == 32) {
+            fprintf(stderr, "qemu: at most 2047 MB RAM can be simulated\n");
+            exit(1);
+        }
+    }
 
     /* init the dynamic translator */
     cpu_exec_init_all(tb_size * 1024 * 1024);
diff --git a/xen-all.c b/xen-all.c
index 167bed6..9eaeac1 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -19,6 +19,7 @@
 
 #include <xen/hvm/ioreq.h>
 #include <xen/hvm/params.h>
+#include <xen/hvm/e820.h>
 
 //#define DEBUG_XEN
 
@@ -144,6 +145,12 @@
     new_block->host = NULL;
     new_block->offset = 0;
     new_block->length = ram_size;
+    if (ram_size >= HVM_BELOW_4G_RAM_END) {
+        /* Xen does not allocate the memory continuously, and keep a hole at
+         * HVM_BELOW_4G_MMIO_START of HVM_BELOW_4G_MMIO_LENGTH
+         */
+        new_block->length += HVM_BELOW_4G_MMIO_LENGTH;
+    }
 
     QLIST_INSERT_HEAD(&ram_list.blocks, new_block, next);
 
@@ -152,20 +159,26 @@
     memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
            0xff, new_block->length >> TARGET_PAGE_BITS);
 
-    if (ram_size >= 0xe0000000 ) {
-        above_4g_mem_size = ram_size - 0xe0000000;
-        below_4g_mem_size = 0xe0000000;
+    if (ram_size >= HVM_BELOW_4G_RAM_END) {
+        above_4g_mem_size = ram_size - HVM_BELOW_4G_RAM_END;
+        below_4g_mem_size = HVM_BELOW_4G_RAM_END;
     } else {
         below_4g_mem_size = ram_size;
     }
 
-    cpu_register_physical_memory(0, below_4g_mem_size, new_block->offset);
-#if TARGET_PHYS_ADDR_BITS > 32
+    cpu_register_physical_memory(0, 0xa0000, 0);
+    /* Skip of the VGA IO memory space, it will be registered later by the VGA
+     * emulated device.
+     *
+     * The area between 0xc0000 and 0x100000 will be used by SeaBIOS to load
+     * the Options ROM, so it is registered here as RAM.
+     */
+    cpu_register_physical_memory(0xc0000, below_4g_mem_size - 0xc0000,
+                                 0xc0000);
     if (above_4g_mem_size > 0) {
         cpu_register_physical_memory(0x100000000ULL, above_4g_mem_size,
-                                     new_block->offset + below_4g_mem_size);
+                                     0x100000000ULL);
     }
-#endif
 }
 
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size)
@@ -184,7 +197,7 @@
     }
 
     if (xc_domain_populate_physmap_exact(xen_xc, xen_domid, nr_pfn, 0, 0, pfn_list)) {
-        hw_error("xen: failed to populate ram at %lx", ram_addr);
+        hw_error("xen: failed to populate ram at " RAM_ADDR_FMT, ram_addr);
     }
 
     qemu_free(pfn_list);
@@ -797,12 +810,17 @@
     }
 }
 
-static void xenstore_record_dm_state(XenIOState *s, const char *state)
+static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
 {
     char path[50];
 
+    if (xs == NULL) {
+        fprintf(stderr, "xenstore connection not initialized\n");
+        exit(1);
+    }
+
     snprintf(path, sizeof (path), "/local/domain/0/device-model/%u/state", xen_domid);
-    if (!xs_write(s->xenstore, XBT_NULL, path, state, strlen(state))) {
+    if (!xs_write(xs, XBT_NULL, path, state, strlen(state))) {
         fprintf(stderr, "error recording dm state\n");
         exit(1);
     }
@@ -823,15 +841,20 @@
     if (evtchn_fd != -1) {
         qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, state);
     }
-
-    /* record state running */
-    xenstore_record_dm_state(state, "running");
 }
 
 
 /* Initialise Xen */
 
-static void xen_vm_change_state_handler(void *opaque, int running, int reason)
+static void xen_change_state_handler(void *opaque, int running, int reason)
+{
+    if (running) {
+        /* record state running */
+        xenstore_record_dm_state(xenstore, "running");
+    }
+}
+
+static void xen_hvm_change_state_handler(void *opaque, int running, int reason)
 {
     XenIOState *state = opaque;
     if (running) {
@@ -854,6 +877,7 @@
         xen_be_printf(NULL, 0, "can't open xen interface\n");
         return -1;
     }
+    qemu_add_vm_change_state_handler(xen_change_state_handler, NULL);
 
     return 0;
 }
@@ -915,7 +939,7 @@
     xen_map_cache_init();
     xen_ram_init(ram_size);
 
-    qemu_add_vm_change_state_handler(xen_vm_change_state_handler, state);
+    qemu_add_vm_change_state_handler(xen_hvm_change_state_handler, state);
 
     state->client = xen_cpu_phys_memory_client;
     QLIST_INIT(&state->physmap);