Merge branch 'master' of git://git.qemu.org/qemu
diff --git a/configure b/configure
index 850efc0..254b801 100755
--- a/configure
+++ b/configure
@@ -1828,7 +1828,7 @@
 #include <check.h>
 int main(void) { suite_create("qemu test"); return 0; }
 EOF
-  check_libs=`$pkg_config --libs check`
+  check_libs=`$pkg_config --libs check 2>/dev/null`
   if compile_prog "" $check_libs ; then
     check_utests=yes
     libs_tools="$check_libs $libs_tools"
diff --git a/cutils.c b/cutils.c
index c91f887..5d995bc 100644
--- a/cutils.c
+++ b/cutils.c
@@ -415,3 +415,15 @@
 {
     return strtosz_suffix(nptr, end, STRTOSZ_DEFSUFFIX_MB);
 }
+
+int qemu_parse_fd(const char *param)
+{
+    int fd;
+    char *endptr = NULL;
+
+    fd = strtol(param, &endptr, 10);
+    if (*endptr || (fd == 0 && param == endptr)) {
+        return -1;
+    }
+    return fd;
+}
diff --git a/dma-helpers.c b/dma-helpers.c
index 86d2d0a..bdcd38c 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -18,8 +18,7 @@
     qsg->size = 0;
 }
 
-void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
-                     target_phys_addr_t len)
+void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len)
 {
     if (qsg->nsg == qsg->nalloc) {
         qsg->nalloc = 2 * qsg->nalloc + 1;
@@ -45,7 +44,7 @@
     bool to_dev;
     bool in_cancel;
     int sg_cur_index;
-    target_phys_addr_t sg_cur_byte;
+    dma_addr_t sg_cur_byte;
     QEMUIOVector iov;
     QEMUBH *bh;
     DMAIOFunc *io_func;
diff --git a/dma.h b/dma.h
index 2bdc236..a13209d 100644
--- a/dma.h
+++ b/dma.h
@@ -18,21 +18,29 @@
 typedef struct ScatterGatherEntry ScatterGatherEntry;
 
 #if defined(TARGET_PHYS_ADDR_BITS)
+typedef target_phys_addr_t dma_addr_t;
+
+#define DMA_ADDR_FMT TARGET_FMT_plx
+
+typedef enum {
+    DMA_DIRECTION_TO_DEVICE = 0,
+    DMA_DIRECTION_FROM_DEVICE = 1,
+} DMADirection;
+
 struct ScatterGatherEntry {
-    target_phys_addr_t base;
-    target_phys_addr_t len;
+    dma_addr_t base;
+    dma_addr_t len;
 };
 
 struct QEMUSGList {
     ScatterGatherEntry *sg;
     int nsg;
     int nalloc;
-    target_phys_addr_t size;
+    dma_addr_t size;
 };
 
 void qemu_sglist_init(QEMUSGList *qsg, int alloc_hint);
-void qemu_sglist_add(QEMUSGList *qsg, target_phys_addr_t base,
-                     target_phys_addr_t len);
+void qemu_sglist_add(QEMUSGList *qsg, dma_addr_t base, dma_addr_t len);
 void qemu_sglist_destroy(QEMUSGList *qsg);
 #endif
 
diff --git a/hw/event_notifier.c b/event_notifier.c
similarity index 98%
rename from hw/event_notifier.c
rename to event_notifier.c
index 13f3656..2c73555 100644
--- a/hw/event_notifier.c
+++ b/event_notifier.c
@@ -10,7 +10,6 @@
  * the COPYING file in the top-level directory.
  */
 
-#include "hw.h"
 #include "event_notifier.h"
 #ifdef CONFIG_EVENTFD
 #include <sys/eventfd.h>
diff --git a/hw/event_notifier.h b/event_notifier.h
similarity index 100%
rename from hw/event_notifier.h
rename to event_notifier.h
diff --git a/hw/ac97.c b/hw/ac97.c
index bc69d4e..6800af4 100644
--- a/hw/ac97.c
+++ b/hw/ac97.c
@@ -18,6 +18,7 @@
 #include "audiodev.h"
 #include "audio/audio.h"
 #include "pci.h"
+#include "dma.h"
 
 enum {
     AC97_Reset                     = 0x00,
@@ -224,7 +225,7 @@
 {
     uint8_t b[8];
 
-    cpu_physical_memory_read (r->bdbar + r->civ * 8, b, 8);
+    pci_dma_read (&s->dev, r->bdbar + r->civ * 8, b, 8);
     r->bd_valid = 1;
     r->bd.addr = le32_to_cpu (*(uint32_t *) &b[0]) & ~3;
     r->bd.ctl_len = le32_to_cpu (*(uint32_t *) &b[4]);
@@ -973,7 +974,7 @@
     while (temp) {
         int copied;
         to_copy = audio_MIN (temp, sizeof (tmpbuf));
-        cpu_physical_memory_read (addr, tmpbuf, to_copy);
+        pci_dma_read (&s->dev, addr, tmpbuf, to_copy);
         copied = AUD_write (s->voice_po, tmpbuf, to_copy);
         dolog ("write_audio max=%x to_copy=%x copied=%x\n",
                max, to_copy, copied);
@@ -1054,7 +1055,7 @@
             *stop = 1;
             break;
         }
-        cpu_physical_memory_write (addr, tmpbuf, acquired);
+        pci_dma_write (&s->dev, addr, tmpbuf, acquired);
         temp -= acquired;
         addr += acquired;
         nread += acquired;
diff --git a/hw/e1000.c b/hw/e1000.c
index ce8fc8b..986ed9c 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -31,6 +31,7 @@
 #include "net/checksum.h"
 #include "loader.h"
 #include "sysemu.h"
+#include "dma.h"
 
 #include "e1000_hw.h"
 
@@ -465,7 +466,7 @@
             bytes = split_size;
             if (tp->size + bytes > msh)
                 bytes = msh - tp->size;
-            cpu_physical_memory_read(addr, tp->data + tp->size, bytes);
+            pci_dma_read(&s->dev, addr, tp->data + tp->size, bytes);
             if ((sz = tp->size + bytes) >= hdr && tp->size < hdr)
                 memmove(tp->header, tp->data, hdr);
             tp->size = sz;
@@ -480,7 +481,7 @@
         // context descriptor TSE is not set, while data descriptor TSE is set
         DBGOUT(TXERR, "TCP segmentaion Error\n");
     } else {
-        cpu_physical_memory_read(addr, tp->data + tp->size, split_size);
+        pci_dma_read(&s->dev, addr, tp->data + tp->size, split_size);
         tp->size += split_size;
     }
 
@@ -496,7 +497,7 @@
 }
 
 static uint32_t
-txdesc_writeback(target_phys_addr_t base, struct e1000_tx_desc *dp)
+txdesc_writeback(E1000State *s, dma_addr_t base, struct e1000_tx_desc *dp)
 {
     uint32_t txd_upper, txd_lower = le32_to_cpu(dp->lower.data);
 
@@ -505,8 +506,8 @@
     txd_upper = (le32_to_cpu(dp->upper.data) | E1000_TXD_STAT_DD) &
                 ~(E1000_TXD_STAT_EC | E1000_TXD_STAT_LC | E1000_TXD_STAT_TU);
     dp->upper.data = cpu_to_le32(txd_upper);
-    cpu_physical_memory_write(base + ((char *)&dp->upper - (char *)dp),
-                              (void *)&dp->upper, sizeof(dp->upper));
+    pci_dma_write(&s->dev, base + ((char *)&dp->upper - (char *)dp),
+                  (void *)&dp->upper, sizeof(dp->upper));
     return E1000_ICR_TXDW;
 }
 
@@ -521,7 +522,7 @@
 static void
 start_xmit(E1000State *s)
 {
-    target_phys_addr_t base;
+    dma_addr_t base;
     struct e1000_tx_desc desc;
     uint32_t tdh_start = s->mac_reg[TDH], cause = E1000_ICS_TXQE;
 
@@ -533,14 +534,14 @@
     while (s->mac_reg[TDH] != s->mac_reg[TDT]) {
         base = tx_desc_base(s) +
                sizeof(struct e1000_tx_desc) * s->mac_reg[TDH];
-        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
+        pci_dma_read(&s->dev, base, (void *)&desc, sizeof(desc));
 
         DBGOUT(TX, "index %d: %p : %x %x\n", s->mac_reg[TDH],
                (void *)(intptr_t)desc.buffer_addr, desc.lower.data,
                desc.upper.data);
 
         process_tx_desc(s, &desc);
-        cause |= txdesc_writeback(base, &desc);
+        cause |= txdesc_writeback(s, base, &desc);
 
         if (++s->mac_reg[TDH] * sizeof(desc) >= s->mac_reg[TDLEN])
             s->mac_reg[TDH] = 0;
@@ -668,7 +669,7 @@
 {
     E1000State *s = DO_UPCAST(NICState, nc, nc)->opaque;
     struct e1000_rx_desc desc;
-    target_phys_addr_t base;
+    dma_addr_t base;
     unsigned int n, rdt;
     uint32_t rdh_start;
     uint16_t vlan_special = 0;
@@ -713,7 +714,7 @@
             desc_size = s->rxbuf_size;
         }
         base = rx_desc_base(s) + sizeof(desc) * s->mac_reg[RDH];
-        cpu_physical_memory_read(base, (void *)&desc, sizeof(desc));
+        pci_dma_read(&s->dev, base, (void *)&desc, sizeof(desc));
         desc.special = vlan_special;
         desc.status |= (vlan_status | E1000_RXD_STAT_DD);
         if (desc.buffer_addr) {
@@ -722,9 +723,9 @@
                 if (copy_size > s->rxbuf_size) {
                     copy_size = s->rxbuf_size;
                 }
-                cpu_physical_memory_write(le64_to_cpu(desc.buffer_addr),
-                                          (void *)(buf + desc_offset + vlan_offset),
-                                          copy_size);
+                pci_dma_write(&s->dev, le64_to_cpu(desc.buffer_addr),
+                                 (void *)(buf + desc_offset + vlan_offset),
+                                 copy_size);
             }
             desc_offset += desc_size;
             desc.length = cpu_to_le16(desc_size);
@@ -738,7 +739,7 @@
         } else { // as per intel docs; skip descriptors with null buf addr
             DBGOUT(RX, "Null RX descriptor!!\n");
         }
-        cpu_physical_memory_write(base, (void *)&desc, sizeof(desc));
+        pci_dma_write(&s->dev, base, (void *)&desc, sizeof(desc));
 
         if (++s->mac_reg[RDH] * sizeof(desc) >= s->mac_reg[RDLEN])
             s->mac_reg[RDH] = 0;
diff --git a/hw/eepro100.c b/hw/eepro100.c
index 4e3c52f..7d59e71 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -46,6 +46,7 @@
 #include "net.h"
 #include "eeprom93xx.h"
 #include "sysemu.h"
+#include "dma.h"
 
 /* QEMU sends frames smaller than 60 bytes to ethernet nics.
  * Such frames are rejected by real nics and their emulations.
@@ -315,38 +316,6 @@
     0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
 };
 
-/* Read a 16 bit little endian value from physical memory. */
-static uint16_t e100_ldw_le_phys(target_phys_addr_t addr)
-{
-    /* Load 16 bit (little endian) word from emulated hardware. */
-    uint16_t val;
-    cpu_physical_memory_read(addr, &val, sizeof(val));
-    return le16_to_cpu(val);
-}
-
-/* Read a 32 bit little endian value from physical memory. */
-static uint32_t e100_ldl_le_phys(target_phys_addr_t addr)
-{
-    /* Load 32 bit (little endian) word from emulated hardware. */
-    uint32_t val;
-    cpu_physical_memory_read(addr, &val, sizeof(val));
-    return le32_to_cpu(val);
-}
-
-/* Write a 16 bit little endian value to physical memory. */
-static void e100_stw_le_phys(target_phys_addr_t addr, uint16_t val)
-{
-    val = cpu_to_le16(val);
-    cpu_physical_memory_write(addr, &val, sizeof(val));
-}
-
-/* Write a 32 bit little endian value to physical memory. */
-static void e100_stl_le_phys(target_phys_addr_t addr, uint32_t val)
-{
-    val = cpu_to_le32(val);
-    cpu_physical_memory_write(addr, &val, sizeof(val));
-}
-
 #define POLYNOMIAL 0x04c11db6
 
 /* From FreeBSD */
@@ -744,21 +713,26 @@
      * values which really matter.
      * Number of data should check configuration!!!
      */
-    cpu_physical_memory_write(s->statsaddr, &s->statistics, s->stats_size);
-    e100_stl_le_phys(s->statsaddr + 0, s->statistics.tx_good_frames);
-    e100_stl_le_phys(s->statsaddr + 36, s->statistics.rx_good_frames);
-    e100_stl_le_phys(s->statsaddr + 48, s->statistics.rx_resource_errors);
-    e100_stl_le_phys(s->statsaddr + 60, s->statistics.rx_short_frame_errors);
+    pci_dma_write(&s->dev, s->statsaddr,
+                  (uint8_t *) &s->statistics, s->stats_size);
+    stl_le_pci_dma(&s->dev, s->statsaddr + 0,
+                   s->statistics.tx_good_frames);
+    stl_le_pci_dma(&s->dev, s->statsaddr + 36,
+                   s->statistics.rx_good_frames);
+    stl_le_pci_dma(&s->dev, s->statsaddr + 48,
+                   s->statistics.rx_resource_errors);
+    stl_le_pci_dma(&s->dev, s->statsaddr + 60,
+                   s->statistics.rx_short_frame_errors);
 #if 0
-    e100_stw_le_phys(s->statsaddr + 76, s->statistics.xmt_tco_frames);
-    e100_stw_le_phys(s->statsaddr + 78, s->statistics.rcv_tco_frames);
+    stw_le_pci_dma(&s->dev, s->statsaddr + 76, s->statistics.xmt_tco_frames);
+    stw_le_pci_dma(&s->dev, s->statsaddr + 78, s->statistics.rcv_tco_frames);
     missing("CU dump statistical counters");
 #endif
 }
 
 static void read_cb(EEPRO100State *s)
 {
-    cpu_physical_memory_read(s->cb_address, &s->tx, sizeof(s->tx));
+    pci_dma_read(&s->dev, s->cb_address, (uint8_t *) &s->tx, sizeof(s->tx));
     s->tx.status = le16_to_cpu(s->tx.status);
     s->tx.command = le16_to_cpu(s->tx.command);
     s->tx.link = le32_to_cpu(s->tx.link);
@@ -788,18 +762,17 @@
     }
     assert(tcb_bytes <= sizeof(buf));
     while (size < tcb_bytes) {
-        uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address);
-        uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4);
+        uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev, tbd_address);
+        uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev, tbd_address + 4);
 #if 0
-        uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6);
+        uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, tbd_address + 6);
 #endif
         tbd_address += 8;
         TRACE(RXTX, logout
             ("TBD (simplified mode): buffer address 0x%08x, size 0x%04x\n",
              tx_buffer_address, tx_buffer_size));
         tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size);
-        cpu_physical_memory_read(tx_buffer_address, &buf[size],
-                                 tx_buffer_size);
+        pci_dma_read(&s->dev, tx_buffer_address, &buf[size], tx_buffer_size);
         size += tx_buffer_size;
     }
     if (tbd_array == 0xffffffff) {
@@ -810,16 +783,19 @@
         if (s->has_extended_tcb_support && !(s->configuration[6] & BIT(4))) {
             /* Extended Flexible TCB. */
             for (; tbd_count < 2; tbd_count++) {
-                uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address);
-                uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4);
-                uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6);
+                uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev,
+                                                            tbd_address);
+                uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev,
+                                                          tbd_address + 4);
+                uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev,
+                                                        tbd_address + 6);
                 tbd_address += 8;
                 TRACE(RXTX, logout
                     ("TBD (extended flexible mode): buffer address 0x%08x, size 0x%04x\n",
                      tx_buffer_address, tx_buffer_size));
                 tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size);
-                cpu_physical_memory_read(tx_buffer_address, &buf[size],
-                                         tx_buffer_size);
+                pci_dma_read(&s->dev, tx_buffer_address,
+                             &buf[size], tx_buffer_size);
                 size += tx_buffer_size;
                 if (tx_buffer_el & 1) {
                     break;
@@ -828,16 +804,16 @@
         }
         tbd_address = tbd_array;
         for (; tbd_count < s->tx.tbd_count; tbd_count++) {
-            uint32_t tx_buffer_address = e100_ldl_le_phys(tbd_address);
-            uint16_t tx_buffer_size = e100_ldw_le_phys(tbd_address + 4);
-            uint16_t tx_buffer_el = e100_ldw_le_phys(tbd_address + 6);
+            uint32_t tx_buffer_address = ldl_le_pci_dma(&s->dev, tbd_address);
+            uint16_t tx_buffer_size = lduw_le_pci_dma(&s->dev, tbd_address + 4);
+            uint16_t tx_buffer_el = lduw_le_pci_dma(&s->dev, tbd_address + 6);
             tbd_address += 8;
             TRACE(RXTX, logout
                 ("TBD (flexible mode): buffer address 0x%08x, size 0x%04x\n",
                  tx_buffer_address, tx_buffer_size));
             tx_buffer_size = MIN(tx_buffer_size, sizeof(buf) - size);
-            cpu_physical_memory_read(tx_buffer_address, &buf[size],
-                                     tx_buffer_size);
+            pci_dma_read(&s->dev, tx_buffer_address,
+                         &buf[size], tx_buffer_size);
             size += tx_buffer_size;
             if (tx_buffer_el & 1) {
                 break;
@@ -862,7 +838,7 @@
     TRACE(OTHER, logout("multicast list, multicast count = %u\n", multicast_count));
     for (i = 0; i < multicast_count; i += 6) {
         uint8_t multicast_addr[6];
-        cpu_physical_memory_read(s->cb_address + 10 + i, multicast_addr, 6);
+        pci_dma_read(&s->dev, s->cb_address + 10 + i, multicast_addr, 6);
         TRACE(OTHER, logout("multicast entry %s\n", nic_dump(multicast_addr, 6)));
         unsigned mcast_idx = compute_mcast_idx(multicast_addr);
         assert(mcast_idx < 64);
@@ -896,12 +872,12 @@
             /* Do nothing. */
             break;
         case CmdIASetup:
-            cpu_physical_memory_read(s->cb_address + 8, &s->conf.macaddr.a[0], 6);
+            pci_dma_read(&s->dev, s->cb_address + 8, &s->conf.macaddr.a[0], 6);
             TRACE(OTHER, logout("macaddr: %s\n", nic_dump(&s->conf.macaddr.a[0], 6)));
             break;
         case CmdConfigure:
-            cpu_physical_memory_read(s->cb_address + 8, &s->configuration[0],
-                                     sizeof(s->configuration));
+            pci_dma_read(&s->dev, s->cb_address + 8,
+                         &s->configuration[0], sizeof(s->configuration));
             TRACE(OTHER, logout("configuration: %s\n",
                                 nic_dump(&s->configuration[0], 16)));
             TRACE(OTHER, logout("configuration: %s\n",
@@ -938,7 +914,8 @@
             break;
         }
         /* Write new status. */
-        e100_stw_le_phys(s->cb_address, s->tx.status | ok_status | STATUS_C);
+        stw_le_pci_dma(&s->dev, s->cb_address,
+                       s->tx.status | ok_status | STATUS_C);
         if (bit_i) {
             /* CU completed action. */
             eepro100_cx_interrupt(s);
@@ -1005,7 +982,7 @@
         /* Dump statistical counters. */
         TRACE(OTHER, logout("val=0x%02x (dump stats)\n", val));
         dump_statistics(s);
-        e100_stl_le_phys(s->statsaddr + s->stats_size, 0xa005);
+        stl_le_pci_dma(&s->dev, s->statsaddr + s->stats_size, 0xa005);
         break;
     case CU_CMD_BASE:
         /* Load CU base. */
@@ -1016,7 +993,7 @@
         /* Dump and reset statistical counters. */
         TRACE(OTHER, logout("val=0x%02x (dump stats and reset)\n", val));
         dump_statistics(s);
-        e100_stl_le_phys(s->statsaddr + s->stats_size, 0xa007);
+        stl_le_pci_dma(&s->dev, s->statsaddr + s->stats_size, 0xa007);
         memset(&s->statistics, 0, sizeof(s->statistics));
         break;
     case CU_SRESUME:
@@ -1310,10 +1287,10 @@
     case PORT_SELFTEST:
         TRACE(OTHER, logout("selftest address=0x%08x\n", address));
         eepro100_selftest_t data;
-        cpu_physical_memory_read(address, &data, sizeof(data));
+        pci_dma_read(&s->dev, address, (uint8_t *) &data, sizeof(data));
         data.st_sign = 0xffffffff;
         data.st_result = 0;
-        cpu_physical_memory_write(address, &data, sizeof(data));
+        pci_dma_write(&s->dev, address, (uint8_t *) &data, sizeof(data));
         break;
     case PORT_SELECTIVE_RESET:
         TRACE(OTHER, logout("selective reset, selftest address=0x%08x\n", address));
@@ -1729,8 +1706,8 @@
     }
     /* !!! */
     eepro100_rx_t rx;
-    cpu_physical_memory_read(s->ru_base + s->ru_offset, &rx,
-                             sizeof(eepro100_rx_t));
+    pci_dma_read(&s->dev, s->ru_base + s->ru_offset,
+                 (uint8_t *) &rx, sizeof(eepro100_rx_t));
     uint16_t rfd_command = le16_to_cpu(rx.command);
     uint16_t rfd_size = le16_to_cpu(rx.size);
 
@@ -1746,10 +1723,10 @@
 #endif
     TRACE(OTHER, logout("command 0x%04x, link 0x%08x, addr 0x%08x, size %u\n",
           rfd_command, rx.link, rx.rx_buf_addr, rfd_size));
-    e100_stw_le_phys(s->ru_base + s->ru_offset +
-                     offsetof(eepro100_rx_t, status), rfd_status);
-    e100_stw_le_phys(s->ru_base + s->ru_offset +
-                     offsetof(eepro100_rx_t, count), size);
+    stw_le_pci_dma(&s->dev, s->ru_base + s->ru_offset +
+                offsetof(eepro100_rx_t, status), rfd_status);
+    stw_le_pci_dma(&s->dev, s->ru_base + s->ru_offset +
+                offsetof(eepro100_rx_t, count), size);
     /* Early receive interrupt not supported. */
 #if 0
     eepro100_er_interrupt(s);
@@ -1763,8 +1740,8 @@
 #if 0
     assert(!(s->configuration[17] & BIT(0)));
 #endif
-    cpu_physical_memory_write(s->ru_base + s->ru_offset +
-                              sizeof(eepro100_rx_t), buf, size);
+    pci_dma_write(&s->dev, s->ru_base + s->ru_offset +
+                  sizeof(eepro100_rx_t), buf, size);
     s->statistics.rx_good_frames++;
     eepro100_fr_interrupt(s);
     s->ru_offset = le32_to_cpu(rx.link);
diff --git a/hw/es1370.c b/hw/es1370.c
index 2daadde..c5c16b0 100644
--- a/hw/es1370.c
+++ b/hw/es1370.c
@@ -30,6 +30,7 @@
 #include "audiodev.h"
 #include "audio/audio.h"
 #include "pci.h"
+#include "dma.h"
 
 /* Missing stuff:
    SCTRL_P[12](END|ST)INC
@@ -802,7 +803,7 @@
             if (!acquired)
                 break;
 
-            cpu_physical_memory_write (addr, tmpbuf, acquired);
+            pci_dma_write (&s->dev, addr, tmpbuf, acquired);
 
             temp -= acquired;
             addr += acquired;
@@ -816,7 +817,7 @@
             int copied, to_copy;
 
             to_copy = audio_MIN ((size_t) temp, sizeof (tmpbuf));
-            cpu_physical_memory_read (addr, tmpbuf, to_copy);
+            pci_dma_read (&s->dev, addr, tmpbuf, to_copy);
             copied = AUD_write (voice, tmpbuf, to_copy);
             if (!copied)
                 break;
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index f133c42..49b823d 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -62,7 +62,8 @@
     } prd;
     int l, len;
 
-    qemu_sglist_init(&s->sg, s->nsector / (BMDMA_PAGE_SIZE / 512) + 1);
+    pci_dma_sglist_init(&s->sg, &bm->pci_dev->dev,
+                        s->nsector / (BMDMA_PAGE_SIZE / 512) + 1);
     s->io_buffer_size = 0;
     for(;;) {
         if (bm->cur_prd_len == 0) {
@@ -70,7 +71,7 @@
             if (bm->cur_prd_last ||
                 (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
                 return s->io_buffer_size != 0;
-            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+            pci_dma_read(&bm->pci_dev->dev, bm->cur_addr, (uint8_t *)&prd, 8);
             bm->cur_addr += 8;
             prd.addr = le32_to_cpu(prd.addr);
             prd.size = le32_to_cpu(prd.size);
@@ -112,7 +113,7 @@
             if (bm->cur_prd_last ||
                 (bm->cur_addr - bm->addr) >= BMDMA_PAGE_SIZE)
                 return 0;
-            cpu_physical_memory_read(bm->cur_addr, (uint8_t *)&prd, 8);
+            pci_dma_read(&bm->pci_dev->dev, bm->cur_addr, (uint8_t *)&prd, 8);
             bm->cur_addr += 8;
             prd.addr = le32_to_cpu(prd.addr);
             prd.size = le32_to_cpu(prd.size);
@@ -127,11 +128,11 @@
             l = bm->cur_prd_len;
         if (l > 0) {
             if (is_write) {
-                cpu_physical_memory_write(bm->cur_prd_addr,
-                                          s->io_buffer + s->io_buffer_index, l);
+                pci_dma_write(&bm->pci_dev->dev, bm->cur_prd_addr,
+                              s->io_buffer + s->io_buffer_index, l);
             } else {
-                cpu_physical_memory_read(bm->cur_prd_addr,
-                                          s->io_buffer + s->io_buffer_index, l);
+                pci_dma_read(&bm->pci_dev->dev, bm->cur_prd_addr,
+                             s->io_buffer + s->io_buffer_index, l);
             }
             bm->cur_prd_addr += l;
             bm->cur_prd_len -= l;
@@ -326,7 +327,7 @@
     bm->cmd = val & 0x09;
 }
 
-static uint64_t bmdma_addr_read(void *opaque, target_phys_addr_t addr,
+static uint64_t bmdma_addr_read(void *opaque, dma_addr_t addr,
                                 unsigned width)
 {
     BMDMAState *bm = opaque;
@@ -340,7 +341,7 @@
     return data;
 }
 
-static void bmdma_addr_write(void *opaque, target_phys_addr_t addr,
+static void bmdma_addr_write(void *opaque, dma_addr_t addr,
                              uint64_t data, unsigned width)
 {
     BMDMAState *bm = opaque;
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 675b659..02def4c 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -24,6 +24,7 @@
 #include "audiodev.h"
 #include "intel-hda.h"
 #include "intel-hda-defs.h"
+#include "dma.h"
 
 /* --------------------------------------------------------------------- */
 /* hda bus                                                               */
@@ -328,7 +329,7 @@
 
         rp = (d->corb_rp + 1) & 0xff;
         addr = intel_hda_addr(d->corb_lbase, d->corb_ubase);
-        verb = ldl_le_phys(addr + 4*rp);
+        verb = ldl_le_pci_dma(&d->pci, addr + 4*rp);
         d->corb_rp = rp;
 
         dprint(d, 2, "%s: [rp 0x%x] verb 0x%08x\n", __FUNCTION__, rp, verb);
@@ -360,8 +361,8 @@
     ex = (solicited ? 0 : (1 << 4)) | dev->cad;
     wp = (d->rirb_wp + 1) & 0xff;
     addr = intel_hda_addr(d->rirb_lbase, d->rirb_ubase);
-    stl_le_phys(addr + 8*wp, response);
-    stl_le_phys(addr + 8*wp + 4, ex);
+    stl_le_pci_dma(&d->pci, addr + 8*wp, response);
+    stl_le_pci_dma(&d->pci, addr + 8*wp + 4, ex);
     d->rirb_wp = wp;
 
     dprint(d, 2, "%s: [wp 0x%x] response 0x%x, extra 0x%x\n",
@@ -426,8 +427,7 @@
         dprint(d, 3, "dma: entry %d, pos %d/%d, copy %d\n",
                st->be, st->bp, st->bpl[st->be].len, copy);
 
-        cpu_physical_memory_rw(st->bpl[st->be].addr + st->bp,
-                               buf, copy, !output);
+        pci_dma_rw(&d->pci, st->bpl[st->be].addr + st->bp, buf, copy, !output);
         st->lpib += copy;
         st->bp += copy;
         buf += copy;
@@ -449,7 +449,7 @@
     }
     if (d->dp_lbase & 0x01) {
         addr = intel_hda_addr(d->dp_lbase & ~0x01, d->dp_ubase);
-        stl_le_phys(addr + 8*s, st->lpib);
+        stl_le_pci_dma(&d->pci, addr + 8*s, st->lpib);
     }
     dprint(d, 3, "dma: --\n");
 
@@ -471,7 +471,7 @@
     g_free(st->bpl);
     st->bpl = g_malloc(sizeof(bpl) * st->bentries);
     for (i = 0; i < st->bentries; i++, addr += 16) {
-        cpu_physical_memory_read(addr, buf, 16);
+        pci_dma_read(&d->pci, addr, buf, 16);
         st->bpl[i].addr  = le64_to_cpu(*(uint64_t *)buf);
         st->bpl[i].len   = le32_to_cpu(*(uint32_t *)(buf + 8));
         st->bpl[i].flags = le32_to_cpu(*(uint32_t *)(buf + 12));
diff --git a/hw/lance.c b/hw/lance.c
index d83e7f5..93d5fda 100644
--- a/hw/lance.c
+++ b/hw/lance.c
@@ -97,6 +97,7 @@
     .size = sizeof(NICState),
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
+    .link_status_changed = pcnet_set_link_status,
     .cleanup = lance_cleanup,
 };
 
diff --git a/hw/lsi53c895a.c b/hw/lsi53c895a.c
index 2984cea..fcc27d7 100644
--- a/hw/lsi53c895a.c
+++ b/hw/lsi53c895a.c
@@ -15,6 +15,8 @@
 #include "hw.h"
 #include "pci.h"
 #include "scsi.h"
+#include "block_int.h"
+#include "dma.h"
 
 //#define DEBUG_LSI
 //#define DEBUG_LSI_REG
@@ -390,10 +392,7 @@
 {
     uint32_t buf;
 
-    /* XXX: an optimization here used to fast-path the read from scripts
-     * memory.  But that bypasses any iommu.
-     */
-    cpu_physical_memory_read(addr, (uint8_t *)&buf, 4);
+    pci_dma_read(&s->dev, addr, (uint8_t *)&buf, 4);
     return cpu_to_le32(buf);
 }
 
@@ -532,7 +531,7 @@
 static void lsi_do_dma(LSIState *s, int out)
 {
     uint32_t count;
-    target_phys_addr_t addr;
+    dma_addr_t addr;
     SCSIDevice *dev;
 
     assert(s->current);
@@ -558,7 +557,7 @@
     else if (s->sbms)
         addr |= ((uint64_t)s->sbms << 32);
 
-    DPRINTF("DMA addr=0x" TARGET_FMT_plx " len=%d\n", addr, count);
+    DPRINTF("DMA addr=0x" DMA_ADDR_FMT " len=%d\n", addr, count);
     s->csbc += count;
     s->dnad += count;
     s->dbc -= count;
@@ -567,9 +566,9 @@
     }
     /* ??? Set SFBR to first data byte.  */
     if (out) {
-        cpu_physical_memory_read(addr, s->current->dma_buf, count);
+        pci_dma_read(&s->dev, addr, s->current->dma_buf, count);
     } else {
-        cpu_physical_memory_write(addr, s->current->dma_buf, count);
+        pci_dma_write(&s->dev, addr, s->current->dma_buf, count);
     }
     s->current->dma_len -= count;
     if (s->current->dma_len == 0) {
@@ -762,7 +761,7 @@
     DPRINTF("Send command len=%d\n", s->dbc);
     if (s->dbc > 16)
         s->dbc = 16;
-    cpu_physical_memory_read(s->dnad, buf, s->dbc);
+    pci_dma_read(&s->dev, s->dnad, buf, s->dbc);
     s->sfbr = buf[0];
     s->command_complete = 0;
 
@@ -813,7 +812,7 @@
     s->dbc = 1;
     status = s->status;
     s->sfbr = status;
-    cpu_physical_memory_write(s->dnad, &status, 1);
+    pci_dma_write(&s->dev, s->dnad, &status, 1);
     lsi_set_phase(s, PHASE_MI);
     s->msg_action = 1;
     lsi_add_msg_byte(s, 0); /* COMMAND COMPLETE */
@@ -827,7 +826,7 @@
     len = s->msg_len;
     if (len > s->dbc)
         len = s->dbc;
-    cpu_physical_memory_write(s->dnad, s->msg, len);
+    pci_dma_write(&s->dev, s->dnad, s->msg, len);
     /* Linux drivers rely on the last byte being in the SIDL.  */
     s->sidl = s->msg[len - 1];
     s->msg_len -= len;
@@ -859,7 +858,7 @@
 static uint8_t lsi_get_msgbyte(LSIState *s)
 {
     uint8_t data;
-    cpu_physical_memory_read(s->dnad, &data, 1);
+    pci_dma_read(&s->dev, s->dnad, &data, 1);
     s->dnad++;
     s->dbc--;
     return data;
@@ -1011,8 +1010,8 @@
     DPRINTF("memcpy dest 0x%08x src 0x%08x count %d\n", dest, src, count);
     while (count) {
         n = (count > LSI_BUF_SIZE) ? LSI_BUF_SIZE : count;
-        cpu_physical_memory_read(src, buf, n);
-        cpu_physical_memory_write(dest, buf, n);
+        pci_dma_read(&s->dev, src, buf, n);
+        pci_dma_write(&s->dev, dest, buf, n);
         src += n;
         dest += n;
         count -= n;
@@ -1080,7 +1079,7 @@
 
             /* 32-bit Table indirect */
             offset = sxt24(addr);
-            cpu_physical_memory_read(s->dsa + offset, (uint8_t *)buf, 8);
+            pci_dma_read(&s->dev, s->dsa + offset, (uint8_t *)buf, 8);
             /* byte count is stored in bits 0:23 only */
             s->dbc = cpu_to_le32(buf[0]) & 0xffffff;
             s->rbc = s->dbc;
@@ -1439,7 +1438,7 @@
             n = (insn & 7);
             reg = (insn >> 16) & 0xff;
             if (insn & (1 << 24)) {
-                cpu_physical_memory_read(addr, data, n);
+                pci_dma_read(&s->dev, addr, data, n);
                 DPRINTF("Load reg 0x%x size %d addr 0x%08x = %08x\n", reg, n,
                         addr, *(int *)data);
                 for (i = 0; i < n; i++) {
@@ -1450,7 +1449,7 @@
                 for (i = 0; i < n; i++) {
                     data[i] = lsi_reg_readb(s, reg + i);
                 }
-                cpu_physical_memory_write(addr, data, n);
+                pci_dma_write(&s->dev, addr, data, n);
             }
         }
     }
diff --git a/hw/pci.h b/hw/pci.h
index 98f30f7..4b2e785 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -5,6 +5,7 @@
 
 #include "qdev.h"
 #include "memory.h"
+#include "dma.h"
 
 /* PCI includes legacy ISA access.  */
 #include "isa.h"
@@ -483,4 +484,70 @@
     return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
 }
 
+/* DMA access functions */
+static inline int pci_dma_rw(PCIDevice *dev, dma_addr_t addr,
+                             void *buf, dma_addr_t len, DMADirection dir)
+{
+    cpu_physical_memory_rw(addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
+    return 0;
+}
+
+static inline int pci_dma_read(PCIDevice *dev, dma_addr_t addr,
+                               void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, buf, len, DMA_DIRECTION_TO_DEVICE);
+}
+
+static inline int pci_dma_write(PCIDevice *dev, dma_addr_t addr,
+                                const void *buf, dma_addr_t len)
+{
+    return pci_dma_rw(dev, addr, (void *) buf, len, DMA_DIRECTION_FROM_DEVICE);
+}
+
+#define PCI_DMA_DEFINE_LDST(_l, _s, _bits)                              \
+    static inline uint##_bits##_t ld##_l##_pci_dma(PCIDevice *dev,      \
+                                                   dma_addr_t addr)     \
+    {                                                                   \
+        return ld##_l##_phys(addr);                                     \
+    }                                                                   \
+    static inline void st##_s##_pci_dma(PCIDevice *dev,                 \
+                          dma_addr_t addr, uint##_bits##_t val)         \
+    {                                                                   \
+        st##_s##_phys(addr, val);                                       \
+    }
+
+PCI_DMA_DEFINE_LDST(ub, b, 8);
+PCI_DMA_DEFINE_LDST(uw_le, w_le, 16)
+PCI_DMA_DEFINE_LDST(l_le, l_le, 32);
+PCI_DMA_DEFINE_LDST(q_le, q_le, 64);
+PCI_DMA_DEFINE_LDST(uw_be, w_be, 16)
+PCI_DMA_DEFINE_LDST(l_be, l_be, 32);
+PCI_DMA_DEFINE_LDST(q_be, q_be, 64);
+
+#undef PCI_DMA_DEFINE_LDST
+
+static inline void *pci_dma_map(PCIDevice *dev, dma_addr_t addr,
+                                dma_addr_t *plen, DMADirection dir)
+{
+    target_phys_addr_t len = *plen;
+    void *buf;
+
+    buf = cpu_physical_memory_map(addr, &len, dir == DMA_DIRECTION_FROM_DEVICE);
+    *plen = len;
+    return buf;
+}
+
+static inline void pci_dma_unmap(PCIDevice *dev, void *buffer, dma_addr_t len,
+                                 DMADirection dir, dma_addr_t access_len)
+{
+    cpu_physical_memory_unmap(buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
+                              access_len);
+}
+
+static inline void pci_dma_sglist_init(QEMUSGList *qsg, PCIDevice *dev,
+                                       int alloc_hint)
+{
+    qemu_sglist_init(qsg, alloc_hint);
+}
+
 #endif
diff --git a/hw/pcnet-pci.c b/hw/pcnet-pci.c
index fb2a00c..4e164da 100644
--- a/hw/pcnet-pci.c
+++ b/hw/pcnet-pci.c
@@ -31,6 +31,7 @@
 #include "net.h"
 #include "loader.h"
 #include "qemu-timer.h"
+#include "dma.h"
 
 #include "pcnet.h"
 
@@ -55,9 +56,9 @@
 #ifdef PCNET_DEBUG
     printf("pcnet_aprom_writeb addr=0x%08x val=0x%02x\n", addr, val);
 #endif
-    /* Check APROMWE bit to enable write access */
-    if (pcnet_bcr_readw(s,2) & 0x100)
+    if (BCR_APROMWE(s)) {
         s->prom[addr & 15] = val;
+    }
 }
 
 static uint32_t pcnet_aprom_readb(void *opaque, uint32_t addr)
@@ -75,12 +76,24 @@
 {
     PCNetState *d = opaque;
 
-    if (addr < 16 && size == 1) {
-        return pcnet_aprom_readb(d, addr);
-    } else if (addr >= 0x10 && addr < 0x20 && size == 2) {
-        return pcnet_ioport_readw(d, addr);
-    } else if (addr >= 0x10 && addr < 0x20 && size == 4) {
-        return pcnet_ioport_readl(d, addr);
+    if (addr < 0x10) {
+        if (!BCR_DWIO(d) && size == 1) {
+            return pcnet_aprom_readb(d, addr);
+        } else if (!BCR_DWIO(d) && (addr & 1) == 0 && size == 2) {
+            return pcnet_aprom_readb(d, addr) |
+                   (pcnet_aprom_readb(d, addr + 1) << 8);
+        } else if (BCR_DWIO(d) && (addr & 3) == 0 && size == 4) {
+            return pcnet_aprom_readb(d, addr) |
+                   (pcnet_aprom_readb(d, addr + 1) << 8) |
+                   (pcnet_aprom_readb(d, addr + 2) << 16) |
+                   (pcnet_aprom_readb(d, addr + 3) << 24);
+        }
+    } else {
+        if (size == 2) {
+            return pcnet_ioport_readw(d, addr);
+        } else if (size == 4) {
+            return pcnet_ioport_readl(d, addr);
+        }
     }
     return ((uint64_t)1 << (size * 8)) - 1;
 }
@@ -90,12 +103,24 @@
 {
     PCNetState *d = opaque;
 
-    if (addr < 16 && size == 1) {
-        return pcnet_aprom_writeb(d, addr, data);
-    } else if (addr >= 0x10 && addr < 0x20 && size == 2) {
-        return pcnet_ioport_writew(d, addr, data);
-    } else if (addr >= 0x10 && addr < 0x20 && size == 4) {
-        return pcnet_ioport_writel(d, addr, data);
+    if (addr < 0x10) {
+        if (!BCR_DWIO(d) && size == 1) {
+            pcnet_aprom_writeb(d, addr, data);
+        } else if (!BCR_DWIO(d) && (addr & 1) == 0 && size == 2) {
+            pcnet_aprom_writeb(d, addr, data & 0xff);
+            pcnet_aprom_writeb(d, addr + 1, data >> 8);
+        } else if (BCR_DWIO(d) && (addr & 3) == 0 && size == 4) {
+            pcnet_aprom_writeb(d, addr, data & 0xff);
+            pcnet_aprom_writeb(d, addr + 1, (data >> 8) & 0xff);
+            pcnet_aprom_writeb(d, addr + 2, (data >> 16) & 0xff);
+            pcnet_aprom_writeb(d, addr + 3, data >> 24);
+        }
+    } else {
+        if (size == 2) {
+            pcnet_ioport_writew(d, addr, data);
+        } else if (size == 4) {
+            pcnet_ioport_writel(d, addr, data);
+        }
     }
 }
 
@@ -230,13 +255,13 @@
 static void pci_physical_memory_write(void *dma_opaque, target_phys_addr_t addr,
                                       uint8_t *buf, int len, int do_bswap)
 {
-    cpu_physical_memory_write(addr, buf, len);
+    pci_dma_write(dma_opaque, addr, buf, len);
 }
 
 static void pci_physical_memory_read(void *dma_opaque, target_phys_addr_t addr,
                                      uint8_t *buf, int len, int do_bswap)
 {
-    cpu_physical_memory_read(addr, buf, len);
+    pci_dma_read(dma_opaque, addr, buf, len);
 }
 
 static void pci_pcnet_cleanup(VLANClientState *nc)
@@ -263,6 +288,7 @@
     .size = sizeof(NICState),
     .can_receive = pcnet_can_receive,
     .receive = pcnet_receive,
+    .link_status_changed = pcnet_set_link_status,
     .cleanup = pci_pcnet_cleanup,
 };
 
@@ -302,6 +328,7 @@
     s->irq = pci_dev->irq[0];
     s->phys_mem_read = pci_physical_memory_read;
     s->phys_mem_write = pci_physical_memory_write;
+    s->dma_opaque = pci_dev;
 
     if (!pci_dev->qdev.hotplugged) {
         static int loaded = 0;
diff --git a/hw/pcnet.c b/hw/pcnet.c
index cf16fd4..cba253b 100644
--- a/hw/pcnet.c
+++ b/hw/pcnet.c
@@ -58,24 +58,6 @@
     uint16_t ether_type;
 };
 
-/* BUS CONFIGURATION REGISTERS */
-#define BCR_MSRDA    0
-#define BCR_MSWRA    1
-#define BCR_MC       2
-#define BCR_LNKST    4
-#define BCR_LED1     5
-#define BCR_LED2     6
-#define BCR_LED3     7
-#define BCR_FDC      9
-#define BCR_BSBC     18
-#define BCR_EECAS    19
-#define BCR_SWS      20
-#define BCR_PLAT     22
-
-#define BCR_DWIO(S)      !!((S)->bcr[BCR_BSBC] & 0x0080)
-#define BCR_SSIZE32(S)   !!((S)->bcr[BCR_SWS ] & 0x0100)
-#define BCR_SWSTYLE(S)     ((S)->bcr[BCR_SWS ] & 0x00FF)
-
 #define CSR_INIT(S)      !!(((S)->csr[0])&0x0001)
 #define CSR_STRT(S)      !!(((S)->csr[0])&0x0002)
 #define CSR_STOP(S)      !!(((S)->csr[0])&0x0004)
@@ -1215,6 +1197,13 @@
     return size_;
 }
 
+void pcnet_set_link_status(VLANClientState *nc)
+{
+    PCNetState *d = DO_UPCAST(NICState, nc, nc)->opaque;
+
+    d->lnkst = nc->link_down ? 0 : 0x40;
+}
+
 static void pcnet_transmit(PCNetState *s)
 {
     target_phys_addr_t xmit_cxda = 0;
diff --git a/hw/pcnet.h b/hw/pcnet.h
index cd86bde..edc81c9 100644
--- a/hw/pcnet.h
+++ b/hw/pcnet.h
@@ -6,6 +6,25 @@
 
 #include "memory.h"
 
+/* BUS CONFIGURATION REGISTERS */
+#define BCR_MSRDA    0
+#define BCR_MSWRA    1
+#define BCR_MC       2
+#define BCR_LNKST    4
+#define BCR_LED1     5
+#define BCR_LED2     6
+#define BCR_LED3     7
+#define BCR_FDC      9
+#define BCR_BSBC     18
+#define BCR_EECAS    19
+#define BCR_SWS      20
+#define BCR_PLAT     22
+
+#define BCR_APROMWE(S)   !!((S)->bcr[BCR_MC  ] & 0x0100)
+#define BCR_DWIO(S)      !!((S)->bcr[BCR_BSBC] & 0x0080)
+#define BCR_SSIZE32(S)   !!((S)->bcr[BCR_SWS ] & 0x0100)
+#define BCR_SWSTYLE(S)     ((S)->bcr[BCR_SWS ] & 0x00FF)
+
 typedef struct PCNetState_st PCNetState;
 
 struct PCNetState_st {
@@ -39,6 +58,7 @@
 uint32_t pcnet_bcr_readw(PCNetState *s, uint32_t rap);
 int pcnet_can_receive(VLANClientState *nc);
 ssize_t pcnet_receive(VLANClientState *nc, const uint8_t *buf, size_t size_);
+void pcnet_set_link_status(VLANClientState *nc);
 void pcnet_common_cleanup(PCNetState *d);
 int pcnet_common_init(DeviceState *dev, PCNetState *s, NetClientInfo *info);
 extern const VMStateDescription vmstate_pcnet;
diff --git a/hw/ps2.c b/hw/ps2.c
index 24228c1..1d9057b 100644
--- a/hw/ps2.c
+++ b/hw/ps2.c
@@ -92,6 +92,7 @@
        not the keyboard controller.  */
     int translate;
     int scancode_set; /* 1=XT, 2=AT, 3=PS/2 */
+    int ledstate;
 } PS2KbdState;
 
 typedef struct {
@@ -195,11 +196,17 @@
     return val;
 }
 
+static void ps2_set_ledstate(PS2KbdState *s, int ledstate)
+{
+    s->ledstate = ledstate;
+    kbd_put_ledstate(ledstate);
+}
+
 static void ps2_reset_keyboard(PS2KbdState *s)
 {
     s->scan_enabled = 1;
     s->scancode_set = 2;
-    kbd_put_ledstate(0);
+    ps2_set_ledstate(s, 0);
 }
 
 void ps2_write_keyboard(void *opaque, int val)
@@ -274,7 +281,7 @@
         s->common.write_cmd = -1;
         break;
     case KBD_CMD_SET_LEDS:
-        kbd_put_ledstate(val);
+        ps2_set_ledstate(s, val);
         ps2_queue(&s->common, KBD_REPLY_ACK);
         s->common.write_cmd = -1;
         break;
@@ -557,6 +564,33 @@
     }
 };
 
+static bool ps2_keyboard_ledstate_needed(void *opaque)
+{
+    PS2KbdState *s = opaque;
+
+    return s->ledstate != 0; /* 0 is default state */
+}
+
+static int ps2_kbd_ledstate_post_load(void *opaque, int version_id)
+{
+    PS2KbdState *s = opaque;
+
+    kbd_put_ledstate(s->ledstate);
+    return 0;
+}
+
+static const VMStateDescription vmstate_ps2_keyboard_ledstate = {
+    .name = "ps2kbd/ledstate",
+    .version_id = 3,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
+    .post_load = ps2_kbd_ledstate_post_load,
+    .fields      = (VMStateField []) {
+        VMSTATE_INT32(ledstate, PS2KbdState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static int ps2_kbd_post_load(void* opaque, int version_id)
 {
     PS2KbdState *s = (PS2KbdState*)opaque;
@@ -578,6 +612,14 @@
         VMSTATE_INT32(translate, PS2KbdState),
         VMSTATE_INT32_V(scancode_set, PS2KbdState,3),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (VMStateSubsection []) {
+        {
+            .vmsd = &vmstate_ps2_keyboard_ledstate,
+            .needed = ps2_keyboard_ledstate_needed,
+        }, {
+            /* empty */
+        }
     }
 };
 
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 3753950..4c37993 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -53,6 +53,7 @@
 
 #include "hw.h"
 #include "pci.h"
+#include "dma.h"
 #include "qemu-timer.h"
 #include "net.h"
 #include "loader.h"
@@ -427,9 +428,6 @@
 /* Clears all tally counters */
 static void RTL8139TallyCounters_clear(RTL8139TallyCounters* counters);
 
-/* Writes tally counters to specified physical memory address */
-static void RTL8139TallyCounters_physical_memory_write(target_phys_addr_t tc_addr, RTL8139TallyCounters* counters);
-
 typedef struct RTL8139State {
     PCIDevice dev;
     uint8_t phys[8]; /* mac address */
@@ -512,6 +510,9 @@
     int rtl8139_mmio_io_addr_dummy;
 } RTL8139State;
 
+/* Writes tally counters to memory via DMA */
+static void RTL8139TallyCounters_dma_write(RTL8139State *s, dma_addr_t tc_addr);
+
 static void rtl8139_set_next_tctr_time(RTL8139State *s, int64_t current_time);
 
 static void prom9346_decode_command(EEprom9346 *eeprom, uint8_t command)
@@ -773,15 +774,15 @@
 
             if (size > wrapped)
             {
-                cpu_physical_memory_write( s->RxBuf + s->RxBufAddr,
-                                           buf, size-wrapped );
+                pci_dma_write(&s->dev, s->RxBuf + s->RxBufAddr,
+                              buf, size-wrapped);
             }
 
             /* reset buffer pointer */
             s->RxBufAddr = 0;
 
-            cpu_physical_memory_write( s->RxBuf + s->RxBufAddr,
-                                       buf + (size-wrapped), wrapped );
+            pci_dma_write(&s->dev, s->RxBuf + s->RxBufAddr,
+                          buf + (size-wrapped), wrapped);
 
             s->RxBufAddr = wrapped;
 
@@ -790,13 +791,13 @@
     }
 
     /* non-wrapping path or overwrapping enabled */
-    cpu_physical_memory_write( s->RxBuf + s->RxBufAddr, buf, size );
+    pci_dma_write(&s->dev, s->RxBuf + s->RxBufAddr, buf, size);
 
     s->RxBufAddr += size;
 }
 
 #define MIN_BUF_SIZE 60
-static inline target_phys_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
+static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
 {
 #if TARGET_PHYS_ADDR_BITS > 32
     return low | ((target_phys_addr_t)high << 32);
@@ -979,24 +980,24 @@
 /* w3 high 32bit of Rx buffer ptr */
 
         int descriptor = s->currCPlusRxDesc;
-        target_phys_addr_t cplus_rx_ring_desc;
+        dma_addr_t cplus_rx_ring_desc;
 
         cplus_rx_ring_desc = rtl8139_addr64(s->RxRingAddrLO, s->RxRingAddrHI);
         cplus_rx_ring_desc += 16 * descriptor;
 
         DPRINTF("+++ C+ mode reading RX descriptor %d from host memory at "
-            "%08x %08x = "TARGET_FMT_plx"\n", descriptor, s->RxRingAddrHI,
+            "%08x %08x = "DMA_ADDR_FMT"\n", descriptor, s->RxRingAddrHI,
             s->RxRingAddrLO, cplus_rx_ring_desc);
 
         uint32_t val, rxdw0,rxdw1,rxbufLO,rxbufHI;
 
-        cpu_physical_memory_read(cplus_rx_ring_desc,    (uint8_t *)&val, 4);
+        pci_dma_read(&s->dev, cplus_rx_ring_desc, (uint8_t *)&val, 4);
         rxdw0 = le32_to_cpu(val);
-        cpu_physical_memory_read(cplus_rx_ring_desc+4,  (uint8_t *)&val, 4);
+        pci_dma_read(&s->dev, cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
         rxdw1 = le32_to_cpu(val);
-        cpu_physical_memory_read(cplus_rx_ring_desc+8,  (uint8_t *)&val, 4);
+        pci_dma_read(&s->dev, cplus_rx_ring_desc+8, (uint8_t *)&val, 4);
         rxbufLO = le32_to_cpu(val);
-        cpu_physical_memory_read(cplus_rx_ring_desc+12, (uint8_t *)&val, 4);
+        pci_dma_read(&s->dev, cplus_rx_ring_desc+12, (uint8_t *)&val, 4);
         rxbufHI = le32_to_cpu(val);
 
         DPRINTF("+++ C+ mode RX descriptor %d %08x %08x %08x %08x\n",
@@ -1060,16 +1061,16 @@
             return size_;
         }
 
-        target_phys_addr_t rx_addr = rtl8139_addr64(rxbufLO, rxbufHI);
+        dma_addr_t rx_addr = rtl8139_addr64(rxbufLO, rxbufHI);
 
         /* receive/copy to target memory */
         if (dot1q_buf) {
-            cpu_physical_memory_write(rx_addr, buf, 2 * ETHER_ADDR_LEN);
-            cpu_physical_memory_write(rx_addr + 2 * ETHER_ADDR_LEN,
-                buf + 2 * ETHER_ADDR_LEN + VLAN_HLEN,
-                size - 2 * ETHER_ADDR_LEN);
+            pci_dma_write(&s->dev, rx_addr, buf, 2 * ETHER_ADDR_LEN);
+            pci_dma_write(&s->dev, rx_addr + 2 * ETHER_ADDR_LEN,
+                          buf + 2 * ETHER_ADDR_LEN + VLAN_HLEN,
+                          size - 2 * ETHER_ADDR_LEN);
         } else {
-            cpu_physical_memory_write(rx_addr, buf, size);
+            pci_dma_write(&s->dev, rx_addr, buf, size);
         }
 
         if (s->CpCmd & CPlusRxChkSum)
@@ -1079,7 +1080,7 @@
 
         /* write checksum */
         val = cpu_to_le32(crc32(0, buf, size_));
-        cpu_physical_memory_write( rx_addr+size, (uint8_t *)&val, 4);
+        pci_dma_write(&s->dev, rx_addr+size, (uint8_t *)&val, 4);
 
 /* first segment of received packet flag */
 #define CP_RX_STATUS_FS (1<<29)
@@ -1125,9 +1126,9 @@
 
         /* update ring data */
         val = cpu_to_le32(rxdw0);
-        cpu_physical_memory_write(cplus_rx_ring_desc,    (uint8_t *)&val, 4);
+        pci_dma_write(&s->dev, cplus_rx_ring_desc, (uint8_t *)&val, 4);
         val = cpu_to_le32(rxdw1);
-        cpu_physical_memory_write(cplus_rx_ring_desc+4,  (uint8_t *)&val, 4);
+        pci_dma_write(&s->dev, cplus_rx_ring_desc+4, (uint8_t *)&val, 4);
 
         /* update tally counter */
         ++s->tally_counters.RxOk;
@@ -1307,50 +1308,51 @@
     counters->TxUndrn = 0;
 }
 
-static void RTL8139TallyCounters_physical_memory_write(target_phys_addr_t tc_addr, RTL8139TallyCounters* tally_counters)
+static void RTL8139TallyCounters_dma_write(RTL8139State *s, dma_addr_t tc_addr)
 {
+    RTL8139TallyCounters *tally_counters = &s->tally_counters;
     uint16_t val16;
     uint32_t val32;
     uint64_t val64;
 
     val64 = cpu_to_le64(tally_counters->TxOk);
-    cpu_physical_memory_write(tc_addr + 0,    (uint8_t *)&val64, 8);
+    pci_dma_write(&s->dev, tc_addr + 0,     (uint8_t *)&val64, 8);
 
     val64 = cpu_to_le64(tally_counters->RxOk);
-    cpu_physical_memory_write(tc_addr + 8,    (uint8_t *)&val64, 8);
+    pci_dma_write(&s->dev, tc_addr + 8,     (uint8_t *)&val64, 8);
 
     val64 = cpu_to_le64(tally_counters->TxERR);
-    cpu_physical_memory_write(tc_addr + 16,    (uint8_t *)&val64, 8);
+    pci_dma_write(&s->dev, tc_addr + 16,    (uint8_t *)&val64, 8);
 
     val32 = cpu_to_le32(tally_counters->RxERR);
-    cpu_physical_memory_write(tc_addr + 24,    (uint8_t *)&val32, 4);
+    pci_dma_write(&s->dev, tc_addr + 24,    (uint8_t *)&val32, 4);
 
     val16 = cpu_to_le16(tally_counters->MissPkt);
-    cpu_physical_memory_write(tc_addr + 28,    (uint8_t *)&val16, 2);
+    pci_dma_write(&s->dev, tc_addr + 28,    (uint8_t *)&val16, 2);
 
     val16 = cpu_to_le16(tally_counters->FAE);
-    cpu_physical_memory_write(tc_addr + 30,    (uint8_t *)&val16, 2);
+    pci_dma_write(&s->dev, tc_addr + 30,    (uint8_t *)&val16, 2);
 
     val32 = cpu_to_le32(tally_counters->Tx1Col);
-    cpu_physical_memory_write(tc_addr + 32,    (uint8_t *)&val32, 4);
+    pci_dma_write(&s->dev, tc_addr + 32,    (uint8_t *)&val32, 4);
 
     val32 = cpu_to_le32(tally_counters->TxMCol);
-    cpu_physical_memory_write(tc_addr + 36,    (uint8_t *)&val32, 4);
+    pci_dma_write(&s->dev, tc_addr + 36,    (uint8_t *)&val32, 4);
 
     val64 = cpu_to_le64(tally_counters->RxOkPhy);
-    cpu_physical_memory_write(tc_addr + 40,    (uint8_t *)&val64, 8);
+    pci_dma_write(&s->dev, tc_addr + 40,    (uint8_t *)&val64, 8);
 
     val64 = cpu_to_le64(tally_counters->RxOkBrd);
-    cpu_physical_memory_write(tc_addr + 48,    (uint8_t *)&val64, 8);
+    pci_dma_write(&s->dev, tc_addr + 48,    (uint8_t *)&val64, 8);
 
     val32 = cpu_to_le32(tally_counters->RxOkMul);
-    cpu_physical_memory_write(tc_addr + 56,    (uint8_t *)&val32, 4);
+    pci_dma_write(&s->dev, tc_addr + 56,    (uint8_t *)&val32, 4);
 
     val16 = cpu_to_le16(tally_counters->TxAbt);
-    cpu_physical_memory_write(tc_addr + 60,    (uint8_t *)&val16, 2);
+    pci_dma_write(&s->dev, tc_addr + 60,    (uint8_t *)&val16, 2);
 
     val16 = cpu_to_le16(tally_counters->TxUndrn);
-    cpu_physical_memory_write(tc_addr + 62,    (uint8_t *)&val16, 2);
+    pci_dma_write(&s->dev, tc_addr + 62,    (uint8_t *)&val16, 2);
 }
 
 /* Loads values of tally counters from VM state file */
@@ -1842,7 +1844,7 @@
     DPRINTF("+++ transmit reading %d bytes from host memory at 0x%08x\n",
         txsize, s->TxAddr[descriptor]);
 
-    cpu_physical_memory_read(s->TxAddr[descriptor], txbuffer, txsize);
+    pci_dma_read(&s->dev, s->TxAddr[descriptor], txbuffer, txsize);
 
     /* Mark descriptor as transferred */
     s->TxStatus[descriptor] |= TxHostOwns;
@@ -1963,25 +1965,24 @@
 
     int descriptor = s->currCPlusTxDesc;
 
-    target_phys_addr_t cplus_tx_ring_desc =
-        rtl8139_addr64(s->TxAddr[0], s->TxAddr[1]);
+    dma_addr_t cplus_tx_ring_desc = rtl8139_addr64(s->TxAddr[0], s->TxAddr[1]);
 
     /* Normal priority ring */
     cplus_tx_ring_desc += 16 * descriptor;
 
     DPRINTF("+++ C+ mode reading TX descriptor %d from host memory at "
-        "%08x0x%08x = 0x"TARGET_FMT_plx"\n", descriptor, s->TxAddr[1],
+        "%08x0x%08x = 0x"DMA_ADDR_FMT"\n", descriptor, s->TxAddr[1],
         s->TxAddr[0], cplus_tx_ring_desc);
 
     uint32_t val, txdw0,txdw1,txbufLO,txbufHI;
 
-    cpu_physical_memory_read(cplus_tx_ring_desc,    (uint8_t *)&val, 4);
+    pci_dma_read(&s->dev, cplus_tx_ring_desc,    (uint8_t *)&val, 4);
     txdw0 = le32_to_cpu(val);
-    cpu_physical_memory_read(cplus_tx_ring_desc+4,  (uint8_t *)&val, 4);
+    pci_dma_read(&s->dev, cplus_tx_ring_desc+4,  (uint8_t *)&val, 4);
     txdw1 = le32_to_cpu(val);
-    cpu_physical_memory_read(cplus_tx_ring_desc+8,  (uint8_t *)&val, 4);
+    pci_dma_read(&s->dev, cplus_tx_ring_desc+8,  (uint8_t *)&val, 4);
     txbufLO = le32_to_cpu(val);
-    cpu_physical_memory_read(cplus_tx_ring_desc+12, (uint8_t *)&val, 4);
+    pci_dma_read(&s->dev, cplus_tx_ring_desc+12, (uint8_t *)&val, 4);
     txbufHI = le32_to_cpu(val);
 
     DPRINTF("+++ C+ mode TX descriptor %d %08x %08x %08x %08x\n", descriptor,
@@ -2047,7 +2048,7 @@
     }
 
     int txsize = txdw0 & CP_TX_BUFFER_SIZE_MASK;
-    target_phys_addr_t tx_addr = rtl8139_addr64(txbufLO, txbufHI);
+    dma_addr_t tx_addr = rtl8139_addr64(txbufLO, txbufHI);
 
     /* make sure we have enough space to assemble the packet */
     if (!s->cplus_txbuffer)
@@ -2086,10 +2087,11 @@
     /* append more data to the packet */
 
     DPRINTF("+++ C+ mode transmit reading %d bytes from host memory at "
-        TARGET_FMT_plx" to offset %d\n", txsize, tx_addr,
-        s->cplus_txbuffer_offset);
+            DMA_ADDR_FMT" to offset %d\n", txsize, tx_addr,
+            s->cplus_txbuffer_offset);
 
-    cpu_physical_memory_read(tx_addr, s->cplus_txbuffer + s->cplus_txbuffer_offset, txsize);
+    pci_dma_read(&s->dev, tx_addr,
+                 s->cplus_txbuffer + s->cplus_txbuffer_offset, txsize);
     s->cplus_txbuffer_offset += txsize;
 
     /* seek to next Rx descriptor */
@@ -2116,7 +2118,7 @@
 
     /* update ring data */
     val = cpu_to_le32(txdw0);
-    cpu_physical_memory_write(cplus_tx_ring_desc,    (uint8_t *)&val, 4);
+    pci_dma_write(&s->dev, cplus_tx_ring_desc, (uint8_t *)&val, 4);
 
     /* Now decide if descriptor being processed is holding the last segment of packet */
     if (txdw0 & CP_TX_LS)
@@ -2475,7 +2477,7 @@
             target_phys_addr_t tc_addr = rtl8139_addr64(s->TxStatus[0] & ~0x3f, s->TxStatus[1]);
 
             /* dump tally counters to specified memory location */
-            RTL8139TallyCounters_physical_memory_write( tc_addr, &s->tally_counters);
+            RTL8139TallyCounters_dma_write(s, tc_addr);
 
             /* mark dump completed */
             s->TxStatus[0] &= ~0x8;
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 2a5e637..7162588 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -51,7 +51,7 @@
             continue;
         }
 
-        QLIST_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
+        QTAILQ_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
             PCIDevice *dev = (PCIDevice *)qdev;
             if (dev->devfn == devfn) {
                 return dev;
@@ -397,7 +397,7 @@
 
     /* Populate PCI devices and allocate IRQs */
     devices = 0;
-    QLIST_FOREACH(qdev, &bus->qbus.children, sibling) {
+    QTAILQ_FOREACH(qdev, &bus->qbus.children, sibling) {
         PCIDevice *dev = DO_UPCAST(PCIDevice, qdev, qdev);
         int irq_index = pci_spapr_map_irq(dev, 0);
         uint32_t *irqmap = interrupt_map[devices];
diff --git a/hw/usb-ehci.c b/hw/usb-ehci.c
index bd374c1..cdd5aae 100644
--- a/hw/usb-ehci.c
+++ b/hw/usb-ehci.c
@@ -1101,12 +1101,13 @@
 // TODO : Put in common header file, duplication from usb-ohci.c
 
 /* Get an array of dwords from main memory */
-static inline int get_dwords(uint32_t addr, uint32_t *buf, int num)
+static inline int get_dwords(EHCIState *ehci, uint32_t addr,
+                             uint32_t *buf, int num)
 {
     int i;
 
     for(i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
-        cpu_physical_memory_rw(addr,(uint8_t *)buf, sizeof(*buf), 0);
+        pci_dma_read(&ehci->dev, addr, (uint8_t *)buf, sizeof(*buf));
         *buf = le32_to_cpu(*buf);
     }
 
@@ -1114,13 +1115,14 @@
 }
 
 /* Put an array of dwords in to main memory */
-static inline int put_dwords(uint32_t addr, uint32_t *buf, int num)
+static inline int put_dwords(EHCIState *ehci, uint32_t addr,
+                             uint32_t *buf, int num)
 {
     int i;
 
     for(i = 0; i < num; i++, buf++, addr += sizeof(*buf)) {
         uint32_t tmp = cpu_to_le32(*buf);
-        cpu_physical_memory_rw(addr,(uint8_t *)&tmp, sizeof(tmp), 1);
+        pci_dma_write(&ehci->dev, addr, (uint8_t *)&tmp, sizeof(tmp));
     }
 
     return 1;
@@ -1169,7 +1171,8 @@
     q->qh.bufptr[1] &= ~BUFPTR_CPROGMASK_MASK;
     q->qh.bufptr[2] &= ~BUFPTR_FRAMETAG_MASK;
 
-    put_dwords(NLPTR_GET(q->qhaddr), (uint32_t *) &q->qh, sizeof(EHCIqh) >> 2);
+    put_dwords(q->ehci, NLPTR_GET(q->qhaddr), (uint32_t *) &q->qh,
+               sizeof(EHCIqh) >> 2);
 
     return 0;
 }
@@ -1177,12 +1180,12 @@
 static int ehci_init_transfer(EHCIQueue *q)
 {
     uint32_t cpage, offset, bytes, plen;
-    target_phys_addr_t page;
+    dma_addr_t page;
 
     cpage  = get_field(q->qh.token, QTD_TOKEN_CPAGE);
     bytes  = get_field(q->qh.token, QTD_TOKEN_TBYTES);
     offset = q->qh.bufptr[0] & ~QTD_BUFPTR_MASK;
-    qemu_sglist_init(&q->sgl, 5);
+    pci_dma_sglist_init(&q->sgl, &q->ehci->dev, 5);
 
     while (bytes > 0) {
         if (cpage > 4) {
@@ -1428,7 +1431,7 @@
                 return USB_RET_PROCERR;
             }
 
-            qemu_sglist_init(&ehci->isgl, 2);
+            pci_dma_sglist_init(&ehci->isgl, &ehci->dev, 2);
             if (off + len > 4096) {
                 /* transfer crosses page border */
                 uint32_t len2 = off + len - 4096;
@@ -1532,7 +1535,8 @@
 
     /*  Find the head of the list (4.9.1.1) */
     for(i = 0; i < MAX_QH; i++) {
-        get_dwords(NLPTR_GET(entry), (uint32_t *) &qh, sizeof(EHCIqh) >> 2);
+        get_dwords(ehci, NLPTR_GET(entry), (uint32_t *) &qh,
+                   sizeof(EHCIqh) >> 2);
         ehci_trace_qh(NULL, NLPTR_GET(entry), &qh);
 
         if (qh.epchar & QH_EPCHAR_H) {
@@ -1629,7 +1633,8 @@
         goto out;
     }
 
-    get_dwords(NLPTR_GET(q->qhaddr), (uint32_t *) &q->qh, sizeof(EHCIqh) >> 2);
+    get_dwords(ehci, NLPTR_GET(q->qhaddr),
+               (uint32_t *) &q->qh, sizeof(EHCIqh) >> 2);
     ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &q->qh);
 
     if (q->async == EHCI_ASYNC_INFLIGHT) {
@@ -1698,7 +1703,7 @@
     assert(!async);
     entry = ehci_get_fetch_addr(ehci, async);
 
-    get_dwords(NLPTR_GET(entry),(uint32_t *) &itd,
+    get_dwords(ehci, NLPTR_GET(entry), (uint32_t *) &itd,
                sizeof(EHCIitd) >> 2);
     ehci_trace_itd(ehci, entry, &itd);
 
@@ -1706,8 +1711,8 @@
         return -1;
     }
 
-    put_dwords(NLPTR_GET(entry), (uint32_t *) &itd,
-                sizeof(EHCIitd) >> 2);
+    put_dwords(ehci, NLPTR_GET(entry), (uint32_t *) &itd,
+               sizeof(EHCIitd) >> 2);
     ehci_set_fetch_addr(ehci, async, itd.next);
     ehci_set_state(ehci, async, EST_FETCHENTRY);
 
@@ -1722,7 +1727,7 @@
     assert(!async);
     entry = ehci_get_fetch_addr(ehci, async);
 
-    get_dwords(NLPTR_GET(entry), (uint32_t *)&sitd,
+    get_dwords(ehci, NLPTR_GET(entry), (uint32_t *)&sitd,
                sizeof(EHCIsitd) >> 2);
     ehci_trace_sitd(ehci, entry, &sitd);
 
@@ -1784,7 +1789,8 @@
 {
     int again = 0;
 
-    get_dwords(NLPTR_GET(q->qtdaddr),(uint32_t *) &q->qtd, sizeof(EHCIqtd) >> 2);
+    get_dwords(q->ehci, NLPTR_GET(q->qtdaddr), (uint32_t *) &q->qtd,
+               sizeof(EHCIqtd) >> 2);
     ehci_trace_qtd(q, NLPTR_GET(q->qtdaddr), &q->qtd);
 
     if (q->qtd.token & QTD_TOKEN_ACTIVE) {
@@ -1827,7 +1833,7 @@
     uint32_t dwords = sizeof(EHCIqh) >> 2;
     uint32_t addr = NLPTR_GET(q->qhaddr);
 
-    put_dwords(addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
+    put_dwords(q->ehci, addr + 3 * sizeof(uint32_t), qh + 3, dwords - 3);
 }
 
 static int ehci_state_execute(EHCIQueue *q, int async)
@@ -1947,8 +1953,8 @@
 
     /*  Write back the QTD from the QH area */
     ehci_trace_qtd(q, NLPTR_GET(q->qtdaddr), (EHCIqtd*) &q->qh.next_qtd);
-    put_dwords(NLPTR_GET(q->qtdaddr),(uint32_t *) &q->qh.next_qtd,
-                sizeof(EHCIqtd) >> 2);
+    put_dwords(q->ehci, NLPTR_GET(q->qtdaddr), (uint32_t *) &q->qh.next_qtd,
+               sizeof(EHCIqtd) >> 2);
 
     /*
      * EHCI specs say go horizontal here.
@@ -2148,7 +2154,7 @@
         }
         list |= ((ehci->frindex & 0x1ff8) >> 1);
 
-        cpu_physical_memory_rw(list, (uint8_t *) &entry, sizeof entry, 0);
+        pci_dma_read(&ehci->dev, list, (uint8_t *) &entry, sizeof entry);
         entry = le32_to_cpu(entry);
 
         DPRINTF("PERIODIC state adv fr=%d.  [%08X] -> %08X\n",
diff --git a/hw/usb-uhci.c b/hw/usb-uhci.c
index 171d787..f9e3ea5 100644
--- a/hw/usb-uhci.c
+++ b/hw/usb-uhci.c
@@ -178,7 +178,7 @@
     async->done  = 0;
     async->isoc  = 0;
     usb_packet_init(&async->packet);
-    qemu_sglist_init(&async->sgl, 1);
+    pci_dma_sglist_init(&async->sgl, &s->dev, 1);
 
     return async;
 }
@@ -876,7 +876,7 @@
         uint32_t link = async->td;
         uint32_t int_mask = 0, val;
 
-        cpu_physical_memory_read(link & ~0xf, (uint8_t *) &td, sizeof(td));
+        pci_dma_read(&s->dev, link & ~0xf, (uint8_t *) &td, sizeof(td));
         le32_to_cpus(&td.link);
         le32_to_cpus(&td.ctrl);
         le32_to_cpus(&td.token);
@@ -888,8 +888,8 @@
 
         /* update the status bits of the TD */
         val = cpu_to_le32(td.ctrl);
-        cpu_physical_memory_write((link & ~0xf) + 4,
-                                  (const uint8_t *)&val, sizeof(val));
+        pci_dma_write(&s->dev, (link & ~0xf) + 4,
+                      (const uint8_t *)&val, sizeof(val));
         uhci_async_free(s, async);
     } else {
         async->done = 1;
@@ -952,7 +952,7 @@
 
     DPRINTF("uhci: processing frame %d addr 0x%x\n" , s->frnum, frame_addr);
 
-    cpu_physical_memory_read(frame_addr, (uint8_t *)&link, 4);
+    pci_dma_read(&s->dev, frame_addr, (uint8_t *)&link, 4);
     le32_to_cpus(&link);
 
     int_mask = 0;
@@ -976,7 +976,7 @@
                 break;
             }
 
-            cpu_physical_memory_read(link & ~0xf, (uint8_t *) &qh, sizeof(qh));
+            pci_dma_read(&s->dev, link & ~0xf, (uint8_t *) &qh, sizeof(qh));
             le32_to_cpus(&qh.link);
             le32_to_cpus(&qh.el_link);
 
@@ -996,7 +996,7 @@
         }
 
         /* TD */
-        cpu_physical_memory_read(link & ~0xf, (uint8_t *) &td, sizeof(td));
+        pci_dma_read(&s->dev, link & ~0xf, (uint8_t *) &td, sizeof(td));
         le32_to_cpus(&td.link);
         le32_to_cpus(&td.ctrl);
         le32_to_cpus(&td.token);
@@ -1010,8 +1010,8 @@
         if (old_td_ctrl != td.ctrl) {
             /* update the status bits of the TD */
             val = cpu_to_le32(td.ctrl);
-            cpu_physical_memory_write((link & ~0xf) + 4,
-                                      (const uint8_t *)&val, sizeof(val));
+            pci_dma_write(&s->dev, (link & ~0xf) + 4,
+                          (const uint8_t *)&val, sizeof(val));
         }
 
         if (ret < 0) {
@@ -1039,8 +1039,8 @@
 	    /* update QH element link */
             qh.el_link = link;
             val = cpu_to_le32(qh.el_link);
-            cpu_physical_memory_write((curr_qh & ~0xf) + 4,
-                                          (const uint8_t *)&val, sizeof(val));
+            pci_dma_write(&s->dev, (curr_qh & ~0xf) + 4,
+                          (const uint8_t *)&val, sizeof(val));
 
             if (!depth_first(link)) {
                /* done with this QH */
diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c
index ca5923c..83a4b35 100644
--- a/hw/virtio-pci.c
+++ b/hw/virtio-pci.c
@@ -654,6 +654,8 @@
                           "virtio-pci", size);
     pci_register_bar(&proxy->pci_dev, 0, PCI_BASE_ADDRESS_SPACE_IO,
                      &proxy->bar);
+    pci_register_bar(&proxy->pci_dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY,
+                     &proxy->bar);
 
     if (!kvm_has_many_ioeventfds()) {
         proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD;
diff --git a/kvm-all.c b/kvm-all.c
index c09ddf7..4c466d6 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -740,6 +740,7 @@
         fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
                         "your host kernel command line\n");
 #endif
+        ret = s->vmfd;
         goto err;
     }
 
@@ -798,7 +799,7 @@
 
 err:
     if (s) {
-        if (s->vmfd != -1) {
+        if (s->vmfd >= 0) {
             close(s->vmfd);
         }
         if (s->fd != -1) {
diff --git a/monitor.c b/monitor.c
index 7c2a9b9..5ea35de 100644
--- a/monitor.c
+++ b/monitor.c
@@ -199,8 +199,8 @@
 
 static QLIST_HEAD(mon_list, Monitor) mon_list;
 
-static const mon_cmd_t mon_cmds[];
-static const mon_cmd_t info_cmds[];
+static mon_cmd_t mon_cmds[];
+static mon_cmd_t info_cmds[];
 
 static const mon_cmd_t qmp_cmds[];
 
@@ -2591,13 +2591,14 @@
     return -1;
 }
 
-static const mon_cmd_t mon_cmds[] = {
+/* mon_cmds and info_cmds would be sorted at runtime */
+static mon_cmd_t mon_cmds[] = {
 #include "hmp-commands.h"
     { NULL, NULL, },
 };
 
 /* Please update hmp-commands.hx when adding or changing commands */
-static const mon_cmd_t info_cmds[] = {
+static mon_cmd_t info_cmds[] = {
     {
         .name       = "version",
         .args_type  = "",
@@ -4832,6 +4833,25 @@
     }
 }
 
+static int
+compare_mon_cmd(const void *a, const void *b)
+{
+    return strcmp(((const mon_cmd_t *)a)->name,
+            ((const mon_cmd_t *)b)->name);
+}
+
+static void sortcmdlist(void)
+{
+    int array_num;
+    int elem_size = sizeof(mon_cmd_t);
+
+    array_num = sizeof(mon_cmds)/elem_size-1;
+    qsort((void *)mon_cmds, array_num, elem_size, compare_mon_cmd);
+
+    array_num = sizeof(info_cmds)/elem_size-1;
+    qsort((void *)info_cmds, array_num, elem_size, compare_mon_cmd);
+}
+
 
 /*
  * Local variables:
@@ -4874,6 +4894,8 @@
     QLIST_INSERT_HEAD(&mon_list, mon, entry);
     if (!default_mon || (flags & MONITOR_IS_DEFAULT))
         default_mon = mon;
+
+    sortcmdlist();
 }
 
 static void bdrv_password_cb(Monitor *mon, const char *password, void *opaque)
diff --git a/net.c b/net.c
index d05930c..cb52050 100644
--- a/net.c
+++ b/net.c
@@ -733,12 +733,7 @@
             return -1;
         }
     } else {
-        char *endptr = NULL;
-
-        fd = strtol(param, &endptr, 10);
-        if (*endptr || (fd == 0 && param == endptr)) {
-            return -1;
-        }
+        fd = qemu_parse_fd(param);
     }
 
     return fd;
diff --git a/qemu-common.h b/qemu-common.h
index 1c15cb1..2ce47aa 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -129,6 +129,7 @@
 int qemu_fls(int i);
 int qemu_fdatasync(int fd);
 int fcntl_setfl(int fd, int flag);
+int qemu_parse_fd(const char *param);
 
 /*
  * strtosz() suffixes used to specify the default treatment of an
diff --git a/savevm.c b/savevm.c
index f01838f..bee16c0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -476,6 +476,8 @@
     if (len > 0) {
         f->buf_size += len;
         f->buf_offset += len;
+    } else if (len == 0) {
+        f->last_error = -EIO;
     } else if (len != -EAGAIN)
         f->last_error = len;
 }
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 0eba357..7a71324 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2206,12 +2206,6 @@
 			ERROR("space prohibited before that close parenthesis ')'\n" . $herecurr);
 		}
 
-#goto labels aren't indented, allow a single space however
-		if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
-		   !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
-			WARN("labels should not be indented\n" . $herecurr);
-		}
-
 # Return is not a function.
 		if (defined($stat) && $stat =~ /^.\s*return(\s*)(\(.*);/s) {
 			my $spacing = $1;
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index e96095a..23b1bf5 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -34,7 +34,7 @@
         vs->sasl.runSSF = vs->sasl.waitWriteSSF = vs->sasl.wantSSF = 0;
         vs->sasl.encodedLength = vs->sasl.encodedOffset = 0;
         vs->sasl.encoded = NULL;
-        free(vs->sasl.username);
+        g_free(vs->sasl.username);
         free(vs->sasl.mechlist);
         vs->sasl.username = vs->sasl.mechlist = NULL;
         sasl_dispose(&vs->sasl.conn);
@@ -506,7 +506,7 @@
         goto authabort;
 
     if (!(remoteAddr = vnc_socket_remote_addr("%s;%s", vs->csock))) {
-        free(localAddr);
+        g_free(localAddr);
         goto authabort;
     }
 
@@ -518,8 +518,8 @@
                           NULL, /* Callbacks, not needed */
                           SASL_SUCCESS_DATA,
                           &vs->sasl.conn);
-    free(localAddr);
-    free(remoteAddr);
+    g_free(localAddr);
+    g_free(remoteAddr);
     localAddr = remoteAddr = NULL;
 
     if (err != SASL_OK) {
diff --git a/ui/vnc-enc-hextile.c b/ui/vnc-enc-hextile.c
index d2905c8..c860dbb 100644
--- a/ui/vnc-enc-hextile.c
+++ b/ui/vnc-enc-hextile.c
@@ -80,8 +80,8 @@
                                   last_bg, last_fg, &has_bg, &has_fg);
         }
     }
-    free(last_fg);
-    free(last_bg);
+    g_free(last_fg);
+    g_free(last_bg);
 
     return 1;
 }
diff --git a/ui/vnc-tls.c b/ui/vnc-tls.c
index ffbd172..3aaa939 100644
--- a/ui/vnc-tls.c
+++ b/ui/vnc-tls.c
@@ -413,7 +413,7 @@
         vs->tls.session = NULL;
     }
     vs->tls.wiremode = VNC_WIREMODE_CLEAR;
-    free(vs->tls.dname);
+    g_free(vs->tls.dname);
 }
 
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 32d4cb7..40018f7 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -2911,7 +2911,7 @@
     if ((saslErr = sasl_server_init(NULL, "qemu")) != SASL_OK) {
         fprintf(stderr, "Failed to initialize SASL auth %s",
                 sasl_errstring(saslErr, NULL, NULL));
-        free(vs->display);
+        g_free(vs->display);
         vs->display = NULL;
         return -1;
     }
@@ -2925,7 +2925,7 @@
         else
             vs->lsock = inet_connect(display, SOCK_STREAM);
         if (-1 == vs->lsock) {
-            free(vs->display);
+            g_free(vs->display);
             vs->display = NULL;
             return -1;
         } else {
@@ -2946,10 +2946,10 @@
             vs->lsock = inet_listen(display, dpy, 256, SOCK_STREAM, 5900);
         }
         if (-1 == vs->lsock) {
-            free(dpy);
+            g_free(dpy);
             return -1;
         } else {
-            free(vs->display);
+            g_free(vs->display);
             vs->display = dpy;
         }
     }