RAMList: Upstream dirty tracking implementation.

This modifies the implementation of dirty RAM tracking to follow
the upstream implementation, which uses three parallel bitmaps (one
per client: VGA, code and migration) instead of a single byte array
merging all of the per-page dirty flags.

Change-Id: I75c418444310f159973840aa765db65294859702
diff --git a/Makefile.target b/Makefile.target
index e68de28..ca3bd44 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -311,6 +311,8 @@
     android/opengles.c \
     android/user-events-qemu.c \
     hw/core/loader.c \
+    util/bitmap.c \
+    util/bitops.c \
     ui/keymaps.c \
     util/qemu-timer-common.c \
     util/iov.c \
diff --git a/arch_init.c b/arch_init.c
index 16d6690..cfd0fad 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -126,13 +126,14 @@
     current_addr = block->offset + offset;
 
     do {
-        if (cpu_physical_memory_get_dirty(current_addr, MIGRATION_DIRTY_FLAG)) {
+        if (cpu_physical_memory_get_dirty(current_addr, TARGET_PAGE_SIZE,
+                                          DIRTY_MEMORY_MIGRATION)) {
             uint8_t *p;
             int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
 
             cpu_physical_memory_reset_dirty(current_addr,
-                                            current_addr + TARGET_PAGE_SIZE,
-                                            MIGRATION_DIRTY_FLAG);
+                                            TARGET_PAGE_SIZE,
+                                            DIRTY_MEMORY_MIGRATION);
 
             p = block->host + offset;
 
@@ -188,7 +189,8 @@
         ram_addr_t addr;
         for (addr = block->offset; addr < block->offset + block->length;
              addr += TARGET_PAGE_SIZE) {
-            if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) {
+            if (cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
+                                              DIRTY_MEMORY_MIGRATION)) {
                 count++;
             }
         }
@@ -279,9 +281,10 @@
         QTAILQ_FOREACH(block, &ram_list.blocks, next) {
             for (addr = block->offset; addr < block->offset + block->length;
                  addr += TARGET_PAGE_SIZE) {
-                if (!cpu_physical_memory_get_dirty(addr,
-                                                   MIGRATION_DIRTY_FLAG)) {
-                    cpu_physical_memory_set_dirty(addr);
+                if (!cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
+                                                   DIRTY_MEMORY_MIGRATION)) {
+                    cpu_physical_memory_set_dirty_flag(
+                            addr, DIRTY_MEMORY_MIGRATION);
                 }
             }
         }
diff --git a/cputlb.c b/cputlb.c
index c721f58..a999d49 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -118,9 +118,8 @@
    can be detected */
 void tlb_protect_code(ram_addr_t ram_addr)
 {
-    cpu_physical_memory_reset_dirty(ram_addr,
-                                    ram_addr + TARGET_PAGE_SIZE,
-                                    CODE_DIRTY_FLAG);
+    cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
+                                    DIRTY_MEMORY_CODE);
 }
 
 /* update the TLB so that writes in physical page 'phys_addr' are no longer
@@ -128,7 +127,7 @@
 void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr,
                              target_ulong vaddr)
 {
-    cpu_physical_memory_set_dirty_flags(ram_addr, CODE_DIRTY_FLAG);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
 }
 
 static bool tlb_is_dirty_ram(CPUTLBEntry *tlbe)
@@ -150,6 +149,26 @@
     }
 }
 
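+/* Re-arm dirty tracking for every TLB write entry, on every CPU and in
+ * every MMU mode, whose host address falls in [start1, start1 + length). */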
+void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
+{
+    CPUState *cpu;
+    CPUArchState *env;
+
+    CPU_FOREACH(cpu) {
+        int mmu_idx;
+
+        env = cpu->env_ptr;
+        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+            unsigned int i;
+
+            for (i = 0; i < CPU_TLB_SIZE; i++) {
+                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
+                                      start1, length);
+            }
+        }
+    }
+}
+
 static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
 {
     if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
@@ -288,7 +307,7 @@
             /* Write access calls the I/O callback.  */
             te->addr_write = address | TLB_MMIO;
         } else if ((pd & ~TARGET_PAGE_MASK) == IO_MEM_RAM &&
-                   !cpu_physical_memory_is_dirty(pd)) {
+                   cpu_physical_memory_is_clean(pd)) {
             te->addr_write = address | TLB_NOTDIRTY;
         } else {
             te->addr_write = address;
diff --git a/exec.c b/exec.c
index 4818eaf..ef45e00 100644
--- a/exec.c
+++ b/exec.c
@@ -39,6 +39,7 @@
 #include "hw/hw.h"
 #include "hw/qdev.h"
 #include "hw/xen/xen.h"
+#include "qemu/bitmap.h"
 #include "qemu/osdep.h"
 #include "qemu/tls.h"
 #include "sysemu/kvm.h"
@@ -49,6 +50,7 @@
 #if defined(CONFIG_USER_ONLY)
 #include <qemu.h>
 #endif
+#include "translate-all.h"
 
 //#define DEBUG_SUBPAGE
 
@@ -532,41 +534,28 @@
     return block;
 }
 
-/* Note: start and end must be within the same ram block.  */
-void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
-                                     int dirty_flags)
+static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length)
 {
-    unsigned long length, start1;
-    int i;
+    ram_addr_t end = TARGET_PAGE_ALIGN(start + length);
 
     start &= TARGET_PAGE_MASK;
-    end = TARGET_PAGE_ALIGN(end);
 
-    length = end - start;
+    RAMBlock *block = qemu_get_ram_block(start);
+    assert(block == qemu_get_ram_block(end - 1));
+    uintptr_t start1 = (uintptr_t)block->host + (start - block->offset);
+    cpu_tlb_reset_dirty_all(start1, length);
+}
+
+/* Note: start and end must be within the same ram block.  */
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
+                                     unsigned client)
+{
     if (length == 0)
         return;
-    cpu_physical_memory_mask_dirty_range(start, length, dirty_flags);
+    cpu_physical_memory_clear_dirty_range(start, length, client);
 
-    /* we modify the TLB cache so that the dirty bit will be set again
-       when accessing the range */
-    start1 = (unsigned long)qemu_safe_ram_ptr(start);
-    /* Chek that we don't span multiple blocks - this breaks the
-       address comparisons below.  */
-    if ((unsigned long)qemu_safe_ram_ptr(end - 1) - start1
-            != (end - 1) - start) {
-        abort();
-    }
-
-    CPUState *cpu;
-    CPU_FOREACH(cpu) {
-        int mmu_idx;
-        for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
-            for(i = 0; i < CPU_TLB_SIZE; i++) {
-                CPUArchState* env = cpu->env_ptr;
-                tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
-                                      start1, length);
-            }
-        }
+    if (tcg_enabled()) {
+        tlb_reset_dirty_range_all(start, length);
     }
 }
 
@@ -603,7 +592,7 @@
         p = (void *)(unsigned long)((tlb_entry->addr_write & TARGET_PAGE_MASK)
             + tlb_entry->addend);
         ram_addr = qemu_ram_addr_from_host_nofail(p);
-        if (!cpu_physical_memory_is_dirty(ram_addr)) {
+        if (cpu_physical_memory_is_clean(ram_addr)) {
             tlb_entry->addr_write |= TLB_NOTDIRTY;
         }
     }
@@ -1079,6 +1068,9 @@
                                    ram_addr_t size, void *host)
 {
     RAMBlock *block, *new_block;
+    ram_addr_t old_ram_size, new_ram_size;
+
+    old_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
 
     size = TARGET_PAGE_ALIGN(size);
     new_block = g_malloc0(sizeof(*new_block));
@@ -1166,11 +1158,17 @@
     ram_list.version++;
     qemu_mutex_unlock_ramlist();
 
-    ram_list.phys_dirty = g_realloc(ram_list.phys_dirty,
-                                       last_ram_offset() >> TARGET_PAGE_BITS);
-    memset(ram_list.phys_dirty + (new_block->offset >> TARGET_PAGE_BITS),
-           0xff, size >> TARGET_PAGE_BITS);
-    //cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
+    new_ram_size = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    if (new_ram_size > old_ram_size) {
+        int i;
+        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
+            ram_list.dirty_memory[i] =
+                bitmap_zero_extend(ram_list.dirty_memory[i],
+                                   old_ram_size, new_ram_size);
+        }
+    }
+    cpu_physical_memory_set_dirty_range(new_block->offset, size);
 
     qemu_ram_setup_dump(new_block->host, size);
     //qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
@@ -1463,61 +1461,52 @@
 static void notdirty_mem_writeb(void *opaque, hwaddr ram_addr,
                                 uint32_t val)
 {
-    int dirty_flags;
-    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
-#if !defined(CONFIG_USER_ONLY)
+    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
         tb_invalidate_phys_page_fast0(ram_addr, 1);
-        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-#endif
     }
     stb_p(qemu_get_ram_ptr(ram_addr), val);
-    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
     /* we remove the notdirty callback only if the code has been
        flushed */
-    if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+    if (!cpu_physical_memory_is_clean(ram_addr)) {
+        CPUArchState *env = current_cpu->env_ptr;
+        tlb_set_dirty(env, env->mem_io_vaddr);
+    }
 }
 
 static void notdirty_mem_writew(void *opaque, hwaddr ram_addr,
                                 uint32_t val)
 {
-    int dirty_flags;
-    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
-#if !defined(CONFIG_USER_ONLY)
-        tb_invalidate_phys_page_fast0(ram_addr, 2);
-        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-#endif
+    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+        tb_invalidate_phys_page_fast0(ram_addr, 2);
     }
     stw_p(qemu_get_ram_ptr(ram_addr), val);
-    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
     /* we remove the notdirty callback only if the code has been
        flushed */
-    if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+    if (!cpu_physical_memory_is_clean(ram_addr)) {
+        CPUArchState *env = current_cpu->env_ptr;
+        tlb_set_dirty(env, env->mem_io_vaddr);
+    }
 }
 
 static void notdirty_mem_writel(void *opaque, hwaddr ram_addr,
                                 uint32_t val)
 {
-    int dirty_flags;
-    dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-    if (!(dirty_flags & CODE_DIRTY_FLAG)) {
-#if !defined(CONFIG_USER_ONLY)
-        tb_invalidate_phys_page_fast0(ram_addr, 4);
-        dirty_flags = cpu_physical_memory_get_dirty_flags(ram_addr);
-#endif
+    if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
+        tb_invalidate_phys_page_fast0(ram_addr, 4);
     }
     stl_p(qemu_get_ram_ptr(ram_addr), val);
-    dirty_flags |= (0xff & ~CODE_DIRTY_FLAG);
-    cpu_physical_memory_set_dirty_flags(ram_addr, dirty_flags);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_MIGRATION);
+    cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_VGA);
     /* we remove the notdirty callback only if the code has been
        flushed */
-    if (dirty_flags == 0xff)
-        tlb_set_dirty(cpu_single_env, cpu_single_env->mem_io_vaddr);
+    if (!cpu_physical_memory_is_clean(ram_addr)) {
+        CPUArchState *env = current_cpu->env_ptr;
+        tlb_set_dirty(env, env->mem_io_vaddr);
+    }
 }
 
 static CPUReadMemoryFunc * const error_mem_read[3] = {
@@ -1532,16 +1521,6 @@
     notdirty_mem_writel,
 };
 
-static void tb_check_watchpoint(CPUArchState* env)
-{
-    TranslationBlock *tb = tb_find_pc(env->mem_io_pc);
-    if (!tb) {
-        cpu_abort(env, "check_watchpoint: could not find TB for "
-                  "pc=%p", (void *)env->mem_io_pc);
-    }
-    cpu_restore_state(env, env->mem_io_pc);
-    tb_phys_invalidate(tb, -1);
-}
 
 /* Generate a debug exception if a watchpoint has been hit.  */
 static void check_watchpoint(int offset, int len_mask, int flags)
@@ -1919,11 +1898,12 @@
 static void invalidate_and_set_dirty(hwaddr addr,
                                      hwaddr length)
 {
-    if (!cpu_physical_memory_is_dirty(addr)) {
+    if (cpu_physical_memory_is_clean(addr)) {
         /* invalidate code */
         tb_invalidate_phys_page_range(addr, addr + length, 0);
         /* set dirty bit */
-        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
+        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_VGA);
+        cpu_physical_memory_set_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
     }
 }
 
@@ -2435,12 +2415,13 @@
         stl_p(ptr, val);
 
         if (unlikely(in_migration)) {
-            if (!cpu_physical_memory_is_dirty(addr1)) {
+            if (cpu_physical_memory_is_clean(addr1)) {
                 /* invalidate code */
                 tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
                 /* set dirty bit */
-                cpu_physical_memory_set_dirty_flags(
-                    addr1, (0xff & ~CODE_DIRTY_FLAG));
+                cpu_physical_memory_set_dirty_flag(addr1,
+                                                   DIRTY_MEMORY_MIGRATION);
+                cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
             }
         }
     }
@@ -2596,12 +2577,12 @@
             stw_p(ptr, val);
             break;
         }
-        if (!cpu_physical_memory_is_dirty(addr1)) {
+        if (cpu_physical_memory_is_clean(addr1)) {
             /* invalidate code */
             tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
             /* set dirty bit */
-            cpu_physical_memory_set_dirty_flags(addr1,
-                (0xff & ~CODE_DIRTY_FLAG));
+            cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_MIGRATION);
+            cpu_physical_memory_set_dirty_flag(addr1, DIRTY_MEMORY_VGA);
         }
     }
 }
diff --git a/hw/android/goldfish/fb.c b/hw/android/goldfish/fb.c
index 5fb3215..bbb392a 100644
--- a/hw/android/goldfish/fb.c
+++ b/hw/android/goldfish/fb.c
@@ -292,20 +292,9 @@
          * changed pixels.
          */
         if (dirty_addr != 0) {
-            int  dirty = 0;
-            int  len   = fbs->src_pitch;
-
-            while (len > 0) {
-                int  len2 = TARGET_PAGE_SIZE - (dirty_addr & (TARGET_PAGE_SIZE-1));
-
-                if (len2 > len)
-                    len2 = len;
-
-                dirty |= cpu_physical_memory_get_dirty(dirty_addr, VGA_DIRTY_FLAG);
-                dirty_addr  += len2;
-                len         -= len2;
-            }
-
+            int  dirty = cpu_physical_memory_get_dirty(dirty_addr,
+                                                       fbs->src_pitch,
+                                                       DIRTY_MEMORY_VGA);
             if (!dirty) { /* this line was not modified, skip to next one */
                 goto NEXT_LINE;
             }
@@ -445,8 +434,8 @@
 
     /* Always clear the dirty VGA bits */
     cpu_physical_memory_reset_dirty(dirty_base + rect->ymin * fbs->src_pitch,
-                                    dirty_base + (rect->ymax+1)* fbs->src_pitch,
-                                    VGA_DIRTY_FLAG);
+                                    (rect->ymax - rect->ymin + 1) * fbs->src_pitch,
+                                    DIRTY_MEMORY_VGA);
     return 1;
 }
 
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index cd04011..1a7b2ab 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -470,9 +470,14 @@
     int fd;
 } RAMBlock;
 
+#define DIRTY_MEMORY_VGA       0
+#define DIRTY_MEMORY_CODE      1
+#define DIRTY_MEMORY_MIGRATION 2
+#define DIRTY_MEMORY_NUM       3        /* num of dirty bits */
+
 typedef struct RAMList {
     QemuMutex mutex;
-    uint8_t *phys_dirty;
+    unsigned long *dirty_memory[DIRTY_MEMORY_NUM];
     RAMBlock *mru_block;
     QTAILQ_HEAD(ram, RAMBlock) blocks;
     uint32_t version;
diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h
index fd4e834..0b4ea36 100644
--- a/include/exec/ram_addr.h
+++ b/include/exec/ram_addr.h
@@ -20,7 +20,10 @@
 #define RAM_ADDR_H
 
 #ifndef CONFIG_USER_ONLY
+#include "exec/cpu-all.h"
 #include "hw/xen/xen.h"
+#include "qemu/bitmap.h"
+#include "qemu/bitops.h"
 
 ram_addr_t qemu_ram_alloc_from_ptr(DeviceState *dev, const char *name,
                                    ram_addr_t size, void *host);
@@ -37,48 +40,122 @@
 void qemu_ram_remap(ram_addr_t addr, ram_addr_t length);
 ram_addr_t qemu_ram_addr_from_host_nofail(void *ptr);
 
-static inline int cpu_physical_memory_get_dirty(ram_addr_t addr,
-                                                int dirty_flags)
+static inline int cpu_physical_memory_get_dirty(ram_addr_t start,
+                                                ram_addr_t length,
+                                                unsigned client)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] & dirty_flags;
+    unsigned long end, page, next;
+
+    assert(client < DIRTY_MEMORY_NUM);
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
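+    /* The range is dirty iff at least one bit is set in [page, end). */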
+    next = find_next_bit(ram_list.dirty_memory[client], end, page);
+
+    return next < end;
 }
 
-static inline int cpu_physical_memory_get_dirty_flags(ram_addr_t addr)
+static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
+                                                      unsigned client)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS];
+    return cpu_physical_memory_get_dirty(addr, 1, client);
 }
 
-/* read dirty bit (return 0 or 1) */
-static inline int cpu_physical_memory_is_dirty(ram_addr_t addr)
+static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] == 0xff;
+    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
+    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
+    bool migration =
+        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
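+    /* Mirrors the old phys_dirty[page] == 0xff check: a page only stops
+     * being "clean" once all three clients have marked it dirty. */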
+    return !(vga && code && migration);
 }
 
-static inline void cpu_physical_memory_set_dirty(ram_addr_t addr)
+static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
+                                                      unsigned client)
 {
-    ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] = 0xff;
+    assert(client < DIRTY_MEMORY_NUM);
+    set_bit(addr >> TARGET_PAGE_BITS, ram_list.dirty_memory[client]);
 }
 
-static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
-                                                      int dirty_flags)
+static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
+                                                       ram_addr_t length)
 {
-    return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
+    unsigned long end, page;
+
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
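+    /* A freshly dirtied range is dirty for all three clients at once. */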
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page);
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page);
+    bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_CODE], page, end - page);
+    //xen_modified_memory(start, length);
 }
 
-static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
-                                                        int length,
-                                                        int dirty_flags)
+#if !defined(_WIN32)
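+/* Import a little-endian dirty bitmap with one bit per host page (the
+ * layout returned by KVM_GET_DIRTY_LOG) into all three client bitmaps. */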
+static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
+                                                          ram_addr_t start,
+                                                          ram_addr_t pages)
 {
-    int i, mask, len;
-    uint8_t *p;
+    unsigned long i, j;
+    unsigned long page_number, c;
+    hwaddr addr;
+    ram_addr_t ram_addr;
+    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
+    unsigned long hpratio = getpagesize() / TARGET_PAGE_SIZE;
+    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
 
-    len = length >> TARGET_PAGE_BITS;
-    mask = ~dirty_flags;
-    p = ram_list.phys_dirty + (start >> TARGET_PAGE_BITS);
-    for (i = 0; i < len; i++) {
-        p[i] &= mask;
+    /* start address is aligned at the start of a word? */
+    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
+        (hpratio == 1)) {
+        long k;
+        long nr = BITS_TO_LONGS(pages);
+
+        for (k = 0; k < nr; k++) {
+            if (bitmap[k]) {
+                unsigned long temp = leul_to_cpu(bitmap[k]);
+
+                ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION][page + k] |= temp;
+                ram_list.dirty_memory[DIRTY_MEMORY_VGA][page + k] |= temp;
+                ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp;
+            }
+        }
+        //xen_modified_memory(start, pages);
+    } else {
+        /*
+         * bitmap-traveling is faster than memory-traveling (for addr...)
+         * especially when most of the memory is not dirty.
+         */
+        for (i = 0; i < len; i++) {
+            if (bitmap[i] != 0) {
+                c = leul_to_cpu(bitmap[i]);
+                do {
+                    j = ffsl(c) - 1;
+                    c &= ~(1ul << j);
+                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
+                    addr = page_number * TARGET_PAGE_SIZE;
+                    ram_addr = start + addr;
+                    cpu_physical_memory_set_dirty_range(ram_addr,
+                                       TARGET_PAGE_SIZE * hpratio);
+                } while (c != 0);
+            }
+        }
     }
 }
+#endif /* not _WIN32 */
+
+static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
+                                                         ram_addr_t length,
+                                                         unsigned client)
+{
+    unsigned long end, page;
+
+    assert(client < DIRTY_MEMORY_NUM);
+    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
+    page = start >> TARGET_PAGE_BITS;
+    bitmap_clear(ram_list.dirty_memory[client], page, end - page);
+}
+
+void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length,
+                                     unsigned client);
 
 int cpu_physical_memory_set_dirty_tracking(int enable);
 
@@ -87,8 +164,5 @@
 int cpu_physical_sync_dirty_bitmap(hwaddr start_addr,
                                    hwaddr end_addr);
 
-void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end,
-                                     int dirty_flags);
-
 #endif
 #endif
\ No newline at end of file
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 308bbb7..1babd5d 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -31,7 +31,7 @@
  * bitmap_andnot(dst, src1, src2, nbits)	*dst = *src1 & ~(*src2)
  * bitmap_complement(dst, src, nbits)		*dst = ~(*src)
  * bitmap_equal(src1, src2, nbits)		Are *src1 and *src2 equal?
- * bitmap_intersects(src1, src2, nbits) 	Do *src1 and *src2 overlap?
+ * bitmap_intersects(src1, src2, nbits)         Do *src1 and *src2 overlap?
  * bitmap_empty(src, nbits)			Are all bits zero in *src?
  * bitmap_full(src, nbits)			Are all bits set in *src?
  * bitmap_set(dst, pos, nbits)			Set specified bit area
@@ -62,71 +62,71 @@
         )
 
 #define DECLARE_BITMAP(name,bits)                  \
-	unsigned long name[BITS_TO_LONGS(bits)]
+        unsigned long name[BITS_TO_LONGS(bits)]
 
 #define small_nbits(nbits)                      \
-	((nbits) <= BITS_PER_LONG)
+        ((nbits) <= BITS_PER_LONG)
 
-int slow_bitmap_empty(const unsigned long *bitmap, int bits);
-int slow_bitmap_full(const unsigned long *bitmap, int bits);
+int slow_bitmap_empty(const unsigned long *bitmap, long bits);
+int slow_bitmap_full(const unsigned long *bitmap, long bits);
 int slow_bitmap_equal(const unsigned long *bitmap1,
-                   const unsigned long *bitmap2, int bits);
+                      const unsigned long *bitmap2, long bits);
 void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
-                         int bits);
+                            long bits);
 void slow_bitmap_shift_right(unsigned long *dst,
-                          const unsigned long *src, int shift, int bits);
+                             const unsigned long *src, int shift, long bits);
 void slow_bitmap_shift_left(unsigned long *dst,
-                         const unsigned long *src, int shift, int bits);
+                            const unsigned long *src, int shift, long bits);
 int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
-                 const unsigned long *bitmap2, int bits);
+                    const unsigned long *bitmap2, long bits);
 void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
-                 const unsigned long *bitmap2, int bits);
+                    const unsigned long *bitmap2, long bits);
 void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
-                  const unsigned long *bitmap2, int bits);
+                     const unsigned long *bitmap2, long bits);
 int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
-                    const unsigned long *bitmap2, int bits);
+                       const unsigned long *bitmap2, long bits);
 int slow_bitmap_intersects(const unsigned long *bitmap1,
-			const unsigned long *bitmap2, int bits);
+                           const unsigned long *bitmap2, long bits);
 
-static inline unsigned long *bitmap_new(int nbits)
+static inline unsigned long *bitmap_new(long nbits)
 {
-    int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+    long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
     return g_malloc0(len);
 }
 
-static inline void bitmap_zero(unsigned long *dst, int nbits)
+static inline void bitmap_zero(unsigned long *dst, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = 0UL;
     } else {
-        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
         memset(dst, 0, len);
     }
 }
 
-static inline void bitmap_fill(unsigned long *dst, int nbits)
+static inline void bitmap_fill(unsigned long *dst, long nbits)
 {
     size_t nlongs = BITS_TO_LONGS(nbits);
     if (!small_nbits(nbits)) {
-        int len = (nlongs - 1) * sizeof(unsigned long);
+        long len = (nlongs - 1) * sizeof(unsigned long);
         memset(dst, 0xff,  len);
     }
     dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
 }
 
 static inline void bitmap_copy(unsigned long *dst, const unsigned long *src,
-                               int nbits)
+                               long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src;
     } else {
-        int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+        long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
         memcpy(dst, src, len);
     }
 }
 
 static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
-                             const unsigned long *src2, int nbits)
+                             const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return (*dst = *src1 & *src2) != 0;
@@ -135,7 +135,7 @@
 }
 
 static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                             const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src1 | *src2;
@@ -145,7 +145,7 @@
 }
 
 static inline void bitmap_xor(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                              const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = *src1 ^ *src2;
@@ -155,7 +155,7 @@
 }
 
 static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                                const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return (*dst = *src1 & ~(*src2)) != 0;
@@ -163,8 +163,9 @@
     return slow_bitmap_andnot(dst, src1, src2, nbits);
 }
 
-static inline void bitmap_complement(unsigned long *dst, const unsigned long *src,
-			int nbits)
+static inline void bitmap_complement(unsigned long *dst,
+                                     const unsigned long *src,
+                                     long nbits)
 {
     if (small_nbits(nbits)) {
         *dst = ~(*src) & BITMAP_LAST_WORD_MASK(nbits);
@@ -174,7 +175,7 @@
 }
 
 static inline int bitmap_equal(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                               const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! ((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
@@ -183,7 +184,7 @@
     }
 }
 
-static inline int bitmap_empty(const unsigned long *src, int nbits)
+static inline int bitmap_empty(const unsigned long *src, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
@@ -192,7 +193,7 @@
     }
 }
 
-static inline int bitmap_full(const unsigned long *src, int nbits)
+static inline int bitmap_full(const unsigned long *src, long nbits)
 {
     if (small_nbits(nbits)) {
         return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
@@ -202,7 +203,7 @@
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
-			const unsigned long *src2, int nbits)
+                                    const unsigned long *src2, long nbits)
 {
     if (small_nbits(nbits)) {
         return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
@@ -211,12 +212,21 @@
     }
 }
 
-void bitmap_set(unsigned long *map, int i, int len);
-void bitmap_clear(unsigned long *map, int start, int nr);
+void bitmap_set(unsigned long *map, long i, long len);
+void bitmap_clear(unsigned long *map, long start, long nr);
 unsigned long bitmap_find_next_zero_area(unsigned long *map,
-					 unsigned long size,
-					 unsigned long start,
-					 unsigned int nr,
-					 unsigned long align_mask);
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask);
+
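+/* Grow a bitmap from old_nbits to new_nbits bits, zeroing the new tail. */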
+static inline unsigned long *bitmap_zero_extend(unsigned long *old,
+                                                long old_nbits, long new_nbits)
+{
+    long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long);
+    unsigned long *new = g_realloc(old, new_len);
+    bitmap_clear(new, old_nbits, new_nbits - old_nbits);
+    return new;
+}
 
 #endif /* BITMAP_H */
diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h
index 304c90c..340b1e7 100644
--- a/include/qemu/bitops.h
+++ b/include/qemu/bitops.h
@@ -28,7 +28,7 @@
  * @nr: the bit to set
  * @addr: the address to start counting from
  */
-static inline void set_bit(int nr, unsigned long *addr)
+static inline void set_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -41,7 +41,7 @@
  * @nr: Bit to clear
  * @addr: Address to start counting from
  */
-static inline void clear_bit(int nr, unsigned long *addr)
+static inline void clear_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -54,7 +54,7 @@
  * @nr: Bit to change
  * @addr: Address to start counting from
  */
-static inline void change_bit(int nr, unsigned long *addr)
+static inline void change_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -67,7 +67,7 @@
  * @nr: Bit to set
  * @addr: Address to count from
  */
-static inline int test_and_set_bit(int nr, unsigned long *addr)
+static inline int test_and_set_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -82,7 +82,7 @@
  * @nr: Bit to clear
  * @addr: Address to count from
  */
-static inline int test_and_clear_bit(int nr, unsigned long *addr)
+static inline int test_and_clear_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -97,7 +97,7 @@
  * @nr: Bit to change
  * @addr: Address to count from
  */
-static inline int test_and_change_bit(int nr, unsigned long *addr)
+static inline int test_and_change_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
         unsigned long *p = addr + BIT_WORD(nr);
@@ -112,7 +112,7 @@
  * @nr: bit number to test
  * @addr: Address to start counting from
  */
-static inline int test_bit(int nr, const unsigned long *addr)
+static inline int test_bit(long nr, const unsigned long *addr)
 {
 	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
diff --git a/kvm-all.c b/kvm-all.c
index a8bcca4..cd8ddd3 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -300,6 +300,8 @@
     return 0;
 }
 
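+/* Round x up to the next multiple of y; y must be a power of two. */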
+#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
+
 /**
  * kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
  * This function updates qemu's dirty bitmap using cpu_physical_memory_set_dirty().
@@ -312,9 +314,7 @@
                                    hwaddr end_addr)
 {
     KVMState *s = kvm_state;
-    unsigned long size, allocated_size = 0;
-    hwaddr phys_addr;
-    ram_addr_t addr;
+    size_t size, allocated_size = 0;
     KVMDirtyLog d;
     KVMSlot *mem;
     int ret = 0;
@@ -328,12 +328,13 @@
             break;
         }
 
-        size = ((mem->memory_size >> TARGET_PAGE_BITS) + 7) / 8;
+        size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
+                     /*HOST_LONG_BITS*/ 64) / 8;
         if (size > allocated_size) {
             d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
             allocated_size = size;
         }
-        memset(d.dirty_bitmap, 0, size);
+        memset(d.dirty_bitmap, 0, allocated_size);
 
         d.slot = mem->slot;
 
@@ -343,19 +344,10 @@
             break;
         }
 
-        for (phys_addr = mem->start_addr, addr = mem->phys_offset;
-             phys_addr - mem->start_addr < mem->memory_size;
-             phys_addr += TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
-            unsigned long *bitmap = (unsigned long *)d.dirty_bitmap;
-            unsigned nr = (phys_addr - mem->start_addr) >> TARGET_PAGE_BITS;
-            unsigned word = nr / (sizeof(*bitmap) * 8);
-            unsigned bit = nr % (sizeof(*bitmap) * 8);
-
-            if ((bitmap[word] >> bit) & 1) {
-                cpu_physical_memory_set_dirty(addr);
-            }
-        }
-        start_addr = phys_addr;
+        cpu_physical_memory_set_dirty_lebitmap(d.dirty_bitmap,
+                                               mem->phys_offset,
+                                               mem->memory_size / getpagesize());
+        start_addr += mem->memory_size;
         if (!start_addr) {
             // Handle wrap-around, which happens when a slot is mapped
             // at the end of the physical address space.
diff --git a/translate-all.c b/translate-all.c
index e4a5e2b..351d949 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -1404,6 +1404,7 @@
     tb_invalidate_phys_page_range(ram_addr, ram_addr + 1, 0);
 }
 #endif /* TARGET_HAS_ICE && !defined(CONFIG_USER_ONLY) */
+#endif  // !CONFIG_ANDROID
 
 void tb_check_watchpoint(CPUArchState *env)
 {
@@ -1417,7 +1418,6 @@
     cpu_restore_state_from_tb(tb, env, env->mem_io_pc);
     tb_phys_invalidate(tb, -1);
 }
-#endif  // !CONFIG_ANDROID
 
 #ifndef CONFIG_USER_ONLY
 /* mask must never be zero, except for A20 change call */
diff --git a/translate-all.h b/translate-all.h
index 167eb58..498db1b 100644
--- a/translate-all.h
+++ b/translate-all.h
@@ -19,6 +19,8 @@
 #ifndef TRANSLATE_ALL_H
 #define TRANSLATE_ALL_H
 
+#include <stdbool.h>
+
 /* Size of the L2 (and L3, etc) page tables.  */
 #define L2_BITS 10
 #define L2_SIZE (1 << L2_BITS)
@@ -27,6 +29,7 @@
     (((TARGET_PHYS_ADDR_SPACE_BITS - TARGET_PAGE_BITS - 1) / L2_BITS) + 1)
 
 /* translate-all.c */
+bool tcg_enabled(void);
 void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
 void cpu_unlink_tb(CPUOldState *cpu);
 void tb_check_watchpoint(CPUArchState *env);
diff --git a/util/bitmap.c b/util/bitmap.c
new file mode 100644
index 0000000..9c6bb52
--- /dev/null
+++ b/util/bitmap.c
@@ -0,0 +1,256 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs.  The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'.  The implementation makes
+ * no particular effort to keep them zero.  It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+int slow_bitmap_empty(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap[k]) {
+            return 0;
+        }
+    }
+    if (bits % BITS_PER_LONG) {
+        if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_full(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (~bitmap[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_equal(const unsigned long *bitmap1,
+                      const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] != bitmap2[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+                            long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        dst[k] = ~src[k];
+    }
+
+    if (bits % BITS_PER_LONG) {
+        dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+    }
+}
+
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+    }
+    return result != 0;
+}
+
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] | bitmap2[k];
+    }
+}
+
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+                     const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] ^ bitmap2[k];
+    }
+}
+
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+                       const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+    }
+    return result != 0;
+}
+
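+/* Mask of the bits from (start % BITS_PER_LONG) up to the top of a word. */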
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
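+    /* A possibly partial first word, then whole words, then the tail. */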
+    while (nr - bits_to_set >= 0) {
+        *p |= mask_to_set;
+        nr -= bits_to_set;
+        bits_to_set = BITS_PER_LONG;
+        mask_to_set = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+        *p |= mask_to_set;
+    }
+}
+
+void bitmap_clear(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+    while (nr - bits_to_clear >= 0) {
+        *p &= ~mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        *p &= ~mask_to_clear;
+    }
+}
+
+#define ALIGN_MASK(x,mask)      (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask)
+{
+    unsigned long index, end, i;
+again:
+    index = find_next_zero_bit(map, size, start);
+
+    /* Align allocation */
+    index = ALIGN_MASK(index, align_mask);
+
+    end = index + nr;
+    if (end > size) {
+        return end;
+    }
+    i = find_next_bit(map, end, index);
+    if (i < end) {
+        start = i + 1;
+        goto again;
+    }
+    return index;
+}
+
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+                           const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] & bitmap2[k]) {
+            return 1;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 1;
+        }
+    }
+    return 0;
+}