Rebase qemu-android over upstream QEMU v2.7.0
This patch rebases the qemu-android source base
on top of a much more recent version of upstream
QEMU (previous version was 2.2.0).
It was created by doing the following:
- Look at the history of all changes in qemu-android
over v2.2.0, and "clean it up" into a list of
small opinionated patches to do one of the following:
- port back fixes from upstream that happened
after v2.2.0
- provide bug-fixes / improvements required
by the Android emulator (but not directly
related to Android emulation).
- add Android-specific features.
- Rebase the "clean patches stack" on top of
QEMU v2.7.0 (i.e. 1dc33ed90bf). The end result
of that rebase is one of this patch's parents,
i.e. 41b9dc407
- Merge the latter with the 'theirs' strategy
(favoring the new sources over the current ones).
- Manually adjust any required changes by
comparing the state of both trees. Fix any
remaining conflicts.
NOTE: This updates the git submodule for the dtc/
sub-directory, so you may need to run the
following command in your work directory
after pulling this change:
git submodule update dtc
Change-Id: I783ab762a696c8345c37e5ce2c966349f3710073
diff --git a/.dir-locals.el b/.dir-locals.el
new file mode 100644
index 0000000..3ac0cfc
--- /dev/null
+++ b/.dir-locals.el
@@ -0,0 +1,2 @@
+((c-mode . ((c-file-style . "stroustrup")
+ (indent-tabs-mode . nil))))
diff --git a/.gitignore b/.gitignore
index e32a584..88ec249 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@
/config-target.*
/config.status
/config-temp
+/trace-events-all
/trace/generated-tracers.h
/trace/generated-tracers.c
/trace/generated-tracers-dtrace.h
@@ -17,12 +18,15 @@
/trace/generated-tcg-tracers.h
/trace/generated-ust-provider.h
/trace/generated-ust.c
-/libcacard/trace/generated-tracers.c
+/ui/shader/texture-blit-frag.h
+/ui/shader/texture-blit-vert.h
*-timestamp
/*-softmmu
/*-darwin-user
/*-linux-user
/*-bsd-user
+/ivshmem-client
+/ivshmem-server
/libdis*
/libuser
/linux-headers/asm
@@ -32,19 +36,14 @@
/qapi-visit.[ch]
/qapi-event.[ch]
/qmp-commands.h
+/qmp-introspect.[ch]
/qmp-marshal.c
/qemu-doc.html
/qemu-tech.html
/qemu-doc.info
/qemu-tech.info
-/qemu.1
-/qemu.pod
-/qemu-img.1
-/qemu-img.pod
/qemu-img
/qemu-nbd
-/qemu-nbd.8
-/qemu-nbd.pod
/qemu-options.def
/qemu-options.texi
/qemu-img-cmds.texi
@@ -53,16 +52,17 @@
/qemu-ga
/qemu-bridge-helper
/qemu-monitor.texi
+/qemu-monitor-info.texi
/qmp-commands.txt
/vscclient
/fsdev/virtfs-proxy-helper
-/fsdev/virtfs-proxy-helper.1
-/fsdev/virtfs-proxy-helper.pod
+*.[1-9]
*.a
*.aux
*.cp
*.dvi
*.exe
+*.msi
*.dll
*.so
*.mo
@@ -70,6 +70,7 @@
*.ky
*.log
*.pdf
+*.pod
*.cps
*.fns
*.kys
@@ -94,6 +95,10 @@
/pc-bios/optionrom/linuxboot.bin
/pc-bios/optionrom/linuxboot.raw
/pc-bios/optionrom/linuxboot.img
+/pc-bios/optionrom/linuxboot_dma.asm
+/pc-bios/optionrom/linuxboot_dma.bin
+/pc-bios/optionrom/linuxboot_dma.raw
+/pc-bios/optionrom/linuxboot_dma.img
/pc-bios/optionrom/multiboot.asm
/pc-bios/optionrom/multiboot.bin
/pc-bios/optionrom/multiboot.raw
@@ -108,4 +113,5 @@
cscope.*
tags
TAGS
+docker-src.*
*~
diff --git a/.travis.yml b/.travis.yml
index ad66e5b..f30b10e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,100 +1,101 @@
+sudo: false
language: c
python:
- "2.4"
compiler:
- gcc
- clang
+cache: ccache
+addons:
+ apt:
+ packages:
+ - libaio-dev
+ - libattr1-dev
+ - libbrlapi-dev
+ - libcap-ng-dev
+ - libgnutls-dev
+ - libgtk-3-dev
+ - libiscsi-dev
+ - liblttng-ust-dev
+ - libnfs-dev
+ - libncurses5-dev
+ - libnss3-dev
+ - libpixman-1-dev
+ - libpng12-dev
+ - librados-dev
+ - libsdl1.2-dev
+ - libseccomp-dev
+ - libspice-protocol-dev
+ - libspice-server-dev
+ - libssh2-1-dev
+ - liburcu-dev
+ - libusb-1.0-0-dev
+ - libvte-2.90-dev
+ - sparse
+ - uuid-dev
+
+# The channel name "irc.oftc.net#qemu" is encrypted against qemu/qemu
+# to prevent IRC notifications from forks. This was created using:
+# $ travis encrypt -r "qemu/qemu" "irc.oftc.net#qemu"
notifications:
irc:
channels:
- - "irc.oftc.net#qemu"
+ - secure: "F7GDRgjuOo5IUyRLqSkmDL7kvdU4UcH3Lm/W2db2JnDHTGCqgEdaYEYKciyCLZ57vOTsTsOgesN8iUT7hNHBd1KWKjZe9KDTZWppWRYVwAwQMzVeSOsbbU4tRoJ6Pp+3qhH1Z0eGYR9ZgKYAoTumDFgSAYRp4IscKS8jkoedOqM="
on_success: change
on_failure: always
env:
global:
- - TEST_CMD=""
- - EXTRA_CONFIG=""
- # Development packages, EXTRA_PKGS saved for additional builds
- - CORE_PKGS="libusb-1.0-0-dev libiscsi-dev librados-dev libncurses5-dev"
- - NET_PKGS="libseccomp-dev libgnutls-dev libssh2-1-dev libspice-server-dev libspice-protocol-dev libnss3-dev"
- - GUI_PKGS="libgtk-3-dev libvte-2.90-dev libsdl1.2-dev libpng12-dev libpixman-1-dev"
- - EXTRA_PKGS=""
+ - TEST_CMD="make check"
matrix:
- # Group major targets together with their linux-user counterparts
- - TARGETS=alpha-softmmu,alpha-linux-user
- - TARGETS=arm-softmmu,arm-linux-user,armeb-linux-user,aarch64-softmmu,aarch64-linux-user
- - TARGETS=cris-softmmu,cris-linux-user
- - TARGETS=i386-softmmu,i386-linux-user,x86_64-softmmu,x86_64-linux-user
- - TARGETS=m68k-softmmu,m68k-linux-user
- - TARGETS=microblaze-softmmu,microblazeel-softmmu,microblaze-linux-user,microblazeel-linux-user
- - TARGETS=mips-softmmu,mips64-softmmu,mips64el-softmmu,mipsel-softmmu
- - TARGETS=mips-linux-user,mips64-linux-user,mips64el-linux-user,mipsel-linux-user,mipsn32-linux-user,mipsn32el-linux-user
- - TARGETS=or32-softmmu,or32-linux-user
- - TARGETS=ppc-softmmu,ppc64-softmmu,ppcemb-softmmu,ppc-linux-user,ppc64-linux-user,ppc64abi32-linux-user,ppc64le-linux-user
- - TARGETS=s390x-softmmu,s390x-linux-user
- - TARGETS=sh4-softmmu,sh4eb-softmmu,sh4-linux-user sh4eb-linux-user
- - TARGETS=sparc-softmmu,sparc64-softmmu,sparc-linux-user,sparc32plus-linux-user,sparc64-linux-user
- - TARGETS=unicore32-softmmu,unicore32-linux-user
- # Group remaining softmmu only targets into one build
- - TARGETS=lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,xtensaeb-softmmu
+ - CONFIG=""
+ - CONFIG="--enable-debug --enable-debug-tcg --enable-trace-backends=log"
+ - CONFIG="--disable-linux-aio --disable-cap-ng --disable-attr --disable-brlapi --disable-uuid --disable-libusb"
+ - CONFIG="--enable-modules"
+ - CONFIG="--with-coroutine=ucontext"
+ - CONFIG="--with-coroutine=sigaltstack"
git:
# we want to do this ourselves
submodules: false
before_install:
+ - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update ; fi
+ - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew install libffi gettext glib pixman ; fi
- wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
- git submodule update --init --recursive
- - sudo apt-get update -qq
- - sudo apt-get install -qq ${CORE_PKGS} ${NET_PKGS} ${GUI_PKGS} ${EXTRA_PKGS}
before_script:
- - ./configure --target-list=${TARGETS} --enable-debug-tcg ${EXTRA_CONFIG}
+ - ./configure ${CONFIG}
script:
- - make -j2 && ${TEST_CMD}
+ - make -j3 && ${TEST_CMD}
matrix:
- # We manually include a number of additional build for non-standard bits
include:
- # Make check target (we only do this once)
- - env:
- - TARGETS=alpha-softmmu,arm-softmmu,aarch64-softmmu,cris-softmmu,
- i386-softmmu,x86_64-softmmu,m68k-softmmu,microblaze-softmmu,
- microblazeel-softmmu,mips-softmmu,mips64-softmmu,
- mips64el-softmmu,mipsel-softmmu,or32-softmmu,ppc-softmmu,
- ppc64-softmmu,ppcemb-softmmu,s390x-softmmu,sh4-softmmu,
- sh4eb-softmmu,sparc-softmmu,sparc64-softmmu,
- unicore32-softmmu,unicore32-linux-user,
- lm32-softmmu,moxie-softmmu,tricore-softmmu,xtensa-softmmu,
- xtensaeb-softmmu
- TEST_CMD="make check"
+ # gprof/gcov are GCC features
+ - env: CONFIG="--enable-gprof --enable-gcov --disable-pie"
compiler: gcc
- # Debug related options
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-debug"
+ # We manually include builds which we disable "make check" for
+ - env: CONFIG="--enable-debug --enable-tcg-interpreter"
+ TEST_CMD=""
compiler: gcc
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-debug --enable-tcg-interpreter"
+ - env: CONFIG="--enable-trace-backends=simple"
+ TEST_CMD=""
compiler: gcc
- # All the extra -dev packages
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_PKGS="libaio-dev libcap-ng-dev libattr1-dev libbrlapi-dev uuid-dev libusb-1.0.0-dev"
+ - env: CONFIG="--enable-trace-backends=ftrace"
+ TEST_CMD=""
compiler: gcc
- # Currently configure doesn't force --disable-pie
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-gprof --enable-gcov --disable-pie"
+ - env: CONFIG="--enable-trace-backends=ust"
+ TEST_CMD=""
compiler: gcc
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_PKGS="sparse"
- EXTRA_CONFIG="--enable-sparse"
+ - env: CONFIG="--with-coroutine=gthread"
+ TEST_CMD=""
compiler: gcc
- # All the trace backends (apart from dtrace)
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-trace-backends=stderr"
+ - env: CONFIG=""
+ os: osx
+ compiler: clang
+ - env: CONFIG=""
+ sudo: required
+ addons:
+ dist: trusty
compiler: gcc
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-trace-backends=simple"
- compiler: gcc
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_CONFIG="--enable-trace-backends=ftrace"
- compiler: gcc
- - env: TARGETS=i386-softmmu,x86_64-softmmu
- EXTRA_PKGS="liblttng-ust-dev liburcu-dev"
- EXTRA_CONFIG="--enable-trace-backends=ust"
- compiler: gcc
+ before_install:
+ - sudo apt-get update -qq
+ - sudo apt-get build-dep -qq qemu
+ - wget -O - http://people.linaro.org/~alex.bennee/qemu-submodule-git-seed.tar.xz | tar -xvJ
+ - git submodule update --init --recursive
diff --git a/CODING_STYLE b/CODING_STYLE
index d46cfa5..e7fde15 100644
--- a/CODING_STYLE
+++ b/CODING_STYLE
@@ -31,7 +31,11 @@
2. Line width
-Lines are 80 characters; not longer.
+Lines should be 80 characters; try not to make them longer.
+
+Sometimes it is hard to do, especially when dealing with QEMU subsystems
+that use long function or symbol names. Even in that case, do not make
+lines much longer than 80 characters.
Rationale:
- Some people like to tile their 24" screens with a 6x4 matrix of 80x24
@@ -39,6 +43,8 @@
let them keep doing it.
- Code and especially patches is much more readable if limited to a sane
line length. Eighty is traditional.
+ - The four-space indentation makes the most common excuse ("But look
+ at all that white space on the left!") moot.
- It is the QEMU coding style.
3. Naming
@@ -87,10 +93,15 @@
5. Declarations
-Mixed declarations (interleaving statements and declarations within blocks)
-are not allowed; declarations should be at the beginning of blocks. In other
-words, the code should not generate warnings if using GCC's
--Wdeclaration-after-statement option.
+Mixed declarations (interleaving statements and declarations within
+blocks) are generally not allowed; declarations should be at the beginning
+of blocks.
+
+Every now and then, an exception is made for declarations inside a
+#ifdef or #ifndef block: if the code looks nicer, such declarations can
+be placed at the top of the block even if there are statements above.
+On the other hand, however, it's often best to move that #ifdef/#ifndef
+block to a separate function altogether.
6. Conditional statements
diff --git a/HACKING b/HACKING
index 12fbc8a..20a9101 100644
--- a/HACKING
+++ b/HACKING
@@ -157,3 +157,62 @@
* you may assume that integers are 2s complement representation
* you may assume that right shift of a signed integer duplicates
the sign bit (ie it is an arithmetic shift, not a logical shift)
+
+In addition, QEMU assumes that the compiler does not use the latitude
+given in C99 and C11 to treat aspects of signed '<<' as undefined, as
+documented in the GNU Compiler Collection manual starting at version 4.0.
+
+7. Error handling and reporting
+
+7.1 Reporting errors to the human user
+
+Do not use printf(), fprintf() or monitor_printf(). Instead, use
+error_report() or error_vreport() from error-report.h. This ensures the
+error is reported in the right place (current monitor or stderr), and in
+a uniform format.
+
+Use error_printf() & friends to print additional information.
+
+error_report() prints the current location. In certain common cases
+like command line parsing, the current location is tracked
+automatically. To manipulate it manually, use the loc_*() from
+error-report.h.
+
+7.2 Propagating errors
+
+An error can't always be reported to the user right where it's detected,
+but often needs to be propagated up the call chain to a place that can
+handle it. This can be done in various ways.
+
+The most flexible one is Error objects. See error.h for usage
+information.
+
+Use the simplest suitable method to communicate success / failure to
+callers. Stick to common methods: non-negative on success / -1 on
+error, non-negative / -errno, non-null / null, or Error objects.
+
+Example: when a function returns a non-null pointer on success, and it
+can fail only in one way (as far as the caller is concerned), returning
+null on failure is just fine, and certainly simpler and a lot easier on
+the eyes than propagating an Error object through an Error ** parameter.
+
+Example: when a function's callers need to report details on failure
+only the function really knows, use Error **, and set suitable errors.
+
+Do not report an error to the user when you're also returning an error
+for somebody else to handle. Leave the reporting to the place that
+consumes the error returned.
+
+7.3 Handling errors
+
+Calling exit() is fine when handling configuration errors during
+startup. It's problematic during normal operation. In particular,
+monitor commands should never exit().
+
+Do not call exit() or abort() to handle an error that can be triggered
+by the guest (e.g., some unimplemented corner case in guest code
+translation or device emulation). Guests should not be able to
+terminate QEMU.
+
+Note that &error_fatal is just another way to exit(1), and &error_abort
+is just another way to abort().
diff --git a/LICENSE b/LICENSE
index da70e94..0e0b4b9 100644
--- a/LICENSE
+++ b/LICENSE
@@ -11,7 +11,7 @@
As of July 2013, contributions under version 2 of the GNU General Public
License (and no later version) are only accepted for the following files
-or directories: bsd-user/, linux-user/, hw/misc/vfio.c, hw/xen/xen_pt*.
+or directories: bsd-user/, linux-user/, hw/vfio/, hw/xen/xen_pt*.
3) The Tiny Code Generator (TCG) is released under the BSD license
(see license headers in files).
diff --git a/MAINTAINERS b/MAINTAINERS
index bcb69e8..b6fb84e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -50,28 +50,46 @@
General Project Administration
------------------------------
-M: Anthony Liguori <aliguori@amazon.com>
M: Peter Maydell <peter.maydell@linaro.org>
+All patches CC here
+L: qemu-devel@nongnu.org
+F: *
+F: */
+
Responsible Disclosure, Reporting Security Issues
------------------------------
W: http://wiki.qemu.org/SecurityProcess
M: Michael S. Tsirkin <mst@redhat.com>
-M: Anthony Liguori <aliguori@amazon.com>
L: secalert@redhat.com
Guest CPU cores (TCG):
----------------------
Overall
L: qemu-devel@nongnu.org
-S: Odd fixes
+M: Paolo Bonzini <pbonzini@redhat.com>
+M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
+M: Richard Henderson <rth@twiddle.net>
+S: Maintained
F: cpu-exec.c
+F: cpu-exec-common.c
+F: cpus.c
F: cputlb.c
+F: exec.c
F: softmmu_template.h
-F: translate-all.c
-F: include/exec/cpu_ldst.h
-F: include/exec/cpu_ldst_template.h
+F: translate-all.*
+F: translate-common.c
+F: include/exec/cpu*.h
+F: include/exec/exec-all.h
F: include/exec/helper*.h
+F: include/exec/tb-hash.h
+
+FPU emulation
+M: Aurelien Jarno <aurelien@aurel32.net>
+M: Peter Maydell <peter.maydell@linaro.org>
+S: Odd Fixes
+F: fpu/
+F: include/fpu/
Alpha
M: Richard Henderson <rth@twiddle.net>
@@ -79,13 +97,18 @@
F: target-alpha/
F: hw/alpha/
F: tests/tcg/alpha/
+F: disas/alpha.c
ARM
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: target-arm/
F: hw/arm/
F: hw/cpu/a*mpcore.c
+F: disas/arm.c
+F: disas/arm-a64.cc
+F: disas/libvixl/
CRIS
M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
@@ -93,13 +116,18 @@
F: target-cris/
F: hw/cris/
F: tests/tcg/cris/
+F: disas/cris.c
LM32
M: Michael Walle <michael@walle.cc>
S: Maintained
F: target-lm32/
+F: disas/lm32.c
F: hw/lm32/
-F: hw/char/lm32_*
+F: hw/*/lm32_*
+F: hw/*/milkymist-*
+F: include/hw/char/lm32_juart.h
+F: include/hw/lm32/
F: tests/tcg/lm32/
M68K
@@ -112,6 +140,7 @@
S: Maintained
F: target-microblaze/
F: hw/microblaze/
+F: disas/microblaze.c
MIPS
M: Aurelien Jarno <aurelien@aurel32.net>
@@ -120,11 +149,13 @@
F: target-mips/
F: hw/mips/
F: tests/tcg/mips/
+F: disas/mips.c
Moxie
M: Anthony Green <green@moxielogic.com>
S: Maintained
F: target-moxie/
+F: disas/moxie.c
OpenRISC
M: Jia Liu <proljc@gmail.com>
@@ -134,11 +165,13 @@
F: tests/tcg/openrisc/
PowerPC
+M: David Gibson <david@gibson.dropbear.id.au>
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Maintained
F: target-ppc/
F: hw/ppc/
+F: disas/ppc.c
S390
M: Richard Henderson <rth@twiddle.net>
@@ -146,19 +179,23 @@
S: Maintained
F: target-s390x/
F: hw/s390x/
+F: disas/s390.c
SH4
M: Aurelien Jarno <aurelien@aurel32.net>
S: Odd Fixes
F: target-sh4/
F: hw/sh4/
+F: disas/sh4.c
SPARC
-M: Blue Swirl <blauwirbel@gmail.com>
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
+M: Artyom Tarasenko <atar4qemu@gmail.com>
S: Maintained
F: target-sparc/
F: hw/sparc/
F: hw/sparc64/
+F: disas/sparc.c
UniCore32
M: Guan Xuetao <gxt@mprc.pku.edu.cn>
@@ -169,9 +206,11 @@
X86
M: Paolo Bonzini <pbonzini@redhat.com>
M: Richard Henderson <rth@twiddle.net>
-S: Odd Fixes
+M: Eduardo Habkost <ehabkost@redhat.com>
+S: Maintained
F: target-i386/
F: hw/i386/
+F: disas/i386.c
Xtensa
M: Max Filippov <jcmvbkbc@gmail.com>
@@ -196,9 +235,11 @@
S: Supported
F: kvm-*
F: */kvm.*
+F: include/sysemu/kvm*.h
ARM
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: target-arm/kvm.c
@@ -218,9 +259,14 @@
M: Alexander Graf <agraf@suse.de>
S: Maintained
F: target-s390x/kvm.c
+F: target-s390x/ioinst.[ch]
+F: target-s390x/machine.c
F: hw/intc/s390_flic.c
F: hw/intc/s390_flic_kvm.c
F: include/hw/s390x/s390_flic.h
+F: gdb-xml/s390*.xml
+T: git git://github.com/cohuck/qemu.git s390-next
+T: git git://github.com/borntraeger/qemu.git s390-next
X86
M: Paolo Bonzini <pbonzini@redhat.com>
@@ -233,11 +279,21 @@
----------------------
X86
-M: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+M: Stefano Stabellini <sstabellini@kernel.org>
+M: Anthony Perard <anthony.perard@citrix.com>
L: xen-devel@lists.xensource.com
S: Supported
F: xen-*
F: */xen*
+F: hw/char/xen_console.c
+F: hw/display/xenfb.c
+F: hw/net/xen_nic.c
+F: hw/block/xen_*
+F: hw/xen/
+F: hw/xenpv/
+F: hw/i386/xen/
+F: include/hw/xen/
+F: include/sysemu/xen-mapcache.h
Hosts:
------
@@ -258,99 +314,166 @@
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: *win32*
+F: qemu.nsi
ARM Machines
------------
Allwinner-a10
-M: Li Guang <lig.fnst@cn.fujitsu.com>
+M: Beniamino Galvani <b.galvani@gmail.com>
+L: qemu-arm@nongnu.org
S: Maintained
-F: hw/*/allwinner-a10*
-F: include/hw/*/allwinner-a10*
+F: hw/*/allwinner*
+F: include/hw/*/allwinner*
F: hw/arm/cubieboard.c
+ARM PrimeCell
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/char/pl011.c
+F: hw/display/pl110*
+F: hw/dma/pl080.c
+F: hw/dma/pl330.c
+F: hw/gpio/pl061.c
+F: hw/input/pl050.c
+F: hw/intc/pl190.c
+F: hw/sd/pl181.c
+F: hw/timer/pl031.c
+F: include/hw/arm/primecell.h
+
+ARM cores
+M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/intc/arm*
+F: hw/intc/gic_internal.h
+F: hw/misc/a9scu.c
+F: hw/misc/arm11scu.c
+F: hw/timer/a9gtimer*
+F: hw/timer/arm_*
+F: include/hw/arm/arm.h
+F: include/hw/intc/arm*
+F: include/hw/misc/a9scu.h
+F: include/hw/misc/arm11scu.h
+F: include/hw/timer/a9gtimer.h
+F: include/hw/timer/arm_mptimer.h
+
Exynos
-M: Evgeny Voevodin <e.voevodin@samsung.com>
-M: Maksim Kozlov <m.kozlov@samsung.com>
M: Igor Mitsyanko <i.mitsyanko@gmail.com>
-M: Dmitry Solodkiy <d.solodkiy@samsung.com>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/exynos*
+F: include/hw/arm/exynos4210.h
Calxeda Highbank
M: Rob Herring <robh@kernel.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/highbank.c
F: hw/net/xgmac.c
Canon DIGIC
M: Antony Pavlov <antonynpavlov@gmail.com>
+L: qemu-arm@nongnu.org
S: Maintained
F: include/hw/arm/digic.h
F: hw/*/digic*
Gumstix
L: qemu-devel@nongnu.org
+L: qemu-arm@nongnu.org
S: Orphan
F: hw/arm/gumstix.c
i.MX31
M: Peter Chubb <peter.chubb@nicta.com.au>
+L: qemu-arm@nongnu.org
S: Odd fixes
F: hw/*/imx*
F: hw/arm/kzm.c
+F: include/hw/arm/fsl-imx31.h
Integrator CP
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/integratorcp.c
-Mainstone
-L: qemu-devel@nongnu.org
-S: Orphan
-F: hw/arm/mainstone.c
-
Musicpal
M: Jan Kiszka <jan.kiszka@web.de>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/musicpal.c
nSeries
M: Andrzej Zaborowski <balrogg@gmail.com>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/nseries.c
Palm
M: Andrzej Zaborowski <balrogg@gmail.com>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/palm.c
Real View
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/arm/realview*
+F: hw/intc/realview_gic.c
+F: include/hw/intc/realview_gic.h
-Spitz
+PXA2XX
M: Andrzej Zaborowski <balrogg@gmail.com>
+L: qemu-arm@nongnu.org
S: Maintained
+F: hw/arm/mainstone.c
F: hw/arm/spitz.c
+F: hw/arm/tosa.c
+F: hw/arm/z2.c
+F: hw/*/pxa2xx*
+F: include/hw/arm/pxa.h
Stellaris
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/stellaris*
Versatile PB
M: Peter Maydell <peter.maydell@linaro.org>
+L: qemu-arm@nongnu.org
S: Maintained
F: hw/*/versatile*
Xilinx Zynq
-M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+M: Alistair Francis <alistair.francis@xilinx.com>
+L: qemu-arm@nongnu.org
S: Maintained
-F: hw/arm/xilinx_zynq.c
-F: hw/misc/zynq_slcr.c
+F: hw/*/xilinx_*
F: hw/*/cadence_*
-F: hw/ssi/xilinx_spips.c
+F: hw/misc/zynq_slcr.c
+F: include/hw/xilinx.h
+X: hw/ssi/xilinx_*
+
+Xilinx ZynqMP
+M: Alistair Francis <alistair.francis@xilinx.com>
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/*/xlnx*.c
+F: include/hw/*/xlnx*.c
+
+ARM ACPI Subsystem
+M: Shannon Zhao <zhaoshenglong@huawei.com>
+M: Shannon Zhao <shannon.zhao@linaro.org>
+L: qemu-arm@nongnu.org
+S: Maintained
+F: hw/arm/virt-acpi-build.c
+F: include/hw/arm/virt-acpi-build.h
CRIS Machines
-------------
@@ -394,7 +517,7 @@
F: hw/microblaze/petalogix_s3adsp1800_mmu.c
petalogix_ml605
-M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
S: Maintained
F: hw/microblaze/petalogix_ml605_mmu.c
@@ -475,7 +598,7 @@
F: hw/misc/macio/
PReP
-M: Andreas Färber <andreas.faerber@web.de>
+L: qemu-devel@nongnu.org
L: qemu-ppc@nongnu.org
S: Odd Fixes
F: hw/ppc/prep.c
@@ -483,6 +606,7 @@
F: hw/isa/pc87312.[hc]
sPAPR
+M: David Gibson <david@gibson.dropbear.id.au>
M: Alexander Graf <agraf@suse.de>
L: qemu-ppc@nongnu.org
S: Supported
@@ -514,11 +638,13 @@
--------------
Sun4m
M: Blue Swirl <blauwirbel@gmail.com>
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
S: Maintained
F: hw/sparc/sun4m.c
Sun4u
M: Blue Swirl <blauwirbel@gmail.com>
+M: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
S: Maintained
F: hw/sparc64/sun4u.c
@@ -530,23 +656,18 @@
S390 Machines
-------------
-S390 Virtio
-M: Alexander Graf <agraf@suse.de>
-S: Maintained
-F: hw/s390x/s390-*.c
-
S390 Virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Alexander Graf <agraf@suse.de>
S: Supported
-F: hw/s390x/s390-virtio-ccw.c
-F: hw/s390x/css.[hc]
-F: hw/s390x/sclp*.[hc]
-F: hw/s390x/ipl*.[hc]
+F: hw/char/sclp*.[hc]
+F: hw/s390x/
F: include/hw/s390x/
F: pc-bios/s390-ccw/
-T: git git://github.com/cohuck/qemu virtio-ccw-upstr
+F: hw/watchdog/wdt_diag288.c
+T: git git://github.com/cohuck/qemu.git s390-next
+T: git git://github.com/borntraeger/qemu.git s390-next
UniCore32 Machines
-------------
@@ -559,7 +680,6 @@
X86 Machines
------------
PC
-M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: include/hw/i386/
@@ -576,7 +696,32 @@
F: hw/acpi/ich9.c
F: include/hw/acpi/ich9.h
F: include/hw/acpi/piix.h
+F: hw/misc/sga.c
+PC Chipset
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Support
+F: hw/char/debugcon.c
+F: hw/char/parallel.c
+F: hw/char/serial*
+F: hw/dma/i8257*
+F: hw/i2c/pm_smbus.c
+F: hw/intc/apic*
+F: hw/intc/ioapic*
+F: hw/intc/i8259*
+F: hw/misc/debugexit.c
+F: hw/misc/pc-testdev.c
+F: hw/timer/hpet*
+F: hw/timer/i8254*
+F: hw/timer/mc146818rtc*
+
+Machine core
+M: Eduardo Habkost <ehabkost@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
+S: Supported
+F: hw/core/machine.c
+F: include/hw/boards.h
Xtensa Machines
---------------
@@ -593,24 +738,69 @@
Devices
-------
+EDU
+M: Jiri Slaby <jslaby@suse.cz>
+S: Maintained
+F: hw/misc/edu.c
+
IDE
-M: Kevin Wolf <kwolf@redhat.com>
-M: Stefan Hajnoczi <stefanha@redhat.com>
-S: Odd Fixes
+M: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
F: include/hw/ide.h
F: hw/ide/
+F: hw/block/block.c
+F: hw/block/cdrom.c
+F: hw/block/hd-geometry.c
+F: tests/ide-test.c
+F: tests/ahci-test.c
+F: tests/libqos/ahci*
+T: git git://github.com/jnsnow/qemu.git ide
+
+Floppy
+M: John Snow <jsnow@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: hw/block/fdc.c
+F: include/hw/block/fdc.h
+F: tests/fdc-test.c
+T: git git://github.com/jnsnow/qemu.git ide
OMAP
M: Peter Maydell <peter.maydell@linaro.org>
S: Maintained
F: hw/*/omap*
+F: include/hw/arm/omap.h
+
+IPack
+M: Alberto Garcia <berto@igalia.com>
+S: Odd Fixes
+F: hw/char/ipoctal232.c
+F: hw/ipack/
PCI
M: Michael S. Tsirkin <mst@redhat.com>
+M: Marcel Apfelbaum <marcel@redhat.com>
S: Supported
F: include/hw/pci/*
+F: hw/misc/pci-testdev.c
F: hw/pci/*
+F: hw/pci-bridge/*
+
+ACPI/SMBIOS
+M: Michael S. Tsirkin <mst@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+S: Supported
+F: include/hw/acpi/*
+F: include/hw/smbios/*
+F: hw/mem/*
F: hw/acpi/*
+F: hw/smbios/*
+F: hw/i386/acpi-build.[hc]
+F: hw/i386/*dsl
+F: hw/arm/virt-acpi-build.c
+F: include/hw/arm/virt-acpi-build.h
+F: scripts/acpi*py
ppc4xx
M: Alexander Graf <agraf@suse.de>
@@ -624,6 +814,19 @@
L: qemu-ppc@nongnu.org
S: Supported
F: hw/ppc/e500*
+F: hw/pci-host/ppce500.c
+F: hw/net/fsl_etsec/
+
+Character devices
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Odd Fixes
+F: hw/char/
+
+Network devices
+M: Jason Wang <jasowang@redhat.com>
+S: Odd Fixes
+F: hw/net/
+T: git git://github.com/jasowang/qemu.git net
SCSI
M: Paolo Bonzini <pbonzini@redhat.com>
@@ -637,16 +840,27 @@
F: hw/scsi/lsi53c895a.c
SSI
-M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
S: Maintained
F: hw/ssi/*
F: hw/block/m25p80.c
+X: hw/ssi/xilinx_*
+
+Xilinx SPI
+M: Alistair Francis <alistair.francis@xilinx.com>
+M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
+S: Maintained
+F: hw/ssi/xilinx_*
USB
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: hw/usb/*
F: tests/usb-*-test.c
+F: docs/usb2.txt
+F: docs/usb-storage.txt
+F: include/hw/usb.h
+F: include/hw/usb/
USB (serial adapter)
M: Gerd Hoffmann <kraxel@redhat.com>
@@ -657,7 +871,8 @@
VFIO
M: Alex Williamson <alex.williamson@redhat.com>
S: Supported
-F: hw/misc/vfio.c
+F: hw/vfio/*
+F: include/hw/vfio/
vhost
M: Michael S. Tsirkin <mst@redhat.com>
@@ -665,56 +880,81 @@
F: hw/*/*vhost*
virtio
-M: Anthony Liguori <aliguori@amazon.com>
M: Michael S. Tsirkin <mst@redhat.com>
S: Supported
F: hw/*/virtio*
+F: net/vhost-user.c
+F: include/hw/virtio/
virtio-9p
M: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+M: Greg Kurz <groug@kaod.org>
S: Supported
F: hw/9pfs/
F: fsdev/
F: tests/virtio-9p-test.c
T: git git://github.com/kvaneesh/QEMU.git
+T: git git://github.com/gkurz/qemu.git 9p-next
virtio-blk
-M: Kevin Wolf <kwolf@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
F: hw/block/virtio-blk.c
+F: hw/block/dataplane/*
+F: hw/virtio/dataplane/*
+T: git git://github.com/stefanha/qemu.git block
virtio-ccw
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
S: Supported
F: hw/s390x/virtio-ccw.[hc]
-T: git git://github.com/cohuck/qemu virtio-ccw-upstr
+T: git git://github.com/cohuck/qemu.git s390-next
+T: git git://github.com/borntraeger/qemu.git s390-next
+
+virtio-input
+M: Gerd Hoffmann <kraxel@redhat.com>
+S: Maintained
+F: hw/input/virtio-input*.c
+F: include/hw/virtio/virtio-input.h
virtio-serial
M: Amit Shah <amit.shah@redhat.com>
S: Supported
F: hw/char/virtio-serial-bus.c
F: hw/char/virtio-console.c
+F: include/hw/virtio/virtio-serial.h
+
+virtio-rng
+M: Amit Shah <amit.shah@redhat.com>
+S: Supported
+F: hw/virtio/virtio-rng.c
+F: include/hw/virtio/virtio-rng.h
+F: include/sysemu/rng*.h
+F: backends/rng*.c
nvme
M: Keith Busch <keith.busch@intel.com>
+L: qemu-block@nongnu.org
S: Supported
F: hw/block/nvme*
F: tests/nvme-test.c
megasas
M: Hannes Reinecke <hare@suse.de>
+L: qemu-block@nongnu.org
S: Supported
F: hw/scsi/megasas.c
F: hw/scsi/mfi.h
-Xilinx EDK
-M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
-M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+Network packet abstractions
+M: Dmitry Fleytman <dmitry@daynix.com>
S: Maintained
-F: hw/*/xilinx_*
-F: include/hw/xilinx.h
+F: include/net/eth.h
+F: net/eth.c
+F: hw/net/net_rx_pkt*
+F: hw/net/net_tx_pkt*
Vmware
M: Dmitry Fleytman <dmitry@daynix.com>
@@ -722,10 +962,31 @@
F: hw/net/vmxnet*
F: hw/scsi/vmw_pvscsi*
+Rocker
+M: Jiri Pirko <jiri@resnulli.us>
+S: Maintained
+F: hw/net/rocker/
+
+NVDIMM
+M: Xiao Guangrong <guangrong.xiao@linux.intel.com>
+S: Maintained
+F: hw/acpi/nvdimm.c
+F: hw/mem/nvdimm.c
+F: include/hw/mem/nvdimm.h
+
+e1000x
+M: Dmitry Fleytman <dmitry@daynix.com>
+S: Maintained
+F: hw/net/e1000x*
+
+e1000e
+M: Dmitry Fleytman <dmitry@daynix.com>
+S: Maintained
+F: hw/net/e1000e*
+
Subsystems
----------
Audio
-M: Vassili Karpov (malc) <av1474@comtv.ru>
M: Gerd Hoffmann <kraxel@redhat.com>
S: Maintained
F: audio/
@@ -734,24 +995,53 @@
F: tests/es1370-test.c
F: tests/intel-hda-test.c
-Block
+Block layer core
M: Kevin Wolf <kwolf@redhat.com>
-M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
-F: async.c
-F: aio-*.c
F: block*
F: block/
F: hw/block/
+F: include/block/
F: qemu-img*
F: qemu-io*
-F: tests/image-fuzzer/
F: tests/qemu-iotests/
T: git git://repo.or.cz/qemu/kevin.git block
+
+Block I/O path
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Fam Zheng <famz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: async.c
+F: aio-*.c
+F: block/io.c
+F: migration/block*
+F: include/block/aio.h
T: git git://github.com/stefanha/qemu.git block
-Character Devices
-M: Anthony Liguori <aliguori@amazon.com>
+Block Jobs
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: blockjob.c
+F: include/block/blockjob.h
+F: block/backup.c
+F: block/commit.c
+F: block/stream.c
+F: block/mirror.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
+Block QAPI, monitor, command line
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: blockdev.c
+F: block/qapi.c
+F: qapi/block*.json
+T: git git://repo.or.cz/qemu/armbru.git block-next
+
+Character device backends
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: qemu-char.c
@@ -763,12 +1053,16 @@
S: Maintained
F: backends/baum.c
+Coverity model
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: scripts/coverity-model.c
+
CPU
-M: Andreas Färber <afaerber@suse.de>
+L: qemu-devel@nongnu.org
S: Supported
F: qom/cpu.c
F: include/qom/cpu.h
-F: target-i386/cpu.c
ICC Bus
M: Igor Mammedov <imammedo@redhat.com>
@@ -777,10 +1071,19 @@
F: hw/cpu/icc_bus.c
Device Tree
-M: Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+M: Peter Crosthwaite <crosthwaite.peter@gmail.com>
M: Alexander Graf <agraf@suse.de>
S: Maintained
-F: device_tree.[ch]
+F: device_tree.c
+F: include/sysemu/device_tree.h
+
+Error reporting
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
+F: include/qapi/error.h
+F: include/qemu/error-report.h
+F: util/error.c
+F: util/qemu-error.c
GDB stub
L: qemu-devel@nongnu.org
@@ -794,6 +1097,7 @@
F: include/exec/ioport.h
F: ioport.c
F: include/exec/memory.h
+F: include/exec/ram_addr.h
F: memory.c
F: include/exec/memory-internal.h
F: exec.c
@@ -802,24 +1106,23 @@
M: Gerd Hoffmann <kraxel@redhat.com>
S: Supported
F: include/ui/qemu-spice.h
+F: include/ui/spice-display.h
F: ui/spice-*.c
F: audio/spiceaudio.c
F: hw/display/qxl*
Graphics
-M: Anthony Liguori <aliguori@amazon.com>
M: Gerd Hoffmann <kraxel@redhat.com>
S: Odd Fixes
F: ui/
+F: include/ui/
Cocoa graphics
-M: Andreas Färber <andreas.faerber@web.de>
M: Peter Maydell <peter.maydell@linaro.org>
S: Odd Fixes
F: ui/cocoa.m
Main loop
-M: Anthony Liguori <aliguori@amazon.com>
M: Paolo Bonzini <pbonzini@redhat.com>
S: Maintained
F: cpus.c
@@ -835,12 +1138,12 @@
F: hmp-commands.hx
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
-Network device layer
-M: Anthony Liguori <aliguori@amazon.com>
-M: Stefan Hajnoczi <stefanha@redhat.com>
+Network device backends
+M: Jason Wang <jasowang@redhat.com>
S: Maintained
F: net/
-T: git git://github.com/stefanha/qemu.git net
+F: include/net/
+T: git git://github.com/jasowang/qemu.git net
Netmap network backend
M: Luigi Rizzo <rizzo@iet.unipi.it>
@@ -853,31 +1156,62 @@
Network Block Device (NBD)
M: Paolo Bonzini <pbonzini@redhat.com>
S: Odd Fixes
-F: block/nbd.c
-F: nbd.*
+F: block/nbd*
+F: nbd/
+F: include/block/nbd*
F: qemu-nbd.c
T: git git://github.com/bonzini/qemu.git nbd-next
-QAPI
-M: Luiz Capitulino <lcapitulino@redhat.com>
-M: Michael Roth <mdroth@linux.vnet.ibm.com>
+NUMA
+M: Eduardo Habkost <ehabkost@redhat.com>
S: Maintained
+F: numa.c
+F: include/sysemu/numa.h
+T: git git://github.com/ehabkost/qemu.git numa
+
+Host Memory Backends
+M: Eduardo Habkost <ehabkost@redhat.com>
+M: Igor Mammedov <imammedo@redhat.com>
+S: Maintained
+F: backends/hostmem*.c
+F: include/sysemu/hostmem.h
+
+QAPI
+M: Markus Armbruster <armbru@redhat.com>
+M: Michael Roth <mdroth@linux.vnet.ibm.com>
+S: Supported
F: qapi/
+X: qapi/*.json
+F: include/qapi/
+X: include/qapi/qmp/
+F: include/qapi/qmp/dispatch.h
F: tests/qapi-schema/
-T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
+F: tests/test-*-visitor.c
+F: tests/test-qmp-*.c
+F: scripts/qapi*
+F: docs/qapi*
+T: git git://repo.or.cz/qemu/armbru.git qapi-next
QAPI Schema
M: Eric Blake <eblake@redhat.com>
-M: Luiz Capitulino <lcapitulino@redhat.com>
M: Markus Armbruster <armbru@redhat.com>
S: Supported
F: qapi-schema.json
-T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
+F: qapi/*.json
+T: git git://repo.or.cz/qemu/armbru.git qapi-next
QObject
M: Luiz Capitulino <lcapitulino@redhat.com>
S: Maintained
F: qobject/
+F: include/qapi/qmp/
+X: include/qapi/qmp/dispatch.h
+F: tests/check-qdict.c
+F: tests/check-qfloat.c
+F: tests/check-qint.c
+F: tests/check-qjson.c
+F: tests/check-qlist.c
+F: tests/check-qstring.c
T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
QEMU Guest Agent
@@ -887,7 +1221,6 @@
T: git git://github.com/mdroth/qemu.git qga
QOM
-M: Anthony Liguori <aliguori@amazon.com>
M: Andreas Färber <afaerber@suse.de>
S: Supported
T: git git://github.com/afaerber/qemu-cpu.git qom-next
@@ -895,21 +1228,33 @@
X: include/qom/cpu.h
F: qom/
X: qom/cpu.c
+F: tests/check-qom-interface.c
+F: tests/check-qom-proplist.c
F: tests/qom-test.c
QMP
-M: Luiz Capitulino <lcapitulino@redhat.com>
-S: Maintained
+M: Markus Armbruster <armbru@redhat.com>
+S: Supported
F: qmp.c
F: monitor.c
F: qmp-commands.hx
-F: QMP/
-T: git git://repo.or.cz/qemu/qmp-unstable.git queue/qmp
+F: docs/*qmp-*
+F: scripts/qmp/
+T: git git://repo.or.cz/qemu/armbru.git qapi-next
+
+Register API
+M: Alistair Francis <alistair.francis@xilinx.com>
+S: Maintained
+F: hw/core/register.c
+F: include/hw/register.h
SLIRP
+M: Samuel Thibault <samuel.thibault@ens-lyon.org>
M: Jan Kiszka <jan.kiszka@siemens.com>
S: Maintained
F: slirp/
+F: net/slirp.c
+F: include/net/slirp.h
T: git git://git.kiszka.org/qemu.git queues/slirp
Tracing
@@ -922,18 +1267,18 @@
T: git git://github.com/stefanha/qemu.git tracing
Checkpatch
-M: Blue Swirl <blauwirbel@gmail.com>
S: Odd Fixes
F: scripts/checkpatch.pl
Migration
M: Juan Quintela <quintela@redhat.com>
+M: Amit Shah <amit.shah@redhat.com>
S: Maintained
F: include/migration/
-F: migration*
-F: savevm.c
-F: arch_init.c
-F: vmstate.c
+F: migration/
+F: scripts/vmstate-static-checker.py
+F: tests/vmstate-static-checker-data/
+F: docs/migration.txt
Seccomp
M: Eduardo Otubo <eduardo.otubo@profitbricks.com>
@@ -941,6 +1286,50 @@
F: qemu-seccomp.c
F: include/sysemu/seccomp.h
+Cryptography
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: crypto/
+F: include/crypto/
+F: tests/test-crypto-*
+
+Coroutines
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Kevin Wolf <kwolf@redhat.com>
+F: util/*coroutine*
+F: include/qemu/coroutine*
+F: tests/test-coroutine.c
+
+Buffers
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: util/buffer.c
+F: include/qemu/buffer.h
+
+I/O Channels
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Maintained
+F: io/
+F: include/io/
+F: tests/test-io-*
+
+Sockets
+M: Daniel P. Berrange <berrange@redhat.com>
+M: Gerd Hoffmann <kraxel@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: include/qemu/sockets.h
+F: util/qemu-sockets.c
+
+Throttling infrastructure
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/throttle-groups.c
+F: include/block/throttle-groups.h
+F: include/qemu/throttle.h
+F: util/throttle.c
+L: qemu-block@nongnu.org
+
Usermode Emulation
------------------
Overall
@@ -950,8 +1339,7 @@
F: user-exec.c
BSD user
-M: Blue Swirl <blauwirbel@gmail.com>
-S: Maintained
+S: Orphan
F: bsd-user/
Linux user
@@ -970,54 +1358,60 @@
M: Claudio Fontana <claudio.fontana@huawei.com>
M: Claudio Fontana <claudio.fontana@gmail.com>
S: Maintained
+L: qemu-arm@nongnu.org
F: tcg/aarch64/
+F: disas/arm-a64.cc
+F: disas/libvixl/
ARM target
M: Andrzej Zaborowski <balrogg@gmail.com>
S: Maintained
+L: qemu-arm@nongnu.org
F: tcg/arm/
+F: disas/arm.c
i386 target
L: qemu-devel@nongnu.org
S: Maintained
F: tcg/i386/
+F: disas/i386.c
IA64 target
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: tcg/ia64/
+F: disas/ia64.c
MIPS target
M: Aurelien Jarno <aurelien@aurel32.net>
S: Maintained
F: tcg/mips/
+F: disas/mips.c
PPC
M: Vassili Karpov (malc) <av1474@comtv.ru>
S: Maintained
F: tcg/ppc/
-
-PPC64 target
-M: Vassili Karpov (malc) <av1474@comtv.ru>
-S: Maintained
-F: tcg/ppc64/
+F: disas/ppc.c
S390 target
M: Alexander Graf <agraf@suse.de>
M: Richard Henderson <rth@twiddle.net>
S: Maintained
F: tcg/s390/
+F: disas/s390.c
SPARC target
-M: Blue Swirl <blauwirbel@gmail.com>
-S: Maintained
+S: Odd Fixes
F: tcg/sparc/
+F: disas/sparc.c
TCI target
M: Stefan Weil <sw@weilnetz.de>
S: Maintained
F: tcg/tci/
F: tci.c
+F: disas/tci.c
Stable branches
---------------
@@ -1028,9 +1422,8 @@
Stable 0.15
L: qemu-stable@nongnu.org
-M: Andreas Färber <afaerber@suse.de>
T: git git://git.qemu-project.org/qemu-stable-0.15.git
-S: Supported
+S: Orphan
Stable 0.14
L: qemu-stable@nongnu.org
@@ -1046,28 +1439,38 @@
-------------
VMDK
M: Fam Zheng <famz@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
F: block/vmdk.c
RBD
-M: Josh Durgin <josh.durgin@inktank.com>
+M: Josh Durgin <jdurgin@redhat.com>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
F: block/rbd.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
Sheepdog
M: Hitoshi Mitake <mitake.hitoshi@lab.ntt.co.jp>
M: Liu Yuan <namei.unix@gmail.com>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
L: sheepdog@lists.wpkg.org
S: Supported
F: block/sheepdog.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
VHDX
M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
F: block/vhdx*
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
VDI
M: Stefan Weil <sw@weilnetz.de>
+L: qemu-block@nongnu.org
S: Maintained
F: block/vdi.c
@@ -1075,26 +1478,169 @@
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
M: Paolo Bonzini <pbonzini@redhat.com>
M: Peter Lieven <pl@kamp.de>
+L: qemu-block@nongnu.org
S: Supported
F: block/iscsi.c
NFS
+M: Jeff Cody <jcody@redhat.com>
M: Peter Lieven <pl@kamp.de>
+L: qemu-block@nongnu.org
S: Maintained
F: block/nfs.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
SSH
M: Richard W.M. Jones <rjones@redhat.com>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
S: Supported
F: block/ssh.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
ARCHIPELAGO
-M: Chrysostomos Nanakos <cnanakos@grnet.gr>
M: Chrysostomos Nanakos <chris@include.gr>
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
S: Maintained
F: block/archipelago.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
+CURL
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/curl.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
+GLUSTER
+M: Jeff Cody <jcody@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/gluster.c
+T: git git://github.com/codyprime/qemu-kvm-jtc.git block
+
+Null Block Driver
+M: Fam Zheng <famz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/null.c
Bootdevice
M: Gonglei <arei.gonglei@huawei.com>
S: Maintained
F: bootdevice.c
+
+Quorum
+M: Alberto Garcia <berto@igalia.com>
+S: Supported
+F: block/quorum.c
+L: qemu-block@nongnu.org
+
+blkverify
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/blkverify.c
+
+bochs
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/bochs.c
+
+cloop
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/cloop.c
+
+dmg
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/dmg.c
+
+parallels
+M: Stefan Hajnoczi <stefanha@redhat.com>
+M: Denis V. Lunev <den@openvz.org>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/parallels.c
+F: docs/specs/parallels.txt
+
+qed
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qed.c
+
+raw
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/linux-aio.c
+F: block/raw-aio.h
+F: block/raw-posix.c
+F: block/raw-win32.c
+F: block/raw_bsd.c
+F: block/win32-aio.c
+
+qcow2
+M: Kevin Wolf <kwolf@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qcow2*
+
+qcow
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/qcow.c
+
+blkdebug
+M: Kevin Wolf <kwolf@redhat.com>
+M: Max Reitz <mreitz@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/blkdebug.c
+
+vpc
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/vpc.c
+
+vvfat
+M: Kevin Wolf <kwolf@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: block/vvfat.c
+
+Image format fuzzer
+M: Stefan Hajnoczi <stefanha@redhat.com>
+L: qemu-block@nongnu.org
+S: Supported
+F: tests/image-fuzzer/
+
+Build and test automation
+-------------------------
+M: Alex Bennée <alex.bennee@linaro.org>
+L: qemu-devel@nongnu.org
+S: Supported
+F: .travis.yml
+
+Documentation
+-------------
+Build system architecture
+M: Daniel P. Berrange <berrange@redhat.com>
+S: Odd Fixes
+F: docs/build-system.txt
+
+Docker testing
+--------------
+Docker based testing framework and cases
+M: Fam Zheng <famz@redhat.com>
+S: Maintained
+F: tests/docker/
diff --git a/Makefile b/Makefile
index f505202..50b4b3a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,6 +3,11 @@
# Always point to the root of the build tree (needs GNU make).
BUILD_DIR=$(CURDIR)
+# Before including a proper config-host.mak, assume we are in the source tree
+SRC_PATH=.
+
+UNCHECKED_GOALS := %clean TAGS cscope ctags docker docker-%
+
# All following code might depend on configuration variables
ifneq ($(wildcard config-host.mak),)
# Put the all: rule here so that config-host.mak can contain dependencies.
@@ -25,8 +30,7 @@
-include config-all-devices.mak
-include config-all-disas.mak
-include $(SRC_PATH)/rules.mak
-config-host.mak: $(SRC_PATH)/configure
+config-host.mak: $(SRC_PATH)/configure $(SRC_PATH)/pc-bios
@echo $@ is out-of-date, running configure
@# TODO: The next lines include code which supports a smooth
@# transition from old configurations without config.status.
@@ -38,15 +42,19 @@
fi
else
config-host.mak:
-ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
+ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
@echo "Please call configure before running make!"
@exit 1
endif
endif
-GENERATED_HEADERS = config-host.h qemu-options.def
+include $(SRC_PATH)/rules.mak
+
+GENERATED_HEADERS = qemu-version.h config-host.h qemu-options.def
GENERATED_HEADERS += qmp-commands.h qapi-types.h qapi-visit.h qapi-event.h
GENERATED_SOURCES += qmp-marshal.c qapi-types.c qapi-visit.c qapi-event.c
+GENERATED_HEADERS += qmp-introspect.h
+GENERATED_SOURCES += qmp-introspect.c
GENERATED_HEADERS += trace/generated-events.h
GENERATED_SOURCES += trace/generated-events.c
@@ -74,7 +82,7 @@
configure: ;
.PHONY: all clean cscope distclean dvi html info install install-doc \
- pdf recurse-all speed test dist
+ pdf recurse-all speed test dist msi FORCE
$(call set-vpath, $(SRC_PATH))
@@ -83,7 +91,8 @@
HELPERS-$(CONFIG_LINUX) = qemu-bridge-helper$(EXESUF)
ifdef BUILD_DOCS
-DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qmp-commands.txt
+DOCS=qemu-doc.html qemu-tech.html qemu.1 qemu-img.1 qemu-nbd.8 qemu-ga.8
+DOCS+=qmp-commands.txt
ifdef CONFIG_VIRTFS
DOCS+=fsdev/virtfs-proxy-helper.1
endif
@@ -108,9 +117,10 @@
-include $(SUBDIR_DEVICES_MAK_DEP)
-%/config-devices.mak: default-configs/%.mak
- $(call quiet-command,$(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $@ $<, " GEN $@")
- @if test -f $@; then \
+%/config-devices.mak: default-configs/%.mak $(SRC_PATH)/scripts/make_device_config.sh
+ $(call quiet-command, \
+ $(SHELL) $(SRC_PATH)/scripts/make_device_config.sh $< $*-config-devices.mak.d $@ > $@.tmp, " GEN $@.tmp")
+ $(call quiet-command, if test -f $@; then \
if cmp -s $@.old $@; then \
mv $@.tmp $@; \
cp -p $@ $@.old; \
@@ -126,7 +136,7 @@
else \
mv $@.tmp $@; \
cp -p $@ $@.old; \
- fi
+ fi, " GEN $@");
defconfig:
rm -f config-all-devices.mak $(SUBDIR_DEVICES_MAK)
@@ -139,30 +149,55 @@
stub-obj-y \
util-obj-y \
qga-obj-y \
+ ivshmem-client-obj-y \
+ ivshmem-server-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
+ crypto-obj-y \
+ crypto-aes-obj-y \
+ qom-obj-y \
+ io-obj-y \
common-obj-y \
common-obj-m)
ifneq ($(wildcard config-host.mak),)
-include $(SRC_PATH)/tests/Makefile
-endif
-ifeq ($(CONFIG_SMARTCARD_NSS),y)
-include $(SRC_PATH)/libcacard/Makefile
+include $(SRC_PATH)/tests/Makefile.include
endif
all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all modules
+# qemu-version.h is regenerated on every build (it depends on FORCE), but the
+# real file is only replaced when the freshly generated content differs, which
+# keeps its timestamp stable when nothing changed.
+# QEMU_PKGVERSION is "$(PKGVERSION)" when that is set; otherwise, inside a git
+# checkout, " (<git describe>[-dirty])"; otherwise the empty string.
+# NOTE: the dirty check uses the POSIX form ">/dev/null 2>&1". The bash-only
+# "&>" is parsed by a plain POSIX /bin/sh (e.g. dash) as "cmd &" followed by
+# an empty redirection, which makes the test always succeed and stamps every
+# build as -dirty.
+qemu-version.h: FORCE
+ $(call quiet-command, \
+ (cd $(SRC_PATH); \
+ printf '#define QEMU_PKGVERSION '; \
+ if test -n "$(PKGVERSION)"; then \
+ printf '"$(PKGVERSION)"\n'; \
+ else \
+ if test -d .git; then \
+ printf '" ('; \
+ git describe --match 'v*' 2>/dev/null | tr -d '\n'; \
+ if ! git diff-index --quiet HEAD >/dev/null 2>&1; then \
+ printf -- '-dirty'; \
+ fi; \
+ printf ')"\n'; \
+ else \
+ printf '""\n'; \
+ fi; \
+ fi) > $@.tmp)
+ $(call quiet-command, cmp -s $@ $@.tmp || mv $@.tmp $@)
+
config-host.h: config-host.h-timestamp
config-host.h-timestamp: config-host.mak
-qemu-options.def: $(SRC_PATH)/qemu-options.hx
+qemu-options.def: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@")
SUBDIR_RULES=$(patsubst %,subdir-%, $(TARGET_DIRS))
SOFTMMU_SUBDIR_RULES=$(filter %-softmmu,$(SUBDIR_RULES))
$(SOFTMMU_SUBDIR_RULES): $(block-obj-y)
+$(SOFTMMU_SUBDIR_RULES): $(crypto-obj-y)
+$(SOFTMMU_SUBDIR_RULES): $(io-obj-y)
$(SOFTMMU_SUBDIR_RULES): config-all-devices.mak
subdir-%:
@@ -187,19 +222,20 @@
dtc/%:
mkdir -p $@
-$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y)
+$(SUBDIR_RULES): libqemuutil.a libqemustub.a $(common-obj-y) $(qom-obj-y) $(crypto-aes-obj-$(CONFIG_USER_ONLY))
ROMSUBDIR_RULES=$(patsubst %,romsubdir-%, $(ROMS))
+# Only keep -O and -g cflags
romsubdir-%:
- $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/",)
+ $(call quiet-command,$(MAKE) $(SUBDIR_MAKEFLAGS) -C pc-bios/$* V="$(V)" TARGET_DIR="$*/" CFLAGS="$(filter -O% -g%,$(CFLAGS))",)
ALL_SUBDIRS=$(TARGET_DIRS) $(patsubst %,pc-bios/%, $(ROMS))
recurse-all: $(SUBDIR_RULES) $(ROMSUBDIR_RULES)
-$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h | $(BUILD_DIR)/version.lo
+$(BUILD_DIR)/version.o: $(SRC_PATH)/version.rc config-host.h | $(BUILD_DIR)/version.lo
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.o")
-$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc $(BUILD_DIR)/config-host.h
+$(BUILD_DIR)/version.lo: $(SRC_PATH)/version.rc config-host.h
$(call quiet-command,$(WINDRES) -I$(BUILD_DIR) -o $@ $<," RC version.lo")
Makefile: $(version-obj-y) $(version-lobj-y)
@@ -217,16 +253,16 @@
qemu-img.o: qemu-img-cmds.h
-qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
-qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
-qemu-io$(EXESUF): qemu-io.o $(block-obj-y) libqemuutil.a libqemustub.a
+qemu-img$(EXESUF): qemu-img.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
+qemu-io$(EXESUF): qemu-io.o $(block-obj-y) $(crypto-obj-y) $(io-obj-y) $(qom-obj-y) libqemuutil.a libqemustub.a
-qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
+qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o libqemuutil.a libqemustub.a
-fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/virtio-9p-marshal.o libqemuutil.a libqemustub.a
+fsdev/virtfs-proxy-helper$(EXESUF): fsdev/virtfs-proxy-helper.o fsdev/9p-marshal.o fsdev/9p-iov-marshal.o libqemuutil.a libqemustub.a
fsdev/virtfs-proxy-helper$(EXESUF): LIBS += -lcap
-qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx
+qemu-img-cmds.h: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $@")
qemu-ga$(EXESUF): LIBS = $(LIBS_QGA)
@@ -239,42 +275,49 @@
qga/qapi-generated/qga-qapi-types.c qga/qapi-generated/qga-qapi-types.h :\
$(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
- $(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
+ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
" GEN $@")
qga/qapi-generated/qga-qapi-visit.c qga/qapi-generated/qga-qapi-visit.h :\
$(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
- $(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
+ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
" GEN $@")
qga/qapi-generated/qga-qmp-commands.h qga/qapi-generated/qga-qmp-marshal.c :\
$(SRC_PATH)/qga/qapi-schema.json $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
- $(gen-out-type) -o qga/qapi-generated -p "qga-" -i $<, \
+ $(gen-out-type) -o qga/qapi-generated -p "qga-" $<, \
" GEN $@")
qapi-modules = $(SRC_PATH)/qapi-schema.json $(SRC_PATH)/qapi/common.json \
$(SRC_PATH)/qapi/block.json $(SRC_PATH)/qapi/block-core.json \
- $(SRC_PATH)/qapi/event.json
+ $(SRC_PATH)/qapi/event.json $(SRC_PATH)/qapi/introspect.json \
+ $(SRC_PATH)/qapi/crypto.json $(SRC_PATH)/qapi/rocker.json \
+ $(SRC_PATH)/qapi/trace.json
qapi-types.c qapi-types.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-types.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-types.py \
- $(gen-out-type) -o "." -b -i $<, \
+ $(gen-out-type) -o "." -b $<, \
" GEN $@")
qapi-visit.c qapi-visit.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-visit.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-visit.py \
- $(gen-out-type) -o "." -b -i $<, \
+ $(gen-out-type) -o "." -b $<, \
" GEN $@")
qapi-event.c qapi-event.h :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-event.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-event.py \
- $(gen-out-type) -o "." -b -i $<, \
+ $(gen-out-type) -o "." $<, \
" GEN $@")
qmp-commands.h qmp-marshal.c :\
$(qapi-modules) $(SRC_PATH)/scripts/qapi-commands.py $(qapi-py)
$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-commands.py \
- $(gen-out-type) -o "." -m -i $<, \
+ $(gen-out-type) -o "." -m $<, \
+ " GEN $@")
+qmp-introspect.h qmp-introspect.c :\
+$(qapi-modules) $(SRC_PATH)/scripts/qapi-introspect.py $(qapi-py)
+ $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/qapi-introspect.py \
+ $(gen-out-type) -o "." $<, \
" GEN $@")
QGALIB_GEN=$(addprefix qga/qapi-generated/, qga-qapi-types.h qga-qapi-visit.h qga-qmp-commands.h)
@@ -283,15 +326,44 @@
qemu-ga$(EXESUF): $(qga-obj-y) libqemuutil.a libqemustub.a
$(call LINK, $^)
+ifdef QEMU_GA_MSI_ENABLED
+QEMU_GA_MSI=qemu-ga-$(ARCH).msi
+
+msi: $(QEMU_GA_MSI)
+
+$(QEMU_GA_MSI): qemu-ga.exe $(QGA_VSS_PROVIDER)
+
+$(QEMU_GA_MSI): config-host.mak
+
+$(QEMU_GA_MSI): $(SRC_PATH)/qga/installer/qemu-ga.wxs
+ $(call quiet-command,QEMU_GA_VERSION="$(QEMU_GA_VERSION)" QEMU_GA_MANUFACTURER="$(QEMU_GA_MANUFACTURER)" QEMU_GA_DISTRO="$(QEMU_GA_DISTRO)" BUILD_DIR="$(BUILD_DIR)" \
+ wixl -o $@ $(QEMU_GA_MSI_ARCH) $(QEMU_GA_MSI_WITH_VSS) $(QEMU_GA_MSI_MINGW_DLL_PATH) $<, " WIXL $@")
+else
+msi:
+ @echo "MSI build not configured or dependency resolution failed (reconfigure with --enable-guest-agent-msi option)"
+endif
+
+ifneq ($(EXESUF),)
+.PHONY: qemu-ga
+qemu-ga: qemu-ga$(EXESUF) $(QGA_VSS_PROVIDER) $(QEMU_GA_MSI)
+endif
+
+ivshmem-client$(EXESUF): $(ivshmem-client-obj-y) libqemuutil.a libqemustub.a
+ $(call LINK, $^)
+ivshmem-server$(EXESUF): $(ivshmem-server-obj-y) libqemuutil.a libqemustub.a
+ $(call LINK, $^)
+
clean:
# avoid old build problems by removing potentially incorrect old files
rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h gen-op-arm.h
rm -f qemu-options.def
+ rm -f *.msi
find . \( -name '*.l[oa]' -o -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name '*.[oda]' \) -type f -exec rm {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* *.pod *~ */*~
rm -f fsdev/*.pod
rm -rf .libs */.libs
rm -f qemu-img-cmds.h
+ rm -f ui/shader/*-vert.h ui/shader/*-frag.h
@# May not be present in GENERATED_HEADERS
rm -f trace/generated-tracers-dtrace.dtrace*
rm -f trace/generated-tracers-dtrace.h*
@@ -303,6 +375,7 @@
if test -d $$d; then $(MAKE) -C $$d $@ || exit 1; fi; \
rm -f $$d/qemu-options.def; \
done
+ rm -f $(SUBDIR_DEVICES_MAK) config-all-devices.mak
VERSION ?= $(shell cat VERSION)
@@ -312,9 +385,9 @@
$(SRC_PATH)/scripts/make-release "$(SRC_PATH)" "$(patsubst qemu-%.tar.bz2,%,$@)"
distclean: clean
- rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi
- rm -f config-all-devices.mak config-all-disas.mak
- rm -f po/*.mo
+ rm -f config-host.mak config-host.h* config-host.ld $(DOCS) qemu-options.texi qemu-img-cmds.texi qemu-monitor.texi qemu-monitor-info.texi
+ rm -f config-all-devices.mak config-all-disas.mak config.status
+ rm -f po/*.mo tests/qemu-iotests/common.env
rm -f roms/seabios/config.mak roms/vgabios/config.mak
rm -f qemu-doc.info qemu-doc.aux qemu-doc.cp qemu-doc.cps qemu-doc.dvi
rm -f qemu-doc.fn qemu-doc.fns qemu-doc.info qemu-doc.ky qemu-doc.kys
@@ -327,8 +400,8 @@
rm -rf $$d || exit 1 ; \
done
rm -Rf .sdk
- if test -f pixman/config.log; then make -C pixman distclean; fi
- if test -f dtc/version_gen.h; then make $(DTC_MAKE_ARGS) clean; fi
+ if test -f pixman/config.log; then $(MAKE) -C pixman distclean; fi
+ if test -f dtc/version_gen.h; then $(MAKE) $(DTC_MAKE_ARGS) clean; fi
KEYMAPS=da en-gb et fr fr-ch is lt modifiers no pt-br sv \
ar de en-us fi fr-be hr it lv nl pl ru th \
@@ -337,17 +410,17 @@
ifdef INSTALL_BLOBS
BLOBS=bios.bin bios-256k.bin sgabios.bin vgabios.bin vgabios-cirrus.bin \
-vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin \
-acpi-dsdt.aml q35-acpi-dsdt.aml \
+vgabios-stdvga.bin vgabios-vmware.bin vgabios-qxl.bin vgabios-virtio.bin \
+acpi-dsdt.aml \
ppc_rom.bin openbios-sparc32 openbios-sparc64 openbios-ppc QEMU,tcx.bin QEMU,cgthree.bin \
pxe-e1000.rom pxe-eepro100.rom pxe-ne2k_pci.rom \
pxe-pcnet.rom pxe-rtl8139.rom pxe-virtio.rom \
efi-e1000.rom efi-eepro100.rom efi-ne2k_pci.rom \
efi-pcnet.rom efi-rtl8139.rom efi-virtio.rom \
+efi-e1000e.rom efi-vmxnet3.rom \
qemu-icon.bmp qemu_logo_no_text.svg \
bamboo.dtb petalogix-s3adsp1800.dtb petalogix-ml605.dtb \
-multiboot.bin linuxboot.bin kvmvapic.bin \
-s390-zipl.rom \
+multiboot.bin linuxboot.bin linuxboot_dma.bin kvmvapic.bin \
s390-ccw.img \
spapr-rtas.bin slof.bin \
palcode-clipper \
@@ -368,6 +441,9 @@
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man8"
$(INSTALL_DATA) qemu-nbd.8 "$(DESTDIR)$(mandir)/man8"
endif
+ifneq (,$(findstring qemu-ga,$(TOOLS)))
+ $(INSTALL_DATA) qemu-ga.8 "$(DESTDIR)$(mandir)/man8"
+endif
endif
ifdef CONFIG_VIRTFS
$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
@@ -384,16 +460,11 @@
endif
endif
-install-confdir:
- $(INSTALL_DIR) "$(DESTDIR)$(qemu_confdir)"
-install-sysconfig: install-datadir install-confdir
- $(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
-
-install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig \
+install: all $(if $(BUILD_DOCS),install-doc) \
install-datadir install-localstatedir
ifneq ($(TOOLS),)
- $(call install-prog,$(TOOLS),$(DESTDIR)$(bindir))
+ $(call install-prog,$(subst qemu-ga,qemu-ga$(EXESUF),$(TOOLS)),$(DESTDIR)$(bindir))
endif
ifneq ($(CONFIG_MODULES),)
$(INSTALL_DIR) "$(DESTDIR)$(qemu_moddir)"
@@ -418,7 +489,7 @@
set -e; for x in $(KEYMAPS); do \
$(INSTALL_DATA) $(SRC_PATH)/pc-bios/keymaps/$$x "$(DESTDIR)$(qemu_datadir)/keymaps"; \
done
- $(INSTALL_DATA) $(SRC_PATH)/trace-events "$(DESTDIR)$(qemu_datadir)/trace-events"
+ $(INSTALL_DATA) $(BUILD_DIR)/trace-events-all "$(DESTDIR)$(qemu_datadir)/trace-events-all"
for d in $(TARGET_DIRS); do \
$(MAKE) $(SUBDIR_MAKEFLAGS) TARGET_DIR=$$d/ -C $$d $@ || exit 1 ; \
done
@@ -427,15 +498,36 @@
test speed: all
$(MAKE) -C tests/tcg $@
+.PHONY: ctags
+ctags:
+ rm -f tags
+ find "$(SRC_PATH)" -name '*.[hc]' -exec ctags --append {} +
+
.PHONY: TAGS
TAGS:
- rm -f $@
+ rm -f TAGS
find "$(SRC_PATH)" -name '*.[hc]' -exec etags --append {} +
cscope:
- rm -f ./cscope.*
- find "$(SRC_PATH)" -name "*.[chsS]" -print | sed 's,^\./,,' > ./cscope.files
- cscope -b
+ rm -f "$(SRC_PATH)"/cscope.*
+ find "$(SRC_PATH)/" -name "*.[chsS]" -print | sed 's,^\./,,' > "$(SRC_PATH)/cscope.files"
+ cscope -b -i"$(SRC_PATH)/cscope.files"
+
+# opengl shader programs
+ui/shader/%-vert.h: $(SRC_PATH)/ui/shader/%.vert $(SRC_PATH)/scripts/shaderinclude.pl
+ @mkdir -p $(dir $@)
+ $(call quiet-command,\
+ perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
+ " VERT $@")
+
+ui/shader/%-frag.h: $(SRC_PATH)/ui/shader/%.frag $(SRC_PATH)/scripts/shaderinclude.pl
+ @mkdir -p $(dir $@)
+ $(call quiet-command,\
+ perl $(SRC_PATH)/scripts/shaderinclude.pl $< > $@,\
+ " FRAG $@")
+
+ui/console-gl.o: $(SRC_PATH)/ui/console-gl.c \
+ ui/shader/texture-blit-vert.h ui/shader/texture-blit-frag.h
# documentation
MAKEINFO=makeinfo
@@ -454,25 +546,29 @@
%.pdf: %.texi
$(call quiet-command,texi2pdf $(TEXIFLAG) -I . $<," GEN $@")
-qemu-options.texi: $(SRC_PATH)/qemu-options.hx
+qemu-options.texi: $(SRC_PATH)/qemu-options.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
-qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx
+qemu-monitor.texi: $(SRC_PATH)/hmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
-qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx
+qemu-monitor-info.texi: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool
+ $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
+
+qmp-commands.txt: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -q < $< > $@," GEN $@")
-qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx
+qemu-img-cmds.texi: $(SRC_PATH)/qemu-img-cmds.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -t < $< > $@," GEN $@")
-qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi
+qemu.1: qemu-doc.texi qemu-options.texi qemu-monitor.texi qemu-monitor-info.texi
$(call quiet-command, \
perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu.pod && \
$(POD2MAN) --section=1 --center=" " --release=" " qemu.pod > $@, \
" GEN $@")
+qemu.1: qemu-option-trace.texi
-qemu-img.1: qemu-img.texi qemu-img-cmds.texi
+qemu-img.1: qemu-img.texi qemu-option-trace.texi qemu-img-cmds.texi
$(call quiet-command, \
perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-img.pod && \
$(POD2MAN) --section=1 --center=" " --release=" " qemu-img.pod > $@, \
@@ -484,20 +580,27 @@
$(POD2MAN) --section=1 --center=" " --release=" " fsdev/virtfs-proxy-helper.pod > $@, \
" GEN $@")
-qemu-nbd.8: qemu-nbd.texi
+qemu-nbd.8: qemu-nbd.texi qemu-option-trace.texi
$(call quiet-command, \
perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-nbd.pod && \
$(POD2MAN) --section=8 --center=" " --release=" " qemu-nbd.pod > $@, \
" GEN $@")
+qemu-ga.8: qemu-ga.texi
+ $(call quiet-command, \
+ perl -Ww -- $(SRC_PATH)/scripts/texi2pod.pl $< qemu-ga.pod && \
+ $(POD2MAN) --section=8 --center=" " --release=" " qemu-ga.pod > $@, \
+ " GEN $@")
+
dvi: qemu-doc.dvi qemu-tech.dvi
html: qemu-doc.html qemu-tech.html
info: qemu-doc.info qemu-tech.info
pdf: qemu-doc.pdf qemu-tech.pdf
qemu-doc.dvi qemu-doc.html qemu-doc.info qemu-doc.pdf: \
- qemu-img.texi qemu-nbd.texi qemu-options.texi \
- qemu-monitor.texi qemu-img-cmds.texi
+ qemu-img.texi qemu-nbd.texi qemu-options.texi qemu-option-trace.texi \
+ qemu-monitor.texi qemu-img-cmds.texi qemu-ga.texi \
+ qemu-monitor-info.texi
ifdef CONFIG_WIN32
@@ -522,7 +625,7 @@
INSTDIR=/tmp/qemu-nsis
$(INSTALLER): $(SRC_PATH)/qemu.nsi
- make install prefix=${INSTDIR}
+ $(MAKE) install prefix=${INSTDIR}
ifdef SIGNCODE
(cd ${INSTDIR}; \
for i in *.exe; do \
@@ -547,6 +650,7 @@
$(if $(DLL_PATH),-DDLLDIR="$(DLL_PATH)") \
-DSRCDIR="$(SRC_PATH)" \
-DOUTFILE="$(INSTALLER)" \
+ -DDISPLAYVERSION="$(VERSION)" \
$(SRC_PATH)/qemu.nsi
rm -r ${INSTDIR}
ifdef SIGNCODE
@@ -556,10 +660,12 @@
# Add a dependency on the generated files, so that they are always
# rebuilt before other object files
-ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
+ifneq ($(filter-out $(UNCHECKED_GOALS),$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
Makefile: $(GENERATED_HEADERS)
endif
# Include automatically generated dependency files
# Dependencies in Makefile.objs files come from our recursive subdir rules
-include $(wildcard *.d tests/*.d)
+
+include $(SRC_PATH)/tests/docker/Makefile.include
diff --git a/Makefile.objs b/Makefile.objs
index 18fd35c..297600a 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -1,37 +1,38 @@
#######################################################################
# Common libraries for tools and emulators
-stub-obj-y = stubs/
-util-obj-y = util/ qobject/ qapi/ qapi-types.o qapi-visit.o qapi-event.o
+stub-obj-y = stubs/ crypto/
+util-obj-y = util/ qobject/ qapi/
+util-obj-y += qmp-introspect.o qapi-types.o qapi-visit.o qapi-event.o
#######################################################################
# block-obj-y is code used by both qemu system emulation and qemu-img
block-obj-y = async.o thread-pool.o
-block-obj-y += nbd.o block.o blockjob.o
+block-obj-y += nbd/
+block-obj-y += block.o blockjob.o
block-obj-y += main-loop.o iohandler.o qemu-timer.o
block-obj-$(CONFIG_POSIX) += aio-posix.o
block-obj-$(CONFIG_WIN32) += aio-win32.o
block-obj-y += block/
block-obj-y += qemu-io-cmds.o
-block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
-block-obj-y += qemu-coroutine-sleep.o
-block-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
-
block-obj-m = block/
+#######################################################################
+# crypto-obj-y is code used by both qemu system emulation and qemu-img
-######################################################################
-# smartcard
+crypto-obj-y = crypto/
+crypto-aes-obj-y = crypto/
-libcacard-y += libcacard/cac.o libcacard/event.o
-libcacard-y += libcacard/vcard.o libcacard/vreader.o
-libcacard-y += libcacard/vcard_emul_nss.o
-libcacard-y += libcacard/vcard_emul_type.o
-libcacard-y += libcacard/card_7816.o
-libcacard-y += libcacard/vcardt.o
-libcacard/vcard_emul_nss.o-cflags := $(NSS_CFLAGS)
-libcacard/vcard_emul_nss.o-libs := $(NSS_LIBS)
+#######################################################################
+# qom-obj-y is code used by both qemu system emulation and qemu-img
+
+qom-obj-y = qom/
+
+#######################################################################
+# io-obj-y is code used by both qemu system emulation and qemu-img
+
+io-obj-y = io/
######################################################################
# Target independent part of system emulation. The long term path is to
@@ -48,15 +49,9 @@
common-obj-$(CONFIG_LINUX) += fsdev/
-common-obj-y += migration.o migration-tcp.o
-common-obj-y += vmstate.o
-common-obj-y += qemu-file.o qemu-file-unix.o qemu-file-stdio.o
-common-obj-$(CONFIG_RDMA) += migration-rdma.o
+common-obj-y += migration/
common-obj-y += qemu-char.o #aio.o
-common-obj-y += block-migration.o
-common-obj-y += page_cache.o xbzrle.o
-
-common-obj-$(CONFIG_POSIX) += migration-exec.o migration-unix.o migration-fd.o
+common-obj-y += page_cache.o
common-obj-$(CONFIG_SPICE) += spice-qemu-char.o
@@ -64,6 +59,8 @@
common-obj-y += hw/
common-obj-y += accel.o
+common-obj-y += replay/
+
common-obj-y += ui/
common-obj-y += bt-host.o bt-vhci.o
bt-host.o-cflags := $(BLUEZ_CFLAGS)
@@ -71,6 +68,7 @@
common-obj-y += dma-helpers.o
common-obj-y += vl.o
vl.o-cflags := $(GPROF_CFLAGS) $(SDL_CFLAGS)
+vl.o-cflags += -DALLOW_CONFIG_ANDROID
common-obj-y += tpm.o
common-obj-$(CONFIG_SLIRP) += slirp/
@@ -79,18 +77,18 @@
common-obj-$(CONFIG_SECCOMP) += qemu-seccomp.o
-common-obj-$(CONFIG_SMARTCARD_NSS) += $(libcacard-y)
+common-obj-$(CONFIG_FDT) += device_tree.o
######################################################################
# qapi
common-obj-y += qmp-marshal.o
+common-obj-y += qmp-introspect.o
common-obj-y += qmp.o hmp.o
endif
#######################################################################
# Target-independent parts used in system and user emulation
-common-obj-y += qemu-log.o
common-obj-y += tcg-runtime.o
common-obj-y += hw/
common-obj-y += qom/
@@ -113,3 +111,52 @@
# by libqemuutil.a. These should be moved to a separate .json schema.
qga-obj-y = qga/
qga-vss-dll-obj-y = qga/
+
+######################################################################
+# contrib
+ivshmem-client-obj-y = contrib/ivshmem-client/
+ivshmem-server-obj-y = contrib/ivshmem-server/
+
+
+######################################################################
+trace-events-y = trace-events
+trace-events-y += util/trace-events
+trace-events-y += crypto/trace-events
+trace-events-y += io/trace-events
+trace-events-y += migration/trace-events
+trace-events-y += block/trace-events
+trace-events-y += hw/block/trace-events
+trace-events-y += hw/char/trace-events
+trace-events-y += hw/intc/trace-events
+trace-events-y += hw/net/trace-events
+trace-events-y += hw/virtio/trace-events
+trace-events-y += hw/audio/trace-events
+trace-events-y += hw/misc/trace-events
+trace-events-y += hw/usb/trace-events
+trace-events-y += hw/scsi/trace-events
+trace-events-y += hw/nvram/trace-events
+trace-events-y += hw/display/trace-events
+trace-events-y += hw/input/trace-events
+trace-events-y += hw/timer/trace-events
+trace-events-y += hw/dma/trace-events
+trace-events-y += hw/sparc/trace-events
+trace-events-y += hw/sd/trace-events
+trace-events-y += hw/isa/trace-events
+trace-events-y += hw/i386/trace-events
+trace-events-y += hw/9pfs/trace-events
+trace-events-y += hw/ppc/trace-events
+trace-events-y += hw/pci/trace-events
+trace-events-y += hw/s390x/trace-events
+trace-events-y += hw/vfio/trace-events
+trace-events-y += hw/acpi/trace-events
+trace-events-y += hw/arm/trace-events
+trace-events-y += hw/alpha/trace-events
+trace-events-y += ui/trace-events
+trace-events-y += audio/trace-events
+trace-events-y += net/trace-events
+trace-events-y += target-i386/trace-events
+trace-events-y += target-sparc/trace-events
+trace-events-y += target-s390x/trace-events
+trace-events-y += target-ppc/trace-events
+trace-events-y += qom/trace-events
+trace-events-y += linux-user/trace-events
diff --git a/Makefile.target b/Makefile.target
index 523602b..9bd4c2e 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -1,11 +1,13 @@
# -*- Mode: makefile -*-
+BUILD_DIR?=$(CURDIR)/..
+
include ../config-host.mak
include config-target.mak
include config-devices.mak
include $(SRC_PATH)/rules.mak
-$(call set-vpath, $(SRC_PATH))
+$(call set-vpath, $(SRC_PATH):$(BUILD_DIR))
ifdef CONFIG_LINUX
QEMU_CFLAGS += -I../linux-headers
endif
@@ -46,7 +48,7 @@
TARGET_TYPE=system
endif
-$(QEMU_PROG).stp-installed: $(SRC_PATH)/trace-events
+$(QEMU_PROG).stp-installed: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backends=$(TRACE_BACKENDS) \
@@ -55,7 +57,7 @@
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp-installed")
-$(QEMU_PROG).stp: $(SRC_PATH)/trace-events
+$(QEMU_PROG).stp: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--format=stap \
--backends=$(TRACE_BACKENDS) \
@@ -64,7 +66,7 @@
--target-type=$(TARGET_TYPE) \
< $< > $@," GEN $(TARGET_DIR)$(QEMU_PROG).stp")
-$(QEMU_PROG)-simpletrace.stp: $(SRC_PATH)/trace-events
+$(QEMU_PROG)-simpletrace.stp: $(BUILD_DIR)/trace-events-all
$(call quiet-command,$(TRACETOOL) \
--format=simpletrace-stap \
--backends=$(TRACE_BACKENDS) \
@@ -83,8 +85,11 @@
#########################################################
# cpu emulator library
obj-y = exec.o translate-all.o cpu-exec.o
+obj-y += translate-common.o
+obj-y += cpu-exec-common.o
obj-y += tcg/tcg.o tcg/tcg-op.o tcg/optimize.o
obj-$(CONFIG_TCG_INTERPRETER) += tci.o
+obj-y += tcg/tcg-common.o
obj-$(CONFIG_TCG_INTERPRETER) += disas/tci.o
obj-y += fpu/softfloat.o
obj-y += target-$(TARGET_BASE_ARCH)/
@@ -103,7 +108,9 @@
ifdef CONFIG_LINUX_USER
-QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) -I$(SRC_PATH)/linux-user
+QEMU_CFLAGS+=-I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
+ -I$(SRC_PATH)/linux-user/host/$(ARCH) \
+ -I$(SRC_PATH)/linux-user
obj-y += linux-user/
obj-y += gdbstub.o thunk.o user-exec.o
@@ -129,12 +136,12 @@
obj-y += arch_init.o cpus.o monitor.o gdbstub.o balloon.o ioport.o numa.o
obj-y += qtest.o bootdevice.o
obj-y += hw/
-obj-$(CONFIG_FDT) += device_tree.o
obj-$(CONFIG_KVM) += kvm-all.o
-obj-y += memory.o savevm.o cputlb.o
+obj-y += memory.o cputlb.o
obj-y += memory_mapping.o
obj-y += dump.o
-LIBS+=$(libs_softmmu)
+obj-y += migration/ram.o migration/savevm.o
+LIBS := $(libs_softmmu) $(LIBS)
# xen support
obj-$(CONFIG_XEN) += xen-common.o
@@ -142,14 +149,16 @@
obj-$(call lnot,$(CONFIG_XEN)) += xen-common-stub.o
obj-$(call lnot,$(CONFIG_XEN_I386)) += xen-hvm-stub.o
-# HAX support
-ifdef CONFIG_WIN32
-obj-$(CONFIG_HAX) += target-i386/hax-all.o target-i386/hax-slot.o target-i386/hax-windows.o
+# HAX support, only when targeting i386 or x86_64
+ifeq (y,$(CONFIG_HAX))
+ifneq (,$(filter i386 x86_64,$(TARGET_NAME)))
+obj-y += target-i386/hax-all.o target-i386/hax-slot.o
+obj-$(CONFIG_WIN32) += target-i386/hax-windows.o
+obj-$(CONFIG_DARWIN) += target-i386/hax-darwin.o
+else
+obj-y += hax-stub.o
endif
-ifdef CONFIG_DARWIN
-obj-$(CONFIG_HAX) += target-i386/hax-all.o target-i386/hax-slot.o target-i386/hax-darwin.o
-endif
-obj-$(call lnot,$(CONFIG_HAX)) += hax-stub.o
+endif # CONFIG_HAX
# Hardware support
ifeq ($(TARGET_NAME), sparc64)
@@ -158,7 +167,7 @@
obj-y += hw/$(TARGET_BASE_ARCH)/
endif
-GENERATED_HEADERS += hmp-commands.h qmp-commands-old.h
+GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h
endif # CONFIG_SOFTMMU
@@ -177,24 +186,41 @@
dummy := $(call unnest-vars,.., \
block-obj-y \
block-obj-m \
+ crypto-obj-y \
+ crypto-aes-obj-y \
+ qom-obj-y \
+ io-obj-y \
common-obj-y \
common-obj-m)
target-obj-y := $(target-obj-y-save)
all-obj-y += $(common-obj-y)
all-obj-y += $(target-obj-y)
+all-obj-y += $(qom-obj-y)
all-obj-$(CONFIG_SOFTMMU) += $(block-obj-y)
+all-obj-$(CONFIG_USER_ONLY) += $(crypto-aes-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
+all-obj-$(CONFIG_SOFTMMU) += $(io-obj-y)
+
+$(QEMU_PROG_BUILD): config-devices.mak
# build either PROG or PROGW
$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
- $(call LINK,$^)
+ $(call LINK, $(filter-out %.mak, $^))
+ifdef CONFIG_DARWIN
+ $(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@")
+ $(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@")
+endif
gdbstub-xml.c: $(TARGET_XML_FILES) $(SRC_PATH)/scripts/feature_to_c.sh
$(call quiet-command,rm -f $@ && $(SHELL) $(SRC_PATH)/scripts/feature_to_c.sh $@ $(TARGET_XML_FILES)," GEN $(TARGET_DIR)$@")
-hmp-commands.h: $(SRC_PATH)/hmp-commands.hx
+hmp-commands.h: $(SRC_PATH)/hmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@")
-qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx
+hmp-commands-info.h: $(SRC_PATH)/hmp-commands-info.hx $(SRC_PATH)/scripts/hxtool
+ $(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@")
+
+qmp-commands-old.h: $(SRC_PATH)/qmp-commands.hx $(SRC_PATH)/scripts/hxtool
$(call quiet-command,sh $(SRC_PATH)/scripts/hxtool -h < $< > $@," GEN $(TARGET_DIR)$@")
clean:
diff --git a/README b/README
index c7c990d..f38193f 100644
--- a/README
+++ b/README
@@ -1,3 +1,107 @@
-Read the documentation in qemu-doc.html or on http://wiki.qemu-project.org
+ QEMU README
+ ===========
-- QEMU team
+QEMU is a generic and open source machine & userspace emulator and
+virtualizer.
+
+QEMU is capable of emulating a complete machine in software without any
+need for hardware virtualization support. By using dynamic translation,
+it achieves very good performance. QEMU can also integrate with the Xen
+and KVM hypervisors to provide emulated hardware while allowing the
+hypervisor to manage the CPU. With hypervisor support, QEMU can achieve
+near native performance for CPUs. When QEMU emulates CPUs directly it is
+capable of running operating systems made for one machine (e.g. an ARMv7
+board) on a different machine (e.g. an x86_64 PC board).
+
+QEMU is also capable of providing userspace API virtualization for Linux
+and BSD kernel interfaces. This allows binaries compiled against one
+architecture ABI (e.g. the Linux PPC64 ABI) to be run on a host using a
+different architecture ABI (e.g. the Linux x86_64 ABI). This does not
+involve any hardware emulation, simply CPU and syscall emulation.
+
+QEMU aims to fit into a variety of use cases. It can be invoked directly
+by users wishing to have full control over its behaviour and settings.
+It also aims to facilitate integration into higher level management
+layers, by providing a stable command line interface and monitor API.
+It is commonly invoked indirectly via the libvirt library when using
+open source applications such as oVirt, OpenStack and virt-manager.
+
+QEMU as a whole is released under the GNU General Public License,
+version 2. For full licensing details, consult the LICENSE file.
+
+
+Building
+========
+
+QEMU is multi-platform software intended to be buildable on all modern
+Linux platforms, OS-X, Win32 (via the Mingw64 toolchain) and a variety
+of other UNIX targets. The simple steps to build QEMU are:
+
+ mkdir build
+ cd build
+ ../configure
+ make
+
+Complete details of the process for building and configuring QEMU for
+all supported host platforms can be found in the qemu-tech.html file.
+Additional information can also be found online via the QEMU website:
+
+ http://qemu-project.org/Hosts/Linux
+ http://qemu-project.org/Hosts/W32
+
+
+Submitting patches
+==================
+
+The QEMU source code is maintained under the GIT version control system.
+
+ git clone git://git.qemu-project.org/qemu.git
+
+When submitting patches, the preferred approach is to use 'git
+format-patch' and/or 'git send-email' to format & send the mail to the
+qemu-devel@nongnu.org mailing list. All patches submitted must contain
+a 'Signed-off-by' line from the author. Patches should follow the
+guidelines set out in the HACKING and CODING_STYLE files.
+
+Additional information on submitting patches can be found online via
+the QEMU website:
+
+ http://qemu-project.org/Contribute/SubmitAPatch
+ http://qemu-project.org/Contribute/TrivialPatches
+
+
+Bug reporting
+=============
+
+The QEMU project uses Launchpad as its primary upstream bug tracker. Bugs
+found when running code built from QEMU git or upstream released sources
+should be reported via:
+
+ https://bugs.launchpad.net/qemu/
+
+If using QEMU via an operating system vendor pre-built binary package, it
+is preferable to report bugs to the vendor's own bug tracker first. If
+the bug is also known to affect latest upstream code, it can also be
+reported via launchpad.
+
+For additional information on bug reporting consult:
+
+ http://qemu-project.org/Contribute/ReportABug
+
+
+Contact
+=======
+
+The QEMU community can be contacted in a number of ways, with the two
+main methods being email and IRC:
+
+ - qemu-devel@nongnu.org
+ http://lists.nongnu.org/mailman/listinfo/qemu-devel
+ - #qemu on irc.oftc.net
+
+Information on additional methods of contacting the community can be
+found online via the QEMU website:
+
+ http://qemu-project.org/Contribute/StartHere
+
+-- End
diff --git a/VERSION b/VERSION
index ccbccc3..24ba9a3 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.2.0
+2.7.0
diff --git a/accel.c b/accel.c
index 8dc588f..6745813 100644
--- a/accel.c
+++ b/accel.c
@@ -23,6 +23,7 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "sysemu/accel.h"
#include "hw/boards.h"
#include "qemu-common.h"
@@ -121,14 +122,13 @@
if (!init_failed) {
fprintf(stderr, "No accelerator found!\n");
}
- return 1;
+ return -1;
}
if (init_failed) {
fprintf(stderr, "Back to %s accelerator.\n", acc->name);
}
-
- return !accel_initialised;
+ return 0;
}
diff --git a/aio-posix.c b/aio-posix.c
index d3ac06e..43162a9 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -13,10 +13,14 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
#include "qemu/sockets.h"
+#ifdef CONFIG_EPOLL_CREATE1
+#include <sys/epoll.h>
+#endif
struct AioHandler
{
@@ -24,11 +28,167 @@
IOHandler *io_read;
IOHandler *io_write;
int deleted;
- int pollfds_idx;
void *opaque;
+ bool is_external;
QLIST_ENTRY(AioHandler) node;
};
+#ifdef CONFIG_EPOLL_CREATE1
+
+/* The fd number threshold to switch to epoll */
+#define EPOLL_ENABLE_THRESHOLD 64
+
+static void aio_epoll_disable(AioContext *ctx)
+{
+ ctx->epoll_available = false;
+ if (!ctx->epoll_enabled) {
+ return;
+ }
+ ctx->epoll_enabled = false;
+ close(ctx->epollfd);
+}
+
+static inline int epoll_events_from_pfd(int pfd_events)
+{
+ return (pfd_events & G_IO_IN ? EPOLLIN : 0) |
+ (pfd_events & G_IO_OUT ? EPOLLOUT : 0) |
+ (pfd_events & G_IO_HUP ? EPOLLHUP : 0) |
+ (pfd_events & G_IO_ERR ? EPOLLERR : 0);
+}
+
+static bool aio_epoll_try_enable(AioContext *ctx)
+{
+ AioHandler *node;
+ struct epoll_event event;
+
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
+ int r;
+ if (node->deleted || !node->pfd.events) {
+ continue;
+ }
+ event.events = epoll_events_from_pfd(node->pfd.events);
+ event.data.ptr = node;
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+ if (r) {
+ return false;
+ }
+ }
+ ctx->epoll_enabled = true;
+ return true;
+}
+
+static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+ struct epoll_event event;
+ int r;
+
+ if (!ctx->epoll_enabled) {
+ return;
+ }
+ if (!node->pfd.events) {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_DEL, node->pfd.fd, &event);
+ if (r) {
+ aio_epoll_disable(ctx);
+ }
+ } else {
+ event.data.ptr = node;
+ event.events = epoll_events_from_pfd(node->pfd.events);
+ if (is_new) {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_ADD, node->pfd.fd, &event);
+ if (r) {
+ aio_epoll_disable(ctx);
+ }
+ } else {
+ r = epoll_ctl(ctx->epollfd, EPOLL_CTL_MOD, node->pfd.fd, &event);
+ if (r) {
+ aio_epoll_disable(ctx);
+ }
+ }
+ }
+}
+
+static int aio_epoll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ AioHandler *node;
+ int i, ret = 0;
+ struct epoll_event events[128];
+
+ assert(npfd == 1);
+ assert(pfds[0].fd == ctx->epollfd);
+ if (timeout > 0) {
+ ret = qemu_poll_ns(pfds, npfd, timeout);
+ }
+ if (timeout <= 0 || ret > 0) {
+ ret = epoll_wait(ctx->epollfd, events,
+ sizeof(events) / sizeof(events[0]),
+ timeout);
+ if (ret <= 0) {
+ goto out;
+ }
+ for (i = 0; i < ret; i++) {
+ int ev = events[i].events;
+ node = events[i].data.ptr;
+ node->pfd.revents = (ev & EPOLLIN ? G_IO_IN : 0) |
+ (ev & EPOLLOUT ? G_IO_OUT : 0) |
+ (ev & EPOLLHUP ? G_IO_HUP : 0) |
+ (ev & EPOLLERR ? G_IO_ERR : 0);
+ }
+ }
+out:
+ return ret;
+}
+
+static bool aio_epoll_enabled(AioContext *ctx)
+{
+ /* Fall back to ppoll when external clients are disabled. */
+ return !aio_external_disabled(ctx) && ctx->epoll_enabled;
+}
+
+static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ if (!ctx->epoll_available) {
+ return false;
+ }
+ if (aio_epoll_enabled(ctx)) {
+ return true;
+ }
+ if (npfd >= EPOLL_ENABLE_THRESHOLD) {
+ if (aio_epoll_try_enable(ctx)) {
+ return true;
+ } else {
+ aio_epoll_disable(ctx);
+ }
+ }
+ return false;
+}
+
+#else
+
+static void aio_epoll_update(AioContext *ctx, AioHandler *node, bool is_new)
+{
+}
+
+static int aio_epoll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ assert(false);
+}
+
+static bool aio_epoll_enabled(AioContext *ctx)
+{
+ return false;
+}
+
+static bool aio_epoll_check_poll(AioContext *ctx, GPollFD *pfds,
+ unsigned npfd, int64_t timeout)
+{
+ return false;
+}
+
+#endif
+
static AioHandler *find_aio_handler(AioContext *ctx, int fd)
{
AioHandler *node;
@@ -44,11 +204,14 @@
void aio_set_fd_handler(AioContext *ctx,
int fd,
+ bool is_external,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
{
AioHandler *node;
+ bool is_new = false;
+ bool deleted = false;
node = find_aio_handler(ctx, fd);
@@ -67,37 +230,43 @@
* releasing the walking_handlers lock.
*/
QLIST_REMOVE(node, node);
- g_free(node);
+ deleted = true;
}
}
} else {
if (node == NULL) {
/* Alloc and insert if it's not already there */
- node = g_malloc0(sizeof(AioHandler));
+ node = g_new0(AioHandler, 1);
node->pfd.fd = fd;
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
g_source_add_poll(&ctx->source, &node->pfd);
+ is_new = true;
}
/* Update handler with latest information */
node->io_read = io_read;
node->io_write = io_write;
node->opaque = opaque;
- node->pollfds_idx = -1;
+ node->is_external = is_external;
node->pfd.events = (io_read ? G_IO_IN | G_IO_HUP | G_IO_ERR : 0);
node->pfd.events |= (io_write ? G_IO_OUT | G_IO_ERR : 0);
}
+ aio_epoll_update(ctx, node, is_new);
aio_notify(ctx);
+ if (deleted) {
+ g_free(node);
+ }
}
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *notifier,
+ bool is_external,
EventNotifierHandler *io_read)
{
aio_set_fd_handler(ctx, event_notifier_get_fd(notifier),
- (IOHandler *)io_read, NULL, notifier);
+ is_external, (IOHandler *)io_read, NULL, notifier);
}
bool aio_prepare(AioContext *ctx)
@@ -113,10 +282,12 @@
int revents;
revents = node->pfd.revents & node->pfd.events;
- if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read) {
+ if (revents & (G_IO_IN | G_IO_HUP | G_IO_ERR) && node->io_read &&
+ aio_node_check(ctx, node->is_external)) {
return true;
}
- if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write) {
+ if (revents & (G_IO_OUT | G_IO_ERR) && node->io_write &&
+ aio_node_check(ctx, node->is_external)) {
return true;
}
}
@@ -154,6 +325,7 @@
if (!node->deleted &&
(revents & (G_IO_IN | G_IO_HUP | G_IO_ERR)) &&
+ aio_node_check(ctx, node->is_external) &&
node->io_read) {
node->io_read(node->opaque);
@@ -164,6 +336,7 @@
}
if (!node->deleted &&
(revents & (G_IO_OUT | G_IO_ERR)) &&
+ aio_node_check(ctx, node->is_external) &&
node->io_write) {
node->io_write(node->opaque);
progress = true;
@@ -186,69 +359,142 @@
return progress;
}
+/* These thread-local variables are used only in a small part of aio_poll
+ * around the call to the poll() system call. In particular they are not
+ * used while aio_poll is performing callbacks, which makes it much easier
+ * to think about reentrancy!
+ *
+ * Stack-allocated arrays would be perfect but they have size limitations;
+ * heap allocation is expensive enough that we want to reuse arrays across
+ * calls to aio_poll(). And because poll() has to be called without holding
+ * any lock, the arrays cannot be stored in AioContext. Thread-local data
+ * has none of the disadvantages of these three options.
+ */
+static __thread GPollFD *pollfds;
+static __thread AioHandler **nodes;
+static __thread unsigned npfd, nalloc;
+static __thread Notifier pollfds_cleanup_notifier;
+
+static void pollfds_cleanup(Notifier *n, void *unused)
+{
+ g_assert(npfd == 0);
+ g_free(pollfds);
+ g_free(nodes);
+ nalloc = 0;
+}
+
+static void add_pollfd(AioHandler *node)
+{
+ if (npfd == nalloc) {
+ if (nalloc == 0) {
+ pollfds_cleanup_notifier.notify = pollfds_cleanup;
+ qemu_thread_atexit_add(&pollfds_cleanup_notifier);
+ nalloc = 8;
+ } else {
+ g_assert(nalloc <= INT_MAX);
+ nalloc *= 2;
+ }
+ pollfds = g_renew(GPollFD, pollfds, nalloc);
+ nodes = g_renew(AioHandler *, nodes, nalloc);
+ }
+ nodes[npfd] = node;
+ pollfds[npfd] = (GPollFD) {
+ .fd = node->pfd.fd,
+ .events = node->pfd.events,
+ };
+ npfd++;
+}
+
bool aio_poll(AioContext *ctx, bool blocking)
{
AioHandler *node;
- bool was_dispatching;
- int ret;
+ int i, ret;
bool progress;
+ int64_t timeout;
- was_dispatching = ctx->dispatching;
+ aio_context_acquire(ctx);
progress = false;
/* aio_notify can avoid the expensive event_notifier_set if
* everything (file descriptors, bottom halves, timers) will
* be re-evaluated before the next blocking poll(). This is
* already true when aio_poll is called with blocking == false;
- * if blocking == true, it is only true after poll() returns.
- *
- * If we're in a nested event loop, ctx->dispatching might be true.
- * In that case we can restore it just before returning, but we
- * have to clear it now.
+ * if blocking == true, it is only true after poll() returns,
+ * so disable the optimization now.
*/
- aio_set_dispatching(ctx, !blocking);
+ if (blocking) {
+ atomic_add(&ctx->notify_me, 2);
+ }
ctx->walking_handlers++;
- g_array_set_size(ctx->pollfds, 0);
+ assert(npfd == 0);
/* fill pollfds */
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- node->pollfds_idx = -1;
- if (!node->deleted && node->pfd.events) {
- GPollFD pfd = {
- .fd = node->pfd.fd,
- .events = node->pfd.events,
- };
- node->pollfds_idx = ctx->pollfds->len;
- g_array_append_val(ctx->pollfds, pfd);
+ if (!node->deleted && node->pfd.events
+ && !aio_epoll_enabled(ctx)
+ && aio_node_check(ctx, node->is_external)) {
+ add_pollfd(node);
}
}
- ctx->walking_handlers--;
+ timeout = blocking ? aio_compute_timeout(ctx) : 0;
/* wait until next event */
- ret = qemu_poll_ns((GPollFD *)ctx->pollfds->data,
- ctx->pollfds->len,
- blocking ? aio_compute_timeout(ctx) : 0);
+ if (timeout) {
+ aio_context_release(ctx);
+ }
+ if (aio_epoll_check_poll(ctx, pollfds, npfd, timeout)) {
+ AioHandler epoll_handler;
+
+ epoll_handler.pfd.fd = ctx->epollfd;
+ epoll_handler.pfd.events = G_IO_IN | G_IO_OUT | G_IO_HUP | G_IO_ERR;
+ npfd = 0;
+ add_pollfd(&epoll_handler);
+ ret = aio_epoll(ctx, pollfds, npfd, timeout);
+ } else {
+ ret = qemu_poll_ns(pollfds, npfd, timeout);
+ }
+ if (blocking) {
+ atomic_sub(&ctx->notify_me, 2);
+ }
+ if (timeout) {
+ aio_context_acquire(ctx);
+ }
+
+ aio_notify_accept(ctx);
/* if we have any readable fds, dispatch event */
if (ret > 0) {
- QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (node->pollfds_idx != -1) {
- GPollFD *pfd = &g_array_index(ctx->pollfds, GPollFD,
- node->pollfds_idx);
- node->pfd.revents = pfd->revents;
- }
+ for (i = 0; i < npfd; i++) {
+ nodes[i]->pfd.revents = pollfds[i].revents;
}
}
+ npfd = 0;
+ ctx->walking_handlers--;
+
/* Run dispatch even if there were no readable fds to run timers */
- aio_set_dispatching(ctx, true);
if (aio_dispatch(ctx)) {
progress = true;
}
- aio_set_dispatching(ctx, was_dispatching);
+ aio_context_release(ctx);
+
return progress;
}
+
+void aio_context_setup(AioContext *ctx)
+{
+#ifdef CONFIG_EPOLL_CREATE1
+ assert(!ctx->epollfd);
+ ctx->epollfd = epoll_create1(EPOLL_CLOEXEC);
+ if (ctx->epollfd == -1) {
+ fprintf(stderr, "Failed to create epoll instance: %s\n", strerror(errno));
+ ctx->epoll_available = false;
+ } else {
+ ctx->epoll_available = true;
+ }
+#endif
+}
diff --git a/aio-win32.c b/aio-win32.c
index d81313b..c8c249e 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -15,6 +15,7 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block.h"
#include "qemu/queue.h"
@@ -28,11 +29,13 @@
GPollFD pfd;
int deleted;
void *opaque;
+ bool is_external;
QLIST_ENTRY(AioHandler) node;
};
void aio_set_fd_handler(AioContext *ctx,
int fd,
+ bool is_external,
IOHandler *io_read,
IOHandler *io_write,
void *opaque)
@@ -67,7 +70,7 @@
if (node == NULL) {
/* Alloc and insert if it's not already there */
- node = g_malloc0(sizeof(AioHandler));
+ node = g_new0(AioHandler, 1);
node->pfd.fd = fd;
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
}
@@ -86,6 +89,7 @@
node->opaque = opaque;
node->io_read = io_read;
node->io_write = io_write;
+ node->is_external = is_external;
event = event_notifier_get_handle(&ctx->notifier);
WSAEventSelect(node->pfd.fd, event,
@@ -98,6 +102,7 @@
void aio_set_event_notifier(AioContext *ctx,
EventNotifier *e,
+ bool is_external,
EventNotifierHandler *io_notify)
{
AioHandler *node;
@@ -129,10 +134,11 @@
} else {
if (node == NULL) {
/* Alloc and insert if it's not already there */
- node = g_malloc0(sizeof(AioHandler));
+ node = g_new0(AioHandler, 1);
node->e = e;
node->pfd.fd = (uintptr_t)event_notifier_get_handle(e);
node->pfd.events = G_IO_IN;
+ node->is_external = is_external;
QLIST_INSERT_HEAD(&ctx->aio_handlers, node, node);
g_source_add_poll(&ctx->source, &node->pfd);
@@ -279,36 +285,33 @@
{
AioHandler *node;
HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
- bool was_dispatching, progress, have_select_revents, first;
+ bool progress, have_select_revents, first;
int count;
int timeout;
- have_select_revents = aio_prepare(ctx);
- if (have_select_revents) {
- blocking = false;
- }
-
- was_dispatching = ctx->dispatching;
+ aio_context_acquire(ctx);
progress = false;
/* aio_notify can avoid the expensive event_notifier_set if
* everything (file descriptors, bottom halves, timers) will
* be re-evaluated before the next blocking poll(). This is
* already true when aio_poll is called with blocking == false;
- * if blocking == true, it is only true after poll() returns.
- *
- * If we're in a nested event loop, ctx->dispatching might be true.
- * In that case we can restore it just before returning, but we
- * have to clear it now.
+ * if blocking == true, it is only true after poll() returns,
+ * so disable the optimization now.
*/
- aio_set_dispatching(ctx, !blocking);
+ if (blocking) {
+ atomic_add(&ctx->notify_me, 2);
+ }
+
+ have_select_revents = aio_prepare(ctx);
ctx->walking_handlers++;
/* fill fd sets */
count = 0;
QLIST_FOREACH(node, &ctx->aio_handlers, node) {
- if (!node->deleted && node->io_notify) {
+ if (!node->deleted && node->io_notify
+ && aio_node_check(ctx, node->is_external)) {
events[count++] = event_notifier_get_handle(node->e);
}
}
@@ -316,20 +319,36 @@
ctx->walking_handlers--;
first = true;
- /* wait until next event */
- while (count > 0) {
+ /* ctx->notifier is always registered. */
+ assert(count > 0);
+
+ /* Multiple iterations, all of them non-blocking except the first,
+ * may be necessary to process all pending events. After the first
+ * WaitForMultipleObjects call ctx->notify_me will be decremented.
+ */
+ do {
HANDLE event;
int ret;
- timeout = blocking
+ timeout = blocking && !have_select_revents
? qemu_timeout_ns_to_ms(aio_compute_timeout(ctx)) : 0;
- ret = WaitForMultipleObjects(count, events, FALSE, timeout);
- aio_set_dispatching(ctx, true);
-
- if (first && aio_bh_poll(ctx)) {
- progress = true;
+ if (timeout) {
+ aio_context_release(ctx);
}
- first = false;
+ ret = WaitForMultipleObjects(count, events, FALSE, timeout);
+ if (blocking) {
+ assert(first);
+ atomic_sub(&ctx->notify_me, 2);
+ }
+ if (timeout) {
+ aio_context_acquire(ctx);
+ }
+
+ if (first) {
+ aio_notify_accept(ctx);
+ progress |= aio_bh_poll(ctx);
+ first = false;
+ }
/* if we have any signaled events, dispatch event */
event = NULL;
@@ -344,10 +363,14 @@
blocking = false;
progress |= aio_dispatch_handlers(ctx, event);
- }
+ } while (count > 0);
progress |= timerlistgroup_run_timers(&ctx->tlg);
- aio_set_dispatching(ctx, was_dispatching);
+ aio_context_release(ctx);
return progress;
}
+
+void aio_context_setup(AioContext *ctx)
+{
+}
diff --git a/android-commands.h b/android-commands.h
deleted file mode 100644
index e612bb0..0000000
--- a/android-commands.h
+++ /dev/null
@@ -1,522 +0,0 @@
-
-/* hand-written for now; consider .hx autogen */
-
-static mon_cmd_t android_redir_cmds[] = {
- {
- .name = "list",
- .args_type = "",
- .params = "",
- .help = "list current redirections",
- .mhandler.cmd = android_console_redir_list,
- },
- {
- .name = "add",
- .args_type = "arg:s",
- .params = "[tcp|udp]:hostport:guestport",
- .help = "add new redirection",
- .mhandler.cmd = android_console_redir_add,
- },
- {
- .name = "del",
- .args_type = "arg:s",
- .params = "[tcp|udp]:hostport",
- .help = "remove existing redirection",
- .mhandler.cmd = android_console_redir_del,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_power_cmds[] = {
- {
- .name = "display",
- .args_type = "",
- .params = "",
- .help = "display battery and charger state",
- .mhandler.cmd = android_console_power_display,
- },
- {
- .name = "ac",
- .args_type = "arg:s?",
- .params = "",
- .help = "set AC charging state",
- .mhandler.cmd = android_console_power_ac,
- },
- {
- .name = "status",
- .args_type = "arg:s?",
- .params = "",
- .help = "set battery status",
- .mhandler.cmd = android_console_power_status,
- },
- {
- .name = "present",
- .args_type = "arg:s?",
- .params = "",
- .help = "set battery present state",
- .mhandler.cmd = android_console_power_present,
- },
- {
- .name = "health",
- .args_type = "arg:s?",
- .params = "",
- .help = "set battery health state",
- .mhandler.cmd = android_console_power_health,
- },
- {
- .name = "capacity",
- .args_type = "arg:s?",
- .params = "",
- .help = "set battery capacity state",
- .mhandler.cmd = android_console_power_capacity,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_event_cmds[] = {
- {
- .name = "types",
- .args_type = "arg:s?",
- .params = "",
- .help = "list all <type> aliases",
- .mhandler.cmd = android_console_event_types,
- },
- {
- .name = "codes",
- .args_type = "arg:s?",
- .params = "",
- .help = "list all <code> aliases for a given <type>",
- .mhandler.cmd = android_console_event_codes,
- },
- {
- .name = "send",
- .args_type = "arg:s?",
- .params = "",
- .help = "send a series of events to the kernel",
- .mhandler.cmd = android_console_event_send,
- },
- {
- .name = "text",
- .args_type = "arg:S?",
- .params = "",
- .help = "simulate keystrokes from a given text",
- .mhandler.cmd = android_console_event_text,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_avd_snapshot_cmds[] = {
- {
- .name = "list",
- .args_type = "",
- .params = "",
- .help = "'avd snapshot list' will show a list of all state snapshots "
- "that can be loaded",
- .mhandler.cmd = android_console_avd_snapshot_list,
- },
- {
- .name = "save",
- .args_type = "arg:s?",
- .params = "",
- .help = "'avd snapshot save <name>' will save the current (run-time) "
- "state to a snapshot with the given name",
- .mhandler.cmd = android_console_avd_snapshot_save,
- },
- {
- .name = "load",
- .args_type = "arg:s?",
- .params = "",
- .help = "'avd snapshot load <name>' will load the state snapshot of "
- "the given name",
- .mhandler.cmd = android_console_avd_snapshot_load,
- },
- {
- .name = "del",
- .args_type = "arg:s?",
- .params = "",
- .help = "'avd snapshot del <name>' will delete the state snapshot with "
- "the given name",
- .mhandler.cmd = android_console_avd_snapshot_del,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_avd_cmds[] = {
- {
- .name = "stop",
- .args_type = "",
- .params = "",
- .help = "stop the virtual device",
- .mhandler.cmd = android_console_avd_stop,
- },
- {
- .name = "start",
- .args_type = "",
- .params = "",
- .help = "start/restart the virtual device",
- .mhandler.cmd = android_console_avd_start,
- },
- {
- .name = "status",
- .args_type = "",
- .params = "",
- .help = "query virtual device status",
- .mhandler.cmd = android_console_avd_status,
- },
- {
- .name = "name",
- .args_type = "",
- .params = "",
- .help = "query virtual device name",
- .mhandler.cmd = android_console_avd_name,
- },
- {
- .name = "snapshot",
- .args_type = "item:s",
- .params = "",
- .help = "state snapshot commands",
- .mhandler.cmd = android_console_avd_snapshot,
- .sub_cmds.static_table = android_avd_snapshot_cmds,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_finger_cmds[] = {
- {
- .name = "touch",
- .args_type = "arg:s?",
- .params = "",
- .help = "touch fingerprint sensor with <fingerid>",
- .mhandler.cmd = android_console_finger_touch,
- },
- {
- .name = "remove",
- .args_type = "",
- .params = "",
- .help = "remove finger from the fingerprint sensor",
- .mhandler.cmd = android_console_finger_remove,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_geo_cmds[] = {
- {
- .name = "nmea",
- .args_type = "arg:s?",
- .params = "",
- .help = "send a GPS NMEA sentence\n"
- "'geo nema <sentence>' sends an NMEA 0183 sentence to the emulated device, as\n"
- "if it came from an emulated GPS modem. <sentence> must begin with '$GP'. Only\n"
- "'$GPGGA' and '$GPRCM' sentences are supported at the moment.\n",
- .mhandler.cmd = android_console_geo_nmea,
- },
- {
- .name = "fix",
- .args_type = "arg:S?",
- .params = "",
- .help = "send a simple GPS fix\n"
- "'geo fix <longitude> <latitude> [<altitude> [<satellites>]]'\n"
- " allows you to send a simple GPS fix to the emulated system.\n"
- " The parameters are:\n\n"
- " <longitude> longitude, in decimal degrees\n"
- " <latitude> latitude, in decimal degrees\n"
- " <altitude> optional altitude in meters\n"
- " <satellites> number of satellites being tracked (1-12)\n"
- "\n",
- .mhandler.cmd = android_console_geo_fix,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_sms_cmds[] = {
- {
- .name = "send",
- .args_type = "arg:S?",
- .params = "",
- .help = "send inbound SMS text message\n"
- "'sms send <phonenumber> <message>' allows you to simulate a new inbound sms message\n",
- .mhandler.cmd = android_console_sms_send,
- },
- {
- .name = "pdu",
- .args_type = "arg:s?",
- .params = "",
- .help = "send inbound SMS PDU\n"
- "'sms pdu <hexstring>' allows you to simulate a new inbound sms PDU\n"
- "(used internally when one emulator sends SMS messages to another instance).\n"
- "you probably don't want to play with this at all\n"
- "\n",
- .mhandler.cmd = android_console_sms_pdu,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_cdma_cmds[] = {
- {
- .name = "ssource",
- .args_type = "arg:s?",
- .params = "",
- .help = "set the current CDMA subscription source\n"
- "'cdma ssource <ssource>' allows you to specify where to read the subscription from\n",
- .mhandler.cmd = android_console_cdma_ssource,
- },
- {
- .name = "prl_version",
- .args_type = "arg:s?",
- .params = "",
- .help = "dump the current PRL version\n"
- "'cdma prl_version <version>' allows you to dump the current PRL version\n",
- .mhandler.cmd = android_console_cdma_prl_version,
- },
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_gsm_cmds[] = {
- {
- .name = "list",
- .args_type = "",
- .params = "",
- .help = "list current phone calls\n"
- "'gsm list' lists all inbound and outbound calls and their state\n",
- .mhandler.cmd = android_console_gsm_list,
- },
- {
- .name = "call",
- .args_type = "arg:s?",
- .params = "",
- .help = "create inbound phone call\n"
- "'gsm call <phonenumber>' allows you to simulate a new inbound call\n",
- .mhandler.cmd = android_console_gsm_call,
- },
- {
- .name = "busy",
- .args_type = "arg:s?",
- .params = "",
- .help = "close waiting outbound call as busy\n"
- "'gsm busy <phonenumber>' closes an outbound call, reporting\n"
- "the remote phone as busy. only possible if the call is 'waiting'.\n",
- .mhandler.cmd = android_console_gsm_busy,
- },
- {
- .name = "hold",
- .args_type = "arg:s?",
- .params = "",
- .help = "change the state of an outbound call to 'held'\n"
- "'gsm hold <remoteNumber>' change the state of a call to 'held'. this is only possible\n"
- "if the call in the 'waiting' or 'active' state\n",
- .mhandler.cmd = android_console_gsm_hold,
- },
- {
- .name = "accept",
- .args_type = "arg:s?",
- .params = "",
- .help = "change the state of an outbound call to 'active'\n"
- "'gsm accept <remoteNumber>' change the state of a call to 'active'. this is only possible\n"
- "if the call is in the 'waiting' or 'held' state\n",
- .mhandler.cmd = android_console_gsm_accept,
- },
- {
- .name = "cancel",
- .args_type = "arg:s?",
- .params = "",
- .help = "disconnect an inbound or outbound phone call\n"
- "'gsm cancel <phonenumber>' allows you to simulate the end of an inbound or outbound call\n",
- .mhandler.cmd = android_console_gsm_cancel,
- },
- {
- .name = "data",
- .args_type = "arg:s?",
- .params = "",
- .help = "modify data connection state"
- "'gsm data <state>' allows you to modify the data connection state\n",
- .mhandler.cmd = android_console_gsm_data,
- },
- {
- .name = "voice",
- .args_type = "arg:s?",
- .params = "",
- .help = "modify voice connection state"
- "'gsm voice <state>' allows you to modify the voice connection state\n",
- .mhandler.cmd = android_console_gsm_voice,
- },
- {
- .name = "status",
- .args_type = "",
- .params = "",
- .help = "display GSM status"
- "'gsm status' displays the current state of the GSM emulation\n",
- .mhandler.cmd = android_console_gsm_status,
- },
- {
- .name = "signal",
- .args_type = "arg:S?",
- .params = "",
- .help = "sets the rssi and ber"
- "signal <rssi> [<ber>]' changes the reported strength and error rate on next (15s) update.\n"
- "rssi range is 0..31 and 99 for unknown\n"
- "ber range is 0..7 percent and 99 for unknown\n",
- .mhandler.cmd = android_console_gsm_signal,
- },
- {
- .name = "signal-profile",
- .args_type = "arg:S?",
- .params = "",
- .help = "sets the signal strength profile"
- "signal-profile <strength>' changes the reported strength on next (15s) update.\n"
- "strength range is 0..4\n",
- .mhandler.cmd = android_console_gsm_signal_profile,
- },
- { NULL, NULL, },
-};
-
-#ifdef _WIN32
-// NOTE: HELP_COMMAND is defined by winuser.h which conflicts with this.
-#undef HELP_COMMAND
-#endif
-
-#define HELP_COMMAND \
- { \
- .name = "help|h|?", .args_type = "helptext:S?", .params = "", \
- .help = "print a list of commands", \
- .mhandler.cmd = android_console_help, \
- }
-
-#define QUIT_COMMAND \
- { \
- .name = "quit|exit", .args_type = "", .params = "", \
- .help = "quit control session", .mhandler.cmd = android_console_quit, \
- }
-
-#define AVD_HELP "control virtual device execution"
-
-static mon_cmd_t android_cmds[] = {
- HELP_COMMAND,
- {
- .name = "crash",
- .args_type = "",
- .params = "",
- .help = "crash the emulator instance",
- .mhandler.cmd = android_console_crash,
- },
- {
- .name = "kill",
- .args_type = "",
- .params = "",
- .help = "kill the emulator instance",
- .mhandler.cmd = android_console_kill,
- },
- QUIT_COMMAND,
- {
- .name = "redir",
- .args_type = "item:s?",
- .params = "",
- .help = "manage port redirections",
- .mhandler.cmd = android_console_redir,
- .sub_cmds.static_table = android_redir_cmds,
- },
- { .name = "power",
- .args_type = "item:s?",
- .params = "",
- .help = "power related commands",
- .mhandler.cmd = android_console_power,
- .sub_cmds.static_table = android_power_cmds,
- },
- { .name = "event",
- .args_type = "item:s?",
- .params = "",
- .help = "simulate hardware events",
- .mhandler.cmd = android_console_event,
- .sub_cmds.static_table = android_event_cmds,
- },
- { .name = "avd",
- .args_type = "item:s?",
- .params = "",
- .help = AVD_HELP,
- .mhandler.cmd = android_console_avd,
- .sub_cmds.static_table = android_avd_cmds,
- },
- { .name = "finger",
- .args_type = "item:s?",
- .params = "",
- .help = "manage emulator fingerprint",
- .mhandler.cmd = android_console_finger,
- .sub_cmds.static_table = android_finger_cmds,
- },
- { .name = "geo",
- .args_type = "item:s?",
- .params = "",
- .help = "Geo-location commands",
- .mhandler.cmd = android_console_geo,
- .sub_cmds.static_table = android_geo_cmds,
- },
- { .name = "sms",
- .args_type = "item:s?",
- .params = "",
- .help = "SMS related commands",
- .mhandler.cmd = android_console_sms,
- .sub_cmds.static_table = android_sms_cmds,
- },
- { .name = "cdma",
- .args_type = "item:s?",
- .params = "",
- .help = "CDMA related commands",
- .mhandler.cmd = android_console_cdma,
- .sub_cmds.static_table = android_cdma_cmds,
- },
- { .name = "gsm",
- .args_type = "item:s?",
- .params = "",
- .help = "GSM related commands",
- .mhandler.cmd = android_console_gsm,
- .sub_cmds.static_table = android_gsm_cmds,
- },
- {
- .name = "rotate",
- .args_type = "",
- .params = "",
- .help = "rotate the screen by 90 degrees",
- .mhandler.cmd = android_console_rotate_screen,
- },
-
- { NULL, NULL, },
-};
-
-static mon_cmd_t android_preauth_avd_cmds[] = {
- {
- .name = "name",
- .args_type = "",
- .params = "",
- .help = "query virtual device name",
- .mhandler.cmd = android_console_avd_name,
- },
- {
- NULL,
- NULL,
- },
-};
-
-/* "preauth" commands are the set of commands that are legal before
-* authentication. "avd name is special cased here because it is needed by
-* older versions of Android Studio */
-static mon_cmd_t android_preauth_cmds[] = {
- HELP_COMMAND,
- {
- .name = "auth",
- .args_type = "arg:s?",
- .params = "",
- .help = "use 'auth <auth_token>' to get extended functionality",
- .mhandler.cmd = android_console_auth,
- },
- {
- .name = "avd",
- .args_type = "item:s?",
- .params = "",
- .help = AVD_HELP,
- .mhandler.cmd = android_console_avd_preauth,
- .sub_cmds.static_table = android_preauth_avd_cmds,
- },
- QUIT_COMMAND,
- {
- NULL,
- NULL,
- },
-};
diff --git a/android-qemu2-glue/android_qemud.cpp b/android-qemu2-glue/android_qemud.cpp
index 26e4313..c6e32bc 100644
--- a/android-qemu2-glue/android_qemud.cpp
+++ b/android-qemu2-glue/android_qemud.cpp
@@ -16,8 +16,10 @@
#include "android-qemu2-glue/utils/stream.h"
extern "C" {
- #include "qemu-common.h"
- #include "migration/vmstate.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qom/object.h"
+#include "migration/vmstate.h"
}
/* Version number of snapshots code. Increment whenever the data saved
diff --git a/android-qemu2-glue/base/async/Looper.cpp b/android-qemu2-glue/base/async/Looper.cpp
index 3aa29a6..643047f 100644
--- a/android-qemu2-glue/base/async/Looper.cpp
+++ b/android-qemu2-glue/base/async/Looper.cpp
@@ -19,6 +19,7 @@
#include "android/utils/stream.h"
extern "C" {
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/timer.h"
#include "sysemu/char.h"
diff --git a/android-qemu2-glue/base/files/QemuFileStream.cpp b/android-qemu2-glue/base/files/QemuFileStream.cpp
index 32ac51e..33cb007 100644
--- a/android-qemu2-glue/base/files/QemuFileStream.cpp
+++ b/android-qemu2-glue/base/files/QemuFileStream.cpp
@@ -14,6 +14,7 @@
#include "android/base/Log.h"
extern "C" {
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "migration/qemu-file.h"
}
diff --git a/android-qemu2-glue/build/Makefile.qemu2-glue.mk b/android-qemu2-glue/build/Makefile.qemu2-glue.mk
index 330fb92..649c1a0 100644
--- a/android-qemu2-glue/build/Makefile.qemu2-glue.mk
+++ b/android-qemu2-glue/build/Makefile.qemu2-glue.mk
@@ -1,45 +1,55 @@
# A static library containing the android-emu glue code
-$(call start-emulator-library,libqemu2_glue)
+QEMU2_GLUE_INCLUDES := $(ANDROID_EMU_INCLUDES)
+
+$(call start-emulator-library,libqemu2-glue)
LOCAL_CFLAGS += $(QEMU2_CFLAGS)
LOCAL_C_INCLUDES += \
$(QEMU2_INCLUDES) \
+ $(QEMU2_GLUE_INCLUDES) \
$(LOCAL_PATH)/slirp \
- $(LOCAL_PATH)/tcg \
LOCAL_SRC_FILES := \
android_qemud.cpp \
- display.cpp \
- looper-qemu.cpp \
- net-android.cpp \
- qemu-battery-agent-impl.c \
- qemu-clipboard-agent-impl.cpp \
- qemu-cellular-agent-impl.c \
- qemu-display-agent-impl.cpp \
- qemu-finger-agent-impl.c \
- qemu-location-agent-impl.c \
- qemu-net-agent-impl.c \
- qemu-sensors-agent-impl.c \
- qemu-setup.cpp \
- qemu-telephony-agent-impl.c \
- qemu-user-event-agent-impl.c \
- qemu-vm-operations-impl.c \
- qemu-window-agent-impl.c \
- utils/stream.cpp \
base/async/Looper.cpp \
base/files/QemuFileStream.cpp \
+ display.cpp \
emulation/android_pipe_device.cpp \
emulation/charpipe.c \
emulation/CharSerialLine.cpp \
emulation/goldfish_sync.cpp \
emulation/serial_line.cpp \
emulation/VmLock.cpp \
+ looper-qemu.cpp \
+ net-android.cpp \
+ qemu-battery-agent-impl.c \
+ qemu-cellular-agent-impl.c \
+ qemu-clipboard-agent-impl.cpp \
+ qemu-display-agent-impl.cpp \
+ qemu-finger-agent-impl.c \
+ qemu-location-agent-impl.c \
+ qemu-net-agent-impl.c \
+ qemu-sensors-agent-impl.c \
+ qemu-setup.cpp \
+ qemu-setup-dns-servers.cpp \
+ qemu-telephony-agent-impl.c \
+ qemu-user-event-agent-impl.c \
+ qemu-vm-operations-impl.c \
+ qemu-window-agent-impl.c \
telephony/modem_init.c \
+ utils/stream.cpp \
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES:%=android-qemu2-glue/%)
$(call end-emulator-library)
-QEMU2_GLUE_STATIC_LIBRARIES := libqemu2_glue
+QEMU2_GLUE_STATIC_LIBRARIES := \
+ libqemu2-glue \
+ emulator-libui \
+ $(EMULATOR_LIBUI_STATIC_LIBRARIES)
+
+QEMU2_GLUE_LDFLAGS := $(EMULATOR_LIBUI_LDFLAGS)
+
+QEMU2_GLUE_LDLIBS := $(EMULATOR_LIBUI_LDLIBS)
diff --git a/android-qemu2-glue/build/Makefile.qemu2-sources.mk b/android-qemu2-glue/build/Makefile.qemu2-sources.mk
index 09bd237..94060c2 100644
--- a/android-qemu2-glue/build/Makefile.qemu2-sources.mk
+++ b/android-qemu2-glue/build/Makefile.qemu2-sources.mk
@@ -15,17 +15,20 @@
backends/rng.c \
backends/testdev.c \
backends/tpm.c \
- block-migration.c \
block.c \
block/accounting.c \
block/backup.c \
block/blkdebug.c \
+ block/blkreplay.c \
block/blkverify.c \
block/block-backend.c \
block/bochs.c \
block/cloop.c \
block/commit.c \
+ block/crypto.c \
+ block/dirty-bitmap.c \
block/dmg.c \
+ block/io.c \
block/mirror.c \
block/nbd-client.c \
block/nbd.c \
@@ -44,23 +47,52 @@
block/qed-l2-cache.c \
block/qed-table.c \
block/qed.c \
+ block/quorum.c \
block/raw_bsd.c \
block/sheepdog.c \
block/snapshot.c \
block/stream.c \
+ block/throttle-groups.c \
block/vdi.c \
block/vmdk.c \
block/vpc.c \
block/vvfat.c \
+ block/write-threshold.c \
blockdev-nbd.c \
blockdev.c \
blockjob.c \
bt-host.c \
bt-vhci.c \
+ crypto/aes.c \
+ crypto/afsplit.c \
+ crypto/block-luks.c \
+ crypto/block-qcow.c \
+ crypto/block.c \
+ crypto/cipher.c \
+ crypto/desrfb.c \
+ crypto/hash-glib.c \
+ crypto/hash.c \
+ crypto/init.c \
+ crypto/ivgen-essiv.c \
+ crypto/ivgen-plain.c \
+ crypto/ivgen-plain64.c \
+ crypto/ivgen.c \
+ crypto/pbkdf.c \
+ crypto/random-platform.c \
+ crypto/secret.c \
+ crypto/tlscreds.c \
+ crypto/tlscredsanon.c \
+ crypto/tlscredsx509.c \
+ crypto/tlssession.c \
+ crypto/xts.c \
device-hotplug.c \
+ device_tree.c \
disas/i386.c \
dma-helpers.c \
hmp.c \
+ hw/acpi/acpi_interface.c \
+ hw/acpi/aml-build.c \
+ hw/acpi/bios-linker-loader.c \
hw/audio/ac97.c \
hw/audio/es1370.c \
hw/audio/hda-codec.c \
@@ -81,19 +113,19 @@
hw/char/serial-pci.c \
hw/char/serial.c \
hw/char/virtio-console.c \
- hw/char/virtio-pingpong.c \
+ hw/core/bus.c \
hw/core/fw-path-provider.c \
hw/core/hotplug.c \
hw/core/irq.c \
hw/core/loader.c \
hw/core/machine.c \
- hw/core/nmi.c \
hw/core/null-machine.c \
- hw/core/platform-bus.c \
hw/core/qdev-properties-system.c \
hw/core/qdev-properties.c \
hw/core/qdev.c \
+ hw/core/register.c \
hw/core/sysbus.c \
+ hw/display/vga-pci.c \
hw/i2c/core.c \
hw/i2c/smbus.c \
hw/i2c/smbus_eeprom.c \
@@ -105,26 +137,33 @@
hw/ide/qdev.c \
hw/input/hid.c \
hw/input/ps2.c \
+ hw/input/virtio-input-hid.c \
+ hw/input/virtio-input.c \
hw/ipack/ipack.c \
hw/ipack/tpci200.c \
hw/isa/isa-bus.c \
hw/misc/pci-testdev.c \
hw/net/e1000.c \
+ hw/net/e1000e.c \
+ hw/net/e1000e_core.c \
+ hw/net/e1000x_common.c \
hw/net/eepro100.c \
hw/net/ne2000.c \
+ hw/net/net_rx_pkt.c \
+ hw/net/net_tx_pkt.c \
hw/net/pcnet-pci.c \
hw/net/pcnet.c \
+ hw/net/rocker/rocker.c \
+ hw/net/rocker/rocker_desc.c \
+ hw/net/rocker/rocker_fp.c \
+ hw/net/rocker/rocker_of_dpa.c \
+ hw/net/rocker/rocker_world.c \
hw/net/rtl8139.c \
hw/net/vmxnet3.c \
- hw/net/vmxnet_rx_pkt.c \
- hw/net/vmxnet_tx_pkt.c \
hw/nvram/eeprom93xx.c \
hw/nvram/fw_cfg.c \
- hw/pci-bridge/i82801b11.c \
- hw/pci-bridge/ioh3420.c \
hw/pci-bridge/pci_bridge_dev.c \
- hw/pci-bridge/xio3130_downstream.c \
- hw/pci-bridge/xio3130_upstream.c \
+ hw/pci-bridge/pci_expander_bridge.c \
hw/pci-host/pam.c \
hw/pci/msi.c \
hw/pci/msix.c \
@@ -142,12 +181,17 @@
hw/scsi/esp.c \
hw/scsi/lsi53c895a.c \
hw/scsi/megasas.c \
+ hw/scsi/mptconfig.c \
+ hw/scsi/mptendian.c \
+ hw/scsi/mptsas.c \
hw/scsi/scsi-bus.c \
hw/scsi/scsi-disk.c \
hw/scsi/scsi-generic.c \
hw/scsi/vmw_pvscsi.c \
+ hw/sd/core.c \
+ hw/sd/sd.c \
+ hw/sd/sdhci.c \
hw/usb/bus.c \
- hw/usb/ccid-card-passthru.c \
hw/usb/combined-packet.c \
hw/usb/core.c \
hw/usb/desc-msos.c \
@@ -163,29 +207,52 @@
hw/usb/dev-uas.c \
hw/usb/dev-wacom.c \
hw/usb/hcd-ehci-pci.c \
- hw/usb/hcd-ehci-sysbus.c \
hw/usb/hcd-ehci.c \
hw/usb/hcd-ohci.c \
hw/usb/hcd-uhci.c \
hw/usb/hcd-xhci.c \
hw/usb/host-stub.c \
hw/usb/libhw.c \
- hw/virtio/dataplane/vring.c \
hw/virtio/virtio-bus.c \
hw/virtio/virtio-mmio.c \
hw/virtio/virtio-pci.c \
hw/virtio/virtio-rng.c \
hw/watchdog/watchdog.c \
hw/watchdog/wdt_i6300esb.c \
+ io/channel-buffer.c \
+ io/channel-command.c \
+ io/channel-file.c \
+ io/channel-socket.c \
+ io/channel-tls.c \
+ io/channel-util.c \
+ io/channel-watch.c \
+ io/channel-websock.c \
+ io/channel.c \
+ io/task.c \
iohandler.c \
iothread.c \
main-loop.c \
- migration-tcp.c \
- migration.c \
- nbd.c \
+ migration/block.c \
+ migration/exec.c \
+ migration/fd.c \
+ migration/migration.c \
+ migration/postcopy-ram.c \
+ migration/qemu-file-channel.c \
+ migration/qemu-file.c \
+ migration/qjson.c \
+ migration/socket.c \
+ migration/tls.c \
+ migration/vmstate.c \
+ migration/xbzrle.c \
+ nbd/client.c \
+ nbd/common.c \
+ nbd/server.c \
net/checksum.c \
net/dump.c \
net/eth.c \
+ net/filter-buffer.c \
+ net/filter-mirror.c \
+ net/filter.c \
net/hub.c \
net/net.c \
net/queue.c \
@@ -195,15 +262,7 @@
page_cache.c \
qdev-monitor.c \
qemu-char.c \
- qemu-coroutine-io.c \
- qemu-coroutine-lock.c \
- qemu-coroutine-sleep.c \
- qemu-coroutine.c \
- qemu-file-stdio.c \
- qemu-file-unix.c \
- qemu-file.c \
qemu-io-cmds.c \
- qemu-log.c \
qemu-timer.c \
qmp.c \
qom/container.c \
@@ -211,16 +270,27 @@
qom/object.c \
qom/object_interfaces.c \
qom/qom-qobject.c \
+ replay/replay-char.c \
+ replay/replay-events.c \
+ replay/replay-input.c \
+ replay/replay-internal.c \
+ replay/replay-time.c \
+ replay/replay.c \
slirp/arp_table.c \
slirp/bootp.c \
slirp/cksum.c \
+ slirp/dhcpv6.c \
slirp/dnssearch.c \
slirp/if.c \
+ slirp/ip6_icmp.c \
+ slirp/ip6_input.c \
+ slirp/ip6_output.c \
slirp/ip_icmp.c \
slirp/ip_input.c \
slirp/ip_output.c \
slirp/mbuf.c \
slirp/misc.c \
+ slirp/ndp_table.c \
slirp/sbuf.c \
slirp/slirp.c \
slirp/socket.c \
@@ -230,38 +300,34 @@
slirp/tcp_timer.c \
slirp/tftp.c \
slirp/udp.c \
+ slirp/udp6.c \
tcg-runtime.c \
thread-pool.c \
tpm.c \
ui/console.c \
ui/cursor.c \
- ui/d3des.c \
ui/input-keymap.c \
ui/input-legacy.c \
ui/input.c \
ui/keymaps.c \
ui/qemu-pixman.c \
+ ui/vnc-auth-vencrypt.c \
ui/vnc-enc-hextile.c \
ui/vnc-enc-tight.c \
ui/vnc-enc-zlib.c \
ui/vnc-enc-zrle.c \
ui/vnc-jobs.c \
ui/vnc-palette.c \
+ ui/vnc-ws.c \
ui/vnc.c \
ui/x_keymap.c \
- vmstate.c \
- xbzrle.c \
QEMU2_COMMON_SOURCES_darwin-x86_64 := \
aio-posix.c \
audio/coreaudio.c \
backends/rng-random.c \
block/raw-posix.c \
- coroutine-sigaltstack.c \
hw/usb/dev-mtp.c \
- migration-exec.c \
- migration-fd.c \
- migration-unix.c \
net/tap-bsd.c \
net/tap.c \
net/vhost-user.c \
@@ -274,38 +340,35 @@
backends/hostmem-file.c \
backends/rng-random.c \
block/raw-posix.c \
- coroutine-ucontext.c \
fsdev/qemu-fsdev-dummy.c \
fsdev/qemu-fsdev-opts.c \
+ hw/input/virtio-input-host.c \
hw/tpm/tpm_passthrough.c \
+ hw/tpm/tpm_util.c \
hw/usb/dev-mtp.c \
- migration-exec.c \
- migration-fd.c \
- migration-unix.c \
net/tap-linux.c \
net/tap.c \
net/vhost-user.c \
os-posix.c \
+ ui/input-linux.c \
QEMU2_COMMON_SOURCES_windows-x86 := \
aio-win32.c \
audio/audio_win_int.c \
+ audio/dsoundaudio.c \
audio/winaudio.c \
- audio/winwaveaudio.c \
block/raw-win32.c \
block/win32-aio.c \
- coroutine-win32.c \
net/tap-win32.c \
os-win32.c \
QEMU2_COMMON_SOURCES_windows-x86_64 := \
aio-win32.c \
audio/audio_win_int.c \
+ audio/dsoundaudio.c \
audio/winaudio.c \
- audio/winwaveaudio.c \
block/raw-win32.c \
block/win32-aio.c \
- coroutine-win32.c \
net/tap-win32.c \
os-win32.c \
@@ -313,10 +376,10 @@
arch_init.c \
balloon.c \
bootdevice.c \
+ cpu-exec-common.c \
cpu-exec.c \
cpus.c \
cputlb.c \
- device_tree.c \
disas.c \
dump.c \
exec.c \
@@ -325,7 +388,13 @@
hw/block/dataplane/virtio-blk.c \
hw/block/virtio-blk.c \
hw/char/virtio-serial-bus.c \
+ hw/core/nmi.c \
+ hw/cpu/core.c \
hw/display/vga.c \
+ hw/display/virtio-gpu-3d.c \
+ hw/display/virtio-gpu-pci.c \
+ hw/display/virtio-gpu.c \
+ hw/misc/edu.c \
hw/net/vhost_net.c \
hw/net/virtio-net.c \
hw/scsi/virtio-scsi-dataplane.c \
@@ -335,23 +404,34 @@
ioport.c \
memory.c \
memory_mapping.c \
+ migration/ram.c \
+ migration/savevm.c \
monitor.c \
numa.c \
qtest.c \
- savevm.c \
tcg/optimize.c \
- tcg/tcg.c \
+ tcg/tcg-common.c \
tcg/tcg-op.c \
+ tcg/tcg.c \
+ trace/control-target.c \
translate-all.c \
- vl.c \
+ translate-common.c \
xen-common-stub.c \
xen-hvm-stub.c \
QEMU2_TARGET_aarch64_SOURCES := \
+ disas/arm-a64.cc \
disas/arm.c \
- hax-stub.c \
+ disas/libvixl/vixl/a64/decoder-a64.cc \
+ disas/libvixl/vixl/a64/disasm-a64.cc \
+ disas/libvixl/vixl/a64/instructions-a64.cc \
+ disas/libvixl/vixl/compiler-intrinsics.cc \
+ disas/libvixl/vixl/utils.cc \
hw/arm/allwinner-a10.c \
hw/arm/armv7m.c \
+ hw/arm/ast2400.c \
+ hw/arm/bcm2835_peripherals.c \
+ hw/arm/bcm2836.c \
hw/arm/boot.c \
hw/arm/collie.c \
hw/arm/cubieboard.c \
@@ -359,29 +439,42 @@
hw/arm/digic_boards.c \
hw/arm/exynos4210.c \
hw/arm/exynos4_boards.c \
+ hw/arm/fsl-imx25.c \
+ hw/arm/fsl-imx31.c \
+ hw/arm/fsl-imx6.c \
hw/arm/gumstix.c \
hw/arm/highbank.c \
+ hw/arm/imx25_pdk.c \
hw/arm/integratorcp.c \
hw/arm/kzm.c \
hw/arm/mainstone.c \
hw/arm/musicpal.c \
+ hw/arm/netduino2.c \
hw/arm/nseries.c \
hw/arm/omap1.c \
hw/arm/omap2.c \
hw/arm/omap_sx1.c \
hw/arm/palm.c \
+ hw/arm/palmetto-bmc.c \
hw/arm/pxa2xx.c \
hw/arm/pxa2xx_gpio.c \
hw/arm/pxa2xx_pic.c \
+ hw/arm/raspi.c \
hw/arm/realview.c \
+ hw/arm/sabrelite.c \
hw/arm/spitz.c \
hw/arm/stellaris.c \
+ hw/arm/stm32f205_soc.c \
hw/arm/strongarm.c \
+ hw/arm/sysbus-fdt.c \
hw/arm/tosa.c \
hw/arm/versatilepb.c \
hw/arm/vexpress.c \
+ hw/arm/virt-acpi-build.c \
hw/arm/virt.c \
hw/arm/xilinx_zynq.c \
+ hw/arm/xlnx-ep108.c \
+ hw/arm/xlnx-zynqmp.c \
hw/arm/z2.c \
hw/audio/lm4549.c \
hw/audio/marvell_88w8618.c \
@@ -392,20 +485,26 @@
hw/block/nand.c \
hw/block/onenand.c \
hw/block/pflash_cfi02.c \
+ hw/char/bcm2835_aux.c \
hw/char/cadence_uart.c \
hw/char/digic-uart.c \
hw/char/exynos4210_uart.c \
hw/char/imx_serial.c \
hw/char/omap_uart.c \
hw/char/pl011.c \
+ hw/char/stm32f2xx_usart.c \
+ hw/core/platform-bus.c \
hw/core/ptimer.c \
hw/cpu/a15mpcore.c \
hw/cpu/a9mpcore.c \
hw/cpu/arm11mpcore.c \
hw/cpu/realview_mpcore.c \
hw/display/ads7846.c \
+ hw/display/bcm2835_fb.c \
hw/display/blizzard.c \
+ hw/display/dpcd.c \
hw/display/exynos4210_fimd.c \
+ hw/display/framebuffer.c \
hw/display/omap_dss.c \
hw/display/omap_lcdc.c \
hw/display/pl110.c \
@@ -413,17 +512,26 @@
hw/display/ssd0303.c \
hw/display/ssd0323.c \
hw/display/tc6393xb.c \
+ hw/display/xlnx_dp.c \
+ hw/dma/bcm2835_dma.c \
hw/dma/omap_dma.c \
hw/dma/pl080.c \
hw/dma/pl330.c \
hw/dma/pxa2xx_dma.c \
hw/dma/soc_dma.c \
+ hw/dma/xlnx-zynq-devcfg.c \
+ hw/dma/xlnx_dpdma.c \
+ hw/gpio/gpio_key.c \
+ hw/gpio/imx_gpio.c \
hw/gpio/max7310.c \
hw/gpio/omap_gpio.c \
hw/gpio/pl061.c \
hw/gpio/zaurus.c \
+ hw/i2c/aspeed_i2c.c \
hw/i2c/bitbang_i2c.c \
hw/i2c/exynos4210_i2c.c \
+ hw/i2c/i2c-ddc.c \
+ hw/i2c/imx_i2c.c \
hw/i2c/omap_i2c.c \
hw/i2c/versatile_i2c.c \
hw/ide/microdrive.c \
@@ -436,7 +544,16 @@
hw/intc/allwinner-a10-pic.c \
hw/intc/arm_gic.c \
hw/intc/arm_gic_common.c \
+ hw/intc/arm_gicv2m.c \
+ hw/intc/arm_gicv3.c \
+ hw/intc/arm_gicv3_common.c \
+ hw/intc/arm_gicv3_cpuif.c \
+ hw/intc/arm_gicv3_dist.c \
+ hw/intc/arm_gicv3_redist.c \
hw/intc/armv7m_nvic.c \
+ hw/intc/aspeed_vic.c \
+ hw/intc/bcm2835_ic.c \
+ hw/intc/bcm2836_control.c \
hw/intc/exynos4210_combiner.c \
hw/intc/exynos4210_gic.c \
hw/intc/imx_avic.c \
@@ -448,8 +565,16 @@
hw/misc/arm_integrator_debug.c \
hw/misc/arm_l2x0.c \
hw/misc/arm_sysctl.c \
+ hw/misc/aspeed_scu.c \
+ hw/misc/auxbus.c \
+ hw/misc/bcm2835_mbox.c \
+ hw/misc/bcm2835_property.c \
hw/misc/cbus.c \
hw/misc/exynos4210_pmu.c \
+ hw/misc/imx25_ccm.c \
+ hw/misc/imx31_ccm.c \
+ hw/misc/imx6_ccm.c \
+ hw/misc/imx6_src.c \
hw/misc/imx_ccm.c \
hw/misc/max111x.c \
hw/misc/mst_fpga.c \
@@ -458,22 +583,31 @@
hw/misc/omap_l4.c \
hw/misc/omap_sdrc.c \
hw/misc/omap_tap.c \
+ hw/misc/stm32f2xx_syscfg.c \
hw/misc/tmp105.c \
+ hw/misc/zynq-xadc.c \
hw/misc/zynq_slcr.c \
hw/net/allwinner_emac.c \
hw/net/cadence_gem.c \
+ hw/net/imx_fec.c \
hw/net/lan9118.c \
hw/net/smc91c111.c \
hw/net/stellaris_enet.c \
hw/net/xgmac.c \
+ hw/pci-bridge/i82801b11.c \
+ hw/pci-bridge/ioh3420.c \
+ hw/pci-bridge/xio3130_downstream.c \
+ hw/pci-bridge/xio3130_upstream.c \
+ hw/pci-host/gpex.c \
hw/pci-host/versatile.c \
hw/pcmcia/pxa2xx.c \
hw/sd/omap_mmc.c \
hw/sd/pl181.c \
hw/sd/pxa2xx_mmci.c \
- hw/sd/sd.c \
- hw/sd/sdhci.c \
hw/sd/ssi-sd.c \
+ hw/smbios/smbios.c \
+ hw/ssi/aspeed_smc.c \
+ hw/ssi/imx_spi.c \
hw/ssi/omap_spi.c \
hw/ssi/pl022.c \
hw/ssi/ssi.c \
@@ -482,6 +616,7 @@
hw/timer/allwinner-a10-pit.c \
hw/timer/arm_mptimer.c \
hw/timer/arm_timer.c \
+ hw/timer/aspeed_timer.c \
hw/timer/cadence_ttc.c \
hw/timer/digic-timer.c \
hw/timer/ds1338.c \
@@ -494,10 +629,14 @@
hw/timer/omap_synctimer.c \
hw/timer/pl031.c \
hw/timer/pxa2xx_timer.c \
- hw/timer/tusb6010.c \
+ hw/timer/stm32f2xx_timer.c \
hw/timer/twl92230.c \
+ hw/usb/hcd-ehci-sysbus.c \
hw/usb/hcd-musb.c \
+ hw/usb/tusb6010.c \
kvm-stub.c \
+ target-arm/arch_dump.c \
+ target-arm/arm-powerctl.c \
target-arm/arm-semi.c \
target-arm/cpu.c \
target-arm/cpu64.c \
@@ -509,6 +648,7 @@
target-arm/iwmmxt_helper.c \
target-arm/kvm-stub.c \
target-arm/machine.c \
+ target-arm/monitor.c \
target-arm/neon_helper.c \
target-arm/op_helper.c \
target-arm/psci.c \
@@ -516,10 +656,18 @@
target-arm/translate.c \
QEMU2_TARGET_arm_SOURCES := \
+ disas/arm-a64.cc \
disas/arm.c \
- hax-stub.c \
+ disas/libvixl/vixl/a64/decoder-a64.cc \
+ disas/libvixl/vixl/a64/disasm-a64.cc \
+ disas/libvixl/vixl/a64/instructions-a64.cc \
+ disas/libvixl/vixl/compiler-intrinsics.cc \
+ disas/libvixl/vixl/utils.cc \
hw/arm/allwinner-a10.c \
hw/arm/armv7m.c \
+ hw/arm/ast2400.c \
+ hw/arm/bcm2835_peripherals.c \
+ hw/arm/bcm2836.c \
hw/arm/boot.c \
hw/arm/collie.c \
hw/arm/cubieboard.c \
@@ -527,27 +675,38 @@
hw/arm/digic_boards.c \
hw/arm/exynos4210.c \
hw/arm/exynos4_boards.c \
+ hw/arm/fsl-imx25.c \
+ hw/arm/fsl-imx31.c \
+ hw/arm/fsl-imx6.c \
hw/arm/gumstix.c \
hw/arm/highbank.c \
+ hw/arm/imx25_pdk.c \
hw/arm/integratorcp.c \
hw/arm/kzm.c \
hw/arm/mainstone.c \
hw/arm/musicpal.c \
+ hw/arm/netduino2.c \
hw/arm/nseries.c \
hw/arm/omap1.c \
hw/arm/omap2.c \
hw/arm/omap_sx1.c \
hw/arm/palm.c \
+ hw/arm/palmetto-bmc.c \
hw/arm/pxa2xx.c \
hw/arm/pxa2xx_gpio.c \
hw/arm/pxa2xx_pic.c \
+ hw/arm/raspi.c \
hw/arm/realview.c \
+ hw/arm/sabrelite.c \
hw/arm/spitz.c \
hw/arm/stellaris.c \
+ hw/arm/stm32f205_soc.c \
hw/arm/strongarm.c \
+ hw/arm/sysbus-fdt.c \
hw/arm/tosa.c \
hw/arm/versatilepb.c \
hw/arm/vexpress.c \
+ hw/arm/virt-acpi-build.c \
hw/arm/virt.c \
hw/arm/xilinx_zynq.c \
hw/arm/z2.c \
@@ -560,20 +719,25 @@
hw/block/nand.c \
hw/block/onenand.c \
hw/block/pflash_cfi02.c \
+ hw/char/bcm2835_aux.c \
hw/char/cadence_uart.c \
hw/char/digic-uart.c \
hw/char/exynos4210_uart.c \
hw/char/imx_serial.c \
hw/char/omap_uart.c \
hw/char/pl011.c \
+ hw/char/stm32f2xx_usart.c \
+ hw/core/platform-bus.c \
hw/core/ptimer.c \
hw/cpu/a15mpcore.c \
hw/cpu/a9mpcore.c \
hw/cpu/arm11mpcore.c \
hw/cpu/realview_mpcore.c \
hw/display/ads7846.c \
+ hw/display/bcm2835_fb.c \
hw/display/blizzard.c \
hw/display/exynos4210_fimd.c \
+ hw/display/framebuffer.c \
hw/display/omap_dss.c \
hw/display/omap_lcdc.c \
hw/display/pl110.c \
@@ -581,17 +745,23 @@
hw/display/ssd0303.c \
hw/display/ssd0323.c \
hw/display/tc6393xb.c \
+ hw/dma/bcm2835_dma.c \
hw/dma/omap_dma.c \
hw/dma/pl080.c \
hw/dma/pl330.c \
hw/dma/pxa2xx_dma.c \
hw/dma/soc_dma.c \
+ hw/dma/xlnx-zynq-devcfg.c \
+ hw/gpio/gpio_key.c \
+ hw/gpio/imx_gpio.c \
hw/gpio/max7310.c \
hw/gpio/omap_gpio.c \
hw/gpio/pl061.c \
hw/gpio/zaurus.c \
+ hw/i2c/aspeed_i2c.c \
hw/i2c/bitbang_i2c.c \
hw/i2c/exynos4210_i2c.c \
+ hw/i2c/imx_i2c.c \
hw/i2c/omap_i2c.c \
hw/i2c/versatile_i2c.c \
hw/ide/microdrive.c \
@@ -604,7 +774,16 @@
hw/intc/allwinner-a10-pic.c \
hw/intc/arm_gic.c \
hw/intc/arm_gic_common.c \
+ hw/intc/arm_gicv2m.c \
+ hw/intc/arm_gicv3.c \
+ hw/intc/arm_gicv3_common.c \
+ hw/intc/arm_gicv3_cpuif.c \
+ hw/intc/arm_gicv3_dist.c \
+ hw/intc/arm_gicv3_redist.c \
hw/intc/armv7m_nvic.c \
+ hw/intc/aspeed_vic.c \
+ hw/intc/bcm2835_ic.c \
+ hw/intc/bcm2836_control.c \
hw/intc/exynos4210_combiner.c \
hw/intc/exynos4210_gic.c \
hw/intc/imx_avic.c \
@@ -616,8 +795,15 @@
hw/misc/arm_integrator_debug.c \
hw/misc/arm_l2x0.c \
hw/misc/arm_sysctl.c \
+ hw/misc/aspeed_scu.c \
+ hw/misc/bcm2835_mbox.c \
+ hw/misc/bcm2835_property.c \
hw/misc/cbus.c \
hw/misc/exynos4210_pmu.c \
+ hw/misc/imx25_ccm.c \
+ hw/misc/imx31_ccm.c \
+ hw/misc/imx6_ccm.c \
+ hw/misc/imx6_src.c \
hw/misc/imx_ccm.c \
hw/misc/max111x.c \
hw/misc/mst_fpga.c \
@@ -626,22 +812,31 @@
hw/misc/omap_l4.c \
hw/misc/omap_sdrc.c \
hw/misc/omap_tap.c \
+ hw/misc/stm32f2xx_syscfg.c \
hw/misc/tmp105.c \
+ hw/misc/zynq-xadc.c \
hw/misc/zynq_slcr.c \
hw/net/allwinner_emac.c \
hw/net/cadence_gem.c \
+ hw/net/imx_fec.c \
hw/net/lan9118.c \
hw/net/smc91c111.c \
hw/net/stellaris_enet.c \
hw/net/xgmac.c \
+ hw/pci-bridge/i82801b11.c \
+ hw/pci-bridge/ioh3420.c \
+ hw/pci-bridge/xio3130_downstream.c \
+ hw/pci-bridge/xio3130_upstream.c \
+ hw/pci-host/gpex.c \
hw/pci-host/versatile.c \
hw/pcmcia/pxa2xx.c \
hw/sd/omap_mmc.c \
hw/sd/pl181.c \
hw/sd/pxa2xx_mmci.c \
- hw/sd/sd.c \
- hw/sd/sdhci.c \
hw/sd/ssi-sd.c \
+ hw/smbios/smbios.c \
+ hw/ssi/aspeed_smc.c \
+ hw/ssi/imx_spi.c \
hw/ssi/omap_spi.c \
hw/ssi/pl022.c \
hw/ssi/ssi.c \
@@ -650,6 +845,7 @@
hw/timer/allwinner-a10-pit.c \
hw/timer/arm_mptimer.c \
hw/timer/arm_timer.c \
+ hw/timer/aspeed_timer.c \
hw/timer/cadence_ttc.c \
hw/timer/digic-timer.c \
hw/timer/ds1338.c \
@@ -662,10 +858,14 @@
hw/timer/omap_synctimer.c \
hw/timer/pl031.c \
hw/timer/pxa2xx_timer.c \
- hw/timer/tusb6010.c \
+ hw/timer/stm32f2xx_timer.c \
hw/timer/twl92230.c \
+ hw/usb/hcd-ehci-sysbus.c \
hw/usb/hcd-musb.c \
+ hw/usb/tusb6010.c \
kvm-stub.c \
+ target-arm/arch_dump.c \
+ target-arm/arm-powerctl.c \
target-arm/arm-semi.c \
target-arm/cpu.c \
target-arm/crypto_helper.c \
@@ -674,19 +874,24 @@
target-arm/iwmmxt_helper.c \
target-arm/kvm-stub.c \
target-arm/machine.c \
+ target-arm/monitor.c \
target-arm/neon_helper.c \
target-arm/op_helper.c \
target-arm/psci.c \
target-arm/translate.c \
QEMU2_TARGET_i386_SOURCES := \
- hw/acpi/acpi_interface.c \
hw/acpi/core.c \
+ hw/acpi/cpu.c \
hw/acpi/cpu_hotplug.c \
hw/acpi/ich9.c \
+ hw/acpi/ipmi.c \
hw/acpi/memory_hotplug.c \
+ hw/acpi/memory_hotplug_acpi_table.c \
+ hw/acpi/nvdimm.c \
hw/acpi/pcihp.c \
hw/acpi/piix4.c \
+ hw/acpi/tco.c \
hw/audio/adlib.c \
hw/audio/cs4231a.c \
hw/audio/fmopl.c \
@@ -698,24 +903,21 @@
hw/block/fdc.c \
hw/char/debugcon.c \
hw/char/parallel.c \
- hw/cpu/icc_bus.c \
hw/display/cirrus_vga.c \
hw/display/vga-isa.c \
- hw/display/vga-pci.c \
+ hw/display/virtio-vga.c \
hw/display/vmware_vga.c \
hw/dma/i8257.c \
hw/i2c/pm_smbus.c \
hw/i2c/smbus_ich9.c \
- hw/i386/acpi-build.c \
- hw/i386/bios-linker-loader.c \
hw/i386/intel_iommu.c \
hw/i386/kvmvapic.c \
hw/i386/multiboot.c \
hw/i386/pc.c \
- hw/i386/pc_piix.c \
hw/i386/pc_q35.c \
hw/i386/pc_sysfw.c \
- hw/i386/smbios.c \
+ hw/i386/pci-assign-load-rom.c \
+ hw/i386/x86-iommu.c \
hw/ide/isa.c \
hw/ide/piix.c \
hw/input/pckbd.c \
@@ -726,8 +928,14 @@
hw/intc/i8259_common.c \
hw/intc/ioapic.c \
hw/intc/ioapic_common.c \
+ hw/ipmi/ipmi.c \
+ hw/ipmi/ipmi_bmc_extern.c \
+ hw/ipmi/ipmi_bmc_sim.c \
+ hw/ipmi/isa_ipmi_bt.c \
+ hw/ipmi/isa_ipmi_kcs.c \
hw/isa/apm.c \
hw/isa/lpc_ich9.c \
+ hw/mem/nvdimm.c \
hw/mem/pc-dimm.c \
hw/misc/applesmc.c \
hw/misc/debugexit.c \
@@ -736,9 +944,14 @@
hw/misc/sga.c \
hw/misc/vmport.c \
hw/net/ne2000-isa.c \
+ hw/pci-bridge/i82801b11.c \
+ hw/pci-bridge/ioh3420.c \
+ hw/pci-bridge/xio3130_downstream.c \
+ hw/pci-bridge/xio3130_upstream.c \
hw/pci-host/piix.c \
hw/pci-host/q35.c \
- hw/pci/pci-hotplug-old.c \
+ hw/smbios/smbios.c \
+ hw/smbios/smbios_type_38.c \
hw/timer/hpet.c \
hw/timer/i8254.c \
hw/timer/i8254_common.c \
@@ -747,6 +960,7 @@
hw/watchdog/wdt_ib700.c \
target-i386/arch_dump.c \
target-i386/arch_memory_mapping.c \
+ target-i386/bpt_helper.c \
target-i386/cc_helper.c \
target-i386/cpu.c \
target-i386/excp_helper.c \
@@ -757,6 +971,8 @@
target-i386/machine.c \
target-i386/mem_helper.c \
target-i386/misc_helper.c \
+ target-i386/monitor.c \
+ target-i386/mpx_helper.c \
target-i386/seg_helper.c \
target-i386/smm_helper.c \
target-i386/svm_helper.c \
@@ -764,12 +980,11 @@
QEMU2_TARGET_mips64el_SOURCES := \
disas/mips.c \
- hax-stub.c \
- hw/acpi/acpi_interface.c \
hw/acpi/core.c \
+ hw/acpi/cpu.c \
hw/acpi/cpu_hotplug.c \
- hw/acpi/ich9.c \
hw/acpi/memory_hotplug.c \
+ hw/acpi/memory_hotplug_acpi_table.c \
hw/acpi/pcihp.c \
hw/acpi/piix4.c \
hw/audio/adlib.c \
@@ -788,7 +1003,6 @@
hw/display/jazz_led.c \
hw/display/vga-isa-mm.c \
hw/display/vga-isa.c \
- hw/display/vga-pci.c \
hw/display/vmware_vga.c \
hw/dma/i8257.c \
hw/dma/rc4030.c \
@@ -800,10 +1014,12 @@
hw/input/pckbd.c \
hw/intc/i8259.c \
hw/intc/i8259_common.c \
+ hw/intc/mips_gic.c \
hw/isa/apm.c \
hw/isa/piix4.c \
hw/isa/vt82c686.c \
hw/mips/addr.c \
+ hw/mips/cps.c \
hw/mips/cputimer.c \
hw/mips/gt64xxx_pci.c \
hw/mips/mips_fulong2e.c \
@@ -812,6 +1028,9 @@
hw/mips/mips_malta.c \
hw/mips/mips_mipssim.c \
hw/mips/mips_r4k.c \
+ hw/misc/mips_cmgcr.c \
+ hw/misc/mips_cpc.c \
+ hw/misc/mips_itu.c \
hw/misc/pc-testdev.c \
hw/net/dp8393x.c \
hw/net/mipsnet.c \
@@ -821,6 +1040,7 @@
hw/timer/i8254.c \
hw/timer/i8254_common.c \
hw/timer/mc146818rtc.c \
+ hw/timer/mips_gictimer.c \
kvm-stub.c \
target-mips/cpu.c \
target-mips/dsp_helper.c \
@@ -828,18 +1048,18 @@
target-mips/helper.c \
target-mips/lmi_helper.c \
target-mips/machine.c \
+ target-mips/mips-semi.c \
target-mips/msa_helper.c \
target-mips/op_helper.c \
target-mips/translate.c \
QEMU2_TARGET_mipsel_SOURCES := \
disas/mips.c \
- hax-stub.c \
- hw/acpi/acpi_interface.c \
hw/acpi/core.c \
+ hw/acpi/cpu.c \
hw/acpi/cpu_hotplug.c \
- hw/acpi/ich9.c \
hw/acpi/memory_hotplug.c \
+ hw/acpi/memory_hotplug_acpi_table.c \
hw/acpi/pcihp.c \
hw/acpi/piix4.c \
hw/audio/adlib.c \
@@ -854,14 +1074,10 @@
hw/char/parallel.c \
hw/core/empty_slot.c \
hw/display/cirrus_vga.c \
- hw/display/g364fb.c \
- hw/display/jazz_led.c \
hw/display/vga-isa-mm.c \
hw/display/vga-isa.c \
- hw/display/vga-pci.c \
hw/display/vmware_vga.c \
hw/dma/i8257.c \
- hw/dma/rc4030.c \
hw/i2c/pm_smbus.c \
hw/i2c/smbus_ich9.c \
hw/ide/isa.c \
@@ -869,24 +1085,27 @@
hw/input/pckbd.c \
hw/intc/i8259.c \
hw/intc/i8259_common.c \
+ hw/intc/mips_gic.c \
hw/isa/apm.c \
hw/isa/piix4.c \
hw/mips/addr.c \
+ hw/mips/cps.c \
hw/mips/cputimer.c \
hw/mips/gt64xxx_pci.c \
hw/mips/mips_int.c \
- hw/mips/mips_jazz.c \
hw/mips/mips_malta.c \
hw/mips/mips_mipssim.c \
hw/mips/mips_r4k.c \
+ hw/misc/mips_cmgcr.c \
+ hw/misc/mips_cpc.c \
+ hw/misc/mips_itu.c \
hw/misc/pc-testdev.c \
- hw/net/dp8393x.c \
hw/net/mipsnet.c \
hw/net/ne2000-isa.c \
- hw/nvram/ds1225y.c \
hw/timer/i8254.c \
hw/timer/i8254_common.c \
hw/timer/mc146818rtc.c \
+ hw/timer/mips_gictimer.c \
kvm-stub.c \
target-mips/cpu.c \
target-mips/dsp_helper.c \
@@ -894,18 +1113,23 @@
target-mips/helper.c \
target-mips/lmi_helper.c \
target-mips/machine.c \
+ target-mips/mips-semi.c \
target-mips/msa_helper.c \
target-mips/op_helper.c \
target-mips/translate.c \
QEMU2_TARGET_x86_64_SOURCES := \
- hw/acpi/acpi_interface.c \
hw/acpi/core.c \
+ hw/acpi/cpu.c \
hw/acpi/cpu_hotplug.c \
hw/acpi/ich9.c \
+ hw/acpi/ipmi.c \
hw/acpi/memory_hotplug.c \
+ hw/acpi/memory_hotplug_acpi_table.c \
+ hw/acpi/nvdimm.c \
hw/acpi/pcihp.c \
hw/acpi/piix4.c \
+ hw/acpi/tco.c \
hw/audio/adlib.c \
hw/audio/cs4231a.c \
hw/audio/fmopl.c \
@@ -917,24 +1141,21 @@
hw/block/fdc.c \
hw/char/debugcon.c \
hw/char/parallel.c \
- hw/cpu/icc_bus.c \
hw/display/cirrus_vga.c \
hw/display/vga-isa.c \
- hw/display/vga-pci.c \
+ hw/display/virtio-vga.c \
hw/display/vmware_vga.c \
hw/dma/i8257.c \
hw/i2c/pm_smbus.c \
hw/i2c/smbus_ich9.c \
- hw/i386/acpi-build.c \
- hw/i386/bios-linker-loader.c \
hw/i386/intel_iommu.c \
hw/i386/kvmvapic.c \
hw/i386/multiboot.c \
hw/i386/pc.c \
- hw/i386/pc_piix.c \
hw/i386/pc_q35.c \
hw/i386/pc_sysfw.c \
- hw/i386/smbios.c \
+ hw/i386/pci-assign-load-rom.c \
+ hw/i386/x86-iommu.c \
hw/ide/isa.c \
hw/ide/piix.c \
hw/input/pckbd.c \
@@ -945,8 +1166,14 @@
hw/intc/i8259_common.c \
hw/intc/ioapic.c \
hw/intc/ioapic_common.c \
+ hw/ipmi/ipmi.c \
+ hw/ipmi/ipmi_bmc_extern.c \
+ hw/ipmi/ipmi_bmc_sim.c \
+ hw/ipmi/isa_ipmi_bt.c \
+ hw/ipmi/isa_ipmi_kcs.c \
hw/isa/apm.c \
hw/isa/lpc_ich9.c \
+ hw/mem/nvdimm.c \
hw/mem/pc-dimm.c \
hw/misc/applesmc.c \
hw/misc/debugexit.c \
@@ -955,9 +1182,14 @@
hw/misc/sga.c \
hw/misc/vmport.c \
hw/net/ne2000-isa.c \
+ hw/pci-bridge/i82801b11.c \
+ hw/pci-bridge/ioh3420.c \
+ hw/pci-bridge/xio3130_downstream.c \
+ hw/pci-bridge/xio3130_upstream.c \
hw/pci-host/piix.c \
hw/pci-host/q35.c \
- hw/pci/pci-hotplug-old.c \
+ hw/smbios/smbios.c \
+ hw/smbios/smbios_type_38.c \
hw/timer/hpet.c \
hw/timer/i8254.c \
hw/timer/i8254_common.c \
@@ -966,6 +1198,7 @@
hw/watchdog/wdt_ib700.c \
target-i386/arch_dump.c \
target-i386/arch_memory_mapping.c \
+ target-i386/bpt_helper.c \
target-i386/cc_helper.c \
target-i386/cpu.c \
target-i386/excp_helper.c \
@@ -976,6 +1209,8 @@
target-i386/machine.c \
target-i386/mem_helper.c \
target-i386/misc_helper.c \
+ target-i386/monitor.c \
+ target-i386/mpx_helper.c \
target-i386/seg_helper.c \
target-i386/smm_helper.c \
target-i386/svm_helper.c \
@@ -984,8 +1219,15 @@
QEMU2_TARGET_aarch64_SOURCES_darwin-x86_64 := \
QEMU2_TARGET_aarch64_SOURCES_linux-x86_64 := \
- hw/misc/vfio.c \
+ hw/misc/ivshmem.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
@@ -997,8 +1239,15 @@
QEMU2_TARGET_arm_SOURCES_darwin-x86_64 := \
QEMU2_TARGET_arm_SOURCES_linux-x86_64 := \
- hw/misc/vfio.c \
+ hw/misc/ivshmem.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
@@ -1009,47 +1258,52 @@
QEMU2_TARGET_i386_SOURCES_darwin-x86_64 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-darwin.c \
- target-i386/hax-slot.c \
target-i386/kvm-stub.c \
QEMU2_TARGET_i386_SOURCES_linux-x86_64 := \
- hax-stub.c \
hw/i386/kvm/apic.c \
hw/i386/kvm/clock.c \
hw/i386/kvm/i8254.c \
hw/i386/kvm/i8259.c \
hw/i386/kvm/ioapic.c \
hw/i386/kvm/pci-assign.c \
+ hw/misc/hyperv_testdev.c \
hw/misc/ivshmem.c \
- hw/misc/vfio.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
kvm-all.c \
+ target-i386/hyperv.c \
target-i386/kvm.c \
QEMU2_TARGET_i386_SOURCES_windows-x86 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-slot.c \
- target-i386/hax-windows.c \
target-i386/kvm-stub.c \
QEMU2_TARGET_i386_SOURCES_windows-x86_64 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-slot.c \
- target-i386/hax-windows.c \
target-i386/kvm-stub.c \
QEMU2_TARGET_mips64el_SOURCES_darwin-x86_64 := \
QEMU2_TARGET_mips64el_SOURCES_linux-x86_64 := \
- hw/misc/vfio.c \
+ hw/misc/ivshmem.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
@@ -1061,8 +1315,15 @@
QEMU2_TARGET_mipsel_SOURCES_darwin-x86_64 := \
QEMU2_TARGET_mipsel_SOURCES_linux-x86_64 := \
- hw/misc/vfio.c \
+ hw/misc/ivshmem.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
@@ -1073,39 +1334,37 @@
QEMU2_TARGET_x86_64_SOURCES_darwin-x86_64 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-darwin.c \
- target-i386/hax-slot.c \
target-i386/kvm-stub.c \
QEMU2_TARGET_x86_64_SOURCES_linux-x86_64 := \
- hax-stub.c \
hw/i386/kvm/apic.c \
hw/i386/kvm/clock.c \
hw/i386/kvm/i8254.c \
hw/i386/kvm/i8259.c \
hw/i386/kvm/ioapic.c \
hw/i386/kvm/pci-assign.c \
+ hw/misc/hyperv_testdev.c \
hw/misc/ivshmem.c \
- hw/misc/vfio.c \
hw/scsi/vhost-scsi.c \
+ hw/vfio/amd-xgbe.c \
+ hw/vfio/calxeda-xgmac.c \
+ hw/vfio/common.c \
+ hw/vfio/pci-quirks.c \
+ hw/vfio/pci.c \
+ hw/vfio/platform.c \
+ hw/vfio/spapr.c \
hw/virtio/vhost-backend.c \
hw/virtio/vhost-user.c \
hw/virtio/vhost.c \
kvm-all.c \
+ target-i386/hyperv.c \
target-i386/kvm.c \
QEMU2_TARGET_x86_64_SOURCES_windows-x86 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-slot.c \
- target-i386/hax-windows.c \
target-i386/kvm-stub.c \
QEMU2_TARGET_x86_64_SOURCES_windows-x86_64 := \
kvm-stub.c \
- target-i386/hax-all.c \
- target-i386/hax-slot.c \
- target-i386/hax-windows.c \
target-i386/kvm-stub.c \
diff --git a/android-qemu2-glue/build/Makefile.qemu2-target.mk b/android-qemu2-glue/build/Makefile.qemu2-target.mk
index 621c87c..2c45668 100644
--- a/android-qemu2-glue/build/Makefile.qemu2-target.mk
+++ b/android-qemu2-glue/build/Makefile.qemu2-target.mk
@@ -36,13 +36,14 @@
# If $(QEMU2_TARGET_CPU) is $1, then return $2, or the empty string otherwise.
qemu2-if-target-arch = $(if $(filter $1,$(QEMU2_TARGET_CPU)),$2)
-$(call start-emulator-program,qemu-system-$(QEMU2_TARGET_SYSTEM))
+# A library that contains all QEMU2 target-specific sources, excluding
+# anything implemented by the glue code.
-LOCAL_CFLAGS += \
+QEMU2_SYSTEM_CFLAGS := \
$(QEMU2_CFLAGS) \
- $(EMULATOR_LIBUI_CFLAGS)
+ -DNEED_CPU_H \
-LOCAL_C_INCLUDES += \
+QEMU2_SYSTEM_INCLUDES := \
$(QEMU2_INCLUDES) \
$(QEMU2_DEPS_TOP_DIR)/include \
$(call qemu2-if-linux,$(LOCAL_PATH)/linux-headers) \
@@ -50,15 +51,31 @@
$(LOCAL_PATH)/target-$(QEMU2_TARGET_TARGET) \
$(LOCAL_PATH)/tcg \
$(LOCAL_PATH)/tcg/i386 \
- $(LOCAL_PATH)/target-$(QEMU2_TARGET_TARGET) \
- $(ANDROID_EMU_INCLUDES) \
- $(LIBCURL_INCLUDES) \
- $(EMULATOR_LIBUI_INCLUDES) \
-LOCAL_CFLAGS += -DNEED_CPU_H
+QEMU2_SYSTEM_LDFLAGS := $(QEMU2_DEPS_LDFLAGS)
+
+QEMU2_SYSTEM_LDLIBS := \
+ $(QEMU2_GLIB_LDLIBS) \
+ $(QEMU2_PIXMAN_LDLIBS) \
+ $(CXX_STD_LIB) \
+ -lfdt \
+ $(call qemu2-if-windows, -lvfw32 -ldxguid) \
+ $(call qemu2-if-linux, -lpulse) \
+
+QEMU2_SYSTEM_STATIC_LIBRARIES := \
+ emulator-zlib
+
+$(call start-emulator-library,libqemu2-system-$(QEMU2_TARGET_SYSTEM))
+
+LOCAL_CFLAGS += \
+ $(QEMU2_SYSTEM_CFLAGS) \
+ -DPOISON_CONFIG_ANDROID \
+
+LOCAL_C_INCLUDES += \
+ $(QEMU2_SYSTEM_INCLUDES) \
+ $(call qemu2-if-target,arm arm64,$(LOCAL_PATH)/disas/libvixl) \
LOCAL_SRC_FILES += \
- android-qemu2-glue/main.cpp \
$(QEMU2_TARGET_SOURCES) \
$(QEMU2_TARGET_$(QEMU2_TARGET_CPU)_SOURCES) \
$(QEMU2_TARGET_$(QEMU2_TARGET_CPU)_SOURCES_$(BUILD_TARGET_TAG))
@@ -66,14 +83,16 @@
LOCAL_SRC_FILES += \
hw/audio/goldfish_audio.c \
hw/char/goldfish_tty.c \
- hw/display/framebuffer.c \
hw/display/goldfish_fb.c \
+ $(call qemu2-if-target,arm arm64,, \
+ hw/display/framebuffer.c \
+ ) \
hw/input/goldfish_events.c \
hw/intc/goldfish_pic.c \
- hw/misc/goldfish_pipe.c \
- hw/misc/goldfish_battery.c \
- hw/misc/goldfish_sync.c \
hw/timer/goldfish_timer.c \
+ hw/misc/goldfish_battery.c \
+ hw/misc/goldfish_pipe.c \
+ hw/misc/goldfish_sync.c \
$(call qemu2-if-target,arm arm64,\
hw/arm/ranchu.c) \
$(call qemu2-if-target,mips mips64,\
@@ -88,40 +107,141 @@
endif
LOCAL_SRC_FILES += \
+ stubs/target-get-monitor-def.c \
$(call qemu2-if-target,arm arm64 mips mips64,\
- stubs/dump.c \
- stubs/pci-drive-hot-add.c \
stubs/qmp_pc_dimm_device_list.c \
) \
+ $(call qemu2-if-target,x86 x86_64,, \
+ stubs/pc_madt_cpu_entry.c \
+ stubs/smbios_type_38.c \
+ stubs/target-monitor-defs.c \
+ ) \
$(call qemu2-if-target,mips mips64, \
- stubs/arch-query-cpu-def.c)
+ stubs/dump.c \
+ stubs/arch-query-cpu-def.c \
+ ) \
+ $(call qemu2-if-linux,, \
+ stubs/vhost.c \
+ ) \
+
+# HAX support.
+HAX_COMMON_SOURCES := \
+ target-i386/hax-all.c \
+ target-i386/hax-slot.c \
+
+LOCAL_SRC_FILES += \
+ $(call qemu2-if-target,x86 x86_64, \
+ $(call qemu2-if-windows, \
+ $(HAX_COMMON_SOURCES) \
+ target-i386/hax-windows.c) \
+ $(call qemu2-if-darwin, \
+ $(HAX_COMMON_SOURCES) \
+ target-i386/hax-darwin.c) \
+ $(call qemu2-if-linux, \
+ hax-stub.c) \
+ , \
+ hax-stub.c \
+ ) \
LOCAL_PREBUILTS_OBJ_FILES += \
$(call qemu2-if-windows,$(QEMU2_AUTO_GENERATED_DIR)/version.o)
+$(call end-emulator-library)
+
+# The upstream version of QEMU2, without any Android-specific hacks.
+# This uses the regular SDL2 UI backend.
+
+$(call start-emulator-program,qemu-upstream-$(QEMU2_TARGET_SYSTEM))
+
LOCAL_WHOLE_STATIC_LIBRARIES += \
- libqemu2_common \
- libqemu2_glue \
- $(call qemu2-if-target,arm arm64, libqemu2_common_aarch64) \
+ libqemu2-system-$(QEMU2_TARGET_SYSTEM) \
+ libqemu2-common \
LOCAL_STATIC_LIBRARIES += \
- emulator-libui \
- $(EMULATOR_LIBUI_STATIC_LIBRARIES) \
+ $(QEMU2_SYSTEM_STATIC_LIBRARIES) \
+
+LOCAL_CFLAGS += \
+ $(QEMU2_SYSTEM_CFLAGS) \
+
+LOCAL_C_INCLUDES += \
+ $(QEMU2_SYSTEM_INCLUDES) \
+ $(QEMU2_SDL2_INCLUDES) \
+
+LOCAL_SRC_FILES += \
+ $(call qemu2-if-target,x86 x86_64, \
+ hw/i386/acpi-build.c \
+ hw/i386/pc_piix.c \
+ ) \
+ $(call qemu2-if-windows, \
+ stubs/win32-stubs.c \
+ ) \
+ ui/sdl2.c \
+ ui/sdl2-input.c \
+ ui/sdl2-2d.c \
+ vl.c \
+
+LOCAL_LDFLAGS += $(QEMU2_SYSTEM_LDFLAGS)
+
+LOCAL_LDLIBS += \
+ $(QEMU2_SYSTEM_LDLIBS) \
+ $(QEMU2_SDL2_LDLIBS) \
+
+LOCAL_INSTALL_DIR := qemu/$(BUILD_TARGET_TAG)
+
+$(call end-emulator-program)
+
+
+# The emulator-specific version of QEMU2, with CONFIG_ANDROID defined at
+# compile-time.
+
+$(call start-emulator-program,qemu-system-$(QEMU2_TARGET_SYSTEM))
+
+LOCAL_WHOLE_STATIC_LIBRARIES += \
+ libqemu2-glue \
+ libqemu2-system-$(QEMU2_TARGET_SYSTEM) \
+ libqemu2-common \
+
+LOCAL_STATIC_LIBRARIES += \
+ $(QEMU2_SYSTEM_STATIC_LIBRARIES) \
+ $(QEMU2_GLUE_STATIC_LIBRARIES) \
$(ANDROID_EMU_STATIC_LIBRARIES) \
+LOCAL_CFLAGS += \
+ $(QEMU2_SYSTEM_CFLAGS) \
+ -DCONFIG_ANDROID \
+
+LOCAL_C_INCLUDES += \
+ $(QEMU2_SYSTEM_INCLUDES) \
+ $(QEMU2_GLUE_INCLUDES) \
+ $(ANDROID_EMU_INCLUDES) \
+
+# For now, use stubs/sdl-null.c as an empty/fake SDL UI backend.
+# TODO: Use the glue code to use the Qt-based UI instead.
+LOCAL_SRC_FILES += \
+ android-qemu2-glue/main.cpp \
+ $(call qemu2-if-target,x86 x86_64, \
+ hw/i386/acpi-build.c \
+ hw/i386/pc_piix.c \
+ ) \
+ $(call qemu2-if-windows, \
+ android-qemu2-glue/stubs/win32-stubs.c \
+ ) \
+ vl.c \
+
+LOCAL_LDFLAGS += $(QEMU2_SYSTEM_LDFLAGS) $(QEMU2_GLUE_LDFLAGS)
+LOCAL_LDLIBS += $(QEMU2_SYSTEM_LDLIBS) $(QEMU2_GLUE_LDLIBS)
+
LOCAL_LDFLAGS += \
$(QEMU2_DEPS_LDFLAGS) \
- $(EMULATOR_LIBUI_LDFLAGS)
LOCAL_LDLIBS += \
$(QEMU2_GLIB_LDLIBS) \
$(QEMU2_PIXMAN_LDLIBS) \
$(CXX_STD_LIB) \
-lfdt \
- $(call qemu2-if-windows, -lvfw32) \
+ $(call qemu2-if-windows, -lvfw32 -ldxguid) \
$(call qemu2-if-linux, -lpulse) \
$(ANDROID_EMU_LDLIBS) \
- $(EMULATOR_LIBUI_LDLIBS) \
LOCAL_INSTALL_DIR := qemu/$(BUILD_TARGET_TAG)
diff --git a/android-qemu2-glue/build/Makefile.qemu2.mk b/android-qemu2-glue/build/Makefile.qemu2.mk
index 481f665..8934608 100644
--- a/android-qemu2-glue/build/Makefile.qemu2.mk
+++ b/android-qemu2-glue/build/Makefile.qemu2.mk
@@ -56,6 +56,47 @@
QEMU2_PIXMAN_INCLUDES := $(QEMU2_DEPS_TOP_DIR)/include/pixman-1
QEMU2_PIXMAN_LDLIBS := -lpixman-1
+QEMU2_SDL2_INCLUDES := $(QEMU2_DEPS_TOP_DIR)/include/SDL2
+QEMU2_SDL2_LDLIBS := \
+ $(call qemu2-if-windows, \
+ -lmingw32 \
+ ) \
+ -lSDL2 \
+ $(call qemu2-if-darwin,, \
+ -lSDL2main \
+ ) \
+ $(call qemu2-if-windows, \
+ -limm32 \
+ -ldinput8 \
+ -ldxguid \
+ -ldxerr8 \
+ -luser32 \
+ -lgdi32 \
+ -lwinmm \
+ -lole32 \
+ -loleaut32 \
+ -lshell32 \
+ -lversion \
+ -luuid \
+ , \
+ -ldl \
+ ) \
+
+ifeq (darwin,$(BUILD_TARGET_OS))
+# NOTE: Because the following contain commas, we cannot use qemu2-if-darwin!
+QEMU2_SDL2_LDLIBS += \
+ -Wl,-framework,OpenGL \
+ -Wl,-framework,ForceFeedback \
+ -lobjc \
+ -Wl,-framework,Cocoa \
+ -Wl,-framework,Carbon \
+ -Wl,-framework,IOKit \
+ -Wl,-framework,CoreAudio \
+ -Wl,-framework,AudioToolbox \
+ -Wl,-framework,AudioUnit \
+
+endif
+
# Ensure config-host.h can be found properly.
QEMU2_INCLUDES := $(LOCAL_PATH)/android-qemu2-glue/config/$(BUILD_TARGET_TAG)
@@ -64,7 +105,6 @@
$(LOCAL_PATH) \
$(LOCAL_PATH)/include \
$(QEMU2_AUTO_GENERATED_DIR) \
- $(ANDROID_EMU_INCLUDES) \
QEMU2_INCLUDES += $(QEMU2_GLIB_INCLUDES) $(QEMU2_PIXMAN_INCLUDES)
@@ -79,23 +119,33 @@
$(LIBCURL_CFLAGS) \
-D_GNU_SOURCE \
-D_FILE_OFFSET_BITS=64 \
- -DCONFIG_ANDROID \
$(call qemu2-if-darwin, -Wno-initializer-overrides) \
-include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-glue.mk
+QEMU2_CFLAGS += \
+ -Wno-unused-function \
+ $(call qemu2-if-darwin, \
+ -Wno-unused-value \
+ -Wno-parentheses-equality \
+ -Wno-self-assign \
+ , \
+ -Wno-unused-variable \
+ -Wno-unused-but-set-variable \
+ -Wno-maybe-uninitialized \
+ ) \
+ -UNDEBUG \
+
+#include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-glue.mk
#include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-qt.mk
include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-sources.mk
-# Custom fixes.
-QEMU2_COMMON_SOURCES += \
- stubs/kvm.c
-
# A static library containing target-independent code
-$(call start-emulator-library,libqemu2_common)
+$(call start-emulator-library,libqemu2-common)
-LOCAL_CFLAGS += $(QEMU2_CFLAGS)
+LOCAL_CFLAGS += \
+ $(QEMU2_CFLAGS) \
+ -DPOISON_CONFIG_ANDROID \
LOCAL_C_INCLUDES += \
$(QEMU2_INCLUDES) \
@@ -111,117 +161,112 @@
$(QEMU2_AUTO_GENERATED_DIR)/qapi-event.c \
$(QEMU2_AUTO_GENERATED_DIR)/qapi-types.c \
$(QEMU2_AUTO_GENERATED_DIR)/qapi-visit.c \
+ $(QEMU2_AUTO_GENERATED_DIR)/qmp-introspect.c \
$(QEMU2_AUTO_GENERATED_DIR)/qmp-marshal.c \
$(QEMU2_AUTO_GENERATED_DIR)/trace/generated-events.c \
$(QEMU2_AUTO_GENERATED_DIR)/trace/generated-tracers.c \
# Stuff from libqemuutil, order follows util/Makefile.objs
LOCAL_SRC_FILES += \
- qapi/qapi-visit-core.c \
+ crypto/pbkdf-stub.c \
+ qapi/opts-visitor.c \
+ qapi/qapi-clone-visitor.c \
qapi/qapi-dealloc-visitor.c \
+ qapi/qapi-util.c \
+ qapi/qapi-visit-core.c \
+ qapi/qmp-dispatch.c \
+ qapi/qmp-event.c \
qapi/qmp-input-visitor.c \
qapi/qmp-output-visitor.c \
qapi/qmp-registry.c \
- qapi/qmp-dispatch.c \
qapi/string-input-visitor.c \
qapi/string-output-visitor.c \
- qapi/opts-visitor.c \
- qapi/qmp-event.c \
- qapi/qapi-util.c \
- qobject/qint.c \
- qobject/qstring.c \
- qobject/qdict.c \
- qobject/qlist.c \
- qobject/qfloat.c \
- qobject/qbool.c \
- qobject/qjson.c \
qobject/json-lexer.c \
- qobject/json-streamer.c \
qobject/json-parser.c \
- qobject/qerror.c \
+ qobject/json-streamer.c \
+ qobject/qbool.c \
+ qobject/qdict.c \
+ qobject/qfloat.c \
+ qobject/qint.c \
+ qobject/qjson.c \
+ qobject/qlist.c \
+ qobject/qnull.c \
+ qobject/qobject.c \
+ qobject/qstring.c \
trace/control.c \
trace/qmp.c \
- util/osdep.c \
- util/cutils.c \
- util/unicode.c \
- util/qemu-timer-common.c \
- $(call qemu2-if-windows, \
- util/oslib-win32.c \
- util/qemu-thread-win32.c \
- util/event_notifier-win32.c \
- ) \
- $(call qemu2-if-posix, \
- util/oslib-posix.c \
- util/qemu-thread-posix.c \
- util/event_notifier-posix.c \
- util/qemu-openpty.c \
- ) \
- util/envlist.c \
- util/path.c \
- util/module.c \
- $(call qemu2-if-build-target-arch,x86, util/host-utils.c) \
+ util/abort.c \
+ util/acl.c \
+ util/base64.c \
util/bitmap.c \
util/bitops.c \
- util/hbitmap.c \
- util/fifo8.c \
- util/acl.c \
+ util/buffer.c \
+ util/crc32c.c \
+ util/cutils.c \
+ util/envlist.c \
util/error.c \
- util/qemu-error.c \
- $(call qemu2-if-posix, \
- util/compatfd.c \
- ) \
+ util/fifo8.c \
+ util/getauxval.c \
+ util/hexdump.c \
+ util/hbitmap.c \
util/id.c \
util/iov.c \
- util/aes.c \
- util/qemu-config.c \
- util/qemu-sockets.c \
- util/uri.c \
+ util/log.c \
+ util/module.c \
util/notify.c \
+ util/osdep.c \
+ util/path.c \
+ util/qdist.c \
+ util/qemu-config.c \
+ util/qemu-coroutine.c \
+ util/qemu-coroutine-io.c \
+ util/qemu-coroutine-lock.c \
+ util/qemu-coroutine-sleep.c \
+ util/qemu-error.c \
util/qemu-option.c \
util/qemu-progress.c \
- util/hexdump.c \
- util/crc32c.c \
- util/throttle.c \
- util/getauxval.c \
+ util/qemu-sockets.c \
+ util/qemu-timer-common.c \
+ util/qht.c \
+ util/range.c \
+ util/rcu.c \
util/readline.c \
util/rfifolock.c \
+ util/timed-average.c \
+ util/throttle.c \
+ util/unicode.c \
+ util/uri.c \
$(call qemu2-if-windows, \
- util/shared-library-win32.c \
+ util/coroutine-win32.c \
+ util/event_notifier-win32.c \
+ util/oslib-win32.c \
+ util/qemu-thread-win32.c \
+ ) \
+ $(call qemu2-if-linux, \
+ util/coroutine-ucontext.c \
+ util/memfd.c \
+ ) \
+ $(call qemu2-if-darwin, \
+ util/coroutine-sigaltstack.c \
) \
$(call qemu2-if-posix, \
- util/shared-library-posix.c \
+ util/event_notifier-posix.c \
+ util/mmap-alloc.c \
+ util/oslib-posix.c \
+ util/qemu-openpty.c \
+ util/qemu-thread-posix.c \
+ ) \
+ $(call qemu2-if-build-target-arch,x86, util/host-utils.c) \
+ $(call qemu2-if-posix, \
+ util/compatfd.c \
) \
$(call gen-hw-config-defs)
QEMU2_INCLUDES += $(QEMU_HW_CONFIG_DEFS_INCLUDES)
-LOCAL_SRC_FILES += \
- $(QEMU2_GLUE_SOURCES)
-
$(call end-emulator-library)
-# Special case, the following sources are only used by the arm64
-# target but cannot be part of libqemu2_aarch64 because they need
-# to be compiled without NEED_CPU_H
-$(call start-emulator-library,libqemu2_common_aarch64)
-
-LOCAL_CPP_EXTENSION := .cc
-
-LOCAL_C_INCLUDES += \
- $(QEMU2_INCLUDES) \
- $(LOCAL_PATH)/target-arm \
- $(LOCAL_PATH)/disas/libvixl
-
-LOCAL_CFLAGS += $(QEMU2_CFLAGS)
-
-LOCAL_SRC_FILES += \
- disas/arm-a64.cc \
- disas/libvixl/a64/decoder-a64.cc \
- disas/libvixl/a64/disasm-a64.cc \
- disas/libvixl/a64/instructions-a64.cc \
- disas/libvixl/utils.cc \
-
-$(call end-emulator-library)
+include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-glue.mk
QEMU2_TARGET := x86
include $(LOCAL_PATH)/android-qemu2-glue/build/Makefile.qemu2-target.mk
diff --git a/android-qemu2-glue/build/configure.sh b/android-qemu2-glue/build/configure.sh
index c258d55..1dbe87f 100644
--- a/android-qemu2-glue/build/configure.sh
+++ b/android-qemu2-glue/build/configure.sh
@@ -7,6 +7,10 @@
QEMU2_AUTOGENERATED_DIR=$OUT_DIR/build/qemu2-qapi-auto-generated
+replace_with_if_different () {
+ cmp -s "$1" "$2" || mv "$2" "$1"
+}
+
probe_prebuilts_dir "QEMU2 Dependencies" \
QEMU2_DEPS_PREBUILTS_DIR \
qemu-android-deps
@@ -15,23 +19,76 @@
python $QEMU2_TOP_DIR/scripts/qapi-types.py \
--builtins \
- -i $QEMU2_TOP_DIR/qapi-schema.json \
- -o $QEMU2_AUTOGENERATED_DIR
+ -o $QEMU2_AUTOGENERATED_DIR \
+ $QEMU2_TOP_DIR/qapi-schema.json
python $QEMU2_TOP_DIR/scripts/qapi-visit.py \
--builtins \
- -i $QEMU2_TOP_DIR/qapi-schema.json \
- -o $QEMU2_AUTOGENERATED_DIR
+ -o $QEMU2_AUTOGENERATED_DIR \
+ $QEMU2_TOP_DIR/qapi-schema.json
python $QEMU2_TOP_DIR/scripts/qapi-event.py \
- --builtins \
- -i $QEMU2_TOP_DIR/qapi-schema.json \
- -o $QEMU2_AUTOGENERATED_DIR
+ -o $QEMU2_AUTOGENERATED_DIR \
+ $QEMU2_TOP_DIR/qapi-schema.json
+
+python $QEMU2_TOP_DIR/scripts/qapi-introspect.py \
+ -o $QEMU2_AUTOGENERATED_DIR \
+ $QEMU2_TOP_DIR/qapi-schema.json
python $QEMU2_TOP_DIR/scripts/qapi-commands.py \
--middle \
- -i $QEMU2_TOP_DIR/qapi-schema.json \
- -o $QEMU2_AUTOGENERATED_DIR
+ -o $QEMU2_AUTOGENERATED_DIR \
+ $QEMU2_TOP_DIR/qapi-schema.json
+
+TRACE_EVENT_FILES="\
+ trace-events \
+ util/trace-events \
+ crypto/trace-events \
+ io/trace-events \
+ migration/trace-events \
+ block/trace-events \
+ hw/block/trace-events \
+ hw/char/trace-events \
+ hw/intc/trace-events \
+ hw/net/trace-events \
+ hw/virtio/trace-events \
+ hw/audio/trace-events \
+ hw/misc/trace-events \
+ hw/usb/trace-events \
+ hw/scsi/trace-events \
+ hw/nvram/trace-events \
+ hw/display/trace-events \
+ hw/input/trace-events \
+ hw/timer/trace-events \
+ hw/dma/trace-events \
+ hw/sparc/trace-events \
+ hw/sd/trace-events \
+ hw/isa/trace-events \
+ hw/i386/trace-events \
+ hw/9pfs/trace-events \
+ hw/ppc/trace-events \
+ hw/pci/trace-events \
+ hw/s390x/trace-events \
+ hw/vfio/trace-events \
+ hw/acpi/trace-events \
+ hw/arm/trace-events \
+ hw/alpha/trace-events \
+ ui/trace-events \
+ audio/trace-events \
+ net/trace-events \
+ target-i386/trace-events \
+ target-sparc/trace-events \
+ target-s390x/trace-events \
+ target-ppc/trace-events \
+ qom/trace-events \
+ linux-user/trace-events \
+"
+
+TRACE_EVENTS_ALL=$QEMU2_AUTOGENERATED_DIR/trace-events-all  # Concatenation of every file listed in TRACE_EVENT_FILES.
+rm -f "$TRACE_EVENTS_ALL" && touch "$TRACE_EVENTS_ALL"
+for TRACE_EVENT_FILE in $TRACE_EVENT_FILES; do
+    cat "$QEMU2_TOP_DIR/$TRACE_EVENT_FILE" >> "$TRACE_EVENTS_ALL"
+done
mkdir -p "$QEMU2_AUTOGENERATED_DIR"/trace
@@ -39,60 +96,69 @@
--backends=nop \
--format=h \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-tracers.h
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=c \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-tracers.c
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=events-h \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-events.h
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=events-c \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-events.c
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=tcg-helper-h \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-helpers.h
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=tcg-helper-wrapper-h \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-helpers-wrappers.h
python $QEMU2_TOP_DIR/scripts/tracetool.py \
--backends=nop \
--format=tcg-h \
--target-type system \
- < $QEMU2_TOP_DIR/trace-events \
+ < $TRACE_EVENTS_ALL \
> $QEMU2_AUTOGENERATED_DIR/trace/generated-tcg-tracers.h
bash $QEMU2_TOP_DIR/scripts/hxtool -h \
< $QEMU2_TOP_DIR/qemu-options.hx \
> $QEMU2_AUTOGENERATED_DIR/qemu-options.def
+replace_with_if_different \
+ "$QEMU2_TOP_DIR/qemu-options.def" \
+ $QEMU2_AUTOGENERATED_DIR/qemu-options.def
+
+
bash $QEMU2_TOP_DIR/scripts/hxtool -h \
< $QEMU2_TOP_DIR/hmp-commands.hx \
> $QEMU2_AUTOGENERATED_DIR/hmp-commands.h
bash $QEMU2_TOP_DIR/scripts/hxtool -h \
+ < $QEMU2_TOP_DIR/hmp-commands-info.hx \
+ > $QEMU2_AUTOGENERATED_DIR/hmp-commands-info.h
+
+bash $QEMU2_TOP_DIR/scripts/hxtool -h \
< $QEMU2_TOP_DIR/qmp-commands.hx \
> $QEMU2_AUTOGENERATED_DIR/qmp-commands-old.h
@@ -100,13 +166,6 @@
< $QEMU2_TOP_DIR/qemu-img-cmds.hx \
> $QEMU2_AUTOGENERATED_DIR/qemu-img-cmds.h
-HEX_FILES=$(find $QEMU2_TOP_DIR/hw/i386/ -name "*.hex.generated" | \
- sed -e 's|'$QEMU2_TOP_DIR/'||g')
-mkdir -p $QEMU2_AUTOGENERATED_DIR/hw/i386
-for HEX_FILES in $HEX_FILES; do
- cp "$QEMU2_TOP_DIR/$HEX_FILES" "$QEMU2_AUTOGENERATED_DIR/${HEX_FILES%.generated}"
-done
-
rm -f $QEMU2_AUTOGENERATED_DIR/gdbstub-xml-arm64.c
bash $QEMU2_TOP_DIR/scripts/feature_to_c.sh \
$QEMU2_AUTOGENERATED_DIR/gdbstub-xml-arm64.c \
@@ -131,6 +190,22 @@
$QEMU2_TOP_DIR/version.rc
fi
+# Generate qemu-version.h from Git history, or from today's date outside a Git checkout.
+QEMU_VERSION_H=$QEMU2_AUTOGENERATED_DIR/qemu-version.h
+QEMU_VERSION_H_TMP=$QEMU_VERSION_H.tmp
+rm -f "$QEMU_VERSION_H"
+if [ -d "$QEMU2_TOP_DIR/.git" ]; then
+    QEMU_VERSION=$(cd "$QEMU2_TOP_DIR" && git describe --match 'v*' 2>/dev/null | tr -d '\n')
+else
+    QEMU_VERSION=$(date "+%Y-%m-%d")
+fi
+
+echo "QEMU2 : Version [$QEMU_VERSION]"
+
+printf "#define QEMU_PKGVERSION \"(android-%s)\"\n" "$QEMU_VERSION" > "$QEMU_VERSION_H_TMP"
+replace_with_if_different "$QEMU_VERSION_H" "$QEMU_VERSION_H_TMP"
+rm -f "$QEMU_VERSION_H_TMP"  # was "$QEMU_VERSION_TMP_H", a variable that is never set
+
# Work-around for a QEMU2 bug:
# $QEMU2/linux-headers/linux/kvm.h includes <asm/kvm.h>
# but $QEMU2/linux-headers/asm/ doesn't exist. It is supposed
@@ -139,4 +214,5 @@
# The end result is that the <asm/kvm.h> from the host system
# or toolchain sysroot is being included, which ends up in a
# conflict. Work around it by creating a symlink here
+rm -f $QEMU2_AUTOGENERATED_DIR/asm
ln -sf $QEMU2_TOP_DIR/linux-headers/asm-x86 $QEMU2_AUTOGENERATED_DIR/asm
diff --git a/android-qemu2-glue/config/darwin-x86_64/config-host.h b/android-qemu2-glue/config/darwin-x86_64/config-host.h
index 72dc9eb..1a7cdee 100644
--- a/android-qemu2-glue/config/darwin-x86_64/config-host.h
+++ b/android-qemu2-glue/config/darwin-x86_64/config-host.h
@@ -1,11 +1,11 @@
/* Automatically generated by create_config - do not modify */
-#define CONFIG_QEMU_CONFDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/etc/qemu"
-#define CONFIG_QEMU_DATADIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/share/qemu"
-#define CONFIG_QEMU_DOCDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/share/doc/qemu"
-#define CONFIG_QEMU_MODDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/lib/qemu"
-#define CONFIG_QEMU_LOCALSTATEDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/var"
-#define CONFIG_QEMU_HELPERDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/libexec"
-#define CONFIG_QEMU_LOCALEDIR "/Volumes/Android/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/darwin-x86_64/share/locale"
+#define CONFIG_QEMU_CONFDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/etc/qemu"
+#define CONFIG_QEMU_DATADIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/share/qemu"
+#define CONFIG_QEMU_DOCDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/share/doc/qemu"
+#define CONFIG_QEMU_MODDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/lib/qemu"
+#define CONFIG_QEMU_LOCALSTATEDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/var"
+#define CONFIG_QEMU_HELPERDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/libexec"
+#define CONFIG_QEMU_LOCALEDIR "/tmp/qemu-android-build/prebuilts/qemu-android-deps/darwin-x86_64/share/locale"
#define HOST_X86_64 1
#define CONFIG_POSIX 1
#define CONFIG_DARWIN 1
@@ -22,23 +22,23 @@
#define CONFIG_VNC 1
#define CONFIG_FNMATCH 1
#define CONFIG_UUID 1
-#define QEMU_VERSION "2.2.0"
-#define QEMU_PKGVERSION ""
+#define QEMU_VERSION "2.7.0"
#define CONFIG_SDL 1
#define CONFIG_SDLABI 2.0
+#define CONFIG_HAS_GLIB_SUBPROCESS_TESTS 1
+#define CONFIG_TLS_PRIORITY "NORMAL"
+#define HAVE_IFADDRS_H 1
#define CONFIG_IOVEC 1
#define CONFIG_FDT 1
#define CONFIG_MADVISE 1
#define CONFIG_POSIX_MADVISE 1
#define CONFIG_BSD 1
-#define CONFIG_ZERO_MALLOC 1
#define CONFIG_QOM_CAST_DEBUG 1
#define CONFIG_COROUTINE_BACKEND sigaltstack
#define CONFIG_COROUTINE_POOL 1
#define CONFIG_CPUID_H 1
#define CONFIG_INT128 1
-#define CONFIG_TPM $(CONFIG_SOFTMMU)
-#define CONFIG_TRACE_NOP 1
+#define CONFIG_TPM 1
+#define CONFIG_TRACE_LOG 1
#define CONFIG_TRACE_FILE trace
#define HOST_DSOSUF ".so"
-#define CONFIG_ANDROID 1
diff --git a/android-qemu2-glue/config/linux-x86_64/config-host.h b/android-qemu2-glue/config/linux-x86_64/config-host.h
index 739c90a..1cbccdf 100644
--- a/android-qemu2-glue/config/linux-x86_64/config-host.h
+++ b/android-qemu2-glue/config/linux-x86_64/config-host.h
@@ -22,8 +22,7 @@
NULL
#define CONFIG_VNC 1
#define CONFIG_FNMATCH 1
-#define QEMU_VERSION "2.2.0"
-#define QEMU_PKGVERSION ""
+#define QEMU_VERSION "2.7.0"
#define CONFIG_SDL 1
#define CONFIG_SDLABI 2.0
#define CONFIG_UTIMENSAT 1
@@ -40,12 +39,14 @@
#define CONFIG_PRCTL_PR_SET_TIMERSLACK 1
#define CONFIG_EPOLL 1
#define CONFIG_EPOLL_CREATE1 1
-#define CONFIG_EPOLL_PWAIT 1
#define CONFIG_SENDFILE 1
#define CONFIG_TIMERFD 1
#define CONFIG_INOTIFY 1
#define CONFIG_INOTIFY1 1
#define CONFIG_BYTESWAP_H 1
+#define CONFIG_HAS_GLIB_SUBPROCESS_TESTS 1
+#define CONFIG_TLS_PRIORITY "NORMAL"
+#define HAVE_IFADDRS_H 1
#define CONFIG_VHOST_SCSI 1
#define CONFIG_IOVEC 1
#define CONFIG_PREADV 1
@@ -54,9 +55,6 @@
#define CONFIG_FDATASYNC 1
#define CONFIG_MADVISE 1
#define CONFIG_POSIX_MADVISE 1
-#define CONFIG_SIGEV_THREAD_ID 1
-#define CONFIG_GLX 1
-#define CONFIG_ZERO_MALLOC 1
#define CONFIG_QOM_CAST_DEBUG 1
#define CONFIG_COROUTINE_BACKEND ucontext
#define CONFIG_COROUTINE_POOL 1
@@ -65,9 +63,9 @@
#define CONFIG_HAS_ENVIRON 1
#define CONFIG_CPUID_H 1
#define CONFIG_INT128 1
-#define CONFIG_TPM $(CONFIG_SOFTMMU)
+#define CONFIG_TPM 1
#define CONFIG_TPM_PASSTHROUGH 1
-#define CONFIG_TRACE_NOP 1
+#define CONFIG_TRACE_LOG 1
#define CONFIG_TRACE_FILE trace
+#define CONFIG_IASL iasl
#define HOST_DSOSUF ".so"
-#define CONFIG_ANDROID 1
diff --git a/android-qemu2-glue/config/target-mips/config-target.h b/android-qemu2-glue/config/target-mips/config-target.h
index f7c37f0..e0a14fb 100644
--- a/android-qemu2-glue/config/target-mips/config-target.h
+++ b/android-qemu2-glue/config/target-mips/config-target.h
@@ -2,6 +2,7 @@
#define TARGET_ABI_MIPSO32 1
#define TARGET_MIPS 1
#define TARGET_NAME "mipsel"
+#define TARGET_MIPS 1
#define CONFIG_SOFTMMU 1
#define CONFIG_I386_DIS 1
#define CONFIG_MIPS_DIS 1
diff --git a/android-qemu2-glue/config/windows-x86/config-host.h b/android-qemu2-glue/config/windows-x86/config-host.h
index 3bc3a6e..476bdb9 100644
--- a/android-qemu2-glue/config/windows-x86/config-host.h
+++ b/android-qemu2-glue/config/windows-x86/config-host.h
@@ -7,35 +7,35 @@
#define CONFIG_QEMU_LOCALEDIR "/opt2/digit/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/windows-x86/locale"
#define HOST_I386 1
#define CONFIG_WIN32 1
-#define CONFIG_FILEVERSION 2,2,0,0
-#define CONFIG_PRODUCTVERSION 2,2,0,0
+#define CONFIG_FILEVERSION 2,7,0,0
+#define CONFIG_PRODUCTVERSION 2,7,0,0
#define CONFIG_SLIRP 1
#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
#define CONFIG_AUDIO_DRIVERS \
&winaudio_audio_driver,\
- &winwave_audio_driver,\
+ &dsound_audio_driver,\
#define CONFIG_WINAUDIO 1
-#define CONFIG_WINWAVE 1
+#define CONFIG_DSOUND 1
#define CONFIG_AUDIO_WIN_INT 1
#define CONFIG_BDRV_RW_WHITELIST\
NULL
#define CONFIG_BDRV_RO_WHITELIST\
NULL
#define CONFIG_VNC 1
-#define QEMU_VERSION "2.2.0"
-#define QEMU_PKGVERSION ""
+#define QEMU_VERSION "2.7.0"
#define CONFIG_SDL 1
#define CONFIG_SDLABI 2.0
+#define CONFIG_HAS_GLIB_SUBPROCESS_TESTS 1
+#define CONFIG_TLS_PRIORITY "NORMAL"
#define CONFIG_FDT 1
-#define CONFIG_ZERO_MALLOC 1
#define CONFIG_QOM_CAST_DEBUG 1
#define CONFIG_COROUTINE_BACKEND win32
#define CONFIG_COROUTINE_POOL 1
#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
#define CONFIG_CPUID_H 1
-#define CONFIG_TPM $(CONFIG_SOFTMMU)
-#define CONFIG_TRACE_NOP 1
+#define CONFIG_TPM 1
+#define CONFIG_TRACE_LOG 1
#define CONFIG_TRACE_FILE trace
+#define CONFIG_IASL iasl
#define HOST_DSOSUF ".dll"
-#define CONFIG_ANDROID 1
diff --git a/android-qemu2-glue/config/windows-x86_64/config-host.h b/android-qemu2-glue/config/windows-x86_64/config-host.h
index a3df876..ff2870d 100644
--- a/android-qemu2-glue/config/windows-x86_64/config-host.h
+++ b/android-qemu2-glue/config/windows-x86_64/config-host.h
@@ -7,36 +7,36 @@
#define CONFIG_QEMU_LOCALEDIR "/opt2/digit/repo/studio-dev/prebuilts/android-emulator-build/qemu-android-deps/windows-x86_64/locale"
#define HOST_X86_64 1
#define CONFIG_WIN32 1
-#define CONFIG_FILEVERSION 2,2,0,0
-#define CONFIG_PRODUCTVERSION 2,2,0,0
+#define CONFIG_FILEVERSION 2,7,0,0
+#define CONFIG_PRODUCTVERSION 2,7,0,0
#define CONFIG_SLIRP 1
#define CONFIG_SMBD_COMMAND "/usr/sbin/smbd"
#define CONFIG_AUDIO_DRIVERS \
&winaudio_audio_driver,\
- &winwave_audio_driver,\
+ &dsound_audio_driver,\
#define CONFIG_WINAUDIO 1
-#define CONFIG_WINWAVE 1
+#define CONFIG_DSOUND 1
#define CONFIG_AUDIO_WIN_INT 1
#define CONFIG_BDRV_RW_WHITELIST\
NULL
#define CONFIG_BDRV_RO_WHITELIST\
NULL
#define CONFIG_VNC 1
-#define QEMU_VERSION "2.2.0"
-#define QEMU_PKGVERSION ""
+#define QEMU_VERSION "2.7.0"
#define CONFIG_SDL 1
#define CONFIG_SDLABI 2.0
+#define CONFIG_HAS_GLIB_SUBPROCESS_TESTS 1
+#define CONFIG_TLS_PRIORITY "NORMAL"
#define CONFIG_FDT 1
-#define CONFIG_ZERO_MALLOC 1
#define CONFIG_QOM_CAST_DEBUG 1
#define CONFIG_COROUTINE_BACKEND win32
#define CONFIG_COROUTINE_POOL 1
#define CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE 1
#define CONFIG_CPUID_H 1
#define CONFIG_INT128 1
-#define CONFIG_TPM $(CONFIG_SOFTMMU)
-#define CONFIG_TRACE_NOP 1
+#define CONFIG_TPM 1
+#define CONFIG_TRACE_LOG 1
#define CONFIG_TRACE_FILE trace
+#define CONFIG_IASL iasl
#define HOST_DSOSUF ".dll"
-#define CONFIG_ANDROID 1
diff --git a/android-qemu2-glue/display.cpp b/android-qemu2-glue/display.cpp
index 99a9683..0f2a055 100644
--- a/android-qemu2-glue/display.cpp
+++ b/android-qemu2-glue/display.cpp
@@ -19,7 +19,8 @@
#include "android/emulator-window.h"
extern "C" {
- #include "ui/console.h"
+#include "qemu/osdep.h"
+#include "ui/console.h"
}
namespace {
@@ -149,6 +150,10 @@
return true;
}
+extern "C" void sdl_display_early_init(int opengl) {
+ (void)opengl;
+}
+
extern "C" bool sdl_display_init(DisplayState* ds,
int full_screen,
int no_frame) {
diff --git a/android-qemu2-glue/display.h b/android-qemu2-glue/display.h
index 98fc9c7..9eb7148 100644
--- a/android-qemu2-glue/display.h
+++ b/android-qemu2-glue/display.h
@@ -18,8 +18,9 @@
ANDROID_BEGIN_HEADER
-#include "ui/console.h"
-#include "../qemu/android/framebuffer.h"
+#include "qemu/osdep.h"
+#include "qemu/typedefs.h"
+#include "android/framebuffer.h"
bool android_display_init(DisplayState* ds, QFrameBuffer* qfbuff);
diff --git a/android-qemu2-glue/emulation/CharSerialLine.cpp b/android-qemu2-glue/emulation/CharSerialLine.cpp
index 6a958f6..e3e4063 100644
--- a/android-qemu2-glue/emulation/CharSerialLine.cpp
+++ b/android-qemu2-glue/emulation/CharSerialLine.cpp
@@ -11,7 +11,10 @@
#include "android-qemu2-glue/emulation/CharSerialLine.h"
+#include <type_traits>
+
extern "C" {
+#include "qemu/osdep.h"
#include "sysemu/char.h"
}
diff --git a/android-qemu2-glue/emulation/VmLock.cpp b/android-qemu2-glue/emulation/VmLock.cpp
index ece6ab3..9bc0ce3 100644
--- a/android-qemu2-glue/emulation/VmLock.cpp
+++ b/android-qemu2-glue/emulation/VmLock.cpp
@@ -14,24 +14,15 @@
#include "android-qemu2-glue/emulation/VmLock.h"
-#include "android/base/Log.h"
+#include <type_traits>
extern "C" {
+#include "qemu/osdep.h"
#include "qemu/main-loop.h"
} // extern "C"
namespace qemu2 {
-// TECHNICAL NOTE:
-//
-// This implementation needs to protect against recursive lock() calls
-// which can happen because some code in AndroidEmu calls it, without
-// knowing whether it's running in the thread that holds the BQL or not.
-//
-// What we want is to ensure that qemu_mutex_lock_iothread() is always true
-// when we leave ::lock(), and that it will be false when ::unlock() has
-// been called as often as ::lock() was.
-
void VmLock::lock() {
qemu_mutex_lock_iothread();
}
@@ -41,7 +32,7 @@
}
bool VmLock::isLockedBySelf() const {
- return qemu_mutex_check_iothread();
+ return qemu_mutex_iothread_locked();
}
} // namespace qemu2
diff --git a/android-qemu2-glue/emulation/charpipe.c b/android-qemu2-glue/emulation/charpipe.c
index c03f8ac..1d4b59e 100644
--- a/android-qemu2-glue/emulation/charpipe.c
+++ b/android-qemu2-glue/emulation/charpipe.c
@@ -11,11 +11,12 @@
*/
#include "android/utils/debug.h"
#include "android/utils/cbuffer.h"
+#include "qemu/osdep.h"
#include "sysemu/char.h"
-#define xxDEBUG
+#define DEBUG 0
-#ifdef DEBUG
+#if DEBUG
# include <stdio.h>
# define D(...) ( fprintf( stderr, __VA_ARGS__ ), fprintf(stderr, "\n") )
#else
@@ -441,7 +442,7 @@
void
-charpipe_poll( void )
+qemu_charpipe_poll( void )
{
CharPipeState* cp = _s_charpipes;
CharPipeState* cp_end = cp + MAX_CHAR_PIPES;
diff --git a/android-qemu2-glue/emulation/charpipe.h b/android-qemu2-glue/emulation/charpipe.h
index 7a62487..2ab24e9 100644
--- a/android-qemu2-glue/emulation/charpipe.h
+++ b/android-qemu2-glue/emulation/charpipe.h
@@ -11,6 +11,7 @@
*/
#pragma once
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "android/utils/compiler.h"
@@ -20,16 +21,17 @@
* QEMU components. For Android, this is used to connect an emulated serial port
* with the android modem
*/
-extern int qemu_chr_open_charpipe( CharDriverState* *pfirst, CharDriverState* *psecond );
+extern int qemu_chr_open_charpipe(CharDriverState* *pfirst,
+ CharDriverState* *psecond);
/* create a buffering character driver for a given endpoint. The result will buffer
* anything that is sent to it but cannot be sent to the endpoint immediately.
* On the other hand, if the endpoint calls can_read() or read(), these calls
* are passed immediately to the can_read() or read() handlers of the result.
*/
-extern CharDriverState* qemu_chr_open_buffer( CharDriverState* endpoint );
+extern CharDriverState *qemu_chr_open_buffer(CharDriverState* endpoint);
/* must be called from the main event loop to poll all charpipes */
-extern void charpipe_poll( void );
+extern void qemu_charpipe_poll(void);
ANDROID_END_HEADER
diff --git a/android-qemu2-glue/main.cpp b/android-qemu2-glue/main.cpp
index 3735019..0a18f06 100755
--- a/android-qemu2-glue/main.cpp
+++ b/android-qemu2-glue/main.cpp
@@ -448,6 +448,7 @@
if (opts->skip_adb_auth) {
args[n++] = "-skip-adb-auth";
}
+
/** SNAPSHOT STORAGE HANDLING */
/* If we have a valid snapshot storage path */
@@ -709,12 +710,6 @@
args[n++] = "-cpu";
args[n++] = kTarget.qemuCpu;
- // Set env var to "on" for Intel PMU if the feature is enabled.
- // cpu.c will then read that.
- if (android::featurecontrol::isEnabled(android::featurecontrol::IntelPerformanceMonitoringUnit)) {
- System::get()->envSet("ANDROID_EMU_FEATURE_IntelPerformanceMonitoringUnit", "on");
- }
-
#if defined(TARGET_X86_64) || defined(TARGET_I386)
char* accel_status = NULL;
CpuAccelMode accel_mode = ACCEL_AUTO;
diff --git a/android-qemu2-glue/net-android.cpp b/android-qemu2-glue/net-android.cpp
index d5fe8b3..913e6b5 100644
--- a/android-qemu2-glue/net-android.cpp
+++ b/android-qemu2-glue/net-android.cpp
@@ -1,4 +1,4 @@
-// Copyright 2015 The Android Open Source Project
+// Copyright 2016 The Android Open Source Project
//
// This software is licensed under the terms of the GNU General Public
// License version 2, as published by the Free Software Foundation, and
@@ -9,7 +9,8 @@
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
-#include "net-android.h"
+#include "android-qemu2-glue/net-android.h"
+
#include "android/android.h"
#include "android/network/constants.h"
#include "android/network/globals.h"
@@ -17,48 +18,48 @@
#include "android/shaper.h"
extern "C" {
-#include "net/net.h"
+#include "qemu/osdep.h"
+#include "net/slirp.h"
}
-#if defined(CONFIG_SLIRP)
-static void* s_slirp_state = nullptr;
-static void* s_net_client_state = nullptr;
-static Slirp* s_slirp = nullptr;
+static void* s_opaque = nullptr;
-static void
-android_net_delay_in_cb(void* data, size_t size, void* opaque)
+void android_qemu_init_slirp_shapers(void)
{
- slirp_input(s_slirp, static_cast<const uint8_t*>(data), size);
-}
+ android_net_delay_in = netdelay_create(
+ [](void* data, size_t size, void* opaque) {
+ net_slirp_receive_raw(s_opaque,
+ static_cast<const uint8_t*>(data),
+ static_cast<int>(size));
+ });
-static void
-android_net_shaper_in_cb(void* data, size_t size, void* opaque)
-{
- netdelay_send_aux(android_net_delay_in, data, size, opaque);
-}
+ android_net_shaper_in = netshaper_create(1,
+ [](void* data, size_t size, void* opaque) {
+ netdelay_send_aux(android_net_delay_in, data, size, opaque);
+ });
-static void
-android_net_shaper_out_cb(void* data, size_t size, void* opaque)
-{
- qemu_send_packet(static_cast<NetClientState*>(s_net_client_state),
- static_cast<const uint8_t*>(data),
- size);
-}
-
-void
-slirp_init_shapers(void* slirp_state, void* net_client_state, Slirp* slirp)
-{
- s_slirp_state = slirp_state;
- s_net_client_state = net_client_state;
- s_slirp = slirp;
- android_net_delay_in = netdelay_create(android_net_delay_in_cb);
- android_net_shaper_in = netshaper_create(1, android_net_shaper_in_cb);
- android_net_shaper_out = netshaper_create(1, android_net_shaper_out_cb);
+ android_net_shaper_out = netshaper_create(1,
+ [](void* data, size_t size, void* opaque) {
+ net_slirp_output_raw(opaque,
+ static_cast<const uint8_t*>(data),
+ static_cast<int>(size));
+ });
netdelay_set_latency(android_net_delay_in, android_net_min_latency,
android_net_max_latency);
+
netshaper_set_rate(android_net_shaper_out, android_net_download_speed);
netshaper_set_rate(android_net_shaper_in, android_net_upload_speed);
-}
-#endif // CONFIG_SLIRP
+ s_opaque = net_slirp_set_shapers(
+ android_net_shaper_out,
+ [](void* opaque, const void* data, int len) {
+ netshaper_send_aux(static_cast<NetShaper>(opaque),
+ (char*)data, len, s_opaque);
+ },
+ android_net_shaper_in,
+ [](void* opaque, const void* data, int len) {
+ netshaper_send_aux(static_cast<NetShaper>(opaque),
+ (void*)data, len, s_opaque);
+ });
+}
diff --git a/android-qemu2-glue/net-android.h b/android-qemu2-glue/net-android.h
index 693d87e..83add05 100644
--- a/android-qemu2-glue/net-android.h
+++ b/android-qemu2-glue/net-android.h
@@ -1,4 +1,4 @@
-// Copyright 2015 The Android Open Source Project
+// Copyright 2016 The Android Open Source Project
//
// This software is licensed under the terms of the GNU General Public
// License version 2, as published by the Free Software Foundation, and
@@ -11,17 +11,10 @@
#pragma once
-#include "config-host.h"
#include "android/utils/compiler.h"
ANDROID_BEGIN_HEADER
-#if defined(CONFIG_SLIRP)
-#include "libslirp.h"
-
-void slirp_init_shapers(void* slirp_state,
- void* net_client_state,
- Slirp *slirp);
-#endif // CONFIG_SLIRP
+void android_qemu_init_slirp_shapers(void);
ANDROID_END_HEADER
diff --git a/android-qemu2-glue/qemu-display-agent-impl.cpp b/android-qemu2-glue/qemu-display-agent-impl.cpp
index 8559da5..8167372 100644
--- a/android-qemu2-glue/qemu-display-agent-impl.cpp
+++ b/android-qemu2-glue/qemu-display-agent-impl.cpp
@@ -15,9 +15,12 @@
#include "android-qemu2-glue/qemu-control-impl.h"
extern "C" {
- #include "ui/console.h"
+#include "qemu/osdep.h"
+#include "ui/console.h"
}
+#include <string.h>
+
static void getFrameBuffer(int* w, int* h, int* lineSize, int* bytesPerPixel,
uint8_t** frameBufferData) {
// find the first graphics console
@@ -56,36 +59,51 @@
}
namespace {
- struct dul_data {
- AndroidDisplayUpdateCallback callback;
- void* opaque;
- };
-}
-static void on_display_update(DisplayUpdateListener* dul,
- int x, int y, int w, int h) {
- dul_data* data = static_cast<dul_data*>(dul->opaque);
- data->callback(data->opaque, x, y, w, h);
-}
+// DisplayChangeListener that forwards display updates to an Android callback.
+struct AndroidDisplayChangeListener : public DisplayChangeListener {
+    AndroidDisplayChangeListener(AndroidDisplayUpdateCallback callback,
+                                 void* opaque)
+        // Zero the C base by value-initialization rather than memset(this).
+        : DisplayChangeListener(), mCallback(callback), mOpaque(opaque) {
+        this->ops = &kOps;
+        register_displaychangelistener(this);
+    }
+
+    ~AndroidDisplayChangeListener() {
+        unregister_displaychangelistener(this);
+    }
+
+    AndroidDisplayUpdateCallback mCallback;
+    void* mOpaque;
+    // Update hook: |dcl| is the base sub-object of an instance of this struct.
+    static void onDisplayUpdate(DisplayChangeListener* dcl,
+                                int x, int y, int w, int h) {
+        auto adcl = static_cast<AndroidDisplayChangeListener*>(dcl);
+        adcl->mCallback(adcl->mOpaque, x, y, w, h);
+    }
+
+    static const DisplayChangeListenerOps kOps;
+};
+
+// static
+const DisplayChangeListenerOps AndroidDisplayChangeListener::kOps = {
+ .dpy_name = "qemu2 display",
+ .dpy_refresh = nullptr,
+ .dpy_gfx_update = &onDisplayUpdate,
+};
+
+} // namespace
static void registerUpdateListener(AndroidDisplayUpdateCallback callback,
void* opaque) {
- const auto listener = new DisplayUpdateListener();
- *listener = DisplayUpdateListener();
- listener->dpy_gfx_update = &on_display_update;
-
- auto data = new dul_data();
- data->callback = callback;
- data->opaque = opaque;
- listener->opaque = data;
-
- register_displayupdatelistener(listener);
+ static AndroidDisplayChangeListener* s_listener =
+ new AndroidDisplayChangeListener(callback, opaque);
}
-
static const QAndroidDisplayAgent displayAgent = {
- .getFrameBuffer = &getFrameBuffer,
- .registerUpdateListener = ®isterUpdateListener
+ .getFrameBuffer = &getFrameBuffer,
+ .registerUpdateListener = ®isterUpdateListener
};
const QAndroidDisplayAgent* const gQAndroidDisplayAgent = &displayAgent;
diff --git a/android-qemu2-glue/qemu-net-agent-impl.c b/android-qemu2-glue/qemu-net-agent-impl.c
index 3bb1e72..eac5b77 100644
--- a/android-qemu2-glue/qemu-net-agent-impl.c
+++ b/android-qemu2-glue/qemu-net-agent-impl.c
@@ -15,6 +15,7 @@
#include "android/emulation/control/net_agent.h"
#include "android/utils/sockets.h"
+#include "qemu/osdep.h"
#include "qemu/sockets.h"
#include "net/slirp.h"
@@ -22,21 +23,20 @@
#include "libslirp.h"
static bool isSlirpInited() {
- return net_slirp_is_inited() != 0;
+ return net_slirp_state() != NULL;
}
static bool slirpRedir(bool isUdp, int hostPort,
uint32_t guestAddr, int guestPort) {
struct in_addr host = { .s_addr = htonl(SOCK_ADDRESS_INET_LOOPBACK) };
struct in_addr guest = { .s_addr = 0 };
- return slirp_add_hostfwd(net_slirp_lookup(NULL, NULL, NULL), isUdp,
- host, hostPort, guest, guestPort) == 0;
+ return slirp_add_hostfwd(net_slirp_state(), isUdp, host, hostPort, guest,
+ guestPort) == 0;
}
bool slirpUnredir(bool isUdp, int hostPort) {
struct in_addr host = { .s_addr = htonl(SOCK_ADDRESS_INET_LOOPBACK) };
- return slirp_remove_hostfwd(net_slirp_lookup(NULL, NULL, NULL), isUdp,
- host, hostPort) == 0;
+ return slirp_remove_hostfwd(net_slirp_state(), isUdp, host, hostPort) == 0;
}
diff --git a/android-qemu2-glue/qemu-setup-dns-servers.cpp b/android-qemu2-glue/qemu-setup-dns-servers.cpp
new file mode 100644
index 0000000..108e53d
--- /dev/null
+++ b/android-qemu2-glue/qemu-setup-dns-servers.cpp
@@ -0,0 +1,175 @@
+// Copyright 2015 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "android-qemu2-glue/qemu-setup.h"
+
+#include "android/base/Log.h"
+#include "android/utils/debug.h"
+
+#ifdef _WIN32
+// These includes must happen before qemu/osdep.h to avoid compiler
+// errors regarding FD_SETSIZE being redefined.
+#include "android/base/sockets/Winsock.h"
+#include "android/base/sockets/SocketErrors.h"
+#else
+#include <sys/socket.h>
+#include <netdb.h>
+#endif
+
+extern "C" {
+#include "qemu/osdep.h"
+#include "net/slirp.h"
+#include "slirp/libslirp.h"
+} // extern "C"
+
+#include <vector>
+
+#include <errno.h>
+
+#define MAX_DNS_SERVERS 8
+
+static int s_num_dns_server_addresses = 0;
+static sockaddr_storage s_dns_server_addresses[MAX_DNS_SERVERS] = {};
+
+// Resolve host name |hostName| into a list of sockaddr_storage.
+// On success, return true and append the addresses to |*out|. On failure
+// return false, leave |*out| untouched, and set errno.
+static bool resolveHostNameToList(
+        const char* hostName, std::vector<sockaddr_storage>* out) {
+    addrinfo* res = nullptr;
+    addrinfo hints = {};
+    hints.ai_family = AF_UNSPEC;
+    // One socket type only: with 0, getaddrinfo() may repeat each address per type.
+    hints.ai_socktype = SOCK_DGRAM;
+    int ret = ::getaddrinfo(hostName, nullptr, &hints, &res);
+    if (ret != 0) {
+        // Map the getaddrinfo() error code to an errno value.
+        int err = 0;
+        switch (ret) {
+        case EAI_AGAIN:  // server is down
+        case EAI_FAIL:   // server is sick
+            err = EHOSTDOWN;
+            break;
+/* NOTE that in x86_64-w64-mingw32 both EAI_NODATA and EAI_NONAME are the same */
+#if defined(EAI_NODATA) && (EAI_NODATA != EAI_NONAME)
+        case EAI_NODATA:
+#endif
+        case EAI_NONAME:
+            err = ENOENT;
+            break;
+        case EAI_MEMORY:
+            err = ENOMEM;
+            break;
+        default:
+            err = EINVAL;
+        }
+        errno = err;
+        return false;
+    }
+
+    int count = 0;
+    for (auto r = res; r != nullptr; r = r->ai_next) {
+        sockaddr_storage addr = {};
+        switch (r->ai_family) {
+        case AF_INET:
+            *(struct sockaddr_in *)&addr =
+                    *(const struct sockaddr_in *)r->ai_addr;
+            break;
+        case AF_INET6:
+            *(struct sockaddr_in6 *)&addr =
+                    *(const struct sockaddr_in6 *)r->ai_addr;
+            break;
+        default:
+            continue;  // skip address families other than IPv4/IPv6
+        }
+        out->push_back(addr);
+        count++;
+    }
+    ::freeaddrinfo(res);
+    if (count == 0) {
+        errno = ENOENT;  // name resolved, but to no IPv4/IPv6 address
+    }
+    return (count > 0);
+}
+
+// Parse the comma-separated |dns_servers| list, resolve each entry and save the result.
+bool qemu_android_emulation_setup_dns_servers(const char* dns_servers,
+                                              int* pcount4, int* pcount6) {
+    CHECK(net_slirp_state() != nullptr) << "slirp stack should be inited!";
+    if (!dns_servers || !dns_servers[0]) {
+        // Empty list, use the default behaviour.
+        return false;
+    }
+
+    std::vector<sockaddr_storage> server_addresses;
+    // Separate individual DNS server names, then resolve each one of them
+    // into one or more IP addresses. Support both IPv4 and IPv6 at the same
+    // time.
+    const char* p = dns_servers;
+    while (*p) {
+        const char* next_p;
+        const char* comma = strchr(p, ',');
+        if (!comma) {
+            comma = p + strlen(p);
+            next_p = comma;
+        } else {
+            next_p = comma + 1;
+        }
+        while (p < comma && *p == ' ') p++;
+        while (p < comma && comma[-1] == ' ') comma--;
+        if (comma > p) {
+            // Extract single server name.
+            std::string server(p, comma - p);
+            if (!resolveHostNameToList(server.c_str(), &server_addresses)) {
+                dwarning("Ignoring invalid DNS address: [%s]\n", server.c_str());
+            }
+        }
+        p = next_p;
+    }
+
+    if (server_addresses.empty()) {
+        return false;
+    }
+    // The static table holds MAX_DNS_SERVERS entries; drop the excess so memcpy() fits.
+    if (server_addresses.size() > MAX_DNS_SERVERS) {
+        dwarning("Too many DNS addresses, keeping the first %d\n", MAX_DNS_SERVERS);
+        server_addresses.resize(MAX_DNS_SERVERS);
+    }
+    const int count = static_cast<int>(server_addresses.size());
+    // Save it for qemu_android_emulation_init_slirp().
+    s_num_dns_server_addresses = count;
+    memcpy(s_dns_server_addresses, &server_addresses[0],
+           count * sizeof(server_addresses[0]));
+
+    // Count number of IPv4 and IPv6 DNS servers.
+    int count4 = 0;
+    int count6 = 0;
+    for (const auto& item : server_addresses) {
+        if (item.ss_family == AF_INET) {
+            count4 += 1;
+        } else if (item.ss_family == AF_INET6) {
+            count6 += 1;
+        }
+    }
+    *pcount4 = count4;
+    *pcount6 = count6;
+    return true;
+}
+
+void qemu_android_emulation_init_slirp(void) {
+ slirp_init_custom_dns_servers(static_cast<Slirp*>(net_slirp_state()),
+ s_dns_server_addresses,
+ s_num_dns_server_addresses);
+}
+
diff --git a/android-qemu2-glue/qemu-setup.cpp b/android-qemu2-glue/qemu-setup.cpp
index 0bbdb77..bdb0af8 100644
--- a/android-qemu2-glue/qemu-setup.cpp
+++ b/android-qemu2-glue/qemu-setup.cpp
@@ -18,27 +18,36 @@
#include "android/base/Log.h"
#include "android/console.h"
#include "android-qemu2-glue/emulation/android_pipe_device.h"
-#include "android-qemu2-glue/emulation/VmLock.h"
-#include "android-qemu2-glue/qemu-control-impl.h"
+#include "android-qemu2-glue/emulation/charpipe.h"
#include "android-qemu2-glue/emulation/goldfish_sync.h"
+#include "android-qemu2-glue/emulation/VmLock.h"
+#include "android-qemu2-glue/looper-qemu.h"
+#include "android-qemu2-glue/android_qemud.h"
+#include "android-qemu2-glue/net-android.h"
+#include "android-qemu2-glue/qemu-control-impl.h"
extern "C" {
+#include "qemu/osdep.h"
+#include "qemu-common.h"
#include "qemu/main-loop.h"
+#include "qemu/thread.h"
} // extern "C"
using android::VmLock;
-bool qemu_android_emulation_setup() {
- static const AndroidConsoleAgents consoleAgents = {
- gQAndroidBatteryAgent,
- gQAndroidFingerAgent,
- gQAndroidLocationAgent,
- gQAndroidTelephonyAgent,
- gQAndroidUserEventAgent,
- gQAndroidVmOperations,
- gQAndroidNetAgent
- };
+bool qemu_android_emulation_early_setup() {
+ // Ensure that the looper is set for the main thread and for any
+ // future thread created by QEMU.
+ qemu_looper_setForThread();
+ qemu_thread_register_setup_callback(qemu_looper_setForThread);
+ // Ensure charpipes i/o are handled properly.
+ main_loop_register_poll_callback(qemu_charpipe_poll);
+
+ // Register qemud-related snapshot callbacks.
+ android_qemu2_qemud_init();
+
+ // Ensure the VmLock implementation is setup.
VmLock* vmLock = new qemu2::VmLock();
VmLock* prevVmLock = VmLock::set(vmLock);
CHECK(prevVmLock == nullptr) << "Another VmLock was already installed!";
@@ -48,9 +57,30 @@
return false;
}
+ // Initialize host sync service.
if (!qemu_android_sync_init(vmLock)) {
return false;
}
+ return true;
+}
+
+bool qemu_android_emulation_setup() {
+ android_qemu_init_slirp_shapers();
+
+ // Initialize UI/console agents.
+ static const AndroidConsoleAgents consoleAgents = {
+ gQAndroidBatteryAgent,
+ gQAndroidFingerAgent,
+ gQAndroidLocationAgent,
+ gQAndroidTelephonyAgent,
+ gQAndroidUserEventAgent,
+ gQAndroidVmOperations,
+ gQAndroidNetAgent,
+ };
+
return android_emulation_setup(&consoleAgents);
}
+
+void qemu_android_emulation_teardown() {
+}
diff --git a/android-qemu2-glue/qemu-setup.h b/android-qemu2-glue/qemu-setup.h
index 1be84b4..aaf2f0d 100644
--- a/android-qemu2-glue/qemu-setup.h
+++ b/android-qemu2-glue/qemu-setup.h
@@ -20,6 +20,32 @@
ANDROID_BEGIN_HEADER
+/* Call this function at the start of the QEMU main() function to perform
+ * early setup of Android emulation. This ensures that the glue injects
+ * all relevant callbacks into QEMU2 and sets up the looper for the
+ * main thread. Return true on success, false otherwise. */
+extern bool qemu_android_emulation_early_setup(void);
+
+/* Call this function to setup a list of custom DNS servers to be used
+ * by the network stack. |dns_servers| must be the content of the
+ * -dns-server option, i.e. a comma-separated list of DNS server addresses.
+ * On success, return true and set |*count4| and |*count6| to the number
+ * of IPv4 and IPv6 IP addresses, respectively. Return false on failure. */
+extern bool qemu_android_emulation_setup_dns_servers(const char* dns_servers,
+ int* count4,
+ int* count6);
+
+/* Call this function after the slirp stack has been initialized, typically
+ * by calling net_init_clients() in vl.c, to inject Android-specific features
+ * (e.g. custom DNS server list) into the network stack. */
+extern void qemu_android_emulation_init_slirp(void);
+
+/* Call this function after the QEMU main() function has inited the
+ * machine, but before it has started it. */
extern bool qemu_android_emulation_setup(void);
+/* Call this function at the end of the QEMU main() function, just
+ * after the main loop has returned due to a machine exit. */
+extern void qemu_android_emulation_teardown(void);
+
ANDROID_END_HEADER
diff --git a/android-qemu2-glue/qemu-user-event-agent-impl.c b/android-qemu2-glue/qemu-user-event-agent-impl.c
index a19f1e1..c43e68c 100644
--- a/android-qemu2-glue/qemu-user-event-agent-impl.c
+++ b/android-qemu2-glue/qemu-user-event-agent-impl.c
@@ -13,6 +13,8 @@
#include "android/multitouch-screen.h"
#include "android/utils/debug.h"
+
+#include "qemu/osdep.h"
#include "hw/input/goldfish_events.h"
#include "ui/console.h"
diff --git a/android-qemu2-glue/qemu-vm-operations-impl.c b/android-qemu2-glue/qemu-vm-operations-impl.c
index 8cf1587..2f745ba 100644
--- a/android-qemu2-glue/qemu-vm-operations-impl.c
+++ b/android-qemu2-glue/qemu-vm-operations-impl.c
@@ -16,8 +16,8 @@
#include "android/emulation/control/callbacks.h"
#include "android/emulation/control/vm_operations.h"
-//#include "cpu.h"
-//#include "monitor/monitor.h"
+
+#include "qemu/osdep.h"
#include "sysemu/sysemu.h"
#include <stdlib.h>
diff --git a/android-qemu2-glue/qemu-window-agent-impl.c b/android-qemu2-glue/qemu-window-agent-impl.c
index 9687956..82b4996 100644
--- a/android-qemu2-glue/qemu-window-agent-impl.c
+++ b/android-qemu2-glue/qemu-window-agent-impl.c
@@ -21,4 +21,3 @@
const QAndroidEmulatorWindowAgent* const gQAndroidEmulatorWindowAgent =
&sQAndroidEmulatorWindowAgent;
-
diff --git a/android-qemu2-glue/scripts/gen-qemu2-sources-mk.py b/android-qemu2-glue/scripts/gen-qemu2-sources-mk.py
index 0404ba8..a951a0a 100755
--- a/android-qemu2-glue/scripts/gen-qemu2-sources-mk.py
+++ b/android-qemu2-glue/scripts/gen-qemu2-sources-mk.py
@@ -26,29 +26,28 @@
LINK_QEMU_PREFIX = 'LINK-qemu-system-'
IGNORED_OBJECTS = [
'../audio/sdlaudio.o',
- '../disas/arm-a64.o',
- '../disas/libvixl/a64/decoder-a64.o',
- '../disas/libvixl/a64/disasm-a64.o',
- '../disas/libvixl/a64/instructions-a64.o',
- '../disas/libvixl/utils.o',
'gdbstub-xml.o',
- '../hw/display/framebuffer.o',
- 'hw/misc/android_boot_properties.o',
- 'hw/misc/android_pipe_opengles.o',
+ 'hw/i386/acpi-build.o',
+ 'hw/i386/pc_piix.o',
+ '../qmp-introspect.o',
'../qmp-marshal.o',
'trace/generated-helpers.o',
'../ui/sdl_zoom.o',
'../ui/sdl.o',
'../ui/sdl2.o',
+ '../ui/sdl2-2d.o',
+ '../ui/sdl2-input.o',
+ '../vl.o',
'/version.o', # something from the Windows build
]
CC_OBJECTS = [
'disas/arm-a64.o',
- 'disas/libvixl/a64/decoder-a64.o',
- 'disas/libvixl/a64/disasm-a64.o',
- 'disas/libvixl/a64/instructions-a64.o',
- 'disas/libvixl/utils.o',
+ 'disas/libvixl/vixl/a64/decoder-a64.o',
+ 'disas/libvixl/vixl/a64/disasm-a64.o',
+ 'disas/libvixl/vixl/a64/instructions-a64.o',
+ 'disas/libvixl/vixl/compiler-intrinsics.o',
+ 'disas/libvixl/vixl/utils.o',
]
# objects which have to be moved to *TARGET files,
diff --git a/android-qemu2-glue/stubs/win32-stubs.c b/android-qemu2-glue/stubs/win32-stubs.c
new file mode 100644
index 0000000..aecdc29
--- /dev/null
+++ b/android-qemu2-glue/stubs/win32-stubs.c
@@ -0,0 +1,54 @@
+#include "qemu/osdep.h"
+
+#include "android/utils/win32_unicode.h"
+
+HANDLE win32CreateFile(
+ LPCTSTR lpFileName,
+ DWORD dwDesiredAccess,
+ DWORD dwShareMode,
+ LPSECURITY_ATTRIBUTES lpSecurityAttributes,
+ DWORD dwCreationDisposition,
+ DWORD dwFlagsAndAttributes,
+ HANDLE hTemplateFile)
+{
+ HANDLE result = INVALID_HANDLE_VALUE;
+ wchar_t* wide_name = win32_utf8_to_utf16_str(lpFileName);
+ if (wide_name != NULL) {
+ result = CreateFileW(wide_name, dwDesiredAccess,
+ dwShareMode, lpSecurityAttributes,
+ dwCreationDisposition, dwFlagsAndAttributes,
+ hTemplateFile);
+ free(wide_name);
+ }
+ return result;
+}
+
+DWORD win32GetCurrentDirectory(
+ DWORD nBufferLength,
+ LPTSTR lpBuffer)
+{
+ wchar_t wide_buffer[MAX_PATH];
+ DWORD ret = GetCurrentDirectoryW(MAX_PATH, wide_buffer);
+ if (ret == 0 || ret > MAX_PATH) {
+ return ret;
+ }
+ int ret2 = win32_utf16_to_utf8_buf(wide_buffer, lpBuffer, nBufferLength);
+ return (ret2 < 0 || (DWORD)ret2 > nBufferLength) ? 0 : (DWORD)ret2;
+}
+
+DWORD win32GetModuleFileName(
+ HMODULE hModule,
+ LPTSTR lpFilename,
+ DWORD nSize)
+{
+ wchar_t wide_buffer[MAX_PATH];
+ if (!GetModuleFileNameW(hModule, wide_buffer, MAX_PATH)) {
+ return 0;
+ }
+
+ int ret = win32_utf16_to_utf8_buf(wide_buffer, lpFilename, nSize);
+ if (ret < 0 || ret >= nSize) {
+ return 0;
+ }
+ return (DWORD)ret;
+}
diff --git a/android-qemu2-glue/telephony/modem_init.c b/android-qemu2-glue/telephony/modem_init.c
index 0377c25..0e397a6 100644
--- a/android-qemu2-glue/telephony/modem_init.c
+++ b/android-qemu2-glue/telephony/modem_init.c
@@ -14,8 +14,11 @@
#include "android/telephony/modem_driver.h"
#include "android-qemu2-glue/utils/stream.h"
+#include "qemu/osdep.h"
#include "hw/hw.h"
+#include <assert.h>
+
#define MODEM_DEV_STATE_SAVE_VERSION 1
static void modem_state_save(QEMUFile* file, void* opaque)
@@ -41,13 +44,13 @@
void qemu_android_modem_init(int base_port) {
android_modem_init(base_port);
- if (android_modem_serial_line != NULL) {
- register_savevm(NULL,
- "android_modem",
- 0,
- MODEM_DEV_STATE_SAVE_VERSION,
- modem_state_save,
- modem_state_load,
- android_modem);
- }
+ assert(android_modem_serial_line != NULL);
+
+ register_savevm(NULL,
+ "android_modem",
+ 0,
+ MODEM_DEV_STATE_SAVE_VERSION,
+ modem_state_save,
+ modem_state_load,
+ android_modem);
}
diff --git a/arch_init.c b/arch_init.c
index b94d23f..1d09f32 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -21,45 +21,19 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <stdint.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#ifndef _WIN32
-#include <sys/types.h>
-#include <sys/mman.h>
-#endif
-#include "config.h"
-#include "monitor/monitor.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
#include "sysemu/sysemu.h"
-#include "qemu/bitops.h"
-#include "qemu/bitmap.h"
#include "sysemu/arch_init.h"
-#include "audio/audio.h"
-#include "hw/i386/pc.h"
#include "hw/pci/pci.h"
#include "hw/audio/audio.h"
-#include "sysemu/kvm.h"
-#include "migration/migration.h"
-#include "hw/i386/smbios.h"
-#include "exec/address-spaces.h"
-#include "hw/audio/pcspk.h"
-#include "migration/page_cache.h"
+#include "hw/smbios/smbios.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
#include "qmp-commands.h"
-#include "trace.h"
-#include "exec/cpu-all.h"
-#include "exec/ram_addr.h"
#include "hw/acpi/acpi.h"
-#include "qemu/host-utils.h"
-
-#ifdef DEBUG_ARCH_INIT
-#define DPRINTF(fmt, ...) \
- do { fprintf(stdout, "arch_init: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
+#include "qemu/help_option.h"
#ifdef TARGET_SPARC
int graphic_width = 1024;
@@ -109,23 +83,6 @@
#endif
const uint32_t arch_type = QEMU_ARCH;
-static bool mig_throttle_on;
-static int dirty_rate_high_cnt;
-static void check_guest_throttling(void);
-
-static uint64_t bitmap_sync_count;
-
-/***********************************************************/
-/* ram save/restore */
-
-#define RAM_SAVE_FLAG_FULL 0x01 /* Obsolete, not used anymore */
-#define RAM_SAVE_FLAG_COMPRESS 0x02
-#define RAM_SAVE_FLAG_MEM_SIZE 0x04
-#define RAM_SAVE_FLAG_PAGE 0x08
-#define RAM_SAVE_FLAG_EOS 0x10
-#define RAM_SAVE_FLAG_CONTINUE 0x20
-#define RAM_SAVE_FLAG_XBZRLE 0x40
-/* 0x80 is reserved in migration.h start with 0x100 next */
static struct defconfig_file {
const char *filename;
@@ -133,12 +90,9 @@
bool userconfig;
} default_config_files[] = {
{ CONFIG_QEMU_CONFDIR "/qemu.conf", true },
- { CONFIG_QEMU_CONFDIR "/target-" TARGET_NAME ".conf", true },
{ NULL }, /* end of list */
};
-static const uint8_t ZERO_TARGET_PAGE[TARGET_PAGE_SIZE];
-
int qemu_read_default_config_files(bool userconfig)
{
int ret;
@@ -157,1026 +111,6 @@
return 0;
}
-static inline bool is_zero_range(uint8_t *p, uint64_t size)
-{
- return buffer_find_nonzero_offset(p, size) == size;
-}
-
-/* struct contains XBZRLE cache and a static page
- used by the compression */
-static struct {
- /* buffer used for XBZRLE encoding */
- uint8_t *encoded_buf;
- /* buffer for storing page content */
- uint8_t *current_buf;
- /* Cache for XBZRLE, Protected by lock. */
- PageCache *cache;
- QemuMutex lock;
-} XBZRLE;
-
-/* buffer used for XBZRLE decoding */
-static uint8_t *xbzrle_decoded_buf;
-
-static void XBZRLE_cache_lock(void)
-{
- if (migrate_use_xbzrle())
- qemu_mutex_lock(&XBZRLE.lock);
-}
-
-static void XBZRLE_cache_unlock(void)
-{
- if (migrate_use_xbzrle())
- qemu_mutex_unlock(&XBZRLE.lock);
-}
-
-/*
- * called from qmp_migrate_set_cache_size in main thread, possibly while
- * a migration is in progress.
- * A running migration maybe using the cache and might finish during this
- * call, hence changes to the cache are protected by XBZRLE.lock().
- */
-int64_t xbzrle_cache_resize(int64_t new_size)
-{
- PageCache *new_cache;
- int64_t ret;
-
- if (new_size < TARGET_PAGE_SIZE) {
- return -1;
- }
-
- XBZRLE_cache_lock();
-
- if (XBZRLE.cache != NULL) {
- if (pow2floor(new_size) == migrate_xbzrle_cache_size()) {
- goto out_new_size;
- }
- new_cache = cache_init(new_size / TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
- if (!new_cache) {
- error_report("Error creating cache");
- ret = -1;
- goto out;
- }
-
- cache_fini(XBZRLE.cache);
- XBZRLE.cache = new_cache;
- }
-
-out_new_size:
- ret = pow2floor(new_size);
-out:
- XBZRLE_cache_unlock();
- return ret;
-}
-
-/* accounting for migration statistics */
-typedef struct AccountingInfo {
- uint64_t dup_pages;
- uint64_t skipped_pages;
- uint64_t norm_pages;
- uint64_t iterations;
- uint64_t xbzrle_bytes;
- uint64_t xbzrle_pages;
- uint64_t xbzrle_cache_miss;
- double xbzrle_cache_miss_rate;
- uint64_t xbzrle_overflows;
-} AccountingInfo;
-
-static AccountingInfo acct_info;
-
-static void acct_clear(void)
-{
- memset(&acct_info, 0, sizeof(acct_info));
-}
-
-uint64_t dup_mig_bytes_transferred(void)
-{
- return acct_info.dup_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t dup_mig_pages_transferred(void)
-{
- return acct_info.dup_pages;
-}
-
-uint64_t skipped_mig_bytes_transferred(void)
-{
- return acct_info.skipped_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t skipped_mig_pages_transferred(void)
-{
- return acct_info.skipped_pages;
-}
-
-uint64_t norm_mig_bytes_transferred(void)
-{
- return acct_info.norm_pages * TARGET_PAGE_SIZE;
-}
-
-uint64_t norm_mig_pages_transferred(void)
-{
- return acct_info.norm_pages;
-}
-
-uint64_t xbzrle_mig_bytes_transferred(void)
-{
- return acct_info.xbzrle_bytes;
-}
-
-uint64_t xbzrle_mig_pages_transferred(void)
-{
- return acct_info.xbzrle_pages;
-}
-
-uint64_t xbzrle_mig_pages_cache_miss(void)
-{
- return acct_info.xbzrle_cache_miss;
-}
-
-double xbzrle_mig_cache_miss_rate(void)
-{
- return acct_info.xbzrle_cache_miss_rate;
-}
-
-uint64_t xbzrle_mig_pages_overflow(void)
-{
- return acct_info.xbzrle_overflows;
-}
-
-static size_t save_block_hdr(QEMUFile *f, RAMBlock *block, ram_addr_t offset,
- int cont, int flag)
-{
- size_t size;
-
- qemu_put_be64(f, offset | cont | flag);
- size = 8;
-
- if (!cont) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr,
- strlen(block->idstr));
- size += 1 + strlen(block->idstr);
- }
- return size;
-}
-
-/* This is the last block that we have visited serching for dirty pages
- */
-static RAMBlock *last_seen_block;
-/* This is the last block from where we have sent data */
-static RAMBlock *last_sent_block;
-static ram_addr_t last_offset;
-static unsigned long *migration_bitmap;
-static uint64_t migration_dirty_pages;
-static uint32_t last_version;
-static bool ram_bulk_stage;
-
-/* Update the xbzrle cache to reflect a page that's been sent as all 0.
- * The important thing is that a stale (not-yet-0'd) page be replaced
- * by the new data.
- * As a bonus, if the page wasn't in the cache it gets added so that
- * when a small write is made into the 0'd page it gets XBZRLE sent
- */
-static void xbzrle_cache_zero_page(ram_addr_t current_addr)
-{
- if (ram_bulk_stage || !migrate_use_xbzrle()) {
- return;
- }
-
- /* We don't care if this fails to allocate a new cache page
- * as long as it updated an old one */
- cache_insert(XBZRLE.cache, current_addr, ZERO_TARGET_PAGE);
-}
-
-#define ENCODING_FLAG_XBZRLE 0x1
-
-static int save_xbzrle_page(QEMUFile *f, uint8_t **current_data,
- ram_addr_t current_addr, RAMBlock *block,
- ram_addr_t offset, int cont, bool last_stage)
-{
- int encoded_len = 0, bytes_sent = -1;
- uint8_t *prev_cached_page;
-
- if (!cache_is_cached(XBZRLE.cache, current_addr)) {
- acct_info.xbzrle_cache_miss++;
- if (!last_stage) {
- if (cache_insert(XBZRLE.cache, current_addr, *current_data) == -1) {
- return -1;
- } else {
- /* update *current_data when the page has been
- inserted into cache */
- *current_data = get_cached_data(XBZRLE.cache, current_addr);
- }
- }
- return -1;
- }
-
- prev_cached_page = get_cached_data(XBZRLE.cache, current_addr);
-
- /* save current buffer into memory */
- memcpy(XBZRLE.current_buf, *current_data, TARGET_PAGE_SIZE);
-
- /* XBZRLE encoding (if there is no overflow) */
- encoded_len = xbzrle_encode_buffer(prev_cached_page, XBZRLE.current_buf,
- TARGET_PAGE_SIZE, XBZRLE.encoded_buf,
- TARGET_PAGE_SIZE);
- if (encoded_len == 0) {
- DPRINTF("Skipping unmodified page\n");
- return 0;
- } else if (encoded_len == -1) {
- DPRINTF("Overflow\n");
- acct_info.xbzrle_overflows++;
- /* update data in the cache */
- if (!last_stage) {
- memcpy(prev_cached_page, *current_data, TARGET_PAGE_SIZE);
- *current_data = prev_cached_page;
- }
- return -1;
- }
-
- /* we need to update the data in the cache, in order to get the same data */
- if (!last_stage) {
- memcpy(prev_cached_page, XBZRLE.current_buf, TARGET_PAGE_SIZE);
- }
-
- /* Send XBZRLE based compressed page */
- bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_XBZRLE);
- qemu_put_byte(f, ENCODING_FLAG_XBZRLE);
- qemu_put_be16(f, encoded_len);
- qemu_put_buffer(f, XBZRLE.encoded_buf, encoded_len);
- bytes_sent += encoded_len + 1 + 2;
- acct_info.xbzrle_pages++;
- acct_info.xbzrle_bytes += bytes_sent;
-
- return bytes_sent;
-}
-
-static inline
-ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr,
- ram_addr_t start)
-{
- unsigned long base = mr->ram_addr >> TARGET_PAGE_BITS;
- unsigned long nr = base + (start >> TARGET_PAGE_BITS);
- uint64_t mr_size = TARGET_PAGE_ALIGN(memory_region_size(mr));
- unsigned long size = base + (mr_size >> TARGET_PAGE_BITS);
-
- unsigned long next;
-
- if (ram_bulk_stage && nr > base) {
- next = nr + 1;
- } else {
- next = find_next_bit(migration_bitmap, size, nr);
- }
-
- if (next < size) {
- clear_bit(next, migration_bitmap);
- migration_dirty_pages--;
- }
- return (next - base) << TARGET_PAGE_BITS;
-}
-
-static inline bool migration_bitmap_set_dirty(ram_addr_t addr)
-{
- bool ret;
- int nr = addr >> TARGET_PAGE_BITS;
-
- ret = test_and_set_bit(nr, migration_bitmap);
-
- if (!ret) {
- migration_dirty_pages++;
- }
- return ret;
-}
-
-static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length)
-{
- ram_addr_t addr;
- unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);
-
- /* start address is aligned at the start of a word? */
- if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) {
- int k;
- int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
- unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION];
-
- for (k = page; k < page + nr; k++) {
- if (src[k]) {
- unsigned long new_dirty;
- new_dirty = ~migration_bitmap[k];
- migration_bitmap[k] |= src[k];
- new_dirty &= src[k];
- migration_dirty_pages += ctpopl(new_dirty);
- src[k] = 0;
- }
- }
- } else {
- for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
- if (cpu_physical_memory_get_dirty(start + addr,
- TARGET_PAGE_SIZE,
- DIRTY_MEMORY_MIGRATION)) {
- cpu_physical_memory_reset_dirty(start + addr,
- TARGET_PAGE_SIZE,
- DIRTY_MEMORY_MIGRATION);
- migration_bitmap_set_dirty(start + addr);
- }
- }
- }
-}
-
-
-/* Needs iothread lock! */
-/* Fix me: there are too many global variables used in migration process. */
-static int64_t start_time;
-static int64_t bytes_xfer_prev;
-static int64_t num_dirty_pages_period;
-
-static void migration_bitmap_sync_init(void)
-{
- start_time = 0;
- bytes_xfer_prev = 0;
- num_dirty_pages_period = 0;
-}
-
-static void migration_bitmap_sync(void)
-{
- RAMBlock *block;
- uint64_t num_dirty_pages_init = migration_dirty_pages;
- MigrationState *s = migrate_get_current();
- int64_t end_time;
- int64_t bytes_xfer_now;
- static uint64_t xbzrle_cache_miss_prev;
- static uint64_t iterations_prev;
-
- bitmap_sync_count++;
-
- if (!bytes_xfer_prev) {
- bytes_xfer_prev = ram_bytes_transferred();
- }
-
- if (!start_time) {
- start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
- }
-
- trace_migration_bitmap_sync_start();
- address_space_sync_dirty_bitmap(&address_space_memory);
-
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- migration_bitmap_sync_range(block->mr->ram_addr, block->length);
- }
- trace_migration_bitmap_sync_end(migration_dirty_pages
- - num_dirty_pages_init);
- num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
- end_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
-
- /* more than 1 second = 1000 millisecons */
- if (end_time > start_time + 1000) {
- if (migrate_auto_converge()) {
- /* The following detection logic can be refined later. For now:
- Check to see if the dirtied bytes is 50% more than the approx.
- amount of bytes that just got transferred since the last time we
- were in this routine. If that happens >N times (for now N==4)
- we turn on the throttle down logic */
- bytes_xfer_now = ram_bytes_transferred();
- if (s->dirty_pages_rate &&
- (num_dirty_pages_period * TARGET_PAGE_SIZE >
- (bytes_xfer_now - bytes_xfer_prev)/2) &&
- (dirty_rate_high_cnt++ > 4)) {
- trace_migration_throttle();
- mig_throttle_on = true;
- dirty_rate_high_cnt = 0;
- }
- bytes_xfer_prev = bytes_xfer_now;
- } else {
- mig_throttle_on = false;
- }
- if (migrate_use_xbzrle()) {
- if (iterations_prev != 0) {
- acct_info.xbzrle_cache_miss_rate =
- (double)(acct_info.xbzrle_cache_miss -
- xbzrle_cache_miss_prev) /
- (acct_info.iterations - iterations_prev);
- }
- iterations_prev = acct_info.iterations;
- xbzrle_cache_miss_prev = acct_info.xbzrle_cache_miss;
- }
- s->dirty_pages_rate = num_dirty_pages_period * 1000
- / (end_time - start_time);
- s->dirty_bytes_rate = s->dirty_pages_rate * TARGET_PAGE_SIZE;
- start_time = end_time;
- num_dirty_pages_period = 0;
- s->dirty_sync_count = bitmap_sync_count;
- }
-}
-
-/*
- * ram_save_page: Send the given page to the stream
- *
- * Returns: Number of bytes written.
- */
-static int ram_save_page(QEMUFile *f, RAMBlock* block, ram_addr_t offset,
- bool last_stage)
-{
- int bytes_sent;
- int cont;
- ram_addr_t current_addr;
- MemoryRegion *mr = block->mr;
- uint8_t *p;
- int ret;
- bool send_async = true;
-
- cont = (block == last_sent_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
-
- p = memory_region_get_ram_ptr(mr) + offset;
-
- /* In doubt sent page as normal */
- bytes_sent = -1;
- ret = ram_control_save_page(f, block->offset,
- offset, TARGET_PAGE_SIZE, &bytes_sent);
-
- XBZRLE_cache_lock();
-
- current_addr = block->offset + offset;
- if (ret != RAM_SAVE_CONTROL_NOT_SUPP) {
- if (ret != RAM_SAVE_CONTROL_DELAYED) {
- if (bytes_sent > 0) {
- acct_info.norm_pages++;
- } else if (bytes_sent == 0) {
- acct_info.dup_pages++;
- }
- }
- } else if (is_zero_range(p, TARGET_PAGE_SIZE)) {
- acct_info.dup_pages++;
- bytes_sent = save_block_hdr(f, block, offset, cont,
- RAM_SAVE_FLAG_COMPRESS);
- qemu_put_byte(f, 0);
- bytes_sent++;
- /* Must let xbzrle know, otherwise a previous (now 0'd) cached
- * page would be stale
- */
- xbzrle_cache_zero_page(current_addr);
- } else if (!ram_bulk_stage && migrate_use_xbzrle()) {
- bytes_sent = save_xbzrle_page(f, &p, current_addr, block,
- offset, cont, last_stage);
- if (!last_stage) {
- /* Can't send this cached data async, since the cache page
- * might get updated before it gets to the wire
- */
- send_async = false;
- }
- }
-
- /* XBZRLE overflow or normal page */
- if (bytes_sent == -1) {
- bytes_sent = save_block_hdr(f, block, offset, cont, RAM_SAVE_FLAG_PAGE);
- if (send_async) {
- qemu_put_buffer_async(f, p, TARGET_PAGE_SIZE);
- } else {
- qemu_put_buffer(f, p, TARGET_PAGE_SIZE);
- }
- bytes_sent += TARGET_PAGE_SIZE;
- acct_info.norm_pages++;
- }
-
- XBZRLE_cache_unlock();
-
- return bytes_sent;
-}
-
-/*
- * ram_find_and_save_block: Finds a page to send and sends it to f
- *
- * Returns: The number of bytes written.
- * 0 means no dirty pages
- */
-
-static int ram_find_and_save_block(QEMUFile *f, bool last_stage)
-{
- RAMBlock *block = last_seen_block;
- ram_addr_t offset = last_offset;
- bool complete_round = false;
- int bytes_sent = 0;
- MemoryRegion *mr;
-
- if (!block)
- block = QTAILQ_FIRST(&ram_list.blocks);
-
- while (true) {
- mr = block->mr;
- offset = migration_bitmap_find_and_reset_dirty(mr, offset);
- if (complete_round && block == last_seen_block &&
- offset >= last_offset) {
- break;
- }
- if (offset >= block->length) {
- offset = 0;
- block = QTAILQ_NEXT(block, next);
- if (!block) {
- block = QTAILQ_FIRST(&ram_list.blocks);
- complete_round = true;
- ram_bulk_stage = false;
- }
- } else {
- bytes_sent = ram_save_page(f, block, offset, last_stage);
-
- /* if page is unmodified, continue to the next */
- if (bytes_sent > 0) {
- last_sent_block = block;
- break;
- }
- }
- }
- last_seen_block = block;
- last_offset = offset;
-
- return bytes_sent;
-}
-
-static uint64_t bytes_transferred;
-
-void acct_update_position(QEMUFile *f, size_t size, bool zero)
-{
- uint64_t pages = size / TARGET_PAGE_SIZE;
- if (zero) {
- acct_info.dup_pages += pages;
- } else {
- acct_info.norm_pages += pages;
- bytes_transferred += size;
- qemu_update_position(f, size);
- }
-}
-
-static ram_addr_t ram_save_remaining(void)
-{
- return migration_dirty_pages;
-}
-
-uint64_t ram_bytes_remaining(void)
-{
- return ram_save_remaining() * TARGET_PAGE_SIZE;
-}
-
-uint64_t ram_bytes_transferred(void)
-{
- return bytes_transferred;
-}
-
-uint64_t ram_bytes_total(void)
-{
- RAMBlock *block;
- uint64_t total = 0;
-
- QTAILQ_FOREACH(block, &ram_list.blocks, next)
- total += block->length;
-
- return total;
-}
-
-void free_xbzrle_decoded_buf(void)
-{
- g_free(xbzrle_decoded_buf);
- xbzrle_decoded_buf = NULL;
-}
-
-static void migration_end(void)
-{
- if (migration_bitmap) {
- memory_global_dirty_log_stop();
- g_free(migration_bitmap);
- migration_bitmap = NULL;
- }
-
- XBZRLE_cache_lock();
- if (XBZRLE.cache) {
- cache_fini(XBZRLE.cache);
- g_free(XBZRLE.encoded_buf);
- g_free(XBZRLE.current_buf);
- XBZRLE.cache = NULL;
- XBZRLE.encoded_buf = NULL;
- XBZRLE.current_buf = NULL;
- }
- XBZRLE_cache_unlock();
-}
-
-static void ram_migration_cancel(void *opaque)
-{
- migration_end();
-}
-
-static void reset_ram_globals(void)
-{
- last_seen_block = NULL;
- last_sent_block = NULL;
- last_offset = 0;
- last_version = ram_list.version;
- ram_bulk_stage = true;
-}
-
-#define MAX_WAIT 50 /* ms, half buffered_file limit */
-
-static int ram_save_setup(QEMUFile *f, void *opaque)
-{
- RAMBlock *block;
- int64_t ram_bitmap_pages; /* Size of bitmap in pages, including gaps */
-
- mig_throttle_on = false;
- dirty_rate_high_cnt = 0;
- bitmap_sync_count = 0;
- migration_bitmap_sync_init();
-
- if (migrate_use_xbzrle()) {
- XBZRLE_cache_lock();
- XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
- TARGET_PAGE_SIZE,
- TARGET_PAGE_SIZE);
- if (!XBZRLE.cache) {
- XBZRLE_cache_unlock();
- error_report("Error creating cache");
- return -1;
- }
- XBZRLE_cache_unlock();
-
- /* We prefer not to abort if there is no memory */
- XBZRLE.encoded_buf = g_try_malloc0(TARGET_PAGE_SIZE);
- if (!XBZRLE.encoded_buf) {
- error_report("Error allocating encoded_buf");
- return -1;
- }
-
- XBZRLE.current_buf = g_try_malloc(TARGET_PAGE_SIZE);
- if (!XBZRLE.current_buf) {
- error_report("Error allocating current_buf");
- g_free(XBZRLE.encoded_buf);
- XBZRLE.encoded_buf = NULL;
- return -1;
- }
-
- acct_clear();
- }
-
- qemu_mutex_lock_iothread();
- qemu_mutex_lock_ramlist();
- bytes_transferred = 0;
- reset_ram_globals();
-
- ram_bitmap_pages = last_ram_offset() >> TARGET_PAGE_BITS;
- migration_bitmap = bitmap_new(ram_bitmap_pages);
- bitmap_set(migration_bitmap, 0, ram_bitmap_pages);
-
- /*
- * Count the total number of pages used by ram blocks not including any
- * gaps due to alignment or unplugs.
- */
- migration_dirty_pages = 0;
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- uint64_t block_pages;
-
- block_pages = block->length >> TARGET_PAGE_BITS;
- migration_dirty_pages += block_pages;
- }
-
- memory_global_dirty_log_start();
- migration_bitmap_sync();
- qemu_mutex_unlock_iothread();
-
- qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
-
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- qemu_put_byte(f, strlen(block->idstr));
- qemu_put_buffer(f, (uint8_t *)block->idstr, strlen(block->idstr));
- qemu_put_be64(f, block->length);
- }
-
- qemu_mutex_unlock_ramlist();
-
- ram_control_before_iterate(f, RAM_CONTROL_SETUP);
- ram_control_after_iterate(f, RAM_CONTROL_SETUP);
-
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
- return 0;
-}
-
-static int ram_save_iterate(QEMUFile *f, void *opaque)
-{
- int ret;
- int i;
- int64_t t0;
- int total_sent = 0;
-
- qemu_mutex_lock_ramlist();
-
- if (ram_list.version != last_version) {
- reset_ram_globals();
- }
-
- ram_control_before_iterate(f, RAM_CONTROL_ROUND);
-
- t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- i = 0;
- while ((ret = qemu_file_rate_limit(f)) == 0) {
- int bytes_sent;
-
- bytes_sent = ram_find_and_save_block(f, false);
- /* no more blocks to sent */
- if (bytes_sent == 0) {
- break;
- }
- total_sent += bytes_sent;
- acct_info.iterations++;
- check_guest_throttling();
- /* we want to check in the 1st loop, just in case it was the 1st time
- and we had to sync the dirty bitmap.
- qemu_get_clock_ns() is a bit expensive, so we only check each some
- iterations
- */
- if ((i & 63) == 0) {
- uint64_t t1 = (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - t0) / 1000000;
- if (t1 > MAX_WAIT) {
- DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
- t1, i);
- break;
- }
- }
- i++;
- }
-
- qemu_mutex_unlock_ramlist();
-
- /*
- * Must occur before EOS (or any QEMUFile operation)
- * because of RDMA protocol.
- */
- ram_control_after_iterate(f, RAM_CONTROL_ROUND);
-
- bytes_transferred += total_sent;
-
- /*
- * Do not count these 8 bytes into total_sent, so that we can
- * return 0 if no page had been dirtied.
- */
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
- bytes_transferred += 8;
-
- ret = qemu_file_get_error(f);
- if (ret < 0) {
- return ret;
- }
-
- return total_sent;
-}
-
-static int ram_save_complete(QEMUFile *f, void *opaque)
-{
- qemu_mutex_lock_ramlist();
- migration_bitmap_sync();
-
- ram_control_before_iterate(f, RAM_CONTROL_FINISH);
-
- /* try transferring iterative blocks of memory */
-
- /* flush all remaining blocks regardless of rate limiting */
- while (true) {
- int bytes_sent;
-
- bytes_sent = ram_find_and_save_block(f, true);
- /* no more blocks to sent */
- if (bytes_sent == 0) {
- break;
- }
- bytes_transferred += bytes_sent;
- }
-
- ram_control_after_iterate(f, RAM_CONTROL_FINISH);
- migration_end();
-
- qemu_mutex_unlock_ramlist();
- qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
-
- return 0;
-}
-
-static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
- uint64_t remaining_size;
-
- remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
-
- if (remaining_size < max_size) {
- qemu_mutex_lock_iothread();
- migration_bitmap_sync();
- qemu_mutex_unlock_iothread();
- remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE;
- }
- return remaining_size;
-}
-
-static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host)
-{
- unsigned int xh_len;
- int xh_flags;
-
- if (!xbzrle_decoded_buf) {
- xbzrle_decoded_buf = g_malloc(TARGET_PAGE_SIZE);
- }
-
- /* extract RLE header */
- xh_flags = qemu_get_byte(f);
- xh_len = qemu_get_be16(f);
-
- if (xh_flags != ENCODING_FLAG_XBZRLE) {
- error_report("Failed to load XBZRLE page - wrong compression!");
- return -1;
- }
-
- if (xh_len > TARGET_PAGE_SIZE) {
- error_report("Failed to load XBZRLE page - len overflow!");
- return -1;
- }
- /* load data and decode */
- qemu_get_buffer(f, xbzrle_decoded_buf, xh_len);
-
- /* decode RLE */
- if (xbzrle_decode_buffer(xbzrle_decoded_buf, xh_len, host,
- TARGET_PAGE_SIZE) == -1) {
- error_report("Failed to load XBZRLE page - decode error!");
- return -1;
- }
-
- return 0;
-}
-
-static inline void *host_from_stream_offset(QEMUFile *f,
- ram_addr_t offset,
- int flags)
-{
- static RAMBlock *block = NULL;
- char id[256];
- uint8_t len;
-
- if (flags & RAM_SAVE_FLAG_CONTINUE) {
- if (!block || block->length <= offset) {
- error_report("Ack, bad migration stream!");
- return NULL;
- }
-
- return memory_region_get_ram_ptr(block->mr) + offset;
- }
-
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)id, len);
- id[len] = 0;
-
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- if (!strncmp(id, block->idstr, sizeof(id)) && block->length > offset) {
- return memory_region_get_ram_ptr(block->mr) + offset;
- }
- }
-
- error_report("Can't find block %s!", id);
- return NULL;
-}
-
-/*
- * If a page (or a whole RDMA chunk) has been
- * determined to be zero, then zap it.
- */
-void ram_handle_compressed(void *host, uint8_t ch, uint64_t size)
-{
- if (ch != 0 || !is_zero_range(host, size)) {
- memset(host, ch, size);
- }
-}
-
-static int ram_load(QEMUFile *f, void *opaque, int version_id)
-{
- int flags = 0, ret = 0;
- static uint64_t seq_iter;
-
- seq_iter++;
-
- if (version_id != 4) {
- ret = -EINVAL;
- }
-
- while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) {
- ram_addr_t addr, total_ram_bytes;
- void *host;
- uint8_t ch;
-
- addr = qemu_get_be64(f);
- flags = addr & ~TARGET_PAGE_MASK;
- addr &= TARGET_PAGE_MASK;
-
- switch (flags & ~RAM_SAVE_FLAG_CONTINUE) {
- case RAM_SAVE_FLAG_MEM_SIZE:
- /* Synchronize RAM block list */
- total_ram_bytes = addr;
- while (!ret && total_ram_bytes) {
- RAMBlock *block;
- uint8_t len;
- char id[256];
- ram_addr_t length;
-
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)id, len);
- id[len] = 0;
- length = qemu_get_be64(f);
-
- QTAILQ_FOREACH(block, &ram_list.blocks, next) {
- if (!strncmp(id, block->idstr, sizeof(id))) {
- if (block->length != length) {
- error_report("Length mismatch: %s: 0x" RAM_ADDR_FMT
- " in != 0x" RAM_ADDR_FMT, id, length,
- block->length);
- ret = -EINVAL;
- }
- break;
- }
- }
-
- if (!block) {
- error_report("Unknown ramblock \"%s\", cannot "
- "accept migration", id);
- ret = -EINVAL;
- }
-
- total_ram_bytes -= length;
- }
- break;
- case RAM_SAVE_FLAG_COMPRESS:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
-
- ch = qemu_get_byte(f);
- ram_handle_compressed(host, ch, TARGET_PAGE_SIZE);
- break;
- case RAM_SAVE_FLAG_PAGE:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
-
- qemu_get_buffer(f, host, TARGET_PAGE_SIZE);
- break;
- case RAM_SAVE_FLAG_XBZRLE:
- host = host_from_stream_offset(f, addr, flags);
- if (!host) {
- error_report("Illegal RAM offset " RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
-
- if (load_xbzrle(f, addr, host) < 0) {
- error_report("Failed to decompress XBZRLE page at "
- RAM_ADDR_FMT, addr);
- ret = -EINVAL;
- break;
- }
- break;
- case RAM_SAVE_FLAG_EOS:
- /* normal exit */
- break;
- default:
- if (flags & RAM_SAVE_FLAG_HOOK) {
- ram_control_load_hook(f, flags);
- } else {
- error_report("Unknown combination of migration flags: %#x",
- flags);
- ret = -EINVAL;
- }
- }
- if (!ret) {
- ret = qemu_file_get_error(f);
- }
- }
-
- DPRINTF("Completed load of VM with exit code %d seq iteration "
- "%" PRIu64 "\n", ret, seq_iter);
- return ret;
-}
-
-static SaveVMHandlers savevm_ram_handlers = {
- .save_live_setup = ram_save_setup,
- .save_live_iterate = ram_save_iterate,
- .save_live_complete = ram_save_complete,
- .save_live_pending = ram_save_pending,
- .load_state = ram_load,
- .cancel = ram_migration_cancel,
-};
-
-void ram_mig_init(void)
-{
- qemu_mutex_init(&XBZRLE.lock);
- register_savevm_live(NULL, "ram", 0, 4, &savevm_ram_handlers, NULL);
-}
-
struct soundhw {
const char *name;
const char *descr;
@@ -1329,9 +263,7 @@
acpi_table_add(opts, &err);
if (err) {
- error_report("Wrong acpi table provided: %s",
- error_get_pretty(err));
- error_free(err);
+ error_reportf_err(err, "Wrong acpi table provided: ");
exit(1);
}
#endif
@@ -1344,13 +276,6 @@
#endif
}
-void cpudef_init(void)
-{
-#if defined(cpudef_setup)
- cpudef_setup(); /* parse cpu definitions in target config file */
-#endif
-}
-
int kvm_available(void)
{
#ifdef CONFIG_KVM
@@ -1386,52 +311,3 @@
return info;
}
-
-/* Stub function that's gets run on the vcpu when its brought out of the
- VM to run inside qemu via async_run_on_cpu()*/
-static void mig_sleep_cpu(void *opq)
-{
- qemu_mutex_unlock_iothread();
- g_usleep(30*1000);
- qemu_mutex_lock_iothread();
-}
-
-/* To reduce the dirty rate explicitly disallow the VCPUs from spending
- much time in the VM. The migration thread will try to catchup.
- Workload will experience a performance drop.
-*/
-static void mig_throttle_guest_down(void)
-{
- CPUState *cpu;
-
- qemu_mutex_lock_iothread();
- CPU_FOREACH(cpu) {
- async_run_on_cpu(cpu, mig_sleep_cpu, NULL);
- }
- qemu_mutex_unlock_iothread();
-}
-
-static void check_guest_throttling(void)
-{
- static int64_t t0;
- int64_t t1;
-
- if (!mig_throttle_on) {
- return;
- }
-
- if (!t0) {
- t0 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- return;
- }
-
- t1 = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-
- /* If it has been more than 40 ms since the last time the guest
- * was throttled then do it again.
- */
- if (40 < (t1-t0)/1000000) {
- mig_throttle_guest_down();
- t0 = t1;
- }
-}
diff --git a/async.c b/async.c
index 6e1b282..3bca9b0 100644
--- a/async.c
+++ b/async.c
@@ -22,11 +22,14 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
+#include "block/raw-aio.h"
/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */
@@ -44,10 +47,12 @@
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
QEMUBH *bh;
- bh = g_malloc0(sizeof(QEMUBH));
- bh->ctx = ctx;
- bh->cb = cb;
- bh->opaque = opaque;
+ bh = g_new(QEMUBH, 1);
+ *bh = (QEMUBH){
+ .ctx = ctx,
+ .cb = cb,
+ .opaque = opaque,
+ };
qemu_mutex_lock(&ctx->bh_lock);
bh->next = ctx->first_bh;
/* Make sure that the members are ready before putting bh into list */
@@ -57,6 +62,11 @@
return bh;
}
+void aio_bh_call(QEMUBH *bh)
+{
+ bh->cb(bh->opaque);
+}
+
/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
@@ -70,16 +80,19 @@
/* Make sure that fetching bh happens before accessing its members */
smp_read_barrier_depends();
next = bh->next;
- if (!bh->deleted && bh->scheduled) {
- bh->scheduled = 0;
- /* Paired with write barrier in bh schedule to ensure reading for
- * idle & callbacks coming after bh's scheduling.
- */
- smp_rmb();
- if (!bh->idle)
+ /* The atomic_xchg is paired with the one in qemu_bh_schedule. The
+ * implicit memory barrier ensures that the callback sees all writes
+ * done by the scheduling thread. It also ensures that the scheduling
+ * thread sees the zero before bh->cb has run, and thus will call
+ * aio_notify again if necessary.
+ */
+ if (!bh->deleted && atomic_xchg(&bh->scheduled, 0)) {
+ /* Idle BHs and the notify BH don't count as progress */
+ if (!bh->idle && bh != ctx->notify_dummy_bh) {
ret = 1;
+ }
bh->idle = 0;
- bh->cb(bh->opaque);
+ aio_bh_call(bh);
}
}
@@ -106,33 +119,28 @@
void qemu_bh_schedule_idle(QEMUBH *bh)
{
- if (bh->scheduled)
- return;
bh->idle = 1;
/* Make sure that idle & any writes needed by the callback are done
* before the locations are read in the aio_bh_poll.
*/
- smp_wmb();
- bh->scheduled = 1;
+ atomic_mb_set(&bh->scheduled, 1);
}
void qemu_bh_schedule(QEMUBH *bh)
{
AioContext *ctx;
- if (bh->scheduled)
- return;
ctx = bh->ctx;
bh->idle = 0;
- /* Make sure that:
+ /* The memory barrier implicit in atomic_xchg makes sure that:
* 1. idle & any writes needed by the callback are done before the
* locations are read in the aio_bh_poll.
* 2. ctx is loaded before scheduled is set and the callback has a chance
* to execute.
*/
- smp_mb();
- bh->scheduled = 1;
- aio_notify(ctx);
+ if (atomic_xchg(&bh->scheduled, 1) == 0) {
+ aio_notify(ctx);
+ }
}
@@ -186,6 +194,8 @@
{
AioContext *ctx = (AioContext *) source;
+ atomic_or(&ctx->notify_me, 1);
+
/* We assume there is no timeout already supplied */
*timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));
@@ -202,10 +212,13 @@
AioContext *ctx = (AioContext *) source;
QEMUBH *bh;
+ atomic_and(&ctx->notify_me, ~1);
+ aio_notify_accept(ctx);
+
for (bh = ctx->first_bh; bh; bh = bh->next) {
if (!bh->deleted && bh->scheduled) {
return true;
- }
+ }
}
return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}
@@ -227,12 +240,33 @@
{
AioContext *ctx = (AioContext *) source;
+ qemu_bh_delete(ctx->notify_dummy_bh);
thread_pool_free(ctx->thread_pool);
- aio_set_event_notifier(ctx, &ctx->notifier, NULL);
+
+#ifdef CONFIG_LINUX_AIO
+ if (ctx->linux_aio) {
+ laio_detach_aio_context(ctx->linux_aio, ctx);
+ laio_cleanup(ctx->linux_aio);
+ ctx->linux_aio = NULL;
+ }
+#endif
+
+ qemu_mutex_lock(&ctx->bh_lock);
+ while (ctx->first_bh) {
+ QEMUBH *next = ctx->first_bh->next;
+
+ /* qemu_bh_delete() must have been called on BHs in this AioContext */
+ assert(ctx->first_bh->deleted);
+
+ g_free(ctx->first_bh);
+ ctx->first_bh = next;
+ }
+ qemu_mutex_unlock(&ctx->bh_lock);
+
+ aio_set_event_notifier(ctx, &ctx->notifier, false, NULL);
event_notifier_cleanup(&ctx->notifier);
rfifolock_destroy(&ctx->lock);
qemu_mutex_destroy(&ctx->bh_lock);
- g_array_free(ctx->pollfds, TRUE);
timerlistgroup_deinit(&ctx->tlg);
}
@@ -257,24 +291,33 @@
return ctx->thread_pool;
}
-void aio_set_dispatching(AioContext *ctx, bool dispatching)
+#ifdef CONFIG_LINUX_AIO
+LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
- ctx->dispatching = dispatching;
- if (!dispatching) {
- /* Write ctx->dispatching before reading e.g. bh->scheduled.
- * Optimization: this is only needed when we're entering the "unsafe"
- * phase where other threads must call event_notifier_set.
- */
- smp_mb();
+ if (!ctx->linux_aio) {
+ ctx->linux_aio = laio_init();
+ laio_attach_aio_context(ctx->linux_aio, ctx);
}
+ return ctx->linux_aio;
}
+#endif
void aio_notify(AioContext *ctx)
{
- /* Write e.g. bh->scheduled before reading ctx->dispatching. */
+ /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
+ * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
+ */
smp_mb();
- if (!ctx->dispatching) {
+ if (ctx->notify_me) {
event_notifier_set(&ctx->notifier);
+ atomic_mb_set(&ctx->notified, true);
+ }
+}
+
+void aio_notify_accept(AioContext *ctx)
+{
+ if (atomic_xchg(&ctx->notified, false)) {
+ event_notifier_test_and_clear(&ctx->notifier);
}
}
@@ -285,31 +328,53 @@
static void aio_rfifolock_cb(void *opaque)
{
+ AioContext *ctx = opaque;
+
/* Kick owner thread in case they are blocked in aio_poll() */
- aio_notify(opaque);
+ qemu_bh_schedule(ctx->notify_dummy_bh);
+}
+
+static void notify_dummy_bh(void *opaque)
+{
+ /* Do nothing, we were invoked just to force the event loop to iterate */
+}
+
+static void event_notifier_dummy_cb(EventNotifier *e)
+{
}
AioContext *aio_context_new(Error **errp)
{
int ret;
AioContext *ctx;
+
ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
+ aio_context_setup(ctx);
+
ret = event_notifier_init(&ctx->notifier, false);
if (ret < 0) {
- g_source_destroy(&ctx->source);
error_setg_errno(errp, -ret, "Failed to initialize event notifier");
- return NULL;
+ goto fail;
}
+ g_source_set_can_recurse(&ctx->source, true);
aio_set_event_notifier(ctx, &ctx->notifier,
+ false,
(EventNotifierHandler *)
- event_notifier_test_and_clear);
- ctx->pollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));
+ event_notifier_dummy_cb);
+#ifdef CONFIG_LINUX_AIO
+ ctx->linux_aio = NULL;
+#endif
ctx->thread_pool = NULL;
qemu_mutex_init(&ctx->bh_lock);
rfifolock_init(&ctx->lock, aio_rfifolock_cb, ctx);
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
+ ctx->notify_dummy_bh = aio_bh_new(ctx, notify_dummy_bh, NULL);
+
return ctx;
+fail:
+ g_source_destroy(&ctx->source);
+ return NULL;
}
void aio_context_ref(AioContext *ctx)
diff --git a/audio/Makefile.objs b/audio/Makefile.objs
index b495cb4..fb8e7a3 100644
--- a/audio/Makefile.objs
+++ b/audio/Makefile.objs
@@ -5,14 +5,10 @@
common-obj-$(CONFIG_COREAUDIO) += coreaudio.o
common-obj-$(CONFIG_ALSA) += alsaaudio.o
common-obj-$(CONFIG_DSOUND) += dsoundaudio.o
-common-obj-$(CONFIG_FMOD) += fmodaudio.o
-common-obj-$(CONFIG_ESD) += esdaudio.o
common-obj-$(CONFIG_PA) += paaudio.o
-common-obj-$(CONFIG_WINWAVE) += winwaveaudio.o
-common-obj-$(CONFIG_WINAUDIO) += winaudio.o
common-obj-$(CONFIG_AUDIO_PT_INT) += audio_pt_int.o
common-obj-$(CONFIG_AUDIO_WIN_INT) += audio_win_int.o
common-obj-y += wavcapture.o
+common-obj-$(CONFIG_WINAUDIO) += winaudio.o
-$(obj)/audio.o $(obj)/fmodaudio.o: QEMU_CFLAGS += $(FMOD_CFLAGS)
sdlaudio.o-cflags := $(SDL_CFLAGS)
diff --git a/audio/alsaaudio.c b/audio/alsaaudio.c
index 74ead97..3652a7b 100644
--- a/audio/alsaaudio.c
+++ b/audio/alsaaudio.c
@@ -21,10 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <alsa/asoundlib.h>
#include "qemu-common.h"
#include "qemu/main-loop.h"
#include "audio.h"
+#include "trace.h"
#if QEMU_GNUC_PREREQ(4, 3)
#pragma GCC diagnostic ignored "-Waddress"
@@ -33,9 +35,28 @@
#define AUDIO_CAP "alsa"
#include "audio_int.h"
+typedef struct ALSAConf {
+ int size_in_usec_in;
+ int size_in_usec_out;
+ const char *pcm_name_in;
+ const char *pcm_name_out;
+ unsigned int buffer_size_in;
+ unsigned int period_size_in;
+ unsigned int buffer_size_out;
+ unsigned int period_size_out;
+ unsigned int threshold;
+
+ int buffer_size_in_overridden;
+ int period_size_in_overridden;
+
+ int buffer_size_out_overridden;
+ int period_size_out_overridden;
+} ALSAConf;
+
struct pollhlp {
snd_pcm_t *handle;
struct pollfd *pfds;
+ ALSAConf *conf;
int count;
int mask;
};
@@ -56,30 +77,6 @@
struct pollhlp pollhlp;
} ALSAVoiceIn;
-static struct {
- int size_in_usec_in;
- int size_in_usec_out;
- const char *pcm_name_in;
- const char *pcm_name_out;
- unsigned int buffer_size_in;
- unsigned int period_size_in;
- unsigned int buffer_size_out;
- unsigned int period_size_out;
- unsigned int threshold;
-
- int buffer_size_in_overridden;
- int period_size_in_overridden;
-
- int buffer_size_out_overridden;
- int period_size_out_overridden;
- int verbose;
-} conf = {
- .buffer_size_out = 4096,
- .period_size_out = 1024,
- .pcm_name_out = "default",
- .pcm_name_in = "default",
-};
-
struct alsa_params_req {
int freq;
snd_pcm_format_t fmt;
@@ -205,9 +202,7 @@
}
if (!(revents & hlp->mask)) {
- if (conf.verbose) {
- dolog ("revents = %d\n", revents);
- }
+ trace_alsa_revents(revents);
return;
}
@@ -266,31 +261,14 @@
for (i = 0; i < count; ++i) {
if (pfds[i].events & POLLIN) {
- err = qemu_set_fd_handler (pfds[i].fd, alsa_poll_handler,
- NULL, hlp);
+ qemu_set_fd_handler (pfds[i].fd, alsa_poll_handler, NULL, hlp);
}
if (pfds[i].events & POLLOUT) {
- if (conf.verbose) {
- dolog ("POLLOUT %d %d\n", i, pfds[i].fd);
- }
- err = qemu_set_fd_handler (pfds[i].fd, NULL,
- alsa_poll_handler, hlp);
+ trace_alsa_pollout(i, pfds[i].fd);
+ qemu_set_fd_handler (pfds[i].fd, NULL, alsa_poll_handler, hlp);
}
- if (conf.verbose) {
- dolog ("Set handler events=%#x index=%d fd=%d err=%d\n",
- pfds[i].events, i, pfds[i].fd, err);
- }
+ trace_alsa_set_handler(pfds[i].events, i, pfds[i].fd, err);
- if (err) {
- dolog ("Failed to set handler events=%#x index=%d fd=%d err=%d\n",
- pfds[i].events, i, pfds[i].fd, err);
-
- while (i--) {
- qemu_set_fd_handler (pfds[i].fd, NULL, NULL, NULL);
- }
- g_free (pfds);
- return -1;
- }
}
hlp->pfds = pfds;
hlp->count = count;
@@ -476,14 +454,15 @@
}
static int alsa_open (int in, struct alsa_params_req *req,
- struct alsa_params_obt *obt, snd_pcm_t **handlep)
+ struct alsa_params_obt *obt, snd_pcm_t **handlep,
+ ALSAConf *conf)
{
snd_pcm_t *handle;
snd_pcm_hw_params_t *hw_params;
int err;
int size_in_usec;
unsigned int freq, nchannels;
- const char *pcm_name = in ? conf.pcm_name_in : conf.pcm_name_out;
+ const char *pcm_name = in ? conf->pcm_name_in : conf->pcm_name_out;
snd_pcm_uframes_t obt_buffer_size;
const char *typ = in ? "ADC" : "DAC";
snd_pcm_format_t obtfmt;
@@ -522,7 +501,7 @@
}
err = snd_pcm_hw_params_set_format (handle, hw_params, req->fmt);
- if (err < 0 && conf.verbose) {
+ if (err < 0) {
alsa_logerr2 (err, typ, "Failed to set format %d\n", req->fmt);
}
@@ -654,7 +633,7 @@
goto err;
}
- if (!in && conf.threshold) {
+ if (!in && conf->threshold) {
snd_pcm_uframes_t threshold;
int bytes_per_sec;
@@ -676,7 +655,7 @@
break;
}
- threshold = (conf.threshold * bytes_per_sec) / 1000;
+ threshold = (conf->threshold * bytes_per_sec) / 1000;
alsa_set_threshold (handle, threshold);
}
@@ -686,10 +665,9 @@
*handlep = handle;
- if (conf.verbose &&
- (obtfmt != req->fmt ||
+ if (obtfmt != req->fmt ||
obt->nchannels != req->nchannels ||
- obt->freq != req->freq)) {
+ obt->freq != req->freq) {
dolog ("Audio parameters for %s\n", typ);
alsa_dump_info (req, obt, obtfmt);
}
@@ -743,9 +721,7 @@
if (written <= 0) {
switch (written) {
case 0:
- if (conf.verbose) {
- dolog ("Failed to write %d frames (wrote zero)\n", len);
- }
+ trace_alsa_wrote_zero(len);
return;
case -EPIPE:
@@ -754,9 +730,7 @@
len);
return;
}
- if (conf.verbose) {
- dolog ("Recovering from playback xrun\n");
- }
+ trace_alsa_xrun_out();
continue;
case -ESTRPIPE:
@@ -767,9 +741,7 @@
len);
return;
}
- if (conf.verbose) {
- dolog ("Resuming suspended output stream\n");
- }
+ trace_alsa_resume_out();
continue;
case -EAGAIN:
@@ -819,25 +791,27 @@
alsa->pcm_buf = NULL;
}
-static int alsa_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int alsa_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
ALSAVoiceOut *alsa = (ALSAVoiceOut *) hw;
struct alsa_params_req req;
struct alsa_params_obt obt;
snd_pcm_t *handle;
struct audsettings obt_as;
+ ALSAConf *conf = drv_opaque;
req.fmt = aud_to_alsafmt (as->fmt, as->endianness);
req.freq = as->freq;
req.nchannels = as->nchannels;
- req.period_size = conf.period_size_out;
- req.buffer_size = conf.buffer_size_out;
- req.size_in_usec = conf.size_in_usec_out;
+ req.period_size = conf->period_size_out;
+ req.buffer_size = conf->buffer_size_out;
+ req.size_in_usec = conf->size_in_usec_out;
req.override_mask =
- (conf.period_size_out_overridden ? 1 : 0) |
- (conf.buffer_size_out_overridden ? 2 : 0);
+ (conf->period_size_out_overridden ? 1 : 0) |
+ (conf->buffer_size_out_overridden ? 2 : 0);
- if (alsa_open (0, &req, &obt, &handle)) {
+ if (alsa_open (0, &req, &obt, &handle, conf)) {
return -1;
}
@@ -858,6 +832,7 @@
}
alsa->handle = handle;
+ alsa->pollhlp.conf = conf;
return 0;
}
@@ -928,25 +903,26 @@
return -1;
}
-static int alsa_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int alsa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
ALSAVoiceIn *alsa = (ALSAVoiceIn *) hw;
struct alsa_params_req req;
struct alsa_params_obt obt;
snd_pcm_t *handle;
struct audsettings obt_as;
+ ALSAConf *conf = drv_opaque;
req.fmt = aud_to_alsafmt (as->fmt, as->endianness);
req.freq = as->freq;
req.nchannels = as->nchannels;
- req.period_size = conf.period_size_in;
- req.buffer_size = conf.buffer_size_in;
- req.size_in_usec = conf.size_in_usec_in;
+ req.period_size = conf->period_size_in;
+ req.buffer_size = conf->buffer_size_in;
+ req.size_in_usec = conf->size_in_usec_in;
req.override_mask =
- (conf.period_size_in_overridden ? 1 : 0) |
- (conf.buffer_size_in_overridden ? 2 : 0);
+ (conf->period_size_in_overridden ? 1 : 0) |
+ (conf->buffer_size_in_overridden ? 2 : 0);
- if (alsa_open (1, &req, &obt, &handle)) {
+ if (alsa_open (1, &req, &obt, &handle, conf)) {
return -1;
}
@@ -967,6 +943,7 @@
}
alsa->handle = handle;
+ alsa->pollhlp.conf = conf;
return 0;
}
@@ -1022,14 +999,10 @@
dolog ("Failed to resume suspended input stream\n");
return 0;
}
- if (conf.verbose) {
- dolog ("Resuming suspended input stream\n");
- }
+ trace_alsa_resume_in();
break;
default:
- if (conf.verbose) {
- dolog ("No frames available and ALSA state is %d\n", state);
- }
+ trace_alsa_no_frames(state);
return 0;
}
}
@@ -1064,9 +1037,7 @@
if (nread <= 0) {
switch (nread) {
case 0:
- if (conf.verbose) {
- dolog ("Failed to read %ld frames (read zero)\n", len);
- }
+ trace_alsa_read_zero(len);
goto exit;
case -EPIPE:
@@ -1074,9 +1045,7 @@
alsa_logerr (nread, "Failed to read %ld frames\n", len);
goto exit;
}
- if (conf.verbose) {
- dolog ("Recovering from capture xrun\n");
- }
+ trace_alsa_xrun_in();
continue;
case -EAGAIN:
@@ -1148,82 +1117,85 @@
return -1;
}
+static ALSAConf glob_conf = {
+ .buffer_size_out = 4096,
+ .period_size_out = 1024,
+ .pcm_name_out = "default",
+ .pcm_name_in = "default",
+};
+
static void *alsa_audio_init (void)
{
- return &conf;
+ ALSAConf *conf = g_malloc(sizeof(ALSAConf));
+ *conf = glob_conf;
+ return conf;
}
static void alsa_audio_fini (void *opaque)
{
- (void) opaque;
+ g_free(opaque);
}
static struct audio_option alsa_options[] = {
{
.name = "DAC_SIZE_IN_USEC",
.tag = AUD_OPT_BOOL,
- .valp = &conf.size_in_usec_out,
+ .valp = &glob_conf.size_in_usec_out,
.descr = "DAC period/buffer size in microseconds (otherwise in frames)"
},
{
.name = "DAC_PERIOD_SIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.period_size_out,
+ .valp = &glob_conf.period_size_out,
.descr = "DAC period size (0 to go with system default)",
- .overriddenp = &conf.period_size_out_overridden
+ .overriddenp = &glob_conf.period_size_out_overridden
},
{
.name = "DAC_BUFFER_SIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.buffer_size_out,
+ .valp = &glob_conf.buffer_size_out,
.descr = "DAC buffer size (0 to go with system default)",
- .overriddenp = &conf.buffer_size_out_overridden
+ .overriddenp = &glob_conf.buffer_size_out_overridden
},
{
.name = "ADC_SIZE_IN_USEC",
.tag = AUD_OPT_BOOL,
- .valp = &conf.size_in_usec_in,
+ .valp = &glob_conf.size_in_usec_in,
.descr =
"ADC period/buffer size in microseconds (otherwise in frames)"
},
{
.name = "ADC_PERIOD_SIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.period_size_in,
+ .valp = &glob_conf.period_size_in,
.descr = "ADC period size (0 to go with system default)",
- .overriddenp = &conf.period_size_in_overridden
+ .overriddenp = &glob_conf.period_size_in_overridden
},
{
.name = "ADC_BUFFER_SIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.buffer_size_in,
+ .valp = &glob_conf.buffer_size_in,
.descr = "ADC buffer size (0 to go with system default)",
- .overriddenp = &conf.buffer_size_in_overridden
+ .overriddenp = &glob_conf.buffer_size_in_overridden
},
{
.name = "THRESHOLD",
.tag = AUD_OPT_INT,
- .valp = &conf.threshold,
+ .valp = &glob_conf.threshold,
.descr = "(undocumented)"
},
{
.name = "DAC_DEV",
.tag = AUD_OPT_STR,
- .valp = &conf.pcm_name_out,
+ .valp = &glob_conf.pcm_name_out,
.descr = "DAC device name (for instance dmix)"
},
{
.name = "ADC_DEV",
.tag = AUD_OPT_STR,
- .valp = &conf.pcm_name_in,
+ .valp = &glob_conf.pcm_name_in,
.descr = "ADC device name"
},
- {
- .name = "VERBOSE",
- .tag = AUD_OPT_BOOL,
- .valp = &conf.verbose,
- .descr = "Behave in a more verbose way"
- },
{ /* End of list */ }
};
diff --git a/audio/audio.c b/audio/audio.c
index 9d018e9..c845a44 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -21,16 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "hw/hw.h"
#include "audio.h"
#include "monitor/monitor.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
#define AUDIO_CAP "audio"
#include "audio_int.h"
-/* #define DEBUG_PLIVE */
/* #define DEBUG_LIVE */
/* #define DEBUG_OUT */
/* #define DEBUG_CAPTURE */
@@ -66,8 +67,6 @@
int hertz;
int64_t ticks;
} period;
- int plive;
- int log_to_monitor;
int try_poll_in;
int try_poll_out;
} conf = {
@@ -96,8 +95,6 @@
},
.period = { .hertz = 100 },
- .plive = 0,
- .log_to_monitor = 0,
.try_poll_in = 1,
.try_poll_out = 1,
};
@@ -331,20 +328,11 @@
void AUD_vlog (const char *cap, const char *fmt, va_list ap)
{
- if (conf.log_to_monitor) {
- if (cap) {
- monitor_printf(default_mon, "%s: ", cap);
- }
-
- monitor_vprintf(default_mon, fmt, ap);
+ if (cap) {
+ fprintf(stderr, "%s: ", cap);
}
- else {
- if (cap) {
- fprintf (stderr, "%s: ", cap);
- }
- vfprintf (stderr, fmt, ap);
- }
+ vfprintf(stderr, fmt, ap);
}
void AUD_log (const char *cap, const char *fmt, ...)
@@ -1143,8 +1131,6 @@
*/
int AUD_write (SWVoiceOut *sw, void *buf, int size)
{
- int bytes;
-
if (!sw) {
/* XXX: Consider options */
return size;
@@ -1155,14 +1141,11 @@
return 0;
}
- bytes = sw->hw->pcm_ops->write (sw, buf, size);
- return bytes;
+ return sw->hw->pcm_ops->write(sw, buf, size);
}
int AUD_read (SWVoiceIn *sw, void *buf, int size)
{
- int bytes;
-
if (!sw) {
/* XXX: Consider options */
return size;
@@ -1173,8 +1156,7 @@
return 0;
}
- bytes = sw->hw->pcm_ops->read (sw, buf, size);
- return bytes;
+ return sw->hw->pcm_ops->read(sw, buf, size);
}
int AUD_get_buffer_size_out (SWVoiceOut *sw)
@@ -1454,9 +1436,6 @@
while (sw) {
sw1 = sw->entries.le_next;
if (!sw->active && !sw->callback.fn) {
-#ifdef DEBUG_PLIVE
- dolog ("Finishing with old voice\n");
-#endif
audio_close_out (sw);
}
sw = sw1;
@@ -1648,18 +1627,6 @@
.valp = &conf.period.hertz,
.descr = "Timer period in HZ (0 - use lowest possible)"
},
- {
- .name = "PLIVE",
- .tag = AUD_OPT_BOOL,
- .valp = &conf.plive,
- .descr = "(undocumented)"
- },
- {
- .name = "LOG_TO_MONITOR",
- .tag = AUD_OPT_BOOL,
- .valp = &conf.log_to_monitor,
- .descr = "Print logging messages to monitor instead of stderr"
- },
{ /* End of list */ }
};
@@ -1772,13 +1739,21 @@
audio_reset_timer (s);
}
-static void audio_atexit (void)
+static bool is_cleaning_up;
+
+bool audio_is_cleaning_up(void)
+{
+ return is_cleaning_up;
+}
+
+void audio_cleanup(void)
{
AudioState *s = &glob_audio_state;
- HWVoiceOut *hwo = NULL;
- HWVoiceIn *hwi = NULL;
+ HWVoiceOut *hwo, *hwon;
+ HWVoiceIn *hwi, *hwin;
- while ((hwo = audio_pcm_hw_find_any_out (hwo))) {
+ is_cleaning_up = true;
+ QLIST_FOREACH_SAFE(hwo, &glob_audio_state.hw_head_out, entries, hwon) {
SWVoiceCap *sc;
if (hwo->enabled) {
@@ -1794,17 +1769,20 @@
cb->ops.destroy (cb->opaque);
}
}
+ QLIST_REMOVE(hwo, entries);
}
- while ((hwi = audio_pcm_hw_find_any_in (hwi))) {
+ QLIST_FOREACH_SAFE(hwi, &glob_audio_state.hw_head_in, entries, hwin) {
if (hwi->enabled) {
hwi->pcm_ops->ctl_in (hwi, VOICE_DISABLE);
}
hwi->pcm_ops->fini_in (hwi);
+ QLIST_REMOVE(hwi, entries);
}
if (s->drv) {
s->drv->fini (s->drv_opaque);
+ s->drv = NULL;
}
}
@@ -1832,12 +1810,9 @@
QLIST_INIT (&s->hw_head_out);
QLIST_INIT (&s->hw_head_in);
QLIST_INIT (&s->cap_head);
- atexit (audio_atexit);
+ atexit(audio_cleanup);
s->ts = timer_new_ns(QEMU_CLOCK_VIRTUAL, audio_timer, s);
- if (!s->ts) {
- hw_error("Could not create audio timer\n");
- }
audio_process_options ("AUDIO", audio_options);
@@ -1888,12 +1863,8 @@
if (!done) {
done = !audio_driver_init (s, &no_audio_driver);
- if (!done) {
- hw_error("Could not initialize audio subsystem\n");
- }
- else {
- dolog ("warning: Using timer based audio emulation\n");
- }
+ assert(done);
+ dolog("warning: Using timer based audio emulation\n");
}
if (conf.period.hertz <= 0) {
@@ -1904,8 +1875,7 @@
}
conf.period.ticks = 1;
} else {
- conf.period.ticks =
- muldiv64 (1, get_ticks_per_sec (), conf.period.hertz);
+ conf.period.ticks = NANOSECONDS_PER_SECOND / conf.period.hertz;
}
e = qemu_add_vm_change_state_handler (audio_vm_change_state_handler, s);
@@ -2007,8 +1977,7 @@
QLIST_INSERT_HEAD (&s->cap_head, cap, entries);
QLIST_INSERT_HEAD (&cap->cb_head, cb, entries);
- hw = NULL;
- while ((hw = audio_pcm_hw_find_any_out (hw))) {
+ QLIST_FOREACH(hw, &glob_audio_state.hw_head_out, entries) {
audio_attach_capture (hw);
}
return cap;
diff --git a/audio/audio.h b/audio/audio.h
index e7ea397..c3c5198 100644
--- a/audio/audio.h
+++ b/audio/audio.h
@@ -21,10 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+
#ifndef QEMU_AUDIO_H
#define QEMU_AUDIO_H
-#include "config-host.h"
#include "qemu/queue.h"
typedef void (*audio_callback_fn) (void *opaque, int avail);
@@ -163,4 +163,7 @@
int wav_start_capture (CaptureState *s, const char *path, int freq,
int bits, int nchannels);
-#endif /* audio.h */
+bool audio_is_cleaning_up(void);
+void audio_cleanup(void);
+
+#endif /* QEMU_AUDIO_H */
diff --git a/audio/audio_int.h b/audio/audio_int.h
index 031eb6e..6c6236a 100644
--- a/audio/audio_int.h
+++ b/audio/audio_int.h
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+
#ifndef QEMU_AUDIO_INT_H
#define QEMU_AUDIO_INT_H
@@ -156,13 +157,13 @@
};
struct audio_pcm_ops {
- int (*init_out)(HWVoiceOut *hw, struct audsettings *as);
+ int (*init_out)(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque);
void (*fini_out)(HWVoiceOut *hw);
int (*run_out) (HWVoiceOut *hw, int live);
int (*write) (SWVoiceOut *sw, void *buf, int size);
int (*ctl_out) (HWVoiceOut *hw, int cmd, ...);
- int (*init_in) (HWVoiceIn *hw, struct audsettings *as);
+ int (*init_in) (HWVoiceIn *hw, struct audsettings *as, void *drv_opaque);
void (*fini_in) (HWVoiceIn *hw);
int (*run_in) (HWVoiceIn *hw);
int (*read) (SWVoiceIn *sw, void *buf, int size);
@@ -206,14 +207,11 @@
extern struct audio_driver oss_audio_driver;
extern struct audio_driver sdl_audio_driver;
extern struct audio_driver wav_audio_driver;
-extern struct audio_driver fmod_audio_driver;
extern struct audio_driver alsa_audio_driver;
extern struct audio_driver coreaudio_audio_driver;
extern struct audio_driver dsound_audio_driver;
-extern struct audio_driver esd_audio_driver;
extern struct audio_driver pa_audio_driver;
extern struct audio_driver spice_audio_driver;
-extern struct audio_driver winwave_audio_driver;
extern struct audio_driver winaudio_audio_driver;
extern const struct mixeng_volume nominal_volume;
@@ -261,4 +259,4 @@
#define AUDIO_FUNC __FILE__ ":" AUDIO_STRINGIFY (__LINE__)
#endif
-#endif /* audio_int.h */
+#endif /* QEMU_AUDIO_INT_H */
diff --git a/audio/audio_pt_int.c b/audio/audio_pt_int.c
index 9a9c306..21ff9c5 100644
--- a/audio/audio_pt_int.c
+++ b/audio/audio_pt_int.c
@@ -1,3 +1,4 @@
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "audio.h"
diff --git a/audio/audio_pt_int.h b/audio/audio_pt_int.h
index 0dfff76..4c0c15b 100644
--- a/audio/audio_pt_int.h
+++ b/audio/audio_pt_int.h
@@ -19,4 +19,4 @@
int audio_pt_unlock_and_signal (struct audio_pt *, const char *);
int audio_pt_join (struct audio_pt *, void **, const char *);
-#endif /* audio_pt_int.h */
+#endif /* QEMU_AUDIO_PT_INT_H */
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 8173188..99b27b2 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -191,9 +191,9 @@
audio_detach_capture (hw);
#endif
QLIST_REMOVE (hw, entries);
+ glue (hw->pcm_ops->fini_, TYPE) (hw);
glue (s->nb_hw_voices_, TYPE) += 1;
glue (audio_pcm_hw_free_resources_ ,TYPE) (hw);
- glue (hw->pcm_ops->fini_, TYPE) (hw);
g_free (hw);
*hwp = NULL;
}
@@ -262,7 +262,7 @@
#ifdef DAC
QLIST_INIT (&hw->cap_head);
#endif
- if (glue (hw->pcm_ops->init_, TYPE) (hw, as)) {
+ if (glue (hw->pcm_ops->init_, TYPE) (hw, as, s->drv_opaque)) {
goto err0;
}
@@ -398,10 +398,6 @@
)
{
AudioState *s = &glob_audio_state;
-#ifdef DAC
- int live = 0;
- SW *old_sw = NULL;
-#endif
if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) {
dolog ("card=%p name=%p callback_fn=%p as=%p\n",
@@ -426,29 +422,6 @@
return sw;
}
-#ifdef DAC
- if (conf.plive && sw && (!sw->active && !sw->empty)) {
- live = sw->total_hw_samples_mixed;
-
-#ifdef DEBUG_PLIVE
- dolog ("Replacing voice %s with %d live samples\n", SW_NAME (sw), live);
- dolog ("Old %s freq %d, bits %d, channels %d\n",
- SW_NAME (sw), sw->info.freq, sw->info.bits, sw->info.nchannels);
- dolog ("New %s freq %d, bits %d, channels %d\n",
- name,
- as->freq,
- (as->fmt == AUD_FMT_S16 || as->fmt == AUD_FMT_U16) ? 16 : 8,
- as->nchannels);
-#endif
-
- if (live) {
- old_sw = sw;
- old_sw->callback.fn = NULL;
- sw = NULL;
- }
- }
-#endif
-
if (!glue (conf.fixed_, TYPE).enabled && sw) {
glue (AUD_close_, TYPE) (card, sw);
sw = NULL;
@@ -481,20 +454,6 @@
sw->callback.fn = callback_fn;
sw->callback.opaque = callback_opaque;
-#ifdef DAC
- if (live) {
- int mixed =
- (live << old_sw->info.shift)
- * old_sw->info.bytes_per_second
- / sw->info.bytes_per_second;
-
-#ifdef DEBUG_PLIVE
- dolog ("Silence will be mixed %d\n", mixed);
-#endif
- sw->total_hw_samples_mixed += mixed;
- }
-#endif
-
#ifdef DEBUG_AUDIO
dolog ("%s\n", name);
audio_pcm_print_info ("hw", &sw->hw->info);
diff --git a/audio/audio_win_int.c b/audio/audio_win_int.c
index e132405..6900008 100644
--- a/audio/audio_win_int.c
+++ b/audio/audio_win_int.c
@@ -1,5 +1,6 @@
/* public domain */
+#include "qemu/osdep.h"
#include "qemu-common.h"
#define AUDIO_CAP "win-int"
diff --git a/audio/coreaudio.c b/audio/coreaudio.c
index 8304a38..9ef76c4 100644
--- a/audio/coreaudio.c
+++ b/audio/coreaudio.c
@@ -23,8 +23,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <CoreAudio/CoreAudio.h>
-#include <string.h> /* strerror */
#include <pthread.h> /* pthread_X */
#include "qemu-common.h"
@@ -33,33 +33,290 @@
#define AUDIO_CAP "coreaudio"
#include "audio_int.h"
-#if 0
-# define D(...) fprintf(stderr, __VA_ARGS__)
-#else
-# define D(...) ((void)0)
+#ifndef MAC_OS_X_VERSION_10_6
+#define MAC_OS_X_VERSION_10_6 1060
#endif
-struct {
- int out_buffer_frames;
- int out_nbuffers;
- int in_buffer_frames;
- int in_nbuffers;
- int isAtexit;
-} conf = {
- .out_buffer_frames = 512,
- .out_nbuffers = 4,
- .in_buffer_frames = 512,
- .in_nbuffers = 4,
- .isAtexit = 0
-};
+typedef struct { /* driver settings, read by both the playback and capture init paths */
+ int buffer_frames; /* requested device buffer size in frames; clamped to the device's range at init */
+ int nbuffers; /* hw->samples is nbuffers times the negotiated buffer frame size */
+} CoreaudioConf;
-/***************************************************************************************/
-/***************************************************************************************/
-/*** ***/
-/*** U T I L I T Y R O U T I N E S ***/
-/*** ***/
-/***************************************************************************************/
-/***************************************************************************************/
+typedef struct coreaudioVoiceBase { /* state common to the playback and capture voices */
+ pthread_mutex_t mutex; /* taken through coreaudio_lock()/coreaudio_unlock() */
+ AudioDeviceID deviceID; /* device from coreaudio_get_voice(); kAudioDeviceUnknown once torn down */
+ UInt32 audioDevicePropertyBufferFrameSize; /* frame size read back from the device; frames handled per IOProc call */
+ AudioStreamBasicDescription streamBasicDescription; /* device stream format; mSampleRate is overwritten with as->freq */
+ AudioDeviceIOProcID ioprocid; /* handle filled in by AudioDeviceCreateIOProcID() */
+ Boolean isInput; /* true for the capture voice, false for playback */
+ int live; /* playback only: set to live - decr by coreaudio_run_out() */
+ int decr; /* frames processed by the IOProc and not yet reported by run_out/run_in */
+ int pos; /* ring position left by the IOProc; copied to hw->rpos or hw->wpos */
+} coreaudioVoiceBase;
+
+typedef struct coreaudioVoiceOut { /* playback voice */
+ HWVoiceOut hw; /* must stay first: HWVoiceOut pointers are cast to coreaudioVoiceOut pointers */
+ coreaudioVoiceBase core; /* direction-independent CoreAudio state */
+} coreaudioVoiceOut;
+
+typedef struct coreaudioVoiceIn { /* capture voice */
+ HWVoiceIn hw; /* must stay first: HWVoiceIn pointers are cast to coreaudioVoiceIn pointers */
+ coreaudioVoiceBase core; /* direction-independent CoreAudio state */
+} coreaudioVoiceIn;
+
+/* Becomes true once the atexit() handlers have started running. */
+static bool gIsAtExit;
+
+/* atexit() hook: record that process teardown has begun. */
+static void coreaudio_atexit(void)
+{
+    gIsAtExit = true;
+}
+
+#if MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6
+/* The APIs used here only become available from 10.6 */
+
+/*
+ * Look up the system default input or output device through the
+ * AudioObject property API.  On success *id receives the device;
+ * the CoreAudio status code is returned unchanged.
+ */
+static OSStatus coreaudio_get_voice(AudioDeviceID *id, Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*id);
+    AudioObjectPropertyAddress address = {
+        .mSelector = isInput ? kAudioHardwarePropertyDefaultInputDevice
+                             : kAudioHardwarePropertyDefaultOutputDevice,
+        .mScope = kAudioObjectPropertyScopeGlobal,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectGetPropertyData(kAudioObjectSystemObject, &address,
+                                      0, NULL, &propertySize, id);
+}
+
+/*
+ * Query the range of buffer frame sizes the device reports for the
+ * requested direction (AudioObject property API).  The CoreAudio
+ * status code is returned unchanged.
+ */
+static OSStatus coreaudio_get_framesizerange(AudioDeviceID id,
+                                             AudioValueRange *framerange,
+                                             Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framerange);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyBufferFrameSizeRange,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectGetPropertyData(id, &address, 0, NULL,
+                                      &propertySize, framerange);
+}
+
+/*
+ * Read the device's current buffer frame size for the requested
+ * direction into *framesize (AudioObject property API).  The CoreAudio
+ * status code is returned unchanged.
+ */
+static OSStatus coreaudio_get_framesize(AudioDeviceID id,
+                                        UInt32 *framesize,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framesize);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyBufferFrameSize,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectGetPropertyData(id, &address, 0, NULL,
+                                      &propertySize, framesize);
+}
+
+/*
+ * Ask the device to use *framesize as its buffer frame size for the
+ * requested direction (AudioObject property API).  The CoreAudio
+ * status code is returned unchanged.
+ */
+static OSStatus coreaudio_set_framesize(AudioDeviceID id,
+                                        UInt32 *framesize,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framesize);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyBufferFrameSize,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectSetPropertyData(id, &address, 0, NULL,
+                                      propertySize, framesize);
+}
+
+/*
+ * Read the device's stream format for the requested direction into *d
+ * (AudioObject property API).  The CoreAudio status code is returned
+ * unchanged.
+ */
+static OSStatus coreaudio_get_streamformat(AudioDeviceID id,
+                                           AudioStreamBasicDescription *d,
+                                           Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*d);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyStreamFormat,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectGetPropertyData(id, &address, 0, NULL,
+                                      &propertySize, d);
+}
+
+/*
+ * Apply the stream format in *d to the device for the requested
+ * direction (AudioObject property API).  The CoreAudio status code is
+ * returned unchanged.
+ */
+static OSStatus coreaudio_set_streamformat(AudioDeviceID id,
+                                           AudioStreamBasicDescription *d,
+                                           Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*d);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyStreamFormat,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectSetPropertyData(id, &address, 0, NULL,
+                                      propertySize, d);
+}
+
+/*
+ * Read the device's "is running" property for the requested direction
+ * into *result (AudioObject property API).  The CoreAudio status code
+ * is returned unchanged.
+ */
+static OSStatus coreaudio_get_isrunning(AudioDeviceID id,
+                                        UInt32 *result,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*result);
+    AudioObjectPropertyAddress address = {
+        .mSelector = kAudioDevicePropertyDeviceIsRunning,
+        .mScope = isInput ? kAudioDevicePropertyScopeInput
+                          : kAudioDevicePropertyScopeOutput,
+        .mElement = kAudioObjectPropertyElementMaster,
+    };
+
+    return AudioObjectGetPropertyData(id, &address, 0, NULL,
+                                      &propertySize, result);
+}
+#else
+/* Legacy versions of functions using deprecated APIs */
+
+/*
+ * Legacy variant (deprecated AudioHardware API): look up the system
+ * default input or output device.  The CoreAudio status code is
+ * returned unchanged.
+ */
+static OSStatus coreaudio_get_voice(AudioDeviceID *id, Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*id);
+
+    if (isInput) {
+        return AudioHardwareGetProperty(
+            kAudioHardwarePropertyDefaultInputDevice, &propertySize, id);
+    }
+    return AudioHardwareGetProperty(
+        kAudioHardwarePropertyDefaultOutputDevice, &propertySize, id);
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): query the range of
+ * buffer frame sizes the device reports for the requested direction.
+ */
+static OSStatus coreaudio_get_framesizerange(AudioDeviceID id,
+                                             AudioValueRange *framerange,
+                                             Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framerange);
+    OSStatus status;
+
+    status = AudioDeviceGetProperty(id, 0, isInput,
+                                    kAudioDevicePropertyBufferFrameSizeRange,
+                                    &propertySize, framerange);
+    return status;
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): read the device's
+ * current buffer frame size for the requested direction.
+ */
+static OSStatus coreaudio_get_framesize(AudioDeviceID id,
+                                        UInt32 *framesize,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framesize);
+    OSStatus status;
+
+    status = AudioDeviceGetProperty(id, 0, isInput,
+                                    kAudioDevicePropertyBufferFrameSize,
+                                    &propertySize, framesize);
+    return status;
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): ask the device to use
+ * *framesize as its buffer frame size for the requested direction.
+ */
+static OSStatus coreaudio_set_framesize(AudioDeviceID id,
+                                        UInt32 *framesize,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*framesize);
+    OSStatus status;
+
+    status = AudioDeviceSetProperty(id, NULL, 0, isInput,
+                                    kAudioDevicePropertyBufferFrameSize,
+                                    propertySize, framesize);
+    return status;
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): read the device's stream
+ * format for the requested direction into *d.
+ */
+static OSStatus coreaudio_get_streamformat(AudioDeviceID id,
+                                           AudioStreamBasicDescription *d,
+                                           Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*d);
+    OSStatus status;
+
+    status = AudioDeviceGetProperty(id, 0, isInput,
+                                    kAudioDevicePropertyStreamFormat,
+                                    &propertySize, d);
+    return status;
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): apply the stream format
+ * in *d to the device for the requested direction.  The CoreAudio
+ * status code is returned unchanged.
+ */
+static OSStatus coreaudio_set_streamformat(AudioDeviceID id,
+                                           AudioStreamBasicDescription *d,
+                                           Boolean isInput)
+{
+    UInt32 size = sizeof(*d);
+
+    /*
+     * The second argument is a pointer; pass NULL rather than a bare 0,
+     * matching the legacy coreaudio_set_framesize() call.
+     */
+    return AudioDeviceSetProperty(
+        id,
+        NULL,
+        0,
+        isInput,
+        kAudioDevicePropertyStreamFormat,
+        size,
+        d);
+}
+
+/*
+ * Legacy variant (deprecated AudioDevice API): read the device's
+ * "is running" property for the requested direction into *result.
+ */
+static OSStatus coreaudio_get_isrunning(AudioDeviceID id,
+                                        UInt32 *result,
+                                        Boolean isInput)
+{
+    UInt32 propertySize = sizeof(*result);
+    OSStatus status;
+
+    status = AudioDeviceGetProperty(id, 0, isInput,
+                                    kAudioDevicePropertyDeviceIsRunning,
+                                    &propertySize, result);
+    return status;
+}
+#endif
static void coreaudio_logstatus (OSStatus status)
{
@@ -151,34 +408,11 @@
coreaudio_logstatus (status);
}
-/***************************************************************************************/
-/***************************************************************************************/
-/*** ***/
-/*** S H A R E D I N / O U T V O I C E ***/
-/*** ***/
-/***************************************************************************************/
-/***************************************************************************************/
-
-typedef struct coreAudioVoice {
- pthread_mutex_t mutex;
- AudioDeviceID deviceID;
- Boolean isInput;
- UInt32 bufferFrameSize;
- AudioStreamBasicDescription streamBasicDescription;
- AudioDeviceIOProc ioproc;
- int live;
- int decr;
- int pos;
-} coreaudioVoice;
-
-static inline UInt32 coreaudio_voice_isPlaying (coreaudioVoice *core)
+static inline UInt32 isPlaying (AudioDeviceID deviceID, Boolean isInput)
{
OSStatus status;
UInt32 result = 0;
- UInt32 propertySize = sizeof(core->deviceID);
- status = AudioDeviceGetProperty(
- core->deviceID, 0, core->isInput,
- kAudioDevicePropertyDeviceIsRunning, &propertySize, &result);
+ status = coreaudio_get_isrunning(deviceID, &result, isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr(status,
"Could not determine whether Device is playing\n");
@@ -186,12 +420,7 @@
return result;
}
-static void coreaudio_atexit (void)
-{
- conf.isAtexit = 1;
-}
-
-static int coreaudio_voice_lock (coreaudioVoice *core, const char *fn_name)
+static int coreaudio_lock (coreaudioVoiceBase *core, const char *fn_name)
{
int err;
@@ -204,7 +433,7 @@
return 0;
}
-static int coreaudio_voice_unlock (coreaudioVoice *core, const char *fn_name)
+static int coreaudio_unlock (coreaudioVoiceBase *core, const char *fn_name)
{
int err;
@@ -217,78 +446,20 @@
return 0;
}
-static int coreaudio_voice_ctl (coreaudioVoice *core, int cmd)
-{
- OSStatus status;
-
- switch (cmd) {
- case VOICE_ENABLE:
- /* start playback */
- D("%s: %s started\n", __FUNCTION__, core->isInput ? "input" : "output");
- if (!coreaudio_voice_isPlaying(core)) {
- status = AudioDeviceStart(core->deviceID, core->ioproc);
- if (status != kAudioHardwareNoError) {
- coreaudio_logerr (status, "Could not resume playback\n");
- }
- }
- break;
-
- case VOICE_DISABLE:
- /* stop playback */
- D("%s: %s stopped\n", __FUNCTION__, core->isInput ? "input" : "output");
- if (!conf.isAtexit) {
- if (coreaudio_voice_isPlaying(core)) {
- status = AudioDeviceStop(core->deviceID, core->ioproc);
- if (status != kAudioHardwareNoError) {
- coreaudio_logerr (status, "Could not pause playback\n");
- }
- }
- }
- break;
- }
- return 0;
-}
-
-static void coreaudio_voice_fini (coreaudioVoice *core)
+static int coreaudio_init_base(coreaudioVoiceBase *core,
+ struct audsettings *as,
+ void *drv_opaque,
+ AudioDeviceIOProc ioproc,
+ void *hw,
+ Boolean isInput)
{
OSStatus status;
int err;
-
- pthread_mutex_lock(&core->mutex);
-
- /* stop playback */
- coreaudio_voice_ctl(core, VOICE_DISABLE);
-
- /* remove callback */
- status = AudioDeviceRemoveIOProc(core->deviceID, core->ioproc);
- if (status != kAudioHardwareNoError) {
- coreaudio_logerr (status, "Could not remove IOProc\n");
- }
-
- core->deviceID = kAudioDeviceUnknown;
-
- pthread_mutex_unlock(&core->mutex);
-
- /* destroy mutex */
- err = pthread_mutex_destroy(&core->mutex);
- if (err) {
- dolog("Could not destroy mutex\nReason: %s\n", strerror (err));
- }
-}
-
-
-static int coreaudio_voice_init (coreaudioVoice *core, struct audsettings *as,
- int frameSize, AudioDeviceIOProc ioproc,
- void *hw, int input)
-{
- OSStatus status;
- UInt32 propertySize;
- int err;
- int bits = 8;
+ const char *typ = isInput ? "record" : "playback";
AudioValueRange frameRange;
- const char* typ = input ? "input" : "playback";
+ CoreaudioConf *conf = drv_opaque;
- core->isInput = input ? true : false;
+ core->isInput = isInput;
/* create mutex */
err = pthread_mutex_init(&core->mutex, NULL);
@@ -297,22 +468,9 @@
return -1;
}
- if (as->fmt == AUD_FMT_S16 || as->fmt == AUD_FMT_U16) {
- bits = 16;
- }
+ // audio_pcm_init_info (&hw->info, as);
- // TODO: audio_pcm_init_info (&hw->info, as);
- /* open default output device */
- /* note: we use DefaultSystemOutputDevice because DefaultOutputDevice seems to
- * always link to the internal speakers, and not the ones selected through system properties
- * go figure...
- */
- propertySize = sizeof(core->deviceID);
- status = AudioHardwareGetProperty(
- input ? kAudioHardwarePropertyDefaultInputDevice :
- kAudioHardwarePropertyDefaultSystemOutputDevice,
- &propertySize,
- &core->deviceID);
+ status = coreaudio_get_voice(&core->deviceID, isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ,
"Could not get default %s device\n", typ);
@@ -324,74 +482,52 @@
}
/* get minimum and maximum buffer frame sizes */
- propertySize = sizeof(frameRange);
- status = AudioDeviceGetProperty(
- core->deviceID,
- 0,
- core->isInput,
- kAudioDevicePropertyBufferFrameSizeRange,
- &propertySize,
- &frameRange);
+ status = coreaudio_get_framesizerange(core->deviceID,
+ &frameRange, isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ,
"Could not get device buffer frame range\n");
return -1;
}
- if (frameRange.mMinimum > frameSize) {
- core->bufferFrameSize = (UInt32) frameRange.mMinimum;
- dolog ("warning: Upsizing Output Buffer Frames to %f\n", frameRange.mMinimum);
+ if (frameRange.mMinimum > conf->buffer_frames) {
+ core->audioDevicePropertyBufferFrameSize = (UInt32) frameRange.mMinimum;
+ dolog ("warning: Upsizing Buffer Frames to %f\n", frameRange.mMinimum);
}
- else if (frameRange.mMaximum < frameSize) {
- core->bufferFrameSize = (UInt32) frameRange.mMaximum;
- dolog ("warning: Downsizing Output Buffer Frames to %f\n", frameRange.mMaximum);
+ else if (frameRange.mMaximum < conf->buffer_frames) {
+ core->audioDevicePropertyBufferFrameSize = (UInt32) frameRange.mMaximum;
+ dolog ("warning: Downsizing Buffer Frames to %f\n", frameRange.mMaximum);
}
else {
- core->bufferFrameSize = frameSize;
+ core->audioDevicePropertyBufferFrameSize = conf->buffer_frames;
}
/* set Buffer Frame Size */
- propertySize = sizeof(core->bufferFrameSize);
- status = AudioDeviceSetProperty(
- core->deviceID,
- NULL,
- 0,
- core->isInput,
- kAudioDevicePropertyBufferFrameSize,
- propertySize,
- &core->bufferFrameSize);
+ status = coreaudio_set_framesize(core->deviceID,
+ &core->audioDevicePropertyBufferFrameSize,
+ isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ,
"Could not set device buffer frame size %" PRIu32 "\n",
- (uint32_t)core->bufferFrameSize);
+ (uint32_t)core->audioDevicePropertyBufferFrameSize);
return -1;
}
/* get Buffer Frame Size */
- propertySize = sizeof(core->bufferFrameSize);
- status = AudioDeviceGetProperty(
- core->deviceID,
- 0,
- core->isInput,
- kAudioDevicePropertyBufferFrameSize,
- &propertySize,
- &core->bufferFrameSize);
+ status = coreaudio_get_framesize(core->deviceID,
+ &core->audioDevicePropertyBufferFrameSize,
+ isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ,
"Could not get device buffer frame size\n");
return -1;
}
- // TODO: hw->samples = *pNBuffers * core->bufferFrameSize;
+ // hw->samples = conf->nbuffers * core->audioDevicePropertyBufferFrameSize;
/* get StreamFormat */
- propertySize = sizeof(core->streamBasicDescription);
- status = AudioDeviceGetProperty(
- core->deviceID,
- 0,
- core->isInput,
- kAudioDevicePropertyStreamFormat,
- &propertySize,
- &core->streamBasicDescription);
+ status = coreaudio_get_streamformat(core->deviceID,
+ &core->streamBasicDescription,
+ isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ,
"Could not get Device Stream properties\n");
@@ -401,15 +537,9 @@
/* set Samplerate */
core->streamBasicDescription.mSampleRate = (Float64) as->freq;
- propertySize = sizeof(core->streamBasicDescription);
- status = AudioDeviceSetProperty(
- core->deviceID,
- 0,
- 0,
- core->isInput,
- kAudioDevicePropertyStreamFormat,
- propertySize,
- &core->streamBasicDescription);
+ status = coreaudio_set_streamformat(core->deviceID,
+ &core->streamBasicDescription,
+ isInput);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ, "Could not set samplerate %d\n",
as->freq);
@@ -418,20 +548,23 @@
}
/* set Callback */
- core->ioproc = ioproc;
- status = AudioDeviceAddIOProc(core->deviceID, ioproc, hw);
- if (status != kAudioHardwareNoError) {
+ core->ioprocid = NULL;
+ status = AudioDeviceCreateIOProcID(core->deviceID,
+ ioproc,
+ hw,
+ &core->ioprocid);
+ if (status != kAudioHardwareNoError || core->ioprocid == NULL) {
coreaudio_logerr2 (status, typ, "Could not set IOProc\n");
core->deviceID = kAudioDeviceUnknown;
return -1;
}
/* start Playback */
- if (!input && !coreaudio_voice_isPlaying(core)) {
- status = AudioDeviceStart(core->deviceID, core->ioproc);
+ if (!isPlaying(core->deviceID, isInput)) {
+ status = AudioDeviceStart(core->deviceID, core->ioprocid);
if (status != kAudioHardwareNoError) {
coreaudio_logerr2 (status, typ, "Could not start playback\n");
- AudioDeviceRemoveIOProc(core->deviceID, core->ioproc);
+ AudioDeviceDestroyIOProcID(core->deviceID, core->ioprocid);
core->deviceID = kAudioDeviceUnknown;
return -1;
}
@@ -440,29 +573,76 @@
return 0;
}
+static void coreaudio_fini_base (coreaudioVoiceBase *core) /* tear down one voice: stop device, drop IOProc, destroy mutex */
+{
+ OSStatus status;
+ int err;
-/***************************************************************************************/
-/***************************************************************************************/
-/*** ***/
-/*** O U T P U T V O I C E ***/
-/*** ***/
-/***************************************************************************************/
-/***************************************************************************************/
+ if (!audio_is_cleaning_up()) { /* no device calls are made while audio_is_cleaning_up() is true */
+ /* stop the device if it is currently running (playback or recording) */
+ if (isPlaying(core->deviceID, core->isInput)) {
+ status = AudioDeviceStop(core->deviceID, core->ioprocid);
+ if (status != kAudioHardwareNoError) {
+ coreaudio_logerr (status, "Could not stop %s\n",
+ core->isInput ? "recording" : "playback");
+ }
+ }
-typedef struct coreaudioVoiceOut {
- HWVoiceOut hw;
- coreaudioVoice core[1];
-} coreaudioVoiceOut;
+ /* remove callback; a failure is logged and teardown continues */
+ status = AudioDeviceDestroyIOProcID(core->deviceID,
+ core->ioprocid);
+ if (status != kAudioHardwareNoError) {
+ coreaudio_logerr (status, "Could not remove IOProc\n");
+ }
+ }
+ core->deviceID = kAudioDeviceUnknown; /* done on every path, including cleanup */
-#define CORE_OUT(hw) ((coreaudioVoiceOut*)(hw))->core
+ /* destroy mutex; a failure is only logged */
+ err = pthread_mutex_destroy(&core->mutex);
+ if (err) {
+ dolog("Could not destroy mutex\nReason: %s\n", strerror (err));
+ }
+}
+/*
+ * Handle VOICE_ENABLE / VOICE_DISABLE for a playback or capture voice by
+ * starting or stopping the device with the voice's IOProc.  Failures are
+ * only logged: the function always returns 0, and any other cmd value
+ * is ignored.
+ */
+static int coreaudio_ctl_base (coreaudioVoiceBase *core, int cmd)
+{
+    const char *what = core->isInput ? "recording" : "playback";
+    OSStatus status;
+
+    switch (cmd) {
+    case VOICE_ENABLE:
+        /* nothing to do when the device is already running */
+        if (isPlaying(core->deviceID, core->isInput)) {
+            break;
+        }
+        status = AudioDeviceStart(core->deviceID, core->ioprocid);
+        if (status != kAudioHardwareNoError) {
+            coreaudio_logerr (status, "Could not resume %s\n", what);
+        }
+        break;
+
+    case VOICE_DISABLE:
+        /* leave the device alone during cleanup or when it is not running */
+        if (audio_is_cleaning_up() ||
+            !isPlaying(core->deviceID, core->isInput)) {
+            break;
+        }
+        status = AudioDeviceStop(core->deviceID, core->ioprocid);
+        if (status != kAudioHardwareNoError) {
+            coreaudio_logerr (status, "Could not pause %s\n", what);
+        }
+        break;
+    }
+    return 0;
+}
static int coreaudio_run_out (HWVoiceOut *hw, int live)
{
int decr;
- coreaudioVoice *core = CORE_OUT(hw);
+ coreaudioVoiceBase *core = &((coreaudioVoiceOut *)hw)->core;
- if (conf.isAtexit || coreaudio_voice_lock (core, "coreaudio_run_out")) {
+ if (coreaudio_lock (core, "coreaudio_run_out")) {
return 0;
}
@@ -479,12 +659,12 @@
core->live = live - decr;
hw->rpos = core->pos;
- coreaudio_voice_unlock (core, "coreaudio_run_out");
+ coreaudio_unlock (core, "coreaudio_run_out");
return decr;
}
/* callback to feed audiooutput buffer */
-static OSStatus audioOutDeviceIOProc(
+static OSStatus audioOutputDeviceIOProc(
AudioDeviceID inDevice,
const AudioTimeStamp* inNow,
const AudioBufferList* inInputData,
@@ -496,7 +676,7 @@
UInt32 frame, frameCount;
float *out = outOutputData->mBuffers[0].mData;
HWVoiceOut *hw = hwptr;
- coreaudioVoice *core = CORE_OUT(hw);
+ coreaudioVoiceBase *core = &((coreaudioVoiceOut *) hw)->core;
int rpos, live;
struct st_sample *src;
#ifndef FLOAT_MIXENG
@@ -507,18 +687,18 @@
#endif
#endif
- if (coreaudio_voice_lock (core, "audioDeviceIOProc")) {
+ if (coreaudio_lock (core, "audioOutputDeviceIOProc")) {
inInputTime = 0;
return 0;
}
- frameCount = core->bufferFrameSize;
+ frameCount = core->audioDevicePropertyBufferFrameSize;
live = core->live;
/* if there are not enough samples, set signal and return */
- if (live < (int)frameCount) {
+ if (live < frameCount) {
inInputTime = 0;
- coreaudio_voice_unlock (core, "audioDeviceIOProc(empty)");
+ coreaudio_unlock (core, "audioOutputDeviceIOProc(empty)");
return 0;
}
@@ -545,7 +725,7 @@
core->decr += frameCount;
core->pos = rpos;
- coreaudio_voice_unlock (core, "audioDeviceIOProc");
+ coreaudio_unlock (core, "audioOutputDeviceIOProc");
return 0;
}
@@ -554,75 +734,54 @@
return audio_pcm_sw_write (sw, buf, len);
}
-static int coreaudio_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int coreaudio_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
- coreaudioVoice *core = CORE_OUT(hw);
- int err;
+ coreaudioVoiceBase *core = &((coreaudioVoiceOut *) hw)->core;
+ CoreaudioConf *conf = drv_opaque;
audio_pcm_init_info (&hw->info, as);
- err = coreaudio_voice_init (core, as, conf.out_buffer_frames, audioOutDeviceIOProc, hw, 0);
- if (err < 0)
- return err;
+ if (coreaudio_init_base(core, as, drv_opaque,
+ audioOutputDeviceIOProc, hw, false) < 0) {
+ return -1;
+ }
- hw->samples = core->bufferFrameSize * conf.out_nbuffers;
+ hw->samples = conf->nbuffers * core->audioDevicePropertyBufferFrameSize;
+
return 0;
}
-
static void coreaudio_fini_out (HWVoiceOut *hw)
{
- coreaudioVoice *core = CORE_OUT(hw);
-
- coreaudio_voice_fini (core);
+ coreaudioVoiceBase *core = &((coreaudioVoiceOut *) hw)->core;
+ coreaudio_fini_base(core);
}
-static int
-coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
+static int coreaudio_ctl_out (HWVoiceOut *hw, int cmd, ...)
{
- coreaudioVoice *core = CORE_OUT(hw);
-
- return coreaudio_voice_ctl (core, cmd);
+ coreaudioVoiceBase *core = &((coreaudioVoiceOut *) hw)->core;
+ return coreaudio_ctl_base(core, cmd);
}
-/***************************************************************************************/
-/***************************************************************************************/
-/*** ***/
-/*** I N P U T V O I C E ***/
-/*** ***/
-/***************************************************************************************/
-/***************************************************************************************/
-
-
-
-typedef struct coreaudioVoiceIn {
- HWVoiceIn hw;
- coreaudioVoice core[1];
-} coreaudioVoiceIn;
-
-#define CORE_IN(hw) ((coreaudioVoiceIn *) (hw))->core
-
-
static int coreaudio_run_in (HWVoiceIn *hw)
{
int decr;
- coreaudioVoice *core = CORE_IN(hw);
+ coreaudioVoiceBase *core = &((coreaudioVoiceIn *)hw)->core;
- if (conf.isAtexit || coreaudio_voice_lock (core, "coreaudio_run_in")) {
+ if (gIsAtExit || coreaudio_lock (core, "coreaudio_run_in")) {
return 0;
}
- D("%s: core.decr=%d core.pos=%d\n", __FUNCTION__, core->decr, core->pos);
decr = core->decr;
core->decr -= decr;
hw->wpos = core->pos;
- coreaudio_voice_unlock (core, "coreaudio_run_in");
+ coreaudio_unlock (core, "coreaudio_run_in");
return decr;
}
-
-/* callback to feed audiooutput buffer */
-static OSStatus audioInDeviceIOProc(
+/* callback to feed audioinput buffer */
+static OSStatus audioInputDeviceIOProc(
AudioDeviceID inDevice,
const AudioTimeStamp* inNow,
const AudioBufferList* inInputData,
@@ -634,7 +793,7 @@
UInt32 frame, frameCount;
float *in = inInputData->mBuffers[0].mData;
HWVoiceIn *hw = hwptr;
- coreaudioVoice *core = CORE_IN(hw);
+ coreaudioVoiceBase *core = &((coreaudioVoiceIn *)hw)->core;
int wpos, avail;
struct st_sample *dst;
#ifndef FLOAT_MIXENG
@@ -645,21 +804,18 @@
#endif
#endif
- if (coreaudio_voice_lock (core, "audioDeviceIOProc")) {
+ if (coreaudio_lock (core, "audioInputDeviceIOProc")) {
inInputTime = 0;
return 0;
}
- frameCount = core->bufferFrameSize;
+ frameCount = core->audioDevicePropertyBufferFrameSize;
avail = hw->samples - hw->total_samples_captured - core->decr;
- D("%s: enter avail=%d core.decr=%d core.pos=%d hw.samples=%d hw.total_samples_captured=%d frameCount=%d\n",
- __FUNCTION__, avail, core->decr, core->pos, hw->samples, hw->total_samples_captured, (int)frameCount);
-
/* if there are not enough samples, set signal and return */
if (avail < (int)frameCount) {
inInputTime = 0;
- coreaudio_voice_unlock (core, "audioDeviceIOProc(empty)");
+ coreaudio_unlock (core, "audioInputDeviceIOProc(empty)");
return 0;
}
@@ -686,84 +842,75 @@
core->decr += frameCount;
core->pos = wpos;
- D("exit: core.decr=%d core.pos=%d\n", core->decr, core->pos);
- coreaudio_voice_unlock (core, "audioDeviceIOProc");
+ coreaudio_unlock (core, "audioInputDeviceIOProc");
return 0;
}
static int coreaudio_read (SWVoiceIn *sw, void *buf, int len)
{
- int result = audio_pcm_sw_read (sw, buf, len);
- D("%s: audio_pcm_sw_read(%d) returned %d\n", __FUNCTION__, len, result);
- return result;
+ return audio_pcm_sw_read (sw, buf, len);
}
-static int coreaudio_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int coreaudio_init_in(HWVoiceIn *hw, struct audsettings *as,
+ void *drv_opaque)
{
- coreaudioVoice* core = CORE_IN(hw);
- int err;
+ coreaudioVoiceBase *core = &((coreaudioVoiceIn *) hw)->core;
+ CoreaudioConf *conf = drv_opaque;
audio_pcm_init_info (&hw->info, as);
- err = coreaudio_voice_init (core, as, conf.in_buffer_frames, audioInDeviceIOProc, hw, 1);
- if (err < 0) {
- return err;
+ if (coreaudio_init_base(core, as, drv_opaque,
+ audioInputDeviceIOProc, hw, true) < 0) {
+ return -1;
}
- hw->samples = core->bufferFrameSize * conf.in_nbuffers;
+ hw->samples = conf->nbuffers * core->audioDevicePropertyBufferFrameSize;
+
return 0;
}
-
static void coreaudio_fini_in (HWVoiceIn *hw)
{
-
- coreaudioVoice* core = CORE_IN(hw);
-
- coreaudio_voice_fini(core);
+ coreaudioVoiceBase *core = &((coreaudioVoiceIn *) hw)->core;
+ coreaudio_fini_base(core);
}
static int coreaudio_ctl_in (HWVoiceIn *hw, int cmd, ...)
{
- coreaudioVoice* core = CORE_IN(hw);
-
- return coreaudio_voice_ctl(core, cmd);
+ coreaudioVoiceBase *core = &((coreaudioVoiceIn *) hw)->core;
+ return coreaudio_ctl_base(core, cmd);
}
+static CoreaudioConf glob_conf = { /* defaults; coreaudio_audio_init() copies this, and coreaudio_options points at its fields */
+ .buffer_frames = 512,
+ .nbuffers = 4,
+};
+
static void *coreaudio_audio_init (void)
{
+ CoreaudioConf *conf = g_malloc(sizeof(CoreaudioConf));
+ *conf = glob_conf;
+
atexit(coreaudio_atexit);
- return &coreaudio_audio_init;
+ return conf;
}
static void coreaudio_audio_fini (void *opaque)
{
- (void) opaque;
+ g_free(opaque);
}
static struct audio_option coreaudio_options[] = {
{
- .name = "OUT_BUFFER_SIZE",
+ .name = "BUFFER_SIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.out_buffer_frames,
- .descr = "Size of the output buffer in frames"
+ .valp = &glob_conf.buffer_frames,
+ .descr = "Size of the buffer in frames"
},
{
- .name = "OUT_BUFFER_COUNT",
+ .name = "BUFFER_COUNT",
.tag = AUD_OPT_INT,
- .valp = &conf.out_nbuffers,
- .descr = "Number of output buffers"
- },
- {
- .name = "IN_BUFFER_SIZE",
- .tag = AUD_OPT_INT,
- .valp = &conf.in_buffer_frames,
- .descr = "Size of the input buffer in frames"
- },
- {
- .name = "IN_BUFFER_COUNT",
- .tag = AUD_OPT_INT,
- .valp = &conf.in_nbuffers,
- .descr = "Number of input buffers"
+ .valp = &glob_conf.nbuffers,
+ .descr = "Number of buffers"
},
{ /* End of list */ }
};
diff --git a/audio/dsound_template.h b/audio/dsound_template.h
index 8b37d16..b439f33 100644
--- a/audio/dsound_template.h
+++ b/audio/dsound_template.h
@@ -67,11 +67,11 @@
LPVOID *p2p,
DWORD *blen1p,
DWORD *blen2p,
- int entire
+ int entire,
+ dsound *s
)
{
HRESULT hr;
- int i;
LPVOID p1 = NULL, p2 = NULL;
DWORD blen1 = 0, blen2 = 0;
DWORD flag;
@@ -81,37 +81,18 @@
#else
flag = entire ? DSBLOCK_ENTIREBUFFER : 0;
#endif
- for (i = 0; i < conf.lock_retries; ++i) {
- hr = glue (IFACE, _Lock) (
- buf,
- pos,
- len,
- &p1,
- &blen1,
- &p2,
- &blen2,
- flag
- );
+ hr = glue(IFACE, _Lock)(buf, pos, len, &p1, &blen1, &p2, &blen2, flag);
- if (FAILED (hr)) {
+ if (FAILED (hr)) {
#ifndef DSBTYPE_IN
- if (hr == DSERR_BUFFERLOST) {
- if (glue (dsound_restore_, TYPE) (buf)) {
- dsound_logerr (hr, "Could not lock " NAME "\n");
- goto fail;
- }
- continue;
+ if (hr == DSERR_BUFFERLOST) {
+ if (glue (dsound_restore_, TYPE) (buf, s)) {
+ dsound_logerr (hr, "Could not lock " NAME "\n");
}
-#endif
- dsound_logerr (hr, "Could not lock " NAME "\n");
goto fail;
}
-
- break;
- }
-
- if (i == conf.lock_retries) {
- dolog ("%d attempts to lock " NAME " failed\n", i);
+#endif
+ dsound_logerr (hr, "Could not lock " NAME "\n");
goto fail;
}
@@ -174,16 +155,19 @@
}
#ifdef DSBTYPE_IN
-static int dsound_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int dsound_init_in(HWVoiceIn *hw, struct audsettings *as,
+ void *drv_opaque)
#else
-static int dsound_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int dsound_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
#endif
{
int err;
HRESULT hr;
- dsound *s = &glob_dsound;
+ dsound *s = drv_opaque;
WAVEFORMATEX wfx;
struct audsettings obt_as;
+ DSoundConf *conf = &s->conf;
#ifdef DSBTYPE_IN
const char *typ = "ADC";
DSoundVoiceIn *ds = (DSoundVoiceIn *) hw;
@@ -210,7 +194,7 @@
bd.dwSize = sizeof (bd);
bd.lpwfxFormat = &wfx;
#ifdef DSBTYPE_IN
- bd.dwBufferBytes = conf.bufsize_in;
+ bd.dwBufferBytes = conf->bufsize_in;
hr = IDirectSoundCapture_CreateCaptureBuffer (
s->dsound_capture,
&bd,
@@ -219,7 +203,7 @@
);
#else
bd.dwFlags = DSBCAPS_STICKYFOCUS | DSBCAPS_GETCURRENTPOSITION2;
- bd.dwBufferBytes = conf.bufsize_out;
+ bd.dwBufferBytes = conf->bufsize_out;
hr = IDirectSound_CreateSoundBuffer (
s->dsound,
&bd,
@@ -269,6 +253,7 @@
);
}
hw->samples = bc.dwBufferBytes >> hw->info.shift;
+ ds->s = s;
#ifdef DEBUG_DSOUND
dolog ("caps %ld, desc %ld\n",
diff --git a/audio/dsoundaudio.c b/audio/dsoundaudio.c
index e2d89fd..516846e 100644
--- a/audio/dsoundaudio.c
+++ b/audio/dsoundaudio.c
@@ -26,6 +26,7 @@
* SEAL 1.07 by Carlos 'pel' Hasan was used as documentation
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "audio.h"
@@ -41,42 +42,25 @@
/* #define DEBUG_DSOUND */
-static struct {
- int lock_retries;
- int restore_retries;
- int getstatus_retries;
- int set_primary;
+typedef struct {
int bufsize_in;
int bufsize_out;
- struct audsettings settings;
int latency_millis;
-} conf = {
- .lock_retries = 1,
- .restore_retries = 1,
- .getstatus_retries = 1,
- .set_primary = 0,
- .bufsize_in = 16384,
- .bufsize_out = 16384,
- .settings.freq = 44100,
- .settings.nchannels = 2,
- .settings.fmt = AUD_FMT_S16,
- .latency_millis = 10
-};
+} DSoundConf;
typedef struct {
LPDIRECTSOUND dsound;
LPDIRECTSOUNDCAPTURE dsound_capture;
- LPDIRECTSOUNDBUFFER dsound_primary_buffer;
struct audsettings settings;
+ DSoundConf conf;
} dsound;
-static dsound glob_dsound;
-
typedef struct {
HWVoiceOut hw;
LPDIRECTSOUNDBUFFER dsound_buffer;
DWORD old_pos;
int first_time;
+ dsound *s;
#ifdef DEBUG_DSOUND
DWORD old_ppos;
DWORD played;
@@ -88,6 +72,7 @@
HWVoiceIn hw;
int first_time;
LPDIRECTSOUNDCAPTUREBUFFER dsound_capture_buffer;
+ dsound *s;
} DSoundVoiceIn;
static void dsound_log_hresult (HRESULT hr)
@@ -281,29 +266,17 @@
}
#endif
-static int dsound_restore_out (LPDIRECTSOUNDBUFFER dsb)
+static int dsound_restore_out (LPDIRECTSOUNDBUFFER dsb, dsound *s)
{
HRESULT hr;
- int i;
- for (i = 0; i < conf.restore_retries; ++i) {
- hr = IDirectSoundBuffer_Restore (dsb);
+ hr = IDirectSoundBuffer_Restore (dsb);
- switch (hr) {
- case DS_OK:
- return 0;
-
- case DSERR_BUFFERLOST:
- continue;
-
- default:
- dsound_logerr (hr, "Could not restore playback buffer\n");
- return -1;
- }
+ if (hr != DS_OK) {
+ dsound_logerr (hr, "Could not restore playback buffer\n");
+ return -1;
}
-
- dolog ("%d attempts to restore playback buffer failed\n", i);
- return -1;
+ return 0;
}
#include "dsound_template.h"
@@ -311,25 +284,20 @@
#include "dsound_template.h"
#undef DSBTYPE_IN
-static int dsound_get_status_out (LPDIRECTSOUNDBUFFER dsb, DWORD *statusp)
+static int dsound_get_status_out (LPDIRECTSOUNDBUFFER dsb, DWORD *statusp,
+ dsound *s)
{
HRESULT hr;
- int i;
- for (i = 0; i < conf.getstatus_retries; ++i) {
- hr = IDirectSoundBuffer_GetStatus (dsb, statusp);
- if (FAILED (hr)) {
- dsound_logerr (hr, "Could not get playback buffer status\n");
- return -1;
- }
+ hr = IDirectSoundBuffer_GetStatus (dsb, statusp);
+ if (FAILED (hr)) {
+ dsound_logerr (hr, "Could not get playback buffer status\n");
+ return -1;
+ }
- if (*statusp & DSERR_BUFFERLOST) {
- if (dsound_restore_out (dsb)) {
- return -1;
- }
- continue;
- }
- break;
+ if (*statusp & DSERR_BUFFERLOST) {
+ dsound_restore_out(dsb, s);
+ return -1;
}
return 0;
@@ -376,7 +344,8 @@
hw->rpos = pos % hw->samples;
}
-static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb)
+static void dsound_clear_sample (HWVoiceOut *hw, LPDIRECTSOUNDBUFFER dsb,
+ dsound *s)
{
int err;
LPVOID p1, p2;
@@ -389,7 +358,8 @@
hw->samples << hw->info.shift,
&p1, &p2,
&blen1, &blen2,
- 1
+ 1,
+ s
);
if (err) {
return;
@@ -415,25 +385,9 @@
dsound_unlock_out (dsb, p1, p2, blen1, blen2);
}
-static void dsound_close (dsound *s)
-{
- HRESULT hr;
-
- if (s->dsound_primary_buffer) {
- hr = IDirectSoundBuffer_Release (s->dsound_primary_buffer);
- if (FAILED (hr)) {
- dsound_logerr (hr, "Could not release primary buffer\n");
- }
- s->dsound_primary_buffer = NULL;
- }
-}
-
static int dsound_open (dsound *s)
{
- int err;
HRESULT hr;
- WAVEFORMATEX wfx;
- DSBUFFERDESC dsbd;
HWND hwnd;
hwnd = GetForegroundWindow ();
@@ -449,63 +403,7 @@
return -1;
}
- if (!conf.set_primary) {
- return 0;
- }
-
- err = waveformat_from_audio_settings (&wfx, &conf.settings);
- if (err) {
- return -1;
- }
-
- memset (&dsbd, 0, sizeof (dsbd));
- dsbd.dwSize = sizeof (dsbd);
- dsbd.dwFlags = DSBCAPS_PRIMARYBUFFER;
- dsbd.dwBufferBytes = 0;
- dsbd.lpwfxFormat = NULL;
-
- hr = IDirectSound_CreateSoundBuffer (
- s->dsound,
- &dsbd,
- &s->dsound_primary_buffer,
- NULL
- );
- if (FAILED (hr)) {
- dsound_logerr (hr, "Could not create primary playback buffer\n");
- return -1;
- }
-
- hr = IDirectSoundBuffer_SetFormat (s->dsound_primary_buffer, &wfx);
- if (FAILED (hr)) {
- dsound_logerr (hr, "Could not set primary playback buffer format\n");
- }
-
- hr = IDirectSoundBuffer_GetFormat (
- s->dsound_primary_buffer,
- &wfx,
- sizeof (wfx),
- NULL
- );
- if (FAILED (hr)) {
- dsound_logerr (hr, "Could not get primary playback buffer format\n");
- goto fail0;
- }
-
-#ifdef DEBUG_DSOUND
- dolog ("Primary\n");
- print_wave_format (&wfx);
-#endif
-
- err = waveformat_to_audio_settings (&wfx, &s->settings);
- if (err) {
- goto fail0;
- }
-
return 0;
-
- fail0:
- dsound_close (s);
- return -1;
}
static int dsound_ctl_out (HWVoiceOut *hw, int cmd, ...)
@@ -514,6 +412,7 @@
DWORD status;
DSoundVoiceOut *ds = (DSoundVoiceOut *) hw;
LPDIRECTSOUNDBUFFER dsb = ds->dsound_buffer;
+ dsound *s = ds->s;
if (!dsb) {
dolog ("Attempt to control voice without a buffer\n");
@@ -522,7 +421,7 @@
switch (cmd) {
case VOICE_ENABLE:
- if (dsound_get_status_out (dsb, &status)) {
+ if (dsound_get_status_out (dsb, &status, s)) {
return -1;
}
@@ -531,7 +430,7 @@
return 0;
}
- dsound_clear_sample (hw, dsb);
+ dsound_clear_sample (hw, dsb, s);
hr = IDirectSoundBuffer_Play (dsb, 0, 0, DSBPLAY_LOOPING);
if (FAILED (hr)) {
@@ -541,7 +440,7 @@
break;
case VOICE_DISABLE:
- if (dsound_get_status_out (dsb, &status)) {
+ if (dsound_get_status_out (dsb, &status, s)) {
return -1;
}
@@ -578,6 +477,8 @@
DWORD wpos, ppos, old_pos;
LPVOID p1, p2;
int bufsize;
+ dsound *s = ds->s;
+ DSoundConf *conf = &s->conf;
if (!dsb) {
dolog ("Attempt to run empty with playback buffer\n");
@@ -600,14 +501,14 @@
len = live << hwshift;
if (ds->first_time) {
- if (conf.latency_millis) {
+ if (conf->latency_millis) {
DWORD cur_blat;
cur_blat = audio_ring_dist (wpos, ppos, bufsize);
ds->first_time = 0;
old_pos = wpos;
old_pos +=
- millis_to_bytes (&hw->info, conf.latency_millis) - cur_blat;
+ millis_to_bytes (&hw->info, conf->latency_millis) - cur_blat;
old_pos %= bufsize;
old_pos &= ~hw->info.align;
}
@@ -663,7 +564,8 @@
len,
&p1, &p2,
&blen1, &blen2,
- 0
+ 0,
+ s
);
if (err) {
return 0;
@@ -766,6 +668,7 @@
DWORD cpos, rpos;
LPVOID p1, p2;
int hwshift;
+ dsound *s = ds->s;
if (!dscb) {
dolog ("Attempt to run without capture buffer\n");
@@ -820,7 +723,8 @@
&p2,
&blen1,
&blen2,
- 0
+ 0,
+ s
);
if (err) {
return 0;
@@ -843,12 +747,19 @@
return decr;
}
+static DSoundConf glob_conf = {
+ .bufsize_in = 16384,
+ .bufsize_out = 16384,
+ .latency_millis = 10
+};
+
static void dsound_audio_fini (void *opaque)
{
HRESULT hr;
dsound *s = opaque;
if (!s->dsound) {
+ g_free(s);
return;
}
@@ -859,6 +770,7 @@
s->dsound = NULL;
if (!s->dsound_capture) {
+ g_free(s);
return;
}
@@ -867,17 +779,21 @@
dsound_logerr (hr, "Could not release DirectSoundCapture\n");
}
s->dsound_capture = NULL;
+
+ g_free(s);
}
static void *dsound_audio_init (void)
{
int err;
HRESULT hr;
- dsound *s = &glob_dsound;
+ dsound *s = g_malloc0(sizeof(dsound));
+ s->conf = glob_conf;
hr = CoInitialize (NULL);
if (FAILED (hr)) {
dsound_logerr (hr, "Could not initialize COM\n");
+ g_free(s);
return NULL;
}
@@ -890,6 +806,7 @@
);
if (FAILED (hr)) {
dsound_logerr (hr, "Could not create DirectSound instance\n");
+ g_free(s);
return NULL;
}
@@ -901,7 +818,7 @@
if (FAILED (hr)) {
dsound_logerr (hr, "Could not release DirectSound\n");
}
- s->dsound = NULL;
+ g_free(s);
return NULL;
}
@@ -939,63 +856,21 @@
static struct audio_option dsound_options[] = {
{
- .name = "LOCK_RETRIES",
- .tag = AUD_OPT_INT,
- .valp = &conf.lock_retries,
- .descr = "Number of times to attempt locking the buffer"
- },
- {
- .name = "RESTOURE_RETRIES",
- .tag = AUD_OPT_INT,
- .valp = &conf.restore_retries,
- .descr = "Number of times to attempt restoring the buffer"
- },
- {
- .name = "GETSTATUS_RETRIES",
- .tag = AUD_OPT_INT,
- .valp = &conf.getstatus_retries,
- .descr = "Number of times to attempt getting status of the buffer"
- },
- {
- .name = "SET_PRIMARY",
- .tag = AUD_OPT_BOOL,
- .valp = &conf.set_primary,
- .descr = "Set the parameters of primary buffer"
- },
- {
.name = "LATENCY_MILLIS",
.tag = AUD_OPT_INT,
- .valp = &conf.latency_millis,
+ .valp = &glob_conf.latency_millis,
.descr = "(undocumented)"
},
{
- .name = "PRIMARY_FREQ",
- .tag = AUD_OPT_INT,
- .valp = &conf.settings.freq,
- .descr = "Primary buffer frequency"
- },
- {
- .name = "PRIMARY_CHANNELS",
- .tag = AUD_OPT_INT,
- .valp = &conf.settings.nchannels,
- .descr = "Primary buffer number of channels (1 - mono, 2 - stereo)"
- },
- {
- .name = "PRIMARY_FMT",
- .tag = AUD_OPT_FMT,
- .valp = &conf.settings.fmt,
- .descr = "Primary buffer format"
- },
- {
.name = "BUFSIZE_OUT",
.tag = AUD_OPT_INT,
- .valp = &conf.bufsize_out,
+ .valp = &glob_conf.bufsize_out,
.descr = "(undocumented)"
},
{
.name = "BUFSIZE_IN",
.tag = AUD_OPT_INT,
- .valp = &conf.bufsize_in,
+ .valp = &glob_conf.bufsize_in,
.descr = "(undocumented)"
},
{ /* End of list */ }
diff --git a/audio/esdaudio.c b/audio/esdaudio.c
deleted file mode 100644
index eea9cce..0000000
--- a/audio/esdaudio.c
+++ /dev/null
@@ -1,557 +0,0 @@
-/*
- * QEMU ESD audio driver
- *
- * Copyright (c) 2006 Frederick Reeve (brushed up by malc)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <esd.h>
-#include "qemu-common.h"
-#include "audio.h"
-
-#define AUDIO_CAP "esd"
-#include "audio_int.h"
-#include "audio_pt_int.h"
-
-typedef struct {
- HWVoiceOut hw;
- int done;
- int live;
- int decr;
- int rpos;
- void *pcm_buf;
- int fd;
- struct audio_pt pt;
-} ESDVoiceOut;
-
-typedef struct {
- HWVoiceIn hw;
- int done;
- int dead;
- int incr;
- int wpos;
- void *pcm_buf;
- int fd;
- struct audio_pt pt;
-} ESDVoiceIn;
-
-static struct {
- int samples;
- int divisor;
- char *dac_host;
- char *adc_host;
-} conf = {
- .samples = 1024,
- .divisor = 2,
-};
-
-static void GCC_FMT_ATTR (2, 3) qesd_logerr (int err, const char *fmt, ...)
-{
- va_list ap;
-
- va_start (ap, fmt);
- AUD_vlog (AUDIO_CAP, fmt, ap);
- va_end (ap);
-
- AUD_log (AUDIO_CAP, "Reason: %s\n", strerror (err));
-}
-
-/* playback */
-static void *qesd_thread_out (void *arg)
-{
- ESDVoiceOut *esd = arg;
- HWVoiceOut *hw = &esd->hw;
- int threshold;
-
- threshold = conf.divisor ? hw->samples / conf.divisor : 0;
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- for (;;) {
- int decr, to_mix, rpos;
-
- for (;;) {
- if (esd->done) {
- goto exit;
- }
-
- if (esd->live > threshold) {
- break;
- }
-
- if (audio_pt_wait (&esd->pt, AUDIO_FUNC)) {
- goto exit;
- }
- }
-
- decr = to_mix = esd->live;
- rpos = hw->rpos;
-
- if (audio_pt_unlock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- while (to_mix) {
- ssize_t written;
- int chunk = audio_MIN (to_mix, hw->samples - rpos);
- struct st_sample *src = hw->mix_buf + rpos;
-
- hw->clip (esd->pcm_buf, src, chunk);
-
- again:
- written = write (esd->fd, esd->pcm_buf, chunk << hw->info.shift);
- if (written == -1) {
- if (errno == EINTR || errno == EAGAIN) {
- goto again;
- }
- qesd_logerr (errno, "write failed\n");
- return NULL;
- }
-
- if (written != chunk << hw->info.shift) {
- int wsamples = written >> hw->info.shift;
- int wbytes = wsamples << hw->info.shift;
- if (wbytes != written) {
- dolog ("warning: Misaligned write %d (requested %zd), "
- "alignment %d\n",
- wbytes, written, hw->info.align + 1);
- }
- to_mix -= wsamples;
- rpos = (rpos + wsamples) % hw->samples;
- break;
- }
-
- rpos = (rpos + chunk) % hw->samples;
- to_mix -= chunk;
- }
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- esd->rpos = rpos;
- esd->live -= decr;
- esd->decr += decr;
- }
-
- exit:
- audio_pt_unlock (&esd->pt, AUDIO_FUNC);
- return NULL;
-}
-
-static int qesd_run_out (HWVoiceOut *hw, int live)
-{
- int decr;
- ESDVoiceOut *esd = (ESDVoiceOut *) hw;
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return 0;
- }
-
- decr = audio_MIN (live, esd->decr);
- esd->decr -= decr;
- esd->live = live - decr;
- hw->rpos = esd->rpos;
- if (esd->live > 0) {
- audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
- }
- else {
- audio_pt_unlock (&esd->pt, AUDIO_FUNC);
- }
- return decr;
-}
-
-static int qesd_write (SWVoiceOut *sw, void *buf, int len)
-{
- return audio_pcm_sw_write (sw, buf, len);
-}
-
-static int qesd_init_out (HWVoiceOut *hw, struct audsettings *as)
-{
- ESDVoiceOut *esd = (ESDVoiceOut *) hw;
- struct audsettings obt_as = *as;
- int esdfmt = ESD_STREAM | ESD_PLAY;
-
- esdfmt |= (as->nchannels == 2) ? ESD_STEREO : ESD_MONO;
- switch (as->fmt) {
- case AUD_FMT_S8:
- case AUD_FMT_U8:
- esdfmt |= ESD_BITS8;
- obt_as.fmt = AUD_FMT_U8;
- break;
-
- case AUD_FMT_S32:
- case AUD_FMT_U32:
- dolog ("Will use 16 instead of 32 bit samples\n");
- /* fall through */
- case AUD_FMT_S16:
- case AUD_FMT_U16:
- deffmt:
- esdfmt |= ESD_BITS16;
- obt_as.fmt = AUD_FMT_S16;
- break;
-
- default:
- dolog ("Internal logic error: Bad audio format %d\n", as->fmt);
- goto deffmt;
-
- }
- obt_as.endianness = AUDIO_HOST_ENDIANNESS;
-
- audio_pcm_init_info (&hw->info, &obt_as);
-
- hw->samples = conf.samples;
- esd->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
- if (!esd->pcm_buf) {
- dolog ("Could not allocate buffer (%d bytes)\n",
- hw->samples << hw->info.shift);
- return -1;
- }
-
- esd->fd = esd_play_stream (esdfmt, as->freq, conf.dac_host, NULL);
- if (esd->fd < 0) {
- qesd_logerr (errno, "esd_play_stream failed\n");
- goto fail1;
- }
-
- if (audio_pt_init (&esd->pt, qesd_thread_out, esd, AUDIO_CAP, AUDIO_FUNC)) {
- goto fail2;
- }
-
- return 0;
-
- fail2:
- if (close (esd->fd)) {
- qesd_logerr (errno, "%s: close on esd socket(%d) failed\n",
- AUDIO_FUNC, esd->fd);
- }
- esd->fd = -1;
-
- fail1:
- g_free (esd->pcm_buf);
- esd->pcm_buf = NULL;
- return -1;
-}
-
-static void qesd_fini_out (HWVoiceOut *hw)
-{
- void *ret;
- ESDVoiceOut *esd = (ESDVoiceOut *) hw;
-
- audio_pt_lock (&esd->pt, AUDIO_FUNC);
- esd->done = 1;
- audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
- audio_pt_join (&esd->pt, &ret, AUDIO_FUNC);
-
- if (esd->fd >= 0) {
- if (close (esd->fd)) {
- qesd_logerr (errno, "failed to close esd socket\n");
- }
- esd->fd = -1;
- }
-
- audio_pt_fini (&esd->pt, AUDIO_FUNC);
-
- g_free (esd->pcm_buf);
- esd->pcm_buf = NULL;
-}
-
-static int qesd_ctl_out (HWVoiceOut *hw, int cmd, ...)
-{
- (void) hw;
- (void) cmd;
- return 0;
-}
-
-/* capture */
-static void *qesd_thread_in (void *arg)
-{
- ESDVoiceIn *esd = arg;
- HWVoiceIn *hw = &esd->hw;
- int threshold;
-
- threshold = conf.divisor ? hw->samples / conf.divisor : 0;
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- for (;;) {
- int incr, to_grab, wpos;
-
- for (;;) {
- if (esd->done) {
- goto exit;
- }
-
- if (esd->dead > threshold) {
- break;
- }
-
- if (audio_pt_wait (&esd->pt, AUDIO_FUNC)) {
- goto exit;
- }
- }
-
- incr = to_grab = esd->dead;
- wpos = hw->wpos;
-
- if (audio_pt_unlock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- while (to_grab) {
- ssize_t nread;
- int chunk = audio_MIN (to_grab, hw->samples - wpos);
- void *buf = advance (esd->pcm_buf, wpos);
-
- again:
- nread = read (esd->fd, buf, chunk << hw->info.shift);
- if (nread == -1) {
- if (errno == EINTR || errno == EAGAIN) {
- goto again;
- }
- qesd_logerr (errno, "read failed\n");
- return NULL;
- }
-
- if (nread != chunk << hw->info.shift) {
- int rsamples = nread >> hw->info.shift;
- int rbytes = rsamples << hw->info.shift;
- if (rbytes != nread) {
- dolog ("warning: Misaligned write %d (requested %zd), "
- "alignment %d\n",
- rbytes, nread, hw->info.align + 1);
- }
- to_grab -= rsamples;
- wpos = (wpos + rsamples) % hw->samples;
- break;
- }
-
- hw->conv (hw->conv_buf + wpos, buf, nread >> hw->info.shift);
- wpos = (wpos + chunk) % hw->samples;
- to_grab -= chunk;
- }
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return NULL;
- }
-
- esd->wpos = wpos;
- esd->dead -= incr;
- esd->incr += incr;
- }
-
- exit:
- audio_pt_unlock (&esd->pt, AUDIO_FUNC);
- return NULL;
-}
-
-static int qesd_run_in (HWVoiceIn *hw)
-{
- int live, incr, dead;
- ESDVoiceIn *esd = (ESDVoiceIn *) hw;
-
- if (audio_pt_lock (&esd->pt, AUDIO_FUNC)) {
- return 0;
- }
-
- live = audio_pcm_hw_get_live_in (hw);
- dead = hw->samples - live;
- incr = audio_MIN (dead, esd->incr);
- esd->incr -= incr;
- esd->dead = dead - incr;
- hw->wpos = esd->wpos;
- if (esd->dead > 0) {
- audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
- }
- else {
- audio_pt_unlock (&esd->pt, AUDIO_FUNC);
- }
- return incr;
-}
-
-static int qesd_read (SWVoiceIn *sw, void *buf, int len)
-{
- return audio_pcm_sw_read (sw, buf, len);
-}
-
-static int qesd_init_in (HWVoiceIn *hw, struct audsettings *as)
-{
- ESDVoiceIn *esd = (ESDVoiceIn *) hw;
- struct audsettings obt_as = *as;
- int esdfmt = ESD_STREAM | ESD_RECORD;
-
- esdfmt |= (as->nchannels == 2) ? ESD_STEREO : ESD_MONO;
- switch (as->fmt) {
- case AUD_FMT_S8:
- case AUD_FMT_U8:
- esdfmt |= ESD_BITS8;
- obt_as.fmt = AUD_FMT_U8;
- break;
-
- case AUD_FMT_S16:
- case AUD_FMT_U16:
- esdfmt |= ESD_BITS16;
- obt_as.fmt = AUD_FMT_S16;
- break;
-
- case AUD_FMT_S32:
- case AUD_FMT_U32:
- dolog ("Will use 16 instead of 32 bit samples\n");
- esdfmt |= ESD_BITS16;
- obt_as.fmt = AUD_FMT_S16;
- break;
- }
- obt_as.endianness = AUDIO_HOST_ENDIANNESS;
-
- audio_pcm_init_info (&hw->info, &obt_as);
-
- hw->samples = conf.samples;
- esd->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
- if (!esd->pcm_buf) {
- dolog ("Could not allocate buffer (%d bytes)\n",
- hw->samples << hw->info.shift);
- return -1;
- }
-
- esd->fd = esd_record_stream (esdfmt, as->freq, conf.adc_host, NULL);
- if (esd->fd < 0) {
- qesd_logerr (errno, "esd_record_stream failed\n");
- goto fail1;
- }
-
- if (audio_pt_init (&esd->pt, qesd_thread_in, esd, AUDIO_CAP, AUDIO_FUNC)) {
- goto fail2;
- }
-
- return 0;
-
- fail2:
- if (close (esd->fd)) {
- qesd_logerr (errno, "%s: close on esd socket(%d) failed\n",
- AUDIO_FUNC, esd->fd);
- }
- esd->fd = -1;
-
- fail1:
- g_free (esd->pcm_buf);
- esd->pcm_buf = NULL;
- return -1;
-}
-
-static void qesd_fini_in (HWVoiceIn *hw)
-{
- void *ret;
- ESDVoiceIn *esd = (ESDVoiceIn *) hw;
-
- audio_pt_lock (&esd->pt, AUDIO_FUNC);
- esd->done = 1;
- audio_pt_unlock_and_signal (&esd->pt, AUDIO_FUNC);
- audio_pt_join (&esd->pt, &ret, AUDIO_FUNC);
-
- if (esd->fd >= 0) {
- if (close (esd->fd)) {
- qesd_logerr (errno, "failed to close esd socket\n");
- }
- esd->fd = -1;
- }
-
- audio_pt_fini (&esd->pt, AUDIO_FUNC);
-
- g_free (esd->pcm_buf);
- esd->pcm_buf = NULL;
-}
-
-static int qesd_ctl_in (HWVoiceIn *hw, int cmd, ...)
-{
- (void) hw;
- (void) cmd;
- return 0;
-}
-
-/* common */
-static void *qesd_audio_init (void)
-{
- return &conf;
-}
-
-static void qesd_audio_fini (void *opaque)
-{
- (void) opaque;
- ldebug ("esd_fini");
-}
-
-struct audio_option qesd_options[] = {
- {
- .name = "SAMPLES",
- .tag = AUD_OPT_INT,
- .valp = &conf.samples,
- .descr = "buffer size in samples"
- },
- {
- .name = "DIVISOR",
- .tag = AUD_OPT_INT,
- .valp = &conf.divisor,
- .descr = "threshold divisor"
- },
- {
- .name = "DAC_HOST",
- .tag = AUD_OPT_STR,
- .valp = &conf.dac_host,
- .descr = "playback host"
- },
- {
- .name = "ADC_HOST",
- .tag = AUD_OPT_STR,
- .valp = &conf.adc_host,
- .descr = "capture host"
- },
- { /* End of list */ }
-};
-
-static struct audio_pcm_ops qesd_pcm_ops = {
- .init_out = qesd_init_out,
- .fini_out = qesd_fini_out,
- .run_out = qesd_run_out,
- .write = qesd_write,
- .ctl_out = qesd_ctl_out,
-
- .init_in = qesd_init_in,
- .fini_in = qesd_fini_in,
- .run_in = qesd_run_in,
- .read = qesd_read,
- .ctl_in = qesd_ctl_in,
-};
-
-struct audio_driver esd_audio_driver = {
- .name = "esd",
- .descr = "http://en.wikipedia.org/wiki/Esound",
- .options = qesd_options,
- .init = qesd_audio_init,
- .fini = qesd_audio_fini,
- .pcm_ops = &qesd_pcm_ops,
- .can_be_default = 0,
- .max_voices_out = INT_MAX,
- .max_voices_in = INT_MAX,
- .voice_size_out = sizeof (ESDVoiceOut),
- .voice_size_in = sizeof (ESDVoiceIn)
-};
diff --git a/audio/fmodaudio.c b/audio/fmodaudio.c
deleted file mode 100644
index fabf84d..0000000
--- a/audio/fmodaudio.c
+++ /dev/null
@@ -1,685 +0,0 @@
-/*
- * QEMU FMOD audio driver
- *
- * Copyright (c) 2004-2005 Vassili Karpov (malc)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <fmod.h>
-#include <fmod_errors.h>
-#include "qemu-common.h"
-#include "audio.h"
-
-#define AUDIO_CAP "fmod"
-#include "audio_int.h"
-
-typedef struct FMODVoiceOut {
- HWVoiceOut hw;
- unsigned int old_pos;
- FSOUND_SAMPLE *fmod_sample;
- int channel;
-} FMODVoiceOut;
-
-typedef struct FMODVoiceIn {
- HWVoiceIn hw;
- FSOUND_SAMPLE *fmod_sample;
-} FMODVoiceIn;
-
-static struct {
- const char *drvname;
- int nb_samples;
- int freq;
- int nb_channels;
- int bufsize;
- int broken_adc;
-} conf = {
- .nb_samples = 2048 * 2,
- .freq = 44100,
- .nb_channels = 2,
-};
-
-static void GCC_FMT_ATTR (1, 2) fmod_logerr (const char *fmt, ...)
-{
- va_list ap;
-
- va_start (ap, fmt);
- AUD_vlog (AUDIO_CAP, fmt, ap);
- va_end (ap);
-
- AUD_log (AUDIO_CAP, "Reason: %s\n",
- FMOD_ErrorString (FSOUND_GetError ()));
-}
-
-static void GCC_FMT_ATTR (2, 3) fmod_logerr2 (
- const char *typ,
- const char *fmt,
- ...
- )
-{
- va_list ap;
-
- AUD_log (AUDIO_CAP, "Could not initialize %s\n", typ);
-
- va_start (ap, fmt);
- AUD_vlog (AUDIO_CAP, fmt, ap);
- va_end (ap);
-
- AUD_log (AUDIO_CAP, "Reason: %s\n",
- FMOD_ErrorString (FSOUND_GetError ()));
-}
-
-static int fmod_write (SWVoiceOut *sw, void *buf, int len)
-{
- return audio_pcm_sw_write (sw, buf, len);
-}
-
-static void fmod_clear_sample (FMODVoiceOut *fmd)
-{
- HWVoiceOut *hw = &fmd->hw;
- int status;
- void *p1 = 0, *p2 = 0;
- unsigned int len1 = 0, len2 = 0;
-
- status = FSOUND_Sample_Lock (
- fmd->fmod_sample,
- 0,
- hw->samples << hw->info.shift,
- &p1,
- &p2,
- &len1,
- &len2
- );
-
- if (!status) {
- fmod_logerr ("Failed to lock sample\n");
- return;
- }
-
- if ((len1 & hw->info.align) || (len2 & hw->info.align)) {
- dolog ("Lock returned misaligned length %d, %d, alignment %d\n",
- len1, len2, hw->info.align + 1);
- goto fail;
- }
-
- if ((len1 + len2) - (hw->samples << hw->info.shift)) {
- dolog ("Lock returned incomplete length %d, %d\n",
- len1 + len2, hw->samples << hw->info.shift);
- goto fail;
- }
-
- audio_pcm_info_clear_buf (&hw->info, p1, hw->samples);
-
- fail:
- status = FSOUND_Sample_Unlock (fmd->fmod_sample, p1, p2, len1, len2);
- if (!status) {
- fmod_logerr ("Failed to unlock sample\n");
- }
-}
-
-static void fmod_write_sample (HWVoiceOut *hw, uint8_t *dst, int dst_len)
-{
- int src_len1 = dst_len;
- int src_len2 = 0;
- int pos = hw->rpos + dst_len;
- struct st_sample *src1 = hw->mix_buf + hw->rpos;
- struct st_sample *src2 = NULL;
-
- if (pos > hw->samples) {
- src_len1 = hw->samples - hw->rpos;
- src2 = hw->mix_buf;
- src_len2 = dst_len - src_len1;
- pos = src_len2;
- }
-
- if (src_len1) {
- hw->clip (dst, src1, src_len1);
- }
-
- if (src_len2) {
- dst = advance (dst, src_len1 << hw->info.shift);
- hw->clip (dst, src2, src_len2);
- }
-
- hw->rpos = pos % hw->samples;
-}
-
-static int fmod_unlock_sample (FSOUND_SAMPLE *sample, void *p1, void *p2,
- unsigned int blen1, unsigned int blen2)
-{
- int status = FSOUND_Sample_Unlock (sample, p1, p2, blen1, blen2);
- if (!status) {
- fmod_logerr ("Failed to unlock sample\n");
- return -1;
- }
- return 0;
-}
-
-static int fmod_lock_sample (
- FSOUND_SAMPLE *sample,
- struct audio_pcm_info *info,
- int pos,
- int len,
- void **p1,
- void **p2,
- unsigned int *blen1,
- unsigned int *blen2
- )
-{
- int status;
-
- status = FSOUND_Sample_Lock (
- sample,
- pos << info->shift,
- len << info->shift,
- p1,
- p2,
- blen1,
- blen2
- );
-
- if (!status) {
- fmod_logerr ("Failed to lock sample\n");
- return -1;
- }
-
- if ((*blen1 & info->align) || (*blen2 & info->align)) {
- dolog ("Lock returned misaligned length %d, %d, alignment %d\n",
- *blen1, *blen2, info->align + 1);
-
- fmod_unlock_sample (sample, *p1, *p2, *blen1, *blen2);
-
- *p1 = NULL - 1;
- *p2 = NULL - 1;
- *blen1 = ~0U;
- *blen2 = ~0U;
- return -1;
- }
-
- if (!*p1 && *blen1) {
- dolog ("warning: !p1 && blen1=%d\n", *blen1);
- *blen1 = 0;
- }
-
- if (!p2 && *blen2) {
- dolog ("warning: !p2 && blen2=%d\n", *blen2);
- *blen2 = 0;
- }
-
- return 0;
-}
-
-static int fmod_run_out (HWVoiceOut *hw, int live)
-{
- FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
- int decr;
- void *p1 = 0, *p2 = 0;
- unsigned int blen1 = 0, blen2 = 0;
- unsigned int len1 = 0, len2 = 0;
-
- if (!hw->pending_disable) {
- return 0;
- }
-
- decr = live;
-
- if (fmd->channel >= 0) {
- int len = decr;
- int old_pos = fmd->old_pos;
- int ppos = FSOUND_GetCurrentPosition (fmd->channel);
-
- if (ppos == old_pos || !ppos) {
- return 0;
- }
-
- if ((old_pos < ppos) && ((old_pos + len) > ppos)) {
- len = ppos - old_pos;
- }
- else {
- if ((old_pos > ppos) && ((old_pos + len) > (ppos + hw->samples))) {
- len = hw->samples - old_pos + ppos;
- }
- }
- decr = len;
-
- if (audio_bug (AUDIO_FUNC, decr < 0)) {
- dolog ("decr=%d live=%d ppos=%d old_pos=%d len=%d\n",
- decr, live, ppos, old_pos, len);
- return 0;
- }
- }
-
-
- if (!decr) {
- return 0;
- }
-
- if (fmod_lock_sample (fmd->fmod_sample, &fmd->hw.info,
- fmd->old_pos, decr,
- &p1, &p2,
- &blen1, &blen2)) {
- return 0;
- }
-
- len1 = blen1 >> hw->info.shift;
- len2 = blen2 >> hw->info.shift;
- ldebug ("%p %p %d %d %d %d\n", p1, p2, len1, len2, blen1, blen2);
- decr = len1 + len2;
-
- if (p1 && len1) {
- fmod_write_sample (hw, p1, len1);
- }
-
- if (p2 && len2) {
- fmod_write_sample (hw, p2, len2);
- }
-
- fmod_unlock_sample (fmd->fmod_sample, p1, p2, blen1, blen2);
-
- fmd->old_pos = (fmd->old_pos + decr) % hw->samples;
- return decr;
-}
-
-static int aud_to_fmodfmt (audfmt_e fmt, int stereo)
-{
- int mode = FSOUND_LOOP_NORMAL;
-
- switch (fmt) {
- case AUD_FMT_S8:
- mode |= FSOUND_SIGNED | FSOUND_8BITS;
- break;
-
- case AUD_FMT_U8:
- mode |= FSOUND_UNSIGNED | FSOUND_8BITS;
- break;
-
- case AUD_FMT_S16:
- mode |= FSOUND_SIGNED | FSOUND_16BITS;
- break;
-
- case AUD_FMT_U16:
- mode |= FSOUND_UNSIGNED | FSOUND_16BITS;
- break;
-
- default:
- dolog ("Internal logic error: Bad audio format %d\n", fmt);
-#ifdef DEBUG_FMOD
- abort ();
-#endif
- mode |= FSOUND_8BITS;
- }
- mode |= stereo ? FSOUND_STEREO : FSOUND_MONO;
- return mode;
-}
-
-static void fmod_fini_out (HWVoiceOut *hw)
-{
- FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
-
- if (fmd->fmod_sample) {
- FSOUND_Sample_Free (fmd->fmod_sample);
- fmd->fmod_sample = 0;
-
- if (fmd->channel >= 0) {
- FSOUND_StopSound (fmd->channel);
- }
- }
-}
-
-static int fmod_init_out (HWVoiceOut *hw, struct audsettings *as)
-{
- int mode, channel;
- FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
- struct audsettings obt_as = *as;
-
- mode = aud_to_fmodfmt (as->fmt, as->nchannels == 2 ? 1 : 0);
- fmd->fmod_sample = FSOUND_Sample_Alloc (
- FSOUND_FREE, /* index */
- conf.nb_samples, /* length */
- mode, /* mode */
- as->freq, /* freq */
- 255, /* volume */
- 128, /* pan */
- 255 /* priority */
- );
-
- if (!fmd->fmod_sample) {
- fmod_logerr2 ("DAC", "Failed to allocate FMOD sample\n");
- return -1;
- }
-
- channel = FSOUND_PlaySoundEx (FSOUND_FREE, fmd->fmod_sample, 0, 1);
- if (channel < 0) {
- fmod_logerr2 ("DAC", "Failed to start playing sound\n");
- FSOUND_Sample_Free (fmd->fmod_sample);
- return -1;
- }
- fmd->channel = channel;
-
- /* FMOD always operates on little endian frames? */
- obt_as.endianness = 0;
- audio_pcm_init_info (&hw->info, &obt_as);
- hw->samples = conf.nb_samples;
- return 0;
-}
-
-static int fmod_ctl_out (HWVoiceOut *hw, int cmd, ...)
-{
- int status;
- FMODVoiceOut *fmd = (FMODVoiceOut *) hw;
-
- switch (cmd) {
- case VOICE_ENABLE:
- fmod_clear_sample (fmd);
- status = FSOUND_SetPaused (fmd->channel, 0);
- if (!status) {
- fmod_logerr ("Failed to resume channel %d\n", fmd->channel);
- }
- break;
-
- case VOICE_DISABLE:
- status = FSOUND_SetPaused (fmd->channel, 1);
- if (!status) {
- fmod_logerr ("Failed to pause channel %d\n", fmd->channel);
- }
- break;
- }
- return 0;
-}
-
-static int fmod_init_in (HWVoiceIn *hw, struct audsettings *as)
-{
- int mode;
- FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
- struct audsettings obt_as = *as;
-
- if (conf.broken_adc) {
- return -1;
- }
-
- mode = aud_to_fmodfmt (as->fmt, as->nchannels == 2 ? 1 : 0);
- fmd->fmod_sample = FSOUND_Sample_Alloc (
- FSOUND_FREE, /* index */
- conf.nb_samples, /* length */
- mode, /* mode */
- as->freq, /* freq */
- 255, /* volume */
- 128, /* pan */
- 255 /* priority */
- );
-
- if (!fmd->fmod_sample) {
- fmod_logerr2 ("ADC", "Failed to allocate FMOD sample\n");
- return -1;
- }
-
- /* FMOD always operates on little endian frames? */
- obt_as.endianness = 0;
- audio_pcm_init_info (&hw->info, &obt_as);
- hw->samples = conf.nb_samples;
- return 0;
-}
-
-static void fmod_fini_in (HWVoiceIn *hw)
-{
- FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
-
- if (fmd->fmod_sample) {
- FSOUND_Record_Stop ();
- FSOUND_Sample_Free (fmd->fmod_sample);
- fmd->fmod_sample = 0;
- }
-}
-
-static int fmod_run_in (HWVoiceIn *hw)
-{
- FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
- int hwshift = hw->info.shift;
- int live, dead, new_pos, len;
- unsigned int blen1 = 0, blen2 = 0;
- unsigned int len1, len2;
- unsigned int decr;
- void *p1, *p2;
-
- live = audio_pcm_hw_get_live_in (hw);
- dead = hw->samples - live;
- if (!dead) {
- return 0;
- }
-
- new_pos = FSOUND_Record_GetPosition ();
- if (new_pos < 0) {
- fmod_logerr ("Could not get recording position\n");
- return 0;
- }
-
- len = audio_ring_dist (new_pos, hw->wpos, hw->samples);
- if (!len) {
- return 0;
- }
- len = audio_MIN (len, dead);
-
- if (fmod_lock_sample (fmd->fmod_sample, &fmd->hw.info,
- hw->wpos, len,
- &p1, &p2,
- &blen1, &blen2)) {
- return 0;
- }
-
- len1 = blen1 >> hwshift;
- len2 = blen2 >> hwshift;
- decr = len1 + len2;
-
- if (p1 && blen1) {
- hw->conv (hw->conv_buf + hw->wpos, p1, len1);
- }
- if (p2 && len2) {
- hw->conv (hw->conv_buf, p2, len2);
- }
-
- fmod_unlock_sample (fmd->fmod_sample, p1, p2, blen1, blen2);
- hw->wpos = (hw->wpos + decr) % hw->samples;
- return decr;
-}
-
-static struct {
- const char *name;
- int type;
-} drvtab[] = {
- { .name = "none", .type = FSOUND_OUTPUT_NOSOUND },
-#ifdef _WIN32
- { .name = "winmm", .type = FSOUND_OUTPUT_WINMM },
- { .name = "dsound", .type = FSOUND_OUTPUT_DSOUND },
- { .name = "a3d", .type = FSOUND_OUTPUT_A3D },
- { .name = "asio", .type = FSOUND_OUTPUT_ASIO },
-#endif
-#ifdef __linux__
- { .name = "oss", .type = FSOUND_OUTPUT_OSS },
- { .name = "alsa", .type = FSOUND_OUTPUT_ALSA },
- { .name = "esd", .type = FSOUND_OUTPUT_ESD },
-#endif
-#ifdef __APPLE__
- { .name = "mac", .type = FSOUND_OUTPUT_MAC },
-#endif
-#if 0
- { .name = "xbox", .type = FSOUND_OUTPUT_XBOX },
- { .name = "ps2", .type = FSOUND_OUTPUT_PS2 },
- { .name = "gcube", .type = FSOUND_OUTPUT_GC },
-#endif
- { .name = "none-realtime", .type = FSOUND_OUTPUT_NOSOUND_NONREALTIME }
-};
-
-static void *fmod_audio_init (void)
-{
- size_t i;
- double ver;
- int status;
- int output_type = -1;
- const char *drv = conf.drvname;
-
- ver = FSOUND_GetVersion ();
- if (ver < FMOD_VERSION) {
- dolog ("Wrong FMOD version %f, need at least %f\n", ver, FMOD_VERSION);
- return NULL;
- }
-
-#ifdef __linux__
- if (ver < 3.75) {
- dolog ("FMOD before 3.75 has bug preventing ADC from working\n"
- "ADC will be disabled.\n");
- conf.broken_adc = 1;
- }
-#endif
-
- if (drv) {
- int found = 0;
- for (i = 0; i < ARRAY_SIZE (drvtab); i++) {
- if (!strcmp (drv, drvtab[i].name)) {
- output_type = drvtab[i].type;
- found = 1;
- break;
- }
- }
- if (!found) {
- dolog ("Unknown FMOD driver `%s'\n", drv);
- dolog ("Valid drivers:\n");
- for (i = 0; i < ARRAY_SIZE (drvtab); i++) {
- dolog (" %s\n", drvtab[i].name);
- }
- }
- }
-
- if (output_type != -1) {
- status = FSOUND_SetOutput (output_type);
- if (!status) {
- fmod_logerr ("FSOUND_SetOutput(%d) failed\n", output_type);
- return NULL;
- }
- }
-
- if (conf.bufsize) {
- status = FSOUND_SetBufferSize (conf.bufsize);
- if (!status) {
- fmod_logerr ("FSOUND_SetBufferSize (%d) failed\n", conf.bufsize);
- }
- }
-
- status = FSOUND_Init (conf.freq, conf.nb_channels, 0);
- if (!status) {
- fmod_logerr ("FSOUND_Init failed\n");
- return NULL;
- }
-
- return &conf;
-}
-
-static int fmod_read (SWVoiceIn *sw, void *buf, int size)
-{
- return audio_pcm_sw_read (sw, buf, size);
-}
-
-static int fmod_ctl_in (HWVoiceIn *hw, int cmd, ...)
-{
- int status;
- FMODVoiceIn *fmd = (FMODVoiceIn *) hw;
-
- switch (cmd) {
- case VOICE_ENABLE:
- status = FSOUND_Record_StartSample (fmd->fmod_sample, 1);
- if (!status) {
- fmod_logerr ("Failed to start recording\n");
- }
- break;
-
- case VOICE_DISABLE:
- status = FSOUND_Record_Stop ();
- if (!status) {
- fmod_logerr ("Failed to stop recording\n");
- }
- break;
- }
- return 0;
-}
-
-static void fmod_audio_fini (void *opaque)
-{
- (void) opaque;
- FSOUND_Close ();
-}
-
-static struct audio_option fmod_options[] = {
- {
- .name = "DRV",
- .tag = AUD_OPT_STR,
- .valp = &conf.drvname,
- .descr = "FMOD driver"
- },
- {
- .name = "FREQ",
- .tag = AUD_OPT_INT,
- .valp = &conf.freq,
- .descr = "Default frequency"
- },
- {
- .name = "SAMPLES",
- .tag = AUD_OPT_INT,
- .valp = &conf.nb_samples,
- .descr = "Buffer size in samples"
- },
- {
- .name = "CHANNELS",
- .tag = AUD_OPT_INT,
- .valp = &conf.nb_channels,
- .descr = "Number of default channels (1 - mono, 2 - stereo)"
- },
- {
- .name = "BUFSIZE",
- .tag = AUD_OPT_INT,
- .valp = &conf.bufsize,
- .descr = "(undocumented)"
- },
- { /* End of list */ }
-};
-
-static struct audio_pcm_ops fmod_pcm_ops = {
- .init_out = fmod_init_out,
- .fini_out = fmod_fini_out,
- .run_out = fmod_run_out,
- .write = fmod_write,
- .ctl_out = fmod_ctl_out,
-
- .init_in = fmod_init_in,
- .fini_in = fmod_fini_in,
- .run_in = fmod_run_in,
- .read = fmod_read,
- .ctl_in = fmod_ctl_in
-};
-
-struct audio_driver fmod_audio_driver = {
- .name = "fmod",
- .descr = "FMOD 3.xx http://www.fmod.org",
- .options = fmod_options,
- .init = fmod_audio_init,
- .fini = fmod_audio_fini,
- .pcm_ops = &fmod_pcm_ops,
- .can_be_default = 1,
- .max_voices_out = INT_MAX,
- .max_voices_in = INT_MAX,
- .voice_size_out = sizeof (FMODVoiceOut),
- .voice_size_in = sizeof (FMODVoiceIn)
-};
diff --git a/audio/mixeng.c b/audio/mixeng.c
index 0e4976f..66c0328 100644
--- a/audio/mixeng.c
+++ b/audio/mixeng.c
@@ -22,7 +22,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
+#include "qemu/bswap.h"
#include "audio.h"
#define AUDIO_CAP "mixeng"
@@ -269,7 +271,7 @@
* August 21, 1998
* Copyright 1998 Fabrice Bellard.
*
- * [Rewrote completly the code of Lance Norskog And Sundry
+ * [Rewrote completely the code of Lance Norskog And Sundry
* Contributors with a more efficient algorithm.]
*
* This source code is freely redistributable and may be used for
diff --git a/audio/mixeng.h b/audio/mixeng.h
index 9de443b..b53a5ef 100644
--- a/audio/mixeng.h
+++ b/audio/mixeng.h
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+
#ifndef QEMU_MIXENG_H
#define QEMU_MIXENG_H
@@ -48,4 +49,4 @@
void mixeng_clear (struct st_sample *buf, int len);
void mixeng_volume (struct st_sample *buf, int len, struct mixeng_volume *vol);
-#endif /* mixeng.h */
+#endif /* QEMU_MIXENG_H */
diff --git a/audio/noaudio.c b/audio/noaudio.c
index cb38662..9ca9eaf 100644
--- a/audio/noaudio.c
+++ b/audio/noaudio.c
@@ -21,7 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
+#include "qemu/host-utils.h"
#include "audio.h"
#include "qemu/timer.h"
@@ -48,8 +50,8 @@
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
ticks = now - no->old_ticks;
- bytes = muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
- bytes = audio_MIN (bytes, INT_MAX);
+ bytes = muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);
+ bytes = audio_MIN(bytes, INT_MAX);
samples = bytes >> hw->info.shift;
no->old_ticks = now;
@@ -60,10 +62,10 @@
static int no_write (SWVoiceOut *sw, void *buf, int len)
{
- return audio_pcm_sw_write (sw, buf, len);
+ return audio_pcm_sw_write(sw, buf, len);
}
-static int no_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int no_init_out(HWVoiceOut *hw, struct audsettings *as, void *drv_opaque)
{
audio_pcm_init_info (&hw->info, as);
hw->samples = 1024;
@@ -82,7 +84,7 @@
return 0;
}
-static int no_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int no_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
audio_pcm_init_info (&hw->info, as);
hw->samples = 1024;
@@ -105,7 +107,7 @@
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t ticks = now - no->old_ticks;
int64_t bytes =
- muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
+ muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);
no->old_ticks = now;
bytes = audio_MIN (bytes, INT_MAX);
diff --git a/audio/ossaudio.c b/audio/ossaudio.c
index 4db2ca6..0edd7ea 100644
--- a/audio/ossaudio.c
+++ b/audio/ossaudio.c
@@ -21,15 +21,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/types.h>
+#include "qemu/osdep.h"
#include <sys/ioctl.h>
#include <sys/soundcard.h>
#include "qemu-common.h"
#include "qemu/main-loop.h"
#include "qemu/host-utils.h"
#include "audio.h"
+#include "trace.h"
#define AUDIO_CAP "oss"
#include "audio_int.h"
@@ -38,6 +37,16 @@
#define USE_DSP_POLICY
#endif
+typedef struct OSSConf {
+ int try_mmap;
+ int nfrags;
+ int fragsize;
+ const char *devpath_out;
+ const char *devpath_in;
+ int exclusive;
+ int policy;
+} OSSConf;
+
typedef struct OSSVoiceOut {
HWVoiceOut hw;
void *pcm_buf;
@@ -47,6 +56,7 @@
int fragsize;
int mmapped;
int pending;
+ OSSConf *conf;
} OSSVoiceOut;
typedef struct OSSVoiceIn {
@@ -55,28 +65,9 @@
int fd;
int nfrags;
int fragsize;
+ OSSConf *conf;
} OSSVoiceIn;
-static struct {
- int try_mmap;
- int nfrags;
- int fragsize;
- const char *devpath_out;
- const char *devpath_in;
- int debug;
- int exclusive;
- int policy;
-} conf = {
- .try_mmap = 0,
- .nfrags = 4,
- .fragsize = 4096,
- .devpath_out = "/dev/dsp",
- .devpath_in = "/dev/dsp",
- .debug = 0,
- .exclusive = 0,
- .policy = 5
-};
-
struct oss_params {
int freq;
audfmt_e fmt;
@@ -138,18 +129,18 @@
audio_run ("oss_poll_in");
}
-static int oss_poll_out (HWVoiceOut *hw)
+static void oss_poll_out (HWVoiceOut *hw)
{
OSSVoiceOut *oss = (OSSVoiceOut *) hw;
- return qemu_set_fd_handler (oss->fd, NULL, oss_helper_poll_out, NULL);
+ qemu_set_fd_handler (oss->fd, NULL, oss_helper_poll_out, NULL);
}
-static int oss_poll_in (HWVoiceIn *hw)
+static void oss_poll_in (HWVoiceIn *hw)
{
OSSVoiceIn *oss = (OSSVoiceIn *) hw;
- return qemu_set_fd_handler (oss->fd, oss_helper_poll_in, NULL, NULL);
+ qemu_set_fd_handler (oss->fd, oss_helper_poll_in, NULL, NULL);
}
static int oss_write (SWVoiceOut *sw, void *buf, int len)
@@ -272,18 +263,18 @@
#endif
static int oss_open (int in, struct oss_params *req,
- struct oss_params *obt, int *pfd)
+ struct oss_params *obt, int *pfd, OSSConf* conf)
{
int fd;
- int oflags = conf.exclusive ? O_EXCL : 0;
+ int oflags = conf->exclusive ? O_EXCL : 0;
audio_buf_info abinfo;
int fmt, freq, nchannels;
int setfragment = 1;
- const char *dspname = in ? conf.devpath_in : conf.devpath_out;
+ const char *dspname = in ? conf->devpath_in : conf->devpath_out;
const char *typ = in ? "ADC" : "DAC";
/* Kludge needed to have working mmap on Linux */
- oflags |= conf.try_mmap ? O_RDWR : (in ? O_RDONLY : O_WRONLY);
+ oflags |= conf->try_mmap ? O_RDWR : (in ? O_RDONLY : O_WRONLY);
fd = open (dspname, oflags | O_NONBLOCK);
if (-1 == fd) {
@@ -317,20 +308,18 @@
}
#ifdef USE_DSP_POLICY
- if (conf.policy >= 0) {
+ if (conf->policy >= 0) {
int version;
if (!oss_get_version (fd, &version, typ)) {
- if (conf.debug) {
- dolog ("OSS version = %#x\n", version);
- }
+ trace_oss_version(version);
if (version >= 0x040000) {
- int policy = conf.policy;
+ int policy = conf->policy;
if (ioctl (fd, SNDCTL_DSP_POLICY, &policy)) {
oss_logerr2 (errno, typ,
"Failed to set timing policy to %d\n",
- conf.policy);
+ conf->policy);
goto err;
}
setfragment = 0;
@@ -458,19 +447,12 @@
}
if (abinfo.bytes > bufsize) {
- if (conf.debug) {
- dolog ("warning: Invalid available size, size=%d bufsize=%d\n"
- "please report your OS/audio hw to av1474@comtv.ru\n",
- abinfo.bytes, bufsize);
- }
+ trace_oss_invalid_available_size(abinfo.bytes, bufsize);
abinfo.bytes = bufsize;
}
if (abinfo.bytes < 0) {
- if (conf.debug) {
- dolog ("warning: Invalid available size, size=%d bufsize=%d\n",
- abinfo.bytes, bufsize);
- }
+ trace_oss_invalid_available_size(abinfo.bytes, bufsize);
return 0;
}
@@ -510,7 +492,8 @@
}
}
-static int oss_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int oss_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
OSSVoiceOut *oss = (OSSVoiceOut *) hw;
struct oss_params req, obt;
@@ -519,16 +502,17 @@
int fd;
audfmt_e effective_fmt;
struct audsettings obt_as;
+ OSSConf *conf = drv_opaque;
oss->fd = -1;
req.fmt = aud_to_ossfmt (as->fmt, as->endianness);
req.freq = as->freq;
req.nchannels = as->nchannels;
- req.fragsize = conf.fragsize;
- req.nfrags = conf.nfrags;
+ req.fragsize = conf->fragsize;
+ req.nfrags = conf->nfrags;
- if (oss_open (0, &req, &obt, &fd)) {
+ if (oss_open (0, &req, &obt, &fd, conf)) {
return -1;
}
@@ -555,7 +539,7 @@
hw->samples = (obt.nfrags * obt.fragsize) >> hw->info.shift;
oss->mmapped = 0;
- if (conf.try_mmap) {
+ if (conf->try_mmap) {
oss->pcm_buf = mmap (
NULL,
hw->samples << hw->info.shift,
@@ -615,6 +599,7 @@
}
oss->fd = fd;
+ oss->conf = conf;
return 0;
}
@@ -634,7 +619,8 @@
va_end (ap);
ldebug ("enabling voice\n");
- if (poll_mode && oss_poll_out (hw)) {
+ if (poll_mode) {
+ oss_poll_out (hw);
poll_mode = 0;
}
hw->poll_mode = poll_mode;
@@ -676,7 +662,7 @@
return 0;
}
-static int oss_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int oss_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
OSSVoiceIn *oss = (OSSVoiceIn *) hw;
struct oss_params req, obt;
@@ -685,15 +671,16 @@
int fd;
audfmt_e effective_fmt;
struct audsettings obt_as;
+ OSSConf *conf = drv_opaque;
oss->fd = -1;
req.fmt = aud_to_ossfmt (as->fmt, as->endianness);
req.freq = as->freq;
req.nchannels = as->nchannels;
- req.fragsize = conf.fragsize;
- req.nfrags = conf.nfrags;
- if (oss_open (1, &req, &obt, &fd)) {
+ req.fragsize = conf->fragsize;
+ req.nfrags = conf->nfrags;
+ if (oss_open (1, &req, &obt, &fd, conf)) {
return -1;
}
@@ -727,6 +714,7 @@
}
oss->fd = fd;
+ oss->conf = conf;
return 0;
}
@@ -828,7 +816,8 @@
poll_mode = va_arg (ap, int);
va_end (ap);
- if (poll_mode && oss_poll_in (hw)) {
+ if (poll_mode) {
+ oss_poll_in (hw);
poll_mode = 0;
}
hw->poll_mode = poll_mode;
@@ -845,71 +834,79 @@
return 0;
}
+static OSSConf glob_conf = {
+ .try_mmap = 0,
+ .nfrags = 4,
+ .fragsize = 4096,
+ .devpath_out = "/dev/dsp",
+ .devpath_in = "/dev/dsp",
+ .exclusive = 0,
+ .policy = 5
+};
+
static void *oss_audio_init (void)
{
- if (access(conf.devpath_in, R_OK | W_OK) < 0 ||
- access(conf.devpath_out, R_OK | W_OK) < 0) {
+ OSSConf *conf = g_malloc(sizeof(OSSConf));
+ *conf = glob_conf;
+
+ if (access(conf->devpath_in, R_OK | W_OK) < 0 ||
+ access(conf->devpath_out, R_OK | W_OK) < 0) {
+ g_free(conf);
return NULL;
}
- return &conf;
+ return conf;
}
static void oss_audio_fini (void *opaque)
{
- (void) opaque;
+ g_free(opaque);
}
static struct audio_option oss_options[] = {
{
.name = "FRAGSIZE",
.tag = AUD_OPT_INT,
- .valp = &conf.fragsize,
+ .valp = &glob_conf.fragsize,
.descr = "Fragment size in bytes"
},
{
.name = "NFRAGS",
.tag = AUD_OPT_INT,
- .valp = &conf.nfrags,
+ .valp = &glob_conf.nfrags,
.descr = "Number of fragments"
},
{
.name = "MMAP",
.tag = AUD_OPT_BOOL,
- .valp = &conf.try_mmap,
+ .valp = &glob_conf.try_mmap,
.descr = "Try using memory mapped access"
},
{
.name = "DAC_DEV",
.tag = AUD_OPT_STR,
- .valp = &conf.devpath_out,
+ .valp = &glob_conf.devpath_out,
.descr = "Path to DAC device"
},
{
.name = "ADC_DEV",
.tag = AUD_OPT_STR,
- .valp = &conf.devpath_in,
+ .valp = &glob_conf.devpath_in,
.descr = "Path to ADC device"
},
{
.name = "EXCLUSIVE",
.tag = AUD_OPT_BOOL,
- .valp = &conf.exclusive,
- .descr = "Open device in exclusive mode (vmix wont work)"
+ .valp = &glob_conf.exclusive,
+ .descr = "Open device in exclusive mode (vmix won't work)"
},
#ifdef USE_DSP_POLICY
{
.name = "POLICY",
.tag = AUD_OPT_INT,
- .valp = &conf.policy,
+ .valp = &glob_conf.policy,
.descr = "Set the timing policy of the device, -1 to use fragment mode",
},
#endif
- {
- .name = "DEBUG",
- .tag = AUD_OPT_BOOL,
- .valp = &conf.debug,
- .descr = "Turn on some debugging messages"
- },
{ /* End of list */ }
};
diff --git a/audio/paaudio.c b/audio/paaudio.c
index 887a5db..b69dc2d 100644
--- a/audio/paaudio.c
+++ b/audio/paaudio.c
@@ -1,4 +1,5 @@
/* public domain */
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "audio.h"
@@ -9,6 +10,19 @@
#include "audio_pt_int.h"
typedef struct {
+ int samples;
+ char *server;
+ char *sink;
+ char *source;
+} PAConf;
+
+typedef struct {
+ PAConf conf;
+ pa_threaded_mainloop *mainloop;
+ pa_context *context;
+} paaudio;
+
+typedef struct {
HWVoiceOut hw;
int done;
int live;
@@ -17,6 +31,7 @@
pa_stream *stream;
void *pcm_buf;
struct audio_pt pt;
+ paaudio *g;
} PAVoiceOut;
typedef struct {
@@ -30,20 +45,10 @@
struct audio_pt pt;
const void *read_data;
size_t read_index, read_length;
+ paaudio *g;
} PAVoiceIn;
-typedef struct {
- int samples;
- char *server;
- char *sink;
- char *source;
- pa_threaded_mainloop *mainloop;
- pa_context *context;
-} paaudio;
-
-static paaudio glob_paaudio = {
- .samples = 4096,
-};
+static void qpa_audio_fini(void *opaque);
static void GCC_FMT_ATTR (2, 3) qpa_logerr (int err, const char *fmt, ...)
{
@@ -106,7 +111,7 @@
static int qpa_simple_read (PAVoiceIn *p, void *data, size_t length, int *rerror)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = p->g;
pa_threaded_mainloop_lock (g->mainloop);
@@ -160,7 +165,7 @@
static int qpa_simple_write (PAVoiceOut *p, const void *data, size_t length, int *rerror)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = p->g;
pa_threaded_mainloop_lock (g->mainloop);
@@ -222,7 +227,7 @@
}
}
- decr = to_mix = audio_MIN (pa->live, glob_paaudio.samples >> 2);
+ decr = to_mix = audio_MIN (pa->live, pa->g->conf.samples >> 2);
rpos = pa->rpos;
if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
@@ -314,7 +319,7 @@
}
}
- incr = to_grab = audio_MIN (pa->dead, glob_paaudio.samples >> 2);
+ incr = to_grab = audio_MIN (pa->dead, pa->g->conf.samples >> 2);
wpos = pa->wpos;
if (audio_pt_unlock (&pa->pt, AUDIO_FUNC)) {
@@ -430,7 +435,7 @@
static void context_state_cb (pa_context *c, void *userdata)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = userdata;
switch (pa_context_get_state(c)) {
case PA_CONTEXT_READY:
@@ -449,7 +454,7 @@
static void stream_state_cb (pa_stream *s, void * userdata)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = userdata;
switch (pa_stream_get_state (s)) {
@@ -467,23 +472,21 @@
static void stream_request_cb (pa_stream *s, size_t length, void *userdata)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = userdata;
pa_threaded_mainloop_signal (g->mainloop, 0);
}
static pa_stream *qpa_simple_new (
- const char *server,
+ paaudio *g,
const char *name,
pa_stream_direction_t dir,
const char *dev,
- const char *stream_name,
const pa_sample_spec *ss,
const pa_channel_map *map,
const pa_buffer_attr *attr,
int *rerror)
{
- paaudio *g = &glob_paaudio;
int r;
pa_stream *stream;
@@ -535,13 +538,15 @@
return NULL;
}
-static int qpa_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int qpa_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
int error;
- static pa_sample_spec ss;
- static pa_buffer_attr ba;
+ pa_sample_spec ss;
+ pa_buffer_attr ba;
struct audsettings obt_as = *as;
PAVoiceOut *pa = (PAVoiceOut *) hw;
+ paaudio *g = pa->g = drv_opaque;
ss.format = audfmt_to_pa (as->fmt, as->endianness);
ss.channels = as->nchannels;
@@ -559,11 +564,10 @@
obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);
pa->stream = qpa_simple_new (
- glob_paaudio.server,
+ g,
"qemu",
PA_STREAM_PLAYBACK,
- glob_paaudio.sink,
- "pcm.playback",
+ g->conf.sink,
&ss,
NULL, /* channel map */
&ba, /* buffering attributes */
@@ -575,7 +579,7 @@
}
audio_pcm_init_info (&hw->info, &obt_as);
- hw->samples = glob_paaudio.samples;
+ hw->samples = g->conf.samples;
pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
pa->rpos = hw->rpos;
if (!pa->pcm_buf) {
@@ -602,13 +606,14 @@
return -1;
}
-static int qpa_init_in (HWVoiceIn *hw, struct audsettings *as)
+static int qpa_init_in(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
int error;
- static pa_sample_spec ss;
- static pa_buffer_attr ba;
+ pa_sample_spec ss;
+ pa_buffer_attr ba;
struct audsettings obt_as = *as;
PAVoiceIn *pa = (PAVoiceIn *) hw;
+ paaudio *g = pa->g = drv_opaque;
ss.format = audfmt_to_pa (as->fmt, as->endianness);
ss.channels = as->nchannels;
@@ -624,14 +629,13 @@
obt_as.fmt = pa_to_audfmt (ss.format, &obt_as.endianness);
pa->stream = qpa_simple_new (
- glob_paaudio.server,
+ g,
"qemu",
PA_STREAM_RECORD,
- glob_paaudio.source,
- "pcm.capture",
+ g->conf.source,
&ss,
NULL, /* channel map */
- &ba, /* buffering attributes */
+ &ba, /* buffering attributes */
&error
);
if (!pa->stream) {
@@ -640,7 +644,7 @@
}
audio_pcm_init_info (&hw->info, &obt_as);
- hw->samples = glob_paaudio.samples;
+ hw->samples = g->conf.samples;
pa->pcm_buf = audio_calloc (AUDIO_FUNC, hw->samples, 1 << hw->info.shift);
pa->wpos = hw->wpos;
if (!pa->pcm_buf) {
@@ -713,7 +717,7 @@
PAVoiceOut *pa = (PAVoiceOut *) hw;
pa_operation *op;
pa_cvolume v;
- paaudio *g = &glob_paaudio;
+ paaudio *g = pa->g;
#ifdef PA_CHECK_VERSION /* macro is present in 0.9.16+ */
pa_cvolume_init (&v); /* function is present in 0.9.13+ */
@@ -765,7 +769,7 @@
PAVoiceIn *pa = (PAVoiceIn *) hw;
pa_operation *op;
pa_cvolume v;
- paaudio *g = &glob_paaudio;
+ paaudio *g = pa->g;
#ifdef PA_CHECK_VERSION
pa_cvolume_init (&v);
@@ -837,23 +841,31 @@
}
/* common */
+static PAConf glob_conf = {
+ .samples = 4096,
+};
+
static void *qpa_audio_init (void)
{
- paaudio *g = &glob_paaudio;
+ paaudio *g = g_malloc(sizeof(paaudio));
+ g->conf = glob_conf;
+ g->mainloop = NULL;
+ g->context = NULL;
g->mainloop = pa_threaded_mainloop_new ();
if (!g->mainloop) {
goto fail;
}
- g->context = pa_context_new (pa_threaded_mainloop_get_api (g->mainloop), glob_paaudio.server);
+ g->context = pa_context_new (pa_threaded_mainloop_get_api (g->mainloop),
+ g->conf.server);
if (!g->context) {
goto fail;
}
pa_context_set_state_callback (g->context, context_state_cb, g);
- if (pa_context_connect (g->context, glob_paaudio.server, 0, NULL) < 0) {
+ if (pa_context_connect (g->context, g->conf.server, 0, NULL) < 0) {
qpa_logerr (pa_context_errno (g->context),
"pa_context_connect() failed\n");
goto fail;
@@ -886,12 +898,13 @@
pa_threaded_mainloop_unlock (g->mainloop);
- return &glob_paaudio;
+ return g;
unlock_and_fail:
pa_threaded_mainloop_unlock (g->mainloop);
fail:
AUD_log (AUDIO_CAP, "Failed to initialize PA context");
+ qpa_audio_fini(g);
return NULL;
}
@@ -906,39 +919,38 @@
if (g->context) {
pa_context_disconnect (g->context);
pa_context_unref (g->context);
- g->context = NULL;
}
if (g->mainloop) {
pa_threaded_mainloop_free (g->mainloop);
}
- g->mainloop = NULL;
+ g_free(g);
}
struct audio_option qpa_options[] = {
{
.name = "SAMPLES",
.tag = AUD_OPT_INT,
- .valp = &glob_paaudio.samples,
+ .valp = &glob_conf.samples,
.descr = "buffer size in samples"
},
{
.name = "SERVER",
.tag = AUD_OPT_STR,
- .valp = &glob_paaudio.server,
+ .valp = &glob_conf.server,
.descr = "server address"
},
{
.name = "SINK",
.tag = AUD_OPT_STR,
- .valp = &glob_paaudio.sink,
+ .valp = &glob_conf.sink,
.descr = "sink device name"
},
{
.name = "SOURCE",
.tag = AUD_OPT_STR,
- .valp = &glob_paaudio.source,
+ .valp = &glob_conf.source,
.descr = "source device name"
},
{ /* End of list */ }
diff --git a/audio/sdlaudio.c b/audio/sdlaudio.c
index d24daa5..db69fe1 100644
--- a/audio/sdlaudio.c
+++ b/audio/sdlaudio.c
@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <SDL.h>
#include <SDL_thread.h>
#include "qemu-common.h"
@@ -55,6 +56,7 @@
SDL_mutex *mutex;
SDL_sem *sem;
int initialized;
+ bool driver_created;
} glob_sdl;
typedef struct SDLAudioState SDLAudioState;
@@ -332,7 +334,8 @@
sdl_close (&glob_sdl);
}
-static int sdl_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int sdl_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
SDLVoiceOut *sdl = (SDLVoiceOut *) hw;
SDLAudioState *s = &glob_sdl;
@@ -392,6 +395,10 @@
static void *sdl_audio_init (void)
{
SDLAudioState *s = &glob_sdl;
+ if (s->driver_created) {
+ sdl_logerr("Can't create multiple sdl backends\n");
+ return NULL;
+ }
if (SDL_InitSubSystem (SDL_INIT_AUDIO)) {
sdl_logerr ("SDL failed to initialize audio subsystem\n");
@@ -413,6 +420,7 @@
return NULL;
}
+ s->driver_created = true;
return s;
}
@@ -423,6 +431,7 @@
SDL_DestroySemaphore (s->sem);
SDL_DestroyMutex (s->mutex);
SDL_QuitSubSystem (SDL_INIT_AUDIO);
+ s->driver_created = false;
}
static struct audio_option sdl_options[] = {
diff --git a/audio/spiceaudio.c b/audio/spiceaudio.c
index 7b79bed..5580e76 100644
--- a/audio/spiceaudio.c
+++ b/audio/spiceaudio.c
@@ -17,7 +17,10 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "hw/hw.h"
+#include "qemu/host-utils.h"
+#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "ui/qemu-spice.h"
@@ -102,11 +105,11 @@
now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
ticks = now - rate->start_ticks;
- bytes = muldiv64 (ticks, info->bytes_per_second, get_ticks_per_sec ());
+ bytes = muldiv64(ticks, info->bytes_per_second, NANOSECONDS_PER_SECOND);
samples = (bytes - rate->bytes_sent) >> info->shift;
if (samples < 0 || samples > 65536) {
error_report("Resetting rate control (%" PRId64 " samples)", samples);
- rate_start (rate);
+ rate_start(rate);
samples = 0;
}
rate->bytes_sent += samples << info->shift;
@@ -115,7 +118,8 @@
/* playback */
-static int line_out_init (HWVoiceOut *hw, struct audsettings *as)
+static int line_out_init(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
SpiceVoiceOut *out = container_of (hw, SpiceVoiceOut, hw);
struct audsettings settings;
@@ -243,7 +247,7 @@
/* record */
-static int line_in_init (HWVoiceIn *hw, struct audsettings *as)
+static int line_in_init(HWVoiceIn *hw, struct audsettings *as, void *drv_opaque)
{
SpiceVoiceIn *in = container_of (hw, SpiceVoiceIn, hw);
struct audsettings settings;
diff --git a/audio/trace-events b/audio/trace-events
new file mode 100644
index 0000000..5173590
--- /dev/null
+++ b/audio/trace-events
@@ -0,0 +1,17 @@
+# See docs/tracing.txt for syntax documentation.
+
+# audio/alsaaudio.c
+alsa_revents(int revents) "revents = %d"
+alsa_pollout(int i, int fd) "i = %d fd = %d"
+alsa_set_handler(int events, int index, int fd, int err) "events=%#x index=%d fd=%d err=%d"
+alsa_wrote_zero(int len) "Failed to write %d frames (wrote zero)"
+alsa_read_zero(long len) "Failed to read %ld frames (read zero)"
+alsa_xrun_out(void) "Recovering from playback xrun"
+alsa_xrun_in(void) "Recovering from capture xrun"
+alsa_resume_out(void) "Resuming suspended output stream"
+alsa_resume_in(void) "Resuming suspended input stream"
+alsa_no_frames(int state) "No frames available and ALSA state is %d"
+
+# audio/ossaudio.c
+oss_version(int version) "OSS version = %#x"
+oss_invalid_available_size(int size, int bufsize) "Invalid available size, size=%d bufsize=%d"
diff --git a/audio/wavaudio.c b/audio/wavaudio.c
index 6846a1a..341eec3 100644
--- a/audio/wavaudio.c
+++ b/audio/wavaudio.c
@@ -21,7 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "hw/hw.h"
+#include "qemu/osdep.h"
+#include "qemu/host-utils.h"
#include "qemu/timer.h"
#include "audio.h"
@@ -36,15 +37,10 @@
int total_samples;
} WAVVoiceOut;
-static struct {
+typedef struct {
struct audsettings settings;
const char *wav_path;
-} conf = {
- .settings.freq = 44100,
- .settings.nchannels = 2,
- .settings.fmt = AUD_FMT_S16,
- .wav_path = "qemu.wav"
-};
+} WAVConf;
static int wav_run_out (HWVoiceOut *hw, int live)
{
@@ -55,7 +51,7 @@
int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
int64_t ticks = now - wav->old_ticks;
int64_t bytes =
- muldiv64 (ticks, hw->info.bytes_per_second, get_ticks_per_sec ());
+ muldiv64(ticks, hw->info.bytes_per_second, NANOSECONDS_PER_SECOND);
if (bytes > INT_MAX) {
samples = INT_MAX >> hw->info.shift;
@@ -105,7 +101,8 @@
}
}
-static int wav_init_out (HWVoiceOut *hw, struct audsettings *as)
+static int wav_init_out(HWVoiceOut *hw, struct audsettings *as,
+ void *drv_opaque)
{
WAVVoiceOut *wav = (WAVVoiceOut *) hw;
int bits16 = 0, stereo = 0;
@@ -115,9 +112,8 @@
0x02, 0x00, 0x44, 0xac, 0x00, 0x00, 0x10, 0xb1, 0x02, 0x00, 0x04,
0x00, 0x10, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00
};
- struct audsettings wav_as = conf.settings;
-
- (void) as;
+ WAVConf *conf = drv_opaque;
+ struct audsettings wav_as = conf->settings;
stereo = wav_as.nchannels == 2;
switch (wav_as.fmt) {
@@ -155,10 +151,10 @@
le_store (hdr + 28, hw->info.freq << (bits16 + stereo), 4);
le_store (hdr + 32, 1 << (bits16 + stereo), 2);
- wav->f = fopen (conf.wav_path, "wb");
+ wav->f = fopen (conf->wav_path, "wb");
if (!wav->f) {
dolog ("Failed to open wave file `%s'\nReason: %s\n",
- conf.wav_path, strerror (errno));
+ conf->wav_path, strerror (errno));
g_free (wav->pcm_buf);
wav->pcm_buf = NULL;
return -1;
@@ -226,40 +222,49 @@
return 0;
}
+static WAVConf glob_conf = {
+ .settings.freq = 44100,
+ .settings.nchannels = 2,
+ .settings.fmt = AUD_FMT_S16,
+ .wav_path = "qemu.wav"
+};
+
static void *wav_audio_init (void)
{
- return &conf;
+ WAVConf *conf = g_malloc(sizeof(WAVConf));
+ *conf = glob_conf;
+ return conf;
}
static void wav_audio_fini (void *opaque)
{
- (void) opaque;
ldebug ("wav_fini");
+ g_free(opaque);
}
static struct audio_option wav_options[] = {
{
.name = "FREQUENCY",
.tag = AUD_OPT_INT,
- .valp = &conf.settings.freq,
+ .valp = &glob_conf.settings.freq,
.descr = "Frequency"
},
{
.name = "FORMAT",
.tag = AUD_OPT_FMT,
- .valp = &conf.settings.fmt,
+ .valp = &glob_conf.settings.fmt,
.descr = "Format"
},
{
.name = "DAC_FIXED_CHANNELS",
.tag = AUD_OPT_INT,
- .valp = &conf.settings.nchannels,
+ .valp = &glob_conf.settings.nchannels,
.descr = "Number of channels (1 - mono, 2 - stereo)"
},
{
.name = "PATH",
.tag = AUD_OPT_STR,
- .valp = &conf.wav_path,
+ .valp = &glob_conf.wav_path,
.descr = "Path to wave file"
},
{ /* End of list */ }
diff --git a/audio/wavcapture.c b/audio/wavcapture.c
index 6f6d792..8bfb9e7 100644
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -1,5 +1,7 @@
+#include "qemu/osdep.h"
#include "hw/hw.h"
#include "monitor/monitor.h"
+#include "qemu/error-report.h"
#include "audio.h"
typedef struct {
diff --git a/audio/winaudio.c b/audio/winaudio.c
index 6aec129..63f923f 100644
--- a/audio/winaudio.c
+++ b/audio/winaudio.c
@@ -23,6 +23,7 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/sysemu.h"
#include "audio.h"
@@ -161,7 +162,7 @@
static int
-winaudio_out_init (HWVoiceOut *hw, struct audsettings *as)
+winaudio_out_init (HWVoiceOut *hw, struct audsettings *as, void* drv_opaque)
{
WinAudioOut* s = (WinAudioOut*) hw;
MMRESULT result;
@@ -296,14 +297,6 @@
played += wav_samples;
s->write_pos += wav_bytes;
if (s->write_pos == s->write_size) {
-#if DEBUG
- int64_t now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - start_time;
- int64_t diff = now - last_time;
-
- D("run_out: (%7.3f:%7d):waveOutWrite buffer:%d\n",
- now/1e9, (now-last_time)/1e9, s->write_index);
- last_time = now;
-#endif
waveOutWrite( s->waveout, wav_buffer, sizeof(*wav_buffer) );
s->write_pos = 0;
s->write_index += 1;
@@ -407,7 +400,7 @@
static int
-winaudio_in_init (HWVoiceIn *hw, struct audsettings *as)
+winaudio_in_init (HWVoiceIn *hw, struct audsettings *as, void* drv_opaque)
{
WinAudioIn* s = (WinAudioIn*) hw;
MMRESULT result;
diff --git a/audio/winwaveaudio.c b/audio/winwaveaudio.c
deleted file mode 100644
index 8dbd145..0000000
--- a/audio/winwaveaudio.c
+++ /dev/null
@@ -1,717 +0,0 @@
-/* public domain */
-
-#include "qemu-common.h"
-#include "sysemu/sysemu.h"
-#include "audio.h"
-
-#define AUDIO_CAP "winwave"
-#include "audio_int.h"
-
-#include <windows.h>
-#include <mmsystem.h>
-
-#include "audio_win_int.h"
-
-static struct {
- int dac_headers;
- int dac_samples;
- int adc_headers;
- int adc_samples;
-} conf = {
- .dac_headers = 4,
- .dac_samples = 1024,
- .adc_headers = 4,
- .adc_samples = 1024
-};
-
-typedef struct {
- HWVoiceOut hw;
- HWAVEOUT hwo;
- WAVEHDR *hdrs;
- HANDLE event;
- void *pcm_buf;
- int avail;
- int pending;
- int curhdr;
- int paused;
- CRITICAL_SECTION crit_sect;
-} WaveVoiceOut;
-
-typedef struct {
- HWVoiceIn hw;
- HWAVEIN hwi;
- WAVEHDR *hdrs;
- HANDLE event;
- void *pcm_buf;
- int curhdr;
- int paused;
- int rpos;
- int avail;
- CRITICAL_SECTION crit_sect;
-} WaveVoiceIn;
-
-static void winwave_log_mmresult (MMRESULT mr)
-{
- const char *str = "BUG";
-
- switch (mr) {
- case MMSYSERR_NOERROR:
- str = "Success";
- break;
-
- case MMSYSERR_INVALHANDLE:
- str = "Specified device handle is invalid";
- break;
-
- case MMSYSERR_BADDEVICEID:
- str = "Specified device id is out of range";
- break;
-
- case MMSYSERR_NODRIVER:
- str = "No device driver is present";
- break;
-
- case MMSYSERR_NOMEM:
- str = "Unable to allocate or lock memory";
- break;
-
- case WAVERR_SYNC:
- str = "Device is synchronous but waveOutOpen was called "
- "without using the WINWAVE_ALLOWSYNC flag";
- break;
-
- case WAVERR_UNPREPARED:
- str = "The data block pointed to by the pwh parameter "
- "hasn't been prepared";
- break;
-
- case WAVERR_STILLPLAYING:
- str = "There are still buffers in the queue";
- break;
-
- default:
- dolog ("Reason: Unknown (MMRESULT %#x)\n", mr);
- return;
- }
-
- dolog ("Reason: %s\n", str);
-}
-
-static void GCC_FMT_ATTR (2, 3) winwave_logerr (
- MMRESULT mr,
- const char *fmt,
- ...
- )
-{
- va_list ap;
-
- va_start (ap, fmt);
- AUD_vlog (AUDIO_CAP, fmt, ap);
- va_end (ap);
-
- AUD_log (NULL, " failed\n");
- winwave_log_mmresult (mr);
-}
-
-static void winwave_anal_close_out (WaveVoiceOut *wave)
-{
- MMRESULT mr;
-
- mr = waveOutClose (wave->hwo);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutClose");
- }
- wave->hwo = NULL;
-}
-
-static void CALLBACK winwave_callback_out (
- HWAVEOUT hwo,
- UINT msg,
- DWORD_PTR dwInstance,
- DWORD_PTR dwParam1,
- DWORD_PTR dwParam2
- )
-{
- WaveVoiceOut *wave = (WaveVoiceOut *) dwInstance;
-
- switch (msg) {
- case WOM_DONE:
- {
- WAVEHDR *h = (WAVEHDR *) dwParam1;
- if (!h->dwUser) {
- h->dwUser = 1;
- EnterCriticalSection (&wave->crit_sect);
- {
- wave->avail += conf.dac_samples;
- }
- LeaveCriticalSection (&wave->crit_sect);
- if (wave->hw.poll_mode) {
- if (!SetEvent (wave->event)) {
- dolog ("DAC SetEvent failed %lx\n", GetLastError ());
- }
- }
- }
- }
- break;
-
- case WOM_CLOSE:
- case WOM_OPEN:
- break;
-
- default:
- dolog ("unknown wave out callback msg %x\n", msg);
- }
-}
-
-static int winwave_init_out (HWVoiceOut *hw, struct audsettings *as)
-{
- int i;
- int err;
- MMRESULT mr;
- WAVEFORMATEX wfx;
- WaveVoiceOut *wave;
-
- wave = (WaveVoiceOut *) hw;
-
- InitializeCriticalSection (&wave->crit_sect);
-
- err = waveformat_from_audio_settings (&wfx, as);
- if (err) {
- goto err0;
- }
-
- mr = waveOutOpen (&wave->hwo, WAVE_MAPPER, &wfx,
- (DWORD_PTR) winwave_callback_out,
- (DWORD_PTR) wave, CALLBACK_FUNCTION);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutOpen");
- goto err1;
- }
-
- wave->hdrs = audio_calloc (AUDIO_FUNC, conf.dac_headers,
- sizeof (*wave->hdrs));
- if (!wave->hdrs) {
- goto err2;
- }
-
- audio_pcm_init_info (&hw->info, as);
- hw->samples = conf.dac_samples * conf.dac_headers;
- wave->avail = hw->samples;
-
- wave->pcm_buf = audio_calloc (AUDIO_FUNC, conf.dac_samples,
- conf.dac_headers << hw->info.shift);
- if (!wave->pcm_buf) {
- goto err3;
- }
-
- for (i = 0; i < conf.dac_headers; ++i) {
- WAVEHDR *h = &wave->hdrs[i];
-
- h->dwUser = 0;
- h->dwBufferLength = conf.dac_samples << hw->info.shift;
- h->lpData = advance (wave->pcm_buf, i * h->dwBufferLength);
- h->dwFlags = 0;
-
- mr = waveOutPrepareHeader (wave->hwo, h, sizeof (*h));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutPrepareHeader(%d)", i);
- goto err4;
- }
- }
-
- return 0;
-
- err4:
- g_free (wave->pcm_buf);
- err3:
- g_free (wave->hdrs);
- err2:
- winwave_anal_close_out (wave);
- err1:
- err0:
- return -1;
-}
-
-static int winwave_write (SWVoiceOut *sw, void *buf, int len)
-{
- return audio_pcm_sw_write (sw, buf, len);
-}
-
-static int winwave_run_out (HWVoiceOut *hw, int live)
-{
- WaveVoiceOut *wave = (WaveVoiceOut *) hw;
- int decr;
- int doreset;
-
- EnterCriticalSection (&wave->crit_sect);
- {
- decr = audio_MIN (live, wave->avail);
- decr = audio_pcm_hw_clip_out (hw, wave->pcm_buf, decr, wave->pending);
- wave->pending += decr;
- wave->avail -= decr;
- }
- LeaveCriticalSection (&wave->crit_sect);
-
- doreset = hw->poll_mode && (wave->pending >= conf.dac_samples);
- if (doreset && !ResetEvent (wave->event)) {
- dolog ("DAC ResetEvent failed %lx\n", GetLastError ());
- }
-
- while (wave->pending >= conf.dac_samples) {
- MMRESULT mr;
- WAVEHDR *h = &wave->hdrs[wave->curhdr];
-
- h->dwUser = 0;
- mr = waveOutWrite (wave->hwo, h, sizeof (*h));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutWrite(%d)", wave->curhdr);
- break;
- }
-
- wave->pending -= conf.dac_samples;
- wave->curhdr = (wave->curhdr + 1) % conf.dac_headers;
- }
-
- return decr;
-}
-
-static void winwave_poll (void *opaque)
-{
- (void) opaque;
- audio_run ("winwave_poll");
-}
-
-static void winwave_fini_out (HWVoiceOut *hw)
-{
- int i;
- MMRESULT mr;
- WaveVoiceOut *wave = (WaveVoiceOut *) hw;
-
- mr = waveOutReset (wave->hwo);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutReset");
- }
-
- for (i = 0; i < conf.dac_headers; ++i) {
- mr = waveOutUnprepareHeader (wave->hwo, &wave->hdrs[i],
- sizeof (wave->hdrs[i]));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutUnprepareHeader(%d)", i);
- }
- }
-
- winwave_anal_close_out (wave);
-
- if (wave->event) {
- qemu_del_wait_object (wave->event, winwave_poll, wave);
- if (!CloseHandle (wave->event)) {
- dolog ("DAC CloseHandle failed %lx\n", GetLastError ());
- }
- wave->event = NULL;
- }
-
- g_free (wave->pcm_buf);
- wave->pcm_buf = NULL;
-
- g_free (wave->hdrs);
- wave->hdrs = NULL;
-}
-
-static int winwave_ctl_out (HWVoiceOut *hw, int cmd, ...)
-{
- MMRESULT mr;
- WaveVoiceOut *wave = (WaveVoiceOut *) hw;
-
- switch (cmd) {
- case VOICE_ENABLE:
- {
- va_list ap;
- int poll_mode;
-
- va_start (ap, cmd);
- poll_mode = va_arg (ap, int);
- va_end (ap);
-
- if (poll_mode && !wave->event) {
- wave->event = CreateEvent (NULL, TRUE, TRUE, NULL);
- if (!wave->event) {
- dolog ("DAC CreateEvent: %lx, poll mode will be disabled\n",
- GetLastError ());
- }
- }
-
- if (wave->event) {
- int ret;
-
- ret = qemu_add_wait_object (wave->event, winwave_poll, wave);
- hw->poll_mode = (ret == 0);
- }
- else {
- hw->poll_mode = 0;
- }
- wave->paused = 0;
- }
- return 0;
-
- case VOICE_DISABLE:
- if (!wave->paused) {
- mr = waveOutReset (wave->hwo);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveOutReset");
- }
- else {
- wave->paused = 1;
- }
- }
- if (wave->event) {
- qemu_del_wait_object (wave->event, winwave_poll, wave);
- }
- return 0;
- }
- return -1;
-}
-
-static void winwave_anal_close_in (WaveVoiceIn *wave)
-{
- MMRESULT mr;
-
- mr = waveInClose (wave->hwi);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInClose");
- }
- wave->hwi = NULL;
-}
-
-static void CALLBACK winwave_callback_in (
- HWAVEIN *hwi,
- UINT msg,
- DWORD_PTR dwInstance,
- DWORD_PTR dwParam1,
- DWORD_PTR dwParam2
- )
-{
- WaveVoiceIn *wave = (WaveVoiceIn *) dwInstance;
-
- switch (msg) {
- case WIM_DATA:
- {
- WAVEHDR *h = (WAVEHDR *) dwParam1;
- if (!h->dwUser) {
- h->dwUser = 1;
- EnterCriticalSection (&wave->crit_sect);
- {
- wave->avail += conf.adc_samples;
- }
- LeaveCriticalSection (&wave->crit_sect);
- if (wave->hw.poll_mode) {
- if (!SetEvent (wave->event)) {
- dolog ("ADC SetEvent failed %lx\n", GetLastError ());
- }
- }
- }
- }
- break;
-
- case WIM_CLOSE:
- case WIM_OPEN:
- break;
-
- default:
- dolog ("unknown wave in callback msg %x\n", msg);
- }
-}
-
-static void winwave_add_buffers (WaveVoiceIn *wave, int samples)
-{
- int doreset;
-
- doreset = wave->hw.poll_mode && (samples >= conf.adc_samples);
- if (doreset && !ResetEvent (wave->event)) {
- dolog ("ADC ResetEvent failed %lx\n", GetLastError ());
- }
-
- while (samples >= conf.adc_samples) {
- MMRESULT mr;
- WAVEHDR *h = &wave->hdrs[wave->curhdr];
-
- h->dwUser = 0;
- mr = waveInAddBuffer (wave->hwi, h, sizeof (*h));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInAddBuffer(%d)", wave->curhdr);
- }
- wave->curhdr = (wave->curhdr + 1) % conf.adc_headers;
- samples -= conf.adc_samples;
- }
-}
-
-static int winwave_init_in (HWVoiceIn *hw, struct audsettings *as)
-{
- int i;
- int err;
- MMRESULT mr;
- WAVEFORMATEX wfx;
- WaveVoiceIn *wave;
-
- wave = (WaveVoiceIn *) hw;
-
- InitializeCriticalSection (&wave->crit_sect);
-
- err = waveformat_from_audio_settings (&wfx, as);
- if (err) {
- goto err0;
- }
-
- mr = waveInOpen (&wave->hwi, WAVE_MAPPER, &wfx,
- (DWORD_PTR) winwave_callback_in,
- (DWORD_PTR) wave, CALLBACK_FUNCTION);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInOpen");
- goto err1;
- }
-
- wave->hdrs = audio_calloc (AUDIO_FUNC, conf.dac_headers,
- sizeof (*wave->hdrs));
- if (!wave->hdrs) {
- goto err2;
- }
-
- audio_pcm_init_info (&hw->info, as);
- hw->samples = conf.adc_samples * conf.adc_headers;
- wave->avail = 0;
-
- wave->pcm_buf = audio_calloc (AUDIO_FUNC, conf.adc_samples,
- conf.adc_headers << hw->info.shift);
- if (!wave->pcm_buf) {
- goto err3;
- }
-
- for (i = 0; i < conf.adc_headers; ++i) {
- WAVEHDR *h = &wave->hdrs[i];
-
- h->dwUser = 0;
- h->dwBufferLength = conf.adc_samples << hw->info.shift;
- h->lpData = advance (wave->pcm_buf, i * h->dwBufferLength);
- h->dwFlags = 0;
-
- mr = waveInPrepareHeader (wave->hwi, h, sizeof (*h));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInPrepareHeader(%d)", i);
- goto err4;
- }
- }
-
- wave->paused = 1;
- winwave_add_buffers (wave, hw->samples);
- return 0;
-
- err4:
- g_free (wave->pcm_buf);
- err3:
- g_free (wave->hdrs);
- err2:
- winwave_anal_close_in (wave);
- err1:
- err0:
- return -1;
-}
-
-static void winwave_fini_in (HWVoiceIn *hw)
-{
- int i;
- MMRESULT mr;
- WaveVoiceIn *wave = (WaveVoiceIn *) hw;
-
- mr = waveInReset (wave->hwi);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInReset");
- }
-
- for (i = 0; i < conf.adc_headers; ++i) {
- mr = waveInUnprepareHeader (wave->hwi, &wave->hdrs[i],
- sizeof (wave->hdrs[i]));
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInUnprepareHeader(%d)", i);
- }
- }
-
- winwave_anal_close_in (wave);
-
- if (wave->event) {
- qemu_del_wait_object (wave->event, winwave_poll, wave);
- if (!CloseHandle (wave->event)) {
- dolog ("ADC CloseHandle failed %lx\n", GetLastError ());
- }
- wave->event = NULL;
- }
-
- g_free (wave->pcm_buf);
- wave->pcm_buf = NULL;
-
- g_free (wave->hdrs);
- wave->hdrs = NULL;
-}
-
-static int winwave_run_in (HWVoiceIn *hw)
-{
- WaveVoiceIn *wave = (WaveVoiceIn *) hw;
- int live = audio_pcm_hw_get_live_in (hw);
- int dead = hw->samples - live;
- int decr, ret;
-
- if (!dead) {
- return 0;
- }
-
- EnterCriticalSection (&wave->crit_sect);
- {
- decr = audio_MIN (dead, wave->avail);
- wave->avail -= decr;
- }
- LeaveCriticalSection (&wave->crit_sect);
-
- ret = decr;
- while (decr) {
- int left = hw->samples - hw->wpos;
- int conv = audio_MIN (left, decr);
- hw->conv (hw->conv_buf + hw->wpos,
- advance (wave->pcm_buf, wave->rpos << hw->info.shift),
- conv);
-
- wave->rpos = (wave->rpos + conv) % hw->samples;
- hw->wpos = (hw->wpos + conv) % hw->samples;
- decr -= conv;
- }
-
- winwave_add_buffers (wave, ret);
- return ret;
-}
-
-static int winwave_read (SWVoiceIn *sw, void *buf, int size)
-{
- return audio_pcm_sw_read (sw, buf, size);
-}
-
-static int winwave_ctl_in (HWVoiceIn *hw, int cmd, ...)
-{
- MMRESULT mr;
- WaveVoiceIn *wave = (WaveVoiceIn *) hw;
-
- switch (cmd) {
- case VOICE_ENABLE:
- {
- va_list ap;
- int poll_mode;
-
- va_start (ap, cmd);
- poll_mode = va_arg (ap, int);
- va_end (ap);
-
- if (poll_mode && !wave->event) {
- wave->event = CreateEvent (NULL, TRUE, TRUE, NULL);
- if (!wave->event) {
- dolog ("ADC CreateEvent: %lx, poll mode will be disabled\n",
- GetLastError ());
- }
- }
-
- if (wave->event) {
- int ret;
-
- ret = qemu_add_wait_object (wave->event, winwave_poll, wave);
- hw->poll_mode = (ret == 0);
- }
- else {
- hw->poll_mode = 0;
- }
- if (wave->paused) {
- mr = waveInStart (wave->hwi);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInStart");
- }
- wave->paused = 0;
- }
- }
- return 0;
-
- case VOICE_DISABLE:
- if (!wave->paused) {
- mr = waveInStop (wave->hwi);
- if (mr != MMSYSERR_NOERROR) {
- winwave_logerr (mr, "waveInStop");
- }
- else {
- wave->paused = 1;
- }
- }
- if (wave->event) {
- qemu_del_wait_object (wave->event, winwave_poll, wave);
- }
- return 0;
- }
- return 0;
-}
-
-static void *winwave_audio_init (void)
-{
- return &conf;
-}
-
-static void winwave_audio_fini (void *opaque)
-{
- (void) opaque;
-}
-
-static struct audio_option winwave_options[] = {
- {
- .name = "DAC_HEADERS",
- .tag = AUD_OPT_INT,
- .valp = &conf.dac_headers,
- .descr = "DAC number of headers",
- },
- {
- .name = "DAC_SAMPLES",
- .tag = AUD_OPT_INT,
- .valp = &conf.dac_samples,
- .descr = "DAC number of samples per header",
- },
- {
- .name = "ADC_HEADERS",
- .tag = AUD_OPT_INT,
- .valp = &conf.adc_headers,
- .descr = "ADC number of headers",
- },
- {
- .name = "ADC_SAMPLES",
- .tag = AUD_OPT_INT,
- .valp = &conf.adc_samples,
- .descr = "ADC number of samples per header",
- },
- { /* End of list */ }
-};
-
-static struct audio_pcm_ops winwave_pcm_ops = {
- .init_out = winwave_init_out,
- .fini_out = winwave_fini_out,
- .run_out = winwave_run_out,
- .write = winwave_write,
- .ctl_out = winwave_ctl_out,
- .init_in = winwave_init_in,
- .fini_in = winwave_fini_in,
- .run_in = winwave_run_in,
- .read = winwave_read,
- .ctl_in = winwave_ctl_in
-};
-
-struct audio_driver winwave_audio_driver = {
- .name = "winwave",
- .descr = "Windows Waveform Audio http://msdn.microsoft.com",
- .options = winwave_options,
- .init = winwave_audio_init,
- .fini = winwave_audio_fini,
- .pcm_ops = &winwave_pcm_ops,
- .can_be_default = 1,
- .max_voices_out = INT_MAX,
- .max_voices_in = INT_MAX,
- .voice_size_out = sizeof (WaveVoiceOut),
- .voice_size_in = sizeof (WaveVoiceIn)
-};
diff --git a/backends/baum.c b/backends/baum.c
index a69aaff..c537141 100644
--- a/backends/baum.c
+++ b/backends/baum.c
@@ -21,6 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "qemu/timer.h"
@@ -303,7 +305,7 @@
return 0;
cur++;
}
- DPRINTF("Dropped %d bytes!\n", cur - buf);
+ DPRINTF("Dropped %td bytes!\n", cur - buf);
}
#define EAT(c) do {\
@@ -335,7 +337,7 @@
/* Allow 100ms to complete the DisplayData packet */
timer_mod(baum->cellCount_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() / 10);
+ NANOSECONDS_PER_SECOND / 10);
for (i = 0; i < baum->x * baum->y ; i++) {
EAT(c);
cells[i] = c;
@@ -561,8 +563,12 @@
g_free(baum);
}
-CharDriverState *chr_baum_init(void)
+static CharDriverState *chr_baum_init(const char *id,
+ ChardevBackend *backend,
+ ChardevReturn *ret,
+ Error **errp)
{
+ ChardevCommon *common = backend->u.braille.data;
BaumDriverState *baum;
CharDriverState *chr;
brlapi_handle_t *handle;
@@ -573,8 +579,12 @@
#endif
int tty;
+ chr = qemu_chr_alloc(common, errp);
+ if (!chr) {
+ return NULL;
+ }
baum = g_malloc0(sizeof(BaumDriverState));
- baum->chr = chr = qemu_chr_alloc();
+ baum->chr = chr;
chr->opaque = baum;
chr->chr_write = baum_write;
@@ -586,14 +596,16 @@
baum->brlapi_fd = brlapi__openConnection(handle, NULL, NULL);
if (baum->brlapi_fd == -1) {
- brlapi_perror("baum_init: brlapi_openConnection");
+ error_setg(errp, "brlapi__openConnection: %s",
+ brlapi_strerror(brlapi_error_location()));
goto fail_handle;
}
baum->cellCount_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, baum_cellCount_timer_cb, baum);
if (brlapi__getDisplaySize(handle, &baum->x, &baum->y) == -1) {
- brlapi_perror("baum_init: brlapi_getDisplaySize");
+ error_setg(errp, "brlapi__getDisplaySize: %s",
+ brlapi_strerror(brlapi_error_location()));
goto fail;
}
@@ -609,7 +621,8 @@
tty = BRLAPI_TTY_DEFAULT;
if (brlapi__enterTtyMode(handle, tty, NULL) == -1) {
- brlapi_perror("baum_init: brlapi_enterTtyMode");
+ error_setg(errp, "brlapi__enterTtyMode: %s",
+ brlapi_strerror(brlapi_error_location()));
goto fail;
}
@@ -629,7 +642,8 @@
static void register_types(void)
{
- register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL);
+ register_char_driver("braille", CHARDEV_BACKEND_KIND_BRAILLE, NULL,
+ chr_baum_init);
}
type_init(register_types);
diff --git a/backends/hostmem-file.c b/backends/hostmem-file.c
index 5179994..5c4b808 100644
--- a/backends/hostmem-file.c
+++ b/backends/hostmem-file.c
@@ -9,6 +9,8 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "sysemu/hostmem.h"
#include "sysemu/sysemu.h"
@@ -43,18 +45,21 @@
return;
}
if (!fb->mem_path) {
- error_setg(errp, "mem_path property not set");
+ error_setg(errp, "mem-path property not set");
return;
}
#ifndef CONFIG_LINUX
error_setg(errp, "-mem-path not supported on this host");
#else
if (!memory_region_size(&backend->mr)) {
+ gchar *path;
backend->force_prealloc = mem_prealloc;
+ path = object_get_canonical_path(OBJECT(backend));
memory_region_init_ram_from_file(&backend->mr, OBJECT(backend),
- object_get_canonical_path(OBJECT(backend)),
+ path,
backend->size, fb->share,
fb->mem_path, errp);
+ g_free(path);
}
#endif
}
@@ -83,9 +88,7 @@
error_setg(errp, "cannot change property value");
return;
}
- if (fb->mem_path) {
- g_free(fb->mem_path);
- }
+ g_free(fb->mem_path);
fb->mem_path = g_strdup(str);
}
@@ -118,11 +121,19 @@
set_mem_path, NULL);
}
+static void file_backend_instance_finalize(Object *o)
+{
+ HostMemoryBackendFile *fb = MEMORY_BACKEND_FILE(o);
+
+ g_free(fb->mem_path);
+}
+
static const TypeInfo file_backend_info = {
.name = TYPE_MEMORY_BACKEND_FILE,
.parent = TYPE_MEMORY_BACKEND,
.class_init = file_backend_class_init,
.instance_init = file_backend_instance_init,
+ .instance_finalize = file_backend_instance_finalize,
.instance_size = sizeof(HostMemoryBackendFile),
};
diff --git a/backends/hostmem-ram.c b/backends/hostmem-ram.c
index a67a134..04a7ac3 100644
--- a/backends/hostmem-ram.c
+++ b/backends/hostmem-ram.c
@@ -9,7 +9,9 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
+#include "qapi/error.h"
#include "qom/object_interfaces.h"
#define TYPE_MEMORY_BACKEND_RAM "memory-backend-ram"
diff --git a/backends/hostmem.c b/backends/hostmem.c
index 99e8f99..b7a208d 100644
--- a/backends/hostmem.c
+++ b/backends/hostmem.c
@@ -9,11 +9,13 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/hostmem.h"
+#include "hw/boards.h"
+#include "qapi/error.h"
#include "qapi/visitor.h"
#include "qapi-types.h"
#include "qapi-visit.h"
-#include "qapi/qmp/qerror.h"
#include "qemu/config-file.h"
#include "qom/object_interfaces.h"
@@ -26,18 +28,18 @@
#endif
static void
-host_memory_backend_get_size(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint64_t value = backend->size;
- visit_type_size(v, &value, name, errp);
+ visit_type_size(v, name, &value, errp);
}
static void
-host_memory_backend_set_size(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
Error *local_err = NULL;
@@ -48,7 +50,7 @@
goto out;
}
- visit_type_size(v, &value, name, &local_err);
+ visit_type_size(v, name, &value, &local_err);
if (local_err) {
goto out;
}
@@ -62,9 +64,17 @@
error_propagate(errp, local_err);
}
+static uint16List **host_memory_append_node(uint16List **node,
+ unsigned long value)
+{
+ *node = g_malloc0(sizeof(**node));
+ (*node)->value = value;
+ return &(*node)->next;
+}
+
static void
-host_memory_backend_get_host_nodes(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint16List *host_nodes = NULL;
@@ -72,13 +82,12 @@
unsigned long value;
value = find_first_bit(backend->host_nodes, MAX_NODES);
- if (value == MAX_NODES) {
- return;
- }
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ node = host_memory_append_node(node, value);
+
+ if (value == MAX_NODES) {
+ goto out;
+ }
do {
value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
@@ -86,23 +95,22 @@
break;
}
- *node = g_malloc0(sizeof(**node));
- (*node)->value = value;
- node = &(*node)->next;
+ node = host_memory_append_node(node, value);
} while (true);
- visit_type_uint16List(v, &host_nodes, name, errp);
+out:
+ visit_type_uint16List(v, name, &host_nodes, errp);
}
static void
-host_memory_backend_set_host_nodes(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
#ifdef CONFIG_NUMA
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
uint16List *l = NULL;
- visit_type_uint16List(v, &l, name, errp);
+ visit_type_uint16List(v, name, &l, errp);
while (l) {
bitmap_set(backend->host_nodes, l->value, 1);
@@ -113,24 +121,17 @@
#endif
}
-static void
-host_memory_backend_get_policy(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+static int
+host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- int policy = backend->policy;
-
- visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp);
+ return backend->policy;
}
static void
-host_memory_backend_set_policy(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
- int policy;
-
- visit_type_enum(v, &policy, HostMemPolicy_lookup, NULL, name, errp);
backend->policy = policy;
#ifndef CONFIG_NUMA
@@ -202,6 +203,7 @@
static void host_memory_backend_set_prealloc(Object *obj, bool value,
Error **errp)
{
+ Error *local_err = NULL;
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
if (backend->force_prealloc) {
@@ -222,7 +224,11 @@
void *ptr = memory_region_get_ram_ptr(&backend->mr);
uint64_t sz = memory_region_size(&backend->mr);
- os_mem_prealloc(fd, ptr, sz);
+ os_mem_prealloc(fd, ptr, sz, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
backend->prealloc = true;
}
}
@@ -230,11 +236,10 @@
static void host_memory_backend_init(Object *obj)
{
HostMemoryBackend *backend = MEMORY_BACKEND(obj);
+ MachineState *machine = MACHINE(qdev_get_machine());
- backend->merge = qemu_opt_get_bool(qemu_get_machine_opts(),
- "mem-merge", true);
- backend->dump = qemu_opt_get_bool(qemu_get_machine_opts(),
- "dump-guest-core", true);
+ backend->merge = machine_mem_merge(machine);
+ backend->dump = machine_dump_guest_core(machine);
backend->prealloc = mem_prealloc;
object_property_add_bool(obj, "merge",
@@ -252,9 +257,10 @@
object_property_add(obj, "host-nodes", "int",
host_memory_backend_get_host_nodes,
host_memory_backend_set_host_nodes, NULL, NULL, NULL);
- object_property_add(obj, "policy", "str",
- host_memory_backend_get_policy,
- host_memory_backend_set_policy, NULL, NULL, NULL);
+ object_property_add_enum(obj, "policy", "HostMemPolicy",
+ HostMemPolicy_lookup,
+ host_memory_backend_get_policy,
+ host_memory_backend_set_policy, NULL);
}
MemoryRegion *
@@ -263,6 +269,16 @@
return memory_region_size(&backend->mr) ? &backend->mr : NULL;
}
+void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
+{
+ backend->is_mapped = mapped;
+}
+
+bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
+{
+ return backend->is_mapped;
+}
+
static void
host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
{
@@ -275,8 +291,7 @@
if (bc->alloc) {
bc->alloc(backend, &local_err);
if (local_err) {
- error_propagate(errp, local_err);
- return;
+ goto out;
}
ptr = memory_region_get_ram_ptr(&backend->mr);
@@ -320,9 +335,11 @@
assert(maxnode <= MAX_NODES);
if (mbind(ptr, sz, backend->policy,
maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
- error_setg_errno(errp, errno,
- "cannot bind memory to host NUMA nodes");
- return;
+ if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
+ error_setg_errno(errp, errno,
+ "cannot bind memory to host NUMA nodes");
+ return;
+ }
}
#endif
/* Preallocate memory after the NUMA policy has been instantiated.
@@ -330,9 +347,25 @@
* specified NUMA policy in place.
*/
if (backend->prealloc) {
- os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
+ os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz,
+ &local_err);
+ if (local_err) {
+ goto out;
+ }
}
}
+out:
+ error_propagate(errp, local_err);
+}
+
+static bool
+host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
+{
+ if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
+ return false;
+ } else {
+ return true;
+ }
}
static void
@@ -341,6 +374,7 @@
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
ucc->complete = host_memory_backend_memory_complete;
+ ucc->can_be_deleted = host_memory_backend_can_be_deleted;
}
static const TypeInfo host_memory_backend_info = {
diff --git a/backends/msmouse.c b/backends/msmouse.c
index 0119110..aeb9055 100644
--- a/backends/msmouse.c
+++ b/backends/msmouse.c
@@ -21,20 +21,55 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <stdlib.h>
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/char.h"
#include "ui/console.h"
+#include "ui/input.h"
#define MSMOUSE_LO6(n) ((n) & 0x3f)
#define MSMOUSE_HI2(n) (((n) & 0xc0) >> 6)
-static void msmouse_event(void *opaque,
- int dx, int dy, int dz, int buttons_state)
-{
- CharDriverState *chr = (CharDriverState *)opaque;
+typedef struct {
+ CharDriverState *chr;
+ QemuInputHandlerState *hs;
+ int axis[INPUT_AXIS__MAX];
+ bool btns[INPUT_BUTTON__MAX];
+ bool btnc[INPUT_BUTTON__MAX];
+ uint8_t outbuf[32];
+ int outlen;
+} MouseState;
+static void msmouse_chr_accept_input(CharDriverState *chr)
+{
+ MouseState *mouse = chr->opaque;
+ int len;
+
+ len = qemu_chr_be_can_write(chr);
+ if (len > mouse->outlen) {
+ len = mouse->outlen;
+ }
+ if (!len) {
+ return;
+ }
+
+ qemu_chr_be_write(chr, mouse->outbuf, len);
+ mouse->outlen -= len;
+ if (mouse->outlen) {
+ memmove(mouse->outbuf, mouse->outbuf + len, mouse->outlen);
+ }
+}
+
+static void msmouse_queue_event(MouseState *mouse)
+{
unsigned char bytes[4] = { 0x40, 0x00, 0x00, 0x00 };
+ int dx, dy, count = 3;
+
+ dx = mouse->axis[INPUT_AXIS_X];
+ mouse->axis[INPUT_AXIS_X] = 0;
+
+ dy = mouse->axis[INPUT_AXIS_Y];
+ mouse->axis[INPUT_AXIS_Y] = 0;
/* Movement deltas */
bytes[0] |= (MSMOUSE_HI2(dy) << 2) | MSMOUSE_HI2(dx);
@@ -42,14 +77,54 @@
bytes[2] |= MSMOUSE_LO6(dy);
/* Buttons */
- bytes[0] |= (buttons_state & 0x01 ? 0x20 : 0x00);
- bytes[0] |= (buttons_state & 0x02 ? 0x10 : 0x00);
- bytes[3] |= (buttons_state & 0x04 ? 0x20 : 0x00);
+ bytes[0] |= (mouse->btns[INPUT_BUTTON_LEFT] ? 0x20 : 0x00);
+ bytes[0] |= (mouse->btns[INPUT_BUTTON_RIGHT] ? 0x10 : 0x00);
+ if (mouse->btns[INPUT_BUTTON_MIDDLE] ||
+ mouse->btnc[INPUT_BUTTON_MIDDLE]) {
+ bytes[3] |= (mouse->btns[INPUT_BUTTON_MIDDLE] ? 0x20 : 0x00);
+ mouse->btnc[INPUT_BUTTON_MIDDLE] = false;
+ count = 4;
+ }
- /* We always send the packet of, so that we do not have to keep track
- of previous state of the middle button. This can potentially confuse
- some very old drivers for two button mice though. */
- qemu_chr_be_write(chr, bytes, 4);
+ if (mouse->outlen <= sizeof(mouse->outbuf) - count) {
+ memcpy(mouse->outbuf + mouse->outlen, bytes, count);
+ mouse->outlen += count;
+ } else {
+ /* queue full -> drop event */
+ }
+}
+
+static void msmouse_input_event(DeviceState *dev, QemuConsole *src,
+ InputEvent *evt)
+{
+ MouseState *mouse = (MouseState *)dev;
+ InputMoveEvent *move;
+ InputBtnEvent *btn;
+
+ switch (evt->type) {
+ case INPUT_EVENT_KIND_REL:
+ move = evt->u.rel.data;
+ mouse->axis[move->axis] += move->value;
+ break;
+
+ case INPUT_EVENT_KIND_BTN:
+ btn = evt->u.btn.data;
+ mouse->btns[btn->button] = btn->down;
+ mouse->btnc[btn->button] = true;
+ break;
+
+ default:
+ /* keep gcc happy */
+ break;
+ }
+}
+
+static void msmouse_input_sync(DeviceState *dev)
+{
+ MouseState *mouse = (MouseState *)dev;
+
+ msmouse_queue_event(mouse);
+ msmouse_chr_accept_input(mouse->chr);
}
static int msmouse_chr_write (struct CharDriverState *s, const uint8_t *buf, int len)
@@ -60,26 +135,49 @@
static void msmouse_chr_close (struct CharDriverState *chr)
{
- g_free (chr);
+ MouseState *mouse = chr->opaque;
+
+ qemu_input_handler_unregister(mouse->hs);
+ g_free(mouse);
+ g_free(chr);
}
-CharDriverState *qemu_chr_open_msmouse(void)
+static QemuInputHandler msmouse_handler = {
+ .name = "QEMU Microsoft Mouse",
+ .mask = INPUT_EVENT_MASK_BTN | INPUT_EVENT_MASK_REL,
+ .event = msmouse_input_event,
+ .sync = msmouse_input_sync,
+};
+
+static CharDriverState *qemu_chr_open_msmouse(const char *id,
+ ChardevBackend *backend,
+ ChardevReturn *ret,
+ Error **errp)
{
+ ChardevCommon *common = backend->u.msmouse.data;
+ MouseState *mouse;
CharDriverState *chr;
- chr = qemu_chr_alloc();
+ chr = qemu_chr_alloc(common, errp);
chr->chr_write = msmouse_chr_write;
chr->chr_close = msmouse_chr_close;
+ chr->chr_accept_input = msmouse_chr_accept_input;
chr->explicit_be_open = true;
- qemu_add_mouse_event_handler(msmouse_event, chr, 0, "QEMU Microsoft Mouse");
+ mouse = g_new0(MouseState, 1);
+ mouse->hs = qemu_input_handler_register((DeviceState *)mouse,
+ &msmouse_handler);
+
+ mouse->chr = chr;
+ chr->opaque = mouse;
return chr;
}
static void register_types(void)
{
- register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL);
+ register_char_driver("msmouse", CHARDEV_BACKEND_KIND_MSMOUSE, NULL,
+ qemu_chr_open_msmouse);
}
type_init(register_types);
diff --git a/backends/rng-egd.c b/backends/rng-egd.c
index 2962795..7a1b924 100644
--- a/backends/rng-egd.c
+++ b/backends/rng-egd.c
@@ -10,8 +10,10 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/rng.h"
#include "sysemu/char.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "hw/qdev.h" /* just for DEFINE_PROP_CHR */
@@ -24,33 +26,12 @@
CharDriverState *chr;
char *chr_name;
-
- GSList *requests;
} RngEgd;
-typedef struct RngRequest
-{
- EntropyReceiveFunc *receive_entropy;
- uint8_t *data;
- void *opaque;
- size_t offset;
- size_t size;
-} RngRequest;
-
-static void rng_egd_request_entropy(RngBackend *b, size_t size,
- EntropyReceiveFunc *receive_entropy,
- void *opaque)
+static void rng_egd_request_entropy(RngBackend *b, RngRequest *req)
{
RngEgd *s = RNG_EGD(b);
- RngRequest *req;
-
- req = g_malloc(sizeof(*req));
-
- req->offset = 0;
- req->size = size;
- req->receive_entropy = receive_entropy;
- req->opaque = opaque;
- req->data = g_malloc(req->size);
+ size_t size = req->size;
while (size > 0) {
uint8_t header[2];
@@ -64,24 +45,15 @@
size -= len;
}
-
- s->requests = g_slist_append(s->requests, req);
-}
-
-static void rng_egd_free_request(RngRequest *req)
-{
- g_free(req->data);
- g_free(req);
}
static int rng_egd_chr_can_read(void *opaque)
{
RngEgd *s = RNG_EGD(opaque);
- GSList *i;
+ RngRequest *req;
int size = 0;
- for (i = s->requests; i; i = i->next) {
- RngRequest *req = i->data;
+ QSIMPLEQ_FOREACH(req, &s->parent.requests, next) {
size += req->size - req->offset;
}
@@ -93,8 +65,8 @@
RngEgd *s = RNG_EGD(opaque);
size_t buf_offset = 0;
- while (size > 0 && s->requests) {
- RngRequest *req = s->requests->data;
+ while (size > 0 && !QSIMPLEQ_EMPTY(&s->parent.requests)) {
+ RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
int len = MIN(size, req->size - req->offset);
memcpy(req->data + req->offset, buf + buf_offset, len);
@@ -103,56 +75,32 @@
size -= len;
if (req->offset == req->size) {
- s->requests = g_slist_remove_link(s->requests, s->requests);
-
req->receive_entropy(req->opaque, req->data, req->size);
- rng_egd_free_request(req);
+ rng_backend_finalize_request(&s->parent, req);
}
}
}
-static void rng_egd_free_requests(RngEgd *s)
-{
- GSList *i;
-
- for (i = s->requests; i; i = i->next) {
- rng_egd_free_request(i->data);
- }
-
- g_slist_free(s->requests);
- s->requests = NULL;
-}
-
-static void rng_egd_cancel_requests(RngBackend *b)
-{
- RngEgd *s = RNG_EGD(b);
-
- /* We simply delete the list of pending requests. If there is data in the
- * queue waiting to be read, this is okay, because there will always be
- * more data than we requested originally
- */
- rng_egd_free_requests(s);
-}
-
static void rng_egd_opened(RngBackend *b, Error **errp)
{
RngEgd *s = RNG_EGD(b);
if (s->chr_name == NULL) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE,
- "chardev", "a valid character device");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ "chardev", "a valid character device");
return;
}
s->chr = qemu_chr_find(s->chr_name);
if (s->chr == NULL) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, s->chr_name);
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", s->chr_name);
return;
}
if (qemu_chr_fe_claim(s->chr) != 0) {
- error_set(errp, QERR_DEVICE_IN_USE, s->chr_name);
+ error_setg(errp, QERR_DEVICE_IN_USE, s->chr_name);
return;
}
@@ -167,7 +115,7 @@
RngEgd *s = RNG_EGD(b);
if (b->opened) {
- error_set(errp, QERR_PERMISSION_DENIED);
+ error_setg(errp, QERR_PERMISSION_DENIED);
} else {
g_free(s->chr_name);
s->chr_name = g_strdup(value);
@@ -202,8 +150,6 @@
}
g_free(s->chr_name);
-
- rng_egd_free_requests(s);
}
static void rng_egd_class_init(ObjectClass *klass, void *data)
@@ -211,7 +157,6 @@
RngBackendClass *rbc = RNG_BACKEND_CLASS(klass);
rbc->request_entropy = rng_egd_request_entropy;
- rbc->cancel_requests = rng_egd_cancel_requests;
rbc->opened = rng_egd_opened;
}
diff --git a/backends/rng-random.c b/backends/rng-random.c
index 601d9dc..e2a49b0 100644
--- a/backends/rng-random.c
+++ b/backends/rng-random.c
@@ -10,21 +10,19 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/rng-random.h"
#include "sysemu/rng.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qemu/main-loop.h"
-struct RndRandom
+struct RngRandom
{
RngBackend parent;
int fd;
char *filename;
-
- EntropyReceiveFunc *receive_func;
- void *opaque;
- size_t size;
};
/**
@@ -36,46 +34,45 @@
static void entropy_available(void *opaque)
{
- RndRandom *s = RNG_RANDOM(opaque);
- uint8_t buffer[s->size];
- ssize_t len;
+ RngRandom *s = RNG_RANDOM(opaque);
- len = read(s->fd, buffer, s->size);
- if (len < 0 && errno == EAGAIN) {
- return;
+ while (!QSIMPLEQ_EMPTY(&s->parent.requests)) {
+ RngRequest *req = QSIMPLEQ_FIRST(&s->parent.requests);
+ ssize_t len;
+
+ len = read(s->fd, req->data, req->size);
+ if (len < 0 && errno == EAGAIN) {
+ return;
+ }
+ g_assert(len != -1);
+
+ req->receive_entropy(req->opaque, req->data, len);
+
+ rng_backend_finalize_request(&s->parent, req);
}
- g_assert(len != -1);
- s->receive_func(s->opaque, buffer, len);
- s->receive_func = NULL;
-
+ /* We've drained all requests, the fd handler can be reset. */
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
}
-static void rng_random_request_entropy(RngBackend *b, size_t size,
- EntropyReceiveFunc *receive_entropy,
- void *opaque)
+static void rng_random_request_entropy(RngBackend *b, RngRequest *req)
{
- RndRandom *s = RNG_RANDOM(b);
+ RngRandom *s = RNG_RANDOM(b);
- if (s->receive_func) {
- s->receive_func(s->opaque, NULL, 0);
+ if (QSIMPLEQ_EMPTY(&s->parent.requests)) {
+ /* If there are no pending requests yet, we need to
+ * install our fd handler. */
+ qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
-
- s->receive_func = receive_entropy;
- s->opaque = opaque;
- s->size = size;
-
- qemu_set_fd_handler(s->fd, entropy_available, NULL, s);
}
static void rng_random_opened(RngBackend *b, Error **errp)
{
- RndRandom *s = RNG_RANDOM(b);
+ RngRandom *s = RNG_RANDOM(b);
if (s->filename == NULL) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE,
- "filename", "a valid filename");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ "filename", "a valid filename");
} else {
s->fd = qemu_open(s->filename, O_RDONLY | O_NONBLOCK);
if (s->fd == -1) {
@@ -86,23 +83,19 @@
static char *rng_random_get_filename(Object *obj, Error **errp)
{
- RndRandom *s = RNG_RANDOM(obj);
+ RngRandom *s = RNG_RANDOM(obj);
- if (s->filename) {
- return g_strdup(s->filename);
- }
-
- return NULL;
+ return g_strdup(s->filename);
}
static void rng_random_set_filename(Object *obj, const char *filename,
Error **errp)
{
RngBackend *b = RNG_BACKEND(obj);
- RndRandom *s = RNG_RANDOM(obj);
+ RngRandom *s = RNG_RANDOM(obj);
if (b->opened) {
- error_set(errp, QERR_PERMISSION_DENIED);
+ error_setg(errp, QERR_PERMISSION_DENIED);
return;
}
@@ -112,7 +105,7 @@
static void rng_random_init(Object *obj)
{
- RndRandom *s = RNG_RANDOM(obj);
+ RngRandom *s = RNG_RANDOM(obj);
object_property_add_str(obj, "filename",
rng_random_get_filename,
@@ -125,7 +118,7 @@
static void rng_random_finalize(Object *obj)
{
- RndRandom *s = RNG_RANDOM(obj);
+ RngRandom *s = RNG_RANDOM(obj);
if (s->fd != -1) {
qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
@@ -146,7 +139,7 @@
static const TypeInfo rng_random_info = {
.name = TYPE_RNG_RANDOM,
.parent = TYPE_RNG_BACKEND,
- .instance_size = sizeof(RndRandom),
+ .instance_size = sizeof(RngRandom),
.class_init = rng_random_class_init,
.instance_init = rng_random_init,
.instance_finalize = rng_random_finalize,
diff --git a/backends/rng.c b/backends/rng.c
index 0f2fc11..398ebe4 100644
--- a/backends/rng.c
+++ b/backends/rng.c
@@ -10,7 +10,9 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/rng.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "qom/object_interfaces.h"
@@ -19,18 +21,20 @@
void *opaque)
{
RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+ RngRequest *req;
if (k->request_entropy) {
- k->request_entropy(s, size, receive_entropy, opaque);
- }
-}
+ req = g_malloc(sizeof(*req));
-void rng_backend_cancel_requests(RngBackend *s)
-{
- RngBackendClass *k = RNG_BACKEND_GET_CLASS(s);
+ req->offset = 0;
+ req->size = size;
+ req->receive_entropy = receive_entropy;
+ req->opaque = opaque;
+ req->data = g_malloc(req->size);
- if (k->cancel_requests) {
- k->cancel_requests(s);
+ k->request_entropy(s, req);
+
+ QSIMPLEQ_INSERT_TAIL(&s->requests, req, next);
}
}
@@ -57,7 +61,7 @@
}
if (!value && s->opened) {
- error_set(errp, QERR_PERMISSION_DENIED);
+ error_setg(errp, QERR_PERMISSION_DENIED);
return;
}
@@ -72,14 +76,48 @@
s->opened = true;
}
+static void rng_backend_free_request(RngRequest *req)
+{
+ g_free(req->data);
+ g_free(req);
+}
+
+static void rng_backend_free_requests(RngBackend *s)
+{
+ RngRequest *req, *next;
+
+ QSIMPLEQ_FOREACH_SAFE(req, &s->requests, next, next) {
+ rng_backend_free_request(req);
+ }
+
+ QSIMPLEQ_INIT(&s->requests);
+}
+
+void rng_backend_finalize_request(RngBackend *s, RngRequest *req)
+{
+ QSIMPLEQ_REMOVE(&s->requests, req, RngRequest, next);
+ rng_backend_free_request(req);
+}
+
static void rng_backend_init(Object *obj)
{
+ RngBackend *s = RNG_BACKEND(obj);
+
+ QSIMPLEQ_INIT(&s->requests);
+
object_property_add_bool(obj, "opened",
rng_backend_prop_get_opened,
rng_backend_prop_set_opened,
NULL);
}
+static void rng_backend_finalize(Object *obj)
+{
+ RngBackend *s = RNG_BACKEND(obj);
+
+ rng_backend_free_requests(s);
+}
+
static void rng_backend_class_init(ObjectClass *oc, void *data)
{
UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
@@ -92,6 +130,7 @@
.parent = TYPE_OBJECT,
.instance_size = sizeof(RngBackend),
.instance_init = rng_backend_init,
+ .instance_finalize = rng_backend_finalize,
.class_size = sizeof(RngBackendClass),
.class_init = rng_backend_class_init,
.abstract = true,
diff --git a/backends/testdev.c b/backends/testdev.c
index eba396a..3ab1c90 100644
--- a/backends/testdev.c
+++ b/backends/testdev.c
@@ -23,6 +23,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/char.h"
@@ -108,13 +109,16 @@
g_free(testdev);
}
-CharDriverState *chr_testdev_init(void)
+static CharDriverState *chr_testdev_init(const char *id,
+ ChardevBackend *backend,
+ ChardevReturn *ret,
+ Error **errp)
{
TestdevCharState *testdev;
CharDriverState *chr;
- testdev = g_malloc0(sizeof(TestdevCharState));
- testdev->chr = chr = g_malloc0(sizeof(CharDriverState));
+ testdev = g_new0(TestdevCharState, 1);
+ testdev->chr = chr = g_new0(CharDriverState, 1);
chr->opaque = testdev;
chr->chr_write = testdev_write;
@@ -125,7 +129,8 @@
static void register_types(void)
{
- register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL);
+ register_char_driver("testdev", CHARDEV_BACKEND_KIND_TESTDEV, NULL,
+ chr_testdev_init);
}
type_init(register_types);
diff --git a/backends/tpm.c b/backends/tpm.c
index 01860c4..536f262 100644
--- a/backends/tpm.c
+++ b/backends/tpm.c
@@ -12,7 +12,9 @@
* Based on backends/rng.c by Anthony Liguori
*/
+#include "qemu/osdep.h"
#include "sysemu/tpm_backend.h"
+#include "qapi/error.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/tpm.h"
#include "qemu/thread.h"
@@ -36,7 +38,7 @@
{
TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
- return k->ops->destroy(s);
+ k->ops->destroy(s);
}
int tpm_backend_init(TPMBackend *s, TPMState *state,
@@ -96,6 +98,20 @@
return k->ops->get_tpm_established_flag(s);
}
+int tpm_backend_reset_tpm_established_flag(TPMBackend *s, uint8_t locty)
+{
+ TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
+
+ return k->ops->reset_tpm_established_flag(s, locty);
+}
+
+TPMVersion tpm_backend_get_tpm_version(TPMBackend *s)
+{
+ TPMBackendClass *k = TPM_BACKEND_GET_CLASS(s);
+
+ return k->ops->get_tpm_version(s);
+}
+
static bool tpm_backend_prop_get_opened(Object *obj, Error **errp)
{
TPMBackend *s = TPM_BACKEND(obj);
@@ -119,7 +135,7 @@
}
if (!value && s->opened) {
- error_set(errp, QERR_PERMISSION_DENIED);
+ error_setg(errp, QERR_PERMISSION_DENIED);
return;
}
@@ -165,17 +181,6 @@
}
}
-void tpm_backend_thread_tpm_reset(TPMBackendThread *tbt,
- GFunc func, gpointer user_data)
-{
- if (!tbt->pool) {
- tpm_backend_thread_create(tbt, func, user_data);
- } else {
- g_thread_pool_push(tbt->pool, (gpointer)TPM_BACKEND_CMD_TPM_RESET,
- NULL);
- }
-}
-
static const TypeInfo tpm_backend_info = {
.name = TYPE_TPM_BACKEND,
.parent = TYPE_OBJECT,
diff --git a/balloon.c b/balloon.c
index b70da4f..f2ef50c 100644
--- a/balloon.c
+++ b/balloon.c
@@ -24,17 +24,45 @@
* THE SOFTWARE.
*/
-#include "monitor/monitor.h"
+#include "qemu/osdep.h"
+#include "qemu-common.h"
#include "exec/cpu-common.h"
#include "sysemu/kvm.h"
#include "sysemu/balloon.h"
#include "trace.h"
#include "qmp-commands.h"
+#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
static QEMUBalloonEvent *balloon_event_fn;
static QEMUBalloonStatus *balloon_stat_fn;
static void *balloon_opaque;
+static bool balloon_inhibited;
+
+bool qemu_balloon_is_inhibited(void)
+{
+ return balloon_inhibited;
+}
+
+void qemu_balloon_inhibit(bool state)
+{
+ balloon_inhibited = state;
+}
+
+static bool have_balloon(Error **errp)
+{
+ if (kvm_enabled() && !kvm_has_sync_mmu()) {
+ error_set(errp, ERROR_CLASS_KVM_MISSING_CAP,
+ "Using KVM without synchronous MMU, balloon unavailable");
+ return false;
+ }
+ if (!balloon_event_fn) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
+ "No balloon device has been activated");
+ return false;
+ }
+ return true;
+}
int qemu_add_balloon_handler(QEMUBalloonEvent *event_func,
QEMUBalloonStatus *stat_func, void *opaque)
@@ -43,7 +71,6 @@
/* We're already registered one balloon handler. How many can
* a guest really have?
*/
- error_report("Another balloon device already registered");
return -1;
}
balloon_event_fn = event_func;
@@ -62,58 +89,30 @@
balloon_opaque = NULL;
}
-static int qemu_balloon(ram_addr_t target)
-{
- if (!balloon_event_fn) {
- return 0;
- }
- trace_balloon_event(balloon_opaque, target);
- balloon_event_fn(balloon_opaque, target);
- return 1;
-}
-
-static int qemu_balloon_status(BalloonInfo *info)
-{
- if (!balloon_stat_fn) {
- return 0;
- }
- balloon_stat_fn(balloon_opaque, info);
- return 1;
-}
-
BalloonInfo *qmp_query_balloon(Error **errp)
{
BalloonInfo *info;
- if (kvm_enabled() && !kvm_has_sync_mmu()) {
- error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
+ if (!have_balloon(errp)) {
return NULL;
}
info = g_malloc0(sizeof(*info));
-
- if (qemu_balloon_status(info) == 0) {
- error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
- qapi_free_BalloonInfo(info);
- return NULL;
- }
-
+ balloon_stat_fn(balloon_opaque, info);
return info;
}
-void qmp_balloon(int64_t value, Error **errp)
+void qmp_balloon(int64_t target, Error **errp)
{
- if (kvm_enabled() && !kvm_has_sync_mmu()) {
- error_set(errp, QERR_KVM_MISSING_CAP, "synchronous MMU", "balloon");
+ if (!have_balloon(errp)) {
return;
}
- if (value <= 0) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
+ if (target <= 0) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "target", "a size");
return;
}
-
- if (qemu_balloon(value) == 0) {
- error_set(errp, QERR_DEVICE_NOT_ACTIVE, "balloon");
- }
+
+ trace_balloon_event(balloon_opaque, target);
+ balloon_event_fn(balloon_opaque, target);
}
diff --git a/block-migration.c b/block-migration.c
deleted file mode 100644
index 08db01a..0000000
--- a/block-migration.c
+++ /dev/null
@@ -1,884 +0,0 @@
-/*
- * QEMU live block migration
- *
- * Copyright IBM, Corp. 2009
- *
- * Authors:
- * Liran Schour <lirans@il.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-
-#include "qemu-common.h"
-#include "block/block.h"
-#include "qemu/error-report.h"
-#include "qemu/main-loop.h"
-#include "hw/hw.h"
-#include "qemu/queue.h"
-#include "qemu/timer.h"
-#include "migration/block.h"
-#include "migration/migration.h"
-#include "sysemu/blockdev.h"
-#include <assert.h>
-
-#define BLOCK_SIZE (1 << 20)
-#define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
-
-#define BLK_MIG_FLAG_DEVICE_BLOCK 0x01
-#define BLK_MIG_FLAG_EOS 0x02
-#define BLK_MIG_FLAG_PROGRESS 0x04
-#define BLK_MIG_FLAG_ZERO_BLOCK 0x08
-
-#define MAX_IS_ALLOCATED_SEARCH 65536
-
-//#define DEBUG_BLK_MIGRATION
-
-#ifdef DEBUG_BLK_MIGRATION
-#define DPRINTF(fmt, ...) \
- do { printf("blk_migration: " fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
- do { } while (0)
-#endif
-
-typedef struct BlkMigDevState {
- /* Written during setup phase. Can be read without a lock. */
- BlockDriverState *bs;
- int shared_base;
- int64_t total_sectors;
- QSIMPLEQ_ENTRY(BlkMigDevState) entry;
-
- /* Only used by migration thread. Does not need a lock. */
- int bulk_completed;
- int64_t cur_sector;
- int64_t cur_dirty;
-
- /* Protected by block migration lock. */
- unsigned long *aio_bitmap;
- int64_t completed_sectors;
- BdrvDirtyBitmap *dirty_bitmap;
- Error *blocker;
-} BlkMigDevState;
-
-typedef struct BlkMigBlock {
- /* Only used by migration thread. */
- uint8_t *buf;
- BlkMigDevState *bmds;
- int64_t sector;
- int nr_sectors;
- struct iovec iov;
- QEMUIOVector qiov;
- BlockAIOCB *aiocb;
-
- /* Protected by block migration lock. */
- int ret;
- QSIMPLEQ_ENTRY(BlkMigBlock) entry;
-} BlkMigBlock;
-
-typedef struct BlkMigState {
- /* Written during setup phase. Can be read without a lock. */
- int blk_enable;
- int shared_base;
- QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list;
- int64_t total_sector_sum;
- bool zero_blocks;
-
- /* Protected by lock. */
- QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list;
- int submitted;
- int read_done;
-
- /* Only used by migration thread. Does not need a lock. */
- int transferred;
- int prev_progress;
- int bulk_completed;
-
- /* Lock must be taken _inside_ the iothread lock. */
- QemuMutex lock;
-} BlkMigState;
-
-static BlkMigState block_mig_state;
-
-static void blk_mig_lock(void)
-{
- qemu_mutex_lock(&block_mig_state.lock);
-}
-
-static void blk_mig_unlock(void)
-{
- qemu_mutex_unlock(&block_mig_state.lock);
-}
-
-/* Must run outside of the iothread lock during the bulk phase,
- * or the VM will stall.
- */
-
-static void blk_send(QEMUFile *f, BlkMigBlock * blk)
-{
- int len;
- uint64_t flags = BLK_MIG_FLAG_DEVICE_BLOCK;
-
- if (block_mig_state.zero_blocks &&
- buffer_is_zero(blk->buf, BLOCK_SIZE)) {
- flags |= BLK_MIG_FLAG_ZERO_BLOCK;
- }
-
- /* sector number and flags */
- qemu_put_be64(f, (blk->sector << BDRV_SECTOR_BITS)
- | flags);
-
- /* device name */
- len = strlen(bdrv_get_device_name(blk->bmds->bs));
- qemu_put_byte(f, len);
- qemu_put_buffer(f, (uint8_t *)bdrv_get_device_name(blk->bmds->bs), len);
-
- /* if a block is zero we need to flush here since the network
- * bandwidth is now a lot higher than the storage device bandwidth.
- * thus if we queue zero blocks we slow down the migration */
- if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
- qemu_fflush(f);
- return;
- }
-
- qemu_put_buffer(f, blk->buf, BLOCK_SIZE);
-}
-
-int blk_mig_active(void)
-{
- return !QSIMPLEQ_EMPTY(&block_mig_state.bmds_list);
-}
-
-uint64_t blk_mig_bytes_transferred(void)
-{
- BlkMigDevState *bmds;
- uint64_t sum = 0;
-
- blk_mig_lock();
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- sum += bmds->completed_sectors;
- }
- blk_mig_unlock();
- return sum << BDRV_SECTOR_BITS;
-}
-
-uint64_t blk_mig_bytes_remaining(void)
-{
- return blk_mig_bytes_total() - blk_mig_bytes_transferred();
-}
-
-uint64_t blk_mig_bytes_total(void)
-{
- BlkMigDevState *bmds;
- uint64_t sum = 0;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- sum += bmds->total_sectors;
- }
- return sum << BDRV_SECTOR_BITS;
-}
-
-
-/* Called with migration lock held. */
-
-static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector)
-{
- int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- if (sector < bdrv_nb_sectors(bmds->bs)) {
- return !!(bmds->aio_bitmap[chunk / (sizeof(unsigned long) * 8)] &
- (1UL << (chunk % (sizeof(unsigned long) * 8))));
- } else {
- return 0;
- }
-}
-
-/* Called with migration lock held. */
-
-static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num,
- int nb_sectors, int set)
-{
- int64_t start, end;
- unsigned long val, idx, bit;
-
- start = sector_num / BDRV_SECTORS_PER_DIRTY_CHUNK;
- end = (sector_num + nb_sectors - 1) / BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- for (; start <= end; start++) {
- idx = start / (sizeof(unsigned long) * 8);
- bit = start % (sizeof(unsigned long) * 8);
- val = bmds->aio_bitmap[idx];
- if (set) {
- val |= 1UL << bit;
- } else {
- val &= ~(1UL << bit);
- }
- bmds->aio_bitmap[idx] = val;
- }
-}
-
-static void alloc_aio_bitmap(BlkMigDevState *bmds)
-{
- BlockDriverState *bs = bmds->bs;
- int64_t bitmap_size;
-
- bitmap_size = bdrv_nb_sectors(bs) + BDRV_SECTORS_PER_DIRTY_CHUNK * 8 - 1;
- bitmap_size /= BDRV_SECTORS_PER_DIRTY_CHUNK * 8;
-
- bmds->aio_bitmap = g_malloc0(bitmap_size);
-}
-
-/* Never hold migration lock when yielding to the main loop! */
-
-static void blk_mig_read_cb(void *opaque, int ret)
-{
- BlkMigBlock *blk = opaque;
-
- blk_mig_lock();
- blk->ret = ret;
-
- QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry);
- bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0);
-
- block_mig_state.submitted--;
- block_mig_state.read_done++;
- assert(block_mig_state.submitted >= 0);
- blk_mig_unlock();
-}
-
-/* Called with no lock taken. */
-
-static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds)
-{
- int64_t total_sectors = bmds->total_sectors;
- int64_t cur_sector = bmds->cur_sector;
- BlockDriverState *bs = bmds->bs;
- BlkMigBlock *blk;
- int nr_sectors;
-
- if (bmds->shared_base) {
- qemu_mutex_lock_iothread();
- while (cur_sector < total_sectors &&
- !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH,
- &nr_sectors)) {
- cur_sector += nr_sectors;
- }
- qemu_mutex_unlock_iothread();
- }
-
- if (cur_sector >= total_sectors) {
- bmds->cur_sector = bmds->completed_sectors = total_sectors;
- return 1;
- }
-
- bmds->completed_sectors = cur_sector;
-
- cur_sector &= ~((int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK - 1);
-
- /* we are going to transfer a full block even if it is not allocated */
- nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
-
- if (total_sectors - cur_sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
- nr_sectors = total_sectors - cur_sector;
- }
-
- blk = g_new(BlkMigBlock, 1);
- blk->buf = g_malloc(BLOCK_SIZE);
- blk->bmds = bmds;
- blk->sector = cur_sector;
- blk->nr_sectors = nr_sectors;
-
- blk->iov.iov_base = blk->buf;
- blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
-
- blk_mig_lock();
- block_mig_state.submitted++;
- blk_mig_unlock();
-
- qemu_mutex_lock_iothread();
- blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov,
- nr_sectors, blk_mig_read_cb, blk);
-
- bdrv_reset_dirty(bs, cur_sector, nr_sectors);
- qemu_mutex_unlock_iothread();
-
- bmds->cur_sector = cur_sector + nr_sectors;
- return (bmds->cur_sector >= total_sectors);
-}
-
-/* Called with iothread lock taken. */
-
-static int set_dirty_tracking(void)
-{
- BlkMigDevState *bmds;
- int ret;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bmds->dirty_bitmap = bdrv_create_dirty_bitmap(bmds->bs, BLOCK_SIZE,
- NULL);
- if (!bmds->dirty_bitmap) {
- ret = -errno;
- goto fail;
- }
- }
- return 0;
-
-fail:
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- if (bmds->dirty_bitmap) {
- bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
- }
- }
- return ret;
-}
-
-static void unset_dirty_tracking(void)
-{
- BlkMigDevState *bmds;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bdrv_release_dirty_bitmap(bmds->bs, bmds->dirty_bitmap);
- }
-}
-
-static void init_blk_migration(QEMUFile *f)
-{
- BlockDriverState *bs;
- BlkMigDevState *bmds;
- int64_t sectors;
-
- block_mig_state.submitted = 0;
- block_mig_state.read_done = 0;
- block_mig_state.transferred = 0;
- block_mig_state.total_sector_sum = 0;
- block_mig_state.prev_progress = -1;
- block_mig_state.bulk_completed = 0;
- block_mig_state.zero_blocks = migrate_zero_blocks();
-
- for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
- if (bdrv_is_read_only(bs)) {
- continue;
- }
-
- sectors = bdrv_nb_sectors(bs);
- if (sectors <= 0) {
- return;
- }
-
- bmds = g_new0(BlkMigDevState, 1);
- bmds->bs = bs;
- bmds->bulk_completed = 0;
- bmds->total_sectors = sectors;
- bmds->completed_sectors = 0;
- bmds->shared_base = block_mig_state.shared_base;
- alloc_aio_bitmap(bmds);
- error_setg(&bmds->blocker, "block device is in use by migration");
- bdrv_op_block_all(bs, bmds->blocker);
- bdrv_ref(bs);
-
- block_mig_state.total_sector_sum += sectors;
-
- if (bmds->shared_base) {
- DPRINTF("Start migration for %s with shared base image\n",
- bdrv_get_device_name(bs));
- } else {
- DPRINTF("Start full migration for %s\n", bdrv_get_device_name(bs));
- }
-
- QSIMPLEQ_INSERT_TAIL(&block_mig_state.bmds_list, bmds, entry);
- }
-}
-
-/* Called with no lock taken. */
-
-static int blk_mig_save_bulked_block(QEMUFile *f)
-{
- int64_t completed_sector_sum = 0;
- BlkMigDevState *bmds;
- int progress;
- int ret = 0;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- if (bmds->bulk_completed == 0) {
- if (mig_save_device_bulk(f, bmds) == 1) {
- /* completed bulk section for this device */
- bmds->bulk_completed = 1;
- }
- completed_sector_sum += bmds->completed_sectors;
- ret = 1;
- break;
- } else {
- completed_sector_sum += bmds->completed_sectors;
- }
- }
-
- if (block_mig_state.total_sector_sum != 0) {
- progress = completed_sector_sum * 100 /
- block_mig_state.total_sector_sum;
- } else {
- progress = 100;
- }
- if (progress != block_mig_state.prev_progress) {
- block_mig_state.prev_progress = progress;
- qemu_put_be64(f, (progress << BDRV_SECTOR_BITS)
- | BLK_MIG_FLAG_PROGRESS);
- DPRINTF("Completed %d %%\r", progress);
- }
-
- return ret;
-}
-
-static void blk_mig_reset_dirty_cursor(void)
-{
- BlkMigDevState *bmds;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- bmds->cur_dirty = 0;
- }
-}
-
-/* Called with iothread lock taken. */
-
-static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds,
- int is_async)
-{
- BlkMigBlock *blk;
- int64_t total_sectors = bmds->total_sectors;
- int64_t sector;
- int nr_sectors;
- int ret = -EIO;
-
- for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) {
- blk_mig_lock();
- if (bmds_aio_inflight(bmds, sector)) {
- blk_mig_unlock();
- bdrv_drain_all();
- } else {
- blk_mig_unlock();
- }
- if (bdrv_get_dirty(bmds->bs, bmds->dirty_bitmap, sector)) {
-
- if (total_sectors - sector < BDRV_SECTORS_PER_DIRTY_CHUNK) {
- nr_sectors = total_sectors - sector;
- } else {
- nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
- }
- blk = g_new(BlkMigBlock, 1);
- blk->buf = g_malloc(BLOCK_SIZE);
- blk->bmds = bmds;
- blk->sector = sector;
- blk->nr_sectors = nr_sectors;
-
- if (is_async) {
- blk->iov.iov_base = blk->buf;
- blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE;
- qemu_iovec_init_external(&blk->qiov, &blk->iov, 1);
-
- blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov,
- nr_sectors, blk_mig_read_cb, blk);
-
- blk_mig_lock();
- block_mig_state.submitted++;
- bmds_set_aio_inflight(bmds, sector, nr_sectors, 1);
- blk_mig_unlock();
- } else {
- ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors);
- if (ret < 0) {
- goto error;
- }
- blk_send(f, blk);
-
- g_free(blk->buf);
- g_free(blk);
- }
-
- bdrv_reset_dirty(bmds->bs, sector, nr_sectors);
- break;
- }
- sector += BDRV_SECTORS_PER_DIRTY_CHUNK;
- bmds->cur_dirty = sector;
- }
-
- return (bmds->cur_dirty >= bmds->total_sectors);
-
-error:
- DPRINTF("Error reading sector %" PRId64 "\n", sector);
- g_free(blk->buf);
- g_free(blk);
- return ret;
-}
-
-/* Called with iothread lock taken.
- *
- * return value:
- * 0: too much data for max_downtime
- * 1: few enough data for max_downtime
-*/
-static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
-{
- BlkMigDevState *bmds;
- int ret = 1;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- ret = mig_save_device_dirty(f, bmds, is_async);
- if (ret <= 0) {
- break;
- }
- }
-
- return ret;
-}
-
-/* Called with no locks taken. */
-
-static int flush_blks(QEMUFile *f)
-{
- BlkMigBlock *blk;
- int ret = 0;
-
- DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
- __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
-
- blk_mig_lock();
- while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
- if (qemu_file_rate_limit(f)) {
- break;
- }
- if (blk->ret < 0) {
- ret = blk->ret;
- break;
- }
-
- QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
- blk_mig_unlock();
- blk_send(f, blk);
- blk_mig_lock();
-
- g_free(blk->buf);
- g_free(blk);
-
- block_mig_state.read_done--;
- block_mig_state.transferred++;
- assert(block_mig_state.read_done >= 0);
- }
- blk_mig_unlock();
-
- DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
- block_mig_state.submitted, block_mig_state.read_done,
- block_mig_state.transferred);
- return ret;
-}
-
-/* Called with iothread lock taken. */
-
-static int64_t get_remaining_dirty(void)
-{
- BlkMigDevState *bmds;
- int64_t dirty = 0;
-
- QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
- dirty += bdrv_get_dirty_count(bmds->bs, bmds->dirty_bitmap);
- }
-
- return dirty << BDRV_SECTOR_BITS;
-}
-
-/* Called with iothread lock taken. */
-
-static void blk_mig_cleanup(void)
-{
- BlkMigDevState *bmds;
- BlkMigBlock *blk;
-
- bdrv_drain_all();
-
- unset_dirty_tracking();
-
- blk_mig_lock();
- while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
- QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry);
- bdrv_op_unblock_all(bmds->bs, bmds->blocker);
- error_free(bmds->blocker);
- bdrv_unref(bmds->bs);
- g_free(bmds->aio_bitmap);
- g_free(bmds);
- }
-
- while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) {
- QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry);
- g_free(blk->buf);
- g_free(blk);
- }
- blk_mig_unlock();
-}
-
-static void block_migration_cancel(void *opaque)
-{
- blk_mig_cleanup();
-}
-
-static int block_save_setup(QEMUFile *f, void *opaque)
-{
- int ret;
-
- DPRINTF("Enter save live setup submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
-
- qemu_mutex_lock_iothread();
- init_blk_migration(f);
-
- /* start track dirty blocks */
- ret = set_dirty_tracking();
-
- if (ret) {
- qemu_mutex_unlock_iothread();
- return ret;
- }
-
- qemu_mutex_unlock_iothread();
-
- ret = flush_blks(f);
- blk_mig_reset_dirty_cursor();
- qemu_put_be64(f, BLK_MIG_FLAG_EOS);
-
- return ret;
-}
-
-static int block_save_iterate(QEMUFile *f, void *opaque)
-{
- int ret;
- int64_t last_ftell = qemu_ftell(f);
-
- DPRINTF("Enter save live iterate submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
-
- ret = flush_blks(f);
- if (ret) {
- return ret;
- }
-
- blk_mig_reset_dirty_cursor();
-
- /* control the rate of transfer */
- blk_mig_lock();
- while ((block_mig_state.submitted +
- block_mig_state.read_done) * BLOCK_SIZE <
- qemu_file_get_rate_limit(f)) {
- blk_mig_unlock();
- if (block_mig_state.bulk_completed == 0) {
- /* first finish the bulk phase */
- if (blk_mig_save_bulked_block(f) == 0) {
- /* finished saving bulk on all devices */
- block_mig_state.bulk_completed = 1;
- }
- ret = 0;
- } else {
- /* Always called with iothread lock taken for
- * simplicity, block_save_complete also calls it.
- */
- qemu_mutex_lock_iothread();
- ret = blk_mig_save_dirty_block(f, 1);
- qemu_mutex_unlock_iothread();
- }
- if (ret < 0) {
- return ret;
- }
- blk_mig_lock();
- if (ret != 0) {
- /* no more dirty blocks */
- break;
- }
- }
- blk_mig_unlock();
-
- ret = flush_blks(f);
- if (ret) {
- return ret;
- }
-
- qemu_put_be64(f, BLK_MIG_FLAG_EOS);
- return qemu_ftell(f) - last_ftell;
-}
-
-/* Called with iothread lock taken. */
-
-static int block_save_complete(QEMUFile *f, void *opaque)
-{
- int ret;
-
- DPRINTF("Enter save live complete submitted %d transferred %d\n",
- block_mig_state.submitted, block_mig_state.transferred);
-
- ret = flush_blks(f);
- if (ret) {
- return ret;
- }
-
- blk_mig_reset_dirty_cursor();
-
- /* we know for sure that save bulk is completed and
- all async read completed */
- blk_mig_lock();
- assert(block_mig_state.submitted == 0);
- blk_mig_unlock();
-
- do {
- ret = blk_mig_save_dirty_block(f, 0);
- if (ret < 0) {
- return ret;
- }
- } while (ret == 0);
-
- /* report completion */
- qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
-
- DPRINTF("Block migration completed\n");
-
- qemu_put_be64(f, BLK_MIG_FLAG_EOS);
-
- blk_mig_cleanup();
- return 0;
-}
-
-static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size)
-{
- /* Estimate pending number of bytes to send */
- uint64_t pending;
-
- qemu_mutex_lock_iothread();
- blk_mig_lock();
- pending = get_remaining_dirty() +
- block_mig_state.submitted * BLOCK_SIZE +
- block_mig_state.read_done * BLOCK_SIZE;
-
- /* Report at least one block pending during bulk phase */
- if (pending == 0 && !block_mig_state.bulk_completed) {
- pending = BLOCK_SIZE;
- }
- blk_mig_unlock();
- qemu_mutex_unlock_iothread();
-
- DPRINTF("Enter save live pending %" PRIu64 "\n", pending);
- return pending;
-}
-
-static int block_load(QEMUFile *f, void *opaque, int version_id)
-{
- static int banner_printed;
- int len, flags;
- char device_name[256];
- int64_t addr;
- BlockDriverState *bs, *bs_prev = NULL;
- uint8_t *buf;
- int64_t total_sectors = 0;
- int nr_sectors;
- int ret;
-
- do {
- addr = qemu_get_be64(f);
-
- flags = addr & ~BDRV_SECTOR_MASK;
- addr >>= BDRV_SECTOR_BITS;
-
- if (flags & BLK_MIG_FLAG_DEVICE_BLOCK) {
- /* get device name */
- len = qemu_get_byte(f);
- qemu_get_buffer(f, (uint8_t *)device_name, len);
- device_name[len] = '\0';
-
- bs = bdrv_find(device_name);
- if (!bs) {
- fprintf(stderr, "Error unknown block device %s\n",
- device_name);
- return -EINVAL;
- }
-
- if (bs != bs_prev) {
- bs_prev = bs;
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors <= 0) {
- error_report("Error getting length of block device %s",
- device_name);
- return -EINVAL;
- }
- }
-
- if (total_sectors - addr < BDRV_SECTORS_PER_DIRTY_CHUNK) {
- nr_sectors = total_sectors - addr;
- } else {
- nr_sectors = BDRV_SECTORS_PER_DIRTY_CHUNK;
- }
-
- if (flags & BLK_MIG_FLAG_ZERO_BLOCK) {
- ret = bdrv_write_zeroes(bs, addr, nr_sectors,
- BDRV_REQ_MAY_UNMAP);
- } else {
- buf = g_malloc(BLOCK_SIZE);
- qemu_get_buffer(f, buf, BLOCK_SIZE);
- ret = bdrv_write(bs, addr, buf, nr_sectors);
- g_free(buf);
- }
-
- if (ret < 0) {
- return ret;
- }
- } else if (flags & BLK_MIG_FLAG_PROGRESS) {
- if (!banner_printed) {
- printf("Receiving block device images\n");
- banner_printed = 1;
- }
- printf("Completed %d %%%c", (int)addr,
- (addr == 100) ? '\n' : '\r');
- fflush(stdout);
- } else if (!(flags & BLK_MIG_FLAG_EOS)) {
- fprintf(stderr, "Unknown block migration flags: %#x\n", flags);
- return -EINVAL;
- }
- ret = qemu_file_get_error(f);
- if (ret != 0) {
- return ret;
- }
- } while (!(flags & BLK_MIG_FLAG_EOS));
-
- return 0;
-}
-
-static void block_set_params(const MigrationParams *params, void *opaque)
-{
- block_mig_state.blk_enable = params->blk;
- block_mig_state.shared_base = params->shared;
-
- /* shared base means that blk_enable = 1 */
- block_mig_state.blk_enable |= params->shared;
-}
-
-static bool block_is_active(void *opaque)
-{
- return block_mig_state.blk_enable == 1;
-}
-
-static SaveVMHandlers savevm_block_handlers = {
- .set_params = block_set_params,
- .save_live_setup = block_save_setup,
- .save_live_iterate = block_save_iterate,
- .save_live_complete = block_save_complete,
- .save_live_pending = block_save_pending,
- .load_state = block_load,
- .cancel = block_migration_cancel,
- .is_active = block_is_active,
-};
-
-void blk_mig_init(void)
-{
- QSIMPLEQ_INIT(&block_mig_state.bmds_list);
- QSIMPLEQ_INIT(&block_mig_state.blk_list);
- qemu_mutex_init(&block_mig_state.lock);
-
- register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers,
- &block_mig_state);
-}
diff --git a/block.c b/block.c
index 457afd4..8efa999 100644
--- a/block.c
+++ b/block.c
@@ -21,25 +21,27 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "config-host.h"
-#include "qemu-common.h"
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qjson.h"
#include "sysemu/block-backend.h"
#include "sysemu/sysemu.h"
#include "qemu/notify.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
#include "block/qapi.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"
+#include "qemu/cutils.h"
+#include "qemu/id.h"
#ifdef CONFIG_BSD
-#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/queue.h>
#ifndef __DragonFly__
@@ -51,52 +53,24 @@
#include <windows.h>
#endif
-struct BdrvDirtyBitmap {
- HBitmap *bitmap;
- QLIST_ENTRY(BdrvDirtyBitmap) list;
-};
-
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque);
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov);
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags);
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write);
-static void coroutine_fn bdrv_co_do_rw(void *opaque);
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
-
-static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
- QTAILQ_HEAD_INITIALIZER(bdrv_states);
-
static QTAILQ_HEAD(, BlockDriverState) graph_bdrv_states =
QTAILQ_HEAD_INITIALIZER(graph_bdrv_states);
+static QTAILQ_HEAD(, BlockDriverState) all_bdrv_states =
+ QTAILQ_HEAD_INITIALIZER(all_bdrv_states);
+
static QLIST_HEAD(, BlockDriver) bdrv_drivers =
QLIST_HEAD_INITIALIZER(bdrv_drivers);
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+ const char *reference,
+ QDict *options, int flags,
+ BlockDriverState *parent,
+ const BdrvChildRole *child_role,
+ Error **errp);
+
/* If non-zero, use only whitelisted block drivers */
static int use_bdrv_whitelist;
@@ -120,116 +94,28 @@
}
#endif
-/* throttling disk I/O limits */
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg)
-{
- int i;
-
- throttle_config(&bs->throttle_state, cfg);
-
- for (i = 0; i < 2; i++) {
- qemu_co_enter_next(&bs->throttled_reqs[i]);
- }
-}
-
-/* this function drain all the throttled IOs */
-static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
-{
- bool drained = false;
- bool enabled = bs->io_limits_enabled;
- int i;
-
- bs->io_limits_enabled = false;
-
- for (i = 0; i < 2; i++) {
- while (qemu_co_enter_next(&bs->throttled_reqs[i])) {
- drained = true;
- }
- }
-
- bs->io_limits_enabled = enabled;
-
- return drained;
-}
-
-void bdrv_io_limits_disable(BlockDriverState *bs)
-{
- bs->io_limits_enabled = false;
-
- bdrv_start_throttled_reqs(bs);
-
- throttle_destroy(&bs->throttle_state);
-}
-
-static void bdrv_throttle_read_timer_cb(void *opaque)
-{
- BlockDriverState *bs = opaque;
- qemu_co_enter_next(&bs->throttled_reqs[0]);
-}
-
-static void bdrv_throttle_write_timer_cb(void *opaque)
-{
- BlockDriverState *bs = opaque;
- qemu_co_enter_next(&bs->throttled_reqs[1]);
-}
-
-/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs)
-{
- assert(!bs->io_limits_enabled);
- throttle_init(&bs->throttle_state,
- bdrv_get_aio_context(bs),
- QEMU_CLOCK_VIRTUAL,
- bdrv_throttle_read_timer_cb,
- bdrv_throttle_write_timer_cb,
- bs);
- bs->io_limits_enabled = true;
-}
-
-/* This function makes an IO wait if needed
- *
- * @nb_sectors: the number of sectors of the IO
- * @is_write: is the IO a write
- */
-static void bdrv_io_limits_intercept(BlockDriverState *bs,
- unsigned int bytes,
- bool is_write)
-{
- /* does this io must wait */
- bool must_wait = throttle_schedule_timer(&bs->throttle_state, is_write);
-
- /* if must wait or any request of this type throttled queue the IO */
- if (must_wait ||
- !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
- qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
- }
-
- /* the IO will be executed, do the accounting */
- throttle_account(&bs->throttle_state, is_write, bytes);
-
-
- /* if the next request must wait -> do nothing */
- if (throttle_schedule_timer(&bs->throttle_state, is_write)) {
- return;
- }
-
- /* else queue next request for execution */
- qemu_co_queue_next(&bs->throttled_reqs[is_write]);
-}
-
size_t bdrv_opt_mem_align(BlockDriverState *bs)
{
if (!bs || !bs->drv) {
- /* 4k should be on the safe side */
- return 4096;
+ /* page size or 4k (hdd sector size) should be on the safe side */
+ return MAX(4096, getpagesize());
}
return bs->bl.opt_mem_alignment;
}
+size_t bdrv_min_mem_align(BlockDriverState *bs)
+{
+ if (!bs || !bs->drv) {
+ /* page size or 4k (hdd sector size) should be on the safe side */
+ return MAX(4096, getpagesize());
+ }
+
+ return bs->bl.min_mem_alignment;
+}
+
/* check if the path starts with "<protocol>:" */
-static int path_has_protocol(const char *path)
+int path_has_protocol(const char *path)
{
const char *p;
@@ -246,7 +132,7 @@
return *p == ':';
}
-static int path_check_absolute(const char *path)
+int path_is_absolute(const char *path)
{
#ifdef _WIN32
/* specific case for names like: "\\.\d:" */
@@ -271,7 +157,7 @@
if (dest_size <= 0)
return;
- if (path_check_absolute(filename)) {
+ if (path_is_absolute(filename)) {
pstrcpy(dest, dest_size, filename);
} else {
p = strchr(base_path, ':');
@@ -303,43 +189,37 @@
}
}
-void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz)
+void bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ char *dest, size_t sz,
+ Error **errp)
{
- if (bs->backing_file[0] == '\0' || path_has_protocol(bs->backing_file)) {
- pstrcpy(dest, sz, bs->backing_file);
+ if (backing[0] == '\0' || path_has_protocol(backing) ||
+ path_is_absolute(backing))
+ {
+ pstrcpy(dest, sz, backing);
+ } else if (backed[0] == '\0' || strstart(backed, "json:", NULL)) {
+ error_setg(errp, "Cannot use relative backing file names for '%s'",
+ backed);
} else {
- path_combine(dest, sz, bs->filename, bs->backing_file);
+ path_combine(dest, sz, backed, backing);
}
}
+void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
+ Error **errp)
+{
+ char *backed = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
+
+ bdrv_get_full_backing_filename_from_filename(backed, bs->backing_file,
+ dest, sz, errp);
+}
+
void bdrv_register(BlockDriver *bdrv)
{
- /* Block drivers without coroutine functions need emulation */
- if (!bdrv->bdrv_co_readv) {
- bdrv->bdrv_co_readv = bdrv_co_readv_em;
- bdrv->bdrv_co_writev = bdrv_co_writev_em;
-
- /* bdrv_co_readv_em()/brdv_co_writev_em() work in terms of aio, so if
- * the block driver lacks aio we need to emulate that too.
- */
- if (!bdrv->bdrv_aio_readv) {
- /* add AIO emulation layer */
- bdrv->bdrv_aio_readv = bdrv_aio_readv_em;
- bdrv->bdrv_aio_writev = bdrv_aio_writev_em;
- }
- }
-
QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
}
-BlockDriverState *bdrv_new_root(void)
-{
- BlockDriverState *bs = bdrv_new();
-
- QTAILQ_INSERT_TAIL(&bdrv_states, bs, device_list);
- return bs;
-}
-
BlockDriverState *bdrv_new(void)
{
BlockDriverState *bs;
@@ -350,20 +230,15 @@
for (i = 0; i < BLOCK_OP_TYPE_MAX; i++) {
QLIST_INIT(&bs->op_blockers[i]);
}
- bdrv_iostatus_disable(bs);
- notifier_list_init(&bs->close_notifiers);
notifier_with_return_list_init(&bs->before_write_notifiers);
- qemu_co_queue_init(&bs->throttled_reqs[0]);
- qemu_co_queue_init(&bs->throttled_reqs[1]);
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
- return bs;
-}
+ qemu_co_queue_init(&bs->flush_queue);
-void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify)
-{
- notifier_list_add(&bs->close_notifiers, notify);
+ QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
+
+ return bs;
}
BlockDriver *bdrv_find_format(const char *format_name)
@@ -406,11 +281,9 @@
return 0;
}
-BlockDriver *bdrv_find_whitelisted_format(const char *format_name,
- bool read_only)
+bool bdrv_uses_whitelist(void)
{
- BlockDriver *drv = bdrv_find_format(format_name);
- return drv && bdrv_is_whitelisted(drv, read_only) ? drv : NULL;
+ return use_bdrv_whitelist;
}
typedef struct CreateCo {
@@ -430,9 +303,7 @@
assert(cco->drv);
ret = cco->drv->bdrv_create(cco->filename, cco->opts, &local_err);
- if (local_err) {
- error_propagate(&cco->err, local_err);
- }
+ error_propagate(&cco->err, local_err);
cco->ret = ret;
}
@@ -460,8 +331,8 @@
/* Fast-path if already in coroutine context */
bdrv_create_co_entry(&cco);
} else {
- co = qemu_coroutine_create(bdrv_create_co_entry);
- qemu_coroutine_enter(co, &cco);
+ co = qemu_coroutine_create(bdrv_create_co_entry, &cco);
+ qemu_coroutine_enter(co);
while (cco.ret == NOT_DONE) {
aio_poll(qemu_get_aio_context(), true);
}
@@ -487,65 +358,48 @@
Error *local_err = NULL;
int ret;
- drv = bdrv_find_protocol(filename, true);
+ drv = bdrv_find_protocol(filename, true, errp);
if (drv == NULL) {
- error_setg(errp, "Could not find protocol for file '%s'", filename);
return -ENOENT;
}
ret = bdrv_create(drv, filename, opts, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+/**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz struct and return 0.
+ * On failure return -errno.
+ * @bs must not be empty.
+ */
+int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
BlockDriver *drv = bs->drv;
- Error *local_err = NULL;
- memset(&bs->bl, 0, sizeof(bs->bl));
-
- if (!drv) {
- return;
+ if (drv && drv->bdrv_probe_blocksizes) {
+ return drv->bdrv_probe_blocksizes(bs, bsz);
}
- /* Take some limits from the children as a default */
- if (bs->file) {
- bdrv_refresh_limits(bs->file, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
- bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length;
- bs->bl.max_transfer_length = bs->file->bl.max_transfer_length;
- bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment;
- } else {
- bs->bl.opt_mem_alignment = 512;
+ return -ENOTSUP;
+}
+
+/**
+ * Try to get @bs's geometry (cyls, heads, sectors).
+ * On success, store them in @geo struct and return 0.
+ * On failure return -errno.
+ * @bs must not be empty.
+ */
+int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
+{
+ BlockDriver *drv = bs->drv;
+
+ if (drv && drv->bdrv_probe_geometry) {
+ return drv->bdrv_probe_geometry(bs, geo);
}
- if (bs->backing_hd) {
- bdrv_refresh_limits(bs->backing_hd, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
- bs->bl.opt_transfer_length =
- MAX(bs->bl.opt_transfer_length,
- bs->backing_hd->bl.opt_transfer_length);
- bs->bl.max_transfer_length =
- MIN_NON_ZERO(bs->bl.max_transfer_length,
- bs->backing_hd->bl.max_transfer_length);
- bs->bl.opt_mem_alignment =
- MAX(bs->bl.opt_mem_alignment,
- bs->backing_hd->bl.opt_mem_alignment);
- }
-
- /* Then let the driver override it */
- if (drv->bdrv_refresh_limits) {
- drv->bdrv_refresh_limits(bs, errp);
- }
+ return -ENOTSUP;
}
/*
@@ -607,7 +461,8 @@
}
BlockDriver *bdrv_find_protocol(const char *filename,
- bool allow_protocol_prefix)
+ bool allow_protocol_prefix,
+ Error **errp)
{
BlockDriver *drv1;
char protocol[128];
@@ -629,7 +484,7 @@
}
if (!path_has_protocol(filename) || !allow_protocol_prefix) {
- return bdrv_find_format("file");
+ return &bdrv_file;
}
p = strchr(filename, ':');
@@ -645,29 +500,59 @@
return drv1;
}
}
+
+ error_setg(errp, "Unknown protocol '%s'", protocol);
return NULL;
}
-static int find_image_format(BlockDriverState *bs, const char *filename,
+/*
+ * Guess image format by probing its contents.
+ * This is not a good idea when your image is raw (CVE-2008-2004), but
+ * we do it anyway for backward compatibility.
+ *
+ * @buf contains the image's first @buf_size bytes.
+ * @buf_size is the buffer size in bytes (generally BLOCK_PROBE_BUF_SIZE,
+ * but can be smaller if the image file is smaller)
+ * @filename is its filename.
+ *
+ * For all block drivers, call the bdrv_probe() method to get its
+ * probing score.
+ * Return the first block driver with the highest probing score.
+ */
+BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
+ const char *filename)
+{
+ int score_max = 0, score;
+ BlockDriver *drv = NULL, *d;
+
+ QLIST_FOREACH(d, &bdrv_drivers, list) {
+ if (d->bdrv_probe) {
+ score = d->bdrv_probe(buf, buf_size, filename);
+ if (score > score_max) {
+ score_max = score;
+ drv = d;
+ }
+ }
+ }
+
+ return drv;
+}
+
+static int find_image_format(BdrvChild *file, const char *filename,
BlockDriver **pdrv, Error **errp)
{
- int score, score_max;
- BlockDriver *drv1, *drv;
- uint8_t buf[2048];
+ BlockDriverState *bs = file->bs;
+ BlockDriver *drv;
+ uint8_t buf[BLOCK_PROBE_BUF_SIZE];
int ret = 0;
/* Return the raw BlockDriver * to scsi-generic devices or empty drives */
- if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
- drv = bdrv_find_format("raw");
- if (!drv) {
- error_setg(errp, "Could not find raw image format");
- ret = -ENOENT;
- }
- *pdrv = drv;
+ if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) {
+ *pdrv = &bdrv_raw;
return ret;
}
- ret = bdrv_pread(bs, 0, buf, sizeof(buf));
+ ret = bdrv_pread(file, 0, buf, sizeof(buf));
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not read image for determining its "
"format");
@@ -675,17 +560,7 @@
return ret;
}
- score_max = 0;
- drv = NULL;
- QLIST_FOREACH(drv1, &bdrv_drivers, list) {
- if (drv1->bdrv_probe) {
- score = drv1->bdrv_probe(buf, ret, filename);
- if (score > score_max) {
- score_max = score;
- drv = drv1;
- }
- }
- }
+ drv = bdrv_probe_all(buf, ret, filename);
if (!drv) {
error_setg(errp, "Could not determine image format: No compatible "
"driver found");
@@ -704,7 +579,7 @@
BlockDriver *drv = bs->drv;
/* Do not attempt drv->bdrv_getlength() on scsi-generic devices */
- if (bs->sg)
+ if (bdrv_is_sg(bs))
return 0;
/* query actual device if possible, otherwise just trust the hint */
@@ -721,6 +596,20 @@
}
/**
+ * Combines a QDict of new block driver @options with any missing options taken
+ * from @old_options, so that leaving out an option defaults to its old value.
+ */
+static void bdrv_join_options(BlockDriverState *bs, QDict *options,
+ QDict *old_options)
+{
+ if (bs->drv && bs->drv->bdrv_join_options) {
+ bs->drv->bdrv_join_options(options, old_options);
+ } else {
+ qdict_join(options, old_options, false);
+ }
+}
+
+/**
* Set open flags for a given discard mode
*
* Return 0 on success, -1 if the discard mode was invalid.
@@ -745,21 +634,23 @@
*
* Return 0 on success, -1 if the cache mode was invalid.
*/
-int bdrv_parse_cache_flags(const char *mode, int *flags)
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
{
*flags &= ~BDRV_O_CACHE_MASK;
if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
- *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+ *writethrough = false;
+ *flags |= BDRV_O_NOCACHE;
} else if (!strcmp(mode, "directsync")) {
+ *writethrough = true;
*flags |= BDRV_O_NOCACHE;
} else if (!strcmp(mode, "writeback")) {
- *flags |= BDRV_O_CACHE_WB;
+ *writethrough = false;
} else if (!strcmp(mode, "unsafe")) {
- *flags |= BDRV_O_CACHE_WB;
+ *writethrough = false;
*flags |= BDRV_O_NO_FLUSH;
} else if (!strcmp(mode, "writethrough")) {
- /* this is the default */
+ *writethrough = true;
} else {
return -1;
}
@@ -767,69 +658,124 @@
return 0;
}
-/**
- * The copy-on-read flag is actually a reference count so multiple users may
- * use the feature without worrying about clobbering its previous state.
- * Copy-on-read stays enabled until all users have called to disable it.
- */
-void bdrv_enable_copy_on_read(BlockDriverState *bs)
+static void bdrv_child_cb_drained_begin(BdrvChild *child)
{
- bs->copy_on_read++;
+ BlockDriverState *bs = child->opaque;
+ bdrv_drained_begin(bs);
}
-void bdrv_disable_copy_on_read(BlockDriverState *bs)
+static void bdrv_child_cb_drained_end(BdrvChild *child)
{
- assert(bs->copy_on_read > 0);
- bs->copy_on_read--;
+ BlockDriverState *bs = child->opaque;
+ bdrv_drained_end(bs);
}
/*
- * Returns the flags that a temporary snapshot should get, based on the
- * originally requested flags (the originally requested image will have flags
- * like a backing file)
+ * Returns the options and flags that a temporary snapshot should get, based on
+ * the originally requested flags (the originally requested image will have
+ * flags like a backing file)
*/
-static int bdrv_temp_snapshot_flags(int flags)
+static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
- return (flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+ *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
+
+ /* For temporary files, unconditional cache=unsafe is fine */
+ qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
+ qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
+
+ /* aio=native doesn't work for cache.direct=off, so disable it for the
+ * temporary snapshot */
+ *child_flags &= ~BDRV_O_NATIVE_AIO;
}
/*
- * Returns the flags that bs->file should get, based on the given flags for
- * the parent BDS
+ * Returns the options and flags that bs->file should get if a protocol driver
+ * is expected, based on the given options and flags for the parent BDS
*/
-static int bdrv_inherited_flags(int flags)
+static void bdrv_inherited_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
+ int flags = parent_flags;
+
/* Enable protocol handling, disable format probing for bs->file */
flags |= BDRV_O_PROTOCOL;
+ /* If the cache mode isn't explicitly set, inherit direct and no-flush from
+ * the parent. */
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+
/* Our block drivers take care to send flushes and respect unmap policy,
- * so we can enable both unconditionally on lower layers. */
- flags |= BDRV_O_CACHE_WB | BDRV_O_UNMAP;
+ * so we can default to enable both on lower layers regardless of the
+ * corresponding parent options. */
+ flags |= BDRV_O_UNMAP;
/* Clear flags that only apply to the top layer */
- flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
+ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
+ BDRV_O_NO_IO);
- return flags;
+ *child_flags = flags;
}
+const BdrvChildRole child_file = {
+ .inherit_options = bdrv_inherited_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
+};
+
/*
- * Returns the flags that bs->backing_hd should get, based on the given flags
- * for the parent BDS
+ * Returns the options and flags that bs->file should get if the use of formats
+ * (and not only protocols) is permitted for it, based on the given options and
+ * flags for the parent BDS
*/
-static int bdrv_backing_flags(int flags)
+static void bdrv_inherited_fmt_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
+ child_file.inherit_options(child_flags, child_options,
+ parent_flags, parent_options);
+
+ *child_flags &= ~(BDRV_O_PROTOCOL | BDRV_O_NO_IO);
+}
+
+const BdrvChildRole child_format = {
+ .inherit_options = bdrv_inherited_fmt_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
+};
+
+/*
+ * Returns the options and flags that bs->backing should get, based on the
+ * given options and flags for the parent BDS
+ */
+static void bdrv_backing_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
+{
+ int flags = parent_flags;
+
+ /* The cache mode is inherited unmodified for backing files; except WCE,
+ * which is only applied on the top level (BlockBackend) */
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
+ qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
+
/* backing files always opened read-only */
flags &= ~(BDRV_O_RDWR | BDRV_O_COPY_ON_READ);
/* snapshot=on is handled on the top layer */
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_TEMPORARY);
- return flags;
+ *child_flags = flags;
}
+static const BdrvChildRole child_backing = {
+ .inherit_options = bdrv_backing_options,
+ .drained_begin = bdrv_child_cb_drained_begin,
+ .drained_end = bdrv_child_cb_drained_end,
+};
+
static int bdrv_open_flags(BlockDriverState *bs, int flags)
{
- int open_flags = flags | BDRV_O_CACHE_WB;
+ int open_flags = flags;
/*
* Clear flags that are internal to the block layer before opening the
@@ -847,16 +793,46 @@
return open_flags;
}
+static void update_flags_from_options(int *flags, QemuOpts *opts)
+{
+ *flags &= ~BDRV_O_CACHE_MASK;
+
+ assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
+ if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
+ *flags |= BDRV_O_NO_FLUSH;
+ }
+
+ assert(qemu_opt_find(opts, BDRV_OPT_CACHE_DIRECT));
+ if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_DIRECT, false)) {
+ *flags |= BDRV_O_NOCACHE;
+ }
+}
+
+static void update_options_from_flags(QDict *options, int flags)
+{
+ if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
+ qdict_put(options, BDRV_OPT_CACHE_DIRECT,
+ qbool_from_bool(flags & BDRV_O_NOCACHE));
+ }
+ if (!qdict_haskey(options, BDRV_OPT_CACHE_NO_FLUSH)) {
+ qdict_put(options, BDRV_OPT_CACHE_NO_FLUSH,
+ qbool_from_bool(flags & BDRV_O_NO_FLUSH));
+ }
+}
+
static void bdrv_assign_node_name(BlockDriverState *bs,
const char *node_name,
Error **errp)
{
- if (!node_name) {
- return;
- }
+ char *gen_node_name = NULL;
- /* Check for empty string or invalid characters */
- if (!id_wellformed(node_name)) {
+ if (!node_name) {
+ node_name = gen_node_name = id_generate(ID_BLOCK);
+ } else if (!id_wellformed(node_name)) {
+ /*
+ * Check for empty string or invalid characters, but not if it is
+ * generated (generated names use characters not available to the user)
+ */
error_setg(errp, "Invalid node name");
return;
}
@@ -865,39 +841,83 @@
if (blk_by_name(node_name)) {
error_setg(errp, "node-name=%s is conflicting with a device id",
node_name);
- return;
+ goto out;
}
/* takes care of avoiding duplicates node names */
if (bdrv_find_node(node_name)) {
error_setg(errp, "Duplicate node name");
- return;
+ goto out;
}
/* copy node name into the bs and insert it into the graph list */
pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
+out:
+ g_free(gen_node_name);
}
+static QemuOptsList bdrv_runtime_opts = {
+ .name = "bdrv_common",
+ .head = QTAILQ_HEAD_INITIALIZER(bdrv_runtime_opts.head),
+ .desc = {
+ {
+ .name = "node-name",
+ .type = QEMU_OPT_STRING,
+ .help = "Node name of the block device node",
+ },
+ {
+ .name = "driver",
+ .type = QEMU_OPT_STRING,
+ .help = "Block driver to use for the node",
+ },
+ {
+ .name = BDRV_OPT_CACHE_DIRECT,
+ .type = QEMU_OPT_BOOL,
+ .help = "Bypass software writeback cache on the host",
+ },
+ {
+ .name = BDRV_OPT_CACHE_NO_FLUSH,
+ .type = QEMU_OPT_BOOL,
+ .help = "Ignore flush requests",
+ },
+ { /* end of list */ }
+ },
+};
+
/*
* Common part for opening disk images and files
*
* Removes all processed options from *options.
*/
-static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file,
- QDict *options, int flags, BlockDriver *drv, Error **errp)
+static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
+ QDict *options, Error **errp)
{
int ret, open_flags;
const char *filename;
+ const char *driver_name = NULL;
const char *node_name = NULL;
+ QemuOpts *opts;
+ BlockDriver *drv;
Error *local_err = NULL;
- assert(drv != NULL);
assert(bs->file == NULL);
assert(options != NULL && bs->options != options);
+ opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail_opts;
+ }
+
+ driver_name = qemu_opt_get(opts, "driver");
+ drv = bdrv_find_format(driver_name);
+ assert(drv != NULL);
+
if (file != NULL) {
- filename = file->filename;
+ filename = file->bs->filename;
} else {
filename = qdict_get_try_str(options, "filename");
}
@@ -905,34 +925,22 @@
if (drv->bdrv_needs_filename && !filename) {
error_setg(errp, "The '%s' block driver requires a file name",
drv->format_name);
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail_opts;
}
- trace_bdrv_open_common(bs, filename ?: "", flags, drv->format_name);
+ trace_bdrv_open_common(bs, filename ?: "", bs->open_flags,
+ drv->format_name);
- node_name = qdict_get_try_str(options, "node-name");
+ node_name = qemu_opt_get(opts, "node-name");
bdrv_assign_node_name(bs, node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return -EINVAL;
- }
- qdict_del(options, "node-name");
-
- /* bdrv_open() with directly using a protocol as drv. This layer is already
- * opened, so assign it to bs (while file becomes a closed BlockDriverState)
- * and return immediately. */
- if (file != NULL && drv->bdrv_file_open) {
- bdrv_swap(file, bs);
- return 0;
+ ret = -EINVAL;
+ goto fail_opts;
}
- bs->open_flags = flags;
- bs->guest_block_size = 512;
- bs->request_alignment = 512;
- bs->zero_beyond_eof = true;
- open_flags = bdrv_open_flags(bs, flags);
- bs->read_only = !(open_flags & BDRV_O_RDWR);
- bs->growable = !!(flags & BDRV_O_PROTOCOL);
+ bs->read_only = !(bs->open_flags & BDRV_O_RDWR);
if (use_bdrv_whitelist && !bdrv_is_whitelisted(drv, bs->read_only)) {
error_setg(errp,
@@ -940,16 +948,18 @@
? "Driver '%s' can only be used for read-only devices"
: "Driver '%s' is not whitelisted",
drv->format_name);
- return -ENOTSUP;
+ ret = -ENOTSUP;
+ goto fail_opts;
}
assert(bs->copy_on_read == 0); /* bdrv_new() and bdrv_close() make it so */
- if (flags & BDRV_O_COPY_ON_READ) {
+ if (bs->open_flags & BDRV_O_COPY_ON_READ) {
if (!bs->read_only) {
bdrv_enable_copy_on_read(bs);
} else {
error_setg(errp, "Can't use copy-on-read on read-only device");
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail_opts;
}
}
@@ -963,9 +973,11 @@
bs->drv = drv;
bs->opaque = g_malloc0(drv->instance_size);
- bs->enable_write_cache = !!(flags & BDRV_O_CACHE_WB);
+ /* Apply cache mode options */
+ update_flags_from_options(&bs->open_flags, opts);
/* Open the image, either directly or using a protocol */
+ open_flags = bdrv_open_flags(bs, bs->open_flags);
if (drv->bdrv_file_open) {
assert(file == NULL);
assert(!drv->bdrv_needs_filename || filename != NULL);
@@ -1006,7 +1018,10 @@
}
assert(bdrv_opt_mem_align(bs) != 0);
- assert((bs->request_alignment != 0) || bs->sg);
+ assert(bdrv_min_mem_align(bs) != 0);
+ assert(is_power_of_2(bs->bl.request_alignment));
+
+ qemu_opts_del(opts);
return 0;
free_and_fail:
@@ -1014,6 +1029,8 @@
g_free(bs->opaque);
bs->opaque = NULL;
bs->drv = NULL;
+fail_opts:
+ qemu_opts_del(opts);
return ret;
}
@@ -1045,34 +1062,66 @@
return options;
}
-/*
- * Fills in default options for opening images and converts the legacy
- * filename/flags pair to option QDict entries.
- */
-static int bdrv_fill_options(QDict **options, const char **pfilename, int flags,
- BlockDriver *drv, Error **errp)
+static void parse_json_protocol(QDict *options, const char **pfilename,
+ Error **errp)
{
- const char *filename = *pfilename;
- const char *drvname;
- bool protocol = flags & BDRV_O_PROTOCOL;
- bool parse_filename = false;
+ QDict *json_options;
Error *local_err = NULL;
/* Parse json: pseudo-protocol */
- if (filename && g_str_has_prefix(filename, "json:")) {
- QDict *json_options = parse_json_filename(filename, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return -EINVAL;
- }
-
- /* Options given in the filename have lower priority than options
- * specified directly */
- qdict_join(*options, json_options, false);
- QDECREF(json_options);
- *pfilename = filename = NULL;
+ if (!*pfilename || !g_str_has_prefix(*pfilename, "json:")) {
+ return;
}
+ json_options = parse_json_filename(*pfilename, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Options given in the filename have lower priority than options
+ * specified directly */
+ qdict_join(options, json_options, false);
+ QDECREF(json_options);
+ *pfilename = NULL;
+}
+
+/*
+ * Fills in default options for opening images and converts the legacy
+ * filename/flags pair to option QDict entries.
+ * The BDRV_O_PROTOCOL flag in *flags will be set or cleared accordingly if a
+ * block driver has been specified explicitly.
+ */
+static int bdrv_fill_options(QDict **options, const char *filename,
+ int *flags, Error **errp)
+{
+ const char *drvname;
+ bool protocol = *flags & BDRV_O_PROTOCOL;
+ bool parse_filename = false;
+ BlockDriver *drv = NULL;
+ Error *local_err = NULL;
+
+ drvname = qdict_get_try_str(*options, "driver");
+ if (drvname) {
+ drv = bdrv_find_format(drvname);
+ if (!drv) {
+ error_setg(errp, "Unknown driver '%s'", drvname);
+ return -ENOENT;
+ }
+ /* If the user has explicitly specified the driver, this choice should
+ * override the BDRV_O_PROTOCOL flag */
+ protocol = drv->bdrv_file_open;
+ }
+
+ if (protocol) {
+ *flags |= BDRV_O_PROTOCOL;
+ } else {
+ *flags &= ~BDRV_O_PROTOCOL;
+ }
+
+ /* Translate cache options from flags into options */
+ update_options_from_flags(*options, *flags);
+
/* Fetch the file name from the options QDict if necessary */
if (protocol && filename) {
if (!qdict_haskey(*options, "filename")) {
@@ -1087,36 +1136,19 @@
/* Find the right block driver */
filename = qdict_get_try_str(*options, "filename");
- drvname = qdict_get_try_str(*options, "driver");
- if (drv) {
- if (drvname) {
- error_setg(errp, "Driver specified twice");
- return -EINVAL;
- }
- drvname = drv->format_name;
- qdict_put(*options, "driver", qstring_from_str(drvname));
- } else {
- if (!drvname && protocol) {
- if (filename) {
- drv = bdrv_find_protocol(filename, parse_filename);
- if (!drv) {
- error_setg(errp, "Unknown protocol");
- return -EINVAL;
- }
-
- drvname = drv->format_name;
- qdict_put(*options, "driver", qstring_from_str(drvname));
- } else {
- error_setg(errp, "Must specify either driver or file");
+ if (!drvname && protocol) {
+ if (filename) {
+ drv = bdrv_find_protocol(filename, parse_filename, errp);
+ if (!drv) {
return -EINVAL;
}
- } else if (drvname) {
- drv = bdrv_find_format(drvname);
- if (!drv) {
- error_setg(errp, "Unknown driver '%s'", drvname);
- return -ENOENT;
- }
+
+ drvname = drv->format_name;
+ qdict_put(*options, "driver", qstring_from_str(drvname));
+ } else {
+ error_setg(errp, "Must specify either driver or file");
+ return -EINVAL;
}
}
@@ -1138,32 +1170,147 @@
return 0;
}
-void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
{
+ BlockDriverState *old_bs = child->bs;
- if (bs->backing_hd) {
- assert(bs->backing_blocker);
- bdrv_op_unblock_all(bs->backing_hd, bs->backing_blocker);
- } else if (backing_hd) {
- error_setg(&bs->backing_blocker,
- "device is used as backing hd of '%s'",
- bdrv_get_device_name(bs));
+ if (old_bs) {
+ if (old_bs->quiesce_counter && child->role->drained_end) {
+ child->role->drained_end(child);
+ }
+ QLIST_REMOVE(child, next_parent);
}
- bs->backing_hd = backing_hd;
+ child->bs = new_bs;
+
+ if (new_bs) {
+ QLIST_INSERT_HEAD(&new_bs->parents, child, next_parent);
+ if (new_bs->quiesce_counter && child->role->drained_begin) {
+ child->role->drained_begin(child);
+ }
+ }
+}
+
+BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildRole *child_role,
+ void *opaque)
+{
+ BdrvChild *child = g_new(BdrvChild, 1);
+ *child = (BdrvChild) {
+ .bs = NULL,
+ .name = g_strdup(child_name),
+ .role = child_role,
+ .opaque = opaque,
+ };
+
+ bdrv_replace_child(child, child_bs);
+
+ return child;
+}
+
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+ BlockDriverState *child_bs,
+ const char *child_name,
+ const BdrvChildRole *child_role)
+{
+ BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role,
+ parent_bs);
+ QLIST_INSERT_HEAD(&parent_bs->children, child, next);
+ return child;
+}
+
+static void bdrv_detach_child(BdrvChild *child)
+{
+ if (child->next.le_prev) {
+ QLIST_REMOVE(child, next);
+ child->next.le_prev = NULL;
+ }
+
+ bdrv_replace_child(child, NULL);
+
+ g_free(child->name);
+ g_free(child);
+}
+
+void bdrv_root_unref_child(BdrvChild *child)
+{
+ BlockDriverState *child_bs;
+
+ child_bs = child->bs;
+ bdrv_detach_child(child);
+ bdrv_unref(child_bs);
+}
+
+void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
+{
+ if (child == NULL) {
+ return;
+ }
+
+ if (child->bs->inherits_from == parent) {
+ child->bs->inherits_from = NULL;
+ }
+
+ bdrv_root_unref_child(child);
+}
+
+
+static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
+{
+ BdrvChild *c;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->change_media) {
+ c->role->change_media(c, load);
+ }
+ }
+}
+
+static void bdrv_parent_cb_resize(BlockDriverState *bs)
+{
+ BdrvChild *c;
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->resize) {
+ c->role->resize(c);
+ }
+ }
+}
+
+/*
+ * Sets the backing file link of a BDS. A new reference is created; callers
+ * which don't need their own reference any more must call bdrv_unref().
+ */
+void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd)
+{
+ if (backing_hd) {
+ bdrv_ref(backing_hd);
+ }
+
+ if (bs->backing) {
+ assert(bs->backing_blocker);
+ bdrv_op_unblock_all(bs->backing->bs, bs->backing_blocker);
+ bdrv_unref_child(bs, bs->backing);
+ } else if (backing_hd) {
+ error_setg(&bs->backing_blocker,
+ "node is used as backing hd of '%s'",
+ bdrv_get_device_or_node_name(bs));
+ }
+
if (!backing_hd) {
error_free(bs->backing_blocker);
bs->backing_blocker = NULL;
+ bs->backing = NULL;
goto out;
}
+ bs->backing = bdrv_attach_child(bs, backing_hd, "backing", &child_backing);
bs->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_hd->filename);
pstrcpy(bs->backing_format, sizeof(bs->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
- bdrv_op_block_all(bs->backing_hd, bs->backing_blocker);
+ bdrv_op_block_all(backing_hd, bs->backing_blocker);
/* Otherwise we won't be able to commit due to check in bdrv_commit */
- bdrv_op_unblock(bs->backing_hd, BLOCK_OP_TYPE_COMMIT,
+ bdrv_op_unblock(backing_hd, BLOCK_OP_TYPE_COMMIT_TARGET,
bs->backing_blocker);
out:
bdrv_refresh_limits(bs, NULL);
@@ -1172,37 +1319,56 @@
/*
* Opens the backing file for a BlockDriverState if not yet open
*
- * options is a QDict of options to pass to the block drivers, or NULL for an
- * empty set of options. The reference to the QDict is transferred to this
- * function (even on failure), so if the caller intends to reuse the dictionary,
- * it needs to use QINCREF() before calling bdrv_file_open.
+ * bdref_key specifies the key for the image's BlockdevRef in the options QDict.
+ * That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
+ * itself, all options starting with "${bdref_key}." are considered part of the
+ * BlockdevRef.
+ *
+ * TODO Can this be unified with bdrv_open_child()?
*/
-int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp)
+int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
+ const char *bdref_key, Error **errp)
{
char *backing_filename = g_malloc0(PATH_MAX);
+ char *bdref_key_dot;
+ const char *reference = NULL;
int ret = 0;
- BlockDriver *back_drv = NULL;
BlockDriverState *backing_hd;
+ QDict *options;
+ QDict *tmp_parent_options = NULL;
Error *local_err = NULL;
- if (bs->backing_hd != NULL) {
- QDECREF(options);
+ if (bs->backing != NULL) {
goto free_exit;
}
/* NULL means an empty set of options */
- if (options == NULL) {
- options = qdict_new();
+ if (parent_options == NULL) {
+ tmp_parent_options = qdict_new();
+ parent_options = tmp_parent_options;
}
bs->open_flags &= ~BDRV_O_NO_BACKING;
- if (qdict_haskey(options, "file.filename")) {
+
+ bdref_key_dot = g_strdup_printf("%s.", bdref_key);
+ qdict_extract_subqdict(parent_options, &options, bdref_key_dot);
+ g_free(bdref_key_dot);
+
+ reference = qdict_get_try_str(parent_options, bdref_key);
+ if (reference || qdict_haskey(options, "file.filename")) {
backing_filename[0] = '\0';
} else if (bs->backing_file[0] == '\0' && qdict_size(options) == 0) {
QDECREF(options);
goto free_exit;
} else {
- bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX);
+ bdrv_get_full_backing_filename(bs, backing_filename, PATH_MAX,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ QDECREF(options);
+ goto free_exit;
+ }
}
if (!bs->drv || !bs->drv->supports_backing) {
@@ -1212,29 +1378,30 @@
goto free_exit;
}
- backing_hd = bdrv_new();
-
- if (bs->backing_format[0] != '\0') {
- back_drv = bdrv_find_format(bs->backing_format);
+ if (bs->backing_format[0] != '\0' && !qdict_haskey(options, "driver")) {
+ qdict_put(options, "driver", qstring_from_str(bs->backing_format));
}
- assert(bs->backing_hd == NULL);
- ret = bdrv_open(&backing_hd,
- *backing_filename ? backing_filename : NULL, NULL, options,
- bdrv_backing_flags(bs->open_flags), back_drv, &local_err);
- if (ret < 0) {
- bdrv_unref(backing_hd);
- backing_hd = NULL;
+ backing_hd = bdrv_open_inherit(*backing_filename ? backing_filename : NULL,
+ reference, options, 0, bs, &child_backing,
+ errp);
+ if (!backing_hd) {
bs->open_flags |= BDRV_O_NO_BACKING;
- error_setg(errp, "Could not open backing file: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_prepend(errp, "Could not open backing file: ");
+ ret = -EINVAL;
goto free_exit;
}
+
+ /* Hook up the backing file link; drop our reference, bs owns the
+ * backing_hd reference now */
bdrv_set_backing_hd(bs, backing_hd);
+ bdrv_unref(backing_hd);
+
+ qdict_del(parent_options, bdref_key);
free_exit:
g_free(backing_filename);
+ QDECREF(tmp_parent_options);
return ret;
}
@@ -1243,7 +1410,7 @@
* device's options.
*
* If allow_none is true, no image will be opened if filename is false and no
- * BlockdevRef is given. *pbs will remain unchanged and 0 will be returned.
+ * BlockdevRef is given. NULL will be returned, but errp remains unset.
*
* bdrev_key specifies the key for the image's BlockdevRef in the options QDict.
* That QDict has to be flattened; therefore, if the BlockdevRef is a QDict
@@ -1251,20 +1418,20 @@
* BlockdevRef.
*
* The BlockdevRef will be removed from the options QDict.
- *
- * To conform with the behavior of bdrv_open(), *pbs has to be NULL.
*/
-int bdrv_open_image(BlockDriverState **pbs, const char *filename,
- QDict *options, const char *bdref_key, int flags,
- bool allow_none, Error **errp)
+BdrvChild *bdrv_open_child(const char *filename,
+ QDict *options, const char *bdref_key,
+ BlockDriverState* parent,
+ const BdrvChildRole *child_role,
+ bool allow_none, Error **errp)
{
+ BdrvChild *c = NULL;
+ BlockDriverState *bs;
QDict *image_options;
- int ret;
char *bdref_key_dot;
const char *reference;
- assert(pbs);
- assert(*pbs == NULL);
+ assert(child_role != NULL);
bdref_key_dot = g_strdup_printf("%s.", bdref_key);
qdict_extract_subqdict(options, &image_options, bdref_key_dot);
@@ -1272,34 +1439,37 @@
reference = qdict_get_try_str(options, bdref_key);
if (!filename && !reference && !qdict_size(image_options)) {
- if (allow_none) {
- ret = 0;
- } else {
+ if (!allow_none) {
error_setg(errp, "A block device must be specified for \"%s\"",
bdref_key);
- ret = -EINVAL;
}
QDECREF(image_options);
goto done;
}
- ret = bdrv_open(pbs, filename, reference, image_options, flags, NULL, errp);
+ bs = bdrv_open_inherit(filename, reference, image_options, 0,
+ parent, child_role, errp);
+ if (!bs) {
+ goto done;
+ }
+
+ c = bdrv_attach_child(parent, bs, bdref_key, child_role);
done:
qdict_del(options, bdref_key);
- return ret;
+ return c;
}
-int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp)
+static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
+ int flags,
+ QDict *snapshot_options,
+ Error **errp)
{
/* TODO: extra byte is a hack to ensure MAX_PATH space on Windows. */
char *tmp_filename = g_malloc0(PATH_MAX + 1);
int64_t total_size;
- BlockDriver *bdrv_qcow2;
QemuOpts *opts = NULL;
- QDict *snapshot_options;
BlockDriverState *bs_snapshot;
- Error *local_err;
int ret;
/* if snapshot, we create a temporary backing file and open it
@@ -1308,7 +1478,6 @@
/* Get the required size from the image */
total_size = bdrv_getlength(bs);
if (total_size < 0) {
- ret = total_size;
error_setg_errno(errp, -total_size, "Could not get image size");
goto out;
}
@@ -1320,41 +1489,45 @@
goto out;
}
- bdrv_qcow2 = bdrv_find_format("qcow2");
- opts = qemu_opts_create(bdrv_qcow2->create_opts, NULL, 0,
+ opts = qemu_opts_create(bdrv_qcow2.create_opts, NULL, 0,
&error_abort);
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size);
- ret = bdrv_create(bdrv_qcow2, tmp_filename, opts, &local_err);
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, total_size, &error_abort);
+ ret = bdrv_create(&bdrv_qcow2, tmp_filename, opts, errp);
qemu_opts_del(opts);
if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not create temporary overlay "
- "'%s': %s", tmp_filename,
- error_get_pretty(local_err));
- error_free(local_err);
+ error_prepend(errp, "Could not create temporary overlay '%s': ",
+ tmp_filename);
goto out;
}
- /* Prepare a new options QDict for the temporary file */
- snapshot_options = qdict_new();
+ /* Prepare options QDict for the temporary file */
qdict_put(snapshot_options, "file.driver",
qstring_from_str("file"));
qdict_put(snapshot_options, "file.filename",
qstring_from_str(tmp_filename));
+ qdict_put(snapshot_options, "driver",
+ qstring_from_str("qcow2"));
- bs_snapshot = bdrv_new();
-
- ret = bdrv_open(&bs_snapshot, NULL, NULL, snapshot_options,
- flags, bdrv_qcow2, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
+ bs_snapshot = bdrv_open(NULL, NULL, snapshot_options, flags, errp);
+ snapshot_options = NULL;
+ if (!bs_snapshot) {
+ ret = -EINVAL;
goto out;
}
+ /* bdrv_append() consumes a strong reference to bs_snapshot (i.e. it will
+ * call bdrv_unref() on it), so in order to be able to return one, we have
+ * to increase bs_snapshot's refcount here */
+ bdrv_ref(bs_snapshot);
bdrv_append(bs_snapshot, bs);
-out:
g_free(tmp_filename);
- return ret;
+ return bs_snapshot;
+
+out:
+ QDECREF(snapshot_options);
+ g_free(tmp_filename);
+ return NULL;
}
/*
@@ -1372,81 +1545,92 @@
* should be opened. If specified, neither options nor a filename may be given,
* nor can an existing BDS be reused (that is, *pbs has to be NULL).
*/
-int bdrv_open(BlockDriverState **pbs, const char *filename,
- const char *reference, QDict *options, int flags,
- BlockDriver *drv, Error **errp)
+static BlockDriverState *bdrv_open_inherit(const char *filename,
+ const char *reference,
+ QDict *options, int flags,
+ BlockDriverState *parent,
+ const BdrvChildRole *child_role,
+ Error **errp)
{
int ret;
- BlockDriverState *file = NULL, *bs;
+ BdrvChild *file = NULL;
+ BlockDriverState *bs;
+ BlockDriver *drv = NULL;
const char *drvname;
+ const char *backing;
Error *local_err = NULL;
+ QDict *snapshot_options = NULL;
int snapshot_flags = 0;
- assert(pbs);
+ assert(!child_role || !flags);
+ assert(!child_role == !parent);
if (reference) {
bool options_non_empty = options ? qdict_size(options) : false;
QDECREF(options);
- if (*pbs) {
- error_setg(errp, "Cannot reuse an existing BDS when referencing "
- "another block device");
- return -EINVAL;
- }
-
if (filename || options_non_empty) {
error_setg(errp, "Cannot reference an existing block device with "
"additional options or a new filename");
- return -EINVAL;
+ return NULL;
}
bs = bdrv_lookup_bs(reference, reference, errp);
if (!bs) {
- return -ENODEV;
+ return NULL;
}
+
bdrv_ref(bs);
- *pbs = bs;
- return 0;
+ return bs;
}
- if (*pbs) {
- bs = *pbs;
- } else {
- bs = bdrv_new();
- }
+ bs = bdrv_new();
/* NULL means an empty set of options */
if (options == NULL) {
options = qdict_new();
}
- ret = bdrv_fill_options(&options, &filename, flags, drv, &local_err);
+ /* json: syntax counts as explicit options, as if in the QDict */
+ parse_json_protocol(options, &filename, &local_err);
if (local_err) {
goto fail;
}
+ bs->explicit_options = qdict_clone_shallow(options);
+
+ if (child_role) {
+ bs->inherits_from = parent;
+ child_role->inherit_options(&flags, options,
+ parent->open_flags, parent->options);
+ }
+
+ ret = bdrv_fill_options(&options, filename, &flags, &local_err);
+ if (local_err) {
+ goto fail;
+ }
+
+ bs->open_flags = flags;
+ bs->options = options;
+ options = qdict_clone_shallow(options);
+
/* Find the right image format driver */
- drv = NULL;
drvname = qdict_get_try_str(options, "driver");
if (drvname) {
drv = bdrv_find_format(drvname);
- qdict_del(options, "driver");
if (!drv) {
error_setg(errp, "Unknown driver: '%s'", drvname);
- ret = -EINVAL;
goto fail;
}
}
assert(drvname || !(flags & BDRV_O_PROTOCOL));
- if (drv && !drv->bdrv_file_open) {
- /* If the user explicitly wants a format driver here, we'll need to add
- * another layer for the protocol in bs->file */
- flags &= ~BDRV_O_PROTOCOL;
- }
- bs->options = options;
- options = qdict_clone_shallow(options);
+ backing = qdict_get_try_str(options, "backing");
+ if (backing && *backing == '\0') {
+ flags |= BDRV_O_NO_BACKING;
+ qdict_del(options, "backing");
+ }
/* Open image file without format layer */
if ((flags & BDRV_O_PROTOCOL) == 0) {
@@ -1454,48 +1638,66 @@
flags |= BDRV_O_ALLOW_RDWR;
}
if (flags & BDRV_O_SNAPSHOT) {
- snapshot_flags = bdrv_temp_snapshot_flags(flags);
- flags = bdrv_backing_flags(flags);
+ snapshot_options = qdict_new();
+ bdrv_temp_snapshot_options(&snapshot_flags, snapshot_options,
+ flags, options);
+ bdrv_backing_options(&flags, options, flags, options);
}
- assert(file == NULL);
- ret = bdrv_open_image(&file, filename, options, "file",
- bdrv_inherited_flags(flags),
- true, &local_err);
- if (ret < 0) {
+ bs->open_flags = flags;
+
+ file = bdrv_open_child(filename, options, "file", bs,
+ &child_file, true, &local_err);
+ if (local_err) {
goto fail;
}
}
/* Image format probing */
+ bs->probed = !drv;
if (!drv && file) {
ret = find_image_format(file, filename, &drv, &local_err);
if (ret < 0) {
goto fail;
}
+ /*
+ * This option update would logically belong in bdrv_fill_options(),
+ * but we first need to open bs->file for the probing to work, while
+ * opening bs->file already requires the (mostly) final set of options
+ * so that cache mode etc. can be inherited.
+ *
+ * Adding the driver later is somewhat ugly, but it's not an option
+ * that would ever be inherited, so it's correct. We just need to make
+ * sure to update both bs->options (which has the full effective
+ * options for bs) and options (which has file.* already removed).
+ */
+ qdict_put(bs->options, "driver", qstring_from_str(drv->format_name));
+ qdict_put(options, "driver", qstring_from_str(drv->format_name));
} else if (!drv) {
error_setg(errp, "Must specify either driver or file");
- ret = -EINVAL;
goto fail;
}
+ /* BDRV_O_PROTOCOL must be set iff a protocol BDS is about to be created */
+ assert(!!(flags & BDRV_O_PROTOCOL) == !!drv->bdrv_file_open);
+ /* file must be NULL if a protocol BDS is about to be created
+ * (the inverse results in an error message from bdrv_open_common()) */
+ assert(!(flags & BDRV_O_PROTOCOL) || !file);
+
/* Open the image */
- ret = bdrv_open_common(bs, file, options, flags, drv, &local_err);
+ ret = bdrv_open_common(bs, file, options, &local_err);
if (ret < 0) {
goto fail;
}
if (file && (bs->file != file)) {
- bdrv_unref(file);
+ bdrv_unref_child(bs, file);
file = NULL;
}
/* If there is a backing file, use it */
if ((flags & BDRV_O_NO_BACKING) == 0) {
- QDict *backing_options;
-
- qdict_extract_subqdict(options, &backing_options, "backing.");
- ret = bdrv_open_backing_file(bs, backing_options, &local_err);
+ ret = bdrv_open_backing_file(bs, options, "backing", &local_err);
if (ret < 0) {
goto close_and_fail;
}
@@ -1503,15 +1705,6 @@
bdrv_refresh_filename(bs);
- /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
- * temporary snapshot afterwards. */
- if (snapshot_flags) {
- ret = bdrv_append_temp_snapshot(bs, snapshot_flags, &local_err);
- if (local_err) {
- goto close_and_fail;
- }
- }
-
/* Check if any unknown options were used */
if (options && (qdict_size(options) != 0)) {
const QDictEntry *entry = qdict_first(options);
@@ -1519,62 +1712,72 @@
error_setg(errp, "Block protocol '%s' doesn't support the option "
"'%s'", drv->format_name, entry->key);
} else {
- error_setg(errp, "Block format '%s' used by device '%s' doesn't "
- "support the option '%s'", drv->format_name,
- bdrv_get_device_name(bs), entry->key);
+ error_setg(errp,
+ "Block format '%s' does not support the option '%s'",
+ drv->format_name, entry->key);
}
- ret = -EINVAL;
goto close_and_fail;
}
if (!bdrv_key_required(bs)) {
- if (bs->blk) {
- blk_dev_change_media_cb(bs->blk, true);
- }
+ bdrv_parent_cb_change_media(bs, true);
} else if (!runstate_check(RUN_STATE_PRELAUNCH)
&& !runstate_check(RUN_STATE_INMIGRATE)
&& !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
error_setg(errp,
"Guest must be stopped for opening of encrypted image");
- ret = -EBUSY;
goto close_and_fail;
}
QDECREF(options);
- *pbs = bs;
- return 0;
+
+ /* For snapshot=on, create a temporary qcow2 overlay. bs points to the
+ * temporary snapshot afterwards. */
+ if (snapshot_flags) {
+ BlockDriverState *snapshot_bs;
+ snapshot_bs = bdrv_append_temp_snapshot(bs, snapshot_flags,
+ snapshot_options, &local_err);
+ snapshot_options = NULL;
+ if (local_err) {
+ goto close_and_fail;
+ }
+ /* We are not going to return bs but the overlay on top of it
+ * (snapshot_bs); thus, we have to drop the strong reference to bs
+ * (which we obtained by calling bdrv_new()). bs will not be deleted,
+ * though, because the overlay still has a reference to it. */
+ bdrv_unref(bs);
+ bs = snapshot_bs;
+ }
+
+ return bs;
fail:
if (file != NULL) {
- bdrv_unref(file);
+ bdrv_unref_child(bs, file);
}
+ QDECREF(snapshot_options);
+ QDECREF(bs->explicit_options);
QDECREF(bs->options);
QDECREF(options);
bs->options = NULL;
- if (!*pbs) {
- /* If *pbs is NULL, a new BDS has been created in this function and
- needs to be freed now. Otherwise, it does not need to be closed,
- since it has not really been opened yet. */
- bdrv_unref(bs);
- }
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ bdrv_unref(bs);
+ error_propagate(errp, local_err);
+ return NULL;
close_and_fail:
- /* See fail path, but now the BDS has to be always closed */
- if (*pbs) {
- bdrv_close(bs);
- } else {
- bdrv_unref(bs);
- }
+ bdrv_unref(bs);
+ QDECREF(snapshot_options);
QDECREF(options);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ error_propagate(errp, local_err);
+ return NULL;
+}
+
+BlockDriverState *bdrv_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
+{
+ return bdrv_open_inherit(filename, reference, options, flags, NULL,
+ NULL, errp);
}
typedef struct BlockReopenQueueEntry {
@@ -1595,39 +1798,116 @@
*
* bs is the BlockDriverState to add to the reopen queue.
*
+ * options contains the changed options for the associated bs
+ * (the BlockReopenQueue takes ownership)
+ *
* flags contains the open flags for the associated bs
*
* returns a pointer to bs_queue, which is either the newly allocated
* bs_queue, or the existing bs_queue being used.
*
*/
-BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs, int flags)
+static BlockReopenQueue *bdrv_reopen_queue_child(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ const BdrvChildRole *role,
+ QDict *parent_options,
+ int parent_flags)
{
assert(bs != NULL);
BlockReopenQueueEntry *bs_entry;
+ BdrvChild *child;
+ QDict *old_options, *explicit_options;
+
if (bs_queue == NULL) {
bs_queue = g_new0(BlockReopenQueue, 1);
QSIMPLEQ_INIT(bs_queue);
}
+ if (!options) {
+ options = qdict_new();
+ }
+
+ /*
+ * Precedence of options:
+ * 1. Explicitly passed in options (highest)
+ * 2. Set in flags (only for top level)
+ * 3. Retained from explicitly set options of bs
+ * 4. Inherited from parent node
+ * 5. Retained from effective options of bs
+ */
+
+ if (!parent_options) {
+ /*
+ * Any setting represented by flags is always updated. If the
+ * corresponding QDict option is set, it takes precedence. Otherwise
+ * the flag is translated into a QDict option. The old setting of bs is
+ * not considered.
+ */
+ update_options_from_flags(options, flags);
+ }
+
+ /* Old explicitly set values (don't overwrite by inherited value) */
+ old_options = qdict_clone_shallow(bs->explicit_options);
+ bdrv_join_options(bs, options, old_options);
+ QDECREF(old_options);
+
+ explicit_options = qdict_clone_shallow(options);
+
+ /* Inherit from parent node */
+ if (parent_options) {
+ assert(!flags);
+ role->inherit_options(&flags, options, parent_flags, parent_options);
+ }
+
+ /* Old values are used for options that aren't set yet */
+ old_options = qdict_clone_shallow(bs->options);
+ bdrv_join_options(bs, options, old_options);
+ QDECREF(old_options);
+
/* bdrv_open() masks this flag out */
flags &= ~BDRV_O_PROTOCOL;
- if (bs->file) {
- bdrv_reopen_queue(bs_queue, bs->file, bdrv_inherited_flags(flags));
+ QLIST_FOREACH(child, &bs->children, next) {
+ QDict *new_child_options;
+ char *child_key_dot;
+
+ /* reopen can only change the options of block devices that were
+ * implicitly created and inherited options. For other (referenced)
+ * block devices, a syntax like "backing.foo" results in an error. */
+ if (child->bs->inherits_from != bs) {
+ continue;
+ }
+
+ child_key_dot = g_strdup_printf("%s.", child->name);
+ qdict_extract_subqdict(options, &new_child_options, child_key_dot);
+ g_free(child_key_dot);
+
+ bdrv_reopen_queue_child(bs_queue, child->bs, new_child_options, 0,
+ child->role, options, flags);
}
bs_entry = g_new0(BlockReopenQueueEntry, 1);
QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
bs_entry->state.bs = bs;
+ bs_entry->state.options = options;
+ bs_entry->state.explicit_options = explicit_options;
bs_entry->state.flags = flags;
return bs_queue;
}
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+ BlockDriverState *bs,
+ QDict *options, int flags)
+{
+ return bdrv_reopen_queue_child(bs_queue, bs, options, flags,
+ NULL, NULL, 0);
+}
+
/*
* Reopen multiple BlockDriverStates atomically & transactionally.
*
@@ -1674,7 +1954,10 @@
QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
if (ret && bs_entry->prepared) {
bdrv_reopen_abort(&bs_entry->state);
+ } else if (ret) {
+ QDECREF(bs_entry->state.explicit_options);
}
+ QDECREF(bs_entry->state.options);
g_free(bs_entry);
}
g_free(bs_queue);
@@ -1687,7 +1970,7 @@
{
int ret = -1;
Error *local_err = NULL;
- BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+ BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, NULL, bdrv_flags);
ret = bdrv_reopen_multiple(queue, &local_err);
if (local_err != NULL) {
@@ -1720,25 +2003,49 @@
int ret = -1;
Error *local_err = NULL;
BlockDriver *drv;
+ QemuOpts *opts;
+ const char *value;
assert(reopen_state != NULL);
assert(reopen_state->bs->drv != NULL);
drv = reopen_state->bs->drv;
+ /* Process generic block layer options */
+ opts = qemu_opts_create(&bdrv_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, reopen_state->options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ update_flags_from_options(&reopen_state->flags, opts);
+
+ /* node-name and driver must be unchanged. Put them back into the QDict, so
+ * that they are checked at the end of this function. */
+ value = qemu_opt_get(opts, "node-name");
+ if (value) {
+ qdict_put(reopen_state->options, "node-name", qstring_from_str(value));
+ }
+
+ value = qemu_opt_get(opts, "driver");
+ if (value) {
+ qdict_put(reopen_state->options, "driver", qstring_from_str(value));
+ }
+
/* if we are to stay read-only, do not allow permission change
* to r/w */
if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
reopen_state->flags & BDRV_O_RDWR) {
- error_set(errp, QERR_DEVICE_IS_READ_ONLY,
- bdrv_get_device_name(reopen_state->bs));
+ error_setg(errp, "Node '%s' is read only",
+ bdrv_get_device_or_node_name(reopen_state->bs));
goto error;
}
ret = bdrv_flush(reopen_state->bs);
if (ret) {
- error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
- strerror(-ret));
+ error_setg_errno(errp, -ret, "Error flushing drive");
goto error;
}
@@ -1756,16 +2063,37 @@
} else {
/* It is currently mandatory to have a bdrv_reopen_prepare()
* handler for each supported drv. */
- error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- drv->format_name, bdrv_get_device_name(reopen_state->bs),
- "reopening of file");
+ error_setg(errp, "Block format '%s' used by node '%s' "
+ "does not support reopening files", drv->format_name,
+ bdrv_get_device_or_node_name(reopen_state->bs));
ret = -1;
goto error;
}
+ /* Options that are not handled are only okay if they are unchanged
+ * compared to the old state. It is expected that some options are only
+ * used for the initial open, but not reopen (e.g. filename) */
+ if (qdict_size(reopen_state->options)) {
+ const QDictEntry *entry = qdict_first(reopen_state->options);
+
+ do {
+ QString *new_obj = qobject_to_qstring(entry->value);
+ const char *new = qstring_get_str(new_obj);
+ const char *old = qdict_get_try_str(reopen_state->bs->options,
+ entry->key);
+
+ if (!old || strcmp(new, old)) {
+ error_setg(errp, "Cannot change the option '%s'", entry->key);
+ ret = -EINVAL;
+ goto error;
+ }
+ } while ((entry = qdict_next(reopen_state->options, entry)));
+ }
+
ret = 0;
error:
+ qemu_opts_del(opts);
return ret;
}
@@ -1788,9 +2116,10 @@
}
/* set BDS specific flags now */
+ QDECREF(reopen_state->bs->explicit_options);
+
+ reopen_state->bs->explicit_options = reopen_state->explicit_options;
reopen_state->bs->open_flags = reopen_state->flags;
- reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
- BDRV_O_CACHE_WB);
reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
bdrv_refresh_limits(reopen_state->bs, NULL);
@@ -1811,295 +2140,111 @@
if (drv->bdrv_reopen_abort) {
drv->bdrv_reopen_abort(reopen_state);
}
+
+ QDECREF(reopen_state->explicit_options);
}
-void bdrv_close(BlockDriverState *bs)
+static void bdrv_close(BlockDriverState *bs)
{
BdrvAioNotifier *ban, *ban_next;
- if (bs->job) {
- block_job_cancel_sync(bs->job);
- }
- bdrv_drain_all(); /* complete I/O */
+ assert(!bs->job);
+ assert(!bs->refcnt);
+
+ bdrv_drained_begin(bs); /* complete I/O */
bdrv_flush(bs);
- bdrv_drain_all(); /* in case flush left pending I/O */
- notifier_list_notify(&bs->close_notifiers, bs);
+ bdrv_drain(bs); /* in case flush left pending I/O */
+
+ bdrv_release_named_dirty_bitmaps(bs);
+ assert(QLIST_EMPTY(&bs->dirty_bitmaps));
if (bs->drv) {
- if (bs->backing_hd) {
- BlockDriverState *backing_hd = bs->backing_hd;
- bdrv_set_backing_hd(bs, NULL);
- bdrv_unref(backing_hd);
- }
+ BdrvChild *child, *next;
+
bs->drv->bdrv_close(bs);
+ bs->drv = NULL;
+
+ bdrv_set_backing_hd(bs, NULL);
+
+ if (bs->file != NULL) {
+ bdrv_unref_child(bs, bs->file);
+ bs->file = NULL;
+ }
+
+ QLIST_FOREACH_SAFE(child, &bs->children, next, next) {
+ /* TODO Remove bdrv_unref() from drivers' close function and use
+ * bdrv_unref_child() here */
+ if (child->bs->inherits_from == bs) {
+ child->bs->inherits_from = NULL;
+ }
+ bdrv_detach_child(child);
+ }
+
g_free(bs->opaque);
bs->opaque = NULL;
- bs->drv = NULL;
bs->copy_on_read = 0;
bs->backing_file[0] = '\0';
bs->backing_format[0] = '\0';
bs->total_sectors = 0;
- bs->encrypted = 0;
- bs->valid_key = 0;
- bs->sg = 0;
- bs->growable = 0;
- bs->zero_beyond_eof = false;
+ bs->encrypted = false;
+ bs->valid_key = false;
+ bs->sg = false;
QDECREF(bs->options);
+ QDECREF(bs->explicit_options);
bs->options = NULL;
QDECREF(bs->full_open_options);
bs->full_open_options = NULL;
-
- if (bs->file != NULL) {
- bdrv_unref(bs->file);
- bs->file = NULL;
- }
- }
-
- if (bs->blk) {
- blk_dev_change_media_cb(bs->blk, false);
- }
-
- /*throttling disk I/O limits*/
- if (bs->io_limits_enabled) {
- bdrv_io_limits_disable(bs);
}
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
g_free(ban);
}
QLIST_INIT(&bs->aio_notifiers);
+ bdrv_drained_end(bs);
}
void bdrv_close_all(void)
{
- BlockDriverState *bs;
+ block_job_cancel_sync_all();
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ /* Drop references from requests still in flight, such as canceled block
+ * jobs whose AIO context has not been polled yet */
+ bdrv_drain_all();
- aio_context_acquire(aio_context);
- bdrv_close(bs);
- aio_context_release(aio_context);
- }
+ blk_remove_all_bs();
+ blockdev_close_all_bdrv_states();
+
+ assert(QTAILQ_EMPTY(&all_bdrv_states));
}
-/* Check if any requests are in-flight (including throttled requests) */
-static bool bdrv_requests_pending(BlockDriverState *bs)
+static void change_parent_backing_link(BlockDriverState *from,
+ BlockDriverState *to)
{
- if (!QLIST_EMPTY(&bs->tracked_requests)) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[0])) {
- return true;
- }
- if (!qemu_co_queue_empty(&bs->throttled_reqs[1])) {
- return true;
- }
- if (bs->file && bdrv_requests_pending(bs->file)) {
- return true;
- }
- if (bs->backing_hd && bdrv_requests_pending(bs->backing_hd)) {
- return true;
- }
- return false;
-}
+ BdrvChild *c, *next, *to_c;
-static bool bdrv_drain_one(BlockDriverState *bs)
-{
- bool bs_busy;
-
- bdrv_flush_io_queue(bs);
- bdrv_start_throttled_reqs(bs);
- bs_busy = bdrv_requests_pending(bs);
- bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy);
- return bs_busy;
-}
-
-/*
- * Wait for pending requests to complete on a single BlockDriverState subtree
- *
- * See the warning in bdrv_drain_all(). This function can only be called if
- * you are sure nothing can generate I/O because you have op blockers
- * installed.
- *
- * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
- * AioContext.
- */
-void bdrv_drain(BlockDriverState *bs)
-{
- while (bdrv_drain_one(bs)) {
- /* Keep iterating */
- }
-}
-
-/*
- * Wait for pending requests to complete across all BlockDriverStates
- *
- * This function does not flush data to disk, use bdrv_flush_all() for that
- * after calling this function.
- *
- * Note that completion of an asynchronous I/O operation can trigger any
- * number of other I/O operations on other devices---for example a coroutine
- * can be arbitrarily complex and a constant flow of I/O can come until the
- * coroutine is complete. Because of this, it is not possible to have a
- * function to drain a single device's I/O queue.
- */
-void bdrv_drain_all(void)
-{
- /* Always run first iteration so any pending completion BHs run */
- bool busy = true;
- BlockDriverState *bs;
-
- while (busy) {
- busy = false;
-
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
-
- aio_context_acquire(aio_context);
- busy |= bdrv_drain_one(bs);
- aio_context_release(aio_context);
+ QLIST_FOREACH_SAFE(c, &from->parents, next_parent, next) {
+ if (c->role == &child_backing) {
+ /* @from is generally not allowed to be a backing file, except for
+ * when @to is the overlay. In that case, @from may not be replaced
+ * by @to as @to's backing node. */
+ QLIST_FOREACH(to_c, &to->children, next) {
+ if (to_c == c) {
+ break;
+ }
+ }
+ if (to_c) {
+ continue;
+ }
}
+
+ assert(c->role != &child_backing);
+ bdrv_ref(to);
+ bdrv_replace_child(c, to);
+ bdrv_unref(from);
}
}
-/* make a BlockDriverState anonymous by removing from bdrv_state and
- * graph_bdrv_state list.
- Also, NULL terminate the device_name to prevent double remove */
-void bdrv_make_anon(BlockDriverState *bs)
-{
- /*
- * Take care to remove bs from bdrv_states only when it's actually
- * in it. Note that bs->device_list.tqe_prev is initially null,
- * and gets set to non-null by QTAILQ_INSERT_TAIL(). Establish
- * the useful invariant "bs in bdrv_states iff bs->tqe_prev" by
- * resetting it to null on remove.
- */
- if (bs->device_list.tqe_prev) {
- QTAILQ_REMOVE(&bdrv_states, bs, device_list);
- bs->device_list.tqe_prev = NULL;
- }
- if (bs->node_name[0] != '\0') {
- QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
- }
- bs->node_name[0] = '\0';
-}
-
-static void bdrv_rebind(BlockDriverState *bs)
-{
- if (bs->drv && bs->drv->bdrv_rebind) {
- bs->drv->bdrv_rebind(bs);
- }
-}
-
-static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
- BlockDriverState *bs_src)
-{
- /* move some fields that need to stay attached to the device */
-
- /* dev info */
- bs_dest->guest_block_size = bs_src->guest_block_size;
- bs_dest->copy_on_read = bs_src->copy_on_read;
-
- bs_dest->enable_write_cache = bs_src->enable_write_cache;
-
- /* i/o throttled req */
- memcpy(&bs_dest->throttle_state,
- &bs_src->throttle_state,
- sizeof(ThrottleState));
- bs_dest->throttled_reqs[0] = bs_src->throttled_reqs[0];
- bs_dest->throttled_reqs[1] = bs_src->throttled_reqs[1];
- bs_dest->io_limits_enabled = bs_src->io_limits_enabled;
-
- /* r/w error */
- bs_dest->on_read_error = bs_src->on_read_error;
- bs_dest->on_write_error = bs_src->on_write_error;
-
- /* i/o status */
- bs_dest->iostatus_enabled = bs_src->iostatus_enabled;
- bs_dest->iostatus = bs_src->iostatus;
-
- /* dirty bitmap */
- bs_dest->dirty_bitmaps = bs_src->dirty_bitmaps;
-
- /* reference count */
- bs_dest->refcnt = bs_src->refcnt;
-
- /* job */
- bs_dest->job = bs_src->job;
-
- /* keep the same entry in bdrv_states */
- bs_dest->device_list = bs_src->device_list;
- bs_dest->blk = bs_src->blk;
-
- memcpy(bs_dest->op_blockers, bs_src->op_blockers,
- sizeof(bs_dest->op_blockers));
-}
-
-/*
- * Swap bs contents for two image chains while they are live,
- * while keeping required fields on the BlockDriverState that is
- * actually attached to a device.
- *
- * This will modify the BlockDriverState fields, and swap contents
- * between bs_new and bs_old. Both bs_new and bs_old are modified.
- *
- * bs_new must not be attached to a BlockBackend.
- *
- * This function does not create any image files.
- */
-void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old)
-{
- BlockDriverState tmp;
-
- /* The code needs to swap the node_name but simply swapping node_list won't
- * work so first remove the nodes from the graph list, do the swap then
- * insert them back if needed.
- */
- if (bs_new->node_name[0] != '\0') {
- QTAILQ_REMOVE(&graph_bdrv_states, bs_new, node_list);
- }
- if (bs_old->node_name[0] != '\0') {
- QTAILQ_REMOVE(&graph_bdrv_states, bs_old, node_list);
- }
-
- /* bs_new must be unattached and shouldn't have anything fancy enabled */
- assert(!bs_new->blk);
- assert(QLIST_EMPTY(&bs_new->dirty_bitmaps));
- assert(bs_new->job == NULL);
- assert(bs_new->io_limits_enabled == false);
- assert(!throttle_have_timer(&bs_new->throttle_state));
-
- tmp = *bs_new;
- *bs_new = *bs_old;
- *bs_old = tmp;
-
- /* there are some fields that should not be swapped, move them back */
- bdrv_move_feature_fields(&tmp, bs_old);
- bdrv_move_feature_fields(bs_old, bs_new);
- bdrv_move_feature_fields(bs_new, &tmp);
-
- /* bs_new must remain unattached */
- assert(!bs_new->blk);
-
- /* Check a few fields that should remain attached to the device */
- assert(bs_new->job == NULL);
- assert(bs_new->io_limits_enabled == false);
- assert(!throttle_have_timer(&bs_new->throttle_state));
-
- /* insert the nodes back into the graph node list if needed */
- if (bs_new->node_name[0] != '\0') {
- QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_new, node_list);
- }
- if (bs_old->node_name[0] != '\0') {
- QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs_old, node_list);
- }
-
- bdrv_rebind(bs_new);
- bdrv_rebind(bs_old);
-}
-
/*
* Add new bs contents at the top of an image chain while the chain is
* live, while keeping required fields on the top layer.
@@ -2110,14 +2255,38 @@
* bs_new must not be attached to a BlockBackend.
*
* This function does not create any image files.
+ *
+ * bdrv_append() takes ownership of a bs_new reference and unrefs it because
+ * that's what the callers commonly need. bs_new will be referenced by the old
+ * parents of bs_top after bdrv_append() returns. If the caller needs to keep a
+ * reference of its own, it must call bdrv_ref().
*/
void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
{
- bdrv_swap(bs_new, bs_top);
+ assert(!bdrv_requests_pending(bs_top));
+ assert(!bdrv_requests_pending(bs_new));
- /* The contents of 'tmp' will become bs_top, as we are
- * swapping bs_new and bs_top contents. */
- bdrv_set_backing_hd(bs_top, bs_new);
+ bdrv_ref(bs_top);
+
+ change_parent_backing_link(bs_top, bs_new);
+ bdrv_set_backing_hd(bs_new, bs_top);
+ bdrv_unref(bs_top);
+
+ /* bs_new is now referenced by its new parents, so we don't need the
+ * additional reference any more. */
+ bdrv_unref(bs_new);
+}
+
+/*
+ * Calls change_parent_backing_link(old, new) while holding a temporary
+ * reference on @old.
+ *
+ * NOTE(review): presumably this makes the parents that referenced @old
+ * reference @new instead -- confirm against change_parent_backing_link().
+ *
+ * Neither @old nor @new may have requests pending; both conditions are
+ * asserted on entry.
+ *
+ * The bdrv_ref()/bdrv_unref() pair on @old presumably keeps @old from being
+ * freed while the links are changed (TODO confirm). Unlike bdrv_append(),
+ * this function does not unref @new.
+ */
+void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
+{
+ assert(!bdrv_requests_pending(old));
+ assert(!bdrv_requests_pending(new));
+
+ bdrv_ref(old);
+
+ change_parent_backing_link(old, new);
+
+ bdrv_unref(old);
}
static void bdrv_delete(BlockDriverState *bs)
@@ -2125,12 +2294,14 @@
assert(!bs->job);
assert(bdrv_op_blocker_is_empty(bs));
assert(!bs->refcnt);
- assert(QLIST_EMPTY(&bs->dirty_bitmaps));
bdrv_close(bs);
/* remove from list, if necessary */
- bdrv_make_anon(bs);
+ if (bs->node_name[0] != '\0') {
+ QTAILQ_REMOVE(&graph_bdrv_states, bs, node_list);
+ }
+ QTAILQ_REMOVE(&all_bdrv_states, bs, bs_list);
g_free(bs);
}
@@ -2155,285 +2326,6 @@
return bs->drv->bdrv_check(bs, res, fix);
}
-#define COMMIT_BUF_SECTORS 2048
-
-/* commit COW file into the raw image */
-int bdrv_commit(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- int64_t sector, total_sectors, length, backing_length;
- int n, ro, open_flags;
- int ret = 0;
- uint8_t *buf = NULL;
- char filename[PATH_MAX];
-
- if (!drv)
- return -ENOMEDIUM;
-
- if (!bs->backing_hd) {
- return -ENOTSUP;
- }
-
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, NULL) ||
- bdrv_op_is_blocked(bs->backing_hd, BLOCK_OP_TYPE_COMMIT, NULL)) {
- return -EBUSY;
- }
-
- ro = bs->backing_hd->read_only;
- /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
- pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
- open_flags = bs->backing_hd->open_flags;
-
- if (ro) {
- if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
- return -EACCES;
- }
- }
-
- length = bdrv_getlength(bs);
- if (length < 0) {
- ret = length;
- goto ro_cleanup;
- }
-
- backing_length = bdrv_getlength(bs->backing_hd);
- if (backing_length < 0) {
- ret = backing_length;
- goto ro_cleanup;
- }
-
- /* If our top snapshot is larger than the backing file image,
- * grow the backing file image if possible. If not possible,
- * we must return an error */
- if (length > backing_length) {
- ret = bdrv_truncate(bs->backing_hd, length);
- if (ret < 0) {
- goto ro_cleanup;
- }
- }
-
- total_sectors = length >> BDRV_SECTOR_BITS;
-
- /* qemu_try_blockalign() for bs will choose an alignment that works for
- * bs->backing_hd as well, so no need to compare the alignment manually. */
- buf = qemu_try_blockalign(bs, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
- if (buf == NULL) {
- ret = -ENOMEM;
- goto ro_cleanup;
- }
-
- for (sector = 0; sector < total_sectors; sector += n) {
- ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
- if (ret < 0) {
- goto ro_cleanup;
- }
- if (ret) {
- ret = bdrv_read(bs, sector, buf, n);
- if (ret < 0) {
- goto ro_cleanup;
- }
-
- ret = bdrv_write(bs->backing_hd, sector, buf, n);
- if (ret < 0) {
- goto ro_cleanup;
- }
- }
- }
-
- if (drv->bdrv_make_empty) {
- ret = drv->bdrv_make_empty(bs);
- if (ret < 0) {
- goto ro_cleanup;
- }
- bdrv_flush(bs);
- }
-
- /*
- * Make sure all data we wrote to the backing device is actually
- * stable on disk.
- */
- if (bs->backing_hd) {
- bdrv_flush(bs->backing_hd);
- }
-
- ret = 0;
-ro_cleanup:
- qemu_vfree(buf);
-
- if (ro) {
- /* ignoring error return here */
- bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
- }
-
- return ret;
-}
-
-int bdrv_commit_all(void)
-{
- BlockDriverState *bs;
-
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
-
- aio_context_acquire(aio_context);
- if (bs->drv && bs->backing_hd) {
- int ret = bdrv_commit(bs);
- if (ret < 0) {
- aio_context_release(aio_context);
- return ret;
- }
- }
- aio_context_release(aio_context);
- }
- return 0;
-}
-
-/**
- * Remove an active request from the tracked requests list
- *
- * This function should be called when a tracked request is completing.
- */
-static void tracked_request_end(BdrvTrackedRequest *req)
-{
- if (req->serialising) {
- req->bs->serialising_in_flight--;
- }
-
- QLIST_REMOVE(req, list);
- qemu_co_queue_restart_all(&req->wait_queue);
-}
-
-/**
- * Add an active request to the tracked requests list
- */
-static void tracked_request_begin(BdrvTrackedRequest *req,
- BlockDriverState *bs,
- int64_t offset,
- unsigned int bytes, bool is_write)
-{
- *req = (BdrvTrackedRequest){
- .bs = bs,
- .offset = offset,
- .bytes = bytes,
- .is_write = is_write,
- .co = qemu_coroutine_self(),
- .serialising = false,
- .overlap_offset = offset,
- .overlap_bytes = bytes,
- };
-
- qemu_co_queue_init(&req->wait_queue);
-
- QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
-}
-
-static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
-{
- int64_t overlap_offset = req->offset & ~(align - 1);
- unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
- - overlap_offset;
-
- if (!req->serialising) {
- req->bs->serialising_in_flight++;
- req->serialising = true;
- }
-
- req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
- req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
-}
-
-/**
- * Round a region to cluster boundaries
- */
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- int64_t *cluster_sector_num,
- int *cluster_nb_sectors)
-{
- BlockDriverInfo bdi;
-
- if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
- *cluster_sector_num = sector_num;
- *cluster_nb_sectors = nb_sectors;
- } else {
- int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE;
- *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
- *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
- nb_sectors, c);
- }
-}
-
-static int bdrv_get_cluster_size(BlockDriverState *bs)
-{
- BlockDriverInfo bdi;
- int ret;
-
- ret = bdrv_get_info(bs, &bdi);
- if (ret < 0 || bdi.cluster_size == 0) {
- return bs->request_alignment;
- } else {
- return bdi.cluster_size;
- }
-}
-
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
- int64_t offset, unsigned int bytes)
-{
- /* aaaa bbbb */
- if (offset >= req->overlap_offset + req->overlap_bytes) {
- return false;
- }
- /* bbbb aaaa */
- if (req->overlap_offset >= offset + bytes) {
- return false;
- }
- return true;
-}
-
-static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
-{
- BlockDriverState *bs = self->bs;
- BdrvTrackedRequest *req;
- bool retry;
- bool waited = false;
-
- if (!bs->serialising_in_flight) {
- return false;
- }
-
- do {
- retry = false;
- QLIST_FOREACH(req, &bs->tracked_requests, list) {
- if (req == self || (!req->serialising && !self->serialising)) {
- continue;
- }
- if (tracked_request_overlaps(req, self->overlap_offset,
- self->overlap_bytes))
- {
- /* Hitting this means there was a reentrant request, for
- * example, a block driver issuing nested requests. This must
- * never happen since it means deadlock.
- */
- assert(qemu_coroutine_self() != req->co);
-
- /* If the request is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case). */
- if (!req->waiting_for) {
- self->waiting_for = req;
- qemu_co_queue_wait(&req->wait_queue);
- self->waiting_for = NULL;
- retry = true;
- waited = true;
- break;
- }
- }
- }
- } while (retry);
-
- return waited;
-}
-
/*
* Return values:
* 0 - success
@@ -2479,8 +2371,8 @@
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs)
{
- while (active && bs != active->backing_hd) {
- active = active->backing_hd;
+ while (active && bs != backing_bs(active)) {
+ active = backing_bs(active);
}
return active;
@@ -2492,12 +2384,6 @@
return bdrv_find_overlay(bs, NULL);
}
-typedef struct BlkIntermediateStates {
- BlockDriverState *bs;
- QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
/*
* Drops images above 'base' up to and including 'top', and sets the image
* above 'top' to have base as its backing file.
@@ -2530,15 +2416,9 @@
int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base, const char *backing_file_str)
{
- BlockDriverState *intermediate;
- BlockDriverState *base_bs = NULL;
BlockDriverState *new_top_bs = NULL;
- BlkIntermediateStates *intermediate_state, *next;
int ret = -EIO;
- QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
- QSIMPLEQ_INIT(&states_to_delete);
-
if (!top->drv || !base->drv) {
goto exit;
}
@@ -2550,909 +2430,32 @@
goto exit;
}
- /* special case of new_top_bs->backing_hd already pointing to base - nothing
+ /* special case of new_top_bs->backing->bs already pointing to base - nothing
* to do, no intermediate images */
- if (new_top_bs->backing_hd == base) {
+ if (backing_bs(new_top_bs) == base) {
ret = 0;
goto exit;
}
- intermediate = top;
-
- /* now we will go down through the list, and add each BDS we find
- * into our deletion queue, until we hit the 'base'
- */
- while (intermediate) {
- intermediate_state = g_new0(BlkIntermediateStates, 1);
- intermediate_state->bs = intermediate;
- QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
-
- if (intermediate->backing_hd == base) {
- base_bs = intermediate->backing_hd;
- break;
- }
- intermediate = intermediate->backing_hd;
- }
- if (base_bs == NULL) {
- /* something went wrong, we did not end at the base. safely
- * unravel everything, and exit with error */
+ /* Make sure that base is in the backing chain of top */
+ if (!bdrv_chain_contains(top, base)) {
goto exit;
}
/* success - we can delete the intermediate states, and link top->base */
- backing_file_str = backing_file_str ? backing_file_str : base_bs->filename;
+ backing_file_str = backing_file_str ? backing_file_str : base->filename;
ret = bdrv_change_backing_file(new_top_bs, backing_file_str,
- base_bs->drv ? base_bs->drv->format_name : "");
+ base->drv ? base->drv->format_name : "");
if (ret) {
goto exit;
}
- bdrv_set_backing_hd(new_top_bs, base_bs);
+ bdrv_set_backing_hd(new_top_bs, base);
- QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
- /* so that bdrv_close() does not recursively close the chain */
- bdrv_set_backing_hd(intermediate_state->bs, NULL);
- bdrv_unref(intermediate_state->bs);
- }
ret = 0;
-
exit:
- QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
- g_free(intermediate_state);
- }
return ret;
}
-
-static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
- size_t size)
-{
- int64_t len;
-
- if (size > INT_MAX) {
- return -EIO;
- }
-
- if (!bdrv_is_inserted(bs))
- return -ENOMEDIUM;
-
- if (bs->growable)
- return 0;
-
- len = bdrv_getlength(bs);
-
- if (offset < 0)
- return -EIO;
-
- if ((offset > len) || (len - offset < size))
- return -EIO;
-
- return 0;
-}
-
-static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
- return -EIO;
- }
-
- return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE);
-}
-
-typedef struct RwCo {
- BlockDriverState *bs;
- int64_t offset;
- QEMUIOVector *qiov;
- bool is_write;
- int ret;
- BdrvRequestFlags flags;
-} RwCo;
-
-static void coroutine_fn bdrv_rw_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- if (!rwco->is_write) {
- rwco->ret = bdrv_co_do_preadv(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- } else {
- rwco->ret = bdrv_co_do_pwritev(rwco->bs, rwco->offset,
- rwco->qiov->size, rwco->qiov,
- rwco->flags);
- }
-}
-
-/*
- * Process a vectored synchronous request using coroutines
- */
-static int bdrv_prwv_co(BlockDriverState *bs, int64_t offset,
- QEMUIOVector *qiov, bool is_write,
- BdrvRequestFlags flags)
-{
- Coroutine *co;
- RwCo rwco = {
- .bs = bs,
- .offset = offset,
- .qiov = qiov,
- .is_write = is_write,
- .ret = NOT_DONE,
- .flags = flags,
- };
-
- /**
- * In sync call context, when the vcpu is blocked, this throttling timer
- * will not fire; so the I/O throttling function has to be disabled here
- * if it has been enabled.
- */
- if (bs->io_limits_enabled) {
- fprintf(stderr, "Disabling I/O throttling on '%s' due "
- "to synchronous I/O.\n", bdrv_get_device_name(bs));
- bdrv_io_limits_disable(bs);
- }
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_rw_co_entry(&rwco);
- } else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
-
- co = qemu_coroutine_create(bdrv_rw_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- aio_poll(aio_context, true);
- }
- }
- return rwco.ret;
-}
-
-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BlockDriverState *bs, int64_t sector_num, uint8_t *buf,
- int nb_sectors, bool is_write, BdrvRequestFlags flags)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
- };
-
- if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
- return -EINVAL;
- }
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_prwv_co(bs, sector_num << BDRV_SECTOR_BITS,
- &qiov, is_write, flags);
-}
-
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- return bdrv_rw_co(bs, sector_num, buf, nb_sectors, false, 0);
-}
-
-/* Just like bdrv_read(), but with I/O throttling temporarily disabled */
-int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- bool enabled;
- int ret;
-
- enabled = bs->io_limits_enabled;
- bs->io_limits_enabled = false;
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- bs->io_limits_enabled = enabled;
- return ret;
-}
-
-/* Return < 0 if error. Important errors are:
- -EIO generic I/O error (may happen for all errors)
- -ENOMEDIUM No media inserted.
- -EINVAL Invalid sector number or nb_sectors
- -EACCES Trying to write a read-only device
-*/
-int bdrv_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- return bdrv_rw_co(bs, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
-}
-
-int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
-{
- return bdrv_rw_co(bs, sector_num, NULL, nb_sectors, true,
- BDRV_REQ_ZERO_WRITE | flags);
-}
-
-/*
- * Completely zero out a block device with the help of bdrv_write_zeroes.
- * The operation is sped up by checking the block status and only writing
- * zeroes to the device if they currently do not return zeroes. Optional
- * flags are passed through to bdrv_write_zeroes (e.g. BDRV_REQ_MAY_UNMAP).
- *
- * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
- */
-int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags)
-{
- int64_t target_sectors, ret, nb_sectors, sector_num = 0;
- int n;
-
- target_sectors = bdrv_nb_sectors(bs);
- if (target_sectors < 0) {
- return target_sectors;
- }
-
- for (;;) {
- nb_sectors = target_sectors - sector_num;
- if (nb_sectors <= 0) {
- return 0;
- }
- if (nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
- nb_sectors = INT_MAX / BDRV_SECTOR_SIZE;
- }
- ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
- if (ret < 0) {
- error_report("error getting block status at sector %" PRId64 ": %s",
- sector_num, strerror(-ret));
- return ret;
- }
- if (ret & BDRV_BLOCK_ZERO) {
- sector_num += n;
- continue;
- }
- ret = bdrv_write_zeroes(bs, sector_num, n, flags);
- if (ret < 0) {
- error_report("error writing zeroes at sector %" PRId64 ": %s",
- sector_num, strerror(-ret));
- return ret;
- }
- sector_num += n;
- }
-}
-
-int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int bytes)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *)buf,
- .iov_len = bytes,
- };
- int ret;
-
- if (bytes < 0) {
- return -EINVAL;
- }
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- ret = bdrv_prwv_co(bs, offset, &qiov, false, 0);
- if (ret < 0) {
- return ret;
- }
-
- return bytes;
-}
-
-int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov)
-{
- int ret;
-
- ret = bdrv_prwv_co(bs, offset, qiov, true, 0);
- if (ret < 0) {
- return ret;
- }
-
- return qiov->size;
-}
-
-int bdrv_pwrite(BlockDriverState *bs, int64_t offset,
- const void *buf, int bytes)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = bytes,
- };
-
- if (bytes < 0) {
- return -EINVAL;
- }
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_pwritev(bs, offset, &qiov);
-}
-
-/*
- * Writes to the file and ensures that no writes are reordered across this
- * request (acts as a barrier)
- *
- * Returns 0 on success, -errno in error cases.
- */
-int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset,
- const void *buf, int count)
-{
- int ret;
-
- ret = bdrv_pwrite(bs, offset, buf, count);
- if (ret < 0) {
- return ret;
- }
-
- /* No flush needed for cache modes that already do it */
- if (bs->enable_write_cache) {
- bdrv_flush(bs);
- }
-
- return 0;
-}
-
-static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- /* Perform I/O through a temporary buffer so that users who scribble over
- * their read buffer while the operation is in progress do not end up
- * modifying the image file. This is critical for zero-copy guest I/O
- * where anything might happen inside guest memory.
- */
- void *bounce_buffer;
-
- BlockDriver *drv = bs->drv;
- struct iovec iov;
- QEMUIOVector bounce_qiov;
- int64_t cluster_sector_num;
- int cluster_nb_sectors;
- size_t skip_bytes;
- int ret;
-
- /* Cover entire cluster so no additional backing file I/O is required when
- * allocating cluster in the image file.
- */
- bdrv_round_to_clusters(bs, sector_num, nb_sectors,
- &cluster_sector_num, &cluster_nb_sectors);
-
- trace_bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors,
- cluster_sector_num, cluster_nb_sectors);
-
- iov.iov_len = cluster_nb_sectors * BDRV_SECTOR_SIZE;
- iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
- if (bounce_buffer == NULL) {
- ret = -ENOMEM;
- goto err;
- }
-
- qemu_iovec_init_external(&bounce_qiov, &iov, 1);
-
- ret = drv->bdrv_co_readv(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
- if (ret < 0) {
- goto err;
- }
-
- if (drv->bdrv_co_write_zeroes &&
- buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors, 0);
- } else {
- /* This does not change the data on the disk, it is not necessary
- * to flush even in cache=writethrough mode.
- */
- ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
- &bounce_qiov);
- }
-
- if (ret < 0) {
- /* It might be okay to ignore write errors for guest requests. If this
- * is a deliberate copy-on-read then we don't want to ignore the error.
- * Simply report it in all cases.
- */
- goto err;
- }
-
- skip_bytes = (sector_num - cluster_sector_num) * BDRV_SECTOR_SIZE;
- qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes,
- nb_sectors * BDRV_SECTOR_SIZE);
-
-err:
- qemu_vfree(bounce_buffer);
- return ret;
-}
-
-/*
- * Forwards an already correctly aligned request to the BlockDriver. This
- * handles copy on read and zeroing after EOF; any other features must be
- * implemented by the caller.
- */
-static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
- BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- int64_t align, QEMUIOVector *qiov, int flags)
-{
- BlockDriver *drv = bs->drv;
- int ret;
-
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert(!qiov || bytes == qiov->size);
-
- /* Handle Copy on Read and associated serialisation */
- if (flags & BDRV_REQ_COPY_ON_READ) {
- /* If we touch the same cluster it counts as an overlap. This
- * guarantees that allocating writes will be serialized and not race
- * with each other for the same cluster. For example, in copy-on-read
- * it ensures that the CoR read and write operations are atomic and
- * guest writes cannot interleave between them. */
- mark_request_serialising(req, bdrv_get_cluster_size(bs));
- }
-
- wait_serialising_requests(req);
-
- if (flags & BDRV_REQ_COPY_ON_READ) {
- int pnum;
-
- ret = bdrv_is_allocated(bs, sector_num, nb_sectors, &pnum);
- if (ret < 0) {
- goto out;
- }
-
- if (!ret || pnum != nb_sectors) {
- ret = bdrv_co_do_copy_on_readv(bs, sector_num, nb_sectors, qiov);
- goto out;
- }
- }
-
- /* Forward the request to the BlockDriver */
- if (!(bs->zero_beyond_eof && bs->growable)) {
- ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
- } else {
- /* Read zeros after EOF of growable BDSes */
- int64_t total_sectors, max_nb_sectors;
-
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- ret = total_sectors;
- goto out;
- }
-
- max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num),
- align >> BDRV_SECTOR_BITS);
- if (max_nb_sectors > 0) {
- QEMUIOVector local_qiov;
- size_t local_sectors;
-
- max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS);
- local_sectors = MIN(max_nb_sectors, nb_sectors);
-
- qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, 0,
- local_sectors * BDRV_SECTOR_SIZE);
-
- ret = drv->bdrv_co_readv(bs, sector_num, local_sectors,
- &local_qiov);
-
- qemu_iovec_destroy(&local_qiov);
- } else {
- ret = 0;
- }
-
- /* Reading beyond end of file is supposed to produce zeroes */
- if (ret == 0 && total_sectors < sector_num + nb_sectors) {
- uint64_t offset = MAX(0, total_sectors - sector_num);
- uint64_t bytes = (sector_num + nb_sectors - offset) *
- BDRV_SECTOR_SIZE;
- qemu_iovec_memset(qiov, offset * BDRV_SECTOR_SIZE, 0, bytes);
- }
- }
-
-out:
- return ret;
-}
-
-/*
- * Handle a read request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- BdrvTrackedRequest req;
-
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
- int ret;
-
- if (!drv) {
- return -ENOMEDIUM;
- }
- if (bdrv_check_byte_request(bs, offset, bytes)) {
- return -EIO;
- }
-
- if (bs->copy_on_read) {
- flags |= BDRV_REQ_COPY_ON_READ;
- }
-
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, bytes, false);
- }
-
- /* Align read if necessary by padding qiov */
- if (offset & (align - 1)) {
- head_buf = qemu_blockalign(bs, align);
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
- }
-
- if ((offset + bytes) & (align - 1)) {
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
- tail_buf = qemu_blockalign(bs, align);
- qemu_iovec_add(&local_qiov, tail_buf,
- align - ((offset + bytes) & (align - 1)));
-
- bytes = ROUND_UP(bytes, align);
- }
-
- tracked_request_begin(&req, bs, offset, bytes, false);
- ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
- use_local_qiov ? &local_qiov : qiov,
- flags);
- tracked_request_end(&req);
-
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
- }
-
- return ret;
-}
-
-static int coroutine_fn bdrv_co_do_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- if (nb_sectors < 0 || nb_sectors > (UINT_MAX >> BDRV_SECTOR_BITS)) {
- return -EINVAL;
- }
-
- return bdrv_co_do_preadv(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
-}
-
-int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_copy_on_readv(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_readv(bs, sector_num, nb_sectors, qiov,
- BDRV_REQ_COPY_ON_READ);
-}
-
-/* if no limit is specified in the BlockLimits use a default
- * of 32768 512-byte sectors (16 MiB) per request.
- */
-#define MAX_WRITE_ZEROES_DEFAULT 32768
-
-static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
-{
- BlockDriver *drv = bs->drv;
- QEMUIOVector qiov;
- struct iovec iov = {0};
- int ret = 0;
-
- int max_write_zeroes = bs->bl.max_write_zeroes ?
- bs->bl.max_write_zeroes : MAX_WRITE_ZEROES_DEFAULT;
-
- while (nb_sectors > 0 && !ret) {
- int num = nb_sectors;
-
- /* Align request. Block drivers can expect the "bulk" of the request
- * to be aligned.
- */
- if (bs->bl.write_zeroes_alignment
- && num > bs->bl.write_zeroes_alignment) {
- if (sector_num % bs->bl.write_zeroes_alignment != 0) {
- /* Make a small request up to the first aligned sector. */
- num = bs->bl.write_zeroes_alignment;
- num -= sector_num % bs->bl.write_zeroes_alignment;
- } else if ((sector_num + num) % bs->bl.write_zeroes_alignment != 0) {
- /* Shorten the request to the last aligned sector. num cannot
- * underflow because num > bs->bl.write_zeroes_alignment.
- */
- num -= (sector_num + num) % bs->bl.write_zeroes_alignment;
- }
- }
-
- /* limit request size */
- if (num > max_write_zeroes) {
- num = max_write_zeroes;
- }
-
- ret = -ENOTSUP;
- /* First try the efficient write zeroes operation */
- if (drv->bdrv_co_write_zeroes) {
- ret = drv->bdrv_co_write_zeroes(bs, sector_num, num, flags);
- }
-
- if (ret == -ENOTSUP) {
- /* Fall back to bounce buffer if write zeroes is unsupported */
- iov.iov_len = num * BDRV_SECTOR_SIZE;
- if (iov.iov_base == NULL) {
- iov.iov_base = qemu_try_blockalign(bs, num * BDRV_SECTOR_SIZE);
- if (iov.iov_base == NULL) {
- ret = -ENOMEM;
- goto fail;
- }
- memset(iov.iov_base, 0, num * BDRV_SECTOR_SIZE);
- }
- qemu_iovec_init_external(&qiov, &iov, 1);
-
- ret = drv->bdrv_co_writev(bs, sector_num, num, &qiov);
-
- /* Keep bounce buffer around if it is big enough for all
- * all future requests.
- */
- if (num < max_write_zeroes) {
- qemu_vfree(iov.iov_base);
- iov.iov_base = NULL;
- }
- }
-
- sector_num += num;
- nb_sectors -= num;
- }
-
-fail:
- qemu_vfree(iov.iov_base);
- return ret;
-}
-
-/*
- * Forwards an already correctly aligned write request to the BlockDriver.
- */
-static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
- BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- QEMUIOVector *qiov, int flags)
-{
- BlockDriver *drv = bs->drv;
- bool waited;
- int ret;
-
- int64_t sector_num = offset >> BDRV_SECTOR_BITS;
- unsigned int nb_sectors = bytes >> BDRV_SECTOR_BITS;
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert(!qiov || bytes == qiov->size);
-
- waited = wait_serialising_requests(req);
- assert(!waited || !req->serialising);
- assert(req->overlap_offset <= offset);
- assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
- (void)waited;
-
- ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
-
- if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
- !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_write_zeroes &&
- qemu_iovec_is_zero(qiov)) {
- flags |= BDRV_REQ_ZERO_WRITE;
- if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
- flags |= BDRV_REQ_MAY_UNMAP;
- }
- }
-
- if (ret < 0) {
- /* Do nothing, write notifier decided to fail this request */
- } else if (flags & BDRV_REQ_ZERO_WRITE) {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_ZERO);
- ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors, flags);
- } else {
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV);
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
- }
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_DONE);
-
- if (ret == 0 && !bs->enable_write_cache) {
- ret = bdrv_co_flush(bs);
- }
-
- bdrv_set_dirty(bs, sector_num, nb_sectors);
-
- block_acct_highest_sector(&bs->stats, sector_num, nb_sectors);
-
- if (bs->growable && ret >= 0) {
- bs->total_sectors = MAX(bs->total_sectors, sector_num + nb_sectors);
- }
-
- return ret;
-}
-
-/*
- * Handle a write request in coroutine context
- */
-static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- BdrvTrackedRequest req;
- /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */
- uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment);
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
- int ret;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- }
- if (bs->read_only) {
- return -EACCES;
- }
- if (bdrv_check_byte_request(bs, offset, bytes)) {
- return -EIO;
- }
-
- /* throttling disk I/O */
- if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, bytes, true);
- }
-
- /*
- * Align write if necessary by performing a read-modify-write cycle.
- * Pad qiov with the read parts and be sure to have a tracked request not
- * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
- */
- tracked_request_begin(&req, bs, offset, bytes, true);
-
- if (offset & (align - 1)) {
- QEMUIOVector head_qiov;
- struct iovec head_iov;
-
- mark_request_serialising(&req, align);
- wait_serialising_requests(&req);
-
- head_buf = qemu_blockalign(bs, align);
- head_iov = (struct iovec) {
- .iov_base = head_buf,
- .iov_len = align,
- };
- qemu_iovec_init_external(&head_qiov, &head_iov, 1);
-
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
- align, &head_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
- }
-
- if ((offset + bytes) & (align - 1)) {
- QEMUIOVector tail_qiov;
- struct iovec tail_iov;
- size_t tail_bytes;
- bool waited;
-
- mark_request_serialising(&req, align);
- waited = wait_serialising_requests(&req);
- assert(!waited || !use_local_qiov);
- (void)waited;
-
- tail_buf = qemu_blockalign(bs, align);
- tail_iov = (struct iovec) {
- .iov_base = tail_buf,
- .iov_len = align,
- };
- qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
-
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
- align, &tail_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
-
- tail_bytes = (offset + bytes) & (align - 1);
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
-
- bytes = ROUND_UP(bytes, align);
- }
-
- ret = bdrv_aligned_pwritev(bs, &req, offset, bytes,
- use_local_qiov ? &local_qiov : qiov,
- flags);
-
-fail:
- tracked_request_end(&req);
-
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- }
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
-
- return ret;
-}
-
-static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
- BdrvRequestFlags flags)
-{
- if (nb_sectors < 0 || nb_sectors > (INT_MAX >> BDRV_SECTOR_BITS)) {
- return -EINVAL;
- }
-
- return bdrv_co_do_pwritev(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors << BDRV_SECTOR_BITS, qiov, flags);
-}
-
-int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- trace_bdrv_co_writev(bs, sector_num, nb_sectors);
-
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, qiov, 0);
-}
-
-int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BdrvRequestFlags flags)
-{
- trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags);
-
- if (!(bs->open_flags & BDRV_O_UNMAP)) {
- flags &= ~BDRV_REQ_MAY_UNMAP;
- }
-
- return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL,
- BDRV_REQ_ZERO_WRITE | flags);
-}
-
/**
* Truncate file to 'offset' bytes (needed only for file protocols)
*/
@@ -3470,9 +2473,9 @@
ret = drv->bdrv_truncate(bs, offset);
if (ret == 0) {
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
- if (bs->blk) {
- blk_dev_resize_cb(bs->blk);
- }
+ bdrv_dirty_bitmap_truncate(bs);
+ bdrv_parent_cb_resize(bs);
+ ++bs->write_gen;
}
return ret;
}
@@ -3491,7 +2494,7 @@
return drv->bdrv_get_allocated_file_size(bs);
}
if (bs->file) {
- return bdrv_get_allocated_file_size(bs->file);
+ return bdrv_get_allocated_file_size(bs->file->bs);
}
return -ENOTSUP;
}
@@ -3523,6 +2526,7 @@
{
int64_t ret = bdrv_nb_sectors(bs);
+ ret = ret > INT64_MAX / BDRV_SECTOR_SIZE ? -EFBIG : ret;
return ret < 0 ? ret : ret * BDRV_SECTOR_SIZE;
}
@@ -3534,130 +2538,39 @@
*nb_sectors_ptr = nb_sectors < 0 ? 0 : nb_sectors;
}
-void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
- BlockdevOnError on_write_error)
-{
- bs->on_read_error = on_read_error;
- bs->on_write_error = on_write_error;
-}
-
-BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
-{
- return is_read ? bs->on_read_error : bs->on_write_error;
-}
-
-BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
-{
- BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
-
- switch (on_err) {
- case BLOCKDEV_ON_ERROR_ENOSPC:
- return (error == ENOSPC) ?
- BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
- case BLOCKDEV_ON_ERROR_STOP:
- return BLOCK_ERROR_ACTION_STOP;
- case BLOCKDEV_ON_ERROR_REPORT:
- return BLOCK_ERROR_ACTION_REPORT;
- case BLOCKDEV_ON_ERROR_IGNORE:
- return BLOCK_ERROR_ACTION_IGNORE;
- default:
- abort();
- }
-}
-
-static void send_qmp_error_event(BlockDriverState *bs,
- BlockErrorAction action,
- bool is_read, int error)
-{
- IoOperationType optype;
-
- optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
- qapi_event_send_block_io_error(bdrv_get_device_name(bs), optype, action,
- bdrv_iostatus_is_enabled(bs),
- error == ENOSPC, strerror(error),
- &error_abort);
-}
-
-/* This is done by device models because, while the block layer knows
- * about the error, it does not know whether an operation comes from
- * the device or the block layer (from a job, for example).
- */
-void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
- bool is_read, int error)
-{
- assert(error >= 0);
-
- if (action == BLOCK_ERROR_ACTION_STOP) {
- /* First set the iostatus, so that "info block" returns an iostatus
- * that matches the events raised so far (an additional error iostatus
- * is fine, but not a lost one).
- */
- bdrv_iostatus_set_err(bs, error);
-
- /* Then raise the request to stop the VM and the event.
- * qemu_system_vmstop_request_prepare has two effects. First,
- * it ensures that the STOP event always comes after the
- * BLOCK_IO_ERROR event. Second, it ensures that even if management
- * can observe the STOP event and do a "cont" before the STOP
- * event is issued, the VM will not stop. In this case, vm_start()
- * also ensures that the STOP/RESUME pair of events is emitted.
- */
- qemu_system_vmstop_request_prepare();
- send_qmp_error_event(bs, action, is_read, error);
- qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
- } else {
- send_qmp_error_event(bs, action, is_read, error);
- }
-}
-
-int bdrv_is_read_only(BlockDriverState *bs)
+bool bdrv_is_read_only(BlockDriverState *bs)
{
return bs->read_only;
}
-int bdrv_is_sg(BlockDriverState *bs)
+bool bdrv_is_sg(BlockDriverState *bs)
{
return bs->sg;
}
-int bdrv_enable_write_cache(BlockDriverState *bs)
+bool bdrv_is_encrypted(BlockDriverState *bs)
{
- return bs->enable_write_cache;
-}
-
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
-{
- bs->enable_write_cache = wce;
-
- /* so a reopen() will preserve wce */
- if (wce) {
- bs->open_flags |= BDRV_O_CACHE_WB;
- } else {
- bs->open_flags &= ~BDRV_O_CACHE_WB;
+ if (bs->backing && bs->backing->bs->encrypted) {
+ return true;
}
-}
-
-int bdrv_is_encrypted(BlockDriverState *bs)
-{
- if (bs->backing_hd && bs->backing_hd->encrypted)
- return 1;
return bs->encrypted;
}
-int bdrv_key_required(BlockDriverState *bs)
+bool bdrv_key_required(BlockDriverState *bs)
{
- BlockDriverState *backing_hd = bs->backing_hd;
+ BdrvChild *backing = bs->backing;
- if (backing_hd && backing_hd->encrypted && !backing_hd->valid_key)
- return 1;
+ if (backing && backing->bs->encrypted && !backing->bs->valid_key) {
+ return true;
+ }
return (bs->encrypted && !bs->valid_key);
}
int bdrv_set_key(BlockDriverState *bs, const char *key)
{
int ret;
- if (bs->backing_hd && bs->backing_hd->encrypted) {
- ret = bdrv_set_key(bs->backing_hd, key);
+ if (bs->backing && bs->backing->bs->encrypted) {
+ ret = bdrv_set_key(bs->backing->bs, key);
if (ret < 0)
return ret;
if (!bs->encrypted)
@@ -3670,17 +2583,45 @@
}
ret = bs->drv->bdrv_set_key(bs, key);
if (ret < 0) {
- bs->valid_key = 0;
+ bs->valid_key = false;
} else if (!bs->valid_key) {
- bs->valid_key = 1;
- if (bs->blk) {
- /* call the change callback now, we skipped it on open */
- blk_dev_change_media_cb(bs->blk, true);
- }
+ /* call the change callback now, we skipped it on open */
+ bs->valid_key = true;
+ bdrv_parent_cb_change_media(bs, true);
}
return ret;
}
+/*
+ * Provide an encryption key for @bs.
+ * If @key is non-null:
+ * If @bs is not encrypted, fail.
+ * Else if the key is invalid, fail.
+ * Else set @bs's key to @key, replacing the existing key, if any.
+ * If @key is null:
+ * If @bs is encrypted and still lacks a key, fail.
+ * Else do nothing.
+ * On failure, store an error object through @errp if non-null.
+ */
+void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp)
+{
+ if (key) {
+ if (!bdrv_is_encrypted(bs)) {
+ error_setg(errp, "Node '%s' is not encrypted",
+ bdrv_get_device_or_node_name(bs));
+ } else if (bdrv_set_key(bs, key) < 0) {
+ error_setg(errp, QERR_INVALID_PASSWORD);
+ }
+ } else {
+ if (bdrv_key_required(bs)) {
+ error_set(errp, ERROR_CLASS_DEVICE_ENCRYPTED,
+ "'%s' (%s) is encrypted",
+ bdrv_get_device_or_node_name(bs),
+ bdrv_get_encrypted_filename(bs));
+ }
+ }
+}
+
const char *bdrv_get_format_name(BlockDriverState *bs)
{
return bs->drv ? bs->drv->format_name : NULL;
@@ -3723,15 +2664,6 @@
g_free(formats);
}
-/* This function is to find block backend bs */
-/* TODO convert callers to blk_by_name(), then remove */
-BlockDriverState *bdrv_find(const char *name)
-{
- BlockBackend *blk = blk_by_name(name);
-
- return blk ? blk_bs(blk) : NULL;
-}
-
/* This function is to find a node in the bs graph */
BlockDriverState *bdrv_find_node(const char *node_name)
{
@@ -3748,15 +2680,20 @@
}
/* Put this QMP function here so it can access the static graph_bdrv_states. */
-BlockDeviceInfoList *bdrv_named_nodes_list(void)
+BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp)
{
BlockDeviceInfoList *list, *entry;
BlockDriverState *bs;
list = NULL;
QTAILQ_FOREACH(bs, &graph_bdrv_states, node_list) {
+ BlockDeviceInfo *info = bdrv_block_device_info(NULL, bs, errp);
+ if (!info) {
+ qapi_free_BlockDeviceInfoList(list);
+ return NULL;
+ }
entry = g_malloc0(sizeof(*entry));
- entry->value = bdrv_block_device_info(bs);
+ entry->value = info;
entry->next = list;
list = entry;
}
@@ -3775,7 +2712,12 @@
blk = blk_by_name(device);
if (blk) {
- return blk_bs(blk);
+ bs = blk_bs(blk);
+ if (!bs) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ }
+
+ return bs;
}
}
@@ -3798,24 +2740,56 @@
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
{
while (top && top != base) {
- top = top->backing_hd;
+ top = backing_bs(top);
}
return top != NULL;
}
-BlockDriverState *bdrv_next(BlockDriverState *bs)
+BlockDriverState *bdrv_next_node(BlockDriverState *bs)
{
if (!bs) {
- return QTAILQ_FIRST(&bdrv_states);
+ return QTAILQ_FIRST(&graph_bdrv_states);
}
- return QTAILQ_NEXT(bs, device_list);
+ return QTAILQ_NEXT(bs, node_list);
+}
+
+const char *bdrv_get_node_name(const BlockDriverState *bs)
+{
+ return bs->node_name;
+}
+
+const char *bdrv_get_parent_name(const BlockDriverState *bs)
+{
+ BdrvChild *c;
+ const char *name;
+
+ /* If multiple parents have a name, just pick the first one. */
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->get_name) {
+ name = c->role->get_name(c);
+ if (name && *name) {
+ return name;
+ }
+ }
+ }
+
+ return NULL;
}
/* TODO check what callers really want: bs->node_name or blk_name() */
const char *bdrv_get_device_name(const BlockDriverState *bs)
{
- return bs->blk ? blk_name(bs->blk) : "";
+ return bdrv_get_parent_name(bs) ?: "";
+}
+
+/* This can be used to identify nodes that might not have a device
+ * name associated. Since node and device names live in the same
+ * namespace, the result is unambiguous. The exception is if both are
+ * absent, then this returns an empty (non-null) string. */
+const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
+{
+ return bdrv_get_parent_name(bs) ?: bs->node_name;
}
int bdrv_get_flags(BlockDriverState *bs)
@@ -3823,26 +2797,6 @@
return bs->open_flags;
}
-int bdrv_flush_all(void)
-{
- BlockDriverState *bs;
- int result = 0;
-
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
- int ret;
-
- aio_context_acquire(aio_context);
- ret = bdrv_flush(bs);
- if (ret < 0 && !result) {
- result = ret;
- }
- aio_context_release(aio_context);
- }
-
- return result;
-}
-
int bdrv_has_zero_init_1(BlockDriverState *bs)
{
return 1;
@@ -3854,7 +2808,7 @@
/* If BS is a copy on write image, it is initialized to
the contents of the base image, which may not be zeroes. */
- if (bs->backing_hd) {
+ if (bs->backing) {
return 0;
}
if (bs->drv->bdrv_has_zero_init) {
@@ -3869,7 +2823,7 @@
{
BlockDriverInfo bdi;
- if (bs->backing_hd) {
+ if (bs->backing) {
return false;
}
@@ -3884,7 +2838,7 @@
{
BlockDriverInfo bdi;
- if (bs->backing_hd || !(bs->open_flags & BDRV_O_UNMAP)) {
+ if (!(bs->open_flags & BDRV_O_UNMAP)) {
return false;
}
@@ -3895,225 +2849,9 @@
return false;
}
-typedef struct BdrvCoGetBlockStatusData {
- BlockDriverState *bs;
- BlockDriverState *base;
- int64_t sector_num;
- int nb_sectors;
- int *pnum;
- int64_t ret;
- bool done;
-} BdrvCoGetBlockStatusData;
-
-/*
- * Returns the allocation status of the specified sectors.
- * Drivers not implementing the functionality are assumed to not support
- * backing files, hence all their sectors are reported as allocated.
- *
- * If 'sector_num' is beyond the end of the disk image the return value is 0
- * and 'pnum' is set to 0.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
- * beyond the end of the disk image it will be clamped.
- */
-static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- int64_t total_sectors;
- int64_t n;
- int64_t ret, ret2;
-
- total_sectors = bdrv_nb_sectors(bs);
- if (total_sectors < 0) {
- return total_sectors;
- }
-
- if (sector_num >= total_sectors) {
- *pnum = 0;
- return 0;
- }
-
- n = total_sectors - sector_num;
- if (n < nb_sectors) {
- nb_sectors = n;
- }
-
- if (!bs->drv->bdrv_co_get_block_status) {
- *pnum = nb_sectors;
- ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
- if (bs->drv->protocol_name) {
- ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
- }
- return ret;
- }
-
- ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum);
- if (ret < 0) {
- *pnum = 0;
- return ret;
- }
-
- if (ret & BDRV_BLOCK_RAW) {
- assert(ret & BDRV_BLOCK_OFFSET_VALID);
- return bdrv_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
- *pnum, pnum);
- }
-
- if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
- ret |= BDRV_BLOCK_ALLOCATED;
- }
-
- if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) {
- if (bdrv_unallocated_blocks_are_zero(bs)) {
- ret |= BDRV_BLOCK_ZERO;
- } else if (bs->backing_hd) {
- BlockDriverState *bs2 = bs->backing_hd;
- int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
- if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
- ret |= BDRV_BLOCK_ZERO;
- }
- }
- }
-
- if (bs->file &&
- (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
- (ret & BDRV_BLOCK_OFFSET_VALID)) {
- int file_pnum;
-
- ret2 = bdrv_co_get_block_status(bs->file, ret >> BDRV_SECTOR_BITS,
- *pnum, &file_pnum);
- if (ret2 >= 0) {
- /* Ignore errors. This is just providing extra information, it
- * is useful but not necessary.
- */
- if (!file_pnum) {
- /* !file_pnum indicates an offset at or beyond the EOF; it is
- * perfectly valid for the format block driver to point to such
- * offsets, so catch it and mark everything as zero */
- ret |= BDRV_BLOCK_ZERO;
- } else {
- /* Limit request to the range reported by the protocol driver */
- *pnum = file_pnum;
- ret |= (ret2 & BDRV_BLOCK_ZERO);
- }
- }
- }
-
- return ret;
-}
-
-/* Coroutine wrapper for bdrv_get_block_status() */
-static void coroutine_fn bdrv_get_block_status_co_entry(void *opaque)
-{
- BdrvCoGetBlockStatusData *data = opaque;
- BlockDriverState *bs = data->bs;
-
- data->ret = bdrv_co_get_block_status(bs, data->sector_num, data->nb_sectors,
- data->pnum);
- data->done = true;
-}
-
-/*
- * Synchronous wrapper around bdrv_co_get_block_status().
- *
- * See bdrv_co_get_block_status() for details.
- */
-int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- Coroutine *co;
- BdrvCoGetBlockStatusData data = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .pnum = pnum,
- .done = false,
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_get_block_status_co_entry(&data);
- } else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
-
- co = qemu_coroutine_create(bdrv_get_block_status_co_entry);
- qemu_coroutine_enter(co, &data);
- while (!data.done) {
- aio_poll(aio_context, true);
- }
- }
- return data.ret;
-}
-
-int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- int64_t ret = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum);
- if (ret < 0) {
- return ret;
- }
- return !!(ret & BDRV_BLOCK_ALLOCATED);
-}
-
-/*
- * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
- *
- * Return true if the given sector is allocated in any image between
- * BASE and TOP (inclusive). BASE can be NULL to check if the given
- * sector is allocated in any image of the chain. Return false otherwise.
- *
- * 'pnum' is set to the number of sectors (including and immediately following
- * the specified sector) that are known to be in the same
- * allocated/unallocated state.
- *
- */
-int bdrv_is_allocated_above(BlockDriverState *top,
- BlockDriverState *base,
- int64_t sector_num,
- int nb_sectors, int *pnum)
-{
- BlockDriverState *intermediate;
- int ret, n = nb_sectors;
-
- intermediate = top;
- while (intermediate && intermediate != base) {
- int pnum_inter;
- ret = bdrv_is_allocated(intermediate, sector_num, nb_sectors,
- &pnum_inter);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- *pnum = pnum_inter;
- return 1;
- }
-
- /*
- * [sector_num, nb_sectors] is unallocated on top but intermediate
- * might have
- *
- * [sector_num+x, nr_sectors] allocated.
- */
- if (n > pnum_inter &&
- (intermediate == top ||
- sector_num + pnum_inter < intermediate->total_sectors)) {
- n = pnum_inter;
- }
-
- intermediate = intermediate->backing_hd;
- }
-
- *pnum = n;
- return 0;
-}
-
const char *bdrv_get_encrypted_filename(BlockDriverState *bs)
{
- if (bs->backing_hd && bs->backing_hd->encrypted)
+ if (bs->backing && bs->backing->bs->encrypted)
return bs->backing_file;
else if (bs->encrypted)
return bs->filename;
@@ -4127,22 +2865,6 @@
pstrcpy(filename, filename_size, bs->backing_file);
}
-int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (!drv->bdrv_write_compressed)
- return -ENOTSUP;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
- return -EIO;
-
- assert(QLIST_EMPTY(&bs->dirty_bitmaps));
-
- return drv->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
-}
-
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
BlockDriver *drv = bs->drv;
@@ -4163,48 +2885,7 @@
return NULL;
}
-int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
- int64_t pos, int size)
-{
- QEMUIOVector qiov;
- struct iovec iov = {
- .iov_base = (void *) buf,
- .iov_len = size,
- };
-
- qemu_iovec_init_external(&qiov, &iov, 1);
- return bdrv_writev_vmstate(bs, &qiov, pos);
-}
-
-int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
-{
- BlockDriver *drv = bs->drv;
-
- if (!drv) {
- return -ENOMEDIUM;
- } else if (drv->bdrv_save_vmstate) {
- return drv->bdrv_save_vmstate(bs, qiov, pos);
- } else if (bs->file) {
- return bdrv_writev_vmstate(bs->file, qiov, pos);
- }
-
- return -ENOTSUP;
-}
-
-int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
-{
- BlockDriver *drv = bs->drv;
- if (!drv)
- return -ENOMEDIUM;
- if (drv->bdrv_load_vmstate)
- return drv->bdrv_load_vmstate(bs, buf, pos, size);
- if (bs->file)
- return bdrv_load_vmstate(bs->file, buf, pos, size);
- return -ENOTSUP;
-}
-
-void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
if (!bs || !bs->drv || !bs->drv->bdrv_debug_event) {
return;
@@ -4217,7 +2898,7 @@
const char *tag)
{
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
- bs = bs->file;
+ bs = bs->file ? bs->file->bs : NULL;
}
if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
@@ -4230,7 +2911,7 @@
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
{
while (bs && bs->drv && !bs->drv->bdrv_debug_remove_breakpoint) {
- bs = bs->file;
+ bs = bs->file ? bs->file->bs : NULL;
}
if (bs && bs->drv && bs->drv->bdrv_debug_remove_breakpoint) {
@@ -4243,7 +2924,7 @@
int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
- bs = bs->file;
+ bs = bs->file ? bs->file->bs : NULL;
}
if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
@@ -4256,7 +2937,7 @@
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
- bs = bs->file;
+ bs = bs->file ? bs->file->bs : NULL;
}
if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
@@ -4295,13 +2976,13 @@
is_protocol = path_has_protocol(backing_file);
- for (curr_bs = bs; curr_bs->backing_hd; curr_bs = curr_bs->backing_hd) {
+ for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) {
/* If either of the filename paths is actually a protocol, then
* compare unmodified paths; otherwise make paths relative */
if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
if (strcmp(backing_file, curr_bs->backing_file) == 0) {
- retval = curr_bs->backing_hd;
+ retval = curr_bs->backing->bs;
break;
}
} else {
@@ -4325,7 +3006,7 @@
}
if (strcmp(backing_file_full, filename_full) == 0) {
- retval = curr_bs->backing_hd;
+ retval = curr_bs->backing->bs;
break;
}
}
@@ -4343,455 +3024,11 @@
return 0;
}
- if (!bs->backing_hd) {
+ if (!bs->backing) {
return 0;
}
- return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
-}
-
-/**************************************************************/
-/* async I/Os */
-
-BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, false);
-}
-
-BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num,
- QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, qiov, nb_sectors, 0,
- cb, opaque, true);
-}
-
-BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_write_zeroes(bs, sector_num, nb_sectors, flags, opaque);
-
- return bdrv_co_aio_rw_vector(bs, sector_num, NULL, nb_sectors,
- BDRV_REQ_ZERO_WRITE | flags,
- cb, opaque, true);
-}
-
-
-typedef struct MultiwriteCB {
- int error;
- int num_requests;
- int num_callbacks;
- struct {
- BlockCompletionFunc *cb;
- void *opaque;
- QEMUIOVector *free_qiov;
- } callbacks[];
-} MultiwriteCB;
-
-static void multiwrite_user_cb(MultiwriteCB *mcb)
-{
- int i;
-
- for (i = 0; i < mcb->num_callbacks; i++) {
- mcb->callbacks[i].cb(mcb->callbacks[i].opaque, mcb->error);
- if (mcb->callbacks[i].free_qiov) {
- qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
- }
- g_free(mcb->callbacks[i].free_qiov);
- }
-}
-
-static void multiwrite_cb(void *opaque, int ret)
-{
- MultiwriteCB *mcb = opaque;
-
- trace_multiwrite_cb(mcb, ret);
-
- if (ret < 0 && !mcb->error) {
- mcb->error = ret;
- }
-
- mcb->num_requests--;
- if (mcb->num_requests == 0) {
- multiwrite_user_cb(mcb);
- g_free(mcb);
- }
-}
-
-static int multiwrite_req_compare(const void *a, const void *b)
-{
- const BlockRequest *req1 = a, *req2 = b;
-
- /*
- * Note that we can't simply subtract req2->sector from req1->sector
- * here as that could overflow the return value.
- */
- if (req1->sector > req2->sector) {
- return 1;
- } else if (req1->sector < req2->sector) {
- return -1;
- } else {
- return 0;
- }
-}
-
-/*
- * Takes a bunch of requests and tries to merge them. Returns the number of
- * requests that remain after merging.
- */
-static int multiwrite_merge(BlockDriverState *bs, BlockRequest *reqs,
- int num_reqs, MultiwriteCB *mcb)
-{
- int i, outidx;
-
- // Sort requests by start sector
- qsort(reqs, num_reqs, sizeof(*reqs), &multiwrite_req_compare);
-
- // Check if adjacent requests touch the same clusters. If so, combine them,
- // filling up gaps with zero sectors.
- outidx = 0;
- for (i = 1; i < num_reqs; i++) {
- int merge = 0;
- int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
-
- // Handle exactly sequential writes and overlapping writes.
- if (reqs[i].sector <= oldreq_last) {
- merge = 1;
- }
-
- if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
- merge = 0;
- }
-
- if (bs->bl.max_transfer_length && reqs[outidx].nb_sectors +
- reqs[i].nb_sectors > bs->bl.max_transfer_length) {
- merge = 0;
- }
-
- if (merge) {
- size_t size;
- QEMUIOVector *qiov = g_malloc0(sizeof(*qiov));
- qemu_iovec_init(qiov,
- reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1);
-
- // Add the first request to the merged one. If the requests are
- // overlapping, drop the last sectors of the first request.
- size = (reqs[i].sector - reqs[outidx].sector) << 9;
- qemu_iovec_concat(qiov, reqs[outidx].qiov, 0, size);
-
- // We should need to add any zeros between the two requests
- assert (reqs[i].sector <= oldreq_last);
-
- // Add the second request
- qemu_iovec_concat(qiov, reqs[i].qiov, 0, reqs[i].qiov->size);
-
- // Add tail of first request, if necessary
- if (qiov->size < reqs[outidx].qiov->size) {
- qemu_iovec_concat(qiov, reqs[outidx].qiov, qiov->size,
- reqs[outidx].qiov->size - qiov->size);
- }
-
- reqs[outidx].nb_sectors = qiov->size >> 9;
- reqs[outidx].qiov = qiov;
-
- mcb->callbacks[i].free_qiov = reqs[outidx].qiov;
- } else {
- outidx++;
- reqs[outidx].sector = reqs[i].sector;
- reqs[outidx].nb_sectors = reqs[i].nb_sectors;
- reqs[outidx].qiov = reqs[i].qiov;
- }
- }
-
- return outidx + 1;
-}
-
-/*
- * Submit multiple AIO write requests at once.
- *
- * On success, the function returns 0 and all requests in the reqs array have
- * been submitted. In error case this function returns -1, and any of the
- * requests may or may not be submitted yet. In particular, this means that the
- * callback will be called for some of the requests, for others it won't. The
- * caller must check the error field of the BlockRequest to wait for the right
- * callbacks (if error != 0, no callback will be called).
- *
- * The implementation may modify the contents of the reqs array, e.g. to merge
- * requests. However, the fields opaque and error are left unmodified as they
- * are used to signal failure for a single request to the caller.
- */
-int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, int num_reqs)
-{
- MultiwriteCB *mcb;
- int i;
-
- /* don't submit writes if we don't have a medium */
- if (bs->drv == NULL) {
- for (i = 0; i < num_reqs; i++) {
- reqs[i].error = -ENOMEDIUM;
- }
- return -1;
- }
-
- if (num_reqs == 0) {
- return 0;
- }
-
- // Create MultiwriteCB structure
- mcb = g_malloc0(sizeof(*mcb) + num_reqs * sizeof(*mcb->callbacks));
- mcb->num_requests = 0;
- mcb->num_callbacks = num_reqs;
-
- for (i = 0; i < num_reqs; i++) {
- mcb->callbacks[i].cb = reqs[i].cb;
- mcb->callbacks[i].opaque = reqs[i].opaque;
- }
-
- // Check for mergable requests
- num_reqs = multiwrite_merge(bs, reqs, num_reqs, mcb);
-
- trace_bdrv_aio_multiwrite(mcb, mcb->num_callbacks, num_reqs);
-
- /* Run the aio requests. */
- mcb->num_requests = num_reqs;
- for (i = 0; i < num_reqs; i++) {
- bdrv_co_aio_rw_vector(bs, reqs[i].sector, reqs[i].qiov,
- reqs[i].nb_sectors, reqs[i].flags,
- multiwrite_cb, mcb,
- true);
- }
-
- return 0;
-}
-
-void bdrv_aio_cancel(BlockAIOCB *acb)
-{
- qemu_aio_ref(acb);
- bdrv_aio_cancel_async(acb);
- while (acb->refcnt > 1) {
- if (acb->aiocb_info->get_aio_context) {
- aio_poll(acb->aiocb_info->get_aio_context(acb), true);
- } else if (acb->bs) {
- aio_poll(bdrv_get_aio_context(acb->bs), true);
- } else {
- abort();
- }
- }
- qemu_aio_unref(acb);
-}
-
-/* Async version of aio cancel. The caller is not blocked if the acb implements
- * cancel_async, otherwise we do nothing and let the request normally complete.
- * In either case the completion callback must be called. */
-void bdrv_aio_cancel_async(BlockAIOCB *acb)
-{
- if (acb->aiocb_info->cancel_async) {
- acb->aiocb_info->cancel_async(acb);
- }
-}
-
-/**************************************************************/
-/* async block device emulation */
-
-typedef struct BlockAIOCBSync {
- BlockAIOCB common;
- QEMUBH *bh;
- int ret;
- /* vector translation state */
- QEMUIOVector *qiov;
- uint8_t *bounce;
- int is_write;
-} BlockAIOCBSync;
-
-static const AIOCBInfo bdrv_em_aiocb_info = {
- .aiocb_size = sizeof(BlockAIOCBSync),
-};
-
-static void bdrv_aio_bh_cb(void *opaque)
-{
- BlockAIOCBSync *acb = opaque;
-
- if (!acb->is_write && acb->ret >= 0) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
- }
- qemu_vfree(acb->bounce);
- acb->common.cb(acb->common.opaque, acb->ret);
- qemu_bh_delete(acb->bh);
- acb->bh = NULL;
- qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *bdrv_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque,
- int is_write)
-
-{
- BlockAIOCBSync *acb;
-
- acb = qemu_aio_get(&bdrv_em_aiocb_info, bs, cb, opaque);
- acb->is_write = is_write;
- acb->qiov = qiov;
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_aio_bh_cb, acb);
-
- if (acb->bounce == NULL) {
- acb->ret = -ENOMEM;
- } else if (is_write) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- acb->ret = bs->drv->bdrv_write(bs, sector_num, acb->bounce, nb_sectors);
- } else {
- acb->ret = bs->drv->bdrv_read(bs, sector_num, acb->bounce, nb_sectors);
- }
-
- qemu_bh_schedule(acb->bh);
-
- return &acb->common;
-}
-
-static BlockAIOCB *bdrv_aio_readv_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
-}
-
-static BlockAIOCB *bdrv_aio_writev_em(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return bdrv_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
-}
-
-
-typedef struct BlockAIOCBCoroutine {
- BlockAIOCB common;
- BlockRequest req;
- bool is_write;
- bool *done;
- QEMUBH* bh;
-} BlockAIOCBCoroutine;
-
-static const AIOCBInfo bdrv_em_co_aiocb_info = {
- .aiocb_size = sizeof(BlockAIOCBCoroutine),
-};
-
-static void bdrv_co_em_bh(void *opaque)
-{
- BlockAIOCBCoroutine *acb = opaque;
-
- acb->common.cb(acb->common.opaque, acb->req.error);
-
- qemu_bh_delete(acb->bh);
- qemu_aio_unref(acb);
-}
-
-/* Invoke bdrv_co_do_readv/bdrv_co_do_writev */
-static void coroutine_fn bdrv_co_do_rw(void *opaque)
-{
- BlockAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- if (!acb->is_write) {
- acb->req.error = bdrv_co_do_readv(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
- } else {
- acb->req.error = bdrv_co_do_writev(bs, acb->req.sector,
- acb->req.nb_sectors, acb->req.qiov, acb->req.flags);
- }
-
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-static BlockAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs,
- int64_t sector_num,
- QEMUIOVector *qiov,
- int nb_sectors,
- BdrvRequestFlags flags,
- BlockCompletionFunc *cb,
- void *opaque,
- bool is_write)
-{
- Coroutine *co;
- BlockAIOCBCoroutine *acb;
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- acb->req.qiov = qiov;
- acb->req.flags = flags;
- acb->is_write = is_write;
-
- co = qemu_coroutine_create(bdrv_co_do_rw);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
-{
- BlockAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- acb->req.error = bdrv_co_flush(bs);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque)
-{
- trace_bdrv_aio_flush(bs, opaque);
-
- Coroutine *co;
- BlockAIOCBCoroutine *acb;
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
-
- co = qemu_coroutine_create(bdrv_aio_flush_co_entry);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
-}
-
-static void coroutine_fn bdrv_aio_discard_co_entry(void *opaque)
-{
- BlockAIOCBCoroutine *acb = opaque;
- BlockDriverState *bs = acb->common.bs;
-
- acb->req.error = bdrv_co_discard(bs, acb->req.sector, acb->req.nb_sectors);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
- qemu_bh_schedule(acb->bh);
-}
-
-BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- Coroutine *co;
- BlockAIOCBCoroutine *acb;
-
- trace_bdrv_aio_discard(bs, sector_num, nb_sectors, opaque);
-
- acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
- acb->req.sector = sector_num;
- acb->req.nb_sectors = nb_sectors;
- co = qemu_coroutine_create(bdrv_aio_discard_co_entry);
- qemu_coroutine_enter(co, acb);
-
- return &acb->common;
+ return 1 + bdrv_get_backing_file_depth(bs->backing->bs);
}
void bdrv_init(void)
@@ -4805,163 +3042,9 @@
bdrv_init();
}
-void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
- BlockCompletionFunc *cb, void *opaque)
-{
- BlockAIOCB *acb;
-
- acb = g_slice_alloc(aiocb_info->aiocb_size);
- acb->aiocb_info = aiocb_info;
- acb->bs = bs;
- acb->cb = cb;
- acb->opaque = opaque;
- acb->refcnt = 1;
- return acb;
-}
-
-void qemu_aio_ref(void *p)
-{
- BlockAIOCB *acb = p;
- acb->refcnt++;
-}
-
-void qemu_aio_unref(void *p)
-{
- BlockAIOCB *acb = p;
- assert(acb->refcnt > 0);
- if (--acb->refcnt == 0) {
- g_slice_free1(acb->aiocb_info->aiocb_size, acb);
- }
-}
-
-/**************************************************************/
-/* Coroutine block device emulation */
-
-typedef struct CoroutineIOCompletion {
- Coroutine *coroutine;
- int ret;
-} CoroutineIOCompletion;
-
-static void bdrv_co_io_em_complete(void *opaque, int ret)
-{
- CoroutineIOCompletion *co = opaque;
-
- co->ret = ret;
- qemu_coroutine_enter(co->coroutine, NULL);
-}
-
-static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *iov,
- bool is_write)
-{
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
- BlockAIOCB *acb;
-
- if (is_write) {
- acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- } else {
- acb = bs->drv->bdrv_aio_readv(bs, sector_num, iov, nb_sectors,
- bdrv_co_io_em_complete, &co);
- }
-
- trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb);
- if (!acb) {
- return -EIO;
- }
- qemu_coroutine_yield();
-
- return co.ret;
-}
-
-static int coroutine_fn bdrv_co_readv_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, false);
-}
-
-static int coroutine_fn bdrv_co_writev_em(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
-{
- return bdrv_co_io_em(bs, sector_num, nb_sectors, iov, true);
-}
-
-static void coroutine_fn bdrv_flush_co_entry(void *opaque)
-{
- RwCo *rwco = opaque;
-
- rwco->ret = bdrv_co_flush(rwco->bs);
-}
-
-int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
-{
- int ret;
-
- if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
- return 0;
- }
-
- /* Write back cached data to the OS even with cache=unsafe */
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
- if (bs->drv->bdrv_co_flush_to_os) {
- ret = bs->drv->bdrv_co_flush_to_os(bs);
- if (ret < 0) {
- return ret;
- }
- }
-
- /* But don't actually force it to the disk with cache=unsafe */
- if (bs->open_flags & BDRV_O_NO_FLUSH) {
- goto flush_parent;
- }
-
- BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
- if (bs->drv->bdrv_co_flush_to_disk) {
- ret = bs->drv->bdrv_co_flush_to_disk(bs);
- } else if (bs->drv->bdrv_aio_flush) {
- BlockAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- ret = -EIO;
- } else {
- qemu_coroutine_yield();
- ret = co.ret;
- }
- } else {
- /*
- * Some block drivers always operate in either writethrough or unsafe
- * mode and don't support bdrv_flush therefore. Usually qemu doesn't
- * know how the server works (because the behaviour is hardcoded or
- * depends on server-side configuration), so we can't ensure that
- * everything is safe on disk. Returning an error doesn't work because
- * that would break guests even if the server operates in writethrough
- * mode.
- *
- * Let's hope the user knows what he's doing.
- */
- ret = 0;
- }
- if (ret < 0) {
- return ret;
- }
-
- /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
- * in the case of cache=unsafe, so there are no useless flushes.
- */
-flush_parent:
- return bdrv_co_flush(bs->file);
-}
-
void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
{
+ BdrvChild *child;
Error *local_err = NULL;
int ret;
@@ -4969,23 +3052,32 @@
return;
}
- if (!(bs->open_flags & BDRV_O_INCOMING)) {
+ if (!(bs->open_flags & BDRV_O_INACTIVE)) {
return;
}
- bs->open_flags &= ~BDRV_O_INCOMING;
+ bs->open_flags &= ~BDRV_O_INACTIVE;
if (bs->drv->bdrv_invalidate_cache) {
bs->drv->bdrv_invalidate_cache(bs, &local_err);
- } else if (bs->file) {
- bdrv_invalidate_cache(bs->file, &local_err);
+ if (local_err) {
+ bs->open_flags |= BDRV_O_INACTIVE;
+ error_propagate(errp, local_err);
+ return;
+ }
}
- if (local_err) {
- error_propagate(errp, local_err);
- return;
+
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_invalidate_cache(child->bs, &local_err);
+ if (local_err) {
+ bs->open_flags |= BDRV_O_INACTIVE;
+ error_propagate(errp, local_err);
+ return;
+ }
}
ret = refresh_total_sectors(bs, bs->total_sectors);
if (ret < 0) {
+ bs->open_flags |= BDRV_O_INACTIVE;
error_setg_errno(errp, -ret, "Could not refresh total sector count");
return;
}
@@ -4995,8 +3087,9 @@
{
BlockDriverState *bs;
Error *local_err = NULL;
+ BdrvNextIterator it;
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
@@ -5009,143 +3102,62 @@
}
}
-int bdrv_flush(BlockDriverState *bs)
+static int bdrv_inactivate_recurse(BlockDriverState *bs,
+ bool setting_flag)
{
- Coroutine *co;
- RwCo rwco = {
- .bs = bs,
- .ret = NOT_DONE,
- };
-
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_flush_co_entry(&rwco);
- } else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
-
- co = qemu_coroutine_create(bdrv_flush_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- aio_poll(aio_context, true);
- }
- }
-
- return rwco.ret;
-}
-
-typedef struct DiscardCo {
- BlockDriverState *bs;
- int64_t sector_num;
- int nb_sectors;
+ BdrvChild *child;
int ret;
-} DiscardCo;
-static void coroutine_fn bdrv_discard_co_entry(void *opaque)
-{
- DiscardCo *rwco = opaque;
- rwco->ret = bdrv_co_discard(rwco->bs, rwco->sector_num, rwco->nb_sectors);
-}
-
-/* if no limit is specified in the BlockLimits use a default
- * of 32768 512-byte sectors (16 MiB) per request.
- */
-#define MAX_DISCARD_DEFAULT 32768
-
-int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
-{
- int max_discard;
-
- if (!bs->drv) {
- return -ENOMEDIUM;
- } else if (bdrv_check_request(bs, sector_num, nb_sectors)) {
- return -EIO;
- } else if (bs->read_only) {
- return -EROFS;
- }
-
- bdrv_reset_dirty(bs, sector_num, nb_sectors);
-
- /* Do nothing if disabled. */
- if (!(bs->open_flags & BDRV_O_UNMAP)) {
- return 0;
- }
-
- if (!bs->drv->bdrv_co_discard && !bs->drv->bdrv_aio_discard) {
- return 0;
- }
-
- max_discard = bs->bl.max_discard ? bs->bl.max_discard : MAX_DISCARD_DEFAULT;
- while (nb_sectors > 0) {
- int ret;
- int num = nb_sectors;
-
- /* align request */
- if (bs->bl.discard_alignment &&
- num >= bs->bl.discard_alignment &&
- sector_num % bs->bl.discard_alignment) {
- if (num > bs->bl.discard_alignment) {
- num = bs->bl.discard_alignment;
- }
- num -= sector_num % bs->bl.discard_alignment;
- }
-
- /* limit request size */
- if (num > max_discard) {
- num = max_discard;
- }
-
- if (bs->drv->bdrv_co_discard) {
- ret = bs->drv->bdrv_co_discard(bs, sector_num, num);
- } else {
- BlockAIOCB *acb;
- CoroutineIOCompletion co = {
- .coroutine = qemu_coroutine_self(),
- };
-
- acb = bs->drv->bdrv_aio_discard(bs, sector_num, nb_sectors,
- bdrv_co_io_em_complete, &co);
- if (acb == NULL) {
- return -EIO;
- } else {
- qemu_coroutine_yield();
- ret = co.ret;
- }
- }
- if (ret && ret != -ENOTSUP) {
+ if (!setting_flag && bs->drv->bdrv_inactivate) {
+ ret = bs->drv->bdrv_inactivate(bs);
+ if (ret < 0) {
return ret;
}
+ }
- sector_num += num;
- nb_sectors -= num;
+ QLIST_FOREACH(child, &bs->children, next) {
+ ret = bdrv_inactivate_recurse(child->bs, setting_flag);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ if (setting_flag) {
+ bs->open_flags |= BDRV_O_INACTIVE;
}
return 0;
}
-int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors)
+int bdrv_inactivate_all(void)
{
- Coroutine *co;
- DiscardCo rwco = {
- .bs = bs,
- .sector_num = sector_num,
- .nb_sectors = nb_sectors,
- .ret = NOT_DONE,
- };
+ BlockDriverState *bs = NULL;
+ BdrvNextIterator it;
+ int ret = 0;
+ int pass;
- if (qemu_in_coroutine()) {
- /* Fast-path if already in coroutine context */
- bdrv_discard_co_entry(&rwco);
- } else {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ aio_context_acquire(bdrv_get_aio_context(bs));
+ }
- co = qemu_coroutine_create(bdrv_discard_co_entry);
- qemu_coroutine_enter(co, &rwco);
- while (rwco.ret == NOT_DONE) {
- aio_poll(aio_context, true);
+ /* We do two passes of inactivation. The first pass calls the drivers'
+ * .bdrv_inactivate callbacks recursively so all cache is flushed to disk;
+ * the second pass sets the BDRV_O_INACTIVE flag so that no further write
+ * is allowed. */
+ for (pass = 0; pass < 2; pass++) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ ret = bdrv_inactivate_recurse(bs, pass);
+ if (ret < 0) {
+ goto out;
+ }
}
}
- return rwco.ret;
+out:
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ aio_context_release(bdrv_get_aio_context(bs));
+ }
+
+ return ret;
}
/**************************************************************/
@@ -5154,15 +3166,23 @@
/**
* Return TRUE if the media is present
*/
-int bdrv_is_inserted(BlockDriverState *bs)
+bool bdrv_is_inserted(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
+ BdrvChild *child;
- if (!drv)
- return 0;
- if (!drv->bdrv_is_inserted)
- return 1;
- return drv->bdrv_is_inserted(bs);
+ if (!drv) {
+ return false;
+ }
+ if (drv->bdrv_is_inserted) {
+ return drv->bdrv_is_inserted(bs);
+ }
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (!bdrv_is_inserted(child->bs)) {
+ return false;
+ }
+ }
+ return true;
}
/**
@@ -5213,179 +3233,6 @@
}
}
-/* needed for generic scsi interface */
-
-int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_ioctl)
- return drv->bdrv_ioctl(bs, req, buf);
- return -ENOTSUP;
-}
-
-BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
- unsigned long int req, void *buf,
- BlockCompletionFunc *cb, void *opaque)
-{
- BlockDriver *drv = bs->drv;
-
- if (drv && drv->bdrv_aio_ioctl)
- return drv->bdrv_aio_ioctl(bs, req, buf, cb, opaque);
- return NULL;
-}
-
-void bdrv_set_guest_block_size(BlockDriverState *bs, int align)
-{
- bs->guest_block_size = align;
-}
-
-void *qemu_blockalign(BlockDriverState *bs, size_t size)
-{
- return qemu_memalign(bdrv_opt_mem_align(bs), size);
-}
-
-void *qemu_blockalign0(BlockDriverState *bs, size_t size)
-{
- return memset(qemu_blockalign(bs, size), 0, size);
-}
-
-void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
-{
- size_t align = bdrv_opt_mem_align(bs);
-
- /* Ensure that NULL is never returned on success */
- assert(align > 0);
- if (size == 0) {
- size = align;
- }
-
- return qemu_try_memalign(align, size);
-}
-
-void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
-{
- void *mem = qemu_try_blockalign(bs, size);
-
- if (mem) {
- memset(mem, 0, size);
- }
-
- return mem;
-}
-
-/*
- * Check if all memory in this vector is sector aligned.
- */
-bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
-{
- int i;
- size_t alignment = bdrv_opt_mem_align(bs);
-
- for (i = 0; i < qiov->niov; i++) {
- if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
- return false;
- }
- if (qiov->iov[i].iov_len % alignment) {
- return false;
- }
- }
-
- return true;
-}
-
-BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, int granularity,
- Error **errp)
-{
- int64_t bitmap_size;
- BdrvDirtyBitmap *bitmap;
-
- assert((granularity & (granularity - 1)) == 0);
-
- granularity >>= BDRV_SECTOR_BITS;
- assert(granularity);
- bitmap_size = bdrv_nb_sectors(bs);
- if (bitmap_size < 0) {
- error_setg_errno(errp, -bitmap_size, "could not get length of device");
- errno = -bitmap_size;
- return NULL;
- }
- bitmap = g_new0(BdrvDirtyBitmap, 1);
- bitmap->bitmap = hbitmap_alloc(bitmap_size, ffs(granularity) - 1);
- QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bitmap, list);
- return bitmap;
-}
-
-void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
- BdrvDirtyBitmap *bm, *next;
- QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
- if (bm == bitmap) {
- QLIST_REMOVE(bitmap, list);
- hbitmap_free(bitmap->bitmap);
- g_free(bitmap);
- return;
- }
- }
-}
-
-BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
-{
- BdrvDirtyBitmap *bm;
- BlockDirtyInfoList *list = NULL;
- BlockDirtyInfoList **plist = &list;
-
- QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
- BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
- BlockDirtyInfoList *entry = g_new0(BlockDirtyInfoList, 1);
- info->count = bdrv_get_dirty_count(bs, bm);
- info->granularity =
- ((int64_t) BDRV_SECTOR_SIZE << hbitmap_granularity(bm->bitmap));
- entry->value = info;
- *plist = entry;
- plist = &entry->next;
- }
-
- return list;
-}
-
-int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector)
-{
- if (bitmap) {
- return hbitmap_get(bitmap->bitmap, sector);
- } else {
- return 0;
- }
-}
-
-void bdrv_dirty_iter_init(BlockDriverState *bs,
- BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
-{
- hbitmap_iter_init(hbi, bitmap->bitmap, 0);
-}
-
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
- int nr_sectors)
-{
- BdrvDirtyBitmap *bitmap;
- QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
- hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
- }
-}
-
-void bdrv_reset_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors)
-{
- BdrvDirtyBitmap *bitmap;
- QLIST_FOREACH(bitmap, &bs->dirty_bitmaps, list) {
- hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
- }
-}
-
-int64_t bdrv_get_dirty_count(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
-{
- return hbitmap_count(bitmap->bitmap);
-}
-
/* Get a reference to bs */
void bdrv_ref(BlockDriverState *bs)
{
@@ -5418,9 +3265,9 @@
if (!QLIST_EMPTY(&bs->op_blockers[op])) {
blocker = QLIST_FIRST(&bs->op_blockers[op]);
if (errp) {
- error_setg(errp, "Device '%s' is busy: %s",
- bdrv_get_device_name(bs),
- error_get_pretty(blocker->reason));
+ *errp = error_copy(blocker->reason);
+ error_prepend(errp, "Node '%s' is busy: ",
+ bdrv_get_device_or_node_name(bs));
}
return true;
}
@@ -5477,46 +3324,6 @@
return true;
}
-void bdrv_iostatus_enable(BlockDriverState *bs)
-{
- bs->iostatus_enabled = true;
- bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
-}
-
-/* The I/O status is only enabled if the drive explicitly
- * enables it _and_ the VM is configured to stop on errors */
-bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
-{
- return (bs->iostatus_enabled &&
- (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
- bs->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
- bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
-}
-
-void bdrv_iostatus_disable(BlockDriverState *bs)
-{
- bs->iostatus_enabled = false;
-}
-
-void bdrv_iostatus_reset(BlockDriverState *bs)
-{
- if (bdrv_iostatus_is_enabled(bs)) {
- bs->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
- if (bs->job) {
- block_job_iostatus_reset(bs->job);
- }
- }
-}
-
-void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
-{
- assert(bdrv_iostatus_is_enabled(bs));
- if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
- BLOCK_DEVICE_IO_STATUS_FAILED;
- }
-}
-
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags,
@@ -5527,7 +3334,6 @@
const char *backing_fmt, *backing_file;
int64_t size;
BlockDriver *drv, *proto_drv;
- BlockDriver *backing_drv = NULL;
Error *local_err = NULL;
int ret = 0;
@@ -5538,9 +3344,20 @@
return;
}
- proto_drv = bdrv_find_protocol(filename, true);
+ proto_drv = bdrv_find_protocol(filename, true, errp);
if (!proto_drv) {
- error_setg(errp, "Unknown protocol '%s'", filename);
+ return;
+ }
+
+ if (!drv->create_opts) {
+ error_setg(errp, "Format driver '%s' does not support image creation",
+ drv->format_name);
+ return;
+ }
+
+ if (!proto_drv->create_opts) {
+ error_setg(errp, "Protocol driver '%s' does not support image creation",
+ proto_drv->format_name);
return;
}
@@ -5549,18 +3366,22 @@
/* Create parameter list with default values */
opts = qemu_opts_create(create_opts, NULL, 0, &error_abort);
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size);
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, img_size, &error_abort);
/* Parse -o options */
if (options) {
- if (qemu_opts_do_parse(opts, options, NULL) != 0) {
+ qemu_opts_do_parse(opts, options, NULL, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ local_err = NULL;
error_setg(errp, "Invalid options for file format '%s'", fmt);
goto out;
}
}
if (base_filename) {
- if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename)) {
+ qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, base_filename, &local_err);
+ if (local_err) {
error_setg(errp, "Backing file not supported for file format '%s'",
fmt);
goto out;
@@ -5568,7 +3389,8 @@
}
if (base_fmt) {
- if (qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt)) {
+ qemu_opt_set(opts, BLOCK_OPT_BACKING_FMT, base_fmt, &local_err);
+ if (local_err) {
error_setg(errp, "Backing file format not supported for file "
"format '%s'", fmt);
goto out;
@@ -5585,14 +3407,6 @@
}
backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
- if (backing_fmt) {
- backing_drv = bdrv_find_format(backing_fmt);
- if (!backing_drv) {
- error_setg(errp, "Unknown backing file format '%s'",
- backing_fmt);
- goto out;
- }
- }
// The size for the image must always be specified, with one exception:
// If we are using a backing file, we can obtain the size from there
@@ -5600,17 +3414,33 @@
if (size == -1) {
if (backing_file) {
BlockDriverState *bs;
+ char *full_backing = g_new0(char, PATH_MAX);
int64_t size;
int back_flags;
+ QDict *backing_options = NULL;
+
+ bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+ full_backing, PATH_MAX,
+ &local_err);
+ if (local_err) {
+ g_free(full_backing);
+ goto out;
+ }
/* backing files always opened read-only */
- back_flags =
- flags & ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
+ back_flags = flags;
+ back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
- bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, back_flags,
- backing_drv, &local_err);
- if (ret < 0) {
+ if (backing_fmt) {
+ backing_options = qdict_new();
+ qdict_put(backing_options, "driver",
+ qstring_from_str(backing_fmt));
+ }
+
+ bs = bdrv_open(full_backing, NULL, backing_options, back_flags,
+ &local_err);
+ g_free(full_backing);
+ if (!bs) {
goto out;
}
size = bdrv_getlength(bs);
@@ -5621,7 +3451,7 @@
goto out;
}
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size);
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, size, &error_abort);
bdrv_unref(bs);
} else {
@@ -5632,7 +3462,7 @@
if (!quiet) {
printf("Formatting '%s', fmt=%s ", filename, fmt);
- qemu_opts_print(opts);
+ qemu_opts_print(opts, " ");
puts("");
}
@@ -5655,9 +3485,7 @@
out:
qemu_opts_del(opts);
qemu_opts_free(create_opts);
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
}
AioContext *bdrv_get_aio_context(BlockDriverState *bs)
@@ -5665,29 +3493,40 @@
return bs->aio_context;
}
+static void bdrv_do_remove_aio_context_notifier(BdrvAioNotifier *ban)
+{
+ QLIST_REMOVE(ban, list);
+ g_free(ban);
+}
+
void bdrv_detach_aio_context(BlockDriverState *bs)
{
- BdrvAioNotifier *baf;
+ BdrvAioNotifier *baf, *baf_tmp;
+ BdrvChild *child;
if (!bs->drv) {
return;
}
- QLIST_FOREACH(baf, &bs->aio_notifiers, list) {
- baf->detach_aio_context(baf->opaque);
+ assert(!bs->walking_aio_notifiers);
+ bs->walking_aio_notifiers = true;
+ QLIST_FOREACH_SAFE(baf, &bs->aio_notifiers, list, baf_tmp) {
+ if (baf->deleted) {
+ bdrv_do_remove_aio_context_notifier(baf);
+ } else {
+ baf->detach_aio_context(baf->opaque);
+ }
}
+ /* Never mind iterating again to check for ->deleted. bdrv_close() will
+ * remove remaining aio notifiers if we aren't called again.
+ */
+ bs->walking_aio_notifiers = false;
- if (bs->io_limits_enabled) {
- throttle_detach_aio_context(&bs->throttle_state);
- }
if (bs->drv->bdrv_detach_aio_context) {
bs->drv->bdrv_detach_aio_context(bs);
}
- if (bs->file) {
- bdrv_detach_aio_context(bs->file);
- }
- if (bs->backing_hd) {
- bdrv_detach_aio_context(bs->backing_hd);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_detach_aio_context(child->bs);
}
bs->aio_context = NULL;
@@ -5696,7 +3535,8 @@
void bdrv_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
- BdrvAioNotifier *ban;
+ BdrvAioNotifier *ban, *ban_tmp;
+ BdrvChild *child;
if (!bs->drv) {
return;
@@ -5704,27 +3544,28 @@
bs->aio_context = new_context;
- if (bs->backing_hd) {
- bdrv_attach_aio_context(bs->backing_hd, new_context);
- }
- if (bs->file) {
- bdrv_attach_aio_context(bs->file, new_context);
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_attach_aio_context(child->bs, new_context);
}
if (bs->drv->bdrv_attach_aio_context) {
bs->drv->bdrv_attach_aio_context(bs, new_context);
}
- if (bs->io_limits_enabled) {
- throttle_attach_aio_context(&bs->throttle_state, new_context);
- }
- QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
- ban->attached_aio_context(new_context, ban->opaque);
+ assert(!bs->walking_aio_notifiers);
+ bs->walking_aio_notifiers = true;
+ QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_tmp) {
+ if (ban->deleted) {
+ bdrv_do_remove_aio_context_notifier(ban);
+ } else {
+ ban->attached_aio_context(new_context, ban->opaque);
+ }
}
+ bs->walking_aio_notifiers = false;
}
void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
{
- bdrv_drain_all(); /* ensure there are no in-flight requests */
+ bdrv_drain(bs); /* ensure there are no in-flight requests */
bdrv_detach_aio_context(bs);
@@ -5761,11 +3602,14 @@
QLIST_FOREACH_SAFE(ban, &bs->aio_notifiers, list, ban_next) {
if (ban->attached_aio_context == attached_aio_context &&
ban->detach_aio_context == detach_aio_context &&
- ban->opaque == opaque)
+ ban->opaque == opaque &&
+ ban->deleted == false)
{
- QLIST_REMOVE(ban, list);
- g_free(ban);
-
+ if (bs->walking_aio_notifiers) {
+ ban->deleted = true;
+ } else {
+ bdrv_do_remove_aio_context_notifier(ban);
+ }
return;
}
}
@@ -5773,19 +3617,13 @@
abort();
}
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
- NotifierWithReturn *notifier)
-{
- notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
-
int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
{
if (!bs->drv->bdrv_amend_options) {
return -ENOTSUP;
}
- return bs->drv->bdrv_amend_options(bs, opts, status_cb);
+ return bs->drv->bdrv_amend_options(bs, opts, status_cb, cb_opaque);
}
/* This function will be called by the bdrv_recurse_is_first_non_filter method
@@ -5828,9 +3666,10 @@
bool bdrv_is_first_non_filter(BlockDriverState *candidate)
{
BlockDriverState *bs;
+ BdrvNextIterator it;
/* walk down the bs forest recursively */
- QTAILQ_FOREACH(bs, &bdrv_states, device_list) {
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
bool perm;
/* try to recurse in this top level bs */
@@ -5845,7 +3684,8 @@
return false;
}
-BlockDriverState *check_to_replace_node(const char *node_name, Error **errp)
+BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
+ const char *node_name, Error **errp)
{
BlockDriverState *to_replace_bs = bdrv_find_node(node_name);
AioContext *aio_context;
@@ -5868,7 +3708,7 @@
* Another benefit is that this tests exclude backing files which are
* blocked by the backing blockers.
*/
- if (!bdrv_is_first_non_filter(to_replace_bs)) {
+ if (!bdrv_recurse_is_first_non_filter(parent_bs, to_replace_bs)) {
error_setg(errp, "Only top most non filter can be replaced");
to_replace_bs = NULL;
goto out;
@@ -5879,53 +3719,42 @@
return to_replace_bs;
}
-void bdrv_io_plug(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_plug) {
- drv->bdrv_io_plug(bs);
- } else if (bs->file) {
- bdrv_io_plug(bs->file);
- }
-}
-
-void bdrv_io_unplug(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_unplug) {
- drv->bdrv_io_unplug(bs);
- } else if (bs->file) {
- bdrv_io_unplug(bs->file);
- }
-}
-
-void bdrv_flush_io_queue(BlockDriverState *bs)
-{
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_flush_io_queue) {
- drv->bdrv_flush_io_queue(bs);
- } else if (bs->file) {
- bdrv_flush_io_queue(bs->file);
- }
-}
-
static bool append_open_options(QDict *d, BlockDriverState *bs)
{
const QDictEntry *entry;
+ QemuOptDesc *desc;
+ BdrvChild *child;
bool found_any = false;
+ const char *p;
for (entry = qdict_first(bs->options); entry;
entry = qdict_next(bs->options, entry))
{
- /* Only take options for this level and exclude all non-driver-specific
- * options */
- if (!strchr(qdict_entry_key(entry), '.') &&
- strcmp(qdict_entry_key(entry), "node-name"))
- {
- qobject_incref(qdict_entry_value(entry));
- qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
- found_any = true;
+ /* Exclude options for children */
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (strstart(qdict_entry_key(entry), child->name, &p)
+ && (!*p || *p == '.'))
+ {
+ break;
+ }
}
+ if (child) {
+ continue;
+ }
+
+ /* And exclude all non-driver-specific options */
+ for (desc = bdrv_runtime_opts.desc; desc->name; desc++) {
+ if (!strcmp(qdict_entry_key(entry), desc->name)) {
+ break;
+ }
+ }
+ if (desc->name) {
+ continue;
+ }
+
+ qobject_incref(qdict_entry_value(entry));
+ qdict_put_obj(d, qdict_entry_key(entry), qdict_entry_value(entry));
+ found_any = true;
}
return found_any;
@@ -5955,7 +3784,7 @@
/* This BDS's file name will most probably depend on its file's name, so
* refresh that first */
if (bs->file) {
- bdrv_refresh_filename(bs->file);
+ bdrv_refresh_filename(bs->file->bs);
}
if (drv->bdrv_refresh_filename) {
@@ -5967,7 +3796,10 @@
bs->full_open_options = NULL;
}
- drv->bdrv_refresh_filename(bs);
+ opts = qdict_new();
+ append_open_options(opts, bs);
+ drv->bdrv_refresh_filename(bs, opts);
+ QDECREF(opts);
} else if (bs->file) {
/* Try to reconstruct valid information from the underlying file */
bool has_open_options;
@@ -5983,19 +3815,20 @@
/* If no specific options have been given for this BDS, the filename of
* the underlying file should suffice for this one as well */
- if (bs->file->exact_filename[0] && !has_open_options) {
- strcpy(bs->exact_filename, bs->file->exact_filename);
+ if (bs->file->bs->exact_filename[0] && !has_open_options) {
+ strcpy(bs->exact_filename, bs->file->bs->exact_filename);
}
/* Reconstructing the full options QDict is simple for most format block
* drivers, as long as the full options are known for the underlying
* file BDS. The full options QDict of that file BDS should somehow
* contain a representation of the filename, therefore the following
* suffices without querying the (exact_)filename of this BDS. */
- if (bs->file->full_open_options) {
+ if (bs->file->bs->full_open_options) {
qdict_put_obj(opts, "driver",
QOBJECT(qstring_from_str(drv->format_name)));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "file", QOBJECT(bs->file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "file",
+ QOBJECT(bs->file->bs->full_open_options));
bs->full_open_options = opts;
} else {
@@ -6039,13 +3872,51 @@
}
}
-/* This accessor function purpose is to allow the device models to access the
- * BlockAcctStats structure embedded inside a BlockDriverState without being
- * aware of the BlockDriverState structure layout.
- * It will go away when the BlockAcctStats structure will be moved inside
- * the device models.
+/*
+ * Hot add/remove a BDS's child, so that the user can take a child offline
+ * when it is broken and bring a new child online.
*/
-BlockAcctStats *bdrv_get_stats(BlockDriverState *bs)
+void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
+ Error **errp)
{
- return &bs->stats;
+
+ if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
+ error_setg(errp, "The node %s does not support adding a child",
+ bdrv_get_device_or_node_name(parent_bs));
+ return;
+ }
+
+ if (!QLIST_EMPTY(&child_bs->parents)) {
+ error_setg(errp, "The node %s already has a parent",
+ child_bs->node_name);
+ return;
+ }
+
+ parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
+}
+
+void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
+{
+ BdrvChild *tmp;
+
+ if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
+ error_setg(errp, "The node %s does not support removing a child",
+ bdrv_get_device_or_node_name(parent_bs));
+ return;
+ }
+
+ QLIST_FOREACH(tmp, &parent_bs->children, next) {
+ if (tmp == child) {
+ break;
+ }
+ }
+
+ if (!tmp) {
+ error_setg(errp, "The node %s does not have a child named %s",
+ bdrv_get_device_or_node_name(parent_bs),
+ bdrv_get_device_or_node_name(child->bs));
+ return;
+ }
+
+ parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
}
diff --git a/block/Makefile.objs b/block/Makefile.objs
index 04b0e43..2593a2f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,15 +1,16 @@
-block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vvfat.o
+block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
block-obj-y += qed-check.o
block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
-block-obj-$(CONFIG_QUORUM) += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += quorum.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
block-obj-y += block-backend.o snapshot.o qapi.o
block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
block-obj-$(CONFIG_POSIX) += raw-posix.o
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
-block-obj-y += null.o mirror.o
+block-obj-y += null.o mirror.o commit.o io.o
+block-obj-y += throttle-groups.o
block-obj-y += nbd.o nbd-client.o sheepdog.o
block-obj-$(CONFIG_LIBISCSI) += iscsi.o
@@ -19,10 +20,12 @@
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
block-obj-$(CONFIG_ARCHIPELAGO) += archipelago.o
block-obj-$(CONFIG_LIBSSH2) += ssh.o
-block-obj-y += accounting.o
+block-obj-y += accounting.o dirty-bitmap.o
+block-obj-y += write-threshold.o
+
+block-obj-y += crypto.o
common-obj-y += stream.o
-common-obj-y += commit.o
common-obj-y += backup.o
iscsi.o-cflags := $(LIBISCSI_CFLAGS)
@@ -36,5 +39,7 @@
ssh.o-cflags := $(LIBSSH2_CFLAGS)
ssh.o-libs := $(LIBSSH2_LIBS)
archipelago.o-libs := $(ARCHIPELAGO_LIBS)
+block-obj-m += dmg.o
+dmg.o-libs := $(BZIP2_LIBS)
qcow.o-libs := -lz
linux-aio.o-libs := -laio
diff --git a/block/accounting.c b/block/accounting.c
index edbb1cc..3f457c4 100644
--- a/block/accounting.c
+++ b/block/accounting.c
@@ -2,6 +2,7 @@
* QEMU System Emulator block accounting
*
* Copyright (c) 2011 Christoph Hellwig
+ * Copyright (c) 2015 Igalia, S.L.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,8 +23,58 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/accounting.h"
#include "block/block_int.h"
+#include "qemu/timer.h"
+#include "sysemu/qtest.h"
+
+static QEMUClockType clock_type = QEMU_CLOCK_REALTIME;
+static const int qtest_latency_ns = NANOSECONDS_PER_SECOND / 1000;
+
+void block_acct_init(BlockAcctStats *stats, bool account_invalid,
+ bool account_failed)
+{
+ stats->account_invalid = account_invalid;
+ stats->account_failed = account_failed;
+
+ if (qtest_enabled()) {
+ clock_type = QEMU_CLOCK_VIRTUAL;
+ }
+}
+
+void block_acct_cleanup(BlockAcctStats *stats)
+{
+ BlockAcctTimedStats *s, *next;
+ QSLIST_FOREACH_SAFE(s, &stats->intervals, entries, next) {
+ g_free(s);
+ }
+}
+
+void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length)
+{
+ BlockAcctTimedStats *s;
+ unsigned i;
+
+ s = g_new0(BlockAcctTimedStats, 1);
+ s->interval_length = interval_length;
+ QSLIST_INSERT_HEAD(&stats->intervals, s, entries);
+
+ for (i = 0; i < BLOCK_MAX_IOTYPE; i++) {
+ timed_average_init(&s->latency[i], clock_type,
+ (uint64_t) interval_length * NANOSECONDS_PER_SECOND);
+ }
+}
+
+BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats,
+ BlockAcctTimedStats *s)
+{
+ if (s == NULL) {
+ return QSLIST_FIRST(&stats->intervals);
+ } else {
+ return QSLIST_NEXT(s, entries);
+ }
+}
void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie,
int64_t bytes, enum BlockAcctType type)
@@ -31,24 +82,92 @@
assert(type < BLOCK_MAX_IOTYPE);
cookie->bytes = bytes;
- cookie->start_time_ns = get_clock();
+ cookie->start_time_ns = qemu_clock_get_ns(clock_type);
cookie->type = type;
}
void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
+
assert(cookie->type < BLOCK_MAX_IOTYPE);
stats->nr_bytes[cookie->type] += cookie->bytes;
stats->nr_ops[cookie->type]++;
- stats->total_time_ns[cookie->type] += get_clock() - cookie->start_time_ns;
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
}
-
-void block_acct_highest_sector(BlockAcctStats *stats, int64_t sector_num,
- unsigned int nb_sectors)
+void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie)
{
- if (stats->wr_highest_sector < sector_num + nb_sectors - 1) {
- stats->wr_highest_sector = sector_num + nb_sectors - 1;
+ assert(cookie->type < BLOCK_MAX_IOTYPE);
+
+ stats->failed_ops[cookie->type]++;
+
+ if (stats->account_failed) {
+ BlockAcctTimedStats *s;
+ int64_t time_ns = qemu_clock_get_ns(clock_type);
+ int64_t latency_ns = time_ns - cookie->start_time_ns;
+
+ if (qtest_enabled()) {
+ latency_ns = qtest_latency_ns;
+ }
+
+ stats->total_time_ns[cookie->type] += latency_ns;
+ stats->last_access_time_ns = time_ns;
+
+ QSLIST_FOREACH(s, &stats->intervals, entries) {
+ timed_average_account(&s->latency[cookie->type], latency_ns);
+ }
}
}
+
+void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type)
+{
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ /* block_acct_done() and block_acct_failed() update
+ * total_time_ns[], but this one does not. The reason is that
+ * invalid requests are accounted during their submission,
+ * therefore there's no actual I/O involved. */
+
+ stats->invalid_ops[type]++;
+
+ if (stats->account_invalid) {
+ stats->last_access_time_ns = qemu_clock_get_ns(clock_type);
+ }
+}
+
+void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type,
+ int num_requests)
+{
+ assert(type < BLOCK_MAX_IOTYPE);
+ stats->merged[type] += num_requests;
+}
+
+int64_t block_acct_idle_time_ns(BlockAcctStats *stats)
+{
+ return qemu_clock_get_ns(clock_type) - stats->last_access_time_ns;
+}
+
+double block_acct_queue_depth(BlockAcctTimedStats *stats,
+ enum BlockAcctType type)
+{
+ uint64_t sum, elapsed;
+
+ assert(type < BLOCK_MAX_IOTYPE);
+
+ sum = timed_average_sum(&stats->latency[type], &elapsed);
+
+ return (double) sum / elapsed;
+}
diff --git a/block/archipelago.c b/block/archipelago.c
index a8114b5..37b8aca 100644
--- a/block/archipelago.c
+++ b/block/archipelago.c
@@ -50,7 +50,8 @@
*
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qemu/error-report.h"
#include "qemu/thread.h"
@@ -59,7 +60,6 @@
#include "qapi/qmp/qjson.h"
#include "qemu/atomic.h"
-#include <inttypes.h>
#include <xseg/xseg.h>
#include <xseg/protocol.h>
@@ -291,7 +291,7 @@
ret = qemu_archipelago_xseg_init(s);
if (ret < 0) {
- error_report("Cannot initialize XSEG. Aborting...\n");
+ error_report("Cannot initialize XSEG. Aborting...");
goto err_exit;
}
@@ -645,7 +645,7 @@
target = xseg_get_target(xseg, req);
if (!target) {
- error_setg(errp, "Cannot get XSEG target.\n");
+ error_setg(errp, "Cannot get XSEG target.");
goto err_exit;
}
memcpy(target, volname, targetlen);
@@ -889,7 +889,7 @@
return &aio_cb->common;
err_exit:
- error_report("qemu_archipelago_aio_rw(): I/O Error\n");
+ error_report("qemu_archipelago_aio_rw(): I/O Error");
qemu_aio_unref(aio_cb);
return NULL;
}
@@ -974,11 +974,9 @@
static int64_t qemu_archipelago_getlength(BlockDriverState *bs)
{
- int64_t ret;
BDRVArchipelagoState *s = bs->opaque;
- ret = archipelago_volume_info(s);
- return ret;
+ return archipelago_volume_info(s);
}
static int qemu_archipelago_truncate(BlockDriverState *bs, int64_t offset)
diff --git a/block/backup.c b/block/backup.c
index 792e655..2c05323 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -11,20 +11,20 @@
*
*/
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "qemu/cutils.h"
+#include "sysemu/block-backend.h"
+#include "qemu/bitmap.h"
-#define BACKUP_CLUSTER_BITS 16
-#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS)
-#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
-
+#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
#define SLICE_TIME 100000000ULL /* ns */
typedef struct CowRequest {
@@ -36,17 +36,27 @@
typedef struct BackupBlockJob {
BlockJob common;
- BlockDriverState *target;
+ BlockBackend *target;
+ /* bitmap for sync=incremental */
+ BdrvDirtyBitmap *sync_bitmap;
MirrorSyncMode sync_mode;
RateLimit limit;
BlockdevOnError on_source_error;
BlockdevOnError on_target_error;
CoRwlock flush_rwlock;
uint64_t sectors_read;
- HBitmap *bitmap;
+ unsigned long *done_bitmap;
+ int64_t cluster_size;
+ NotifierWithReturn before_write;
QLIST_HEAD(, CowRequest) inflight_reqs;
} BackupBlockJob;
+/* Size of a cluster in sectors, instead of bytes. */
+static inline int64_t cluster_size_sectors(BackupBlockJob *job)
+{
+ return job->cluster_size / BDRV_SECTOR_SIZE;
+}
+
/* See if in-flight requests overlap and wait for them to complete */
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
int64_t start,
@@ -84,23 +94,25 @@
qemu_co_queue_restart_all(&req->wait_queue);
}
-static int coroutine_fn backup_do_cow(BlockDriverState *bs,
+static int coroutine_fn backup_do_cow(BackupBlockJob *job,
int64_t sector_num, int nb_sectors,
- bool *error_is_read)
+ bool *error_is_read,
+ bool is_write_notifier)
{
- BackupBlockJob *job = (BackupBlockJob *)bs->job;
+ BlockBackend *blk = job->common.blk;
CowRequest cow_request;
struct iovec iov;
QEMUIOVector bounce_qiov;
void *bounce_buffer = NULL;
int ret = 0;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int64_t start, end;
int n;
qemu_co_rwlock_rdlock(&job->flush_rwlock);
- start = sector_num / BACKUP_SECTORS_PER_CLUSTER;
- end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER);
+ start = sector_num / sectors_per_cluster;
+ end = DIV_ROUND_UP(sector_num + nb_sectors, sectors_per_cluster);
trace_backup_do_cow_enter(job, start, sector_num, nb_sectors);
@@ -108,26 +120,27 @@
cow_request_begin(&cow_request, job, start, end);
for (; start < end; start++) {
- if (hbitmap_get(job->bitmap, start)) {
+ if (test_bit(start, job->done_bitmap)) {
trace_backup_do_cow_skip(job, start);
continue; /* already copied */
}
trace_backup_do_cow_process(job, start);
- n = MIN(BACKUP_SECTORS_PER_CLUSTER,
+ n = MIN(sectors_per_cluster,
job->common.len / BDRV_SECTOR_SIZE -
- start * BACKUP_SECTORS_PER_CLUSTER);
+ start * sectors_per_cluster);
if (!bounce_buffer) {
- bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE);
+ bounce_buffer = blk_blockalign(blk, job->cluster_size);
}
iov.iov_base = bounce_buffer;
iov.iov_len = n * BDRV_SECTOR_SIZE;
qemu_iovec_init_external(&bounce_qiov, &iov, 1);
- ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n,
- &bounce_qiov);
+ ret = blk_co_preadv(blk, start * job->cluster_size,
+ bounce_qiov.size, &bounce_qiov,
+ is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
if (ret < 0) {
trace_backup_do_cow_read_fail(job, start, ret);
if (error_is_read) {
@@ -137,13 +150,11 @@
}
if (buffer_is_zero(iov.iov_base, iov.iov_len)) {
- ret = bdrv_co_write_zeroes(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER,
- n, BDRV_REQ_MAY_UNMAP);
+ ret = blk_co_pwrite_zeroes(job->target, start * job->cluster_size,
+ bounce_qiov.size, BDRV_REQ_MAY_UNMAP);
} else {
- ret = bdrv_co_writev(job->target,
- start * BACKUP_SECTORS_PER_CLUSTER, n,
- &bounce_qiov);
+ ret = blk_co_pwritev(job->target, start * job->cluster_size,
+ bounce_qiov.size, &bounce_qiov, 0);
}
if (ret < 0) {
trace_backup_do_cow_write_fail(job, start, ret);
@@ -153,7 +164,7 @@
goto out;
}
- hbitmap_set(job->bitmap, start, 1);
+ set_bit(start, job->done_bitmap);
/* Publish progress, guest I/O counts as progress too. Note that the
* offset field is an opaque progress value, it is not a disk offset.
@@ -180,14 +191,16 @@
NotifierWithReturn *notifier,
void *opaque)
{
+ BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
BdrvTrackedRequest *req = opaque;
int64_t sector_num = req->offset >> BDRV_SECTOR_BITS;
int nb_sectors = req->bytes >> BDRV_SECTOR_BITS;
+ assert(req->bs == blk_bs(job->common.blk));
assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- return backup_do_cow(req->bs, sector_num, nb_sectors, NULL);
+ return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
}
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -195,35 +208,69 @@
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
if (speed < 0) {
- error_set(errp, QERR_INVALID_PARAMETER, "speed");
+ error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void backup_iostatus_reset(BlockJob *job)
+static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
+{
+ BdrvDirtyBitmap *bm;
+ BlockDriverState *bs = blk_bs(job->common.blk);
+
+ if (ret < 0 || block_job_is_cancelled(&job->common)) {
+ /* Merge the successor back into the parent, delete nothing. */
+ bm = bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ } else {
+ /* Everything is fine, delete this bitmap and install the backup. */
+ bm = bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);
+ assert(bm);
+ }
+}
+
+static void backup_commit(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, 0);
+ }
+}
+
+static void backup_abort(BlockJob *job)
+{
+ BackupBlockJob *s = container_of(job, BackupBlockJob, common);
+ if (s->sync_bitmap) {
+ backup_cleanup_sync_bitmap(s, -1);
+ }
+}
+
+static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
{
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
- bdrv_iostatus_reset(s->target);
+ blk_set_aio_context(s->target, aio_context);
}
static const BlockJobDriver backup_job_driver = {
- .instance_size = sizeof(BackupBlockJob),
- .job_type = BLOCK_JOB_TYPE_BACKUP,
- .set_speed = backup_set_speed,
- .iostatus_reset = backup_iostatus_reset,
+ .instance_size = sizeof(BackupBlockJob),
+ .job_type = BLOCK_JOB_TYPE_BACKUP,
+ .set_speed = backup_set_speed,
+ .commit = backup_commit,
+ .abort = backup_abort,
+ .attached_aio_context = backup_attached_aio_context,
};
static BlockErrorAction backup_error_action(BackupBlockJob *job,
bool read, int error)
{
if (read) {
- return block_job_error_action(&job->common, job->common.bs,
- job->on_source_error, true, error);
+ return block_job_error_action(&job->common, job->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&job->common, job->target,
- job->on_target_error, false, error);
+ return block_job_error_action(&job->common, job->on_target_error,
+ false, error);
}
}
@@ -236,70 +283,132 @@
BackupBlockJob *s = container_of(job, BackupBlockJob, common);
BackupCompleteData *data = opaque;
- bdrv_unref(s->target);
+ blk_unref(s->target);
block_job_completed(job, data->ret);
g_free(data);
}
+static bool coroutine_fn yield_and_check(BackupBlockJob *job)
+{
+ if (block_job_is_cancelled(&job->common)) {
+ return true;
+ }
+
+ /* we need to yield so that bdrv_drain_all() returns.
+ * (without, VM does not reboot)
+ */
+ if (job->common.speed) {
+ uint64_t delay_ns = ratelimit_calculate_delay(&job->limit,
+ job->sectors_read);
+ job->sectors_read = 0;
+ block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
+ } else {
+ block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
+ }
+
+ if (block_job_is_cancelled(&job->common)) {
+ return true;
+ }
+
+ return false;
+}
+
+static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
+{
+ bool error_is_read;
+ int ret = 0;
+ int clusters_per_iter;
+ uint32_t granularity;
+ int64_t sector;
+ int64_t cluster;
+ int64_t end;
+ int64_t last_cluster = -1;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
+ HBitmapIter hbi;
+
+ granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
+ clusters_per_iter = MAX((granularity / job->cluster_size), 1);
+ bdrv_dirty_iter_init(job->sync_bitmap, &hbi);
+
+ /* Find the next dirty sector(s) */
+ while ((sector = hbitmap_iter_next(&hbi)) != -1) {
+ cluster = sector / sectors_per_cluster;
+
+ /* Fake progress updates for any clusters we skipped */
+ if (cluster != last_cluster + 1) {
+ job->common.offset += ((cluster - last_cluster - 1) *
+ job->cluster_size);
+ }
+
+ for (end = cluster + clusters_per_iter; cluster < end; cluster++) {
+ do {
+ if (yield_and_check(job)) {
+ return ret;
+ }
+ ret = backup_do_cow(job, cluster * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read,
+ false);
+ if ((ret < 0) &&
+ backup_error_action(job, error_is_read, -ret) ==
+ BLOCK_ERROR_ACTION_REPORT) {
+ return ret;
+ }
+ } while (ret < 0);
+ }
+
+ /* If the bitmap granularity is smaller than the backup granularity,
+ * we need to advance the iterator pointer to the next cluster. */
+ if (granularity < job->cluster_size) {
+ bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
+ }
+
+ last_cluster = cluster - 1;
+ }
+
+ /* Play some final catchup with the progress meter */
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
+ if (last_cluster + 1 < end) {
+ job->common.offset += ((end - last_cluster - 1) * job->cluster_size);
+ }
+
+ return ret;
+}
+
static void coroutine_fn backup_run(void *opaque)
{
BackupBlockJob *job = opaque;
BackupCompleteData *data;
- BlockDriverState *bs = job->common.bs;
- BlockDriverState *target = job->target;
- BlockdevOnError on_target_error = job->on_target_error;
- NotifierWithReturn before_write = {
- .notify = backup_before_write_notify,
- };
+ BlockDriverState *bs = blk_bs(job->common.blk);
+ BlockBackend *target = job->target;
int64_t start, end;
+ int64_t sectors_per_cluster = cluster_size_sectors(job);
int ret = 0;
QLIST_INIT(&job->inflight_reqs);
qemu_co_rwlock_init(&job->flush_rwlock);
start = 0;
- end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE,
- BACKUP_SECTORS_PER_CLUSTER);
+ end = DIV_ROUND_UP(job->common.len, job->cluster_size);
- job->bitmap = hbitmap_alloc(end, 0);
+ job->done_bitmap = bitmap_new(end);
- bdrv_set_enable_write_cache(target, true);
- bdrv_set_on_error(target, on_target_error, on_target_error);
- bdrv_iostatus_enable(target);
-
- bdrv_add_before_write_notifier(bs, &before_write);
+ job->before_write.notify = backup_before_write_notify;
+ bdrv_add_before_write_notifier(bs, &job->before_write);
if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
while (!block_job_is_cancelled(&job->common)) {
/* Yield until the job is cancelled. We just let our before_write
* notify callback service CoW requests. */
- job->common.busy = false;
- qemu_coroutine_yield();
- job->common.busy = true;
+ block_job_yield(&job->common);
}
+ } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ ret = backup_run_incremental(job);
} else {
/* Both FULL and TOP SYNC_MODE's require copying.. */
for (; start < end; start++) {
bool error_is_read;
-
- if (block_job_is_cancelled(&job->common)) {
- break;
- }
-
- /* we need to yield so that qemu_aio_flush() returns.
- * (without, VM does not reboot)
- */
- if (job->common.speed) {
- uint64_t delay_ns = ratelimit_calculate_delay(
- &job->limit, job->sectors_read);
- job->sectors_read = 0;
- block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);
- } else {
- block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, 0);
- }
-
- if (block_job_is_cancelled(&job->common)) {
+ if (yield_and_check(job)) {
break;
}
@@ -310,7 +419,7 @@
/* Check to see if these blocks are already in the
* backing file. */
- for (i = 0; i < BACKUP_SECTORS_PER_CLUSTER;) {
+ for (i = 0; i < sectors_per_cluster;) {
/* bdrv_is_allocated() only returns true/false based
* on the first set of sectors it comes across that
* are are all in the same state.
@@ -319,8 +428,8 @@
* needed but at some point that is always the case. */
alloced =
bdrv_is_allocated(bs,
- start * BACKUP_SECTORS_PER_CLUSTER + i,
- BACKUP_SECTORS_PER_CLUSTER - i, &n);
+ start * sectors_per_cluster + i,
+ sectors_per_cluster - i, &n);
i += n;
if (alloced == 1 || n == 0) {
@@ -335,8 +444,8 @@
}
}
/* FULL sync mode we copy the whole drive. */
- ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER,
- BACKUP_SECTORS_PER_CLUSTER, &error_is_read);
+ ret = backup_do_cow(job, start * sectors_per_cluster,
+ sectors_per_cluster, &error_is_read, false);
if (ret < 0) {
/* Depending on error action, fail now or retry cluster */
BlockErrorAction action =
@@ -351,38 +460,77 @@
}
}
- notifier_with_return_remove(&before_write);
+ notifier_with_return_remove(&job->before_write);
/* wait until pending backup_do_cow() calls have completed */
qemu_co_rwlock_wrlock(&job->flush_rwlock);
qemu_co_rwlock_unlock(&job->flush_rwlock);
+ g_free(job->done_bitmap);
- hbitmap_free(job->bitmap);
-
- bdrv_iostatus_disable(target);
+ bdrv_op_unblock_all(blk_bs(target), job->common.blocker);
data = g_malloc(sizeof(*data));
data->ret = ret;
block_job_defer_to_main_loop(&job->common, backup_complete, data);
}
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, MirrorSyncMode sync_mode,
+void backup_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
- Error **errp)
+ BlockJobTxn *txn, Error **errp)
{
int64_t len;
+ BlockDriverInfo bdi;
+ BackupBlockJob *job = NULL;
+ int ret;
assert(bs);
assert(target);
- assert(cb);
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
- error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+ if (bs == target) {
+ error_setg(errp, "Source and target cannot be the same");
+ return;
+ }
+
+ if (!bdrv_is_inserted(bs)) {
+ error_setg(errp, "Device is not inserted: %s",
+ bdrv_get_device_name(bs));
+ return;
+ }
+
+ if (!bdrv_is_inserted(target)) {
+ error_setg(errp, "Device is not inserted: %s",
+ bdrv_get_device_name(target));
+ return;
+ }
+
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
+ return;
+ }
+
+ if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
+ return;
+ }
+
+ if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ if (!sync_bitmap) {
+ error_setg(errp, "must provide a valid bitmap name for "
+ "\"incremental\" sync mode");
+ return;
+ }
+
+ /* Create a new bitmap, and freeze/disable this one. */
+ if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
+ return;
+ }
+ } else if (sync_bitmap) {
+ error_setg(errp,
+ "a sync_bitmap was provided to backup_run, "
+ "but received an incompatible sync_mode (%s)",
+ MirrorSyncMode_lookup[sync_mode]);
return;
}
@@ -390,20 +538,55 @@
if (len < 0) {
error_setg_errno(errp, -len, "unable to get length for '%s'",
bdrv_get_device_name(bs));
- return;
+ goto error;
}
- BackupBlockJob *job = block_job_create(&backup_job_driver, bs, speed,
- cb, opaque, errp);
+ job = block_job_create(job_id, &backup_job_driver, bs, speed,
+ cb, opaque, errp);
if (!job) {
- return;
+ goto error;
}
+ job->target = blk_new();
+ blk_insert_bs(job->target, target);
+
job->on_source_error = on_source_error;
job->on_target_error = on_target_error;
- job->target = target;
job->sync_mode = sync_mode;
+ job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
+ sync_bitmap : NULL;
+
+ /* If there is no backing file on the target, we cannot rely on COW if our
+ * backup cluster size is smaller than the target cluster size. Even for
+ * targets with a backing file, try to avoid COW if possible. */
+ ret = bdrv_get_info(target, &bdi);
+ if (ret < 0 && !target->backing) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+ "which has no backing file");
+ error_append_hint(errp,
+ "Aborting, since this may create an unusable destination image\n");
+ goto error;
+ } else if (ret < 0 && target->backing) {
+ /* Not fatal; just trudge on ahead. */
+ job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
+ } else {
+ job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
+ }
+
+ bdrv_op_block_all(target, job->common.blocker);
job->common.len = len;
- job->common.co = qemu_coroutine_create(backup_run);
- qemu_coroutine_enter(job->common.co, job);
+ job->common.co = qemu_coroutine_create(backup_run, job);
+ block_job_txn_add_job(txn, &job->common);
+ qemu_coroutine_enter(job->common.co);
+ return;
+
+ error:
+ if (sync_bitmap) {
+ bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
+ }
+ if (job) {
+ blk_unref(job->target);
+ block_job_unref(&job->common);
+ }
}
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 862d93b..d5db166 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -22,7 +22,9 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/config-file.h"
#include "block/block_int.h"
#include "qemu/module.h"
@@ -30,12 +32,17 @@
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
+#include "sysemu/qtest.h"
typedef struct BDRVBlkdebugState {
int state;
int new_state;
+ int align;
- QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
+ /* For blkdebug_refresh_filename() */
+ char *config_file;
+
+ QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
QLIST_HEAD(, BlkdebugSuspendedReq) suspended_reqs;
} BDRVBlkdebugState;
@@ -63,7 +70,7 @@
};
typedef struct BlkdebugRule {
- BlkDebugEvent event;
+ BlkdebugEvent event;
int action;
int state;
union {
@@ -142,69 +149,12 @@
NULL
};
-static const char *event_names[BLKDBG_EVENT_MAX] = {
- [BLKDBG_L1_UPDATE] = "l1_update",
- [BLKDBG_L1_GROW_ALLOC_TABLE] = "l1_grow.alloc_table",
- [BLKDBG_L1_GROW_WRITE_TABLE] = "l1_grow.write_table",
- [BLKDBG_L1_GROW_ACTIVATE_TABLE] = "l1_grow.activate_table",
-
- [BLKDBG_L2_LOAD] = "l2_load",
- [BLKDBG_L2_UPDATE] = "l2_update",
- [BLKDBG_L2_UPDATE_COMPRESSED] = "l2_update_compressed",
- [BLKDBG_L2_ALLOC_COW_READ] = "l2_alloc.cow_read",
- [BLKDBG_L2_ALLOC_WRITE] = "l2_alloc.write",
-
- [BLKDBG_READ_AIO] = "read_aio",
- [BLKDBG_READ_BACKING_AIO] = "read_backing_aio",
- [BLKDBG_READ_COMPRESSED] = "read_compressed",
-
- [BLKDBG_WRITE_AIO] = "write_aio",
- [BLKDBG_WRITE_COMPRESSED] = "write_compressed",
-
- [BLKDBG_VMSTATE_LOAD] = "vmstate_load",
- [BLKDBG_VMSTATE_SAVE] = "vmstate_save",
-
- [BLKDBG_COW_READ] = "cow_read",
- [BLKDBG_COW_WRITE] = "cow_write",
-
- [BLKDBG_REFTABLE_LOAD] = "reftable_load",
- [BLKDBG_REFTABLE_GROW] = "reftable_grow",
- [BLKDBG_REFTABLE_UPDATE] = "reftable_update",
-
- [BLKDBG_REFBLOCK_LOAD] = "refblock_load",
- [BLKDBG_REFBLOCK_UPDATE] = "refblock_update",
- [BLKDBG_REFBLOCK_UPDATE_PART] = "refblock_update_part",
- [BLKDBG_REFBLOCK_ALLOC] = "refblock_alloc",
- [BLKDBG_REFBLOCK_ALLOC_HOOKUP] = "refblock_alloc.hookup",
- [BLKDBG_REFBLOCK_ALLOC_WRITE] = "refblock_alloc.write",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS] = "refblock_alloc.write_blocks",
- [BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE] = "refblock_alloc.write_table",
- [BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE] = "refblock_alloc.switch_table",
-
- [BLKDBG_CLUSTER_ALLOC] = "cluster_alloc",
- [BLKDBG_CLUSTER_ALLOC_BYTES] = "cluster_alloc_bytes",
- [BLKDBG_CLUSTER_FREE] = "cluster_free",
-
- [BLKDBG_FLUSH_TO_OS] = "flush_to_os",
- [BLKDBG_FLUSH_TO_DISK] = "flush_to_disk",
-
- [BLKDBG_PWRITEV_RMW_HEAD] = "pwritev_rmw.head",
- [BLKDBG_PWRITEV_RMW_AFTER_HEAD] = "pwritev_rmw.after_head",
- [BLKDBG_PWRITEV_RMW_TAIL] = "pwritev_rmw.tail",
- [BLKDBG_PWRITEV_RMW_AFTER_TAIL] = "pwritev_rmw.after_tail",
- [BLKDBG_PWRITEV] = "pwritev",
- [BLKDBG_PWRITEV_ZERO] = "pwritev_zero",
- [BLKDBG_PWRITEV_DONE] = "pwritev_done",
-
- [BLKDBG_EMPTY_IMAGE_PREPARE] = "empty_image_prepare",
-};
-
-static int get_event_by_name(const char *name, BlkDebugEvent *event)
+static int get_event_by_name(const char *name, BlkdebugEvent *event)
{
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
- if (!strcmp(event_names[i], name)) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
+ if (!strcmp(BlkdebugEvent_lookup[i], name)) {
*event = i;
return 0;
}
@@ -216,24 +166,23 @@
struct add_rule_data {
BDRVBlkdebugState *s;
int action;
- Error **errp;
};
-static int add_rule(QemuOpts *opts, void *opaque)
+static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
{
struct add_rule_data *d = opaque;
BDRVBlkdebugState *s = d->s;
const char* event_name;
- BlkDebugEvent event;
+ BlkdebugEvent event;
struct BlkdebugRule *rule;
/* Find the right event for the rule */
event_name = qemu_opt_get(opts, "event");
if (!event_name) {
- error_setg(d->errp, "Missing event name for rule");
+ error_setg(errp, "Missing event name for rule");
return -1;
} else if (get_event_by_name(event_name, &event) < 0) {
- error_setg(d->errp, "Invalid event name \"%s\"", event_name);
+ error_setg(errp, "Invalid event name \"%s\"", event_name);
return -1;
}
@@ -319,8 +268,7 @@
d.s = s;
d.action = ACTION_INJECT_ERROR;
- d.errp = &local_err;
- qemu_opts_foreach(&inject_error_opts, add_rule, &d, 1);
+ qemu_opts_foreach(&inject_error_opts, add_rule, &d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -328,7 +276,7 @@
}
d.action = ACTION_SET_STATE;
- qemu_opts_foreach(&set_state_opts, add_rule, &d, 1);
+ qemu_opts_foreach(&set_state_opts, add_rule, &d, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
@@ -406,7 +354,6 @@
BDRVBlkdebugState *s = bs->opaque;
QemuOpts *opts;
Error *local_err = NULL;
- const char *config;
uint64_t align;
int ret;
@@ -419,8 +366,8 @@
}
/* Read rules from config file or command line options */
- config = qemu_opt_get(opts, "config");
- ret = read_config(s, config, options, errp);
+ s->config_file = g_strdup(qemu_opt_get(opts, "config"));
+ ret = read_config(s, s->config_file, options, errp);
if (ret) {
goto out;
}
@@ -428,20 +375,20 @@
/* Set initial state */
s->state = 1;
- /* Open the backing file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-image"), options, "image",
- flags | BDRV_O_PROTOCOL, false, &local_err);
- if (ret < 0) {
+ /* Open the image file */
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto out;
}
/* Set request alignment */
- align = qemu_opt_get_size(opts, "align", bs->request_alignment);
- if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
- bs->request_alignment = align;
- } else {
+ align = qemu_opt_get_size(opts, "align", 0);
+ if (align < INT_MAX && is_power_of_2(align)) {
+ s->align = align;
+ } else if (align) {
error_setg(errp, "Invalid alignment");
ret = -EINVAL;
goto fail_unref;
@@ -451,8 +398,11 @@
goto out;
fail_unref:
- bdrv_unref(bs->file);
+ bdrv_unref_child(bs, bs->file);
out:
+ if (ret < 0) {
+ g_free(s->config_file);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -472,12 +422,14 @@
int error = rule->options.inject.error;
struct BlkdebugAIOCB *acb;
QEMUBH *bh;
+ bool immediately = rule->options.inject.immediately;
if (rule->options.inject.once) {
- QSIMPLEQ_INIT(&s->active_rules);
+ QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
+ remove_rule(rule);
}
- if (rule->options.inject.immediately) {
+ if (immediately) {
return NULL;
}
@@ -510,7 +462,8 @@
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_readv(bs->file, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
@@ -532,7 +485,8 @@
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors, cb, opaque);
+ return bdrv_aio_writev(bs->file, sector_num, qiov, nb_sectors,
+ cb, opaque);
}
static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
@@ -551,7 +505,7 @@
return inject_error(bs, cb, opaque, rule);
}
- return bdrv_aio_flush(bs->file, cb, opaque);
+ return bdrv_aio_flush(bs->file->bs, cb, opaque);
}
@@ -561,11 +515,13 @@
BlkdebugRule *rule, *next;
int i;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
remove_rule(rule);
}
}
+
+ g_free(s->config_file);
}
static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -581,9 +537,13 @@
remove_rule(rule);
QLIST_INSERT_HEAD(&s->suspended_reqs, &r, next);
- printf("blkdebug: Suspended request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Suspended request '%s'\n", r.tag);
+ }
qemu_coroutine_yield();
- printf("blkdebug: Resuming request '%s'\n", r.tag);
+ if (!qtest_enabled()) {
+ printf("blkdebug: Resuming request '%s'\n", r.tag);
+ }
QLIST_REMOVE(&r, next);
g_free(r.tag);
@@ -620,13 +580,13 @@
return injected;
}
-static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event)
+static void blkdebug_debug_event(BlockDriverState *bs, BlkdebugEvent event)
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule, *next;
bool injected;
- assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
+ assert((int)event >= 0 && event < BLKDBG__MAX);
injected = false;
s->new_state = s->state;
@@ -641,7 +601,7 @@
{
BDRVBlkdebugState *s = bs->opaque;
struct BlkdebugRule *rule;
- BlkDebugEvent blkdebug_event;
+ BlkdebugEvent blkdebug_event;
if (get_event_by_name(event, &blkdebug_event) < 0) {
return -ENOENT;
@@ -668,7 +628,7 @@
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
if (!strcmp(r->tag, tag)) {
- qemu_coroutine_enter(r->co, NULL);
+ qemu_coroutine_enter(r->co);
return 0;
}
}
@@ -683,7 +643,7 @@
BlkdebugRule *rule, *next;
int i, ret = -ENOENT;
- for (i = 0; i < BLKDBG_EVENT_MAX; i++) {
+ for (i = 0; i < BLKDBG__MAX; i++) {
QLIST_FOREACH_SAFE(rule, &s->rules[i], next, next) {
if (rule->action == ACTION_SUSPEND &&
!strcmp(rule->options.suspend.tag, tag)) {
@@ -694,7 +654,7 @@
}
QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
if (!strcmp(r->tag, tag)) {
- qemu_coroutine_enter(r->co, NULL);
+ qemu_coroutine_enter(r->co);
ret = 0;
}
}
@@ -716,101 +676,73 @@
static int64_t blkdebug_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
-static void blkdebug_refresh_filename(BlockDriverState *bs)
+static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
+{
+ return bdrv_truncate(bs->file->bs, offset);
+}
+
+static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkdebugState *s = bs->opaque;
- struct BlkdebugRule *rule;
QDict *opts;
- QList *inject_error_list = NULL, *set_state_list = NULL;
- QList *suspend_list = NULL;
- int event;
+ const QDictEntry *e;
+ bool force_json = false;
- if (!bs->file->full_open_options) {
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+ if (strcmp(qdict_entry_key(e), "config") &&
+ strcmp(qdict_entry_key(e), "x-image"))
+ {
+ force_json = true;
+ break;
+ }
+ }
+
+ if (force_json && !bs->file->bs->full_open_options) {
/* The config file cannot be recreated, so creating a plain filename
* is impossible */
return;
}
+ if (!force_json && bs->file->bs->exact_filename[0]) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename),
+ "blkdebug:%s:%s", s->config_file ?: "",
+ bs->file->bs->exact_filename);
+ }
+
opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "image", QOBJECT(bs->file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
- for (event = 0; event < BLKDBG_EVENT_MAX; event++) {
- QLIST_FOREACH(rule, &s->rules[event], next) {
- if (rule->action == ACTION_INJECT_ERROR) {
- QDict *inject_error = qdict_new();
-
- qdict_put_obj(inject_error, "event", QOBJECT(qstring_from_str(
- BlkdebugEvent_lookup[rule->event])));
- qdict_put_obj(inject_error, "state",
- QOBJECT(qint_from_int(rule->state)));
- qdict_put_obj(inject_error, "errno", QOBJECT(qint_from_int(
- rule->options.inject.error)));
- qdict_put_obj(inject_error, "sector", QOBJECT(qint_from_int(
- rule->options.inject.sector)));
- qdict_put_obj(inject_error, "once", QOBJECT(qbool_from_int(
- rule->options.inject.once)));
- qdict_put_obj(inject_error, "immediately",
- QOBJECT(qbool_from_int(
- rule->options.inject.immediately)));
-
- if (!inject_error_list) {
- inject_error_list = qlist_new();
- }
-
- qlist_append_obj(inject_error_list, QOBJECT(inject_error));
- } else if (rule->action == ACTION_SET_STATE) {
- QDict *set_state = qdict_new();
-
- qdict_put_obj(set_state, "event", QOBJECT(qstring_from_str(
- BlkdebugEvent_lookup[rule->event])));
- qdict_put_obj(set_state, "state",
- QOBJECT(qint_from_int(rule->state)));
- qdict_put_obj(set_state, "new_state", QOBJECT(qint_from_int(
- rule->options.set_state.new_state)));
-
- if (!set_state_list) {
- set_state_list = qlist_new();
- }
-
- qlist_append_obj(set_state_list, QOBJECT(set_state));
- } else if (rule->action == ACTION_SUSPEND) {
- QDict *suspend = qdict_new();
-
- qdict_put_obj(suspend, "event", QOBJECT(qstring_from_str(
- BlkdebugEvent_lookup[rule->event])));
- qdict_put_obj(suspend, "state",
- QOBJECT(qint_from_int(rule->state)));
- qdict_put_obj(suspend, "tag", QOBJECT(qstring_from_str(
- rule->options.suspend.tag)));
-
- if (!suspend_list) {
- suspend_list = qlist_new();
- }
-
- qlist_append_obj(suspend_list, QOBJECT(suspend));
- }
+ for (e = qdict_first(options); e; e = qdict_next(options, e)) {
+ if (strcmp(qdict_entry_key(e), "x-image")) {
+ qobject_incref(qdict_entry_value(e));
+ qdict_put_obj(opts, qdict_entry_key(e), qdict_entry_value(e));
}
}
- if (inject_error_list) {
- qdict_put_obj(opts, "inject-error", QOBJECT(inject_error_list));
- }
- if (set_state_list) {
- qdict_put_obj(opts, "set-state", QOBJECT(set_state_list));
- }
- if (suspend_list) {
- qdict_put_obj(opts, "suspend", QOBJECT(suspend_list));
- }
-
bs->full_open_options = opts;
}
+static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BDRVBlkdebugState *s = bs->opaque;
+
+ if (s->align) {
+ bs->bl.request_alignment = s->align;
+ }
+}
+
+static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
static BlockDriver bdrv_blkdebug = {
.format_name = "blkdebug",
.protocol_name = "blkdebug",
@@ -819,8 +751,11 @@
.bdrv_parse_filename = blkdebug_parse_filename,
.bdrv_file_open = blkdebug_open,
.bdrv_close = blkdebug_close,
+ .bdrv_reopen_prepare = blkdebug_reopen_prepare,
.bdrv_getlength = blkdebug_getlength,
+ .bdrv_truncate = blkdebug_truncate,
.bdrv_refresh_filename = blkdebug_refresh_filename,
+ .bdrv_refresh_limits = blkdebug_refresh_limits,
.bdrv_aio_readv = blkdebug_aio_readv,
.bdrv_aio_writev = blkdebug_aio_writev,
diff --git a/block/blkreplay.c b/block/blkreplay.c
new file mode 100755
index 0000000..30f9d5f
--- /dev/null
+++ b/block/blkreplay.c
@@ -0,0 +1,160 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+#include "qapi/error.h"
+
+typedef struct Request {
+ Coroutine *co;
+ QEMUBH *bh;
+} Request;
+
+/* Next request id.
+ This counter is global, because requests from different
+ block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ Error *local_err = NULL;
+ int ret;
+
+ /* Open the image file */
+ bs->file = bdrv_open_child(NULL, options, "image",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ ret = 0;
+fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
+ return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+ return bdrv_getlength(bs->file->bs);
+}
+
+/* This bottom half (BH) synchronizes the return from request coroutines.
+ It re-enters the yielded coroutine, which then finishes its execution.
+ The BH is always run aligned to some replay checkpoint, therefore
+ record and replay will always finish coroutines deterministically.
+*/
+static void blkreplay_bh_cb(void *opaque)
+{
+ Request *req = opaque;
+ qemu_coroutine_enter(req->co);
+ qemu_bh_delete(req->bh);
+ g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+ Coroutine *co)
+{
+ Request *req = g_new(Request, 1);
+ *req = (Request) {
+ .co = co,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req),
+ };
+ replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_pdiscard(bs->file->bs, offset, count);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+ uint64_t reqid = request_id++;
+ int ret = bdrv_co_flush(bs->file->bs);
+ block_request_create(reqid, bs, qemu_coroutine_self());
+ qemu_coroutine_yield();
+
+ return ret;
+}
+
+static BlockDriver bdrv_blkreplay = {
+ .format_name = "blkreplay",
+ .protocol_name = "blkreplay",
+ .instance_size = 0,
+
+ .bdrv_file_open = blkreplay_open,
+ .bdrv_close = blkreplay_close,
+ .bdrv_getlength = blkreplay_getlength,
+
+ .bdrv_co_preadv = blkreplay_co_preadv,
+ .bdrv_co_pwritev = blkreplay_co_pwritev,
+
+ .bdrv_co_pwrite_zeroes = blkreplay_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = blkreplay_co_pdiscard,
+ .bdrv_co_flush = blkreplay_co_flush,
+};
+
+static void bdrv_blkreplay_init(void)
+{
+ bdrv_register(&bdrv_blkreplay);
+}
+
+block_init(bdrv_blkreplay_init);
diff --git a/block/blkverify.c b/block/blkverify.c
index 438dff8..da62d75 100644
--- a/block/blkverify.c
+++ b/block/blkverify.c
@@ -7,14 +7,16 @@
* See the COPYING file in the top-level directory.
*/
-#include <stdarg.h>
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "block/block_int.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
typedef struct {
- BlockDriverState *test_file;
+ BdrvChild *test_file;
} BDRVBlkverifyState;
typedef struct BlkverifyAIOCB BlkverifyAIOCB;
@@ -123,26 +125,29 @@
}
/* Open the raw file */
- assert(bs->file == NULL);
- ret = bdrv_open_image(&bs->file, qemu_opt_get(opts, "x-raw"), options,
- "raw", flags | BDRV_O_PROTOCOL, false, &local_err);
- if (ret < 0) {
+ bs->file = bdrv_open_child(qemu_opt_get(opts, "x-raw"), options, "raw",
+ bs, &child_file, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
goto fail;
}
/* Open the test file */
- assert(s->test_file == NULL);
- ret = bdrv_open_image(&s->test_file, qemu_opt_get(opts, "x-image"), options,
- "test", flags, false, &local_err);
- if (ret < 0) {
+ s->test_file = bdrv_open_child(qemu_opt_get(opts, "x-image"), options,
+ "test", bs, &child_format, false,
+ &local_err);
+ if (local_err) {
+ ret = -EINVAL;
error_propagate(errp, local_err);
- s->test_file = NULL;
goto fail;
}
ret = 0;
fail:
+ if (ret < 0) {
+ bdrv_unref_child(bs, bs->file);
+ }
qemu_opts_del(opts);
return ret;
}
@@ -151,7 +156,7 @@
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_unref(s->test_file);
+ bdrv_unref_child(bs, s->test_file);
s->test_file = NULL;
}
@@ -159,7 +164,7 @@
{
BDRVBlkverifyState *s = bs->opaque;
- return bdrv_getlength(s->test_file);
+ return bdrv_getlength(s->test_file->bs);
}
static BlkverifyAIOCB *blkverify_aio_get(BlockDriverState *bs, bool is_write,
@@ -238,7 +243,7 @@
nb_sectors, cb, opaque);
acb->verify = blkverify_verify_readv;
- acb->buf = qemu_blockalign(bs->file, qiov->size);
+ acb->buf = qemu_blockalign(bs->file->bs, qiov->size);
qemu_iovec_init(&acb->raw_qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->raw_qiov, qiov, acb->buf);
@@ -271,7 +276,7 @@
BDRVBlkverifyState *s = bs->opaque;
/* Only flush test file, the raw file is not important */
- return bdrv_aio_flush(s->test_file, cb, opaque);
+ return bdrv_aio_flush(s->test_file->bs, cb, opaque);
}
static bool blkverify_recurse_is_first_non_filter(BlockDriverState *bs,
@@ -279,54 +284,44 @@
{
BDRVBlkverifyState *s = bs->opaque;
- bool perm = bdrv_recurse_is_first_non_filter(bs->file, candidate);
+ bool perm = bdrv_recurse_is_first_non_filter(bs->file->bs, candidate);
if (perm) {
return true;
}
- return bdrv_recurse_is_first_non_filter(s->test_file, candidate);
+ return bdrv_recurse_is_first_non_filter(s->test_file->bs, candidate);
}
-/* Propagate AioContext changes to ->test_file */
-static void blkverify_detach_aio_context(BlockDriverState *bs)
+static void blkverify_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVBlkverifyState *s = bs->opaque;
- bdrv_detach_aio_context(s->test_file);
-}
+ /* bs->file->bs has already been refreshed */
+ bdrv_refresh_filename(s->test_file->bs);
-static void blkverify_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- bdrv_attach_aio_context(s->test_file, new_context);
-}
-
-static void blkverify_refresh_filename(BlockDriverState *bs)
-{
- BDRVBlkverifyState *s = bs->opaque;
-
- /* bs->file has already been refreshed */
- bdrv_refresh_filename(s->test_file);
-
- if (bs->file->full_open_options && s->test_file->full_open_options) {
+ if (bs->file->bs->full_open_options
+ && s->test_file->bs->full_open_options)
+ {
QDict *opts = qdict_new();
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkverify")));
- QINCREF(bs->file->full_open_options);
- qdict_put_obj(opts, "raw", QOBJECT(bs->file->full_open_options));
- QINCREF(s->test_file->full_open_options);
- qdict_put_obj(opts, "test", QOBJECT(s->test_file->full_open_options));
+ QINCREF(bs->file->bs->full_open_options);
+ qdict_put_obj(opts, "raw", QOBJECT(bs->file->bs->full_open_options));
+ QINCREF(s->test_file->bs->full_open_options);
+ qdict_put_obj(opts, "test",
+ QOBJECT(s->test_file->bs->full_open_options));
bs->full_open_options = opts;
}
- if (bs->file->exact_filename[0] && s->test_file->exact_filename[0]) {
+ if (bs->file->bs->exact_filename[0]
+ && s->test_file->bs->exact_filename[0])
+ {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
"blkverify:%s:%s",
- bs->file->exact_filename, s->test_file->exact_filename);
+ bs->file->bs->exact_filename,
+ s->test_file->bs->exact_filename);
}
}
@@ -345,9 +340,6 @@
.bdrv_aio_writev = blkverify_aio_writev,
.bdrv_aio_flush = blkverify_aio_flush,
- .bdrv_attach_aio_context = blkverify_attach_aio_context,
- .bdrv_detach_aio_context = blkverify_detach_aio_context,
-
.is_filter = true,
.bdrv_recurse_is_first_non_filter = blkverify_recurse_is_first_non_filter,
};
diff --git a/block/block-backend.c b/block/block-backend.c
index d0692b1..effa038 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -1,7 +1,7 @@
/*
* QEMU Block backends
*
- * Copyright (C) 2014 Red Hat, Inc.
+ * Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
@@ -10,103 +10,177 @@
* or later. See the COPYING.LIB file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
+#include "block/blockjob.h"
+#include "block/throttle-groups.h"
#include "sysemu/blockdev.h"
+#include "sysemu/sysemu.h"
#include "qapi-event.h"
+#include "qemu/id.h"
+#include "trace.h"
/* Number of coroutines to reserve per attached device model */
#define COROUTINE_POOL_RESERVATION 64
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb);
+
struct BlockBackend {
char *name;
int refcnt;
- BlockDriverState *bs;
+ BdrvChild *root;
DriveInfo *legacy_dinfo; /* null unless created by drive_new() */
- QTAILQ_ENTRY(BlockBackend) link; /* for blk_backends */
+ QTAILQ_ENTRY(BlockBackend) link; /* for block_backends */
+ QTAILQ_ENTRY(BlockBackend) monitor_link; /* for monitor_block_backends */
+ BlockBackendPublic public;
void *dev; /* attached device model, if any */
/* TODO change to DeviceState when all users are qdevified */
const BlockDevOps *dev_ops;
void *dev_opaque;
+
+ /* the block size for which the guest device expects atomicity */
+ int guest_block_size;
+
+ /* If the BDS tree is removed, some of its options are stored here (which
+ * can be used to restore those options in the new BDS on insert) */
+ BlockBackendRootState root_state;
+
+ bool enable_write_cache;
+
+ /* I/O stats (display with "info blockstats"). */
+ BlockAcctStats stats;
+
+ BlockdevOnError on_read_error, on_write_error;
+ bool iostatus_enabled;
+ BlockDeviceIoStatus iostatus;
+
+ bool allow_write_beyond_eof;
+
+ NotifierList remove_bs_notifiers, insert_bs_notifiers;
+};
+
+typedef struct BlockBackendAIOCB {
+ BlockAIOCB common;
+ QEMUBH *bh;
+ BlockBackend *blk;
+ int ret;
+} BlockBackendAIOCB;
+
+static const AIOCBInfo block_backend_aiocb_info = {
+ .get_aio_context = blk_aiocb_get_aio_context,
+ .aiocb_size = sizeof(BlockBackendAIOCB),
};
static void drive_info_del(DriveInfo *dinfo);
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs);
-/* All the BlockBackends (except for hidden ones) */
-static QTAILQ_HEAD(, BlockBackend) blk_backends =
- QTAILQ_HEAD_INITIALIZER(blk_backends);
+/* All BlockBackends */
+static QTAILQ_HEAD(, BlockBackend) block_backends =
+ QTAILQ_HEAD_INITIALIZER(block_backends);
+
+/* All BlockBackends referenced by the monitor, i.e. those iterated through by
+ * blk_next() */
+static QTAILQ_HEAD(, BlockBackend) monitor_block_backends =
+ QTAILQ_HEAD_INITIALIZER(monitor_block_backends);
+
+static void blk_root_inherit_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
+{
+ /* We're not supposed to call this function for root nodes */
+ abort();
+}
+static void blk_root_drained_begin(BdrvChild *child);
+static void blk_root_drained_end(BdrvChild *child);
+
+static void blk_root_change_media(BdrvChild *child, bool load);
+static void blk_root_resize(BdrvChild *child);
+
+static const char *blk_root_get_name(BdrvChild *child)
+{
+ return blk_name(child->opaque);
+}
+
+static const BdrvChildRole child_root = {
+ .inherit_options = blk_root_inherit_options,
+
+ .change_media = blk_root_change_media,
+ .resize = blk_root_resize,
+ .get_name = blk_root_get_name,
+
+ .drained_begin = blk_root_drained_begin,
+ .drained_end = blk_root_drained_end,
+};
/*
- * Create a new BlockBackend with @name, with a reference count of one.
- * @name must not be null or empty.
- * Fail if a BlockBackend with this name already exists.
+ * Create a new BlockBackend with a reference count of one.
* Store an error through @errp on failure, unless it's null.
* Return the new BlockBackend on success, null on failure.
*/
-BlockBackend *blk_new(const char *name, Error **errp)
+BlockBackend *blk_new(void)
{
BlockBackend *blk;
- assert(name && name[0]);
- if (!id_wellformed(name)) {
- error_setg(errp, "Invalid device name");
- return NULL;
- }
- if (blk_by_name(name)) {
- error_setg(errp, "Device with id '%s' already exists", name);
- return NULL;
- }
- if (bdrv_find_node(name)) {
- error_setg(errp,
- "Device name '%s' conflicts with an existing node name",
- name);
- return NULL;
- }
-
blk = g_new0(BlockBackend, 1);
- blk->name = g_strdup(name);
blk->refcnt = 1;
- QTAILQ_INSERT_TAIL(&blk_backends, blk, link);
+ blk_set_enable_write_cache(blk, true);
+
+ qemu_co_queue_init(&blk->public.throttled_reqs[0]);
+ qemu_co_queue_init(&blk->public.throttled_reqs[1]);
+
+ notifier_list_init(&blk->remove_bs_notifiers);
+ notifier_list_init(&blk->insert_bs_notifiers);
+
+ QTAILQ_INSERT_TAIL(&block_backends, blk, link);
return blk;
}
/*
- * Create a new BlockBackend with a new BlockDriverState attached.
- * Otherwise just like blk_new(), which see.
+ * Creates a new BlockBackend, opens a new BlockDriverState, and connects both.
+ *
+ * Just as with bdrv_open(), after having called this function the reference to
+ * @options belongs to the block layer (even on failure).
+ *
+ * TODO: Remove @filename and @flags; it should be possible to specify a whole
+ * BDS tree just by specifying the @options QDict (or @reference,
+ * alternatively). At the time of adding this function, this is not possible,
+ * though, so callers of this function have to be able to specify @filename and
+ * @flags.
*/
-BlockBackend *blk_new_with_bs(const char *name, Error **errp)
+BlockBackend *blk_new_open(const char *filename, const char *reference,
+ QDict *options, int flags, Error **errp)
{
BlockBackend *blk;
BlockDriverState *bs;
- blk = blk_new(name, errp);
- if (!blk) {
+ blk = blk_new();
+ bs = bdrv_open(filename, reference, options, flags, errp);
+ if (!bs) {
+ blk_unref(blk);
return NULL;
}
- bs = bdrv_new_root();
- blk->bs = bs;
- bs->blk = blk;
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+
return blk;
}
static void blk_delete(BlockBackend *blk)
{
assert(!blk->refcnt);
+ assert(!blk->name);
assert(!blk->dev);
- if (blk->bs) {
- assert(blk->bs->blk == blk);
- blk->bs->blk = NULL;
- bdrv_unref(blk->bs);
- blk->bs = NULL;
+ if (blk->root) {
+ blk_remove_bs(blk);
}
- /* Avoid double-remove after blk_hide_on_behalf_of_do_drive_del() */
- if (blk->name[0]) {
- QTAILQ_REMOVE(&blk_backends, blk, link);
- }
- g_free(blk->name);
+ assert(QLIST_EMPTY(&blk->remove_bs_notifiers.notifiers));
+ assert(QLIST_EMPTY(&blk->insert_bs_notifiers.notifiers));
+ QTAILQ_REMOVE(&block_backends, blk, link);
drive_info_del(blk->legacy_dinfo);
+ block_acct_cleanup(&blk->stats);
g_free(blk);
}
@@ -120,6 +194,11 @@
g_free(dinfo);
}
+int blk_get_refcnt(BlockBackend *blk)
+{
+ return blk ? blk->refcnt : 0;
+}
+
/*
* Increment @blk's reference count.
* @blk must not be null.
@@ -145,7 +224,32 @@
}
/*
- * Return the BlockBackend after @blk.
+ * Behaves similarly to blk_next() but iterates over all BlockBackends, even the
+ * ones which are hidden (i.e. are not referenced by the monitor).
+ */
+static BlockBackend *blk_all_next(BlockBackend *blk)
+{
+ return blk ? QTAILQ_NEXT(blk, link)
+ : QTAILQ_FIRST(&block_backends);
+}
+
+void blk_remove_all_bs(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *ctx = blk_get_aio_context(blk);
+
+ aio_context_acquire(ctx);
+ if (blk->root) {
+ blk_remove_bs(blk);
+ }
+ aio_context_release(ctx);
+ }
+}
+
+/*
+ * Return the monitor-owned BlockBackend after @blk.
* If @blk is null, return the first one.
* Else, return @blk's next sibling, which may be null.
*
@@ -156,17 +260,108 @@
*/
BlockBackend *blk_next(BlockBackend *blk)
{
- return blk ? QTAILQ_NEXT(blk, link) : QTAILQ_FIRST(&blk_backends);
+ return blk ? QTAILQ_NEXT(blk, monitor_link)
+ : QTAILQ_FIRST(&monitor_block_backends);
+}
+
+/* Iterates over all top-level BlockDriverStates, i.e. BDSs that are owned by
+ * the monitor or attached to a BlockBackend */
+BlockDriverState *bdrv_next(BdrvNextIterator *it)
+{
+ BlockDriverState *bs;
+
+ /* First, return all root nodes of BlockBackends. In order to avoid
+ * returning a BDS twice when multiple BBs refer to it, we only return it
+ * if the BB is the first one in the parent list of the BDS. */
+ if (it->phase == BDRV_NEXT_BACKEND_ROOTS) {
+ do {
+ it->blk = blk_all_next(it->blk);
+ bs = it->blk ? blk_bs(it->blk) : NULL;
+ } while (it->blk && (bs == NULL || bdrv_first_blk(bs) != it->blk));
+
+ if (bs) {
+ return bs;
+ }
+ it->phase = BDRV_NEXT_MONITOR_OWNED;
+ }
+
+ /* Then return the monitor-owned BDSes without a BB attached. Ignore all
+ * BDSes that are attached to a BlockBackend here; they have been handled
+ * by the above block already */
+ do {
+ it->bs = bdrv_next_monitor_owned(it->bs);
+ bs = it->bs;
+ } while (bs && bdrv_has_blk(bs));
+
+ return bs;
+}
+
+BlockDriverState *bdrv_first(BdrvNextIterator *it)
+{
+ *it = (BdrvNextIterator) {
+ .phase = BDRV_NEXT_BACKEND_ROOTS,
+ };
+
+ return bdrv_next(it);
+}
+
+/*
+ * Add a BlockBackend into the list of backends referenced by the monitor, with
+ * the given @name acting as the handle for the monitor.
+ * Strictly for use by blockdev.c.
+ *
+ * @name must not be null or empty.
+ *
+ * Returns true on success and false on failure. In the latter case, an Error
+ * object is returned through @errp.
+ */
+bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp)
+{
+ assert(!blk->name);
+ assert(name && name[0]);
+
+ if (!id_wellformed(name)) {
+ error_setg(errp, "Invalid device name");
+ return false;
+ }
+ if (blk_by_name(name)) {
+ error_setg(errp, "Device with id '%s' already exists", name);
+ return false;
+ }
+ if (bdrv_find_node(name)) {
+ error_setg(errp,
+ "Device name '%s' conflicts with an existing node name",
+ name);
+ return false;
+ }
+
+ blk->name = g_strdup(name);
+ QTAILQ_INSERT_TAIL(&monitor_block_backends, blk, monitor_link);
+ return true;
+}
+
+/*
+ * Remove a BlockBackend from the list of backends referenced by the monitor.
+ * Strictly for use by blockdev.c.
+ */
+void monitor_remove_blk(BlockBackend *blk)
+{
+ if (!blk->name) {
+ return;
+ }
+
+ QTAILQ_REMOVE(&monitor_block_backends, blk, monitor_link);
+ g_free(blk->name);
+ blk->name = NULL;
}
/*
* Return @blk's name, a non-null string.
- * Wart: the name is empty iff @blk has been hidden with
- * blk_hide_on_behalf_of_do_drive_del().
+ * Returns an empty string iff @blk is not referenced by the monitor.
*/
const char *blk_name(BlockBackend *blk)
{
- return blk->name;
+ return blk->name ?: "";
}
/*
@@ -175,10 +370,10 @@
*/
BlockBackend *blk_by_name(const char *name)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
assert(name);
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (!strcmp(name, blk->name)) {
return blk;
}
@@ -191,7 +386,27 @@
*/
BlockDriverState *blk_bs(BlockBackend *blk)
{
- return blk->bs;
+ return blk->root ? blk->root->bs : NULL;
+}
+
+static BlockBackend *bdrv_first_blk(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ QLIST_FOREACH(child, &bs->parents, next_parent) {
+ if (child->role == &child_root) {
+ return child->opaque;
+ }
+ }
+
+ return NULL;
+}
+
+/*
+ * Returns true if @bs has an associated BlockBackend.
+ */
+bool bdrv_has_blk(BlockDriverState *bs)
+{
+ return bdrv_first_blk(bs) != NULL;
}
/*
@@ -219,9 +434,9 @@
*/
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo)
{
- BlockBackend *blk;
+ BlockBackend *blk = NULL;
- QTAILQ_FOREACH(blk, &blk_backends, link) {
+ while ((blk = blk_next(blk)) != NULL) {
if (blk->legacy_dinfo == dinfo) {
return blk;
}
@@ -230,20 +445,49 @@
}
/*
- * Hide @blk.
- * @blk must not have been hidden already.
- * Make attached BlockDriverState, if any, anonymous.
- * Once hidden, @blk is invisible to all functions that don't receive
- * it as argument. For example, blk_by_name() won't return it.
- * Strictly for use by do_drive_del().
- * TODO get rid of it!
+ * Returns a pointer to the publicly accessible fields of @blk.
*/
-void blk_hide_on_behalf_of_do_drive_del(BlockBackend *blk)
+BlockBackendPublic *blk_get_public(BlockBackend *blk)
{
- QTAILQ_REMOVE(&blk_backends, blk, link);
- blk->name[0] = 0;
- if (blk->bs) {
- bdrv_make_anon(blk->bs);
+ return &blk->public;
+}
+
+/*
+ * Returns a BlockBackend given the associated @public fields.
+ */
+BlockBackend *blk_by_public(BlockBackendPublic *public)
+{
+ return container_of(public, BlockBackend, public);
+}
+
+/*
+ * Disassociates the currently associated BlockDriverState from @blk.
+ */
+void blk_remove_bs(BlockBackend *blk)
+{
+ notifier_list_notify(&blk->remove_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
+
+ blk_update_root_state(blk);
+
+ bdrv_root_unref_child(blk->root);
+ blk->root = NULL;
+}
+
+/*
+ * Associates a new BlockDriverState with @blk.
+ */
+void blk_insert_bs(BlockBackend *blk, BlockDriverState *bs)
+{
+ bdrv_ref(bs);
+ blk->root = bdrv_root_attach_child(bs, "root", &child_root, blk);
+
+ notifier_list_notify(&blk->insert_bs_notifiers, blk);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(
+ &blk->public.throttle_timers, bdrv_get_aio_context(bs));
}
}
@@ -259,10 +503,7 @@
}
blk_ref(blk);
blk->dev = dev;
- bdrv_iostatus_reset(blk->bs);
-
- /* We're expecting I/O from the device so bump up coroutine pool size */
- qemu_coroutine_adjust_pool_size(COROUTINE_POOL_RESERVATION);
+ blk_iostatus_reset(blk);
return 0;
}
@@ -289,8 +530,7 @@
blk->dev = NULL;
blk->dev_ops = NULL;
blk->dev_opaque = NULL;
- bdrv_set_guest_block_size(blk->bs, 512);
- qemu_coroutine_adjust_pool_size(-COROUTINE_POOL_RESERVATION);
+ blk->guest_block_size = 512;
blk_unref(blk);
}
@@ -324,22 +564,24 @@
void blk_dev_change_media_cb(BlockBackend *blk, bool load)
{
if (blk->dev_ops && blk->dev_ops->change_media_cb) {
- bool tray_was_closed = !blk_dev_is_tray_open(blk);
+ bool tray_was_open, tray_is_open;
+ tray_was_open = blk_dev_is_tray_open(blk);
blk->dev_ops->change_media_cb(blk->dev_opaque, load);
- if (tray_was_closed) {
- /* tray open */
- qapi_event_send_device_tray_moved(blk_name(blk),
- true, &error_abort);
- }
- if (load) {
- /* tray close */
- qapi_event_send_device_tray_moved(blk_name(blk),
- false, &error_abort);
+ tray_is_open = blk_dev_is_tray_open(blk);
+
+ if (tray_was_open != tray_is_open) {
+ qapi_event_send_device_tray_moved(blk_name(blk), tray_is_open,
+ &error_abort);
}
}
}
+static void blk_root_change_media(BdrvChild *child, bool load)
+{
+ blk_dev_change_media_cb(child->opaque, load);
+}
+
/*
* Does @blk's attached device model have removable media?
* %true if no device model is attached.
@@ -350,6 +592,14 @@
}
/*
+ * Does @blk's attached device model have a tray?
+ */
+bool blk_dev_has_tray(BlockBackend *blk)
+{
+ return blk->dev_ops && blk->dev_ops->is_tray_open;
+}
+
+/*
* Notify @blk's attached device model of a media eject request.
* If @force is true, the medium is about to be yanked out forcefully.
*/
@@ -365,7 +615,7 @@
*/
bool blk_dev_is_tray_open(BlockBackend *blk)
{
- if (blk->dev_ops && blk->dev_ops->is_tray_open) {
+ if (blk_dev_has_tray(blk)) {
return blk->dev_ops->is_tray_open(blk->dev_opaque);
}
return false;
@@ -386,8 +636,10 @@
/*
* Notify @blk's attached device model of a backend size change.
*/
-void blk_dev_resize_cb(BlockBackend *blk)
+static void blk_root_resize(BdrvChild *child)
{
+ BlockBackend *blk = child->opaque;
+
if (blk->dev_ops && blk->dev_ops->resize_cb) {
blk->dev_ops->resize_cb(blk->dev_opaque);
}
@@ -395,80 +647,434 @@
void blk_iostatus_enable(BlockBackend *blk)
{
- bdrv_iostatus_enable(blk->bs);
+ blk->iostatus_enabled = true;
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
}
-int blk_read(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+/* The I/O status is only enabled if the drive explicitly
+ * enables it _and_ the VM is configured to stop on errors */
+bool blk_iostatus_is_enabled(const BlockBackend *blk)
{
- return bdrv_read(blk->bs, sector_num, buf, nb_sectors);
+ return (blk->iostatus_enabled &&
+ (blk->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+ blk->on_write_error == BLOCKDEV_ON_ERROR_STOP ||
+ blk->on_read_error == BLOCKDEV_ON_ERROR_STOP));
}
-int blk_read_unthrottled(BlockBackend *blk, int64_t sector_num, uint8_t *buf,
- int nb_sectors)
+BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk)
{
- return bdrv_read_unthrottled(blk->bs, sector_num, buf, nb_sectors);
+ return blk->iostatus;
}
-int blk_write(BlockBackend *blk, int64_t sector_num, const uint8_t *buf,
- int nb_sectors)
+void blk_iostatus_disable(BlockBackend *blk)
{
- return bdrv_write(blk->bs, sector_num, buf, nb_sectors);
+ blk->iostatus_enabled = false;
}
-BlockAIOCB *blk_aio_write_zeroes(BlockBackend *blk, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags,
- BlockCompletionFunc *cb, void *opaque)
+void blk_iostatus_reset(BlockBackend *blk)
{
- return bdrv_aio_write_zeroes(blk->bs, sector_num, nb_sectors, flags,
- cb, opaque);
+ if (blk_iostatus_is_enabled(blk)) {
+ BlockDriverState *bs = blk_bs(blk);
+ blk->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+ if (bs && bs->job) {
+ block_job_iostatus_reset(bs->job);
+ }
+ }
+}
+
+void blk_iostatus_set_err(BlockBackend *blk, int error)
+{
+ assert(blk_iostatus_is_enabled(blk));
+ if (blk->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+ blk->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+ BLOCK_DEVICE_IO_STATUS_FAILED;
+ }
+}
+
+void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow)
+{
+ blk->allow_write_beyond_eof = allow;
+}
+
+static int blk_check_byte_request(BlockBackend *blk, int64_t offset,
+ size_t size)
+{
+ int64_t len;
+
+ if (size > INT_MAX) {
+ return -EIO;
+ }
+
+ if (!blk_is_available(blk)) { /* nothing inserted, or the tray is open */
+ return -ENOMEDIUM;
+ }
+
+ if (offset < 0) {
+ return -EIO;
+ }
+
+ if (!blk->allow_write_beyond_eof) {
+ len = blk_getlength(blk);
+ if (len < 0) {
+ return len; /* propagate blk_getlength()'s negative error code */
+ }
+
+ if (offset > len || len - offset < size) { /* must fit within len */
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+static int blk_check_request(BlockBackend *blk, int64_t sector_num,
+ int nb_sectors)
+{
+ if (sector_num < 0 || sector_num > INT64_MAX / BDRV_SECTOR_SIZE) {
+ return -EIO;
+ }
+
+ if (nb_sectors < 0 || nb_sectors > INT_MAX / BDRV_SECTOR_SIZE) {
+ return -EIO;
+ }
+
+ return blk_check_byte_request(blk, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ trace_blk_co_preadv(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, false);
+ }
+
+ return bdrv_co_preadv(blk->root, offset, bytes, qiov, flags);
+}
+
+int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
+ unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ int ret;
+
+ trace_blk_co_pwritev(blk, blk_bs(blk), offset, bytes, flags);
+
+ ret = blk_check_byte_request(blk, offset, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* throttling disk I/O */
+ if (blk->public.throttle_state) {
+ throttle_group_co_io_limits_intercept(blk, bytes, true);
+ }
+
+ if (!blk->enable_write_cache) {
+ flags |= BDRV_REQ_FUA;
+ }
+
+ return bdrv_co_pwritev(blk->root, offset, bytes, qiov, flags);
+}
+
+typedef struct BlkRwCo {
+ BlockBackend *blk;
+ int64_t offset;
+ QEMUIOVector *qiov;
+ int ret;
+ BdrvRequestFlags flags;
+} BlkRwCo;
+
+static void blk_read_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static void blk_write_entry(void *opaque)
+{
+ BlkRwCo *rwco = opaque;
+
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
+ rwco->qiov, rwco->flags);
+}
+
+static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int64_t bytes, CoroutineEntry co_entry,
+ BdrvRequestFlags flags)
+{
+ AioContext *aio_context;
+ QEMUIOVector qiov;
+ struct iovec iov;
+ Coroutine *co;
+ BlkRwCo rwco;
+
+ iov = (struct iovec) {
+ .iov_base = buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = &qiov,
+ .flags = flags,
+ .ret = NOT_DONE,
+ };
+
+ co = qemu_coroutine_create(co_entry, &rwco);
+ qemu_coroutine_enter(co);
+
+ aio_context = blk_get_aio_context(blk);
+ while (rwco.ret == NOT_DONE) {
+ aio_poll(aio_context, true);
+ }
+
+ return rwco.ret;
+}
+
+int blk_pread_unthrottled(BlockBackend *blk, int64_t offset, uint8_t *buf,
+ int count)
+{
+ int ret;
+
+ ret = blk_check_byte_request(blk, offset, count);
+ if (ret < 0) {
+ return ret;
+ }
+
+ blk_root_drained_begin(blk->root);
+ ret = blk_pread(blk, offset, buf, count);
+ blk_root_drained_end(blk->root);
+ return ret;
+}
+
+int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ return blk_prw(blk, offset, NULL, count, blk_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE);
+}
+
+int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags)
+{
+ return bdrv_make_zero(blk->root, flags);
+}
+
+static void error_callback_bh(void *opaque)
+{
+ struct BlockBackendAIOCB *acb = opaque;
+ qemu_bh_delete(acb->bh);
+ acb->common.cb(acb->common.opaque, acb->ret);
+ qemu_aio_unref(acb);
+}
+
+BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
+ BlockCompletionFunc *cb,
+ void *opaque, int ret)
+{
+ struct BlockBackendAIOCB *acb;
+ QEMUBH *bh;
+
+ acb = blk_aio_get(&block_backend_aiocb_info, blk, cb, opaque);
+ acb->blk = blk;
+ acb->ret = ret;
+
+ bh = aio_bh_new(blk_get_aio_context(blk), error_callback_bh, acb);
+ acb->bh = bh;
+ qemu_bh_schedule(bh);
+
+ return &acb->common;
+}
+
+typedef struct BlkAioEmAIOCB {
+ BlockAIOCB common;
+ BlkRwCo rwco;
+ int bytes;
+ bool has_returned;
+ QEMUBH* bh;
+} BlkAioEmAIOCB;
+
+static const AIOCBInfo blk_aio_em_aiocb_info = {
+ .aiocb_size = sizeof(BlkAioEmAIOCB),
+};
+
+static void blk_aio_complete(BlkAioEmAIOCB *acb)
+{
+ if (acb->bh) { /* only set when blk_aio_prwv() scheduled a BH */
+ assert(acb->has_returned);
+ qemu_bh_delete(acb->bh);
+ }
+ if (acb->has_returned) { /* else blk_aio_prwv() is still running */
+ acb->common.cb(acb->common.opaque, acb->rwco.ret);
+ qemu_aio_unref(acb);
+ }
+}
+
+static void blk_aio_complete_bh(void *opaque)
+{
+ blk_aio_complete(opaque);
+}
+
+static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
+ QEMUIOVector *qiov, CoroutineEntry co_entry,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ BlkAioEmAIOCB *acb;
+ Coroutine *co;
+
+ acb = blk_aio_get(&blk_aio_em_aiocb_info, blk, cb, opaque);
+ acb->rwco = (BlkRwCo) {
+ .blk = blk,
+ .offset = offset,
+ .qiov = qiov,
+ .flags = flags,
+ .ret = NOT_DONE, /* sentinel: request has not produced a result yet */
+ };
+ acb->bytes = bytes;
+ acb->bh = NULL;
+ acb->has_returned = false; /* blk_aio_complete() does nothing until set */
+
+ co = qemu_coroutine_create(co_entry, acb);
+ qemu_coroutine_enter(co);
+
+ acb->has_returned = true;
+ if (acb->rwco.ret != NOT_DONE) { /* already finished: complete from a BH */
+ acb->bh = aio_bh_new(blk_get_aio_context(blk), blk_aio_complete_bh, acb);
+ qemu_bh_schedule(acb->bh);
+ }
+
+ return &acb->common;
+}
+
+static void blk_aio_read_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+static void blk_aio_write_entry(void *opaque)
+{
+ BlkAioEmAIOCB *acb = opaque;
+ BlkRwCo *rwco = &acb->rwco;
+
+ assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
+ rwco->qiov, rwco->flags);
+ blk_aio_complete(acb);
+}
+
+BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, offset, count, NULL, blk_aio_write_entry,
+ flags | BDRV_REQ_ZERO_WRITE, cb, opaque);
}
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int count)
{
- return bdrv_pread(blk->bs, offset, buf, count);
+ int ret = blk_prw(blk, offset, buf, count, blk_read_entry, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ return count;
}
-int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count)
+int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int count,
+ BdrvRequestFlags flags)
{
- return bdrv_pwrite(blk->bs, offset, buf, count);
+ int ret = blk_prw(blk, offset, (void *) buf, count, blk_write_entry,
+ flags);
+ if (ret < 0) {
+ return ret;
+ }
+ return count;
}
int64_t blk_getlength(BlockBackend *blk)
{
- return bdrv_getlength(blk->bs);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_getlength(blk_bs(blk));
}
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr)
{
- bdrv_get_geometry(blk->bs, nb_sectors_ptr);
+ if (!blk_bs(blk)) {
+ *nb_sectors_ptr = 0;
+ } else {
+ bdrv_get_geometry(blk_bs(blk), nb_sectors_ptr);
+ }
}
-BlockAIOCB *blk_aio_readv(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+int64_t blk_nb_sectors(BlockBackend *blk)
{
- return bdrv_aio_readv(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_nb_sectors(blk_bs(blk));
}
-BlockAIOCB *blk_aio_writev(BlockBackend *blk, int64_t sector_num,
- QEMUIOVector *iov, int nb_sectors,
+BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_writev(blk->bs, sector_num, iov, nb_sectors, cb, opaque);
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_read_entry, flags, cb, opaque);
+}
+
+BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
+ QEMUIOVector *qiov, BdrvRequestFlags flags,
+ BlockCompletionFunc *cb, void *opaque)
+{
+ return blk_aio_prwv(blk, offset, qiov->size, qiov,
+ blk_aio_write_entry, flags, cb, opaque);
}
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_flush(blk->bs, cb, opaque);
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
+ return bdrv_aio_flush(blk_bs(blk), cb, opaque);
}
-BlockAIOCB *blk_aio_discard(BlockBackend *blk,
- int64_t sector_num, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
+BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk,
+ int64_t offset, int count,
+ BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_discard(blk->bs, sector_num, nb_sectors, cb, opaque);
+ int ret = blk_check_byte_request(blk, offset, count);
+ if (ret < 0) {
+ return blk_abort_aio_request(blk, cb, opaque, ret);
+ }
+
+ return bdrv_aio_pdiscard(blk_bs(blk), offset, count, cb, opaque);
}
void blk_aio_cancel(BlockAIOCB *acb)
@@ -481,30 +1087,58 @@
bdrv_aio_cancel_async(acb);
}
-int blk_aio_multiwrite(BlockBackend *blk, BlockRequest *reqs, int num_reqs)
-{
- return bdrv_aio_multiwrite(blk->bs, reqs, num_reqs);
-}
-
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
{
- return bdrv_ioctl(blk->bs, req, buf);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_ioctl(blk_bs(blk), req, buf);
}
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
{
- return bdrv_aio_ioctl(blk->bs, req, buf, cb, opaque);
+ if (!blk_is_available(blk)) {
+ return blk_abort_aio_request(blk, cb, opaque, -ENOMEDIUM);
+ }
+
+ return bdrv_aio_ioctl(blk_bs(blk), req, buf, cb, opaque);
+}
+
+int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count)
+{
+ int ret = blk_check_byte_request(blk, offset, count);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return bdrv_co_pdiscard(blk_bs(blk), offset, count);
+}
+
+int blk_co_flush(BlockBackend *blk)
+{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_co_flush(blk_bs(blk));
}
int blk_flush(BlockBackend *blk)
{
- return bdrv_flush(blk->bs);
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_flush(blk_bs(blk));
}
-int blk_flush_all(void)
+void blk_drain(BlockBackend *blk)
{
- return bdrv_flush_all();
+ if (blk_bs(blk)) {
+ bdrv_drain(blk_bs(blk));
+ }
}
void blk_drain_all(void)
@@ -512,116 +1146,329 @@
bdrv_drain_all();
}
+void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
+ BlockdevOnError on_write_error)
+{
+ blk->on_read_error = on_read_error;
+ blk->on_write_error = on_write_error;
+}
+
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read)
{
- return bdrv_get_on_error(blk->bs, is_read);
+ return is_read ? blk->on_read_error : blk->on_write_error;
}
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error)
{
- return bdrv_get_error_action(blk->bs, is_read, error);
+ BlockdevOnError on_err = blk_get_on_error(blk, is_read);
+
+ switch (on_err) {
+ case BLOCKDEV_ON_ERROR_ENOSPC:
+ return (error == ENOSPC) ?
+ BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_STOP:
+ return BLOCK_ERROR_ACTION_STOP;
+ case BLOCKDEV_ON_ERROR_REPORT:
+ return BLOCK_ERROR_ACTION_REPORT;
+ case BLOCKDEV_ON_ERROR_IGNORE:
+ return BLOCK_ERROR_ACTION_IGNORE;
+ case BLOCKDEV_ON_ERROR_AUTO:
+ default:
+ abort();
+ }
}
+static void send_qmp_error_event(BlockBackend *blk,
+ BlockErrorAction action,
+ bool is_read, int error)
+{
+ IoOperationType optype;
+
+ optype = is_read ? IO_OPERATION_TYPE_READ : IO_OPERATION_TYPE_WRITE;
+ qapi_event_send_block_io_error(blk_name(blk), optype, action,
+ blk_iostatus_is_enabled(blk),
+ error == ENOSPC, strerror(error),
+ &error_abort);
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error)
{
- bdrv_error_action(blk->bs, action, is_read, error);
+ assert(error >= 0);
+
+ if (action == BLOCK_ERROR_ACTION_STOP) {
+ /* First set the iostatus, so that "info block" returns an iostatus
+ * that matches the events raised so far (an additional error iostatus
+ * is fine, but not a lost one).
+ */
+ blk_iostatus_set_err(blk, error);
+
+ /* Then raise the request to stop the VM and the event.
+ * qemu_system_vmstop_request_prepare has two effects. First,
+ * it ensures that the STOP event always comes after the
+ * BLOCK_IO_ERROR event. Second, it ensures that even if management
+ * can observe the STOP event and do a "cont" before the STOP
+ * event is issued, the VM will not stop. In this case, vm_start()
+ * also ensures that the STOP/RESUME pair of events is emitted.
+ */
+ qemu_system_vmstop_request_prepare();
+ send_qmp_error_event(blk, action, is_read, error);
+ qemu_system_vmstop_request(RUN_STATE_IO_ERROR);
+ } else {
+ send_qmp_error_event(blk, action, is_read, error);
+ }
}
int blk_is_read_only(BlockBackend *blk)
{
- return bdrv_is_read_only(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_is_read_only(bs);
+ } else {
+ return blk->root_state.read_only;
+ }
}
int blk_is_sg(BlockBackend *blk)
{
- return bdrv_is_sg(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ return 0;
+ }
+
+ return bdrv_is_sg(bs);
}
int blk_enable_write_cache(BlockBackend *blk)
{
- return bdrv_enable_write_cache(blk->bs);
+ return blk->enable_write_cache;
}
void blk_set_enable_write_cache(BlockBackend *blk, bool wce)
{
- bdrv_set_enable_write_cache(blk->bs, wce);
+ blk->enable_write_cache = wce;
}
-int blk_is_inserted(BlockBackend *blk)
+void blk_invalidate_cache(BlockBackend *blk, Error **errp)
{
- return bdrv_is_inserted(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ error_setg(errp, "Device '%s' has no medium", blk->name);
+ return;
+ }
+
+ bdrv_invalidate_cache(bs, errp);
+}
+
+bool blk_is_inserted(BlockBackend *blk)
+{
+ BlockDriverState *bs = blk_bs(blk);
+
+ return bs && bdrv_is_inserted(bs);
+}
+
+bool blk_is_available(BlockBackend *blk)
+{
+ return blk_is_inserted(blk) && !blk_dev_is_tray_open(blk);
}
void blk_lock_medium(BlockBackend *blk, bool locked)
{
- bdrv_lock_medium(blk->bs, locked);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_lock_medium(bs, locked);
+ }
}
void blk_eject(BlockBackend *blk, bool eject_flag)
{
- bdrv_eject(blk->bs, eject_flag);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_eject(bs, eject_flag);
+ }
}
int blk_get_flags(BlockBackend *blk)
{
- return bdrv_get_flags(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_flags(bs);
+ } else {
+ return blk->root_state.open_flags;
+ }
+}
+
+/* Returns the maximum transfer length, in bytes; guaranteed nonzero */
+uint32_t blk_get_max_transfer(BlockBackend *blk)
+{
+ BlockDriverState *bs = blk_bs(blk);
+ uint32_t max = 0;
+
+ if (bs) {
+ max = bs->bl.max_transfer;
+ }
+ return MIN_NON_ZERO(max, INT_MAX);
+}
+
+int blk_get_max_iov(BlockBackend *blk)
+{
+ return blk->root->bs->bl.max_iov; /* NOTE(review): blk->root is not NULL-checked; confirm callers always have a root attached */
+}
void blk_set_guest_block_size(BlockBackend *blk, int align)
{
- bdrv_set_guest_block_size(blk->bs, align);
+ blk->guest_block_size = align;
+}
+
+void *blk_try_blockalign(BlockBackend *blk, size_t size)
+{
+ return qemu_try_blockalign(blk ? blk_bs(blk) : NULL, size);
}
void *blk_blockalign(BlockBackend *blk, size_t size)
{
- return qemu_blockalign(blk ? blk->bs : NULL, size);
+ return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
}
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
{
- return bdrv_op_is_blocked(blk->bs, op, errp);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (!bs) {
+ return false;
+ }
+
+ return bdrv_op_is_blocked(bs, op, errp);
}
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason)
{
- bdrv_op_unblock(blk->bs, op, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock(bs, op, reason);
+ }
}
void blk_op_block_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_block_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_block_all(bs, reason);
+ }
}
void blk_op_unblock_all(BlockBackend *blk, Error *reason)
{
- bdrv_op_unblock_all(blk->bs, reason);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_op_unblock_all(bs, reason);
+ }
}
AioContext *blk_get_aio_context(BlockBackend *blk)
{
- return bdrv_get_aio_context(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ return bdrv_get_aio_context(bs);
+ } else {
+ return qemu_get_aio_context();
+ }
+}
+
+static AioContext *blk_aiocb_get_aio_context(BlockAIOCB *acb)
+{
+ BlockBackendAIOCB *blk_acb = DO_UPCAST(BlockBackendAIOCB, common, acb);
+ return blk_get_aio_context(blk_acb->blk);
}
void blk_set_aio_context(BlockBackend *blk, AioContext *new_context)
{
- bdrv_set_aio_context(blk->bs, new_context);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ if (blk->public.throttle_state) {
+ throttle_timers_detach_aio_context(&blk->public.throttle_timers);
+ }
+ bdrv_set_aio_context(bs, new_context);
+ if (blk->public.throttle_state) {
+ throttle_timers_attach_aio_context(&blk->public.throttle_timers,
+ new_context);
+ }
+ }
+}
+
+void blk_add_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *new_context, void *opaque),
+ void (*detach_aio_context)(void *opaque), void *opaque)
+{
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_add_aio_context_notifier(bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
+}
+
+void blk_remove_aio_context_notifier(BlockBackend *blk,
+ void (*attached_aio_context)(AioContext *,
+ void *),
+ void (*detach_aio_context)(void *),
+ void *opaque)
+{
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_remove_aio_context_notifier(bs, attached_aio_context,
+ detach_aio_context, opaque);
+ }
+}
+
+void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify)
+{
+ notifier_list_add(&blk->remove_bs_notifiers, notify);
+}
+
+void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify)
+{
+ notifier_list_add(&blk->insert_bs_notifiers, notify);
}
void blk_io_plug(BlockBackend *blk)
{
- bdrv_io_plug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_plug(bs);
+ }
}
void blk_io_unplug(BlockBackend *blk)
{
- bdrv_io_unplug(blk->bs);
+ BlockDriverState *bs = blk_bs(blk);
+
+ if (bs) {
+ bdrv_io_unplug(bs);
+ }
}
BlockAcctStats *blk_get_stats(BlockBackend *blk)
{
- return bdrv_get_stats(blk->bs);
+ return &blk->stats;
}
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
@@ -629,3 +1476,231 @@
{
return qemu_aio_get(aiocb_info, blk_bs(blk), cb, opaque);
}
+
+int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ return blk_co_pwritev(blk, offset, count, NULL,
+ flags | BDRV_REQ_ZERO_WRITE);
+}
+
+int blk_write_compressed(BlockBackend *blk, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ int ret = blk_check_request(blk, sector_num, nb_sectors);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return bdrv_write_compressed(blk_bs(blk), sector_num, buf, nb_sectors);
+}
+
+int blk_truncate(BlockBackend *blk, int64_t offset)
+{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_truncate(blk_bs(blk), offset);
+}
+
+int blk_pdiscard(BlockBackend *blk, int64_t offset, int count)
+{
+ int ret = blk_check_byte_request(blk, offset, count);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return bdrv_pdiscard(blk_bs(blk), offset, count);
+}
+
+int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
+ int64_t pos, int size)
+{
+ int ret;
+
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ ret = bdrv_save_vmstate(blk_bs(blk), buf, pos, size);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret == size && !blk->enable_write_cache) {
+ ret = bdrv_flush(blk_bs(blk));
+ }
+
+ return ret < 0 ? ret : size;
+}
+
+int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
+{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
+}
+
+int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
+{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_probe_blocksizes(blk_bs(blk), bsz);
+}
+
+int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo)
+{
+ if (!blk_is_available(blk)) {
+ return -ENOMEDIUM;
+ }
+
+ return bdrv_probe_geometry(blk_bs(blk), geo);
+}
+
+/*
+ * Updates the BlockBackendRootState object with data from the currently
+ * attached BlockDriverState.
+ */
+void blk_update_root_state(BlockBackend *blk)
+{
+ assert(blk->root);
+
+ blk->root_state.open_flags = blk->root->bs->open_flags;
+ blk->root_state.read_only = blk->root->bs->read_only;
+ blk->root_state.detect_zeroes = blk->root->bs->detect_zeroes;
+}
+
+/*
+ * Applies the information in the root state to the given BlockDriverState. This
+ * does not include the flags which have to be specified for bdrv_open(), use
+ * blk_get_open_flags_from_root_state() to inquire them.
+ */
+void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
+{
+ bs->detect_zeroes = blk->root_state.detect_zeroes;
+}
+
+/*
+ * Returns the flags to be used for bdrv_open() of a BlockDriverState which is
+ * supposed to inherit the root state.
+ */
+int blk_get_open_flags_from_root_state(BlockBackend *blk)
+{
+ int bs_flags;
+
+ bs_flags = blk->root_state.read_only ? 0 : BDRV_O_RDWR;
+ bs_flags |= blk->root_state.open_flags & ~BDRV_O_RDWR;
+
+ return bs_flags;
+}
+
+BlockBackendRootState *blk_get_root_state(BlockBackend *blk)
+{
+ return &blk->root_state;
+}
+
+int blk_commit_all(void)
+{
+ BlockBackend *blk = NULL;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk) && blk->root->bs->backing) {
+ int ret = bdrv_commit(blk->root->bs);
+ if (ret < 0) {
+ aio_context_release(aio_context);
+ return ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+ return 0;
+}
+
+int blk_flush_all(void)
+{
+ BlockBackend *blk = NULL;
+ int result = 0;
+
+ while ((blk = blk_all_next(blk)) != NULL) {
+ AioContext *aio_context = blk_get_aio_context(blk);
+ int ret;
+
+ aio_context_acquire(aio_context);
+ if (blk_is_inserted(blk)) {
+ ret = blk_flush(blk);
+ if (ret < 0 && !result) {
+ result = ret;
+ }
+ }
+ aio_context_release(aio_context);
+ }
+
+ return result;
+}
+
+
+/* throttling disk I/O limits */
+void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)
+{
+ throttle_group_config(blk, cfg);
+}
+
+void blk_io_limits_disable(BlockBackend *blk)
+{
+ assert(blk->public.throttle_state);
+ bdrv_drained_begin(blk_bs(blk));
+ throttle_group_unregister_blk(blk);
+ bdrv_drained_end(blk_bs(blk));
+}
+
+/* should be called before blk_set_io_limits if a limit is set */
+void blk_io_limits_enable(BlockBackend *blk, const char *group)
+{
+ assert(!blk->public.throttle_state);
+ throttle_group_register_blk(blk, group);
+}
+
+void blk_io_limits_update_group(BlockBackend *blk, const char *group)
+{
+ /* this BB is not part of any group, so there is nothing to update */
+ if (!blk->public.throttle_state) {
+ return;
+ }
+
+ /* this BB is already part of the requested group */
+ if (!g_strcmp0(throttle_group_get_name(blk), group)) {
+ return;
+ }
+
+ /* move this BB: leave its current group, then join the requested one */
+ blk_io_limits_disable(blk);
+ blk_io_limits_enable(blk, group);
+}
+
+static void blk_root_drained_begin(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ /* Note that blk->root may not be accessible here yet if we are just
+ * attaching to a BlockDriverState that is drained. Use child instead. */
+
+ if (blk->public.io_limits_disabled++ == 0) {
+ throttle_group_restart_blk(blk);
+ }
+}
+
+static void blk_root_drained_end(BdrvChild *child)
+{
+ BlockBackend *blk = child->opaque;
+
+ assert(blk->public.io_limits_disabled);
+ --blk->public.io_limits_disabled;
+}
diff --git a/block/bochs.c b/block/bochs.c
index 199ac2b..8c9652e 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -22,9 +22,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
/**************************************************************/
@@ -101,7 +104,7 @@
struct bochs_header bochs;
int ret;
- bs->read_only = 1; // no write support yet
+ bs->read_only = true; /* no write support yet */
ret = bdrv_pread(bs->file, 0, &bochs, sizeof(bochs));
if (ret < 0) {
@@ -185,6 +188,11 @@
return ret;
}
+static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
{
BDRVBochsState *s = bs->opaque;
@@ -219,38 +227,52 @@
return bitmap_offset + (512 * (s->bitmap_blocks + extent_offset));
}
-static int bochs_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+bochs_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
+ BDRVBochsState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ uint64_t bytes_done = 0;
+ QEMUIOVector local_qiov;
int ret;
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock);
+
while (nb_sectors > 0) {
int64_t block_offset = seek_to_sector(bs, sector_num);
if (block_offset < 0) {
- return block_offset;
- } else if (block_offset > 0) {
- ret = bdrv_pread(bs->file, block_offset, buf, 512);
+ ret = block_offset;
+ goto fail;
+ }
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, 512);
+
+ if (block_offset > 0) {
+ ret = bdrv_co_preadv(bs->file, block_offset, 512,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512);
+ qemu_iovec_memset(&local_qiov, 0, 0, 512);
}
nb_sectors--;
sector_num++;
- buf += 512;
+ bytes_done += 512;
}
- return 0;
-}
-static coroutine_fn int bochs_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVBochsState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = bochs_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -265,7 +287,8 @@
.instance_size = sizeof(BDRVBochsState),
.bdrv_probe = bochs_probe,
.bdrv_open = bochs_open,
- .bdrv_read = bochs_co_read,
+ .bdrv_refresh_limits = bochs_refresh_limits,
+ .bdrv_co_preadv = bochs_co_preadv,
.bdrv_close = bochs_close,
};
diff --git a/block/cloop.c b/block/cloop.c
index f328be0..7b75f7e 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -21,9 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include <zlib.h>
/* Maximum compressed block size */
@@ -63,7 +66,7 @@
uint32_t offsets_size, max_compressed_block_size = 1, i;
int ret;
- bs->read_only = 1;
+ bs->read_only = true;
/* read header */
ret = bdrv_pread(bs->file, 128, &s->block_size, 4);
@@ -195,6 +198,11 @@
return ret;
}
+static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static inline int cloop_read_block(BlockDriverState *bs, int block_num)
{
BDRVCloopState *s = bs->opaque;
@@ -203,8 +211,8 @@
int ret;
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
- ret = bdrv_pread(bs->file, s->offsets[block_num], s->compressed_block,
- bytes);
+ ret = bdrv_pread(bs->file, s->offsets[block_num],
+ s->compressed_block, bytes);
if (ret != bytes) {
return -1;
}
@@ -227,33 +235,38 @@
return 0;
}
-static int cloop_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+cloop_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVCloopState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
+ void *data;
uint32_t sector_offset_in_block =
((sector_num + i) % s->sectors_per_block),
block_num = (sector_num + i) / s->sectors_per_block;
if (cloop_read_block(bs, block_num) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
}
- memcpy(buf + i * 512,
- s->uncompressed_block + sector_offset_in_block * 512, 512);
- }
- return 0;
-}
-static coroutine_fn int cloop_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVCloopState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = cloop_read(bs, sector_num, buf, nb_sectors);
+ data = s->uncompressed_block + sector_offset_in_block * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
+ }
+
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
@@ -271,7 +284,8 @@
.instance_size = sizeof(BDRVCloopState),
.bdrv_probe = cloop_probe,
.bdrv_open = cloop_open,
- .bdrv_read = cloop_co_read,
+ .bdrv_refresh_limits = cloop_refresh_limits,
+ .bdrv_co_preadv = cloop_co_preadv,
.bdrv_close = cloop_close,
};
diff --git a/block/commit.c b/block/commit.c
index cfa2bbe..553e18d 100644
--- a/block/commit.c
+++ b/block/commit.c
@@ -12,10 +12,14 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -32,28 +36,36 @@
BlockJob common;
RateLimit limit;
BlockDriverState *active;
- BlockDriverState *top;
- BlockDriverState *base;
+ BlockBackend *top;
+ BlockBackend *base;
BlockdevOnError on_error;
int base_flags;
int orig_overlay_flags;
char *backing_file_str;
} CommitBlockJob;
-static int coroutine_fn commit_populate(BlockDriverState *bs,
- BlockDriverState *base,
+static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
int64_t sector_num, int nb_sectors,
void *buf)
{
int ret = 0;
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
- ret = bdrv_read(bs, sector_num, buf, nb_sectors);
- if (ret) {
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
+ qiov.size, &qiov, 0);
+ if (ret < 0) {
return ret;
}
- ret = bdrv_write(base, sector_num, buf, nb_sectors);
- if (ret) {
+ ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
+ qiov.size, &qiov, 0);
+ if (ret < 0) {
return ret;
}
@@ -69,8 +81,8 @@
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
CommitCompleteData *data = opaque;
BlockDriverState *active = s->active;
- BlockDriverState *top = s->top;
- BlockDriverState *base = s->base;
+ BlockDriverState *top = blk_bs(s->top);
+ BlockDriverState *base = blk_bs(s->base);
BlockDriverState *overlay_bs;
int ret = data->ret;
@@ -90,6 +102,8 @@
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
}
g_free(s->backing_file_str);
+ blk_unref(s->top);
+ blk_unref(s->base);
block_job_completed(&s->common, ret);
g_free(data);
}
@@ -98,42 +112,39 @@
{
CommitBlockJob *s = opaque;
CommitCompleteData *data;
- BlockDriverState *top = s->top;
- BlockDriverState *base = s->base;
int64_t sector_num, end;
+ uint64_t delay_ns = 0;
int ret = 0;
int n = 0;
void *buf = NULL;
int bytes_written = 0;
int64_t base_len;
- ret = s->common.len = bdrv_getlength(top);
+ ret = s->common.len = blk_getlength(s->top);
if (s->common.len < 0) {
goto out;
}
- ret = base_len = bdrv_getlength(base);
+ ret = base_len = blk_getlength(s->base);
if (base_len < 0) {
goto out;
}
if (base_len < s->common.len) {
- ret = bdrv_truncate(base, s->common.len);
+ ret = blk_truncate(s->base, s->common.len);
if (ret) {
goto out;
}
}
end = s->common.len >> BDRV_SECTOR_BITS;
- buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+ buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
for (sector_num = 0; sector_num < end; sector_num += n) {
- uint64_t delay_ns = 0;
bool copy;
-wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -142,25 +153,20 @@
break;
}
/* Copy if allocated above the base */
- ret = bdrv_is_allocated_above(top, base, sector_num,
+ ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
+ sector_num,
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
&n);
copy = (ret == 1);
trace_commit_one_iteration(s, sector_num, n, ret);
if (copy) {
- if (s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, n);
- if (delay_ns > 0) {
- goto wait;
- }
- }
- ret = commit_populate(top, base, sector_num, n, buf);
+ ret = commit_populate(s->top, s->base, sector_num, n, buf);
bytes_written += n * BDRV_SECTOR_SIZE;
}
if (ret < 0) {
- if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
- s->on_error == BLOCKDEV_ON_ERROR_REPORT||
- (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+ BlockErrorAction action =
+ block_job_error_action(&s->common, false, s->on_error, -ret);
+ if (action == BLOCK_ERROR_ACTION_REPORT) {
goto out;
} else {
n = 0;
@@ -169,6 +175,10 @@
}
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
+
+ if (copy && s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, n);
+ }
}
ret = 0;
@@ -186,7 +196,7 @@
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
if (speed < 0) {
- error_set(errp, QERR_INVALID_PARAMETER, "speed");
+ error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
@@ -198,8 +208,8 @@
.set_speed = commit_set_speed,
};
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
- BlockDriverState *top, int64_t speed,
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top, int64_t speed,
BlockdevOnError on_error, BlockCompletionFunc *cb,
void *opaque, const char *backing_file_str, Error **errp)
{
@@ -210,13 +220,6 @@
BlockDriverState *overlay_bs;
Error *local_err = NULL;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
- error_setg(errp, "Invalid parameter combination");
- return;
- }
-
assert(top != bs);
if (top == base) {
error_setg(errp, "Invalid files for merge: top and base are the same");
@@ -230,34 +233,40 @@
return;
}
+ s = block_job_create(job_id, &commit_job_driver, bs, speed,
+ cb, opaque, errp);
+ if (!s) {
+ return;
+ }
+
orig_base_flags = bdrv_get_flags(base);
orig_overlay_flags = bdrv_get_flags(overlay_bs);
/* convert base & overlay_bs to r/w, if necessary */
- if (!(orig_base_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, base,
- orig_base_flags | BDRV_O_RDWR);
- }
if (!(orig_overlay_flags & BDRV_O_RDWR)) {
- reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+ reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
orig_overlay_flags | BDRV_O_RDWR);
}
+ if (!(orig_base_flags & BDRV_O_RDWR)) {
+ reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
+ orig_base_flags | BDRV_O_RDWR);
+ }
if (reopen_queue) {
bdrv_reopen_multiple(reopen_queue, &local_err);
if (local_err != NULL) {
error_propagate(errp, local_err);
+ block_job_unref(&s->common);
return;
}
}
- s = block_job_create(&commit_job_driver, bs, speed, cb, opaque, errp);
- if (!s) {
- return;
- }
+ s->base = blk_new();
+ blk_insert_bs(s->base, base);
- s->base = base;
- s->top = top;
+ s->top = blk_new();
+ blk_insert_bs(s->top, top);
+
s->active = bs;
s->base_flags = orig_base_flags;
@@ -266,8 +275,129 @@
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
- s->common.co = qemu_coroutine_create(commit_run);
+ s->common.co = qemu_coroutine_create(commit_run, s);
trace_commit_start(bs, base, top, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
+}
+
+
+#define COMMIT_BUF_SECTORS 2048
+
+/*
+ * Commit COW file into the raw image.
+ *
+ * Copies every allocated range of @bs into its backing file, working in
+ * chunks of at most COMMIT_BUF_SECTORS sectors. The backing file is grown
+ * first if @bs is longer, and is temporarily reopened read-write when it
+ * was opened read-only. If the driver implements bdrv_make_empty, @bs is
+ * emptied after the copy.
+ *
+ * Returns 0 on success, or a negative errno:
+ *   -ENOMEDIUM  @bs has no driver
+ *   -ENOTSUP    @bs has no backing file
+ *   -EBUSY      commit is blocked on @bs or on its backing file
+ *   -EACCES     the backing file could not be reopened read-write
+ *   -ENOMEM     the bounce buffer could not be allocated
+ *   other       propagated from the length, truncate, allocation-status,
+ *               read, write or make_empty calls
+ */
+int bdrv_commit(BlockDriverState *bs)
+{
+ BlockBackend *src, *backing;
+ BlockDriver *drv = bs->drv;
+ int64_t sector, total_sectors, length, backing_length;
+ int n, ro, open_flags;
+ int ret = 0;
+ uint8_t *buf = NULL;
+
+ if (!drv)
+ return -ENOMEDIUM;
+
+ if (!bs->backing) {
+ return -ENOTSUP;
+ }
+
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
+ bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
+ return -EBUSY;
+ }
+
+ /* Remember the backing file's original mode so it can be restored */
+ ro = bs->backing->bs->read_only;
+ open_flags = bs->backing->bs->open_flags;
+
+ if (ro) {
+ if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
+ return -EACCES;
+ }
+ }
+
+ /* Temporary BlockBackends for the I/O below; dropped at ro_cleanup */
+ src = blk_new();
+ blk_insert_bs(src, bs);
+
+ backing = blk_new();
+ blk_insert_bs(backing, bs->backing->bs);
+
+ length = blk_getlength(src);
+ if (length < 0) {
+ ret = length;
+ goto ro_cleanup;
+ }
+
+ backing_length = blk_getlength(backing);
+ if (backing_length < 0) {
+ ret = backing_length;
+ goto ro_cleanup;
+ }
+
+ /* If our top snapshot is larger than the backing file image,
+ * grow the backing file image if possible. If not possible,
+ * we must return an error */
+ if (length > backing_length) {
+ ret = blk_truncate(backing, length);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ }
+
+ total_sectors = length >> BDRV_SECTOR_BITS;
+
+ /* blk_try_blockalign() for src will choose an alignment that works for
+ * backing as well, so no need to compare the alignment manually. */
+ buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
+ if (buf == NULL) {
+ ret = -ENOMEM;
+ goto ro_cleanup;
+ }
+
+ /* n is set by bdrv_is_allocated() and is the step for the next round */
+ for (sector = 0; sector < total_sectors; sector += n) {
+ ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ /* Non-zero means the range is allocated in bs: copy it down */
+ if (ret) {
+ ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
+ n * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+
+ ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
+ n * BDRV_SECTOR_SIZE, 0);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ }
+ }
+
+ if (drv->bdrv_make_empty) {
+ ret = drv->bdrv_make_empty(bs);
+ if (ret < 0) {
+ goto ro_cleanup;
+ }
+ /* NOTE(review): the result of this flush is discarded */
+ blk_flush(src);
+ }
+
+ /*
+ * Make sure all data we wrote to the backing device is actually
+ * stable on disk.
+ *
+ * NOTE(review): the result of this flush is discarded, and it runs
+ * after bdrv_make_empty() above; confirm a failed flush should not be
+ * reported to the caller.
+ */
+ blk_flush(backing);
+
+ ret = 0;
+ro_cleanup:
+ qemu_vfree(buf);
+
+ blk_unref(src);
+ blk_unref(backing);
+
+ if (ro) {
+ /* ignoring error return here */
+ bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
+ }
+
+ return ret;
}
diff --git a/block/crypto.c b/block/crypto.c
new file mode 100644
index 0000000..7f61e12
--- /dev/null
+++ b/block/crypto.c
@@ -0,0 +1,635 @@
+/*
+ * QEMU block full disk encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "crypto/block.h"
+#include "qapi/opts-visitor.h"
+#include "qapi-visit.h"
+#include "qapi/error.h"
+
+#define BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET "key-secret"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG "cipher-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE "cipher-mode"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG "ivgen-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG "ivgen-hash-alg"
+#define BLOCK_CRYPTO_OPT_LUKS_HASH_ALG "hash-alg"
+
+typedef struct BlockCrypto BlockCrypto;
+
+struct BlockCrypto {
+ QCryptoBlock *block;
+};
+
+
+/*
+ * Probe helper shared by the format-specific probe callbacks.
+ *
+ * Returns 100 when qcrypto_block_has_format() recognises @buf as @format,
+ * and 0 otherwise. @filename is not consulted.
+ */
+static int block_crypto_probe_generic(QCryptoBlockFormat format,
+                                      const uint8_t *buf,
+                                      int buf_size,
+                                      const char *filename)
+{
+    return qcrypto_block_has_format(format, buf, buf_size) ? 100 : 0;
+}
+
+
+/*
+ * Read callback handed to qcrypto_block_open(): reads @buflen bytes at
+ * @offset from the file underlying the BlockDriverState passed as @opaque.
+ *
+ * Returns whatever bdrv_pread() returned; on a negative result an error
+ * is also stored in @errp.
+ */
+static ssize_t block_crypto_read_func(QCryptoBlock *block,
+                                      size_t offset,
+                                      uint8_t *buf,
+                                      size_t buflen,
+                                      Error **errp,
+                                      void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    ssize_t nread = bdrv_pread(bs->file, offset, buf, buflen);
+
+    if (nread < 0) {
+        error_setg_errno(errp, -nread, "Could not read encryption header");
+    }
+    return nread;
+}
+
+
+/*
+ * State shared between block_crypto_create_generic() and the init/write
+ * callbacks it passes to qcrypto_block_create().
+ */
+struct BlockCryptoCreateData {
+ const char *filename; /* image file to create and then open */
+ QemuOpts *opts; /* creation options; BLOCK_OPT_SIZE is set by the init callback */
+ BlockBackend *blk; /* opened by the init callback, written by the write callback */
+ uint64_t size; /* requested size; the init callback adds the header length */
+};
+
+
+/*
+ * Write callback handed to qcrypto_block_create(): writes @buflen bytes
+ * at @offset through the BlockBackend stored in the BlockCryptoCreateData
+ * passed as @opaque.
+ *
+ * Returns whatever blk_pwrite() returned; on a negative result an error
+ * is also stored in @errp.
+ */
+static ssize_t block_crypto_write_func(QCryptoBlock *block,
+                                       size_t offset,
+                                       const uint8_t *buf,
+                                       size_t buflen,
+                                       Error **errp,
+                                       void *opaque)
+{
+    struct BlockCryptoCreateData *create = opaque;
+    ssize_t nwritten = blk_pwrite(create->blk, offset, buf, buflen, 0);
+
+    if (nwritten < 0) {
+        error_setg_errno(errp, -nwritten, "Could not write encryption header");
+    }
+    return nwritten;
+}
+
+
+/*
+ * Init callback handed to qcrypto_block_create(): creates the image file
+ * large enough for the requested size plus @headerlen, then opens it and
+ * stores the resulting BlockBackend in the BlockCryptoCreateData passed
+ * as @opaque.
+ *
+ * Returns 0 on success, -1 if the file could not be created or opened
+ * (the callee reports the reason through @errp).
+ */
+static ssize_t block_crypto_init_func(QCryptoBlock *block,
+                                      size_t headerlen,
+                                      Error **errp,
+                                      void *opaque)
+{
+    struct BlockCryptoCreateData *create = opaque;
+
+    /* The size the user asked for is what the guest must be able to
+     * use, so the file has to be bigger by the space the crypto header
+     * occupies.
+     */
+    create->size += headerlen;
+    qemu_opt_set_number(create->opts, BLOCK_OPT_SIZE, create->size,
+                        &error_abort);
+
+    if (bdrv_create_file(create->filename, create->opts, errp) < 0) {
+        return -1;
+    }
+
+    create->blk = blk_new_open(create->filename, NULL, NULL,
+                               BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+
+    return create->blk ? 0 : -1;
+}
+
+
+/*
+ * Options accepted when opening a LUKS image (passed to
+ * block_crypto_open_generic() by block_crypto_open_luks()).
+ */
+static QemuOptsList block_crypto_runtime_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_runtime_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+/*
+ * Options accepted when creating a LUKS image; installed as .create_opts
+ * of the "luks" block driver.
+ */
+static QemuOptsList block_crypto_create_opts_luks = {
+ .name = "crypto",
+ .head = QTAILQ_HEAD_INITIALIZER(block_crypto_create_opts_luks.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_KEY_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret that provides the encryption key",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_CIPHER_MODE,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption cipher mode",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_IVGEN_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of IV generator hash algorithm",
+ },
+ {
+ .name = BLOCK_CRYPTO_OPT_LUKS_HASH_ALG,
+ .type = QEMU_OPT_STRING,
+ .help = "Name of encryption hash algorithm",
+ },
+ { /* end of list */ }
+ },
+};
+
+
+/*
+ * Build a QCryptoBlockOpenOptions for @format from the values in @opts,
+ * using an options visitor.
+ *
+ * Returns a newly allocated object that the caller releases with
+ * qapi_free_QCryptoBlockOpenOptions(), or NULL with @errp set if a visit
+ * step fails or @format is not handled.
+ */
+static QCryptoBlockOpenOptions *
+block_crypto_open_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ Visitor *v;
+ QCryptoBlockOpenOptions *ret = NULL;
+ Error *local_err = NULL;
+
+ ret = g_new0(QCryptoBlockOpenOptions, 1);
+ ret->format = format;
+
+ v = opts_visitor_new(opts);
+
+ visit_start_struct(v, NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ /* Fill in the format-specific members of the union */
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockOptionsLUKS_members(
+ v, &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+ if (!local_err) {
+ visit_check_struct(v, &local_err);
+ }
+
+ visit_end_struct(v, NULL);
+
+ out:
+ /* On any error discard the partially built result */
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockOpenOptions(ret);
+ ret = NULL;
+ }
+ visit_free(v);
+ return ret;
+}
+
+
+/*
+ * Build a QCryptoBlockCreateOptions for @format from the values in @opts,
+ * using an options visitor.
+ *
+ * Returns a newly allocated object that the caller releases with
+ * qapi_free_QCryptoBlockCreateOptions(), or NULL with @errp set if a
+ * visit step fails or @format is not handled.
+ */
+static QCryptoBlockCreateOptions *
+block_crypto_create_opts_init(QCryptoBlockFormat format,
+ QemuOpts *opts,
+ Error **errp)
+{
+ Visitor *v;
+ QCryptoBlockCreateOptions *ret = NULL;
+ Error *local_err = NULL;
+
+ ret = g_new0(QCryptoBlockCreateOptions, 1);
+ ret->format = format;
+
+ v = opts_visitor_new(opts);
+
+ visit_start_struct(v, NULL, NULL, 0, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ /* Fill in the format-specific members of the union */
+ switch (format) {
+ case Q_CRYPTO_BLOCK_FORMAT_LUKS:
+ visit_type_QCryptoBlockCreateOptionsLUKS_members(
+ v, &ret->u.luks, &local_err);
+ break;
+
+ default:
+ error_setg(&local_err, "Unsupported block format %d", format);
+ break;
+ }
+ if (!local_err) {
+ visit_check_struct(v, &local_err);
+ }
+
+ visit_end_struct(v, NULL);
+
+ out:
+ /* On any error discard the partially built result */
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_QCryptoBlockCreateOptions(ret);
+ ret = NULL;
+ }
+ visit_free(v);
+ return ret;
+}
+
+
+/*
+ * Common open path for encrypted block formats.
+ *
+ * Absorbs the entries of @options described by @opts_spec, converts them
+ * into QCryptoBlockOpenOptions for @format and opens the crypto block,
+ * reading the header through block_crypto_read_func(). BDRV_O_NO_IO in
+ * @flags is forwarded as QCRYPTO_BLOCK_OPEN_NO_IO. On success the crypto
+ * block is stored in bs->opaque and @bs is marked encrypted with a valid
+ * key.
+ *
+ * Returns 0 on success, -EINVAL if the options are rejected, or -EIO if
+ * the crypto block cannot be opened; @errp is set on failure.
+ */
+static int block_crypto_open_generic(QCryptoBlockFormat format,
+                                     QemuOptsList *opts_spec,
+                                     BlockDriverState *bs,
+                                     QDict *options,
+                                     int flags,
+                                     Error **errp)
+{
+    BlockCrypto *crypto = bs->opaque;
+    QemuOpts *opts = NULL;
+    Error *local_err = NULL;
+    int ret = -EINVAL;
+    QCryptoBlockOpenOptions *open_opts = NULL;
+    unsigned int cflags = 0;
+
+    opts = qemu_opts_create(opts_spec, NULL, 0, &error_abort);
+    qemu_opts_absorb_qdict(opts, options, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto cleanup;
+    }
+
+    open_opts = block_crypto_open_opts_init(format, opts, errp);
+    if (!open_opts) {
+        goto cleanup;
+    }
+
+    if (flags & BDRV_O_NO_IO) {
+        cflags |= QCRYPTO_BLOCK_OPEN_NO_IO;
+    }
+    crypto->block = qcrypto_block_open(open_opts,
+                                       block_crypto_read_func,
+                                       bs,
+                                       cflags,
+                                       errp);
+
+    if (!crypto->block) {
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    bs->encrypted = true;
+    bs->valid_key = true;
+
+    ret = 0;
+ cleanup:
+    qapi_free_QCryptoBlockOpenOptions(open_opts);
+    /* opts only served to build open_opts; it was previously leaked on
+     * every path through this function.
+     */
+    qemu_opts_del(opts);
+    return ret;
+}
+
+
+/*
+ * Common image-creation path for encrypted block formats.
+ *
+ * Takes BLOCK_OPT_SIZE out of @opts (rounded up to a sector multiple),
+ * converts the remaining options into QCryptoBlockCreateOptions and calls
+ * qcrypto_block_create(), which drives block_crypto_init_func() and
+ * block_crypto_write_func() to create the file and write the header.
+ *
+ * Returns 0 on success, -1 if the options are rejected, or -EIO if
+ * qcrypto_block_create() fails; @errp is set on failure.
+ */
+static int block_crypto_create_generic(QCryptoBlockFormat format,
+ const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ int ret = -EINVAL;
+ QCryptoBlockCreateOptions *create_opts = NULL;
+ QCryptoBlock *crypto = NULL;
+ struct BlockCryptoCreateData data = {
+ .size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+ BDRV_SECTOR_SIZE),
+ .opts = opts,
+ .filename = filename,
+ };
+
+ create_opts = block_crypto_create_opts_init(format, opts, errp);
+ if (!create_opts) {
+ return -1;
+ }
+
+ crypto = qcrypto_block_create(create_opts,
+ block_crypto_init_func,
+ block_crypto_write_func,
+ &data,
+ errp);
+
+ if (!crypto) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ ret = 0;
+ cleanup:
+ /* The crypto object and the BlockBackend were only needed to write
+ * the header; both are released whether or not creation succeeded.
+ */
+ qcrypto_block_free(crypto);
+ blk_unref(data.blk);
+ qapi_free_QCryptoBlockCreateOptions(create_opts);
+ return ret;
+}
+
+/*
+ * Resize the image so that the payload is @offset bytes long: the
+ * underlying file is truncated to @offset plus the payload offset
+ * reported by the crypto block.
+ *
+ * Returns the result of bdrv_truncate() on the underlying file.
+ */
+static int block_crypto_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BlockCrypto *crypto = bs->opaque;
+    size_t header_len = qcrypto_block_get_payload_offset(crypto->block);
+
+    return bdrv_truncate(bs->file->bs, offset + header_len);
+}
+
+/* Close callback: releases the crypto block stored in bs->opaque. */
+static void block_crypto_close(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ qcrypto_block_free(crypto->block);
+}
+
+
+#define BLOCK_CRYPTO_MAX_SECTORS 32
+
+/*
+ * Read and decrypt @remaining_sectors sectors starting at payload sector
+ * @sector_num into @qiov.
+ *
+ * The request is processed in chunks of at most BLOCK_CRYPTO_MAX_SECTORS
+ * sectors: each chunk is read from the underlying file into a bounce
+ * buffer, decrypted in place, then copied into @qiov.
+ *
+ * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
+ * -EIO if decryption fails, or the negative result of bdrv_co_readv().
+ */
+static coroutine_fn int
+block_crypto_co_readv(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ /* Payload offset converted from bytes to 512-byte sectors */
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ /* Bounce buffer so we have a linear mem region for
+ * entire sector. XXX optimize so we avoid bounce
+ * buffer in case that qiov->niov == 1
+ */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ /* The file position is shifted past the header; the decrypt call
+ * below is given the payload-relative sector number instead.
+ */
+ ret = bdrv_co_readv(bs->file,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ /* NULL errp: the detailed decryption error is not reported */
+ if (qcrypto_block_decrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_from_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+/*
+ * Encrypt and write @remaining_sectors sectors from @qiov starting at
+ * payload sector @sector_num.
+ *
+ * The request is processed in chunks of at most BLOCK_CRYPTO_MAX_SECTORS
+ * sectors: each chunk is copied from @qiov into a bounce buffer,
+ * encrypted in place, then written to the underlying file. The caller's
+ * buffers are not modified.
+ *
+ * Returns 0 on success, -ENOMEM if the bounce buffer cannot be allocated,
+ * -EIO if encryption fails, or the negative result of bdrv_co_writev().
+ */
+static coroutine_fn int
+block_crypto_co_writev(BlockDriverState *bs, int64_t sector_num,
+ int remaining_sectors, QEMUIOVector *qiov)
+{
+ BlockCrypto *crypto = bs->opaque;
+ int cur_nr_sectors; /* number of sectors in current iteration */
+ uint64_t bytes_done = 0;
+ uint8_t *cipher_data = NULL;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+ /* Payload offset converted from bytes to 512-byte sectors */
+ size_t payload_offset =
+ qcrypto_block_get_payload_offset(crypto->block) / 512;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ /* Bounce buffer so we have a linear mem region for
+ * entire sector. XXX optimize so we avoid bounce
+ * buffer in case that qiov->niov == 1
+ */
+ cipher_data =
+ qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_SECTORS * 512,
+ qiov->size));
+ if (cipher_data == NULL) {
+ ret = -ENOMEM;
+ goto cleanup;
+ }
+
+ while (remaining_sectors) {
+ cur_nr_sectors = remaining_sectors;
+
+ if (cur_nr_sectors > BLOCK_CRYPTO_MAX_SECTORS) {
+ cur_nr_sectors = BLOCK_CRYPTO_MAX_SECTORS;
+ }
+
+ qemu_iovec_to_buf(qiov, bytes_done,
+ cipher_data, cur_nr_sectors * 512);
+
+ /* NULL errp: the detailed encryption error is not reported */
+ if (qcrypto_block_encrypt(crypto->block,
+ sector_num,
+ cipher_data, cur_nr_sectors * 512,
+ NULL) < 0) {
+ ret = -EIO;
+ goto cleanup;
+ }
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_add(&hd_qiov, cipher_data, cur_nr_sectors * 512);
+
+ /* The file position is shifted past the header */
+ ret = bdrv_co_writev(bs->file,
+ payload_offset + sector_num,
+ cur_nr_sectors, &hd_qiov);
+ if (ret < 0) {
+ goto cleanup;
+ }
+
+ remaining_sectors -= cur_nr_sectors;
+ sector_num += cur_nr_sectors;
+ bytes_done += cur_nr_sectors * 512;
+ }
+
+ cleanup:
+ qemu_iovec_destroy(&hd_qiov);
+ qemu_vfree(cipher_data);
+
+ return ret;
+}
+
+
+/*
+ * Report the size of the payload: the length of the underlying file
+ * minus the payload offset reported by the crypto block.
+ *
+ * Returns the payload length in bytes, the negative errno from
+ * bdrv_getlength() unchanged, or -EIO if the underlying file is shorter
+ * than the payload offset. Previously the offset was subtracted even from
+ * an error value, which corrupted the errno.
+ */
+static int64_t block_crypto_getlength(BlockDriverState *bs)
+{
+    BlockCrypto *crypto = bs->opaque;
+    int64_t len = bdrv_getlength(bs->file->bs);
+    uint64_t offset = qcrypto_block_get_payload_offset(crypto->block);
+
+    if (len < 0) {
+        /* Propagate the error code untouched */
+        return len;
+    }
+    if (offset > (uint64_t)len) {
+        /* File is truncated inside the header: no payload to report */
+        return -EIO;
+    }
+
+    return len - (int64_t)offset;
+}
+
+
+/* Probe callback for "luks": block_crypto_probe_generic() with the LUKS format. */
+static int block_crypto_probe_luks(const uint8_t *buf,
+ int buf_size,
+ const char *filename) {
+ return block_crypto_probe_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ buf, buf_size, filename);
+}
+
+/*
+ * Open callback for "luks": block_crypto_open_generic() with the LUKS
+ * format and the LUKS runtime option list.
+ */
+static int block_crypto_open_luks(BlockDriverState *bs,
+ QDict *options,
+ int flags,
+ Error **errp)
+{
+ return block_crypto_open_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ &block_crypto_runtime_opts_luks,
+ bs, options, flags, errp);
+}
+
+/* Create callback for "luks": block_crypto_create_generic() with the LUKS format. */
+static int block_crypto_create_luks(const char *filename,
+ QemuOpts *opts,
+ Error **errp)
+{
+ return block_crypto_create_generic(Q_CRYPTO_BLOCK_FORMAT_LUKS,
+ filename, opts, errp);
+}
+
+/*
+ * Fill @bdi for a LUKS image: the cluster size is taken from the
+ * underlying file, and both zero-related capabilities are reported as
+ * false.
+ *
+ * Returns 0 on success, or the non-zero result of bdrv_get_info() on the
+ * underlying file (in which case @bdi is left untouched).
+ */
+static int block_crypto_get_info_luks(BlockDriverState *bs,
+ BlockDriverInfo *bdi)
+{
+ BlockDriverInfo subbdi;
+ int ret;
+
+ ret = bdrv_get_info(bs->file->bs, &subbdi);
+ if (ret != 0) {
+ return ret;
+ }
+
+ bdi->unallocated_blocks_are_zero = false;
+ bdi->can_write_zeroes_with_unmap = false;
+ bdi->cluster_size = subbdi.cluster_size;
+
+ return 0;
+}
+
+/*
+ * Build the format-specific image info for a LUKS image from the crypto
+ * block's own info.
+ *
+ * Returns a newly allocated ImageInfoSpecific of kind LUKS, or NULL if
+ * the crypto info cannot be obtained or is not for the LUKS format.
+ */
+static ImageInfoSpecific *
+block_crypto_get_specific_info_luks(BlockDriverState *bs)
+{
+ BlockCrypto *crypto = bs->opaque;
+ ImageInfoSpecific *spec_info;
+ QCryptoBlockInfo *info;
+
+ info = qcrypto_block_get_info(crypto->block, NULL);
+ if (!info) {
+ return NULL;
+ }
+ if (info->format != Q_CRYPTO_BLOCK_FORMAT_LUKS) {
+ qapi_free_QCryptoBlockInfo(info);
+ return NULL;
+ }
+
+ /* Move the LUKS details out of info by struct copy */
+ spec_info = g_new(ImageInfoSpecific, 1);
+ spec_info->type = IMAGE_INFO_SPECIFIC_KIND_LUKS;
+ spec_info->u.luks.data = g_new(QCryptoBlockInfoLUKS, 1);
+ *spec_info->u.luks.data = info->u.luks;
+
+ /* Blank out pointers we've just stolen to avoid double free */
+ memset(&info->u.luks, 0, sizeof(info->u.luks));
+
+ qapi_free_QCryptoBlockInfo(info);
+
+ return spec_info;
+}
+
+/* Driver table for the "luks" format, wiring up the callbacks above. */
+BlockDriver bdrv_crypto_luks = {
+ .format_name = "luks",
+ .instance_size = sizeof(BlockCrypto),
+ .bdrv_probe = block_crypto_probe_luks,
+ .bdrv_open = block_crypto_open_luks,
+ .bdrv_close = block_crypto_close,
+ .bdrv_create = block_crypto_create_luks,
+ .bdrv_truncate = block_crypto_truncate,
+ .create_opts = &block_crypto_create_opts_luks,
+
+ .bdrv_co_readv = block_crypto_co_readv,
+ .bdrv_co_writev = block_crypto_co_writev,
+ .bdrv_getlength = block_crypto_getlength,
+ .bdrv_get_info = block_crypto_get_info_luks,
+ .bdrv_get_specific_info = block_crypto_get_specific_info_luks,
+};
+
+/* Registers the "luks" driver; hooked in through block_init() below. */
+static void block_crypto_init(void)
+{
+ bdrv_register(&bdrv_crypto_luks);
+}
+
+block_init(block_crypto_init);
diff --git a/block/curl.c b/block/curl.c
index bbee3ca..426fb4d 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -21,19 +21,31 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/error-report.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
+#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <curl/curl.h>
+#include "qemu/cutils.h"
// #define DEBUG_CURL
// #define DEBUG_VERBOSE
#ifdef DEBUG_CURL
-#define DPRINTF(fmt, ...) do { printf(fmt, ## __VA_ARGS__); } while (0)
+#define DEBUG_CURL_PRINT 1
#else
-#define DPRINTF(fmt, ...) do { } while (0)
+#define DEBUG_CURL_PRINT 0
#endif
+#define DPRINTF(fmt, ...) \
+ do { \
+ if (DEBUG_CURL_PRINT) { \
+ fprintf(stderr, fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
#if LIBCURL_VERSION_NUM >= 0x071000
/* The multi interface timer callback was introduced in 7.16.0 */
@@ -75,6 +87,10 @@
#define CURL_BLOCK_OPT_SSLVERIFY "sslverify"
#define CURL_BLOCK_OPT_TIMEOUT "timeout"
#define CURL_BLOCK_OPT_COOKIE "cookie"
+#define CURL_BLOCK_OPT_USERNAME "username"
+#define CURL_BLOCK_OPT_PASSWORD_SECRET "password-secret"
+#define CURL_BLOCK_OPT_PROXY_USERNAME "proxy-username"
+#define CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET "proxy-password-secret"
struct BDRVCURLState;
@@ -117,6 +133,10 @@
char *cookie;
bool accept_range;
AioContext *aio_context;
+ char *username;
+ char *password;
+ char *proxyusername;
+ char *proxypassword;
} BDRVCURLState;
static void curl_clean_state(CURLState *s);
@@ -149,21 +169,23 @@
state->sock_fd = fd;
s = state->s;
- DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, fd);
+ DPRINTF("CURL (AIO): Sock action %d on fd %d\n", action, (int)fd);
switch (action) {
case CURL_POLL_IN:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- NULL, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, NULL, state);
break;
case CURL_POLL_OUT:
- aio_set_fd_handler(s->aio_context, fd, NULL, curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, curl_multi_do, state);
break;
case CURL_POLL_INOUT:
- aio_set_fd_handler(s->aio_context, fd, curl_multi_read,
- curl_multi_do, state);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ curl_multi_read, curl_multi_do, state);
break;
case CURL_POLL_REMOVE:
- aio_set_fd_handler(s->aio_context, fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ NULL, NULL, NULL);
break;
}
@@ -297,6 +319,18 @@
/* ACBs for successful messages get completed in curl_read_cb */
if (msg->data.result != CURLE_OK) {
int i;
+ static int errcount = 100;
+
+ /* Don't lose the original error message from curl, since
+ * it contains extra data.
+ */
+ if (errcount > 0) {
+ error_report("curl: %s", state->errmsg);
+ if (--errcount == 0) {
+ error_report("curl: further errors suppressed");
+ }
+ }
+
for (i = 0; i < CURL_NUM_ACB; i++) {
CURLAIOCB *acb = state->acb[i];
@@ -304,7 +338,7 @@
continue;
}
- acb->common.cb(acb->common.opaque, -EIO);
+ acb->common.cb(acb->common.opaque, -EPROTO);
qemu_aio_unref(acb);
state->acb[i] = NULL;
}
@@ -402,6 +436,21 @@
curl_easy_setopt(state->curl, CURLOPT_ERRORBUFFER, state->errmsg);
curl_easy_setopt(state->curl, CURLOPT_FAILONERROR, 1);
+ if (s->username) {
+ curl_easy_setopt(state->curl, CURLOPT_USERNAME, s->username);
+ }
+ if (s->password) {
+ curl_easy_setopt(state->curl, CURLOPT_PASSWORD, s->password);
+ }
+ if (s->proxyusername) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYUSERNAME, s->proxyusername);
+ }
+ if (s->proxypassword) {
+ curl_easy_setopt(state->curl,
+ CURLOPT_PROXYPASSWORD, s->proxypassword);
+ }
+
/* Restrict supported protocols to avoid security issues in the more
* obscure protocols. For example, do not allow POP3/SMTP/IMAP see
* CVE-2013-0249.
@@ -508,10 +557,31 @@
.type = QEMU_OPT_STRING,
.help = "Pass the cookie or list of cookies with each request"
},
+ {
+ .name = CURL_BLOCK_OPT_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP auth",
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_USERNAME,
+ .type = QEMU_OPT_STRING,
+ .help = "Username for HTTP proxy auth"
+ },
+ {
+ .name = CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET,
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret used as password for HTTP proxy auth",
+ },
{ /* end of list */ }
},
};
+
static int curl_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -522,6 +592,7 @@
const char *file;
const char *cookie;
double d;
+ const char *secretid;
static int inited = 0;
@@ -563,6 +634,26 @@
goto out_noclean;
}
+ s->username = g_strdup(qemu_opt_get(opts, CURL_BLOCK_OPT_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PASSWORD_SECRET);
+
+ if (secretid) {
+ s->password = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->password) {
+ goto out_noclean;
+ }
+ }
+
+ s->proxyusername = g_strdup(
+ qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_USERNAME));
+ secretid = qemu_opt_get(opts, CURL_BLOCK_OPT_PROXY_PASSWORD_SECRET);
+ if (secretid) {
+ s->proxypassword = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!s->proxypassword) {
+ goto out_noclean;
+ }
+ }
+
if (!inited) {
curl_global_init(CURL_GLOBAL_ALL);
inited = 1;
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
new file mode 100644
index 0000000..f2bfdcf
--- /dev/null
+++ b/block/dirty-bitmap.c
@@ -0,0 +1,387 @@
+/*
+ * Block Dirty Bitmap
+ *
+ * Copyright (c) 2016 Red Hat. Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "block/block_int.h"
+#include "block/blockjob.h"
+
+/**
+ * A BdrvDirtyBitmap can be in three possible states:
+ * (1) successor is NULL and disabled is false: full r/w mode
+ * (2) successor is NULL and disabled is true: read only mode ("disabled")
+ * (3) successor is set: frozen mode.
+ * A frozen bitmap cannot be renamed, deleted, anonymized, cleared, set,
+ * or enabled. A frozen bitmap can only abdicate() or reclaim().
+ *
+ * Every bitmap is linked into the dirty_bitmaps list of the BlockDriverState
+ * it was created on. The bitmap owns its HBitmap and its name string; both
+ * are freed (hbitmap_free() / g_free()) when the bitmap is released.
+ */
+struct BdrvDirtyBitmap {
+ HBitmap *bitmap; /* Dirty sector bitmap implementation */
+ BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
+ char *name; /* Optional non-empty unique ID */
+ int64_t size; /* Size of the bitmap (Number of sectors) */
+ bool disabled; /* Bitmap is read-only */
+ QLIST_ENTRY(BdrvDirtyBitmap) list; /* Link in bs->dirty_bitmaps */
+};
+
+/**
+ * Look up the dirty bitmap called @name among the bitmaps attached to @bs.
+ *
+ * @name must not be NULL. Anonymous bitmaps (no name) never match.
+ * Returns the matching bitmap, or NULL if there is none.
+ */
+BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, const char *name)
+{
+    BdrvDirtyBitmap *cur;
+
+    assert(name);
+    QLIST_FOREACH(cur, &bs->dirty_bitmaps, list) {
+        if (cur->name == NULL) {
+            continue;
+        }
+        if (strcmp(cur->name, name) == 0) {
+            return cur;
+        }
+    }
+    return NULL;
+}
+
+/**
+ * Drop the name of @bitmap, turning it into an anonymous bitmap.
+ * The bitmap must not be frozen.
+ */
+void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap)
+{
+    char *old_name;
+
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+
+    old_name = bitmap->name;
+    bitmap->name = NULL;
+    g_free(old_name);
+}
+
+/**
+ * Create a dirty bitmap covering all sectors of @bs and attach it to @bs.
+ *
+ * @granularity is given in bytes; it must be a power of two and at least one
+ * sector. @name may be NULL for an anonymous bitmap; a non-NULL name must not
+ * already be in use on @bs.
+ *
+ * Returns the new, enabled bitmap. On failure returns NULL with @errp set;
+ * when the device length cannot be determined errno is set as well.
+ */
+BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs,
+                                          uint32_t granularity,
+                                          const char *name,
+                                          Error **errp)
+{
+    BdrvDirtyBitmap *bm;
+    int64_t nb_sectors;
+    uint32_t sector_granularity;
+
+    assert((granularity & (granularity - 1)) == 0);
+
+    if (name != NULL && bdrv_find_dirty_bitmap(bs, name) != NULL) {
+        error_setg(errp, "Bitmap already exists: %s", name);
+        return NULL;
+    }
+
+    /* Convert the byte granularity into a sector granularity */
+    sector_granularity = granularity >> BDRV_SECTOR_BITS;
+    assert(sector_granularity);
+
+    nb_sectors = bdrv_nb_sectors(bs);
+    if (nb_sectors < 0) {
+        error_setg_errno(errp, -nb_sectors, "could not get length of device");
+        errno = -nb_sectors;
+        return NULL;
+    }
+
+    bm = g_new0(BdrvDirtyBitmap, 1);
+    bm->name = g_strdup(name);
+    bm->size = nb_sectors;
+    bm->disabled = false;
+    bm->bitmap = hbitmap_alloc(nb_sectors, ctz32(sector_granularity));
+    QLIST_INSERT_HEAD(&bs->dirty_bitmaps, bm, list);
+
+    return bm;
+}
+
+/** Return true if @bitmap is frozen, i.e. it has a successor attached. */
+bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
+{
+    return bitmap->successor != NULL;
+}
+
+/** Return true if @bitmap is neither disabled nor frozen. */
+bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
+{
+    return !bitmap->disabled && !bitmap->successor;
+}
+
+/**
+ * Map the state of @bitmap to a DirtyBitmapStatus value.
+ * Frozen takes precedence over disabled; anything else is active.
+ */
+DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap)
+{
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        return DIRTY_BITMAP_STATUS_FROZEN;
+    }
+    if (bdrv_dirty_bitmap_enabled(bitmap)) {
+        return DIRTY_BITMAP_STATUS_ACTIVE;
+    }
+    return DIRTY_BITMAP_STATUS_DISABLED;
+}
+
+/**
+ * Freeze @bitmap by attaching a fresh anonymous successor to it.
+ *
+ * The successor is created on @bs with the same granularity as @bitmap and
+ * inherits its enabled/disabled setting.
+ *
+ * Returns 0 on success. Returns -1 with @errp set if @bitmap is already
+ * frozen or if the successor could not be created.
+ */
+int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
+                                       BdrvDirtyBitmap *bitmap, Error **errp)
+{
+    BdrvDirtyBitmap *successor;
+
+    if (bdrv_dirty_bitmap_frozen(bitmap)) {
+        error_setg(errp, "Cannot create a successor for a bitmap that is "
+                   "currently frozen");
+        return -1;
+    }
+    assert(!bitmap->successor);
+
+    /* The successor is anonymous: it is created without a name */
+    successor = bdrv_create_dirty_bitmap(bs,
+                                         bdrv_dirty_bitmap_granularity(bitmap),
+                                         NULL, errp);
+    if (successor == NULL) {
+        return -1;
+    }
+
+    /* Mirror the parent's on/off state, then freeze the parent */
+    successor->disabled = bitmap->disabled;
+    bitmap->successor = successor;
+
+    return 0;
+}
+
+/**
+ * Replace a frozen @bitmap by its successor.
+ *
+ * The successor takes over the name of @bitmap, @bitmap itself is released,
+ * and the successor is returned. If @bitmap has no successor, NULL is
+ * returned and @errp is set.
+ */
+BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
+                                            BdrvDirtyBitmap *bitmap,
+                                            Error **errp)
+{
+    BdrvDirtyBitmap *heir = bitmap->successor;
+
+    if (!heir) {
+        error_setg(errp, "Cannot relinquish control if "
+                   "there's no successor present");
+        return NULL;
+    }
+
+    /* Hand the name over so that it is not freed together with @bitmap */
+    heir->name = bitmap->name;
+    bitmap->name = NULL;
+
+    /* Detach the successor first: releasing asserts the bitmap is unfrozen */
+    bitmap->successor = NULL;
+    bdrv_release_dirty_bitmap(bs, bitmap);
+
+    return heir;
+}
+
+/**
+ * Fold the successor of @parent back into @parent.
+ *
+ * Used when the parent can no longer be deleted safely: the successor's
+ * bits are merged into the parent, the successor is released and the parent
+ * is un-frozen (its disabled flag is left untouched).
+ *
+ * Returns @parent on success. Returns NULL with @errp set if there is no
+ * successor or if merging the two bitmaps fails.
+ */
+BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
+                                           BdrvDirtyBitmap *parent,
+                                           Error **errp)
+{
+    BdrvDirtyBitmap *child = parent->successor;
+
+    if (child == NULL) {
+        error_setg(errp, "Cannot reclaim a successor when none is present");
+        return NULL;
+    }
+
+    if (!hbitmap_merge(parent->bitmap, child->bitmap)) {
+        error_setg(errp, "Merging of parent and successor bitmap failed");
+        return NULL;
+    }
+
+    bdrv_release_dirty_bitmap(bs, child);
+    parent->successor = NULL;
+    return parent;
+}
+
+/**
+ * Resize every bitmap attached to @bs to the current sector count of @bs.
+ * None of the attached bitmaps may be frozen.
+ *
+ * NOTE(review): the result of bdrv_nb_sectors() is used without checking for
+ * a negative (error) value, unlike in bdrv_create_dirty_bitmap() -- confirm
+ * that callers only invoke this when the length is known to be available.
+ */
+void bdrv_dirty_bitmap_truncate(BlockDriverState *bs)
+{
+    uint64_t nb_sectors = bdrv_nb_sectors(bs);
+    BdrvDirtyBitmap *bm;
+
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        assert(!bdrv_dirty_bitmap_frozen(bm));
+        hbitmap_truncate(bm->bitmap, nb_sectors);
+        bm->size = nb_sectors;
+    }
+}
+
+/*
+ * Detach and free dirty bitmaps of @bs.
+ *
+ * With a non-NULL @bitmap only that bitmap is considered and the function
+ * returns as soon as it has been released; with @bitmap == NULL every bitmap
+ * is considered. If @only_named is true, anonymous bitmaps are left alone.
+ * A bitmap that gets released must not be frozen.
+ */
+static void bdrv_do_release_matching_dirty_bitmap(BlockDriverState *bs,
+                                                  BdrvDirtyBitmap *bitmap,
+                                                  bool only_named)
+{
+    BdrvDirtyBitmap *cur, *tmp;
+
+    QLIST_FOREACH_SAFE(cur, &bs->dirty_bitmaps, list, tmp) {
+        if (bitmap && cur != bitmap) {
+            continue;
+        }
+        if (only_named && !cur->name) {
+            continue;
+        }
+
+        assert(!bdrv_dirty_bitmap_frozen(cur));
+        QLIST_REMOVE(cur, list);
+        hbitmap_free(cur->bitmap);
+        g_free(cur->name);
+        g_free(cur);
+
+        if (bitmap) {
+            return;
+        }
+    }
+}
+
+/**
+ * Detach @bitmap from @bs and free it, whether it is named or anonymous.
+ * The bitmap must not be frozen.
+ */
+void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
+{
+    const bool only_named = false;
+
+    bdrv_do_release_matching_dirty_bitmap(bs, bitmap, only_named);
+}
+
+/**
+ * Free every named dirty bitmap attached to @bs (for use in bdrv_close()).
+ * Anonymous bitmaps are kept. None of the named bitmaps may be frozen.
+ */
+void bdrv_release_named_dirty_bitmaps(BlockDriverState *bs)
+{
+    const bool only_named = true;
+
+    /* A NULL bitmap means "consider all bitmaps on @bs" */
+    bdrv_do_release_matching_dirty_bitmap(bs, NULL, only_named);
+}
+
+/** Put the (unfrozen) @bitmap into disabled, i.e. read-only, mode. */
+void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+
+    bitmap->disabled = true;
+}
+
+/** Put the (unfrozen) @bitmap back into full read/write mode. */
+void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
+{
+    assert(!bdrv_dirty_bitmap_frozen(bitmap));
+
+    bitmap->disabled = false;
+}
+
+/**
+ * Build a BlockDirtyInfoList describing every dirty bitmap attached to @bs.
+ *
+ * Each entry carries the dirty count, granularity, status and (if present)
+ * a copy of the name. Entries appear in the order of bs->dirty_bitmaps.
+ * Returns NULL when no bitmap is attached.
+ */
+BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
+{
+    BlockDirtyInfoList *head = NULL;
+    BlockDirtyInfoList **tail = &head;
+    BdrvDirtyBitmap *bm;
+
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        BlockDirtyInfoList *node = g_new0(BlockDirtyInfoList, 1);
+        BlockDirtyInfo *info = g_new0(BlockDirtyInfo, 1);
+
+        info->name = g_strdup(bm->name);
+        info->has_name = !!bm->name;
+        info->status = bdrv_dirty_bitmap_status(bm);
+        info->granularity = bdrv_dirty_bitmap_granularity(bm);
+        info->count = bdrv_get_dirty_count(bm);
+
+        /* Append at the tail to keep list order */
+        node->value = info;
+        *tail = node;
+        tail = &node->next;
+    }
+
+    return head;
+}
+
+/**
+ * Return the dirty state of @sector in @bitmap as reported by hbitmap_get(),
+ * or 0 when @bitmap is NULL. @bs is not used.
+ */
+int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+                   int64_t sector)
+{
+    if (!bitmap) {
+        return 0;
+    }
+    return hbitmap_get(bitmap->bitmap, sector);
+}
+
+/**
+ * Pick a default bitmap granularity (in bytes) for @bs.
+ *
+ * The cluster size reported by bdrv_get_info() is used, clamped to the
+ * range [4K, 64K]. If no positive cluster size is available the default
+ * is 64K.
+ */
+uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs)
+{
+    BlockDriverInfo info;
+    uint32_t granularity;
+
+    if (bdrv_get_info(bs, &info) < 0 || info.cluster_size <= 0) {
+        return 65536;
+    }
+
+    granularity = MAX(4096, info.cluster_size);
+    return MIN(65536, granularity);
+}
+
+/**
+ * Return the granularity of @bitmap in bytes: the sector size shifted left
+ * by the granularity reported by the underlying HBitmap.
+ */
+uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap)
+{
+    int shift = hbitmap_granularity(bitmap->bitmap);
+
+    return BDRV_SECTOR_SIZE << shift;
+}
+
+/** Initialize @hbi to iterate over @bitmap starting at position 0. */
+void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, HBitmapIter *hbi)
+{
+    HBitmap *hb = bitmap->bitmap;
+
+    hbitmap_iter_init(hbi, hb, 0);
+}
+
+/**
+ * Mark @nr_sectors sectors starting at @cur_sector as dirty in @bitmap.
+ * The bitmap must be enabled (neither disabled nor frozen).
+ */
+void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                           int64_t cur_sector, int64_t nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+
+    hbitmap_set(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+/**
+ * Mark @nr_sectors sectors starting at @cur_sector as clean in @bitmap.
+ * The bitmap must be enabled (neither disabled nor frozen).
+ */
+void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
+                             int64_t cur_sector, int64_t nr_sectors)
+{
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+
+    hbitmap_reset(bitmap->bitmap, cur_sector, nr_sectors);
+}
+
+/**
+ * Clear all bits of the (enabled) @bitmap.
+ *
+ * If @out is NULL the existing HBitmap is reset in place. Otherwise a new,
+ * empty HBitmap of the same size and granularity is installed and the
+ * previous one is handed to the caller through @out (see
+ * bdrv_undo_clear_dirty_bitmap() for putting it back).
+ */
+void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
+{
+    HBitmap *old;
+
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+
+    if (out == NULL) {
+        hbitmap_reset_all(bitmap->bitmap);
+        return;
+    }
+
+    old = bitmap->bitmap;
+    bitmap->bitmap = hbitmap_alloc(bitmap->size, hbitmap_granularity(old));
+    *out = old;
+}
+
+/**
+ * Install @in as the HBitmap of the (enabled) @bitmap and free the HBitmap
+ * that was in place before.
+ */
+void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in)
+{
+    HBitmap *discarded;
+
+    discarded = bitmap->bitmap;
+    assert(bdrv_dirty_bitmap_enabled(bitmap));
+
+    bitmap->bitmap = in;
+    hbitmap_free(discarded);
+}
+
+/**
+ * Mark @nr_sectors sectors starting at @cur_sector as dirty in every
+ * enabled bitmap attached to @bs. Disabled and frozen bitmaps are skipped.
+ */
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector,
+                    int64_t nr_sectors)
+{
+    BdrvDirtyBitmap *bm;
+
+    QLIST_FOREACH(bm, &bs->dirty_bitmaps, list) {
+        if (bdrv_dirty_bitmap_enabled(bm)) {
+            hbitmap_set(bm->bitmap, cur_sector, nr_sectors);
+        }
+    }
+}
+
+/**
+ * Reposition @hbi at @offset by re-initializing it on the HBitmap it is
+ * already bound to. @hbi must have been initialized before.
+ */
+void bdrv_set_dirty_iter(HBitmapIter *hbi, int64_t offset)
+{
+    assert(hbi->hb);
+
+    hbitmap_iter_init(hbi, hbi->hb, offset);
+}
+
+/** Return the dirty count of @bitmap as reported by hbitmap_count(). */
+int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap)
+{
+    HBitmap *hb = bitmap->bitmap;
+
+    return hbitmap_count(hb);
+}
diff --git a/block/dmg.c b/block/dmg.c
index e455886..b0ed89b 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -21,11 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "qemu/bswap.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
#include <zlib.h>
+#ifdef CONFIG_BZIP2
+#include <bzlib.h>
+#endif
enum {
/* Limit chunk sizes to prevent unreasonable amounts of memory being used
@@ -55,6 +61,9 @@
uint8_t *compressed_chunk;
uint8_t *uncompressed_chunk;
z_stream zstream;
+#ifdef CONFIG_BZIP2
+ bz_stream bzstream;
+#endif
} BDRVDMGState;
static int dmg_probe(const uint8_t *buf, int buf_size, const char *filename)
@@ -100,6 +109,16 @@
return 0;
}
+/*
+ * Read a big-endian 64-bit value from @buffer at byte position @offset.
+ *
+ * The value is assembled byte by byte, so @buffer + @offset needs no
+ * particular alignment (chunk entries in a mish block are only 4-byte
+ * aligned) and no pointer cast that would violate strict aliasing is used.
+ * The caller must guarantee that 8 bytes are readable at that position.
+ */
+static inline uint64_t buff_read_uint64(const uint8_t *buffer, int64_t offset)
+{
+    const uint8_t *p = buffer + offset;
+    uint64_t value = 0;
+    int i;
+
+    for (i = 0; i < 8; i++) {
+        value = (value << 8) | p[i];
+    }
+    return value;
+}
+
+/*
+ * Read a big-endian 32-bit value from @buffer at byte position @offset.
+ *
+ * The value is assembled byte by byte, so @buffer + @offset needs no
+ * particular alignment and no pointer cast that would violate strict
+ * aliasing is used. The caller must guarantee that 4 bytes are readable at
+ * that position.
+ */
+static inline uint32_t buff_read_uint32(const uint8_t *buffer, int64_t offset)
+{
+    const uint8_t *p = buffer + offset;
+
+    return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
+           ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+}
+
/* Increase max chunk sizes, if necessary. This function is used to calculate
* the buffer sizes needed for compressed/uncompressed chunk I/O.
*/
@@ -112,6 +131,7 @@
switch (s->types[chunk]) {
case 0x80000005: /* zlib compressed */
+ case 0x80000006: /* bzip2 compressed */
compressed_size = s->lengths[chunk];
uncompressed_sectors = s->sectorcounts[chunk];
break;
@@ -119,7 +139,9 @@
uncompressed_sectors = (s->lengths[chunk] + 511) / 512;
break;
case 2: /* zero */
- uncompressed_sectors = s->sectorcounts[chunk];
+ /* as the all-zeroes block may be large, it is treated specially: the
+ * sector is not copied from a large buffer, a simple memset is used
+ * instead. Therefore uncompressed_sectors does not need to be set. */
break;
}
@@ -131,163 +153,374 @@
}
}
-static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+/*
+ * Locate the "koly" magic that marks the begin of the 512-byte UDIF trailer.
+ *
+ * bdrv_getlength returns a multiple of the block size (512), rounded up,
+ * while dmg images can have odd sizes. The magic is therefore searched in a
+ * 515-byte window: the last 511 bytes of the second-last sector plus the
+ * first 4 bytes of the last sector.
+ *
+ * Returns the byte offset of the magic, or a negative errno value with @errp
+ * set if the length cannot be determined, the file is shorter than 512
+ * bytes, reading fails or the magic is not found.
+ */
+static int64_t dmg_find_koly_offset(BdrvChild *file, Error **errp)
+{
+    uint8_t window[515];
+    int64_t len;
+    int64_t base = 0;
+    int pos, rc;
+
+    len = bdrv_getlength(file->bs);
+    if (len < 0) {
+        error_setg_errno(errp, -len,
+            "Failed to get file size while reading UDIF trailer");
+        return len;
+    }
+    if (len < 512) {
+        error_setg(errp, "dmg file must be at least 512 bytes long");
+        return -EINVAL;
+    }
+
+    /* Begin 511 bytes before the final sector when the file is long enough */
+    if (len > 511 + 512) {
+        base = len - 511 - 512;
+    }
+    /* Never read more than the search window holds */
+    if (len > 515) {
+        len = 515;
+    }
+
+    rc = bdrv_pread(file, base, window, len);
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Failed while reading UDIF trailer");
+        return rc;
+    }
+
+    for (pos = 0; pos < len - 3; pos++) {
+        const uint8_t *p = &window[pos];
+
+        if (p[0] == 'k' && p[1] == 'o' && p[2] == 'l' && p[3] == 'y') {
+            return base + pos;
+        }
+    }
+
+    error_setg(errp, "Could not locate UDIF trailer in dmg file");
+    return -EINVAL;
+}
+
+/* used when building the sector table; filled in while dmg_open parses the
+ * image and passed to the helpers that read the mish blocks */
+typedef struct DmgHeaderState {
+ /* used internally by dmg_read_mish_block to remember offsets of blocks
+ * across calls; dmg_open reads it from the UDIF trailer and
+ * dmg_read_mish_block uses it as the base for chunk offsets in the
+ * (compressed) data fork */
+ uint64_t data_fork_offset;
+ /* exported for dmg_open, which sizes the compressed and uncompressed
+ * chunk buffers from these; updated via update_max_chunk_size */
+ uint32_t max_compressed_size;
+ uint32_t max_sectors_per_chunk;
+} DmgHeaderState;
+
+/*
+ * Tell whether @entry_type is a block entry type this driver can read:
+ * uncompressed (1), zeroes (2), zlib (0x80000005) and, only when built with
+ * CONFIG_BZIP2, bzip2 (0x80000006).
+ */
+static bool dmg_is_known_block_type(uint32_t entry_type)
+{
+    if (entry_type == 0x00000001 ||     /* uncompressed */
+        entry_type == 0x00000002 ||     /* zeroes */
+        entry_type == 0x80000005) {     /* zlib */
+        return true;
+    }
+#ifdef CONFIG_BZIP2
+    if (entry_type == 0x80000006) {     /* bzip2 */
+        return true;
+    }
+#endif
+    return false;
+}
+
+/*
+ * Parse one "mish" block and append its chunk entries to the chunk tables
+ * (types, offsets, lengths, sectors, sectorcounts) of @s.
+ *
+ * @buffer holds @count bytes. Data that is too small or does not start with
+ * the "mish" magic is skipped and reported as success. Entries whose type is
+ * not known to dmg_is_known_block_type() are dropped. For every accepted
+ * entry ds->max_compressed_size and ds->max_sectors_per_chunk are updated
+ * through update_max_chunk_size().
+ *
+ * Returns 0 on success, -EINVAL if an entry exceeds DMG_SECTORCOUNTS_MAX or
+ * DMG_LENGTHS_MAX (s->n_chunks is not advanced in that case).
+ */
+static int dmg_read_mish_block(BDRVDMGState *s, DmgHeaderState *ds,
+                               uint8_t *buffer, uint32_t count)
+{
+    uint32_t i;
+    size_t new_size;
+    uint32_t chunk_count;
+    int64_t offset = 0;
+    uint64_t data_offset;
+    uint64_t in_offset = ds->data_fork_offset;
+    uint64_t out_offset;
+
+    /* skip data that is not a valid MISH block (too small or invalid magic).
+     * The size is tested first so that the magic is never read from a buffer
+     * holding fewer than four bytes. */
+    if (count < 244 || buff_read_uint32(buffer, offset) != 0x6d697368) {
+        /* assume success for now */
+        return 0;
+    }
+
+    /* chunk offsets are relative to this sector number */
+    out_offset = buff_read_uint64(buffer, offset + 8);
+
+    /* location in data fork for (compressed) blob (in bytes) */
+    data_offset = buff_read_uint64(buffer, offset + 0x18);
+    in_offset += data_offset;
+
+    /* move to begin of chunk entries */
+    offset += 204;
+
+    /* each chunk entry is 40 bytes; types[] holds 32-bit values, hence half
+     * the allocation size of the 64-bit tables */
+    chunk_count = (count - 204) / 40;
+    new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
+    s->types = g_realloc(s->types, new_size / 2);
+    s->offsets = g_realloc(s->offsets, new_size);
+    s->lengths = g_realloc(s->lengths, new_size);
+    s->sectors = g_realloc(s->sectors, new_size);
+    s->sectorcounts = g_realloc(s->sectorcounts, new_size);
+
+    for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
+        s->types[i] = buff_read_uint32(buffer, offset);
+        if (!dmg_is_known_block_type(s->types[i])) {
+            /* drop the entry: reuse slot i for the next one */
+            chunk_count--;
+            i--;
+            offset += 40;
+            continue;
+        }
+
+        /* sector number */
+        s->sectors[i] = buff_read_uint64(buffer, offset + 8);
+        s->sectors[i] += out_offset;
+
+        /* sector count */
+        s->sectorcounts[i] = buff_read_uint64(buffer, offset + 0x10);
+
+        /* all-zeroes sector (type 2) does not need to be "uncompressed" and can
+         * therefore be unbounded. */
+        if (s->types[i] != 2 && s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
+            error_report("sector count %" PRIu64 " for chunk %" PRIu32
+                         " is larger than max (%u)",
+                         s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
+            return -EINVAL;
+        }
+
+        /* offset in (compressed) data fork */
+        s->offsets[i] = buff_read_uint64(buffer, offset + 0x18);
+        s->offsets[i] += in_offset;
+
+        /* length in (compressed) data fork */
+        s->lengths[i] = buff_read_uint64(buffer, offset + 0x20);
+
+        if (s->lengths[i] > DMG_LENGTHS_MAX) {
+            error_report("length %" PRIu64 " for chunk %" PRIu32
+                         " is larger than max (%u)",
+                         s->lengths[i], i, DMG_LENGTHS_MAX);
+            return -EINVAL;
+        }
+
+        update_max_chunk_size(s, i, &ds->max_compressed_size,
+                              &ds->max_sectors_per_chunk);
+        offset += 40;
+    }
+    s->n_chunks += chunk_count;
+    return 0;
+}
+
+/*
+ * Read the mish blocks stored in the resource fork that starts at
+ * @info_begin and is @info_length bytes long, feeding each resource to
+ * dmg_read_mish_block(). Returns 0 on success or a negative errno value.
+ */
+static int dmg_read_resource_fork(BlockDriverState *bs, DmgHeaderState *ds,
+ uint64_t info_begin, uint64_t info_length)
{
BDRVDMGState *s = bs->opaque;
- uint64_t info_begin, info_end, last_in_offset, last_out_offset;
- uint32_t count, tmp;
- uint32_t max_compressed_size = 1, max_sectors_per_chunk = 1, i;
- int64_t offset;
int ret;
+ uint32_t count, rsrc_data_offset;
+ uint8_t *buffer = NULL;
+ uint64_t info_end;
+ uint64_t offset;
- bs->read_only = 1;
- s->n_chunks = 0;
- s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
-
- /* read offset of info blocks */
- offset = bdrv_getlength(bs->file);
- if (offset < 0) {
- ret = offset;
- goto fail;
- }
- offset -= 0x1d8;
-
- ret = read_uint64(bs, offset, &info_begin);
+ /* read offset from begin of resource fork (info_begin) to resource data */
+ ret = read_uint32(bs, info_begin, &rsrc_data_offset);
if (ret < 0) {
goto fail;
- } else if (info_begin == 0) {
+ } else if (rsrc_data_offset > info_length) {
ret = -EINVAL;
goto fail;
}
- ret = read_uint32(bs, info_begin, &tmp);
+ /* read length of resource data. The bound is checked by subtraction:
+ * rsrc_data_offset <= info_length was verified above, so the difference
+ * cannot underflow, whereas the 32-bit sum rsrc_data_offset + count
+ * could wrap around and slip past the check. */
+ ret = read_uint32(bs, info_begin + 8, &count);
if (ret < 0) {
goto fail;
- } else if (tmp != 0x100) {
+ } else if (count == 0 || count > info_length - rsrc_data_offset) {
ret = -EINVAL;
goto fail;
}
- ret = read_uint32(bs, info_begin + 4, &count);
- if (ret < 0) {
- goto fail;
- } else if (count == 0) {
- ret = -EINVAL;
- goto fail;
- }
- info_end = info_begin + count;
+ /* begin of resource data (consisting of one or more resources) */
+ offset = info_begin + rsrc_data_offset;
- offset = info_begin + 0x100;
+ /* end of resource data (there is possibly a following resource map
+ * which will be ignored). */
+ info_end = offset + count;
- /* read offsets */
- last_in_offset = last_out_offset = 0;
+ /* read offsets (mish blocks) from one or more resources in resource data */
while (offset < info_end) {
- uint32_t type;
-
+ /* size of following resource */
ret = read_uint32(bs, offset, &count);
if (ret < 0) {
goto fail;
- } else if (count == 0) {
+ } else if (count == 0 || count > info_end - offset) {
ret = -EINVAL;
goto fail;
}
offset += 4;
- ret = read_uint32(bs, offset, &type);
+ buffer = g_realloc(buffer, count);
+ ret = bdrv_pread(bs->file, offset, buffer, count);
if (ret < 0) {
goto fail;
}
- if (type == 0x6d697368 && count >= 244) {
- size_t new_size;
- uint32_t chunk_count;
-
- offset += 4;
- offset += 200;
-
- chunk_count = (count - 204) / 40;
- new_size = sizeof(uint64_t) * (s->n_chunks + chunk_count);
- s->types = g_realloc(s->types, new_size / 2);
- s->offsets = g_realloc(s->offsets, new_size);
- s->lengths = g_realloc(s->lengths, new_size);
- s->sectors = g_realloc(s->sectors, new_size);
- s->sectorcounts = g_realloc(s->sectorcounts, new_size);
-
- for (i = s->n_chunks; i < s->n_chunks + chunk_count; i++) {
- ret = read_uint32(bs, offset, &s->types[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 4;
- if (s->types[i] != 0x80000005 && s->types[i] != 1 &&
- s->types[i] != 2) {
- if (s->types[i] == 0xffffffff && i > 0) {
- last_in_offset = s->offsets[i - 1] + s->lengths[i - 1];
- last_out_offset = s->sectors[i - 1] +
- s->sectorcounts[i - 1];
- }
- chunk_count--;
- i--;
- offset += 36;
- continue;
- }
- offset += 4;
-
- ret = read_uint64(bs, offset, &s->sectors[i]);
- if (ret < 0) {
- goto fail;
- }
- s->sectors[i] += last_out_offset;
- offset += 8;
-
- ret = read_uint64(bs, offset, &s->sectorcounts[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 8;
-
- if (s->sectorcounts[i] > DMG_SECTORCOUNTS_MAX) {
- error_report("sector count %" PRIu64 " for chunk %" PRIu32
- " is larger than max (%u)",
- s->sectorcounts[i], i, DMG_SECTORCOUNTS_MAX);
- ret = -EINVAL;
- goto fail;
- }
-
- ret = read_uint64(bs, offset, &s->offsets[i]);
- if (ret < 0) {
- goto fail;
- }
- s->offsets[i] += last_in_offset;
- offset += 8;
-
- ret = read_uint64(bs, offset, &s->lengths[i]);
- if (ret < 0) {
- goto fail;
- }
- offset += 8;
-
- if (s->lengths[i] > DMG_LENGTHS_MAX) {
- error_report("length %" PRIu64 " for chunk %" PRIu32
- " is larger than max (%u)",
- s->lengths[i], i, DMG_LENGTHS_MAX);
- ret = -EINVAL;
- goto fail;
- }
-
- update_max_chunk_size(s, i, &max_compressed_size,
- &max_sectors_per_chunk);
- }
- s->n_chunks += chunk_count;
+ ret = dmg_read_mish_block(s, ds, buffer, count);
+ if (ret < 0) {
+ goto fail;
}
+ /* advance offset by size of resource */
+ offset += count;
+ }
+ ret = 0;
+
+fail:
+ g_free(buffer);
+ return ret;
+}
+
+/*
+ * Read the XML property list located at @info_begin (@info_length bytes)
+ * and pass the base64-decoded content of every <data>...</data> element to
+ * dmg_read_mish_block().
+ *
+ * Returns 0 on success. Returns -EINVAL if the length is zero or above
+ * 16 MiB, if the list cannot be read completely, or if a <data> element is
+ * not terminated; errors from dmg_read_mish_block() are passed through.
+ */
+static int dmg_read_plist_xml(BlockDriverState *bs, DmgHeaderState *ds,
+                              uint64_t info_begin, uint64_t info_length)
+{
+    BDRVDMGState *s = bs->opaque;
+    uint8_t *xml;
+    char *cursor, *open_tag, *close_tag;
+    int ret;
+
+    /* Have at least some length to avoid NULL for g_malloc. Attempt to set a
+     * safe upper cap on the data length. A test sample had a XML length of
+     * about 1 MiB. */
+    if (info_length == 0 || info_length > 16 * 1024 * 1024) {
+        return -EINVAL;
+    }
+
+    /* One extra byte so that the text is always NUL-terminated for strstr */
+    xml = g_malloc(info_length + 1);
+    xml[info_length] = '\0';
+    ret = bdrv_pread(bs->file, info_begin, xml, info_length);
+    if (ret != info_length) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    /* look for <data>...</data>. The data is 284 (0x11c) bytes after base64
+     * decode. The actual data element has 431 (0x1af) bytes which includes
+     * tabs and line feeds. */
+    cursor = (char *)xml;
+    for (;;) {
+        guchar *mish;
+        gsize mish_len = 0;
+
+        open_tag = strstr(cursor, "<data>");
+        if (open_tag == NULL) {
+            break;
+        }
+        open_tag += 6;
+
+        close_tag = strstr(open_tag, "</data>");
+        if (close_tag == NULL) {
+            /* malformed XML? */
+            ret = -EINVAL;
+            goto out;
+        }
+        /* Terminate the payload in place and continue right behind it */
+        *close_tag = '\0';
+        cursor = close_tag + 1;
+
+        mish = g_base64_decode(open_tag, &mish_len);
+        ret = dmg_read_mish_block(s, ds, mish, (uint32_t)mish_len);
+        g_free(mish);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+    ret = 0;
+
+out:
+    g_free(xml);
+    return ret;
+}
+
+static int dmg_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ BDRVDMGState *s = bs->opaque;
+ DmgHeaderState ds;
+ uint64_t rsrc_fork_offset, rsrc_fork_length;
+ uint64_t plist_xml_offset, plist_xml_length;
+ int64_t offset;
+ int ret;
+
+ bs->read_only = true;
+
+ s->n_chunks = 0;
+ s->offsets = s->lengths = s->sectors = s->sectorcounts = NULL;
+ /* used by dmg_read_mish_block to keep track of the current I/O position */
+ ds.data_fork_offset = 0;
+ ds.max_compressed_size = 1;
+ ds.max_sectors_per_chunk = 1;
+
+ /* locate the UDIF trailer */
+ offset = dmg_find_koly_offset(bs->file, errp);
+ if (offset < 0) {
+ ret = offset;
+ goto fail;
+ }
+
+ /* offset of data fork (DataForkOffset) */
+ ret = read_uint64(bs, offset + 0x18, &ds.data_fork_offset);
+ if (ret < 0) {
+ goto fail;
+ } else if (ds.data_fork_offset > offset) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* offset of resource fork (RsrcForkOffset) */
+ ret = read_uint64(bs, offset + 0x28, &rsrc_fork_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = read_uint64(bs, offset + 0x30, &rsrc_fork_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (rsrc_fork_offset >= offset ||
+ rsrc_fork_length > offset - rsrc_fork_offset) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ /* offset of property list (XMLOffset) */
+ ret = read_uint64(bs, offset + 0xd8, &plist_xml_offset);
+ if (ret < 0) {
+ goto fail;
+ }
+ ret = read_uint64(bs, offset + 0xe0, &plist_xml_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (plist_xml_offset >= offset ||
+ plist_xml_length > offset - plist_xml_offset) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ ret = read_uint64(bs, offset + 0x1ec, (uint64_t *)&bs->total_sectors);
+ if (ret < 0) {
+ goto fail;
+ }
+ if (bs->total_sectors < 0) {
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (rsrc_fork_length != 0) {
+ ret = dmg_read_resource_fork(bs, &ds,
+ rsrc_fork_offset, rsrc_fork_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else if (plist_xml_length != 0) {
+ ret = dmg_read_plist_xml(bs, &ds, plist_xml_offset, plist_xml_length);
+ if (ret < 0) {
+ goto fail;
+ }
+ } else {
+ ret = -EINVAL;
+ goto fail;
}
/* initialize zlib engine */
- s->compressed_chunk = qemu_try_blockalign(bs->file,
- max_compressed_size + 1);
- s->uncompressed_chunk = qemu_try_blockalign(bs->file,
- 512 * max_sectors_per_chunk);
+ s->compressed_chunk = qemu_try_blockalign(bs->file->bs,
+ ds.max_compressed_size + 1);
+ s->uncompressed_chunk = qemu_try_blockalign(bs->file->bs,
+ 512 * ds.max_sectors_per_chunk);
if (s->compressed_chunk == NULL || s->uncompressed_chunk == NULL) {
ret = -ENOMEM;
goto fail;
@@ -314,6 +547,11 @@
return ret;
}
+/* Set the request alignment of @bs to one sector. @errp is not used. */
+static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    /* No sub-sector I/O */
+    bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+}
+
static inline int is_sector_in_chunk(BDRVDMGState* s,
uint32_t chunk_num, uint64_t sector_num)
{
@@ -349,13 +587,16 @@
if (!is_sector_in_chunk(s, s->current_chunk, sector_num)) {
int ret;
uint32_t chunk = search_chunk(s, sector_num);
+#ifdef CONFIG_BZIP2
+ uint64_t total_out;
+#endif
if (chunk >= s->n_chunks) {
return -1;
}
s->current_chunk = s->n_chunks;
- switch (s->types[chunk]) {
+ switch (s->types[chunk]) { /* block entry type */
case 0x80000005: { /* zlib compressed */
/* we need to buffer, because only the chunk as whole can be
* inflated. */
@@ -379,6 +620,34 @@
return -1;
}
break; }
+#ifdef CONFIG_BZIP2
+ case 0x80000006: /* bzip2 compressed */
+ /* we need to buffer, because only the chunk as whole can be
+ * inflated. */
+ ret = bdrv_pread(bs->file, s->offsets[chunk],
+ s->compressed_chunk, s->lengths[chunk]);
+ if (ret != s->lengths[chunk]) {
+ return -1;
+ }
+
+ ret = BZ2_bzDecompressInit(&s->bzstream, 0, 0);
+ if (ret != BZ_OK) {
+ return -1;
+ }
+ s->bzstream.next_in = (char *)s->compressed_chunk;
+ s->bzstream.avail_in = (unsigned int) s->lengths[chunk];
+ s->bzstream.next_out = (char *)s->uncompressed_chunk;
+ s->bzstream.avail_out = (unsigned int) 512 * s->sectorcounts[chunk];
+ ret = BZ2_bzDecompress(&s->bzstream);
+ total_out = ((uint64_t)s->bzstream.total_out_hi32 << 32) +
+ s->bzstream.total_out_lo32;
+ BZ2_bzDecompressEnd(&s->bzstream);
+ if (ret != BZ_STREAM_END ||
+ total_out != 512 * s->sectorcounts[chunk]) {
+ return -1;
+ }
+ break;
+#endif /* CONFIG_BZIP2 */
case 1: /* copy */
ret = bdrv_pread(bs->file, s->offsets[chunk],
s->uncompressed_chunk, s->lengths[chunk]);
@@ -387,7 +656,8 @@
}
break;
case 2: /* zero */
- memset(s->uncompressed_chunk, 0, 512 * s->sectorcounts[chunk]);
+ /* see dmg_read, it is treated specially. No buffer needs to be
+ * pre-filled, the zeroes can be set directly. */
break;
}
s->current_chunk = chunk;
@@ -395,31 +665,42 @@
return 0;
}
-static int dmg_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+/*
+ * Read @bytes bytes starting at byte @offset into @qiov.
+ *
+ * Both @offset and @bytes must be multiples of the sector size (asserted).
+ * The request is served one 512-byte sector at a time while holding s->lock:
+ * dmg_read_chunk() makes the chunk containing the sector current, then the
+ * sector is either zero-filled (all-zeroes chunk, type 2) or copied out of
+ * s->uncompressed_chunk. @flags is not used.
+ *
+ * Returns 0 on success, -EIO if dmg_read_chunk() fails for a sector.
+ */
+static int coroutine_fn
+dmg_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVDMGState *s = bs->opaque;
- int i;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ int ret, i;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ qemu_co_mutex_lock(&s->lock);
for (i = 0; i < nb_sectors; i++) {
uint32_t sector_offset_in_chunk;
+ void *data;
+
if (dmg_read_chunk(bs, sector_num + i) != 0) {
- return -1;
+ ret = -EIO;
+ goto fail;
+ }
+ /* Special case: current chunk is all zeroes. Do not perform a memcpy as
+ * s->uncompressed_chunk may be too small to cover the large all-zeroes
+ * section. dmg_read_chunk is called to find s->current_chunk */
+ if (s->types[s->current_chunk] == 2) { /* all zeroes block entry */
+ qemu_iovec_memset(qiov, i * 512, 0, 512);
+ continue;
}
sector_offset_in_chunk = sector_num + i - s->sectors[s->current_chunk];
- memcpy(buf + i * 512,
- s->uncompressed_chunk + sector_offset_in_chunk * 512, 512);
+ data = s->uncompressed_chunk + sector_offset_in_chunk * 512;
+ qemu_iovec_from_buf(qiov, i * 512, data, 512);
}
- return 0;
-}
-static coroutine_fn int dmg_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVDMGState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = dmg_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -444,7 +725,8 @@
.instance_size = sizeof(BDRVDMGState),
.bdrv_probe = dmg_probe,
.bdrv_open = dmg_open,
- .bdrv_read = dmg_co_read,
+ .bdrv_refresh_limits = dmg_refresh_limits,
+ .bdrv_co_preadv = dmg_co_preadv,
.bdrv_close = dmg_close,
};
diff --git a/block/gluster.c b/block/gluster.c
index 1eb3a8c..01b479f 100644
--- a/block/gluster.c
+++ b/block/gluster.c
@@ -7,9 +7,31 @@
* See the COPYING file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
#include <glusterfs/api/glfs.h>
#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/uri.h"
+#include "qemu/error-report.h"
+
+/* Option key strings. FILENAME, VOLUME, PATH and DEBUG are used as option
+ * names in the QemuOptsList tables of this file; NOTE(review): the remaining
+ * keys are presumably used for per-server settings -- confirm at their use
+ * sites. */
+#define GLUSTER_OPT_FILENAME "filename"
+#define GLUSTER_OPT_VOLUME "volume"
+#define GLUSTER_OPT_PATH "path"
+#define GLUSTER_OPT_TYPE "type"
+#define GLUSTER_OPT_SERVER_PATTERN "server."
+#define GLUSTER_OPT_HOST "host"
+#define GLUSTER_OPT_PORT "port"
+#define GLUSTER_OPT_TO "to"
+#define GLUSTER_OPT_IPV4 "ipv4"
+#define GLUSTER_OPT_IPV6 "ipv6"
+#define GLUSTER_OPT_SOCKET "socket"
+#define GLUSTER_OPT_DEBUG "debug"
+/* NOTE(review): presumably the port used when none is given -- confirm */
+#define GLUSTER_DEFAULT_PORT 24007
+/* Log level default and maximum; the "debug" option help text gives the
+ * valid range as 0-9 */
+#define GLUSTER_DEBUG_DEFAULT 4
+#define GLUSTER_DEBUG_MAX 9
+
+/* Hint text taking one int argument; presumably an index into the 'server'
+ * array, as the wording suggests -- confirm at the call sites */
+#define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n"
typedef struct GlusterAIOCB {
int64_t size;
@@ -22,28 +44,145 @@
typedef struct BDRVGlusterState {
struct glfs *glfs;
struct glfs_fd *fd;
+ bool supports_seek_data;
+ int debug_level;
} BDRVGlusterState;
-typedef struct GlusterConf {
- char *server;
- int port;
- char *volname;
- char *image;
- char *transport;
-} GlusterConf;
+/*
+ * Temporary state built by qemu_gluster_reopen_prepare(): the newly
+ * initialised gluster connection and the file descriptor opened on it.
+ */
+typedef struct BDRVGlusterReopenState {
+ struct glfs *glfs;
+ struct glfs_fd *fd;
+} BDRVGlusterReopenState;
-static void qemu_gluster_gconf_free(GlusterConf *gconf)
-{
- if (gconf) {
- g_free(gconf->server);
- g_free(gconf->volname);
- g_free(gconf->image);
- g_free(gconf->transport);
- g_free(gconf);
+
+static QemuOptsList qemu_gluster_create_opts = {
+ .name = "qemu-gluster-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ {
+ .name = BLOCK_OPT_PREALLOC,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode (allowed values: off, full)"
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
}
-}
+};
-static int parse_volume_options(GlusterConf *gconf, char *path)
+/*
+ * Options of the filename/URI form: the gluster URL ("filename") and the
+ * gluster log level ("debug").
+ */
+static QemuOptsList runtime_opts = {
+ .name = "gluster",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_FILENAME,
+ .type = QEMU_OPT_STRING,
+ .help = "URL to the gluster image",
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Top-level options of the JSON form, absorbed by qemu_gluster_parse_json():
+ * volume name, image path inside the volume, and gluster log level.
+ */
+static QemuOptsList runtime_json_opts = {
+ .name = "gluster_json",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_json_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_VOLUME,
+ .type = QEMU_OPT_STRING,
+ .help = "name of gluster volume where VM image resides",
+ },
+ {
+ .name = GLUSTER_OPT_PATH,
+ .type = QEMU_OPT_STRING,
+ .help = "absolute path to image file in gluster volume",
+ },
+ {
+ .name = GLUSTER_OPT_DEBUG,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Gluster log level, valid range is 0-9",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Per-server option that qemu_gluster_parse_json() reads first from each
+ * "server.N." entry to learn its transport type.
+ */
+static QemuOptsList runtime_type_opts = {
+ .name = "gluster_type",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_type_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_TYPE,
+ .type = QEMU_OPT_STRING,
+ .help = "tcp|unix",
+ },
+ { /* end of list */ }
+ },
+};
+
+/* Options of a "server.N." entry whose type is unix: the socket path. */
+static QemuOptsList runtime_unix_opts = {
+ .name = "gluster_unix",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_unix_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_SOCKET,
+ .type = QEMU_OPT_STRING,
+ .help = "socket file path",
+ },
+ { /* end of list */ }
+ },
+};
+
+/*
+ * Options of a "server.N." entry whose type is tcp. "to", "ipv4" and "ipv6"
+ * are listed only so that qemu_gluster_parse_json() can detect and reject
+ * them; the option names use the same GLUSTER_OPT_* macros the parser
+ * looks them up with.
+ */
+static QemuOptsList runtime_tcp_opts = {
+ .name = "gluster_tcp",
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_tcp_opts.head),
+ .desc = {
+ {
+ .name = GLUSTER_OPT_TYPE,
+ .type = QEMU_OPT_STRING,
+ .help = "tcp|unix",
+ },
+ {
+ .name = GLUSTER_OPT_HOST,
+ .type = QEMU_OPT_STRING,
+ .help = "host address (hostname/ipv4/ipv6 addresses)",
+ },
+ {
+ .name = GLUSTER_OPT_PORT,
+ .type = QEMU_OPT_NUMBER,
+ .help = "port number on which glusterd is listening (default 24007)",
+ },
+ {
+ .name = GLUSTER_OPT_TO,
+ .type = QEMU_OPT_NUMBER,
+ .help = "max port number, not supported by gluster",
+ },
+ {
+ .name = GLUSTER_OPT_IPV4,
+ .type = QEMU_OPT_BOOL,
+ .help = "ipv4 bool value, not supported by gluster",
+ },
+ {
+ .name = GLUSTER_OPT_IPV6,
+ .type = QEMU_OPT_BOOL,
+ .help = "ipv6 bool value, not supported by gluster",
+ },
+ { /* end of list */ }
+ },
+};
+
+static int parse_volume_options(BlockdevOptionsGluster *gconf, char *path)
{
char *p, *q;
@@ -57,31 +196,29 @@
if (*p == '\0') {
return -EINVAL;
}
- gconf->volname = g_strndup(q, p - q);
+ gconf->volume = g_strndup(q, p - q);
- /* image */
+ /* path */
p += strspn(p, "/");
if (*p == '\0') {
return -EINVAL;
}
- gconf->image = g_strdup(p);
+ gconf->path = g_strdup(p);
return 0;
}
/*
- * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ * file=gluster[+transport]://[host[:port]]/volume/path[?socket=...]
*
* 'gluster' is the protocol.
*
* 'transport' specifies the transport type used to connect to gluster
* management daemon (glusterd). Valid transport types are
- * tcp, unix and rdma. If a transport type isn't specified, then tcp
- * type is assumed.
+ * tcp or unix. If a transport type isn't specified, then tcp type is assumed.
*
- * 'server' specifies the server where the volume file specification for
- * the given volume resides. This can be either hostname, ipv4 address
- * or ipv6 address. ipv6 address needs to be within square brackets [ ].
- * If transport type is 'unix', then 'server' field should not be specified.
+ * 'host' specifies the host where the volume file specification for
+ * the given volume resides. This can be either hostname or ipv4 address.
+ * If transport type is 'unix', then 'host' field should not be specified.
* The 'socket' field needs to be populated with the path to unix domain
* socket.
*
@@ -90,23 +227,22 @@
* default port. If the transport type is unix, then 'port' should not be
* specified.
*
- * 'volname' is the name of the gluster volume which contains the VM image.
+ * 'volume' is the name of the gluster volume which contains the VM image.
*
- * 'image' is the path to the actual VM image that resides on gluster volume.
+ * 'path' is the path to the actual VM image that resides on gluster volume.
*
* Examples:
*
* file=gluster://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4/testvol/a.img
* file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
- * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
- * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+tcp://host.domain.com:24007/testvol/dir/a.img
* file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
- * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
*/
-static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+static int qemu_gluster_parse_uri(BlockdevOptionsGluster *gconf,
+ const char *filename)
{
+ GlusterServer *gsconf;
URI *uri;
QueryParams *qp = NULL;
bool is_unix = false;
@@ -117,16 +253,21 @@
return -EINVAL;
}
+ gconf->server = g_new0(GlusterServerList, 1);
+ gconf->server->value = gsconf = g_new0(GlusterServer, 1);
+
/* transport */
if (!uri->scheme || !strcmp(uri->scheme, "gluster")) {
- gconf->transport = g_strdup("tcp");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+tcp")) {
- gconf->transport = g_strdup("tcp");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
} else if (!strcmp(uri->scheme, "gluster+unix")) {
- gconf->transport = g_strdup("unix");
+ gsconf->type = GLUSTER_TRANSPORT_UNIX;
is_unix = true;
} else if (!strcmp(uri->scheme, "gluster+rdma")) {
- gconf->transport = g_strdup("rdma");
+ gsconf->type = GLUSTER_TRANSPORT_TCP;
+ error_report("Warning: rdma feature is not supported, falling "
+ "back to tcp");
} else {
ret = -EINVAL;
goto out;
@@ -152,10 +293,14 @@
ret = -EINVAL;
goto out;
}
- gconf->server = g_strdup(qp->p[0].value);
+ gsconf->u.q_unix.path = g_strdup(qp->p[0].value);
} else {
- gconf->server = g_strdup(uri->server ? uri->server : "localhost");
- gconf->port = uri->port;
+ gsconf->u.tcp.host = g_strdup(uri->server ? uri->server : "localhost");
+ if (uri->port) {
+ gsconf->u.tcp.port = g_strdup_printf("%d", uri->port);
+ } else {
+ gsconf->u.tcp.port = g_strdup_printf("%d", GLUSTER_DEFAULT_PORT);
+ }
}
out:
@@ -166,52 +311,62 @@
return ret;
}
-static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename,
- Error **errp)
+static struct glfs *qemu_gluster_glfs_init(BlockdevOptionsGluster *gconf,
+ Error **errp)
{
- struct glfs *glfs = NULL;
+ struct glfs *glfs;
int ret;
int old_errno;
+ GlusterServerList *server;
- ret = qemu_gluster_parseuri(gconf, filename);
- if (ret < 0) {
- error_setg(errp, "Usage: file=gluster[+transport]://[server[:port]]/"
- "volname/image[?socket=...]");
- errno = -ret;
- goto out;
- }
-
- glfs = glfs_new(gconf->volname);
+ glfs = glfs_new(gconf->volume);
if (!glfs) {
goto out;
}
- ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
- gconf->port);
- if (ret < 0) {
- goto out;
+ for (server = gconf->server; server; server = server->next) {
+ if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
+ ret = glfs_set_volfile_server(glfs,
+ GlusterTransport_lookup[server->value->type],
+ server->value->u.q_unix.path, 0);
+ } else {
+ ret = glfs_set_volfile_server(glfs,
+ GlusterTransport_lookup[server->value->type],
+ server->value->u.tcp.host,
+ atoi(server->value->u.tcp.port));
+ }
+
+ if (ret < 0) {
+ goto out;
+ }
}
- /*
- * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
- * GlusterFS makes GF_LOG_* macros available to libgfapi users.
- */
- ret = glfs_set_logging(glfs, "-", 4);
+ ret = glfs_set_logging(glfs, "-", gconf->debug_level);
if (ret < 0) {
goto out;
}
ret = glfs_init(glfs);
if (ret) {
- error_setg_errno(errp, errno,
- "Gluster connection failed for server=%s port=%d "
- "volume=%s image=%s transport=%s", gconf->server,
- gconf->port, gconf->volname, gconf->image,
- gconf->transport);
+ error_setg(errp, "Gluster connection for volume %s, path %s failed"
+ " to connect", gconf->volume, gconf->path);
+ for (server = gconf->server; server; server = server->next) {
+ if (server->value->type == GLUSTER_TRANSPORT_UNIX) {
+ error_append_hint(errp, "hint: failed on socket %s ",
+ server->value->u.q_unix.path);
+ } else {
+ error_append_hint(errp, "hint: failed on host %s and port %s ",
+ server->value->u.tcp.host,
+ server->value->u.tcp.port);
+ }
+ }
+
+ error_append_hint(errp, "Please refer to gluster logs for more info\n");
/* glfs_init sometimes doesn't set errno although docs suggest that */
- if (errno == 0)
+ if (errno == 0) {
errno = EINVAL;
+ }
goto out;
}
@@ -226,13 +381,233 @@
return NULL;
}
+/*
+ * Map a transport name onto its index in GlusterTransport_lookup[].
+ * Returns GLUSTER_TRANSPORT__MAX when @opt is NULL or matches no entry.
+ */
+static int qapi_enum_parse(const char *opt)
+{
+ int transport = 0;
+
+ while (opt != NULL && transport < GLUSTER_TRANSPORT__MAX) {
+ if (strcmp(opt, GlusterTransport_lookup[transport]) == 0) {
+ return transport;
+ }
+ transport++;
+ }
+
+ return GLUSTER_TRANSPORT__MAX;
+}
+
+/*
+ * Convert the json formatted command line into qapi.
+ *
+ * Reads "volume" and "path" from @options, then one "server.N." sub-dict per
+ * server, appending a GlusterServer for each to @gconf->server.
+ *
+ * Returns 0 on success. On failure sets @errp, sets errno to EINVAL and
+ * returns -EINVAL; whatever was already stored in @gconf stays there for the
+ * caller to release, while the server entry being built, the extracted
+ * sub-dict and the key prefix string are released here.
+ */
+static int qemu_gluster_parse_json(BlockdevOptionsGluster *gconf,
+ QDict *options, Error **errp)
+{
+ QemuOpts *opts;
+ GlusterServer *gsconf = NULL;
+ GlusterServerList *curr = NULL;
+ QDict *backing_options = NULL;
+ Error *local_err = NULL;
+ char *str = NULL;
+ const char *ptr;
+ int num_servers; /* signed, so a negative result is caught by "< 1" */
+ int type;
+ int i;
+
+ /* create opts info from runtime_json_opts list */
+ opts = qemu_opts_create(&runtime_json_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ num_servers = qdict_array_entries(options, GLUSTER_OPT_SERVER_PATTERN);
+ if (num_servers < 1) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER, "server");
+ goto out;
+ }
+
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_VOLUME);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_VOLUME);
+ goto out;
+ }
+ gconf->volume = g_strdup(ptr);
+
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_PATH);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_PATH);
+ goto out;
+ }
+ gconf->path = g_strdup(ptr);
+ qemu_opts_del(opts);
+
+ for (i = 0; i < num_servers; i++) {
+ str = g_strdup_printf(GLUSTER_OPT_SERVER_PATTERN"%d.", i);
+ qdict_extract_subqdict(options, &backing_options, str);
+
+ /* create opts info from runtime_type_opts list */
+ opts = qemu_opts_create(&runtime_type_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_TYPE);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER, GLUSTER_OPT_TYPE);
+ error_append_hint(&local_err, GERR_INDEX_HINT, i);
+ goto out;
+ }
+ type = qapi_enum_parse(ptr);
+ if (type == GLUSTER_TRANSPORT__MAX) {
+ error_setg(&local_err, QERR_INVALID_PARAMETER_VALUE,
+ GLUSTER_OPT_TYPE, "tcp or unix");
+ error_append_hint(&local_err, GERR_INDEX_HINT, i);
+ goto out;
+ }
+ /* Allocate only once the type is valid, so that the error path can
+ * hand gsconf to the QAPI free helper with a usable discriminator. */
+ gsconf = g_new0(GlusterServer, 1);
+ gsconf->type = type;
+ qemu_opts_del(opts);
+
+ if (gsconf->type == GLUSTER_TRANSPORT_TCP) {
+ /* create opts info from runtime_tcp_opts list */
+ opts = qemu_opts_create(&runtime_tcp_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_HOST);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER,
+ GLUSTER_OPT_HOST);
+ error_append_hint(&local_err, GERR_INDEX_HINT, i);
+ goto out;
+ }
+ gsconf->u.tcp.host = g_strdup(ptr);
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_PORT);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER,
+ GLUSTER_OPT_PORT);
+ error_append_hint(&local_err, GERR_INDEX_HINT, i);
+ goto out;
+ }
+ gsconf->u.tcp.port = g_strdup(ptr);
+
+ /* defend for unsupported fields in InetSocketAddress,
+ * i.e. @ipv4, @ipv6 and @to
+ */
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_TO);
+ if (ptr) {
+ gsconf->u.tcp.has_to = true;
+ }
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV4);
+ if (ptr) {
+ gsconf->u.tcp.has_ipv4 = true;
+ }
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_IPV6);
+ if (ptr) {
+ gsconf->u.tcp.has_ipv6 = true;
+ }
+ if (gsconf->u.tcp.has_to) {
+ error_setg(&local_err, "Parameter 'to' not supported");
+ goto out;
+ }
+ if (gsconf->u.tcp.has_ipv4 || gsconf->u.tcp.has_ipv6) {
+ error_setg(&local_err, "Parameters 'ipv4/ipv6' not supported");
+ goto out;
+ }
+ qemu_opts_del(opts);
+ } else {
+ /* create opts info from runtime_unix_opts list */
+ opts = qemu_opts_create(&runtime_unix_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, backing_options, &local_err);
+ if (local_err) {
+ goto out;
+ }
+
+ ptr = qemu_opt_get(opts, GLUSTER_OPT_SOCKET);
+ if (!ptr) {
+ error_setg(&local_err, QERR_MISSING_PARAMETER,
+ GLUSTER_OPT_SOCKET);
+ error_append_hint(&local_err, GERR_INDEX_HINT, i);
+ goto out;
+ }
+ gsconf->u.q_unix.path = g_strdup(ptr);
+ qemu_opts_del(opts);
+ }
+
+ if (gconf->server == NULL) {
+ gconf->server = g_new0(GlusterServerList, 1);
+ gconf->server->value = gsconf;
+ curr = gconf->server;
+ } else {
+ curr->next = g_new0(GlusterServerList, 1);
+ curr->next->value = gsconf;
+ curr = curr->next;
+ }
+ /* the list owns the entry now; keep the error path from freeing it */
+ gsconf = NULL;
+
+ /* drop the per-server sub-dict created by qdict_extract_subqdict() */
+ QDECREF(backing_options);
+ backing_options = NULL;
+ g_free(str);
+ str = NULL;
+ }
+
+ return 0;
+
+out:
+ error_propagate(errp, local_err);
+ qapi_free_GlusterServer(gsconf);
+ qemu_opts_del(opts);
+ g_free(str);
+ QDECREF(backing_options);
+ errno = EINVAL;
+ return -errno;
+}
+
+/*
+ * Fill @gconf from either a URI (@filename non-NULL) or the JSON-style
+ * @options, then connect through qemu_gluster_glfs_init().
+ *
+ * Returns the glfs handle from qemu_gluster_glfs_init(). When parsing fails,
+ * returns NULL with @errp set (including a usage hint) and errno set to the
+ * parser's error code.
+ */
+static struct glfs *qemu_gluster_init(BlockdevOptionsGluster *gconf,
+ const char *filename,
+ QDict *options, Error **errp)
+{
+ int ret;
+ if (filename) {
+ ret = qemu_gluster_parse_uri(gconf, filename);
+ if (ret < 0) {
+ error_setg(errp, "invalid URI");
+ error_append_hint(errp, "Usage: file=gluster[+transport]://"
+ "[host[:port]]/volume/path[?socket=...]\n");
+ errno = -ret;
+ return NULL;
+ }
+ } else {
+ ret = qemu_gluster_parse_json(gconf, options, errp);
+ if (ret < 0) {
+ /* the per-server transport key is "type" (GLUSTER_OPT_TYPE) */
+ error_append_hint(errp, "Usage: "
+ "-drive driver=qcow2,file.driver=gluster,"
+ "file.volume=testvol,file.path=/path/a.qcow2"
+ "[,file.debug=9],file.server.0.type=tcp,"
+ "file.server.0.host=1.2.3.4,"
+ "file.server.0.port=24007,"
+ "file.server.1.type=unix,"
+ "file.server.1.socket=/var/run/glusterd.socket ..."
+ "\n");
+ errno = -ret;
+ return NULL;
+ }
+ }
+
+ return qemu_gluster_glfs_init(gconf, errp);
+}
+
static void qemu_gluster_complete_aio(void *opaque)
{
GlusterAIOCB *acb = (GlusterAIOCB *)opaque;
qemu_bh_delete(acb->bh);
acb->bh = NULL;
- qemu_coroutine_enter(acb->coroutine, NULL);
+ qemu_coroutine_enter(acb->coroutine);
}
/*
@@ -245,7 +620,7 @@
if (!ret || ret == acb->size) {
acb->ret = 0; /* Success */
} else if (ret < 0) {
- acb->ret = ret; /* Read/Write failed */
+ acb->ret = -errno; /* Read/Write failed */
} else {
acb->ret = -EIO; /* Partial read/write - fail it */
}
@@ -254,20 +629,6 @@
qemu_bh_schedule(acb->bh);
}
-/* TODO Convert to fine grained options */
-static QemuOptsList runtime_opts = {
- .name = "gluster",
- .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
- .desc = {
- {
- .name = "filename",
- .type = QEMU_OPT_STRING,
- .help = "URL to the gluster image",
- },
- { /* end of list */ }
- },
-};
-
static void qemu_gluster_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -285,13 +646,35 @@
}
}
+/*
+ * Do SEEK_DATA/HOLE to detect if it is functional. Older broken versions of
+ * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is used.
+ * - Corrected versions return -1 and set errno to EINVAL.
+ * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and set
+ * errno to ENXIO when SEEK_DATA is called with a position of EOF.
+ *
+ * Returns true only when SEEK_DATA at EOF fails with ENXIO. A NULL @fd
+ * (qemu_gluster_open() calls this even when glfs_open() failed) yields
+ * false without touching gfapi.
+ */
+static bool qemu_gluster_test_seek(struct glfs_fd *fd)
+{
+ off_t ret, eof;
+
+ if (!fd) {
+ /* nothing was opened, so there is nothing to probe */
+ return false;
+ }
+
+ eof = glfs_lseek(fd, 0, SEEK_END);
+ if (eof < 0) {
+ /* this should never occur */
+ return false;
+ }
+
+ /* this should always fail with ENXIO if SEEK_DATA is supported */
+ ret = glfs_lseek(fd, eof, SEEK_DATA);
+ return (ret < 0) && (errno == ENXIO);
+}
+
static int qemu_gluster_open(BlockDriverState *bs, QDict *options,
int bdrv_flags, Error **errp)
{
BDRVGlusterState *s = bs->opaque;
int open_flags = 0;
int ret = 0;
- GlusterConf *gconf = g_new0(GlusterConf, 1);
+ BlockdevOptionsGluster *gconf = NULL;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -304,24 +687,54 @@
goto out;
}
- filename = qemu_opt_get(opts, "filename");
+ filename = qemu_opt_get(opts, GLUSTER_OPT_FILENAME);
- s->glfs = qemu_gluster_init(gconf, filename, errp);
+ s->debug_level = qemu_opt_get_number(opts, GLUSTER_OPT_DEBUG,
+ GLUSTER_DEBUG_DEFAULT);
+ if (s->debug_level < 0) {
+ s->debug_level = 0;
+ } else if (s->debug_level > GLUSTER_DEBUG_MAX) {
+ s->debug_level = GLUSTER_DEBUG_MAX;
+ }
+
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = s->debug_level;
+ gconf->has_debug_level = true;
+ s->glfs = qemu_gluster_init(gconf, filename, options, errp);
if (!s->glfs) {
ret = -errno;
goto out;
}
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ /* Without this, if fsync fails for a recoverable reason (for instance,
+ * ENOSPC), gluster will dump its cache, preventing retries. This means
+ * almost certain data loss. Not all gluster versions support the
+ * 'resync-failed-syncs-after-fsync' key value, but there is no way to
+ * discover during runtime if it is supported (this api returns success for
+ * unknown key/value pairs) */
+ ret = glfs_set_xlator_option(s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync",
+ "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto out;
+ }
+#endif
+
qemu_gluster_parse_flags(bdrv_flags, &open_flags);
- s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+ s->fd = glfs_open(s->glfs, gconf->path, open_flags);
if (!s->fd) {
ret = -errno;
}
+ s->supports_seek_data = qemu_gluster_test_seek(s->fd);
+
out:
qemu_opts_del(opts);
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
if (!ret) {
return ret;
}
@@ -334,37 +747,45 @@
return ret;
}
-typedef struct BDRVGlusterReopenState {
- struct glfs *glfs;
- struct glfs_fd *fd;
-} BDRVGlusterReopenState;
-
-
static int qemu_gluster_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
int ret = 0;
+ BDRVGlusterState *s;
BDRVGlusterReopenState *reop_s;
- GlusterConf *gconf = NULL;
+ BlockdevOptionsGluster *gconf;
int open_flags = 0;
assert(state != NULL);
assert(state->bs != NULL);
+ s = state->bs->opaque;
+
state->opaque = g_new0(BDRVGlusterReopenState, 1);
reop_s = state->opaque;
qemu_gluster_parse_flags(state->flags, &open_flags);
- gconf = g_new0(GlusterConf, 1);
-
- reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, errp);
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = s->debug_level;
+ gconf->has_debug_level = true;
+ reop_s->glfs = qemu_gluster_init(gconf, state->bs->filename, NULL, errp);
if (reop_s->glfs == NULL) {
ret = -errno;
goto exit;
}
- reop_s->fd = glfs_open(reop_s->glfs, gconf->image, open_flags);
+#ifdef CONFIG_GLUSTERFS_XLATOR_OPT
+ ret = glfs_set_xlator_option(reop_s->glfs, "*-write-behind",
+ "resync-failed-syncs-after-fsync", "on");
+ if (ret < 0) {
+ error_setg_errno(errp, errno, "Unable to set xlator key/value pair");
+ ret = -errno;
+ goto exit;
+ }
+#endif
+
+ reop_s->fd = glfs_open(reop_s->glfs, gconf->path, open_flags);
if (reop_s->fd == NULL) {
/* reops->glfs will be cleaned up in _abort */
ret = -errno;
@@ -373,7 +794,7 @@
exit:
/* state->opaque will be freed in either the _abort or _commit */
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
return ret;
}
@@ -425,32 +846,27 @@
}
#ifdef CONFIG_GLUSTERFS_ZEROFILL
-static coroutine_fn int qemu_gluster_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static coroutine_fn int qemu_gluster_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset,
+ int size,
+ BdrvRequestFlags flags)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- off_t size = nb_sectors * BDRV_SECTOR_SIZE;
- off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_zerofill_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_zerofill_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static inline bool gluster_supports_zerofill(void)
@@ -459,7 +875,7 @@
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
- int64_t size)
+ int64_t size)
{
return glfs_zerofill(fd, offset, size);
}
@@ -471,7 +887,7 @@
}
static inline int qemu_gluster_zerofill(struct glfs_fd *fd, int64_t offset,
- int64_t size)
+ int64_t size)
{
return 0;
}
@@ -480,15 +896,25 @@
static int qemu_gluster_create(const char *filename,
QemuOpts *opts, Error **errp)
{
+ BlockdevOptionsGluster *gconf;
struct glfs *glfs;
struct glfs_fd *fd;
int ret = 0;
int prealloc = 0;
int64_t total_size = 0;
char *tmp = NULL;
- GlusterConf *gconf = g_new0(GlusterConf, 1);
- glfs = qemu_gluster_init(gconf, filename, errp);
+ gconf = g_new0(BlockdevOptionsGluster, 1);
+ gconf->debug_level = qemu_opt_get_number_del(opts, GLUSTER_OPT_DEBUG,
+ GLUSTER_DEBUG_DEFAULT);
+ if (gconf->debug_level < 0) {
+ gconf->debug_level = 0;
+ } else if (gconf->debug_level > GLUSTER_DEBUG_MAX) {
+ gconf->debug_level = GLUSTER_DEBUG_MAX;
+ }
+ gconf->has_debug_level = true;
+
+ glfs = qemu_gluster_init(gconf, filename, NULL, errp);
if (!glfs) {
ret = -errno;
goto out;
@@ -500,19 +926,17 @@
tmp = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
if (!tmp || !strcmp(tmp, "off")) {
prealloc = 0;
- } else if (!strcmp(tmp, "full") &&
- gluster_supports_zerofill()) {
+ } else if (!strcmp(tmp, "full") && gluster_supports_zerofill()) {
prealloc = 1;
} else {
error_setg(errp, "Invalid preallocation mode: '%s'"
- " or GlusterFS doesn't support zerofill API",
- tmp);
+ " or GlusterFS doesn't support zerofill API", tmp);
ret = -EINVAL;
goto out;
}
- fd = glfs_creat(glfs, gconf->image,
- O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+ fd = glfs_creat(glfs, gconf->path,
+ O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
if (!fd) {
ret = -errno;
} else {
@@ -530,7 +954,7 @@
}
out:
g_free(tmp);
- qemu_gluster_gconf_free(gconf);
+ qapi_free_BlockdevOptionsGluster(gconf);
if (glfs) {
glfs_fini(glfs);
}
@@ -538,38 +962,34 @@
}
static coroutine_fn int qemu_gluster_co_rw(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int write)
+ int64_t sector_num, int nb_sectors,
+ QEMUIOVector *qiov, int write)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
size_t size = nb_sectors * BDRV_SECTOR_SIZE;
off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = size;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = size;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
if (write) {
ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
} else {
ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
- &gluster_finish_aiocb, acb);
+ gluster_finish_aiocb, &acb);
}
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
static int qemu_gluster_truncate(BlockDriverState *bs, int64_t offset)
@@ -586,69 +1006,96 @@
}
static coroutine_fn int qemu_gluster_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num,
+ int nb_sectors,
+ QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 0);
}
static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+ int64_t sector_num,
+ int nb_sectors,
+ QEMUIOVector *qiov)
{
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
}
+/*
+ * Release the gluster resources held by @bs: close the file descriptor if
+ * one is open, then call glfs_fini() on the connection.
+ */
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+ BDRVGlusterState *state = bs->opaque;
+
+ if (state->fd != NULL) {
+ glfs_close(state->fd);
+ state->fd = NULL;
+ }
+
+ glfs_fini(state->glfs);
+}
+
static coroutine_fn int qemu_gluster_co_flush_to_disk(BlockDriverState *bs)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+ ret = glfs_fsync_async(s->fd, gluster_finish_aiocb, &acb);
if (ret < 0) {
ret = -errno;
- goto out;
+ goto error;
}
qemu_coroutine_yield();
- ret = acb->ret;
+ if (acb.ret < 0) {
+ ret = acb.ret;
+ goto error;
+ }
-out:
- g_slice_free(GlusterAIOCB, acb);
+ return acb.ret;
+
+error:
+ /* Some versions of Gluster (3.5.6 -> 3.5.8?) will not retain its cache
+ * after a fsync failure, so we have no way of allowing the guest to safely
+ * continue. Gluster versions prior to 3.5.6 don't retain the cache
+ * either, but will invalidate the fd on error, so this is again our only
+ * option.
+ *
+ * The 'resync-failed-syncs-after-fsync' xlator option for the
+ * write-behind cache will cause later gluster versions to retain its
+ * cache after error, so long as the fd remains open. However, we
+ * currently have no way of knowing if this option is supported.
+ *
+ * TODO: Once gluster provides a way for us to determine if the option
+ * is supported, bypass the closure and setting drv to NULL. */
+ qemu_gluster_close(bs);
+ bs->drv = NULL;
return ret;
}
#ifdef CONFIG_GLUSTERFS_DISCARD
-static coroutine_fn int qemu_gluster_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn int qemu_gluster_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int size)
{
int ret;
- GlusterAIOCB *acb = g_slice_new(GlusterAIOCB);
+ GlusterAIOCB acb;
BDRVGlusterState *s = bs->opaque;
- size_t size = nb_sectors * BDRV_SECTOR_SIZE;
- off_t offset = sector_num * BDRV_SECTOR_SIZE;
- acb->size = 0;
- acb->ret = 0;
- acb->coroutine = qemu_coroutine_self();
- acb->aio_context = bdrv_get_aio_context(bs);
+ acb.size = 0;
+ acb.ret = 0;
+ acb.coroutine = qemu_coroutine_self();
+ acb.aio_context = bdrv_get_aio_context(bs);
- ret = glfs_discard_async(s->fd, offset, size, &gluster_finish_aiocb, acb);
+ ret = glfs_discard_async(s->fd, offset, size, gluster_finish_aiocb, &acb);
if (ret < 0) {
- ret = -errno;
- goto out;
+ return -errno;
}
qemu_coroutine_yield();
- ret = acb->ret;
-
-out:
- g_slice_free(GlusterAIOCB, acb);
- return ret;
+ return acb.ret;
}
#endif
@@ -679,46 +1126,170 @@
}
}
-static void qemu_gluster_close(BlockDriverState *bs)
-{
- BDRVGlusterState *s = bs->opaque;
-
- if (s->fd) {
- glfs_close(s->fd);
- s->fd = NULL;
- }
- glfs_fini(s->glfs);
-}
-
static int qemu_gluster_has_zero_init(BlockDriverState *bs)
{
/* GlusterFS volume could be backed by a block device */
return 0;
}
-static QemuOptsList qemu_gluster_create_opts = {
- .name = "qemu-gluster-create-opts",
- .head = QTAILQ_HEAD_INITIALIZER(qemu_gluster_create_opts.head),
- .desc = {
- {
- .name = BLOCK_OPT_SIZE,
- .type = QEMU_OPT_SIZE,
- .help = "Virtual disk size"
- },
- {
- .name = BLOCK_OPT_PREALLOC,
- .type = QEMU_OPT_STRING,
- .help = "Preallocation mode (allowed values: off, full)"
- },
- { /* end of list */ }
+/*
+ * Find allocation range in @bs around offset @start.
+ * May change underlying file descriptor's file offset.
+ * If @start is not in a hole, store @start in @data, and the
+ * beginning of the next hole in @hole, and return 0.
+ * If @start is in a non-trailing hole, store @start in @hole and the
+ * beginning of the next non-hole in @data, and return 0.
+ * If @start is in a trailing hole or beyond EOF, return -ENXIO.
+ * If we can't find out, return a negative errno other than -ENXIO.
+ *
+ * (Shamefully copied from raw-posix.c, only minuscule adaptations.)
+ */
+static int find_allocation(BlockDriverState *bs, off_t start,
+ off_t *data, off_t *hole)
+{
+ BDRVGlusterState *s = bs->opaque;
+ off_t offs;
+
+ if (!s->supports_seek_data) {
+ return -ENOTSUP;
}
-};
+
+ /*
+ * SEEK_DATA cases:
+ * D1. offs == start: start is in data
+ * D2. offs > start: start is in a hole, next data at offs
+ * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
+ * or start is beyond EOF
+ * If the latter happens, the file has been truncated behind
+ * our back since we opened it. All bets are off then.
+ * Treating like a trailing hole is simplest.
+ * D4. offs < 0, errno != ENXIO: we learned nothing
+ */
+ offs = glfs_lseek(s->fd, start, SEEK_DATA);
+ if (offs < 0) {
+ return -errno; /* D3 or D4 */
+ }
+ assert(offs >= start);
+
+ if (offs > start) {
+ /* D2: in hole, next data at offs */
+ *hole = start;
+ *data = offs;
+ return 0;
+ }
+
+ /* D1: in data, end not yet known */
+
+ /*
+ * SEEK_HOLE cases:
+ * H1. offs == start: start is in a hole
+ * If this happens here, a hole has been dug behind our back
+ * since the previous lseek().
+ * H2. offs > start: either start is in data, next hole at offs,
+ * or start is in trailing hole, EOF at offs
+ * Linux treats trailing holes like any other hole: offs ==
+ * start. Solaris seeks to EOF instead: offs > start (blech).
+ * If that happens here, a hole has been dug behind our back
+ * since the previous lseek().
+ * H3. offs < 0, errno = ENXIO: start is beyond EOF
+ * If this happens, the file has been truncated behind our
+ * back since we opened it. Treat it like a trailing hole.
+ * H4. offs < 0, errno != ENXIO: we learned nothing
+ * Pretend we know nothing at all, i.e. "forget" about D1.
+ */
+ offs = glfs_lseek(s->fd, start, SEEK_HOLE);
+ if (offs < 0) {
+ return -errno; /* D1 and (H3 or H4) */
+ }
+ assert(offs >= start);
+
+ if (offs > start) {
+ /*
+ * D1 and H2: either in data, next hole at offs, or it was in
+ * data but is now in a trailing hole. In the latter case,
+ * all bets are off. Treating it as if there was data all
+ * the way to EOF is safe, so simply do that.
+ */
+ *data = start;
+ *hole = offs;
+ return 0;
+ }
+
+ /* D1 and H1 */
+ return -EBUSY;
+}
+
+/*
+ * Returns the allocation status of the specified sectors.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to. If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ *
+ * On the normal path the return value is BDRV_BLOCK_DATA or BDRV_BLOCK_ZERO
+ * combined with BDRV_BLOCK_OFFSET_VALID and the byte offset of 'sector_num',
+ * and '*file' is set to 'bs'. Returns -EINVAL when no fd is open, or the
+ * negative value reported by bdrv_getlength().
+ *
+ * (Based on raw_co_get_block_status() from raw-posix.c.)
+ */
+static int64_t coroutine_fn qemu_gluster_co_get_block_status(
+ BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ BDRVGlusterState *s = bs->opaque;
+ off_t start, data = 0, hole = 0;
+ int64_t total_size;
+ int ret = -EINVAL;
+
+ /* without an open fd there is nothing to query: report -EINVAL */
+ if (!s->fd) {
+ return ret;
+ }
+
+ start = sector_num * BDRV_SECTOR_SIZE;
+ total_size = bdrv_getlength(bs);
+ if (total_size < 0) {
+ return total_size;
+ } else if (start >= total_size) {
+ *pnum = 0;
+ return 0;
+ } else if (start + nb_sectors * BDRV_SECTOR_SIZE > total_size) {
+ /* clamp the request to the image size, rounding up to whole sectors */
+ nb_sectors = DIV_ROUND_UP(total_size - start, BDRV_SECTOR_SIZE);
+ }
+
+ ret = find_allocation(bs, start, &data, &hole);
+ if (ret == -ENXIO) {
+ /* Trailing hole */
+ *pnum = nb_sectors;
+ ret = BDRV_BLOCK_ZERO;
+ } else if (ret < 0) {
+ /* No info available, so pretend there are no holes */
+ *pnum = nb_sectors;
+ ret = BDRV_BLOCK_DATA;
+ } else if (data == start) {
+ /* On a data extent, compute sectors to the end of the extent,
+ * possibly including a partial sector at EOF. */
+ *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
+ ret = BDRV_BLOCK_DATA;
+ } else {
+ /* On a hole, compute sectors to the beginning of the next extent. */
+ assert(hole == start);
+ *pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
+ ret = BDRV_BLOCK_ZERO;
+ }
+
+ *file = bs;
+
+ /* allocation flag, plus the byte offset marked as valid */
+ return ret | BDRV_BLOCK_OFFSET_VALID | start;
+}
+
static BlockDriver bdrv_gluster = {
.format_name = "gluster",
.protocol_name = "gluster",
.instance_size = sizeof(BDRVGlusterState),
- .bdrv_needs_filename = true,
+ .bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -733,11 +1304,12 @@
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -745,7 +1317,7 @@
.format_name = "gluster",
.protocol_name = "gluster+tcp",
.instance_size = sizeof(BDRVGlusterState),
- .bdrv_needs_filename = true,
+ .bdrv_needs_filename = false,
.bdrv_file_open = qemu_gluster_open,
.bdrv_reopen_prepare = qemu_gluster_reopen_prepare,
.bdrv_reopen_commit = qemu_gluster_reopen_commit,
@@ -760,11 +1332,12 @@
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
@@ -787,14 +1360,21 @@
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
+/* rdma is deprecated (actually never supported for volfile fetch).
+ * Let's maintain it for the protocol compatibility, to make sure things
+ * won't break immediately. For now, gluster+rdma will fall back to gluster+tcp
+ * protocol with a warning.
+ * TODO: remove gluster+rdma interface support
+ */
static BlockDriver bdrv_gluster_rdma = {
.format_name = "gluster",
.protocol_name = "gluster+rdma",
@@ -814,11 +1394,12 @@
.bdrv_co_flush_to_disk = qemu_gluster_co_flush_to_disk,
.bdrv_has_zero_init = qemu_gluster_has_zero_init,
#ifdef CONFIG_GLUSTERFS_DISCARD
- .bdrv_co_discard = qemu_gluster_co_discard,
+ .bdrv_co_pdiscard = qemu_gluster_co_pdiscard,
#endif
#ifdef CONFIG_GLUSTERFS_ZEROFILL
- .bdrv_co_write_zeroes = qemu_gluster_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = qemu_gluster_co_pwrite_zeroes,
#endif
+ .bdrv_co_get_block_status = qemu_gluster_co_get_block_status,
.create_opts = &qemu_gluster_create_opts,
};
diff --git a/block/io.c b/block/io.c
new file mode 100644
index 0000000..420944d
--- /dev/null
+++ b/block/io.c
@@ -0,0 +1,2736 @@
+/*
+ * Block layer I/O functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "trace.h"
+#include "sysemu/block-backend.h"
+#include "block/blockjob.h"
+#include "block/block_int.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
+
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+ int64_t offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags,
+ BlockCompletionFunc *cb,
+ void *opaque,
+ bool is_write);
+static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags);
+
+static void bdrv_parent_drained_begin(BlockDriverState *bs)
+{
+ BdrvChild *c;
+ /* Invoke the optional drained_begin hook of every parent link of bs. */
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_begin) {
+ c->role->drained_begin(c);
+ }
+ }
+}
+
+static void bdrv_parent_drained_end(BlockDriverState *bs)
+{
+ BdrvChild *c;
+ /* Invoke the optional drained_end hook of every parent link of bs. */
+ QLIST_FOREACH(c, &bs->parents, next_parent) {
+ if (c->role->drained_end) {
+ c->role->drained_end(c);
+ }
+ }
+}
+
+static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
+{ /* Merge src into dst field by field; src itself is never modified. */
+ dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
+ dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+ dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
+ src->opt_mem_alignment);
+ dst->min_mem_alignment = MAX(dst->min_mem_alignment,
+ src->min_mem_alignment);
+ dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
+}
+
+void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ BlockDriver *drv = bs->drv;
+ Error *local_err = NULL;
+ /* Rebuild bs->bl from zero: defaults, children, then the driver hook. */
+ memset(&bs->bl, 0, sizeof(bs->bl));
+
+ if (!drv) {
+ return; /* no driver: limits are left zeroed */
+ }
+
+ /* Default alignment based on whether driver has byte interface */
+ bs->bl.request_alignment = drv->bdrv_co_preadv ? 1 : 512;
+
+ /* Take some limits from the children as a default */
+ if (bs->file) {
+ bdrv_refresh_limits(bs->file->bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ bdrv_merge_limits(&bs->bl, &bs->file->bs->bl);
+ } else {
+ bs->bl.min_mem_alignment = 512;
+ bs->bl.opt_mem_alignment = getpagesize();
+
+ /* Safe default since most protocols use readv()/writev()/etc */
+ bs->bl.max_iov = IOV_MAX;
+ }
+ /* The backing file's limits are refreshed and merged the same way. */
+ if (bs->backing) {
+ bdrv_refresh_limits(bs->backing->bs, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
+ }
+
+ /* Then let the driver override it */
+ if (drv->bdrv_refresh_limits) {
+ drv->bdrv_refresh_limits(bs, errp);
+ }
+}
+
+/**
+ * The copy-on-read flag is actually a reference count so multiple users may
+ * use the feature without worrying about clobbering its previous state.
+ * Copy-on-read stays enabled until all users have called to disable it.
+ */
+void bdrv_enable_copy_on_read(BlockDriverState *bs)
+{
+ bs->copy_on_read++; /* take one reference on the feature */
+}
+
+void bdrv_disable_copy_on_read(BlockDriverState *bs)
+{
+ assert(bs->copy_on_read > 0); /* must pair with an earlier enable */
+ bs->copy_on_read--; /* drop one reference on the feature */
+}
+
+/* Check if any requests are in-flight (including throttled requests) */
+bool bdrv_requests_pending(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ /* Requests tracked on this node itself count as pending. */
+ if (!QLIST_EMPTY(&bs->tracked_requests)) {
+ return true;
+ }
+ /* Otherwise recurse: the node is busy if any child subtree is busy. */
+ QLIST_FOREACH(child, &bs->children, next) {
+ if (bdrv_requests_pending(child->bs)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static void bdrv_drain_recurse(BlockDriverState *bs)
+{
+ BdrvChild *child;
+ /* Call the driver's optional bdrv_drain hook on bs, then on each child. */
+ if (bs->drv && bs->drv->bdrv_drain) {
+ bs->drv->bdrv_drain(bs);
+ }
+ QLIST_FOREACH(child, &bs->children, next) {
+ bdrv_drain_recurse(child->bs);
+ }
+}
+
+typedef struct {
+ Coroutine *co; /* coroutine re-entered by bdrv_co_drain_bh_cb() */
+ BlockDriverState *bs; /* node that the bottom half polls */
+ QEMUBH *bh; /* bottom half; deleted inside its own callback */
+ bool done; /* set to true just before 'co' is re-entered */
+} BdrvCoDrainData;
+
+static void bdrv_drain_poll(BlockDriverState *bs)
+{
+ bool busy = true;
+
+ while (busy) {
+ /* Loop while requests are pending or aio_poll() returns true */
+ busy = bdrv_requests_pending(bs);
+ busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ }
+}
+
+static void bdrv_co_drain_bh_cb(void *opaque)
+{
+ BdrvCoDrainData *data = opaque;
+ Coroutine *co = data->co;
+ /* BH body: delete the BH, poll the node, then resume the coroutine. */
+ qemu_bh_delete(data->bh);
+ bdrv_drain_poll(data->bs);
+ data->done = true;
+ qemu_coroutine_enter(co);
+}
+
+static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs)
+{
+ BdrvCoDrainData data;
+ /* 'data' is a local of this coroutine; its address is handed to the BH. */
+ /* Calling bdrv_drain() from a BH ensures the current coroutine yields and
+ * other coroutines run if they were queued from
+ * qemu_co_queue_run_restart(). */
+
+ assert(qemu_in_coroutine());
+ data = (BdrvCoDrainData) {
+ .co = qemu_coroutine_self(),
+ .bs = bs,
+ .done = false,
+ .bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_drain_bh_cb, &data),
+ };
+ qemu_bh_schedule(data.bh);
+ /* Sleep here until bdrv_co_drain_bh_cb() re-enters this coroutine. */
+ qemu_coroutine_yield();
+ /* If we are resumed from some other event (such as an aio completion or a
+ * timer callback), it is a bug in the caller that should be fixed. */
+ assert(data.done);
+}
+
+void bdrv_drained_begin(BlockDriverState *bs)
+{
+ if (!bs->quiesce_counter++) { /* counter was 0: outermost section */
+ aio_disable_external(bdrv_get_aio_context(bs));
+ bdrv_parent_drained_begin(bs);
+ }
+ /* Every call drains; from a coroutine the polling is deferred to a BH. */
+ bdrv_io_unplugged_begin(bs);
+ bdrv_drain_recurse(bs);
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(bs);
+ } else {
+ bdrv_drain_poll(bs);
+ }
+ bdrv_io_unplugged_end(bs);
+}
+
+void bdrv_drained_end(BlockDriverState *bs)
+{
+ assert(bs->quiesce_counter > 0);
+ if (--bs->quiesce_counter > 0) { /* still inside a nested section */
+ return;
+ }
+ /* Counter reached 0: undo the steps taken by bdrv_drained_begin(). */
+ bdrv_parent_drained_end(bs);
+ aio_enable_external(bdrv_get_aio_context(bs));
+}
+
+/*
+ * Wait for pending requests to complete on a single BlockDriverState subtree,
+ * and suspend block driver's internal I/O until next request arrives.
+ *
+ * Note that unlike bdrv_drain_all(), the caller must hold the BlockDriverState
+ * AioContext.
+ *
+ * Only this BlockDriverState's AioContext is run, so in-flight requests must
+ * not depend on events in other AioContexts. In that case, use
+ * bdrv_drain_all() instead.
+ */
+void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
+{
+ assert(qemu_in_coroutine()); /* coroutine-only variant of bdrv_drain() */
+ bdrv_drained_begin(bs);
+ bdrv_drained_end(bs);
+}
+
+void bdrv_drain(BlockDriverState *bs)
+{
+ bdrv_drained_begin(bs); /* waits for the pending requests */
+ bdrv_drained_end(bs); /* then ends the drained section right away */
+}
+
+/*
+ * Wait for pending requests to complete across all BlockDriverStates
+ *
+ * This function does not flush data to disk, use bdrv_flush_all() for that
+ * after calling this function.
+ */
+void bdrv_drain_all(void)
+{
+ /* Always run first iteration so any pending completion BHs run */
+ bool busy = true;
+ BlockDriverState *bs;
+ BdrvNextIterator it;
+ BlockJob *job = NULL;
+ GSList *aio_ctxs = NULL, *ctx;
+ /* Step 1: pause every block job while holding its AioContext. */
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_pause(job);
+ aio_context_release(aio_context);
+ }
+ /* Step 2: start draining each node; collect the distinct AioContexts. */
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
+ bdrv_parent_drained_begin(bs);
+ bdrv_io_unplugged_begin(bs);
+ bdrv_drain_recurse(bs);
+ aio_context_release(aio_context);
+
+ if (!g_slist_find(aio_ctxs, aio_context)) {
+ aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
+ }
+ }
+
+ /* Note that completion of an asynchronous I/O operation can trigger any
+ * number of other I/O operations on other devices---for example a
+ * coroutine can submit an I/O request to another device in response to
+ * request completion. Therefore we must keep looping until there was no
+ * more activity rather than simply draining each device independently.
+ */
+ while (busy) {
+ busy = false;
+
+ for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
+ AioContext *aio_context = ctx->data;
+
+ aio_context_acquire(aio_context);
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ if (aio_context == bdrv_get_aio_context(bs)) {
+ if (bdrv_requests_pending(bs)) {
+ busy = true;
+ aio_poll(aio_context, busy);
+ }
+ }
+ }
+ busy |= aio_poll(aio_context, false);
+ aio_context_release(aio_context);
+ }
+ }
+ /* Step 3: undo the per-node unplug/parent-drain calls from step 2. */
+ for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(aio_context);
+ bdrv_io_unplugged_end(bs);
+ bdrv_parent_drained_end(bs);
+ aio_context_release(aio_context);
+ }
+ g_slist_free(aio_ctxs);
+ /* Step 4: resume the block jobs, restarting the iteration from NULL. */
+ job = NULL;
+ while ((job = block_job_next(job))) {
+ AioContext *aio_context = blk_get_aio_context(job->blk);
+
+ aio_context_acquire(aio_context);
+ block_job_resume(job);
+ aio_context_release(aio_context);
+ }
+}
+
+/**
+ * Remove an active request from the tracked requests list
+ *
+ * This function should be called when a tracked request is completing.
+ */
+static void tracked_request_end(BdrvTrackedRequest *req)
+{
+ if (req->serialising) {
+ req->bs->serialising_in_flight--;
+ }
+ /* Unlink the request, then restart everything queued on its wait queue. */
+ QLIST_REMOVE(req, list);
+ qemu_co_queue_restart_all(&req->wait_queue);
+}
+
+/**
+ * Add an active request to the tracked requests list
+ */
+static void tracked_request_begin(BdrvTrackedRequest *req,
+ BlockDriverState *bs,
+ int64_t offset,
+ unsigned int bytes,
+ enum BdrvTrackedRequestType type)
+{
+ *req = (BdrvTrackedRequest){
+ .bs = bs,
+ .offset = offset,
+ .bytes = bytes,
+ .type = type,
+ .co = qemu_coroutine_self(), /* coroutine issuing the request */
+ .serialising = false,
+ .overlap_offset = offset, /* overlap area starts as the request */
+ .overlap_bytes = bytes,
+ };
+
+ qemu_co_queue_init(&req->wait_queue);
+ /* Insert the request at the head of the node's tracked list. */
+ QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
+}
+
+static void mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+{
+ int64_t overlap_offset = req->offset & ~(align - 1);
+ unsigned int overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
+ - overlap_offset;
+ /* Count the request as serialising only once, even on repeated calls. */
+ if (!req->serialising) {
+ req->bs->serialising_in_flight++;
+ req->serialising = true;
+ }
+ /* The overlap area only grows: lowest start so far, largest length. */
+ req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
+ req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
+}
+
+/**
+ * Round a region to cluster boundaries (sector-based)
+ */
+void bdrv_round_sectors_to_clusters(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors,
+ int64_t *cluster_sector_num,
+ int *cluster_nb_sectors)
+{
+ BlockDriverInfo bdi;
+ /* With no usable cluster size the region is returned unchanged. */
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_sector_num = sector_num;
+ *cluster_nb_sectors = nb_sectors;
+ } else {
+ int64_t c = bdi.cluster_size / BDRV_SECTOR_SIZE; /* sectors/cluster */
+ *cluster_sector_num = QEMU_ALIGN_DOWN(sector_num, c);
+ *cluster_nb_sectors = QEMU_ALIGN_UP(sector_num - *cluster_sector_num +
+ nb_sectors, c);
+ }
+}
+
+/**
+ * Round a region to cluster boundaries (byte-based)
+ */
+void bdrv_round_to_clusters(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes,
+ int64_t *cluster_offset,
+ unsigned int *cluster_bytes)
+{
+ BlockDriverInfo bdi;
+ /* With no usable cluster size the region is returned unchanged. */
+ if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
+ *cluster_offset = offset;
+ *cluster_bytes = bytes;
+ } else {
+ int64_t c = bdi.cluster_size; /* cluster size in bytes */
+ *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
+ *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ }
+}
+
+static int bdrv_get_cluster_size(BlockDriverState *bs)
+{
+ BlockDriverInfo bdi;
+ int ret;
+ /* Falls back to the request alignment when no cluster size is known. */
+ ret = bdrv_get_info(bs, &bdi);
+ if (ret < 0 || bdi.cluster_size == 0) {
+ return bs->bl.request_alignment;
+ } else {
+ return bdi.cluster_size;
+ }
+}
+
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
+ int64_t offset, unsigned int bytes)
+{
+ /* aaaa bbbb: range begins at or past the end of req's overlap area */
+ if (offset >= req->overlap_offset + req->overlap_bytes) {
+ return false;
+ }
+ /* bbbb aaaa: range ends at or before req's overlap area begins */
+ if (req->overlap_offset >= offset + bytes) {
+ return false;
+ }
+ return true;
+}
+
+static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
+{
+ BlockDriverState *bs = self->bs;
+ BdrvTrackedRequest *req;
+ bool retry;
+ bool waited = false;
+ /* Fast path: nothing to wait for when no serialising request exists. */
+ if (!bs->serialising_in_flight) {
+ return false;
+ }
+
+ do {
+ retry = false;
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
+ if (req == self || (!req->serialising && !self->serialising)) {
+ continue; /* skip self and pairs with no serialising side */
+ }
+ if (tracked_request_overlaps(req, self->overlap_offset,
+ self->overlap_bytes))
+ {
+ /* Hitting this means there was a reentrant request, for
+ * example, a block driver issuing nested requests. This must
+ * never happen since it means deadlock.
+ */
+ assert(qemu_coroutine_self() != req->co);
+
+ /* If the request is already (indirectly) waiting for us, or
+ * will wait for us as soon as it wakes up, then just go on
+ * (instead of producing a deadlock in the former case). */
+ if (!req->waiting_for) {
+ self->waiting_for = req;
+ qemu_co_queue_wait(&req->wait_queue);
+ self->waiting_for = NULL;
+ retry = true; /* rescan the list from the start */
+ waited = true;
+ break;
+ }
+ }
+ }
+ } while (retry);
+ /* True if this coroutine had to wait at least once. */
+ return waited;
+}
+
+static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
+ size_t size)
+{
+ if (size > BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS) {
+ return -EIO; /* larger than the per-request maximum */
+ }
+
+ if (!bdrv_is_inserted(bs)) {
+ return -ENOMEDIUM;
+ }
+
+ if (offset < 0) {
+ return -EIO; /* negative offsets are rejected */
+ }
+
+ return 0;
+}
+
+static int bdrv_check_request(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors)
+{
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EIO;
+ }
+ /* Convert to bytes and reuse the byte-based validation. */
+ return bdrv_check_byte_request(bs, sector_num * BDRV_SECTOR_SIZE,
+ nb_sectors * BDRV_SECTOR_SIZE);
+}
+
+typedef struct RwCo {
+ BdrvChild *child; /* child the request is issued on */
+ int64_t offset; /* byte offset of the request */
+ QEMUIOVector *qiov; /* I/O vector; its size is the request length */
+ bool is_write; /* true: bdrv_co_pwritev, false: bdrv_co_preadv */
+ int ret; /* NOT_DONE until bdrv_rw_co_entry() stores the result */
+ BdrvRequestFlags flags;
+} RwCo;
+
+static void coroutine_fn bdrv_rw_co_entry(void *opaque)
+{
+ RwCo *rwco = opaque;
+ /* Coroutine entry: run the read or write, store the result in rwco. */
+ if (!rwco->is_write) {
+ rwco->ret = bdrv_co_preadv(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
+ } else {
+ rwco->ret = bdrv_co_pwritev(rwco->child, rwco->offset,
+ rwco->qiov->size, rwco->qiov,
+ rwco->flags);
+ }
+}
+
+/*
+ * Process a vectored synchronous request using coroutines
+ */
+static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
+ QEMUIOVector *qiov, bool is_write,
+ BdrvRequestFlags flags)
+{
+ Coroutine *co;
+ RwCo rwco = {
+ .child = child,
+ .offset = offset,
+ .qiov = qiov,
+ .is_write = is_write,
+ .ret = NOT_DONE, /* sentinel replaced by bdrv_rw_co_entry() */
+ .flags = flags,
+ };
+
+ if (qemu_in_coroutine()) {
+ /* Fast-path if already in coroutine context */
+ bdrv_rw_co_entry(&rwco);
+ } else {
+ AioContext *aio_context = bdrv_get_aio_context(child->bs);
+ /* Start a coroutine, then poll until it has stored a result. */
+ co = qemu_coroutine_create(bdrv_rw_co_entry, &rwco);
+ qemu_coroutine_enter(co);
+ while (rwco.ret == NOT_DONE) {
+ aio_poll(aio_context, true);
+ }
+ }
+ return rwco.ret;
+}
+
+/*
+ * Process a synchronous request using coroutines
+ */
+static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
+ int nb_sectors, bool is_write, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ };
+ /* nb_sectors is validated here, after 'iov' was already initialised. */
+ if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
+ return -EINVAL;
+ }
+ /* Wrap buf in a one-element vector and convert the sector to bytes. */
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
+ &qiov, is_write, flags);
+}
+
+/* Synchronous sector read; < 0 on error, see bdrv_write() for the codes */
+int bdrv_read(BdrvChild *child, int64_t sector_num,
+ uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
+}
+
+/* Synchronous sector write. Return < 0 if error. Important errors are:
+ -EIO generic I/O error (may happen for all errors)
+ -ENOMEDIUM No media inserted.
+ -EINVAL Invalid sector number or nb_sectors
+ -EACCES Trying to write a read-only device
+*/
+int bdrv_write(BdrvChild *child, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
+}
+
+int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
+ int count, BdrvRequestFlags flags)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = NULL, /* no data buffer, only a length */
+ .iov_len = count,
+ };
+ /* Issued as a write with BDRV_REQ_ZERO_WRITE added to the flags. */
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_prwv_co(child, offset, &qiov, true,
+ BDRV_REQ_ZERO_WRITE | flags);
+}
+
+/*
+ * Completely zero out a block device with the help of bdrv_pwrite_zeroes.
+ * The operation is sped up by checking the block status and only writing
+ * zeroes to the device if they currently do not return zeroes. Optional
+ * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
+ * BDRV_REQ_FUA).
+ *
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ */
+int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
+{
+ int64_t target_sectors, ret, nb_sectors, sector_num = 0;
+ BlockDriverState *bs = child->bs;
+ BlockDriverState *file;
+ int n;
+
+ target_sectors = bdrv_nb_sectors(bs);
+ if (target_sectors < 0) {
+ return target_sectors;
+ }
+ /* Walk the device in chunks of at most BDRV_REQUEST_MAX_SECTORS. */
+ for (;;) {
+ nb_sectors = MIN(target_sectors - sector_num, BDRV_REQUEST_MAX_SECTORS);
+ if (nb_sectors <= 0) {
+ return 0; /* reached the end of the device */
+ }
+ ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n, &file);
+ if (ret < 0) {
+ error_report("error getting block status at sector %" PRId64 ": %s",
+ sector_num, strerror(-ret));
+ return ret;
+ }
+ if (ret & BDRV_BLOCK_ZERO) { /* flagged as zero: nothing to write */
+ sector_num += n;
+ continue;
+ }
+ ret = bdrv_pwrite_zeroes(child, sector_num << BDRV_SECTOR_BITS,
+ n << BDRV_SECTOR_BITS, flags);
+ if (ret < 0) {
+ error_report("error writing zeroes at sector %" PRId64 ": %s",
+ sector_num, strerror(-ret));
+ return ret;
+ }
+ sector_num += n;
+ }
+}
+
+int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
+{
+ int ret;
+ /* Synchronous vectored read; negative results are returned as-is. */
+ ret = bdrv_prwv_co(child, offset, qiov, false, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return qiov->size; /* success is reported as the vector's size */
+}
+
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *)buf,
+ .iov_len = bytes,
+ };
+ /* Negative lengths are rejected before anything is issued. */
+ if (bytes < 0) {
+ return -EINVAL;
+ }
+ /* Single-buffer convenience wrapper around bdrv_preadv(). */
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_preadv(child, offset, &qiov);
+}
+
+int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
+{
+ int ret;
+ /* Synchronous vectored write; negative results are returned as-is. */
+ ret = bdrv_prwv_co(child, offset, qiov, true, 0);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return qiov->size; /* success is reported as the vector's size */
+}
+
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
+{
+ QEMUIOVector qiov;
+ struct iovec iov = {
+ .iov_base = (void *) buf, /* cast drops const for struct iovec */
+ .iov_len = bytes,
+ };
+ /* Negative lengths are rejected before anything is issued. */
+ if (bytes < 0) {
+ return -EINVAL;
+ }
+ /* Single-buffer convenience wrapper around bdrv_pwritev(). */
+ qemu_iovec_init_external(&qiov, &iov, 1);
+ return bdrv_pwritev(child, offset, &qiov);
+}
+
+/*
+ * Writes to the file and ensures that no writes are reordered across this
+ * request (acts as a barrier)
+ *
+ * Returns 0 on success, -errno in error cases.
+ */
+int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
+ const void *buf, int count)
+{
+ int ret;
+
+ ret = bdrv_pwrite(child, offset, buf, count);
+ if (ret < 0) {
+ return ret;
+ }
+ /* A successful write is followed by a flush of the same node. */
+ ret = bdrv_flush(child->bs);
+ if (ret < 0) {
+ return ret;
+ }
+
+ return 0; /* the byte count from bdrv_pwrite() is not passed on */
+}
+
+typedef struct CoroutineIOCompletion {
+ Coroutine *coroutine; /* coroutine entered by bdrv_co_io_em_complete() */
+ int ret; /* completion status stored by that callback */
+} CoroutineIOCompletion;
+
+static void bdrv_co_io_em_complete(void *opaque, int ret)
+{
+ CoroutineIOCompletion *co = opaque;
+ /* AIO completion callback: record ret, then enter the coroutine. */
+ co->ret = ret;
+ qemu_coroutine_enter(co->coroutine);
+}
+
+static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+ /* Prefer the driver's byte-based read callback when it has one. */
+ if (drv->bdrv_co_preadv) {
+ return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ }
+ /* Otherwise use a sector-based callback; sector alignment is asserted. */
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_readv) {
+ return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ return -EIO;
+ } else {
+ qemu_coroutine_yield(); /* re-entered by bdrv_co_io_em_complete() */
+ return co.ret;
+ }
+ }
+}
+
+static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ int64_t sector_num;
+ unsigned int nb_sectors;
+ int ret;
+
+ assert(!(flags & ~BDRV_REQ_MASK));
+ /* Byte-based path: pass supported flags down, keep the rest to emulate. */
+ if (drv->bdrv_co_pwritev) {
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ goto emulate_flags;
+ }
+ /* Otherwise use a sector-based callback; sector alignment is asserted. */
+ sector_num = offset >> BDRV_SECTOR_BITS;
+ nb_sectors = bytes >> BDRV_SECTOR_BITS;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes >> BDRV_SECTOR_BITS) <= BDRV_REQUEST_MAX_SECTORS);
+
+ if (drv->bdrv_co_writev_flags) {
+ ret = drv->bdrv_co_writev_flags(bs, sector_num, nb_sectors, qiov,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ } else if (drv->bdrv_co_writev) {
+ assert(!bs->supported_write_flags);
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov);
+ } else {
+ BlockAIOCB *acb;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+
+ acb = bs->drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
+ bdrv_co_io_em_complete, &co);
+ if (acb == NULL) {
+ ret = -EIO;
+ } else {
+ qemu_coroutine_yield(); /* re-entered by bdrv_co_io_em_complete() */
+ ret = co.ret;
+ }
+ }
+
+emulate_flags:
+ if (ret == 0 && (flags & BDRV_REQ_FUA)) { /* FUA still set: flush */
+ ret = bdrv_co_flush(bs);
+ }
+
+ return ret;
+}
+
+static int coroutine_fn bdrv_co_do_copy_on_readv(BlockDriverState *bs,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov)
+{
+ /* Perform I/O through a temporary buffer so that users who scribble over
+ * their read buffer while the operation is in progress do not end up
+ * modifying the image file. This is critical for zero-copy guest I/O
+ * where anything might happen inside guest memory.
+ */
+ void *bounce_buffer;
+
+ BlockDriver *drv = bs->drv;
+ struct iovec iov;
+ QEMUIOVector bounce_qiov;
+ int64_t cluster_offset;
+ unsigned int cluster_bytes;
+ size_t skip_bytes;
+ int ret;
+
+ /* Cover entire cluster so no additional backing file I/O is required when
+ * allocating cluster in the image file.
+ */
+ bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
+
+ trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
+ cluster_offset, cluster_bytes);
+ /* The bounce buffer spans the whole cluster-rounded range. */
+ iov.iov_len = cluster_bytes;
+ iov.iov_base = bounce_buffer = qemu_try_blockalign(bs, iov.iov_len);
+ if (bounce_buffer == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ qemu_iovec_init_external(&bounce_qiov, &iov, 1);
+
+ ret = bdrv_driver_preadv(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
+ if (ret < 0) {
+ goto err;
+ }
+ /* Write what was read back to bs: as zeroes if all-zero, else as data. */
+ if (drv->bdrv_co_pwrite_zeroes &&
+ buffer_is_zero(bounce_buffer, iov.iov_len)) {
+ /* FIXME: Should we (perhaps conditionally) be setting
+ * BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
+ * that still correctly reads as zero? */
+ ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, cluster_bytes, 0);
+ } else {
+ /* This does not change the data on the disk, it is not necessary
+ * to flush even in cache=writethrough mode.
+ */
+ ret = bdrv_driver_pwritev(bs, cluster_offset, cluster_bytes,
+ &bounce_qiov, 0);
+ }
+
+ if (ret < 0) {
+ /* It might be okay to ignore write errors for guest requests. If this
+ * is a deliberate copy-on-read then we don't want to ignore the error.
+ * Simply report it in all cases.
+ */
+ goto err;
+ }
+ /* Hand only the originally requested bytes back to the caller. */
+ skip_bytes = offset - cluster_offset;
+ qemu_iovec_from_buf(qiov, 0, bounce_buffer + skip_bytes, bytes);
+
+err:
+ qemu_vfree(bounce_buffer); /* also reached on the success path */
+ return ret;
+}
+
+/*
+ * Forwards an already correctly aligned request to the BlockDriver. This
+ * handles copy on read, zeroing after EOF, and fragmentation of large
+ * reads; any other features must be implemented by the caller.
+ */
+static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs,
+ BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ int64_t align, QEMUIOVector *qiov, int flags)
+{
+ int64_t total_bytes, max_bytes;
+ int ret = 0;
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
+
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
+ assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
+
+ /* TODO: We would need a per-BDS .supported_read_flags and
+ * potential fallback support, if we ever implement any read flags
+ * to pass through to drivers. For now, there aren't any
+ * passthrough flags. */
+ assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ)));
+
+ /* Handle Copy on Read and associated serialisation */
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ /* If we touch the same cluster it counts as an overlap. This
+ * guarantees that allocating writes will be serialized and not race
+ * with each other for the same cluster. For example, in copy-on-read
+ * it ensures that the CoR read and write operations are atomic and
+ * guest writes cannot interleave between them. */
+ mark_request_serialising(req, bdrv_get_cluster_size(bs));
+ }
+
+ if (!(flags & BDRV_REQ_NO_SERIALISING)) {
+ wait_serialising_requests(req);
+ }
+
+ if (flags & BDRV_REQ_COPY_ON_READ) {
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ unsigned int nb_sectors = end_sector - start_sector;
+ int pnum;
+
+ ret = bdrv_is_allocated(bs, start_sector, nb_sectors, &pnum);
+ if (ret < 0) {
+ goto out;
+ }
+ /* Copy-on-read unless the whole range is reported allocated. */
+ if (!ret || pnum != nb_sectors) {
+ ret = bdrv_co_do_copy_on_readv(bs, offset, bytes, qiov);
+ goto out;
+ }
+ }
+
+ /* Forward the request to the BlockDriver, possibly fragmenting it */
+ total_bytes = bdrv_getlength(bs);
+ if (total_bytes < 0) {
+ ret = total_bytes;
+ goto out;
+ }
+ /* Bytes from offset up to EOF (never negative), rounded up to align. */
+ max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
+ if (bytes <= max_bytes && bytes <= max_transfer) {
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+ goto out;
+ }
+
+ while (bytes_remaining) {
+ int num;
+
+ if (max_bytes) {
+ QEMUIOVector local_qiov;
+
+ num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
+ assert(num);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+
+ ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, 0);
+ max_bytes -= num;
+ qemu_iovec_destroy(&local_qiov);
+ } else {
+ num = bytes_remaining; /* max_bytes used up: zero-fill the rest */
+ ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
+ bytes_remaining);
+ }
+ if (ret < 0) {
+ goto out;
+ }
+ bytes_remaining -= num;
+ }
+
+out:
+ return ret < 0 ? ret : 0; /* non-negative results collapse to 0 */
+}
+
+/*
+ * Handle a read request in coroutine context, padding it to the alignment
+ */
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriverState *bs = child->bs;
+ BlockDriver *drv = bs->drv;
+ BdrvTrackedRequest req;
+
+ uint64_t align = bs->bl.request_alignment;
+ uint8_t *head_buf = NULL;
+ uint8_t *tail_buf = NULL;
+ QEMUIOVector local_qiov;
+ bool use_local_qiov = false;
+ int ret;
+
+ if (!drv) {
+ return -ENOMEDIUM;
+ }
+
+ ret = bdrv_check_byte_request(bs, offset, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Don't do copy-on-read if we read data before write operation */
+ if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
+ flags |= BDRV_REQ_COPY_ON_READ;
+ }
+
+ /* Align read if necessary by padding qiov */
+ if (offset & (align - 1)) {
+ head_buf = qemu_blockalign(bs, align);
+ qemu_iovec_init(&local_qiov, qiov->niov + 2);
+ qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+ /* Grow the request backwards so it starts on an aligned offset. */
+ bytes += offset & (align - 1);
+ offset = offset & ~(align - 1);
+ }
+ /* Likewise pad the tail when the request does not end aligned. */
+ if ((offset + bytes) & (align - 1)) {
+ if (!use_local_qiov) {
+ qemu_iovec_init(&local_qiov, qiov->niov + 1);
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+ }
+ tail_buf = qemu_blockalign(bs, align);
+ qemu_iovec_add(&local_qiov, tail_buf,
+ align - ((offset + bytes) & (align - 1)));
+
+ bytes = ROUND_UP(bytes, align);
+ }
+ /* Track the (possibly padded) request while the aligned read runs. */
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
+ ret = bdrv_aligned_preadv(bs, &req, offset, bytes, align,
+ use_local_qiov ? &local_qiov : qiov,
+ flags);
+ tracked_request_end(&req);
+ /* Release the padding buffers and the local vector, if any were used. */
+ if (use_local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ qemu_vfree(head_buf);
+ qemu_vfree(tail_buf);
+ }
+
+ return ret;
+}
+
+/*
+ * Sector-based front end for bdrv_co_preadv(): rejects sector counts outside
+ * [0, BDRV_REQUEST_MAX_SECTORS] with -EINVAL, then converts the sector
+ * position and count into bytes and forwards the request.
+ */
+static int coroutine_fn bdrv_co_do_readv(BdrvChild *child,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    bool count_ok = nb_sectors >= 0 &&
+                    nb_sectors <= BDRV_REQUEST_MAX_SECTORS;
+
+    if (!count_ok) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_preadv(child,
+                          sector_num << BDRV_SECTOR_BITS,
+                          nb_sectors << BDRV_SECTOR_BITS,
+                          qiov, flags);
+}
+
+/*
+ * Traced, flag-less sector read: emits the bdrv_co_readv trace event and
+ * delegates to bdrv_co_do_readv() with no request flags.
+ */
+int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
+                               int nb_sectors, QEMUIOVector *qiov)
+{
+    const BdrvRequestFlags no_flags = 0;
+
+    trace_bdrv_co_readv(child->bs, sector_num, nb_sectors);
+    return bdrv_co_do_readv(child, sector_num, nb_sectors, qiov, no_flags);
+}
+
+/* Maximum buffer for write zeroes fallback, in bytes */
+#define MAX_WRITE_ZEROES_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
+
+/*
+ * Write zeroes to the byte range [offset, offset + count) of @bs.
+ *
+ * The range is processed in chunks. Each chunk is first offered to the
+ * driver's bdrv_co_pwrite_zeroes callback (if any); when that is missing or
+ * returns -ENOTSUP, a zero-filled bounce buffer is written with
+ * bdrv_driver_pwritev() instead.
+ *
+ * If BDRV_REQ_FUA is requested but not supported natively for the path that
+ * was used, a single bdrv_co_flush() is issued at the end instead.
+ *
+ * Returns 0 on success (or the flush result), a negative value from the
+ * driver on failure, or -ENOMEM if the bounce buffer cannot be allocated.
+ */
+static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
+{
+ BlockDriver *drv = bs->drv;
+ QEMUIOVector qiov;
+ struct iovec iov = {0};
+ int ret = 0;
+ bool need_flush = false;
+ int head = 0;
+ int tail = 0;
+
+ /* Per-chunk limit and alignment, taken from the node's block limits */
+ int max_write_zeroes = MIN_NON_ZERO(bs->bl.max_pwrite_zeroes, INT_MAX);
+ int alignment = MAX(bs->bl.pwrite_zeroes_alignment,
+ bs->bl.request_alignment);
+
+ assert(alignment % bs->bl.request_alignment == 0);
+ /* Unaligned bytes at the start and at the end of the request */
+ head = offset % alignment;
+ tail = (offset + count) % alignment;
+ max_write_zeroes = QEMU_ALIGN_DOWN(max_write_zeroes, alignment);
+ assert(max_write_zeroes >= bs->bl.request_alignment);
+
+ /* The loop stops on the first non-zero ret or when everything is written */
+ while (count > 0 && !ret) {
+ int num = count;
+
+ /* Align request. Block drivers can expect the "bulk" of the request
+ * to be aligned, and that unaligned requests do not cross cluster
+ * boundaries.
+ */
+ if (head) {
+ /* Make a small request up to the first aligned sector. */
+ num = MIN(count, alignment - head);
+ head = 0;
+ } else if (tail && num > alignment) {
+ /* Shorten the request to the last aligned sector. */
+ num -= tail;
+ }
+
+ /* limit request size */
+ if (num > max_write_zeroes) {
+ num = max_write_zeroes;
+ }
+
+ /* Stays -ENOTSUP when the driver has no write-zeroes callback */
+ ret = -ENOTSUP;
+ /* First try the efficient write zeroes operation */
+ if (drv->bdrv_co_pwrite_zeroes) {
+ ret = drv->bdrv_co_pwrite_zeroes(bs, offset, num,
+ flags & bs->supported_zero_flags);
+ /* FUA was requested but not passed to the driver: flush later */
+ if (ret != -ENOTSUP && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_zero_flags & BDRV_REQ_FUA)) {
+ need_flush = true;
+ }
+ } else {
+ assert(!bs->supported_zero_flags);
+ }
+
+ if (ret == -ENOTSUP) {
+ /* Fall back to bounce buffer if write zeroes is unsupported */
+ int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
+ MAX_WRITE_ZEROES_BOUNCE_BUFFER);
+ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
+
+ if ((flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* No need for bdrv_driver_pwrite() to do a fallback
+ * flush on each chunk; use just one at the end */
+ write_flags &= ~BDRV_REQ_FUA;
+ need_flush = true;
+ }
+ num = MIN(num, max_transfer);
+ iov.iov_len = num;
+ /* Allocate and zero the bounce buffer only when none is cached */
+ if (iov.iov_base == NULL) {
+ iov.iov_base = qemu_try_blockalign(bs, num);
+ if (iov.iov_base == NULL) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+ memset(iov.iov_base, 0, num);
+ }
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
+
+ /* Keep bounce buffer around if it is big enough for all
+ * future requests.
+ */
+ if (num < max_transfer) {
+ qemu_vfree(iov.iov_base);
+ iov.iov_base = NULL;
+ }
+ }
+
+ offset += num;
+ count -= num;
+ }
+
+fail:
+ /* One flush stands in for the FUA semantics that were stripped above */
+ if (ret == 0 && need_flush) {
+ ret = bdrv_co_flush(bs);
+ }
+ qemu_vfree(iov.iov_base);
+ return ret;
+}
+
+/*
+ * Forwards an already correctly aligned write request to the BlockDriver,
+ * after possibly fragmenting it.
+ *
+ * @offset and @bytes must be multiples of @align (asserted), and @qiov, when
+ * non-NULL, must describe exactly @bytes bytes (asserted).
+ *
+ * The before-write notifiers run first and may fail the request. A buffer
+ * that is entirely zero can be converted into a zero write when zero
+ * detection is enabled and the driver has a write-zeroes callback.
+ *
+ * Returns 0 on success, or the negative value produced by a notifier or by
+ * the driver.
+ */
+static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs,
+ BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ int64_t align, QEMUIOVector *qiov, int flags)
+{
+ BlockDriver *drv = bs->drv;
+ bool waited;
+ int ret;
+
+ /* Sector range touched by the request, used for dirty tracking below */
+ int64_t start_sector = offset >> BDRV_SECTOR_BITS;
+ int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
+ uint64_t bytes_remaining = bytes;
+ int max_transfer;
+
+ assert(is_power_of_2(align));
+ assert((offset & (align - 1)) == 0);
+ assert((bytes & (align - 1)) == 0);
+ assert(!qiov || bytes == qiov->size);
+ assert((bs->open_flags & BDRV_O_NO_IO) == 0);
+ assert(!(flags & ~BDRV_REQ_MASK));
+ /* Largest single driver request, rounded down to the alignment */
+ max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
+ align);
+
+ waited = wait_serialising_requests(req);
+ assert(!waited || !req->serialising);
+ assert(req->overlap_offset <= offset);
+ assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
+
+ ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
+
+ /* Zero detection: turn an all-zero data write into a zero write */
+ if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
+ !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
+ qemu_iovec_is_zero(qiov)) {
+ flags |= BDRV_REQ_ZERO_WRITE;
+ if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
+ flags |= BDRV_REQ_MAY_UNMAP;
+ }
+ }
+
+ if (ret < 0) {
+ /* Do nothing, write notifier decided to fail this request */
+ } else if (flags & BDRV_REQ_ZERO_WRITE) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
+ ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
+ } else if (bytes <= max_transfer) {
+ /* Fits into a single driver request */
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
+ } else {
+ /* Too large for one request: split into max_transfer-sized pieces */
+ bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ while (bytes_remaining) {
+ int num = MIN(bytes_remaining, max_transfer);
+ QEMUIOVector local_qiov;
+ int local_flags = flags;
+
+ assert(num);
+ if (num < bytes_remaining && (flags & BDRV_REQ_FUA) &&
+ !(bs->supported_write_flags & BDRV_REQ_FUA)) {
+ /* If FUA is going to be emulated by flush, we only
+ * need to flush on the last iteration */
+ local_flags &= ~BDRV_REQ_FUA;
+ }
+ /* Sub-vector covering the next num bytes of the caller's qiov */
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+
+ ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
+ num, &local_qiov, local_flags);
+ qemu_iovec_destroy(&local_qiov);
+ if (ret < 0) {
+ break;
+ }
+ bytes_remaining -= num;
+ }
+ }
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+
+ /* The bookkeeping below runs whether or not the write succeeded */
+ ++bs->write_gen;
+ bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
+
+ if (bs->wr_highest_offset < offset + bytes) {
+ bs->wr_highest_offset = offset + bytes;
+ }
+
+ /* On success, grow the recorded size if needed and normalise ret to 0 */
+ if (ret >= 0) {
+ bs->total_sectors = MAX(bs->total_sectors, end_sector);
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/*
+ * Zero the byte range [offset, offset + bytes) of @bs on behalf of the
+ * tracked request @req. @flags must contain BDRV_REQ_ZERO_WRITE (asserted).
+ *
+ * The range is handled in up to three pieces relative to the node's request
+ * alignment:
+ *   - unaligned head: read the containing aligned block, clear the affected
+ *     bytes and write the block back (read-modify-write);
+ *   - aligned middle: passed to bdrv_aligned_pwritev() with a NULL qiov and
+ *     the zero-write flag kept;
+ *   - unaligned tail: read-modify-write like the head.
+ *
+ * Returns 0 on success or the first negative value reported by a read or
+ * write step.
+ */
+static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs,
+                                                int64_t offset,
+                                                unsigned int bytes,
+                                                BdrvRequestFlags flags,
+                                                BdrvTrackedRequest *req)
+{
+    uint8_t *buf = NULL;
+    QEMUIOVector local_qiov;
+    struct iovec iov;
+    uint64_t align = bs->bl.request_alignment;
+    unsigned int head_padding_bytes, tail_padding_bytes;
+    int ret = 0;
+
+    head_padding_bytes = offset & (align - 1);
+    /* Masking the difference yields 0 (not align) when the end is already
+     * aligned, so fully aligned requests do not allocate a bounce buffer. */
+    tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
+
+    assert(flags & BDRV_REQ_ZERO_WRITE);
+    if (head_padding_bytes || tail_padding_bytes) {
+        /* One aligned block serves both the head and the tail RMW cycle */
+        buf = qemu_blockalign(bs, align);
+        iov = (struct iovec) {
+            .iov_base   = buf,
+            .iov_len    = align,
+        };
+        qemu_iovec_init_external(&local_qiov, &iov, 1);
+    }
+    if (head_padding_bytes) {
+        uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
+
+        /* RMW the unaligned part before head. */
+        mark_request_serialising(req, align);
+        wait_serialising_requests(req);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+        ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align,
+                                  align, &local_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+        memset(buf + head_padding_bytes, 0, zero_bytes);
+        /* The buffer now holds real data, so write it as a normal write */
+        ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align,
+                                   align, &local_qiov,
+                                   flags & ~BDRV_REQ_ZERO_WRITE);
+        if (ret < 0) {
+            goto fail;
+        }
+        offset += zero_bytes;
+        bytes -= zero_bytes;
+    }
+
+    assert(!bytes || (offset & (align - 1)) == 0);
+    if (bytes >= align) {
+        /* Write the aligned part in the middle. */
+        uint64_t aligned_bytes = bytes & ~(align - 1);
+        ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, align,
+                                   NULL, flags);
+        if (ret < 0) {
+            goto fail;
+        }
+        bytes -= aligned_bytes;
+        offset += aligned_bytes;
+    }
+
+    assert(!bytes || (offset & (align - 1)) == 0);
+    if (bytes) {
+        assert(align == tail_padding_bytes + bytes);
+        /* RMW the unaligned part after tail. */
+        mark_request_serialising(req, align);
+        wait_serialising_requests(req);
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+        ret = bdrv_aligned_preadv(bs, req, offset, align,
+                                  align, &local_qiov, 0);
+        if (ret < 0) {
+            goto fail;
+        }
+        bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+        memset(buf, 0, bytes);
+        ret = bdrv_aligned_pwritev(bs, req, offset, align, align,
+                                   &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
+    }
+fail:
+    /* buf is NULL when no padding was needed */
+    qemu_vfree(buf);
+    return ret;
+
+}
+
+/*
+ * Handle a write request in coroutine context
+ *
+ * Writes @bytes bytes from @qiov at byte @offset of @child's node. A NULL
+ * @qiov selects the zero-write path (bdrv_co_do_zero_pwritev()).
+ *
+ * Returns -ENOMEDIUM without a driver, -EPERM on a read-only node, the
+ * error from bdrv_check_byte_request() when that check fails, and otherwise
+ * the result of the read-modify-write reads or of bdrv_aligned_pwritev().
+ */
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ BlockDriverState *bs = child->bs;
+ BdrvTrackedRequest req;
+ uint64_t align = bs->bl.request_alignment;
+ uint8_t *head_buf = NULL;
+ uint8_t *tail_buf = NULL;
+ QEMUIOVector local_qiov;
+ bool use_local_qiov = false;
+ int ret;
+
+ if (!bs->drv) {
+ return -ENOMEDIUM;
+ }
+ if (bs->read_only) {
+ return -EPERM;
+ }
+ assert(!(bs->open_flags & BDRV_O_INACTIVE));
+
+ ret = bdrv_check_byte_request(bs, offset, bytes);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /*
+ * Align write if necessary by performing a read-modify-write cycle.
+ * Pad qiov with the read parts and be sure to have a tracked request not
+ * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
+ */
+ tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
+
+ if (!qiov) {
+ /* No data vector: zero write, which does its own alignment handling */
+ ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req);
+ goto out;
+ }
+
+ /* Unaligned start: read the aligned block that contains the head */
+ if (offset & (align - 1)) {
+ QEMUIOVector head_qiov;
+ struct iovec head_iov;
+
+ mark_request_serialising(&req, align);
+ wait_serialising_requests(&req);
+
+ head_buf = qemu_blockalign(bs, align);
+ head_iov = (struct iovec) {
+ .iov_base = head_buf,
+ .iov_len = align,
+ };
+ qemu_iovec_init_external(&head_qiov, &head_iov, 1);
+
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ ret = bdrv_aligned_preadv(bs, &req, offset & ~(align - 1), align,
+ align, &head_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+
+ /* New vector: old head bytes first, then the caller's data */
+ qemu_iovec_init(&local_qiov, qiov->niov + 2);
+ qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+
+ bytes += offset & (align - 1);
+ offset = offset & ~(align - 1);
+
+ /* We have read the tail already if the request is smaller
+ * than one aligned block.
+ */
+ if (bytes < align) {
+ qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
+ bytes = align;
+ }
+ }
+
+ /* Unaligned end: read the aligned block that contains the tail */
+ if ((offset + bytes) & (align - 1)) {
+ QEMUIOVector tail_qiov;
+ struct iovec tail_iov;
+ size_t tail_bytes;
+ bool waited;
+
+ mark_request_serialising(&req, align);
+ waited = wait_serialising_requests(&req);
+ assert(!waited || !use_local_qiov);
+
+ tail_buf = qemu_blockalign(bs, align);
+ tail_iov = (struct iovec) {
+ .iov_base = tail_buf,
+ .iov_len = align,
+ };
+ qemu_iovec_init_external(&tail_qiov, &tail_iov, 1);
+
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ ret = bdrv_aligned_preadv(bs, &req, (offset + bytes) & ~(align - 1), align,
+ align, &tail_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+
+ if (!use_local_qiov) {
+ qemu_iovec_init(&local_qiov, qiov->niov + 1);
+ qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
+ use_local_qiov = true;
+ }
+
+ /* Append the old bytes that follow the written data in the last block */
+ tail_bytes = (offset + bytes) & (align - 1);
+ qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
+
+ bytes = ROUND_UP(bytes, align);
+ }
+
+ ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, align,
+ use_local_qiov ? &local_qiov : qiov,
+ flags);
+
+fail:
+
+ /* head_buf and tail_buf are NULL if the matching RMW step did not run */
+ if (use_local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ }
+ qemu_vfree(head_buf);
+ qemu_vfree(tail_buf);
+out:
+ tracked_request_end(&req);
+ return ret;
+}
+
+/*
+ * Sector-based front end for bdrv_co_pwritev(): rejects sector counts
+ * outside [0, BDRV_REQUEST_MAX_SECTORS] with -EINVAL, then converts the
+ * sector position and count into bytes and forwards the request.
+ */
+static int coroutine_fn bdrv_co_do_writev(BdrvChild *child,
+    int64_t sector_num, int nb_sectors, QEMUIOVector *qiov,
+    BdrvRequestFlags flags)
+{
+    bool count_ok = nb_sectors >= 0 &&
+                    nb_sectors <= BDRV_REQUEST_MAX_SECTORS;
+
+    if (!count_ok) {
+        return -EINVAL;
+    }
+
+    return bdrv_co_pwritev(child,
+                           sector_num << BDRV_SECTOR_BITS,
+                           nb_sectors << BDRV_SECTOR_BITS,
+                           qiov, flags);
+}
+
+/*
+ * Traced, flag-less sector write: emits the bdrv_co_writev trace event and
+ * delegates to bdrv_co_do_writev() with no request flags.
+ */
+int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
+                                int nb_sectors, QEMUIOVector *qiov)
+{
+    const BdrvRequestFlags no_flags = 0;
+
+    trace_bdrv_co_writev(child->bs, sector_num, nb_sectors);
+    return bdrv_co_do_writev(child, sector_num, nb_sectors, qiov, no_flags);
+}
+
+/*
+ * Zero @count bytes at byte @offset of @child's node by issuing a write
+ * with a NULL data vector and BDRV_REQ_ZERO_WRITE set. BDRV_REQ_MAY_UNMAP
+ * is dropped when the node was not opened with BDRV_O_UNMAP.
+ */
+int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
+                                       int count, BdrvRequestFlags flags)
+{
+    bool unmap_enabled;
+
+    trace_bdrv_co_pwrite_zeroes(child->bs, offset, count, flags);
+
+    unmap_enabled = (child->bs->open_flags & BDRV_O_UNMAP) != 0;
+    if (!unmap_enabled) {
+        flags &= ~BDRV_REQ_MAY_UNMAP;
+    }
+    flags |= BDRV_REQ_ZERO_WRITE;
+
+    return bdrv_co_pwritev(child, offset, count, NULL, flags);
+}
+
+/*
+ * Argument/result bundle handed to bdrv_get_block_status_above_co_entry()
+ * so that a block status query can run inside a coroutine.
+ */
+typedef struct BdrvCoGetBlockStatusData {
+ BlockDriverState *bs; /* top node of the query */
+ BlockDriverState *base; /* node at which the walk stops */
+ BlockDriverState **file; /* out: forwarded to the status query */
+ int64_t sector_num; /* first sector to query */
+ int nb_sectors; /* number of sectors to query */
+ int *pnum; /* out: forwarded to the status query */
+ int64_t ret; /* result of the query, written by the coroutine */
+ bool done; /* set to true by the coroutine after ret is stored */
+} BdrvCoGetBlockStatusData;
+
+/*
+ * Returns the allocation status of the specified sectors.
+ * Drivers not implementing the functionality are assumed to not support
+ * backing files, hence all their sectors are reported as allocated.
+ *
+ * If 'sector_num' is beyond the end of the disk image the return value is 0
+ * and 'pnum' is set to 0.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * 'nb_sectors' is the max value 'pnum' should be set to.  If nb_sectors goes
+ * beyond the end of the disk image it will be clamped.
+ *
+ * If returned value is positive and BDRV_BLOCK_OFFSET_VALID bit is set, 'file'
+ * points to the BDS which the sector range is allocated in.
+ *
+ * 'file' must not be NULL. '*file' is written on every path: it starts out
+ * as NULL and is only replaced by the driver callback, by a recursive query
+ * (BDRV_BLOCK_RAW), or by 'bs' itself for a protocol driver that has no
+ * callback.
+ *
+ * A negative return value is an error from bdrv_nb_sectors() or from the
+ * driver callback.
+ */
+static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs,
+                                                     int64_t sector_num,
+                                                     int nb_sectors, int *pnum,
+                                                     BlockDriverState **file)
+{
+    int64_t total_sectors;
+    int64_t n;
+    int64_t ret, ret2;
+
+    /* Never leave the caller's pointer uninitialised on an early return */
+    *file = NULL;
+
+    total_sectors = bdrv_nb_sectors(bs);
+    if (total_sectors < 0) {
+        return total_sectors;
+    }
+
+    if (sector_num >= total_sectors) {
+        *pnum = 0;
+        return 0;
+    }
+
+    /* Clamp the query to the end of the image */
+    n = total_sectors - sector_num;
+    if (n < nb_sectors) {
+        nb_sectors = n;
+    }
+
+    if (!bs->drv->bdrv_co_get_block_status) {
+        *pnum = nb_sectors;
+        ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
+        if (bs->drv->protocol_name) {
+            ret |= BDRV_BLOCK_OFFSET_VALID | (sector_num * BDRV_SECTOR_SIZE);
+            /* Honour the OFFSET_VALID contract: the data lives in bs itself */
+            *file = bs;
+        }
+        return ret;
+    }
+
+    ret = bs->drv->bdrv_co_get_block_status(bs, sector_num, nb_sectors, pnum,
+                                            file);
+    if (ret < 0) {
+        *pnum = 0;
+        return ret;
+    }
+
+    if (ret & BDRV_BLOCK_RAW) {
+        /* The driver is a pass-through: ask the underlying file instead */
+        assert(ret & BDRV_BLOCK_OFFSET_VALID);
+        return bdrv_get_block_status(bs->file->bs, ret >> BDRV_SECTOR_BITS,
+                                     *pnum, pnum, file);
+    }
+
+    if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
+        ret |= BDRV_BLOCK_ALLOCATED;
+    } else {
+        if (bdrv_unallocated_blocks_are_zero(bs)) {
+            ret |= BDRV_BLOCK_ZERO;
+        } else if (bs->backing) {
+            /* Unallocated sectors past the end of the backing file are zero */
+            BlockDriverState *bs2 = bs->backing->bs;
+            int64_t nb_sectors2 = bdrv_nb_sectors(bs2);
+            if (nb_sectors2 >= 0 && sector_num >= nb_sectors2) {
+                ret |= BDRV_BLOCK_ZERO;
+            }
+        }
+    }
+
+    if (*file && *file != bs &&
+        (ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO) &&
+        (ret & BDRV_BLOCK_OFFSET_VALID)) {
+        BlockDriverState *file2;
+        int file_pnum;
+
+        ret2 = bdrv_co_get_block_status(*file, ret >> BDRV_SECTOR_BITS,
+                                        *pnum, &file_pnum, &file2);
+        if (ret2 >= 0) {
+            /* Ignore errors.  This is just providing extra information, it
+             * is useful but not necessary.
+             */
+            if (!file_pnum) {
+                /* !file_pnum indicates an offset at or beyond the EOF; it is
+                 * perfectly valid for the format block driver to point to such
+                 * offsets, so catch it and mark everything as zero */
+                ret |= BDRV_BLOCK_ZERO;
+            } else {
+                /* Limit request to the range reported by the protocol driver */
+                *pnum = file_pnum;
+                ret |= (ret2 & BDRV_BLOCK_ZERO);
+            }
+        }
+    }
+
+    return ret;
+}
+
+/*
+ * Query the block status of [sector_num, sector_num + nb_sectors) layer by
+ * layer, starting at @bs and following backing links until @base (exclusive).
+ * The walk stops at the first layer that reports an error or
+ * BDRV_BLOCK_ALLOCATED; that layer's result is returned.
+ */
+static int64_t coroutine_fn bdrv_co_get_block_status_above(BlockDriverState *bs,
+        BlockDriverState *base,
+        int64_t sector_num,
+        int nb_sectors,
+        int *pnum,
+        BlockDriverState **file)
+{
+    BlockDriverState *layer = bs;
+    int64_t status = 0;
+
+    assert(bs != base);
+    while (layer != base) {
+        status = bdrv_co_get_block_status(layer, sector_num, nb_sectors,
+                                          pnum, file);
+        if (status < 0 || (status & BDRV_BLOCK_ALLOCATED)) {
+            break;
+        }
+        /* [sector_num, pnum] unallocated on this layer, which could be only
+         * the first part of [sector_num, nb_sectors].  */
+        nb_sectors = MIN(nb_sectors, *pnum);
+        layer = backing_bs(layer);
+    }
+    return status;
+}
+
+/*
+ * Coroutine entry point for bdrv_get_block_status_above(): unpacks the
+ * BdrvCoGetBlockStatusData in @opaque, runs the query, stores the result
+ * and then marks the request as done.
+ */
+static void coroutine_fn bdrv_get_block_status_above_co_entry(void *opaque)
+{
+    BdrvCoGetBlockStatusData *d = opaque;
+    int64_t status;
+
+    status = bdrv_co_get_block_status_above(d->bs, d->base, d->sector_num,
+                                            d->nb_sectors, d->pnum, d->file);
+    d->ret = status;
+    d->done = true;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_get_block_status_above().
+ *
+ * Inside a coroutine the query runs directly; otherwise a coroutine is
+ * created and the node's AioContext is polled until the query has finished.
+ *
+ * See bdrv_co_get_block_status_above() for details.
+ */
+int64_t bdrv_get_block_status_above(BlockDriverState *bs,
+                                    BlockDriverState *base,
+                                    int64_t sector_num,
+                                    int nb_sectors, int *pnum,
+                                    BlockDriverState **file)
+{
+    BdrvCoGetBlockStatusData query = {
+        .bs = bs,
+        .base = base,
+        .file = file,
+        .sector_num = sector_num,
+        .nb_sectors = nb_sectors,
+        .pnum = pnum,
+        .done = false,
+    };
+
+    if (!qemu_in_coroutine()) {
+        AioContext *ctx = bdrv_get_aio_context(bs);
+        Coroutine *co;
+
+        co = qemu_coroutine_create(bdrv_get_block_status_above_co_entry,
+                                   &query);
+        qemu_coroutine_enter(co);
+        while (!query.done) {
+            aio_poll(ctx, true);
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_get_block_status_above_co_entry(&query);
+    }
+
+    return query.ret;
+}
+
+/*
+ * Block status of @bs alone: the layered query is limited to a single layer
+ * by using the node's backing node as the stop point.
+ */
+int64_t bdrv_get_block_status(BlockDriverState *bs,
+                              int64_t sector_num,
+                              int nb_sectors, int *pnum,
+                              BlockDriverState **file)
+{
+    BlockDriverState *stop_at = backing_bs(bs);
+
+    return bdrv_get_block_status_above(bs, stop_at, sector_num, nb_sectors,
+                                       pnum, file);
+}
+
+/*
+ * Report whether the sectors starting at @sector_num are allocated in @bs.
+ * Returns 1 if BDRV_BLOCK_ALLOCATED is set in the block status, 0 if not,
+ * or the negative value from bdrv_get_block_status(). @pnum is filled in by
+ * that call.
+ */
+int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num,
+                                   int nb_sectors, int *pnum)
+{
+    BlockDriverState *unused_file;
+    int64_t status;
+
+    status = bdrv_get_block_status(bs, sector_num, nb_sectors, pnum,
+                                   &unused_file);
+    if (status < 0) {
+        return status;
+    }
+    return (status & BDRV_BLOCK_ALLOCATED) != 0;
+}
+
+/*
+ * Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
+ *
+ * Return true if the given sector is allocated in any image between
+ * BASE and TOP (inclusive).  BASE can be NULL to check if the given
+ * sector is allocated in any image of the chain.  Return false otherwise.
+ *
+ * 'pnum' is set to the number of sectors (including and immediately following
+ * the specified sector) that are known to be in the same
+ * allocated/unallocated state.
+ *
+ * A negative return value is the error reported by bdrv_is_allocated().
+ */
+int bdrv_is_allocated_above(BlockDriverState *top,
+                            BlockDriverState *base,
+                            int64_t sector_num,
+                            int nb_sectors, int *pnum)
+{
+    BlockDriverState *layer;
+    int shortest = nb_sectors;
+
+    for (layer = top; layer && layer != base; layer = backing_bs(layer)) {
+        int layer_pnum;
+        int ret = bdrv_is_allocated(layer, sector_num, nb_sectors,
+                                    &layer_pnum);
+
+        if (ret < 0) {
+            return ret;
+        }
+        if (ret) {
+            *pnum = layer_pnum;
+            return 1;
+        }
+
+        /*
+         * [sector_num, nb_sectors] is unallocated on top but intermediate
+         * might have
+         *
+         * [sector_num+x, nr_sectors] allocated.
+         */
+        if (shortest > layer_pnum &&
+            (layer == top ||
+             sector_num + layer_pnum < layer->total_sectors)) {
+            shortest = layer_pnum;
+        }
+    }
+
+    *pnum = shortest;
+    return 0;
+}
+
+/*
+ * Hand a compressed write of @nb_sectors sectors at @sector_num to the
+ * driver of @bs.
+ *
+ * Returns -ENOMEDIUM without a driver, -ENOTSUP when the driver has no
+ * bdrv_write_compressed callback, the error from bdrv_check_request() when
+ * that check fails, and otherwise the driver's result.
+ */
+int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num,
+                          const uint8_t *buf, int nb_sectors)
+{
+    BlockDriver *driver = bs->drv;
+    int check;
+
+    if (!driver) {
+        return -ENOMEDIUM;
+    }
+    if (!driver->bdrv_write_compressed) {
+        return -ENOTSUP;
+    }
+
+    check = bdrv_check_request(bs, sector_num, nb_sectors);
+    if (check < 0) {
+        return check;
+    }
+
+    assert(QLIST_EMPTY(&bs->dirty_bitmaps));
+
+    return driver->bdrv_write_compressed(bs, sector_num, buf, nb_sectors);
+}
+
+/*
+ * Argument/result bundle handed to bdrv_co_rw_vmstate_entry() so that a
+ * vmstate transfer can run inside a coroutine.
+ */
+typedef struct BdrvVmstateCo {
+ BlockDriverState *bs; /* node to operate on */
+ QEMUIOVector *qiov; /* data vector for the transfer */
+ int64_t pos; /* position argument passed to the driver */
+ bool is_read; /* true: load vmstate, false: save vmstate */
+ int ret; /* result; -EINPROGRESS until the coroutine stores it */
+} BdrvVmstateCo;
+
+/*
+ * Load (@is_read) or save vmstate data at @pos through the driver of @bs.
+ * The presence of the driver's bdrv_load_vmstate callback decides whether
+ * the driver handles vmstate itself; if not, the request is passed down to
+ * bs->file when there is one.
+ *
+ * Returns -ENOMEDIUM without a driver and -ENOTSUP when neither the driver
+ * nor a file child can take the request.
+ */
+static int coroutine_fn
+bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+                   bool is_read)
+{
+    BlockDriver *driver = bs->drv;
+
+    if (!driver) {
+        return -ENOMEDIUM;
+    }
+
+    if (driver->bdrv_load_vmstate) {
+        if (is_read) {
+            return driver->bdrv_load_vmstate(bs, qiov, pos);
+        }
+        return driver->bdrv_save_vmstate(bs, qiov, pos);
+    }
+
+    if (!bs->file) {
+        return -ENOTSUP;
+    }
+    return bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read);
+}
+
+/* Coroutine entry point: run the vmstate transfer described by @opaque
+ * (a BdrvVmstateCo) and store its result in the same structure. */
+static void coroutine_fn bdrv_co_rw_vmstate_entry(void *opaque)
+{
+    BdrvVmstateCo *args = opaque;
+    int result = bdrv_co_rw_vmstate(args->bs, args->qiov, args->pos,
+                                    args->is_read);
+
+    args->ret = result;
+}
+
+/*
+ * Run bdrv_co_rw_vmstate() from any context. Inside a coroutine it is
+ * called directly; otherwise a coroutine is started and the node's
+ * AioContext is polled until the stored result is no longer -EINPROGRESS.
+ */
+static inline int
+bdrv_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
+                bool is_read)
+{
+    BdrvVmstateCo args;
+    Coroutine *co;
+
+    if (qemu_in_coroutine()) {
+        return bdrv_co_rw_vmstate(bs, qiov, pos, is_read);
+    }
+
+    args = (BdrvVmstateCo) {
+        .bs         = bs,
+        .qiov       = qiov,
+        .pos        = pos,
+        .is_read    = is_read,
+        .ret        = -EINPROGRESS,
+    };
+    co = qemu_coroutine_create(bdrv_co_rw_vmstate_entry, &args);
+
+    qemu_coroutine_enter(co);
+    while (args.ret == -EINPROGRESS) {
+        aio_poll(bdrv_get_aio_context(bs), true);
+    }
+    return args.ret;
+}
+
+/*
+ * Save @size bytes from @buf as vmstate data at @pos. The flat buffer is
+ * wrapped in a one-element I/O vector and passed to bdrv_writev_vmstate().
+ *
+ * Returns @size on success or the negative value from the write.
+ */
+int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
+                      int64_t pos, int size)
+{
+    struct iovec single = {
+        .iov_base   = (void *) buf,
+        .iov_len    = size,
+    };
+    QEMUIOVector vec;
+    int rc;
+
+    qemu_iovec_init_external(&vec, &single, 1);
+
+    rc = bdrv_writev_vmstate(bs, &vec, pos);
+    return rc < 0 ? rc : size;
+}
+
+/* Vectored vmstate save: bdrv_rw_vmstate() in the write direction. */
+int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    const bool is_read = false;
+
+    return bdrv_rw_vmstate(bs, qiov, pos, is_read);
+}
+
+/*
+ * Load @size bytes of vmstate data at @pos into @buf. The flat buffer is
+ * wrapped in a one-element I/O vector and passed to bdrv_readv_vmstate().
+ *
+ * Returns @size on success or the negative value from the read.
+ */
+int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
+                      int64_t pos, int size)
+{
+    struct iovec single = {
+        .iov_base   = buf,
+        .iov_len    = size,
+    };
+    QEMUIOVector vec;
+    int rc;
+
+    qemu_iovec_init_external(&vec, &single, 1);
+
+    rc = bdrv_readv_vmstate(bs, &vec, pos);
+    return rc < 0 ? rc : size;
+}
+
+/* Vectored vmstate load: bdrv_rw_vmstate() in the read direction. */
+int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
+{
+    const bool is_read = true;
+
+    return bdrv_rw_vmstate(bs, qiov, pos, is_read);
+}
+
+/**************************************************************/
+/* async I/Os */
+
+/*
+ * Start an asynchronous sector-based read on @child. @qiov must describe
+ * exactly @nb_sectors sectors (asserted). @cb is invoked with @opaque when
+ * the request completes.
+ */
+BlockAIOCB *bdrv_aio_readv(BdrvChild *child, int64_t sector_num,
+                           QEMUIOVector *qiov, int nb_sectors,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    const bool is_write = false;
+
+    trace_bdrv_aio_readv(child->bs, sector_num, nb_sectors, opaque);
+
+    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+    return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS,
+                                  qiov, 0, cb, opaque, is_write);
+}
+
+/*
+ * Start an asynchronous sector-based write on @child. @qiov must describe
+ * exactly @nb_sectors sectors (asserted). @cb is invoked with @opaque when
+ * the request completes.
+ */
+BlockAIOCB *bdrv_aio_writev(BdrvChild *child, int64_t sector_num,
+                            QEMUIOVector *qiov, int nb_sectors,
+                            BlockCompletionFunc *cb, void *opaque)
+{
+    const bool is_write = true;
+
+    trace_bdrv_aio_writev(child->bs, sector_num, nb_sectors, opaque);
+
+    assert(nb_sectors << BDRV_SECTOR_BITS == qiov->size);
+    return bdrv_co_aio_prw_vector(child, sector_num << BDRV_SECTOR_BITS,
+                                  qiov, 0, cb, opaque, is_write);
+}
+
+/*
+ * Cancel @acb and wait for it to finish. An extra reference is held while
+ * polling; the wait ends once that reference is the only one left. The
+ * AioContext to poll comes from the request's get_aio_context hook if it
+ * has one, otherwise from the request's node; with neither, abort().
+ */
+void bdrv_aio_cancel(BlockAIOCB *acb)
+{
+    qemu_aio_ref(acb);
+    bdrv_aio_cancel_async(acb);
+
+    while (acb->refcnt > 1) {
+        AioContext *ctx;
+
+        if (acb->aiocb_info->get_aio_context) {
+            ctx = acb->aiocb_info->get_aio_context(acb);
+        } else if (acb->bs) {
+            ctx = bdrv_get_aio_context(acb->bs);
+        } else {
+            abort();
+        }
+        aio_poll(ctx, true);
+    }
+
+    qemu_aio_unref(acb);
+}
+
+/* Async version of aio cancel. The caller is not blocked if the acb implements
+ * cancel_async, otherwise we do nothing and let the request normally complete.
+ * In either case the completion callback must be called. */
+void bdrv_aio_cancel_async(BlockAIOCB *acb)
+{
+    const AIOCBInfo *info = acb->aiocb_info;
+
+    if (!info->cancel_async) {
+        return;
+    }
+    info->cancel_async(acb);
+}
+
+/**************************************************************/
+/* async block device emulation */
+
+/*
+ * Parameters and result of one emulated asynchronous request. The two
+ * anonymous structs share storage; which one is valid depends on the kind
+ * of request.
+ */
+typedef struct BlockRequest {
+ union {
+ /* Used during read, write, trim */
+ struct {
+ int64_t offset; /* byte offset of the request */
+ int bytes; /* length in bytes */
+ int flags; /* request flags passed to the coroutine I/O call */
+ QEMUIOVector *qiov; /* data vector */
+ };
+ /* Used during ioctl */
+ struct {
+ int req;
+ void *buf;
+ };
+ };
+ /* NOTE(review): cb/opaque look like a completion callback and its
+ * argument, but nothing in this part of the file reads them - confirm */
+ BlockCompletionFunc *cb;
+ void *opaque;
+
+ int error; /* -EINPROGRESS while running, then the request's result */
+} BlockRequest;
+
+/* AIOCB used by the coroutine-based emulation of the asynchronous API. */
+typedef struct BlockAIOCBCoroutine {
+ BlockAIOCB common; /* generic AIOCB part (cb, opaque, bs, refcnt) */
+ BdrvChild *child; /* child the read/write is issued on */
+ BlockRequest req; /* request parameters and result */
+ bool is_write; /* selects write instead of read in bdrv_co_do_rw() */
+ bool need_bh; /* while true, bdrv_co_complete() defers the callback */
+ bool *done; /* NOTE(review): not used in this part of the file */
+ QEMUBH* bh; /* bottom half that delivers a deferred completion */
+} BlockAIOCBCoroutine;
+
+/* AIOCB description for BlockAIOCBCoroutine; only the allocation size is
+ * set, so no cancel_async or get_aio_context hook is provided. */
+static const AIOCBInfo bdrv_em_co_aiocb_info = {
+ .aiocb_size = sizeof(BlockAIOCBCoroutine),
+};
+
+/*
+ * Deliver the completion of @acb: invoke the user callback with the stored
+ * result and drop a reference. Nothing happens while need_bh is still set;
+ * in that case the completion is delivered later from a bottom half.
+ */
+static void bdrv_co_complete(BlockAIOCBCoroutine *acb)
+{
+    if (acb->need_bh) {
+        return;
+    }
+
+    acb->common.cb(acb->common.opaque, acb->req.error);
+    qemu_aio_unref(acb);
+}
+
+/* Bottom-half handler: delete the bottom half and deliver the deferred
+ * completion of the request passed in @opaque. */
+static void bdrv_co_em_bh(void *opaque)
+{
+    BlockAIOCBCoroutine *request = opaque;
+
+    assert(!request->need_bh);
+    qemu_bh_delete(request->bh);
+    bdrv_co_complete(request);
+}
+
+/*
+ * Called after the request coroutine has been entered for the first time.
+ * Clears need_bh; if the request has already finished (its error is no
+ * longer -EINPROGRESS), a bottom half is scheduled to deliver the
+ * completion.
+ */
+static void bdrv_co_maybe_schedule_bh(BlockAIOCBCoroutine *acb)
+{
+    BlockDriverState *bs;
+
+    acb->need_bh = false;
+    if (acb->req.error == -EINPROGRESS) {
+        return;
+    }
+
+    bs = acb->common.bs;
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb);
+    qemu_bh_schedule(acb->bh);
+}
+
+/* Coroutine body of an emulated AIO read/write: invoke bdrv_co_preadv() or
+ * bdrv_co_pwritev(), record the result and complete the request. */
+static void coroutine_fn bdrv_co_do_rw(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockRequest *rq = &acb->req;
+    int result;
+
+    if (acb->is_write) {
+        result = bdrv_co_pwritev(acb->child, rq->offset, rq->qiov->size,
+                                 rq->qiov, rq->flags);
+    } else {
+        result = bdrv_co_preadv(acb->child, rq->offset, rq->qiov->size,
+                                rq->qiov, rq->flags);
+    }
+    rq->error = result;
+
+    bdrv_co_complete(acb);
+}
+
+/*
+ * Start an emulated asynchronous read (@is_write false) or write at byte
+ * @offset on @child: allocate the AIOCB, fill in the request, and enter a
+ * coroutine running bdrv_co_do_rw(). A request that finishes during that
+ * first entry is completed from a bottom half.
+ */
+static BlockAIOCB *bdrv_co_aio_prw_vector(BdrvChild *child,
+                                          int64_t offset,
+                                          QEMUIOVector *qiov,
+                                          BdrvRequestFlags flags,
+                                          BlockCompletionFunc *cb,
+                                          void *opaque,
+                                          bool is_write)
+{
+    BlockAIOCBCoroutine *acb;
+    Coroutine *worker;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, child->bs, cb, opaque);
+    acb->child = child;
+    acb->is_write = is_write;
+    acb->need_bh = true;
+
+    acb->req.offset = offset;
+    acb->req.qiov = qiov;
+    acb->req.flags = flags;
+    acb->req.error = -EINPROGRESS;
+
+    worker = qemu_coroutine_create(bdrv_co_do_rw, acb);
+    qemu_coroutine_enter(worker);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+/* Coroutine body of an emulated AIO flush: flush the request's node, record
+ * the result and complete the request. */
+static void coroutine_fn bdrv_aio_flush_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    int result = bdrv_co_flush(acb->common.bs);
+
+    acb->req.error = result;
+    bdrv_co_complete(acb);
+}
+
+/*
+ * Start an asynchronous flush of @bs. @cb is invoked with @opaque and the
+ * flush result once the request completes.
+ */
+BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCBCoroutine *acb;
+    Coroutine *worker;
+
+    trace_bdrv_aio_flush(bs, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->req.error = -EINPROGRESS;
+    acb->need_bh = true;
+
+    worker = qemu_coroutine_create(bdrv_aio_flush_co_entry, acb);
+    qemu_coroutine_enter(worker);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+/* Coroutine body of an emulated AIO discard: discard the requested byte
+ * range, record the result and complete the request. */
+static void coroutine_fn bdrv_aio_pdiscard_co_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    int result = bdrv_co_pdiscard(acb->common.bs, acb->req.offset,
+                                  acb->req.bytes);
+
+    acb->req.error = result;
+    bdrv_co_complete(acb);
+}
+
+/*
+ * Start an asynchronous discard of @count bytes at byte @offset of @bs.
+ * @cb is invoked with @opaque and the discard result once the request
+ * completes.
+ */
+BlockAIOCB *bdrv_aio_pdiscard(BlockDriverState *bs, int64_t offset, int count,
+                              BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCBCoroutine *acb;
+    Coroutine *worker;
+
+    trace_bdrv_aio_pdiscard(bs, offset, count, opaque);
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->req.offset = offset;
+    acb->req.bytes = count;
+    acb->req.error = -EINPROGRESS;
+    acb->need_bh = true;
+
+    worker = qemu_coroutine_create(bdrv_aio_pdiscard_co_entry, acb);
+    qemu_coroutine_enter(worker);
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+/*
+ * Allocate an AIOCB of the size given by @aiocb_info and initialise its
+ * common header: info pointer, node, completion callback, callback
+ * argument, and a reference count of 1. Any bytes beyond the common header
+ * are left uninitialised.
+ */
+void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
+                   BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCB *fresh = g_malloc(aiocb_info->aiocb_size);
+
+    fresh->refcnt = 1;
+    fresh->aiocb_info = aiocb_info;
+    fresh->bs = bs;
+    fresh->cb = cb;
+    fresh->opaque = opaque;
+
+    return fresh;
+}
+
+/* Take an additional reference on the AIOCB pointed to by @p. */
+void qemu_aio_ref(void *p)
+{
+    BlockAIOCB *acb = p;
+
+    acb->refcnt += 1;
+}
+
+/* Drop one reference on the AIOCB pointed to by @p and free it when the
+ * count reaches zero. The count must be positive on entry (asserted). */
+void qemu_aio_unref(void *p)
+{
+    BlockAIOCB *acb = p;
+
+    assert(acb->refcnt > 0);
+    acb->refcnt--;
+    if (acb->refcnt == 0) {
+        g_free(acb);
+    }
+}
+
+/**************************************************************/
+/* Coroutine block device emulation */
+
+/* Argument/result bundle handed to bdrv_flush_co_entry(). */
+typedef struct FlushCo {
+ BlockDriverState *bs; /* node to flush */
+ int ret; /* flush result; NOT_DONE until the coroutine stores it */
+} FlushCo;
+
+
+/* Coroutine entry point: flush the node named in @opaque (a FlushCo) and
+ * store the result in the same structure. */
+static void coroutine_fn bdrv_flush_co_entry(void *opaque)
+{
+    FlushCo *args = opaque;
+    int result = bdrv_co_flush(args->bs);
+
+    args->ret = result;
+}
+
+/*
+ * Flush @bs in coroutine context.
+ *
+ * Returns 0 immediately when @bs is NULL, has no medium inserted, is
+ * read-only, or is a scsi-generic node. Otherwise the flush is tracked,
+ * waits for any flush already running on the node, and then:
+ *   - uses the driver's bdrv_co_flush callback alone if there is one;
+ *   - else runs bdrv_co_flush_to_os, then (unless BDRV_O_NO_FLUSH is set or
+ *     nothing was written since the last successful flush) the driver's
+ *     flush-to-disk or AIO flush callback, and finally flushes bs->file.
+ *
+ * The write generation is recorded as flushed only when the flush
+ * succeeded, so a failed flush is not mistaken for a completed one by the
+ * next caller.
+ *
+ * Returns 0 on success or a negative value from the driver (-EIO when the
+ * AIO flush could not be started).
+ */
+int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
+{
+    int ret;
+    BdrvTrackedRequest req;
+
+    if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+        bdrv_is_sg(bs)) {
+        return 0;
+    }
+
+    tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+
+    /* Snapshot taken before waiting: writes that start later are not
+     * covered by this flush */
+    int current_gen = bs->write_gen;
+
+    /* Wait until any previous flushes are completed */
+    while (bs->active_flush_req != NULL) {
+        qemu_co_queue_wait(&bs->flush_queue);
+    }
+
+    bs->active_flush_req = &req;
+
+    /* Write back all layers by calling one driver function */
+    if (bs->drv->bdrv_co_flush) {
+        ret = bs->drv->bdrv_co_flush(bs);
+        goto out;
+    }
+
+    /* Write back cached data to the OS even with cache=unsafe */
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
+    if (bs->drv->bdrv_co_flush_to_os) {
+        ret = bs->drv->bdrv_co_flush_to_os(bs);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+    /* But don't actually force it to the disk with cache=unsafe */
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        goto flush_parent;
+    }
+
+    /* Check if we really need to flush anything */
+    if (bs->flushed_gen == current_gen) {
+        goto flush_parent;
+    }
+
+    BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
+    if (bs->drv->bdrv_co_flush_to_disk) {
+        ret = bs->drv->bdrv_co_flush_to_disk(bs);
+    } else if (bs->drv->bdrv_aio_flush) {
+        BlockAIOCB *acb;
+        CoroutineIOCompletion co = {
+            .coroutine = qemu_coroutine_self(),
+        };
+
+        acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
+        if (acb == NULL) {
+            ret = -EIO;
+        } else {
+            qemu_coroutine_yield();
+            ret = co.ret;
+        }
+    } else {
+        /*
+         * Some block drivers always operate in either writethrough or unsafe
+         * mode and don't support bdrv_flush therefore. Usually qemu doesn't
+         * know how the server works (because the behaviour is hardcoded or
+         * depends on server-side configuration), so we can't ensure that
+         * everything is safe on disk. Returning an error doesn't work because
+         * that would break guests even if the server operates in writethrough
+         * mode.
+         *
+         * Let's hope the user knows what he's doing.
+         */
+        ret = 0;
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* Now flush the underlying protocol.  It will also have BDRV_O_NO_FLUSH
+     * in the case of cache=unsafe, so there are no useless flushes.
+     */
+flush_parent:
+    ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
+out:
+    /* Only a successful flush may mark the generation as flushed; after a
+     * failure the next flush must try again. */
+    if (ret == 0) {
+        bs->flushed_gen = current_gen;
+    }
+    /* Notify any pending flushes that we have completed */
+    bs->active_flush_req = NULL;
+    /* Return value is ignored - it's ok if wait queue is empty */
+    qemu_co_queue_next(&bs->flush_queue);
+
+    tracked_request_end(&req);
+    return ret;
+}
+
+/*
+ * Synchronous flush entry point for @bs.
+ *
+ * Runs bdrv_flush_co_entry() on a FlushCo request and returns whatever that
+ * leaves in the request's ret field.  When already inside a coroutine the
+ * entry function is called directly; otherwise a new coroutine is created
+ * and entered, and the AioContext of @bs is polled until ret is no longer
+ * NOT_DONE.
+ */
+int bdrv_flush(BlockDriverState *bs)
+{
+    FlushCo flush_co = {
+        .bs = bs,
+        .ret = NOT_DONE,
+    };
+
+    if (!qemu_in_coroutine()) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        Coroutine *co = qemu_coroutine_create(bdrv_flush_co_entry, &flush_co);
+
+        qemu_coroutine_enter(co);
+        /* Keep polling until the coroutine has stored its result */
+        while (flush_co.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_flush_co_entry(&flush_co);
+    }
+
+    return flush_co.ret;
+}
+
+/* Argument/result bundle handed to bdrv_pdiscard_co_entry() */
+typedef struct DiscardCo {
+    BlockDriverState *bs;   /* node passed to bdrv_co_pdiscard() */
+    int64_t offset;         /* offset argument of the discard */
+    int count;              /* count argument of the discard */
+    int ret;                /* receives bdrv_co_pdiscard()'s return value */
+} DiscardCo;
+
+/* Coroutine entry: run the discard described by @opaque (a DiscardCo) */
+static void coroutine_fn bdrv_pdiscard_co_entry(void *opaque)
+{
+    DiscardCo *dco = opaque;
+    int result = bdrv_co_pdiscard(dco->bs, dco->offset, dco->count);
+
+    dco->ret = result;
+}
+
+/*
+ * Discard @count bytes of @bs starting at @offset (coroutine context).
+ *
+ * Returns -ENOMEDIUM if @bs has no driver, the error reported by
+ * bdrv_check_byte_request() if that check fails, and -EPERM if @bs is
+ * read-only.  Returns 0 without touching the driver when BDRV_O_UNMAP is
+ * not set, when the driver offers neither discard callback, or when nothing
+ * is left of the request after trimming it to the discard alignment.
+ *
+ * Otherwise the aligned range is handed to the driver in chunks of at most
+ * max_pdiscard bytes.  A chunk result of -ENOTSUP is ignored; any other
+ * non-zero chunk result stops the loop and is returned to the caller.
+ */
+int coroutine_fn bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                                  int count)
+{
+    BdrvTrackedRequest req;
+    int max_pdiscard, ret;
+    int head, align;
+
+    if (!bs->drv) {
+        return -ENOMEDIUM;
+    }
+
+    ret = bdrv_check_byte_request(bs, offset, count);
+    if (ret < 0) {
+        return ret;
+    } else if (bs->read_only) {
+        return -EPERM;
+    }
+    assert(!(bs->open_flags & BDRV_O_INACTIVE));
+
+    /* Do nothing if disabled.  */
+    if (!(bs->open_flags & BDRV_O_UNMAP)) {
+        return 0;
+    }
+
+    if (!bs->drv->bdrv_co_pdiscard && !bs->drv->bdrv_aio_pdiscard) {
+        return 0;
+    }
+
+    /* Discard is advisory, so ignore any unaligned head or tail */
+    align = MAX(bs->bl.pdiscard_alignment, bs->bl.request_alignment);
+    assert(align % bs->bl.request_alignment == 0);
+    head = offset % align;
+    if (head) {
+        head = MIN(count, align - head);
+        count -= head;
+        offset += head;
+    }
+    count = QEMU_ALIGN_DOWN(count, align);
+    if (!count) {
+        return 0;
+    }
+
+    tracked_request_begin(&req, bs, offset, count, BDRV_TRACKED_DISCARD);
+
+    ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req);
+    if (ret < 0) {
+        goto out;
+    }
+
+    max_pdiscard = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_pdiscard, INT_MAX),
+                                   align);
+    assert(max_pdiscard);
+
+    while (count > 0) {
+        /*
+         * No block-local 'ret' here: the chunk result must land in the
+         * function-level variable that is returned after the out: label,
+         * otherwise a driver error would be replaced by the notifier's
+         * result and the failure would go unreported.
+         */
+        int num = MIN(count, max_pdiscard);
+
+        if (bs->drv->bdrv_co_pdiscard) {
+            ret = bs->drv->bdrv_co_pdiscard(bs, offset, num);
+        } else {
+            BlockAIOCB *acb;
+            CoroutineIOCompletion co = {
+                .coroutine = qemu_coroutine_self(),
+            };
+
+            acb = bs->drv->bdrv_aio_pdiscard(bs, offset, num,
+                                             bdrv_co_io_em_complete, &co);
+            if (acb == NULL) {
+                ret = -EIO;
+                goto out;
+            } else {
+                qemu_coroutine_yield();
+                ret = co.ret;
+            }
+        }
+        if (ret && ret != -ENOTSUP) {
+            goto out;
+        }
+
+        offset += num;
+        count -= num;
+    }
+    ret = 0;
+out:
+    /* Reached on success and on failure alike */
+    ++bs->write_gen;
+    bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
+                   req.bytes >> BDRV_SECTOR_BITS);
+    tracked_request_end(&req);
+    return ret;
+}
+
+/*
+ * Synchronous wrapper around bdrv_co_pdiscard().
+ *
+ * Packs the arguments into a DiscardCo and runs bdrv_pdiscard_co_entry() on
+ * it: directly when already in coroutine context, otherwise in a freshly
+ * created coroutine while polling the AioContext of @bs until the result is
+ * no longer NOT_DONE.  Returns the value stored in the DiscardCo.
+ */
+int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int count)
+{
+    DiscardCo rwco = {
+        .bs = bs,
+        .offset = offset,
+        .count = count,
+        .ret = NOT_DONE,
+    };
+
+    if (!qemu_in_coroutine()) {
+        AioContext *aio_context = bdrv_get_aio_context(bs);
+        Coroutine *co = qemu_coroutine_create(bdrv_pdiscard_co_entry, &rwco);
+
+        qemu_coroutine_enter(co);
+        while (rwco.ret == NOT_DONE) {
+            aio_poll(aio_context, true);
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_pdiscard_co_entry(&rwco);
+    }
+
+    return rwco.ret;
+}
+
+/*
+ * Issue ioctl @req with argument @buf through the driver's bdrv_aio_ioctl
+ * callback, bracketed by a BDRV_TRACKED_IOCTL tracked request.
+ *
+ * Returns -ENOTSUP when @bs has no driver, the driver has no bdrv_aio_ioctl
+ * callback, or the callback returns NULL.  Otherwise the coroutine yields
+ * and co.ret is returned once it is resumed (it is expected to be filled in
+ * by bdrv_co_io_em_complete, which is defined outside this section).
+ */
+static int bdrv_co_do_ioctl(BlockDriverState *bs, int req, void *buf)
+{
+    BlockDriver *drv = bs->drv;
+    BdrvTrackedRequest tracked_req;
+    CoroutineIOCompletion co = {
+        .coroutine = qemu_coroutine_self(),
+    };
+
+    tracked_request_begin(&tracked_req, bs, 0, 0, BDRV_TRACKED_IOCTL);
+
+    if (drv && drv->bdrv_aio_ioctl) {
+        BlockAIOCB *acb = drv->bdrv_aio_ioctl(bs, req, buf,
+                                              bdrv_co_io_em_complete, &co);
+
+        if (acb) {
+            qemu_coroutine_yield();
+        } else {
+            co.ret = -ENOTSUP;
+        }
+    } else {
+        co.ret = -ENOTSUP;
+    }
+
+    tracked_request_end(&tracked_req);
+    return co.ret;
+}
+
+/* Argument/result bundle handed to bdrv_co_ioctl_entry() */
+typedef struct {
+    BlockDriverState *bs;   /* node the ioctl is issued on */
+    int req;                /* ioctl request number */
+    void *buf;              /* ioctl argument buffer */
+    int ret;                /* receives bdrv_co_do_ioctl()'s return value */
+} BdrvIoctlCoData;
+
+/* Coroutine entry: run the ioctl described by @opaque (a BdrvIoctlCoData) */
+static void coroutine_fn bdrv_co_ioctl_entry(void *opaque)
+{
+    BdrvIoctlCoData *ioctl_data = opaque;
+    int result = bdrv_co_do_ioctl(ioctl_data->bs, ioctl_data->req,
+                                  ioctl_data->buf);
+
+    ioctl_data->ret = result;
+}
+
+/*
+ * Synchronous ioctl on @bs; needed for generic scsi interface.
+ *
+ * Runs bdrv_co_ioctl_entry() on a BdrvIoctlCoData: directly when already in
+ * coroutine context, otherwise in a new coroutine while polling the
+ * AioContext of @bs until the result is no longer -EINPROGRESS.
+ * Note that @req is stored in the int-typed req field of BdrvIoctlCoData.
+ */
+int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
+{
+    BdrvIoctlCoData data = {
+        .bs = bs,
+        .req = req,
+        .buf = buf,
+        .ret = -EINPROGRESS,
+    };
+
+    if (!qemu_in_coroutine()) {
+        Coroutine *co = qemu_coroutine_create(bdrv_co_ioctl_entry, &data);
+
+        qemu_coroutine_enter(co);
+        while (data.ret == -EINPROGRESS) {
+            aio_poll(bdrv_get_aio_context(bs), true);
+        }
+    } else {
+        /* Fast-path if already in coroutine context */
+        bdrv_co_ioctl_entry(&data);
+    }
+
+    return data.ret;
+}
+
+/*
+ * Coroutine entry for bdrv_aio_ioctl(): performs the ioctl recorded in the
+ * AIOCB, stores its result in req.error and then calls bdrv_co_complete().
+ */
+static void coroutine_fn bdrv_co_aio_ioctl_entry(void *opaque)
+{
+    BlockAIOCBCoroutine *acb = opaque;
+    BlockDriverState *bs = acb->common.bs;
+
+    acb->req.error = bdrv_co_do_ioctl(bs, acb->req.req, acb->req.buf);
+    bdrv_co_complete(acb);
+}
+
+/*
+ * AIO-style ioctl on @bs.
+ *
+ * Allocates a BlockAIOCBCoroutine carrying @cb/@opaque, records @req and
+ * @buf in it with the error preset to -EINPROGRESS, and enters a coroutine
+ * running bdrv_co_aio_ioctl_entry().  Returns the embedded BlockAIOCB.
+ */
+BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs,
+                           unsigned long int req, void *buf,
+                           BlockCompletionFunc *cb, void *opaque)
+{
+    BlockAIOCBCoroutine *acb;
+
+    acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque);
+    acb->need_bh = true;
+    acb->req.error = -EINPROGRESS;
+    acb->req.req = req;
+    acb->req.buf = buf;
+
+    qemu_coroutine_enter(qemu_coroutine_create(bdrv_co_aio_ioctl_entry, acb));
+
+    bdrv_co_maybe_schedule_bh(acb);
+    return &acb->common;
+}
+
+/* Allocate @size bytes aligned to bdrv_opt_mem_align(@bs) via qemu_memalign() */
+void *qemu_blockalign(BlockDriverState *bs, size_t size)
+{
+    size_t align = bdrv_opt_mem_align(bs);
+
+    return qemu_memalign(align, size);
+}
+
+/* Like qemu_blockalign(), but the returned buffer is zero-filled */
+void *qemu_blockalign0(BlockDriverState *bs, size_t size)
+{
+    void *buf = qemu_blockalign(bs, size);
+
+    memset(buf, 0, size);
+    return buf;
+}
+
+/*
+ * Allocate @size bytes aligned to bdrv_opt_mem_align(@bs) through
+ * qemu_try_memalign().  A @size of 0 is bumped to one alignment unit.
+ */
+void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
+{
+    size_t align = bdrv_opt_mem_align(bs);
+
+    /* Ensure that NULL is never returned on success */
+    assert(align > 0);
+
+    return qemu_try_memalign(align, size ? size : align);
+}
+
+/*
+ * Like qemu_try_blockalign(), but the first @size bytes of a successfully
+ * allocated buffer are zeroed.  Returns NULL if the allocation returned NULL.
+ */
+void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
+{
+    void *mem = qemu_try_blockalign(bs, size);
+
+    if (!mem) {
+        return NULL;
+    }
+
+    memset(mem, 0, size);
+    return mem;
+}
+
+/*
+ * Check that every element of @qiov has both its base address and its
+ * length a multiple of bdrv_min_mem_align(@bs).
+ */
+bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
+{
+    size_t alignment = bdrv_min_mem_align(bs);
+    int idx;
+
+    for (idx = 0; idx < qiov->niov; idx++) {
+        if ((uintptr_t) qiov->iov[idx].iov_base % alignment ||
+            qiov->iov[idx].iov_len % alignment) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+/* Append @notifier to the before_write_notifiers list of @bs */
+void bdrv_add_before_write_notifier(BlockDriverState *bs,
+                                    NotifierWithReturn *notifier)
+{
+    notifier_with_return_list_add(&bs->before_write_notifiers,
+                                  notifier);
+}
+
+/*
+ * Plug @bs and, first, all of its children.  The io_plugged counter is
+ * always incremented; the driver's bdrv_io_plug callback is invoked only on
+ * the 0 -> 1 transition while io_plug_disabled is 0.
+ */
+void bdrv_io_plug(BlockDriverState *bs)
+{
+    BdrvChild *child;
+    BlockDriver *drv;
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_plug(child->bs);
+    }
+
+    if (bs->io_plugged++ != 0 || bs->io_plug_disabled != 0) {
+        return;
+    }
+
+    drv = bs->drv;
+    if (drv && drv->bdrv_io_plug) {
+        drv->bdrv_io_plug(bs);
+    }
+}
+
+/*
+ * Drop one plug reference on @bs, then on all of its children.  The
+ * driver's bdrv_io_unplug callback is invoked when io_plugged reaches 0
+ * while io_plug_disabled is 0.
+ */
+void bdrv_io_unplug(BlockDriverState *bs)
+{
+    BdrvChild *child;
+
+    assert(bs->io_plugged);
+    bs->io_plugged--;
+
+    if (bs->io_plugged == 0 && bs->io_plug_disabled == 0) {
+        BlockDriver *drv = bs->drv;
+
+        if (drv && drv->bdrv_io_unplug) {
+            drv->bdrv_io_unplug(bs);
+        }
+    }
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_unplug(child->bs);
+    }
+}
+
+/*
+ * Start a section in which plugging is disabled for @bs and then for all
+ * of its children.  The io_plug_disabled counter is always incremented; on
+ * the 0 -> 1 transition with io_plugged > 0 the driver's bdrv_io_unplug
+ * callback is invoked.
+ */
+void bdrv_io_unplugged_begin(BlockDriverState *bs)
+{
+    BdrvChild *child;
+    bool first_disable = (bs->io_plug_disabled++ == 0);
+
+    if (first_disable && bs->io_plugged > 0) {
+        BlockDriver *drv = bs->drv;
+
+        if (drv && drv->bdrv_io_unplug) {
+            drv->bdrv_io_unplug(bs);
+        }
+    }
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_unplugged_begin(child->bs);
+    }
+}
+
+/*
+ * End a section started with bdrv_io_unplugged_begin(): children first,
+ * then @bs.  When io_plug_disabled drops to 0 while io_plugged > 0 the
+ * driver's bdrv_io_plug callback is invoked.
+ */
+void bdrv_io_unplugged_end(BlockDriverState *bs)
+{
+    BdrvChild *child;
+
+    assert(bs->io_plug_disabled);
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_io_unplugged_end(child->bs);
+    }
+
+    bs->io_plug_disabled--;
+    if (bs->io_plug_disabled == 0 && bs->io_plugged > 0) {
+        BlockDriver *drv = bs->drv;
+
+        if (drv && drv->bdrv_io_plug) {
+            drv->bdrv_io_plug(bs);
+        }
+    }
+}
diff --git a/block/iscsi.c b/block/iscsi.c
index ed375fc..95ce9e1 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -2,7 +2,7 @@
* QEMU Block driver for iSCSI images
*
* Copyright (c) 2010-2011 Ronnie Sahlberg <ronniesahlberg@gmail.com>
- * Copyright (c) 2012-2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2012-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,7 +23,7 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include <math.h>
@@ -38,13 +38,14 @@
#include "qemu/iov.h"
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
+#include "qapi/qmp/qstring.h"
+#include "crypto/secret.h"
#include <iscsi/iscsi.h>
#include <iscsi/scsi-lowlevel.h>
#ifdef __linux__
#include <scsi/sg.h>
-#include <block/scsi.h>
#endif
typedef struct IscsiLun {
@@ -56,15 +57,35 @@
uint64_t num_blocks;
int events;
QEMUTimer *nop_timer;
- uint8_t lbpme;
- uint8_t lbprz;
- uint8_t has_write_same;
+ QEMUTimer *event_timer;
struct scsi_inquiry_logical_block_provisioning lbp;
struct scsi_inquiry_block_limits bl;
unsigned char *zeroblock;
- unsigned long *allocationmap;
+ /* The allocmap tracks which clusters (pages) on the iSCSI target are
+ * allocated and which are not. In case a target returns zeros for
+ * unallocated pages (iscsilun->lbprz) we can directly return zeros instead
+ * of reading zeros over the wire if a read request falls within an
+ * unallocated block. As there are 3 possible states we need 2 bitmaps to
+ * track. allocmap_valid keeps track if QEMU's information about a page is
+ * valid. allocmap tracks if a page is allocated or not. In case QEMU has no
+ * valid information about a page the corresponding allocmap entry should be
+ * switched to unallocated as well to force a new lookup of the allocation
+ * status as lookups are generally skipped if a page is suspected to be
+ * allocated. If an iSCSI target is opened with cache.direct = on the
+ * allocmap_valid does not exist turning all cached information invalid so
+ * that a fresh lookup is made for any page even if the allocmap entry says
+ * it is unallocated. */
+ unsigned long *allocmap;
+ unsigned long *allocmap_valid;
+ long allocmap_size;
int cluster_sectors;
bool use_16_for_rw;
+ bool write_protected;
+ bool lbpme;
+ bool lbprz;
+ bool dpofua;
+ bool has_write_same;
+ bool request_timed_out;
} IscsiLun;
typedef struct IscsiTask {
@@ -77,6 +98,7 @@
QEMUBH *bh;
IscsiLun *iscsilun;
QEMUTimer retry_timer;
+ int err_code;
} IscsiTask;
typedef struct IscsiAIOCB {
@@ -89,15 +111,18 @@
int status;
int64_t sector_num;
int nb_sectors;
+ int ret;
#ifdef __linux__
sg_io_hdr_t *ioh;
#endif
} IscsiAIOCB;
+/* libiscsi uses time_t so it's enough to process events every second */
+#define EVENT_INTERVAL 1000
#define NOP_INTERVAL 5000
#define MAX_NOP_FAILURES 3
#define ISCSI_CMD_RETRIES ARRAY_SIZE(iscsi_retry_times)
-static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048};
+static const unsigned iscsi_retry_times[] = {8, 32, 128, 512, 2048, 8192, 32768};
/* this threshold is a trade-off knob to choose between
* the potential additional overhead of an extra GET_LBA_STATUS request
@@ -143,7 +168,7 @@
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
qemu_bh_delete(iTask->bh);
- qemu_coroutine_enter(iTask->co, NULL);
+ qemu_coroutine_enter(iTask->co);
}
static void iscsi_retry_timer_expired(void *opaque)
@@ -151,7 +176,7 @@
struct IscsiTask *iTask = opaque;
iTask->complete = 1;
if (iTask->co) {
- qemu_coroutine_enter(iTask->co, NULL);
+ qemu_coroutine_enter(iTask->co);
}
}
@@ -160,6 +185,70 @@
return -mean * log((double)rand() / RAND_MAX);
}
+/* SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST was introduced in
+ * libiscsi 1.10.0, together with other constants we need. Use it as
+ * a hint that we have to define them ourselves if needed, to keep the
+ * minimum required libiscsi version at 1.9.0. We use an ASCQ macro for
+ * the test because SCSI_STATUS_* is an enum.
+ *
+ * To guard against future changes where SCSI_SENSE_ASCQ_* also becomes
+ * an enum, check against the LIBISCSI_API_VERSION macro, which was
+ * introduced in 1.11.0. If it is present, there is no need to define
+ * anything.
+ */
+#if !defined(SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST) && \
+ !defined(LIBISCSI_API_VERSION)
+#define SCSI_STATUS_TASK_SET_FULL 0x28
+#define SCSI_STATUS_TIMEOUT 0x0f000002
+#define SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST 0x2600
+#define SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR 0x1a00
+#endif
+
+/*
+ * Map SCSI sense data to a negative errno value.
+ *
+ * The sense key decides the result directly, except for ILLEGAL_REQUEST,
+ * where the ASCQ value is examined.  Anything unrecognised becomes -EIO.
+ */
+static int iscsi_translate_sense(struct scsi_sense *sense)
+{
+    switch (sense->key) {
+    case SCSI_SENSE_ILLEGAL_REQUEST:
+        /* Parse ASCQ */
+        break;
+    case SCSI_SENSE_NOT_READY:
+        return -EBUSY;
+    case SCSI_SENSE_DATA_PROTECTION:
+        return -EACCES;
+    case SCSI_SENSE_COMMAND_ABORTED:
+        return -ECANCELED;
+    default:
+        return -EIO;
+    }
+
+    switch (sense->ascq) {
+    case SCSI_SENSE_ASCQ_PARAMETER_LIST_LENGTH_ERROR:
+    case SCSI_SENSE_ASCQ_INVALID_OPERATION_CODE:
+    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_CDB:
+    case SCSI_SENSE_ASCQ_INVALID_FIELD_IN_PARAMETER_LIST:
+        return -EINVAL;
+    case SCSI_SENSE_ASCQ_LBA_OUT_OF_RANGE:
+        return -ENOSPC;
+    case SCSI_SENSE_ASCQ_LOGICAL_UNIT_NOT_SUPPORTED:
+        return -ENOTSUP;
+    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT:
+    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_CLOSED:
+    case SCSI_SENSE_ASCQ_MEDIUM_NOT_PRESENT_TRAY_OPEN:
+        return -ENOMEDIUM;
+    case SCSI_SENSE_ASCQ_WRITE_PROTECTED:
+        return -EACCES;
+    default:
+        return -EIO;
+    }
+}
+
static void
iscsi_co_generic_cb(struct iscsi_context *iscsi, int status,
void *command_data, void *opaque)
@@ -180,10 +269,19 @@
iTask->do_retry = 1;
goto out;
}
- if (status == SCSI_STATUS_BUSY) {
+ if (status == SCSI_STATUS_BUSY ||
+ status == SCSI_STATUS_TIMEOUT ||
+ status == SCSI_STATUS_TASK_SET_FULL) {
unsigned retry_time =
exp_random(iscsi_retry_times[iTask->retries - 1]);
- error_report("iSCSI Busy (retry #%u in %u ms): %s",
+ if (status == SCSI_STATUS_TIMEOUT) {
+ /* make sure the request is rescheduled AFTER the
+ * reconnect is initiated */
+ retry_time = EVENT_INTERVAL * 2;
+ iTask->iscsilun->request_timed_out = true;
+ }
+ error_report("iSCSI Busy/TaskSetFull/TimeOut"
+ " (retry #%u in %u ms): %s",
iTask->retries, retry_time,
iscsi_get_error(iscsi));
aio_timer_init(iTask->iscsilun->aio_context,
@@ -195,6 +293,7 @@
return;
}
}
+ iTask->err_code = iscsi_translate_sense(&task->sense);
error_report("iSCSI Failure: %s", iscsi_get_error(iscsi));
}
@@ -255,21 +354,36 @@
iscsi_set_events(IscsiLun *iscsilun)
{
struct iscsi_context *iscsi = iscsilun->iscsi;
- int ev;
+ int ev = iscsi_which_events(iscsi);
- /* We always register a read handler. */
- ev = POLLIN;
- ev |= iscsi_which_events(iscsi);
if (ev != iscsilun->events) {
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsi),
- iscsi_process_read,
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsi),
+ false,
+ (ev & POLLIN) ? iscsi_process_read : NULL,
(ev & POLLOUT) ? iscsi_process_write : NULL,
iscsilun);
+ iscsilun->events = ev;
+ }
+}
+static void iscsi_timed_check_events(void *opaque)
+{
+ IscsiLun *iscsilun = opaque;
+
+ /* check for timed out requests */
+ iscsi_service(iscsilun->iscsi, 0);
+
+ if (iscsilun->request_timed_out) {
+ iscsilun->request_timed_out = false;
+ iscsi_reconnect(iscsilun->iscsi);
}
- iscsilun->events = ev;
+ /* newer versions of libiscsi may return zero events. Ensure we are able
+ * to return to service once this situation changes. */
+ iscsi_set_events(iscsilun);
+
+ timer_mod(iscsilun->event_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}
static void
@@ -302,70 +416,178 @@
return sector * BDRV_SECTOR_SIZE / iscsilun->block_size;
}
-static bool is_request_lun_aligned(int64_t sector_num, int nb_sectors,
- IscsiLun *iscsilun)
+static bool is_byte_request_lun_aligned(int64_t offset, int count,
+ IscsiLun *iscsilun)
{
- if ((sector_num * BDRV_SECTOR_SIZE) % iscsilun->block_size ||
- (nb_sectors * BDRV_SECTOR_SIZE) % iscsilun->block_size) {
- error_report("iSCSI misaligned request: "
- "iscsilun->block_size %u, sector_num %" PRIi64
- ", nb_sectors %d",
- iscsilun->block_size, sector_num, nb_sectors);
- return 0;
+ if (offset % iscsilun->block_size || count % iscsilun->block_size) {
+ error_report("iSCSI misaligned request: "
+ "iscsilun->block_size %u, offset %" PRIi64
+ ", count %d",
+ iscsilun->block_size, offset, count);
+ return false;
}
- return 1;
+ return true;
}
-static unsigned long *iscsi_allocationmap_init(IscsiLun *iscsilun)
+static bool is_sector_request_lun_aligned(int64_t sector_num, int nb_sectors,
+ IscsiLun *iscsilun)
{
- return bitmap_try_new(DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks,
- iscsilun),
- iscsilun->cluster_sectors));
+ assert(nb_sectors <= BDRV_REQUEST_MAX_SECTORS);
+ return is_byte_request_lun_aligned(sector_num << BDRV_SECTOR_BITS,
+ nb_sectors << BDRV_SECTOR_BITS,
+ iscsilun);
}
-static void iscsi_allocationmap_set(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
+/* Free both allocation bitmaps and reset their pointers to NULL */
+static void iscsi_allocmap_free(IscsiLun *iscsilun)
+{
+    g_free(iscsilun->allocmap);
+    iscsilun->allocmap = NULL;
+
+    g_free(iscsilun->allocmap_valid);
+    iscsilun->allocmap_valid = NULL;
+}
+
+
+/*
+ * (Re)create the allocation bitmaps of @iscsilun, one bit per cluster.
+ *
+ * Any existing bitmaps are freed first.  With BDRV_O_NOCACHE set in
+ * @open_flags only allocmap is created and allocmap_valid stays NULL.
+ * Returns 0 on success and -ENOMEM if a bitmap cannot be allocated; in the
+ * latter case neither bitmap is left allocated.
+ */
+static int iscsi_allocmap_init(IscsiLun *iscsilun, int open_flags)
+{
+    iscsi_allocmap_free(iscsilun);
+
+    iscsilun->allocmap_size =
+        DIV_ROUND_UP(sector_lun2qemu(iscsilun->num_blocks, iscsilun),
+                     iscsilun->cluster_sectors);
+
+    iscsilun->allocmap = bitmap_try_new(iscsilun->allocmap_size);
+    if (!iscsilun->allocmap) {
+        return -ENOMEM;
+    }
+
+    /* in case that cache.direct = on all allocmap entries are
+     * treated as invalid to force a relookup of the block
+     * status on every read request, so no validity bitmap is needed */
+    if (!(open_flags & BDRV_O_NOCACHE)) {
+        iscsilun->allocmap_valid = bitmap_try_new(iscsilun->allocmap_size);
+        if (!iscsilun->allocmap_valid) {
+            /* if we are under memory pressure free the allocmap as well */
+            iscsi_allocmap_free(iscsilun);
+            return -ENOMEM;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Record the state of the sector range [sector_num, sector_num + nb_sectors)
+ * in the allocation bitmaps; does nothing if allocmap does not exist.
+ *
+ * Setting bits (allocated / invalid) uses the range expanded to every
+ * cluster it touches; clearing allocmap bits or setting allocmap_valid bits
+ * uses the range shrunk to clusters it completely contains.  The shrunk
+ * cluster count is <= 0 when the range contains no complete cluster; in that
+ * case the shrunk operations are skipped instead of handing a non-positive
+ * count to the bitmap helpers.
+ */
+static void
+iscsi_allocmap_update(IscsiLun *iscsilun, int64_t sector_num,
+ int nb_sectors, bool allocated, bool valid)
+{
+ int64_t cl_num_expanded, nb_cls_expanded, cl_num_shrunk, nb_cls_shrunk;
+
+ if (iscsilun->allocmap == NULL) {
return;
}
- bitmap_set(iscsilun->allocationmap,
- sector_num / iscsilun->cluster_sectors,
- DIV_ROUND_UP(nb_sectors, iscsilun->cluster_sectors));
-}
+ /* expand to entirely contain all affected clusters */
+ cl_num_expanded = sector_num / iscsilun->cluster_sectors;
+ nb_cls_expanded = DIV_ROUND_UP(sector_num + nb_sectors,
+ iscsilun->cluster_sectors) - cl_num_expanded;
+ /* shrink to touch only completely contained clusters */
+ cl_num_shrunk = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
+ nb_cls_shrunk = (sector_num + nb_sectors) / iscsilun->cluster_sectors
+ - cl_num_shrunk;
+ if (allocated) {
+ bitmap_set(iscsilun->allocmap, cl_num_expanded, nb_cls_expanded);
+ } else if (nb_cls_shrunk > 0) {
+ /* only clear when at least one cluster is completely contained */
+ bitmap_clear(iscsilun->allocmap, cl_num_shrunk, nb_cls_shrunk);
+ }
-static void iscsi_allocationmap_clear(IscsiLun *iscsilun, int64_t sector_num,
- int nb_sectors)
-{
- int64_t cluster_num, nb_clusters;
- if (iscsilun->allocationmap == NULL) {
+ if (iscsilun->allocmap_valid == NULL) {
return;
}
- cluster_num = DIV_ROUND_UP(sector_num, iscsilun->cluster_sectors);
- nb_clusters = (sector_num + nb_sectors) / iscsilun->cluster_sectors
- - cluster_num;
- if (nb_clusters > 0) {
- bitmap_clear(iscsilun->allocationmap, cluster_num, nb_clusters);
+ if (valid) {
+ /* only mark valid when at least one cluster is completely contained */
+ if (nb_cls_shrunk > 0) {
+ bitmap_set(iscsilun->allocmap_valid, cl_num_shrunk, nb_cls_shrunk);
+ }
+ } else {
+ bitmap_clear(iscsilun->allocmap_valid, cl_num_expanded,
+ nb_cls_expanded);
}
}
-static int coroutine_fn iscsi_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+/* Mark the sector range as allocated and the information as valid */
+static void
+iscsi_allocmap_set_allocated(IscsiLun *iscsilun, int64_t sector_num,
+                             int nb_sectors)
+{
+    const bool allocated = true;
+    const bool valid = true;
+
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, allocated, valid);
+}
+
+/*
+ * Mark the sector range as unallocated and the information as valid.
+ *
+ * Note: if cache.direct=on the fifth argument to iscsi_allocmap_update
+ * is ignored, so this will in effect be an iscsi_allocmap_set_invalid.
+ */
+static void
+iscsi_allocmap_set_unallocated(IscsiLun *iscsilun, int64_t sector_num,
+                               int nb_sectors)
+{
+    const bool allocated = false;
+    const bool valid = true;
+
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, allocated, valid);
+}
+
+/* Mark the sector range as unallocated and the information as not valid */
+static void iscsi_allocmap_set_invalid(IscsiLun *iscsilun, int64_t sector_num,
+                                       int nb_sectors)
+{
+    const bool allocated = false;
+    const bool valid = false;
+
+    iscsi_allocmap_update(iscsilun, sector_num, nb_sectors, allocated, valid);
+}
+
+/* Zero whichever of the two allocation bitmaps exist, over their full size */
+static void iscsi_allocmap_invalidate(IscsiLun *iscsilun)
+{
+    long nbits = iscsilun->allocmap_size;
+
+    if (iscsilun->allocmap != NULL) {
+        bitmap_zero(iscsilun->allocmap, nbits);
+    }
+    if (iscsilun->allocmap_valid != NULL) {
+        bitmap_zero(iscsilun->allocmap_valid, nbits);
+    }
+}
+
+/*
+ * Return true if any cluster touched by the sector range has its allocmap
+ * bit set.  Without an allocmap everything is reported as allocated.
+ */
+static inline bool
+iscsi_allocmap_is_allocated(IscsiLun *iscsilun, int64_t sector_num,
+                            int nb_sectors)
+{
+    unsigned long first_cluster, end_cluster;
+
+    if (iscsilun->allocmap == NULL) {
+        return true;
+    }
+
+    end_cluster = DIV_ROUND_UP(sector_num + nb_sectors,
+                               iscsilun->cluster_sectors);
+    first_cluster = sector_num / iscsilun->cluster_sectors;
+
+    return find_next_bit(iscsilun->allocmap, end_cluster,
+                         first_cluster) != end_cluster;
+}
+
+/*
+ * Return true if every cluster touched by the sector range has its
+ * allocmap_valid bit set.  Without an allocmap_valid bitmap nothing is
+ * reported as valid.
+ */
+static inline bool iscsi_allocmap_is_valid(IscsiLun *iscsilun,
+                                           int64_t sector_num, int nb_sectors)
+{
+    unsigned long first_cluster, end_cluster;
+
+    if (iscsilun->allocmap_valid == NULL) {
+        return false;
+    }
+
+    end_cluster = DIV_ROUND_UP(sector_num + nb_sectors,
+                               iscsilun->cluster_sectors);
+    first_cluster = sector_num / iscsilun->cluster_sectors;
+
+    return find_next_zero_bit(iscsilun->allocmap_valid, end_cluster,
+                              first_cluster) == end_cluster;
+}
+
+static int coroutine_fn
+iscsi_co_writev_flags(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
+ QEMUIOVector *iov, int flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
uint64_t lba;
uint32_t num_sectors;
+ bool fua = flags & BDRV_REQ_FUA;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (fua) {
+ assert(iscsilun->dpofua);
+ }
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
- if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
- error_report("iSCSI Error: Write of %d sectors exceeds max_xfer_len "
- "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
- return -EINVAL;
+ if (bs->bl.max_transfer) {
+ assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
lba = sector_qemu2lun(sector_num, iscsilun);
@@ -375,12 +597,12 @@
if (iscsilun->use_16_for_rw) {
iTask.task = iscsi_write16_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
- iscsilun->block_size, 0, 0, 0, 0, 0,
+ iscsilun->block_size, 0, 0, fua, 0, 0,
iscsi_co_generic_cb, &iTask);
} else {
iTask.task = iscsi_write10_task(iscsilun->iscsi, iscsilun->lun, lba,
NULL, num_sectors * iscsilun->block_size,
- iscsilun->block_size, 0, 0, 0, 0, 0,
+ iscsilun->block_size, 0, 0, fua, 0, 0,
iscsi_co_generic_cb, &iTask);
}
if (iTask.task == NULL) {
@@ -404,30 +626,21 @@
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ iscsi_allocmap_set_invalid(iscsilun, sector_num, nb_sectors);
+ return iTask.err_code;
}
- iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_allocated(iscsilun, sector_num, nb_sectors);
return 0;
}
-static bool iscsi_allocationmap_is_allocated(IscsiLun *iscsilun,
- int64_t sector_num, int nb_sectors)
-{
- unsigned long size;
- if (iscsilun->allocationmap == NULL) {
- return true;
- }
- size = DIV_ROUND_UP(sector_num + nb_sectors, iscsilun->cluster_sectors);
- return !(find_next_bit(iscsilun->allocationmap, size,
- sector_num / iscsilun->cluster_sectors) == size);
-}
static int64_t coroutine_fn iscsi_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
IscsiLun *iscsilun = bs->opaque;
struct scsi_get_lba_status *lbas = NULL;
@@ -437,7 +650,7 @@
iscsi_co_init_iscsitask(iscsilun, &iTask);
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
ret = -EINVAL;
goto out;
}
@@ -448,7 +661,7 @@
*pnum = nb_sectors;
/* LUN does not support logical block provisioning */
- if (iscsilun->lbpme == 0) {
+ if (!iscsilun->lbpme) {
goto out;
}
@@ -507,9 +720,9 @@
}
if (ret & BDRV_BLOCK_ZERO) {
- iscsi_allocationmap_clear(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_unallocated(iscsilun, sector_num, *pnum);
} else {
- iscsi_allocationmap_set(iscsilun, sector_num, *pnum);
+ iscsi_allocmap_set_allocated(iscsilun, sector_num, *pnum);
}
if (*pnum > nb_sectors) {
@@ -519,6 +732,9 @@
if (iTask.task != NULL) {
scsi_free_scsi_task(iTask.task);
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
@@ -531,25 +747,40 @@
uint64_t lba;
uint32_t num_sectors;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
+ if (!is_sector_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
return -EINVAL;
}
- if (bs->bl.max_transfer_length && nb_sectors > bs->bl.max_transfer_length) {
- error_report("iSCSI Error: Read of %d sectors exceeds max_xfer_len "
- "of %d sectors", nb_sectors, bs->bl.max_transfer_length);
- return -EINVAL;
+ if (bs->bl.max_transfer) {
+ assert(nb_sectors << BDRV_SECTOR_BITS <= bs->bl.max_transfer);
}
- if (iscsilun->lbprz && nb_sectors >= ISCSI_CHECKALLOC_THRES &&
- !iscsi_allocationmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
- int64_t ret;
+ /* if cache.direct is off and we have a valid entry in our allocation map
+ * we can skip checking the block status and directly return zeroes if
+ * the request falls within an unallocated area */
+ if (iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
+ qemu_iovec_memset(iov, 0, 0x00, iov->size);
+ return 0;
+ }
+
+ if (nb_sectors >= ISCSI_CHECKALLOC_THRES &&
+ !iscsi_allocmap_is_valid(iscsilun, sector_num, nb_sectors) &&
+ !iscsi_allocmap_is_allocated(iscsilun, sector_num, nb_sectors)) {
int pnum;
- ret = iscsi_co_get_block_status(bs, sector_num, INT_MAX, &pnum);
+ BlockDriverState *file;
+ /* check the block status from the beginning of the cluster
+ * containing the start sector */
+ int64_t ret = iscsi_co_get_block_status(bs,
+ sector_num - sector_num % iscsilun->cluster_sectors,
+ BDRV_REQUEST_MAX_SECTORS, &pnum, &file);
if (ret < 0) {
return ret;
}
- if (ret & BDRV_BLOCK_ZERO && pnum >= nb_sectors) {
+ /* if the whole request falls into an unallocated area we can skip
+ * the read and directly return zeroes instead */
+ if (ret & BDRV_BLOCK_ZERO &&
+ pnum >= nb_sectors + sector_num % iscsilun->cluster_sectors) {
qemu_iovec_memset(iov, 0, 0x00, iov->size);
return 0;
}
@@ -593,7 +824,7 @@
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -604,12 +835,7 @@
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
- if (bs->sg) {
- return 0;
- }
-
iscsi_co_init_iscsitask(iscsilun, &iTask);
-
retry:
if (iscsi_synchronizecache10_task(iscsilun->iscsi, iscsilun->lun, 0, 0, 0,
0, iscsi_co_generic_cb, &iTask) == NULL) {
@@ -632,7 +858,7 @@
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
return 0;
@@ -652,12 +878,13 @@
if (status < 0) {
error_report("Failed to ioctl(SG_IO) to iSCSI lun. %s",
iscsi_get_error(iscsi));
- acb->status = -EIO;
+ acb->status = iscsi_translate_sense(&acb->task->sense);
}
acb->ioh->driver_status = 0;
acb->ioh->host_status = 0;
acb->ioh->resid = 0;
+ acb->ioh->status = status;
#define SG_ERR_DRIVER_SENSE 0x08
@@ -675,6 +902,38 @@
iscsi_schedule_bh(acb);
}
+/*
+ * Bottom half completing an emulated ioctl: deletes the BH, reports the
+ * stored result through the AIOCB's completion callback and drops the
+ * AIOCB reference.
+ */
+static void iscsi_ioctl_bh_completion(void *opaque)
+{
+    IscsiAIOCB *iscsi_acb = opaque;
+
+    qemu_bh_delete(iscsi_acb->bh);
+    iscsi_acb->common.cb(iscsi_acb->common.opaque, iscsi_acb->ret);
+    qemu_aio_unref(iscsi_acb);
+}
+
+/*
+ * Answer an ioctl other than SG_IO locally.
+ *
+ * SG_GET_VERSION_NUM stores 30000 in @buf, SG_GET_SCSI_ID stores the LUN
+ * type in the sg_scsi_id at @buf, and any other request yields -EINVAL.
+ * The result is delivered through a bottom half scheduled on the
+ * AioContext of the AIOCB's BlockDriverState.
+ */
+static void iscsi_ioctl_handle_emulated(IscsiAIOCB *acb, int req, void *buf)
+{
+    BlockDriverState *bs = acb->common.bs;
+    IscsiLun *iscsilun = bs->opaque;
+    int ret = 0;
+
+    if (req == SG_GET_VERSION_NUM) {
+        *(int *)buf = 30000;
+    } else if (req == SG_GET_SCSI_ID) {
+        ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
+    } else {
+        ret = -EINVAL;
+    }
+
+    assert(!acb->bh);
+    acb->bh = aio_bh_new(bdrv_get_aio_context(bs),
+                         iscsi_ioctl_bh_completion, acb);
+    acb->ret = ret;
+    qemu_bh_schedule(acb->bh);
+}
+
static BlockAIOCB *iscsi_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque)
@@ -684,8 +943,6 @@
struct iscsi_data data;
IscsiAIOCB *acb;
- assert(req == SG_IO);
-
acb = qemu_aio_get(&iscsi_aiocb_info, bs, cb, opaque);
acb->iscsilun = iscsilun;
@@ -694,6 +951,18 @@
acb->buf = NULL;
acb->ioh = buf;
+ if (req != SG_IO) {
+ iscsi_ioctl_handle_emulated(acb, req, buf);
+ return &acb->common;
+ }
+
+ if (acb->ioh->cmd_len > SCSI_CDB_MAX_SIZE) {
+ error_report("iSCSI: ioctl error CDB exceeds max size (%d > %d)",
+ acb->ioh->cmd_len, SCSI_CDB_MAX_SIZE);
+ qemu_aio_unref(acb);
+ return NULL;
+ }
+
acb->task = malloc(sizeof(struct scsi_task));
if (acb->task == NULL) {
error_report("iSCSI: Failed to allocate task for scsi command. %s",
@@ -758,38 +1027,6 @@
return &acb->common;
}
-static void ioctl_cb(void *opaque, int status)
-{
- int *p_status = opaque;
- *p_status = status;
-}
-
-static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- IscsiLun *iscsilun = bs->opaque;
- int status;
-
- switch (req) {
- case SG_GET_VERSION_NUM:
- *(int *)buf = 30000;
- break;
- case SG_GET_SCSI_ID:
- ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
- break;
- case SG_IO:
- status = -EINPROGRESS;
- iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
-
- while (status == -EINPROGRESS) {
- aio_poll(iscsilun->aio_context, true);
- }
-
- return 0;
- default:
- return -1;
- }
- return 0;
-}
#endif
static int64_t
@@ -805,29 +1042,26 @@
}
static int
-coroutine_fn iscsi_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+coroutine_fn iscsi_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
struct unmap_list list;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- return -EINVAL;
- }
+ assert(is_byte_request_lun_aligned(offset, count, iscsilun));
if (!iscsilun->lbp.lbpu) {
/* UNMAP is not supported by the target */
return 0;
}
- list.lba = sector_qemu2lun(sector_num, iscsilun);
- list.num = sector_qemu2lun(nb_sectors, iscsilun);
+ list.lba = offset / iscsilun->block_size;
+ list.num = count / iscsilun->block_size;
iscsi_co_init_iscsitask(iscsilun, &iTask);
retry:
if (iscsi_unmap_task(iscsilun->iscsi, iscsilun->lun, 0, 0, &list, 1,
- iscsi_co_generic_cb, &iTask) == NULL) {
+ iscsi_co_generic_cb, &iTask) == NULL) {
return -ENOMEM;
}
@@ -854,17 +1088,18 @@
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ return iTask.err_code;
}
- iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
return 0;
}
static int
-coroutine_fn iscsi_co_write_zeroes(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+coroutine_fn iscsi_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
IscsiLun *iscsilun = bs->opaque;
struct IscsiTask iTask;
@@ -872,8 +1107,8 @@
uint32_t nb_blocks;
bool use_16_for_ws = iscsilun->use_16_for_rw;
- if (!is_request_lun_aligned(sector_num, nb_sectors, iscsilun)) {
- return -EINVAL;
+ if (!is_byte_request_lun_aligned(offset, count, iscsilun)) {
+ return -ENOTSUP;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
@@ -894,8 +1129,8 @@
return -ENOTSUP;
}
- lba = sector_qemu2lun(sector_num, iscsilun);
- nb_blocks = sector_qemu2lun(nb_sectors, iscsilun);
+ lba = offset / iscsilun->block_size;
+ nb_blocks = count / iscsilun->block_size;
if (iscsilun->zeroblock == NULL) {
iscsilun->zeroblock = g_try_malloc0(iscsilun->block_size);
@@ -947,13 +1182,17 @@
}
if (iTask.status != SCSI_STATUS_GOOD) {
- return -EIO;
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
+ return iTask.err_code;
}
if (flags & BDRV_REQ_MAY_UNMAP) {
- iscsi_allocationmap_clear(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_invalid(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
} else {
- iscsi_allocationmap_set(iscsilun, sector_num, nb_sectors);
+ iscsi_allocmap_set_allocated(iscsilun, offset >> BDRV_SECTOR_BITS,
+ count >> BDRV_SECTOR_BITS);
}
return 0;
@@ -966,6 +1205,8 @@
QemuOpts *opts;
const char *user = NULL;
const char *password = NULL;
+ const char *secretid;
+ char *secret = NULL;
list = qemu_find_opts("iscsi");
if (!list) {
@@ -985,8 +1226,20 @@
return;
}
+ secretid = qemu_opt_get(opts, "password-secret");
password = qemu_opt_get(opts, "password");
- if (!password) {
+ if (secretid && password) {
+ error_setg(errp, "'password' and 'password-secret' properties are "
+ "mutually exclusive");
+ return;
+ }
+ if (secretid) {
+ secret = qcrypto_secret_lookup_as_utf8(secretid, errp);
+ if (!secret) {
+ return;
+ }
+ password = secret;
+ } else if (!password) {
error_setg(errp, "CHAP username specified but no password was given");
return;
}
@@ -994,6 +1247,8 @@
if (iscsi_set_initiator_username_pwd(iscsi, user, password)) {
error_setg(errp, "Failed to set initiator username and password");
}
+
+ g_free(secret);
}
static void parse_header_digest(struct iscsi_context *iscsi, const char *target,
@@ -1068,16 +1323,37 @@
return iscsi_name;
}
+static int parse_timeout(const char *target)
+{
+ QemuOptsList *list;
+ QemuOpts *opts;
+ const char *timeout;
+
+ list = qemu_find_opts("iscsi");
+ if (list) {
+ opts = qemu_opts_find(list, target);
+ if (!opts) {
+ opts = QTAILQ_FIRST(&list->head);
+ }
+ if (opts) {
+ timeout = qemu_opt_get(opts, "timeout");
+ if (timeout) {
+ return atoi(timeout);
+ }
+ }
+ }
+
+ return 0;
+}
+
static void iscsi_nop_timed_event(void *opaque)
{
IscsiLun *iscsilun = opaque;
- if (iscsi_get_nops_in_flight(iscsilun->iscsi) > MAX_NOP_FAILURES) {
+ if (iscsi_get_nops_in_flight(iscsilun->iscsi) >= MAX_NOP_FAILURES) {
error_report("iSCSI: NOP timeout. Reconnecting...");
- iscsi_reconnect(iscsilun->iscsi);
- }
-
- if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
+ iscsilun->request_timed_out = true;
+ } else if (iscsi_nop_out_async(iscsilun->iscsi, NULL, NULL, 0, NULL) != 0) {
error_report("iSCSI: failed to sent NOP-Out. Disabling NOP messages.");
return;
}
@@ -1109,12 +1385,17 @@
} else {
iscsilun->block_size = rc16->block_length;
iscsilun->num_blocks = rc16->returned_lba + 1;
- iscsilun->lbpme = rc16->lbpme;
- iscsilun->lbprz = rc16->lbprz;
+ iscsilun->lbpme = !!rc16->lbpme;
+ iscsilun->lbprz = !!rc16->lbprz;
iscsilun->use_16_for_rw = (rc16->returned_lba > 0xffffffff);
}
+ break;
}
- break;
+ if (task != NULL && task->status == SCSI_STATUS_CHECK_CONDITION
+ && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) {
+ break;
+ }
+ /* Fall through and try READ CAPACITY(10) instead. */
case TYPE_ROM:
task = iscsi_readcapacity10_sync(iscsilun->iscsi, iscsilun->lun, 0, 0);
if (task != NULL && task->status == SCSI_STATUS_GOOD) {
@@ -1140,7 +1421,11 @@
&& retries-- > 0);
if (task == NULL || task->status != SCSI_STATUS_GOOD) {
- error_setg(errp, "iSCSI: failed to send readcapacity10 command.");
+ error_setg(errp, "iSCSI: failed to send readcapacity10/16 command");
+ } else if (!iscsilun->block_size ||
+ iscsilun->block_size % BDRV_SECTOR_SIZE) {
+ error_setg(errp, "iSCSI: the target returned an invalid "
+ "block size of %d.", iscsilun->block_size);
}
if (task) {
scsi_free_scsi_task(task);
@@ -1203,9 +1488,8 @@
{
IscsiLun *iscsilun = bs->opaque;
- aio_set_fd_handler(iscsilun->aio_context,
- iscsi_get_fd(iscsilun->iscsi),
- NULL, NULL, NULL);
+ aio_set_fd_handler(iscsilun->aio_context, iscsi_get_fd(iscsilun->iscsi),
+ false, NULL, NULL, NULL);
iscsilun->events = 0;
if (iscsilun->nop_timer) {
@@ -1213,6 +1497,11 @@
timer_free(iscsilun->nop_timer);
iscsilun->nop_timer = NULL;
}
+ if (iscsilun->event_timer) {
+ timer_del(iscsilun->event_timer);
+ timer_free(iscsilun->event_timer);
+ iscsilun->event_timer = NULL;
+ }
}
static void iscsi_attach_aio_context(BlockDriverState *bs,
@@ -1229,13 +1518,22 @@
iscsi_nop_timed_event, iscsilun);
timer_mod(iscsilun->nop_timer,
qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL);
+
+ /* Set up a timer for periodic calls to iscsi_set_events and to
+ * scan for command timeout */
+ iscsilun->event_timer = aio_timer_new(iscsilun->aio_context,
+ QEMU_CLOCK_REALTIME, SCALE_MS,
+ iscsi_timed_check_events, iscsilun);
+ timer_mod(iscsilun->event_timer,
+ qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + EVENT_INTERVAL);
}
-static bool iscsi_is_write_protected(IscsiLun *iscsilun)
+static void iscsi_modesense_sync(IscsiLun *iscsilun)
{
struct scsi_task *task;
struct scsi_mode_sense *ms = NULL;
- bool wrprotected = false;
+ iscsilun->write_protected = false;
+ iscsilun->dpofua = false;
task = iscsi_modesense6_sync(iscsilun->iscsi, iscsilun->lun,
1, SCSI_MODESENSE_PC_CURRENT,
@@ -1256,22 +1554,18 @@
iscsi_get_error(iscsilun->iscsi));
goto out;
}
- wrprotected = ms->device_specific_parameter & 0x80;
+ iscsilun->write_protected = ms->device_specific_parameter & 0x80;
+ iscsilun->dpofua = ms->device_specific_parameter & 0x10;
out:
if (task) {
scsi_free_scsi_task(task);
}
- return wrprotected;
}
/*
* We support iscsi url's on the form
* iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun>
- *
- * Note: flags are currently not used by iscsi_open. If this function
- * is changed such that flags are used, please examine iscsi_reopen_prepare()
- * to see if needs to be changed as well.
*/
static int iscsi_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
@@ -1286,14 +1580,7 @@
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
- int i, ret;
-
- if ((BDRV_SECTOR_SIZE % 512) != 0) {
- error_setg(errp, "iSCSI: Invalid BDRV_SECTOR_SIZE. "
- "BDRV_SECTOR_SIZE(%lld) is not a multiple "
- "of 512", BDRV_SECTOR_SIZE);
- return -EINVAL;
- }
+ int i, ret = 0, timeout = 0;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &local_err);
@@ -1329,7 +1616,7 @@
goto out;
}
- if (iscsi_url->user != NULL) {
+ if (iscsi_url->user[0] != '\0') {
ret = iscsi_set_initiator_username_pwd(iscsi, iscsi_url->user,
iscsi_url->passwd);
if (ret != 0) {
@@ -1363,6 +1650,16 @@
goto out;
}
+ /* timeout handling is broken in libiscsi before 1.15.0 */
+ timeout = parse_timeout(iscsi_url->target);
+#if defined(LIBISCSI_API_VERSION) && LIBISCSI_API_VERSION >= 20150621
+ iscsi_set_timeout(iscsi, timeout);
+#else
+ if (timeout) {
+ error_report("iSCSI: ignoring timeout value for libiscsi <1.15.0");
+ }
+#endif
+
if (iscsi_full_connect_sync(iscsi, iscsi_url->portal, iscsi_url->lun) != 0) {
error_setg(errp, "iSCSI: Failed to connect to LUN : %s",
iscsi_get_error(iscsi));
@@ -1385,9 +1682,15 @@
scsi_free_scsi_task(task);
task = NULL;
+ iscsi_modesense_sync(iscsilun);
+ if (iscsilun->dpofua) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
+
/* Check the write protect flag of the LUN if we want to write */
if (iscsilun->type == TYPE_DISK && (flags & BDRV_O_RDWR) &&
- iscsi_is_write_protected(iscsilun)) {
+ iscsilun->write_protected) {
error_setg(errp, "Cannot open a write protected LUN as read-write");
ret = -EACCES;
goto out;
@@ -1400,14 +1703,13 @@
goto out;
}
bs->total_sectors = sector_lun2qemu(iscsilun->num_blocks, iscsilun);
- bs->request_alignment = iscsilun->block_size;
/* We don't have any emulation for devices other than disks and CD-ROMs, so
* this must be sg ioctl compatible. We force it to be sg, otherwise qemu
* will try to read from the device to guess the image format.
*/
if (iscsilun->type != TYPE_DISK && iscsilun->type != TYPE_ROM) {
- bs->sg = 1;
+ bs->sg = true;
}
task = iscsi_do_inquiry(iscsilun->iscsi, iscsilun->lun, 1,
@@ -1462,11 +1764,8 @@
iscsilun->bl.opt_unmap_gran * iscsilun->block_size <= 16 * 1024 * 1024) {
iscsilun->cluster_sectors = (iscsilun->bl.opt_unmap_gran *
iscsilun->block_size) >> BDRV_SECTOR_BITS;
- if (iscsilun->lbprz && !(bs->open_flags & BDRV_O_NOCACHE)) {
- iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
- if (iscsilun->allocationmap == NULL) {
- ret = -ENOMEM;
- }
+ if (iscsilun->lbprz) {
+ ret = iscsi_allocmap_init(iscsilun, bs->open_flags);
}
}
@@ -1482,6 +1781,9 @@
if (ret) {
if (iscsi != NULL) {
+ if (iscsi_is_logged_in(iscsi)) {
+ iscsi_logout_sync(iscsi);
+ }
iscsi_destroy_context(iscsi);
}
memset(iscsilun, 0, sizeof(IscsiLun));
@@ -1495,62 +1797,85 @@
struct iscsi_context *iscsi = iscsilun->iscsi;
iscsi_detach_aio_context(bs);
+ if (iscsi_is_logged_in(iscsi)) {
+ iscsi_logout_sync(iscsi);
+ }
iscsi_destroy_context(iscsi);
g_free(iscsilun->zeroblock);
- g_free(iscsilun->allocationmap);
+ iscsi_allocmap_free(iscsilun);
memset(iscsilun, 0, sizeof(IscsiLun));
}
-static int sector_limits_lun2qemu(int64_t sector, IscsiLun *iscsilun)
-{
- return MIN(sector_lun2qemu(sector, iscsilun), INT_MAX / 2 + 1);
-}
-
static void iscsi_refresh_limits(BlockDriverState *bs, Error **errp)
{
/* We don't actually refresh here, but just return data queried in
* iscsi_open(): iscsi targets don't change their limits. */
IscsiLun *iscsilun = bs->opaque;
- uint32_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+ uint64_t max_xfer_len = iscsilun->use_16_for_rw ? 0xffffffff : 0xffff;
+
+ bs->bl.request_alignment = iscsilun->block_size;
if (iscsilun->bl.max_xfer_len) {
max_xfer_len = MIN(max_xfer_len, iscsilun->bl.max_xfer_len);
}
- bs->bl.max_transfer_length = sector_limits_lun2qemu(max_xfer_len, iscsilun);
+ if (max_xfer_len * iscsilun->block_size < INT_MAX) {
+ bs->bl.max_transfer = max_xfer_len * iscsilun->block_size;
+ }
if (iscsilun->lbp.lbpu) {
- if (iscsilun->bl.max_unmap < 0xffffffff) {
- bs->bl.max_discard =
- sector_limits_lun2qemu(iscsilun->bl.max_unmap, iscsilun);
+ if (iscsilun->bl.max_unmap < 0xffffffff / iscsilun->block_size) {
+ bs->bl.max_pdiscard =
+ iscsilun->bl.max_unmap * iscsilun->block_size;
}
- bs->bl.discard_alignment =
- sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+ bs->bl.pdiscard_alignment =
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
+ } else {
+ bs->bl.pdiscard_alignment = iscsilun->block_size;
}
- if (iscsilun->bl.max_ws_len < 0xffffffff) {
- bs->bl.max_write_zeroes =
- sector_limits_lun2qemu(iscsilun->bl.max_ws_len, iscsilun);
+ if (iscsilun->bl.max_ws_len < 0xffffffff / iscsilun->block_size) {
+ bs->bl.max_pwrite_zeroes =
+ iscsilun->bl.max_ws_len * iscsilun->block_size;
}
if (iscsilun->lbp.lbpws) {
- bs->bl.write_zeroes_alignment =
- sector_limits_lun2qemu(iscsilun->bl.opt_unmap_gran, iscsilun);
+ bs->bl.pwrite_zeroes_alignment =
+ iscsilun->bl.opt_unmap_gran * iscsilun->block_size;
+ } else {
+ bs->bl.pwrite_zeroes_alignment = iscsilun->block_size;
}
- bs->bl.opt_transfer_length =
- sector_limits_lun2qemu(iscsilun->bl.opt_xfer_len, iscsilun);
+ if (iscsilun->bl.opt_xfer_len &&
+ iscsilun->bl.opt_xfer_len < INT_MAX / iscsilun->block_size) {
+ bs->bl.opt_transfer = pow2floor(iscsilun->bl.opt_xfer_len *
+ iscsilun->block_size);
+ }
}
-/* Since iscsi_open() ignores bdrv_flags, there is nothing to do here in
- * prepare. Note that this will not re-establish a connection with an iSCSI
- * target - it is effectively a NOP. */
+/* Note that this will not re-establish a connection with an iSCSI target - it
+ * is effectively a NOP. */
static int iscsi_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
- /* NOP */
+ IscsiLun *iscsilun = state->bs->opaque;
+
+ if (state->flags & BDRV_O_RDWR && iscsilun->write_protected) {
+ error_setg(errp, "Cannot open a write protected LUN as read-write");
+ return -EACCES;
+ }
return 0;
}
+static void iscsi_reopen_commit(BDRVReopenState *reopen_state)
+{
+ IscsiLun *iscsilun = reopen_state->bs->opaque;
+
+ /* the cache.direct status might have changed */
+ if (iscsilun->allocmap != NULL) {
+ iscsi_allocmap_init(iscsilun, reopen_state->flags);
+ }
+}
+
static int iscsi_truncate(BlockDriverState *bs, int64_t offset)
{
IscsiLun *iscsilun = bs->opaque;
@@ -1570,9 +1895,8 @@
return -EINVAL;
}
- if (iscsilun->allocationmap != NULL) {
- g_free(iscsilun->allocationmap);
- iscsilun->allocationmap = iscsi_allocationmap_init(iscsilun);
+ if (iscsilun->allocmap != NULL) {
+ iscsi_allocmap_init(iscsilun, bs->open_flags);
}
return 0;
@@ -1626,12 +1950,19 @@
static int iscsi_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
IscsiLun *iscsilun = bs->opaque;
- bdi->unallocated_blocks_are_zero = !!iscsilun->lbprz;
+ bdi->unallocated_blocks_are_zero = iscsilun->lbprz;
bdi->can_write_zeroes_with_unmap = iscsilun->lbprz && iscsilun->lbp.lbpws;
bdi->cluster_size = iscsilun->cluster_sectors * BDRV_SECTOR_SIZE;
return 0;
}
+static void iscsi_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ IscsiLun *iscsilun = bs->opaque;
+ iscsi_allocmap_invalidate(iscsilun);
+}
+
static QemuOptsList iscsi_create_opts = {
.name = "iscsi-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(iscsi_create_opts.head),
@@ -1655,7 +1986,9 @@
.bdrv_close = iscsi_close,
.bdrv_create = iscsi_create,
.create_opts = &iscsi_create_opts,
- .bdrv_reopen_prepare = iscsi_reopen_prepare,
+ .bdrv_reopen_prepare = iscsi_reopen_prepare,
+ .bdrv_reopen_commit = iscsi_reopen_commit,
+ .bdrv_invalidate_cache = iscsi_invalidate_cache,
.bdrv_getlength = iscsi_getlength,
.bdrv_get_info = iscsi_get_info,
@@ -1663,14 +1996,13 @@
.bdrv_refresh_limits = iscsi_refresh_limits,
.bdrv_co_get_block_status = iscsi_co_get_block_status,
- .bdrv_co_discard = iscsi_co_discard,
- .bdrv_co_write_zeroes = iscsi_co_write_zeroes,
+ .bdrv_co_pdiscard = iscsi_co_pdiscard,
+ .bdrv_co_pwrite_zeroes = iscsi_co_pwrite_zeroes,
.bdrv_co_readv = iscsi_co_readv,
- .bdrv_co_writev = iscsi_co_writev,
+ .bdrv_co_writev_flags = iscsi_co_writev_flags,
.bdrv_co_flush_to_disk = iscsi_co_flush,
#ifdef __linux__
- .bdrv_ioctl = iscsi_ioctl,
.bdrv_aio_ioctl = iscsi_aio_ioctl,
#endif
@@ -1691,6 +2023,11 @@
.type = QEMU_OPT_STRING,
.help = "password for CHAP authentication to target",
},{
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the secret providing password for CHAP "
+ "authentication to target",
+ },{
.name = "header-digest",
.type = QEMU_OPT_STRING,
.help = "HeaderDigest setting. "
@@ -1699,6 +2036,10 @@
.name = "initiator-name",
.type = QEMU_OPT_STRING,
.help = "Initiator iqn name to use when connecting",
+ },{
+ .name = "timeout",
+ .type = QEMU_OPT_NUMBER,
+ .help = "Request timeout in seconds (default 0 = no timeout)",
},
{ /* end of list */ }
},
diff --git a/block/linux-aio.c b/block/linux-aio.c
index d92513b..e906abe 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -7,11 +7,14 @@
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
+#include "block/block.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
+#include "qemu/coroutine.h"
#include <libaio.h>
@@ -25,27 +28,29 @@
*/
#define MAX_EVENTS 128
-#define MAX_QUEUED_IO 128
-
struct qemu_laiocb {
BlockAIOCB common;
- struct qemu_laio_state *ctx;
+ Coroutine *co;
+ LinuxAioState *ctx;
struct iocb iocb;
ssize_t ret;
size_t nbytes;
QEMUIOVector *qiov;
bool is_read;
- QLIST_ENTRY(qemu_laiocb) node;
+ QSIMPLEQ_ENTRY(qemu_laiocb) next;
};
typedef struct {
- struct iocb *iocbs[MAX_QUEUED_IO];
int plugged;
- unsigned int size;
- unsigned int idx;
+ unsigned int in_queue;
+ unsigned int in_flight;
+ bool blocked;
+ QSIMPLEQ_HEAD(, qemu_laiocb) pending;
} LaioQueue;
-struct qemu_laio_state {
+struct LinuxAioState {
+ AioContext *aio_context;
+
io_context_t ctx;
EventNotifier e;
@@ -59,6 +64,8 @@
int event_max;
};
+static void ioq_submit(LinuxAioState *s);
+
static inline ssize_t io_event_ret(struct io_event *ev)
{
return (ssize_t)(((uint64_t)ev->res2 << 32) | ev->res);
@@ -67,8 +74,7 @@
/*
* Completes an AIO request (calls the callback and frees the ACB).
*/
-static void qemu_laio_process_completion(struct qemu_laio_state *s,
- struct qemu_laiocb *laiocb)
+static void qemu_laio_process_completion(struct qemu_laiocb *laiocb)
{
int ret;
@@ -82,13 +88,18 @@
qemu_iovec_memset(laiocb->qiov, ret, 0,
laiocb->qiov->size - ret);
} else {
- ret = -EINVAL;
+ ret = -ENOSPC;
}
}
}
- laiocb->common.cb(laiocb->common.opaque, ret);
- qemu_aio_unref(laiocb);
+ laiocb->ret = ret;
+ if (laiocb->co) {
+ qemu_coroutine_enter(laiocb->co);
+ } else {
+ laiocb->common.cb(laiocb->common.opaque, ret);
+ qemu_aio_unref(laiocb);
+ }
}
/* The completion BH fetches completed I/O requests and invokes their
@@ -96,7 +107,7 @@
*
* The function is somewhat tricky because it supports nested event loops, for
* example when a request callback invokes aio_poll(). In order to do this,
- * the completion events array and index are kept in qemu_laio_state. The BH
+ * the completion events array and index are kept in LinuxAioState. The BH
* reschedules itself as long as there are completions pending so it will
* either be called again in a nested event loop or will be called after all
* events have been completed. When there are no events left to complete, the
@@ -104,7 +115,7 @@
*/
static void qemu_laio_completion_bh(void *opaque)
{
- struct qemu_laio_state *s = opaque;
+ LinuxAioState *s = opaque;
/* Fetch more completion events when empty */
if (s->event_idx == s->event_max) {
@@ -119,6 +130,7 @@
s->event_max = 0;
return; /* no more events */
}
+ s->io_q.in_flight -= s->event_max;
}
/* Reschedule so nested event loops see currently pending completions */
@@ -133,16 +145,22 @@
laiocb->ret = io_event_ret(&s->events[s->event_idx]);
s->event_idx++;
- qemu_laio_process_completion(s, laiocb);
+ qemu_laio_process_completion(laiocb);
}
+
+ if (!s->io_q.plugged && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
+ ioq_submit(s);
+ }
+
+ qemu_bh_cancel(s->completion_bh);
}
static void qemu_laio_completion_cb(EventNotifier *e)
{
- struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
+ LinuxAioState *s = container_of(e, LinuxAioState, e);
if (event_notifier_test_and_clear(&s->e)) {
- qemu_bh_schedule(s->completion_bh);
+ qemu_laio_completion_bh(s);
}
}
@@ -172,94 +190,74 @@
static void ioq_init(LaioQueue *io_q)
{
- io_q->size = MAX_QUEUED_IO;
- io_q->idx = 0;
+ QSIMPLEQ_INIT(&io_q->pending);
io_q->plugged = 0;
+ io_q->in_queue = 0;
+ io_q->in_flight = 0;
+ io_q->blocked = false;
}
-static int ioq_submit(struct qemu_laio_state *s)
+static void ioq_submit(LinuxAioState *s)
{
- int ret, i = 0;
- int len = s->io_q.idx;
+ int ret, len;
+ struct qemu_laiocb *aiocb;
+ struct iocb *iocbs[MAX_EVENTS];
+ QSIMPLEQ_HEAD(, qemu_laiocb) completed;
do {
- ret = io_submit(s->ctx, len, s->io_q.iocbs);
- } while (i++ < 3 && ret == -EAGAIN);
+ if (s->io_q.in_flight >= MAX_EVENTS) {
+ break;
+ }
+ len = 0;
+ QSIMPLEQ_FOREACH(aiocb, &s->io_q.pending, next) {
+ iocbs[len++] = &aiocb->iocb;
+ if (s->io_q.in_flight + len >= MAX_EVENTS) {
+ break;
+ }
+ }
- /* empty io queue */
- s->io_q.idx = 0;
+ ret = io_submit(s->ctx, len, iocbs);
+ if (ret == -EAGAIN) {
+ break;
+ }
+ if (ret < 0) {
+ /* Fail the first request, retry the rest */
+ aiocb = QSIMPLEQ_FIRST(&s->io_q.pending);
+ QSIMPLEQ_REMOVE_HEAD(&s->io_q.pending, next);
+ s->io_q.in_queue--;
+ aiocb->ret = ret;
+ qemu_laio_process_completion(aiocb);
+ continue;
+ }
- if (ret < 0) {
- i = 0;
- } else {
- i = ret;
- }
-
- for (; i < len; i++) {
- struct qemu_laiocb *laiocb =
- container_of(s->io_q.iocbs[i], struct qemu_laiocb, iocb);
-
- laiocb->ret = (ret < 0) ? ret : -EIO;
- qemu_laio_process_completion(s, laiocb);
- }
- return ret;
+ s->io_q.in_flight += ret;
+ s->io_q.in_queue -= ret;
+ aiocb = container_of(iocbs[ret - 1], struct qemu_laiocb, iocb);
+ QSIMPLEQ_SPLIT_AFTER(&s->io_q.pending, aiocb, next, &completed);
+ } while (ret == len && !QSIMPLEQ_EMPTY(&s->io_q.pending));
+ s->io_q.blocked = (s->io_q.in_queue > 0);
}
-static void ioq_enqueue(struct qemu_laio_state *s, struct iocb *iocb)
+void laio_io_plug(BlockDriverState *bs, LinuxAioState *s)
{
- unsigned int idx = s->io_q.idx;
+ s->io_q.plugged++;
+}
- s->io_q.iocbs[idx++] = iocb;
- s->io_q.idx = idx;
-
- /* submit immediately if queue is full */
- if (idx == s->io_q.size) {
+void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s)
+{
+ assert(s->io_q.plugged);
+ if (--s->io_q.plugged == 0 &&
+ !s->io_q.blocked && !QSIMPLEQ_EMPTY(&s->io_q.pending)) {
ioq_submit(s);
}
}
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx)
+static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
+ int type)
{
- struct qemu_laio_state *s = aio_ctx;
-
- s->io_q.plugged++;
-}
-
-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug)
-{
- struct qemu_laio_state *s = aio_ctx;
- int ret = 0;
-
- assert(s->io_q.plugged > 0 || !unplug);
-
- if (unplug && --s->io_q.plugged > 0) {
- return 0;
- }
-
- if (s->io_q.idx > 0) {
- ret = ioq_submit(s);
- }
-
- return ret;
-}
-
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
-{
- struct qemu_laio_state *s = aio_ctx;
- struct qemu_laiocb *laiocb;
- struct iocb *iocbs;
- off_t offset = sector_num * 512;
-
- laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
- laiocb->nbytes = nb_sectors * 512;
- laiocb->ctx = s;
- laiocb->ret = -EINPROGRESS;
- laiocb->is_read = (type == QEMU_AIO_READ);
- laiocb->qiov = qiov;
-
- iocbs = &laiocb->iocb;
+ LinuxAioState *s = laiocb->ctx;
+ struct iocb *iocbs = &laiocb->iocb;
+ QEMUIOVector *qiov = laiocb->qiov;
switch (type) {
case QEMU_AIO_WRITE:
@@ -272,43 +270,83 @@
default:
fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
__func__, type);
- goto out_free_aiocb;
+ return -EIO;
}
io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));
- if (!s->io_q.plugged) {
- if (io_submit(s->ctx, 1, &iocbs) < 0) {
- goto out_free_aiocb;
- }
- } else {
- ioq_enqueue(s, iocbs);
+ QSIMPLEQ_INSERT_TAIL(&s->io_q.pending, laiocb, next);
+ s->io_q.in_queue++;
+ if (!s->io_q.blocked &&
+ (!s->io_q.plugged ||
+ s->io_q.in_flight + s->io_q.in_queue >= MAX_EVENTS)) {
+ ioq_submit(s);
}
- return &laiocb->common;
-out_free_aiocb:
- qemu_aio_unref(laiocb);
- return NULL;
+ return 0;
}
-void laio_detach_aio_context(void *s_, AioContext *old_context)
+int coroutine_fn laio_co_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ uint64_t offset, QEMUIOVector *qiov, int type)
{
- struct qemu_laio_state *s = s_;
+ int ret;
+ struct qemu_laiocb laiocb = {
+ .co = qemu_coroutine_self(),
+ .nbytes = qiov->size,
+ .ctx = s,
+ .is_read = (type == QEMU_AIO_READ),
+ .qiov = qiov,
+ };
- aio_set_event_notifier(old_context, &s->e, NULL);
+ ret = laio_do_submit(fd, &laiocb, offset, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ qemu_coroutine_yield();
+ return laiocb.ret;
+}
+
+BlockAIOCB *laio_submit(BlockDriverState *bs, LinuxAioState *s, int fd,
+ int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ BlockCompletionFunc *cb, void *opaque, int type)
+{
+ struct qemu_laiocb *laiocb;
+ off_t offset = sector_num * BDRV_SECTOR_SIZE;
+ int ret;
+
+ laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
+ laiocb->nbytes = nb_sectors * BDRV_SECTOR_SIZE;
+ laiocb->ctx = s;
+ laiocb->ret = -EINPROGRESS;
+ laiocb->is_read = (type == QEMU_AIO_READ);
+ laiocb->qiov = qiov;
+
+ ret = laio_do_submit(fd, laiocb, offset, type);
+ if (ret < 0) {
+ qemu_aio_unref(laiocb);
+ return NULL;
+ }
+
+ return &laiocb->common;
+}
+
+void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context)
+{
+ aio_set_event_notifier(old_context, &s->e, false, NULL);
qemu_bh_delete(s->completion_bh);
}
-void laio_attach_aio_context(void *s_, AioContext *new_context)
+void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context)
{
- struct qemu_laio_state *s = s_;
-
+ s->aio_context = new_context;
s->completion_bh = aio_bh_new(new_context, qemu_laio_completion_bh, s);
- aio_set_event_notifier(new_context, &s->e, qemu_laio_completion_cb);
+ aio_set_event_notifier(new_context, &s->e, false,
+ qemu_laio_completion_cb);
}
-void *laio_init(void)
+LinuxAioState *laio_init(void)
{
- struct qemu_laio_state *s;
+ LinuxAioState *s;
s = g_malloc0(sizeof(*s));
if (event_notifier_init(&s->e, false) < 0) {
@@ -330,10 +368,8 @@
return NULL;
}
-void laio_cleanup(void *s_)
+void laio_cleanup(LinuxAioState *s)
{
- struct qemu_laio_state *s = s_;
-
event_notifier_cleanup(&s->e);
if (io_destroy(s->ctx) != 0) {
diff --git a/block/mirror.c b/block/mirror.c
index 2c6dd2a..e0b3f41 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -11,14 +11,21 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
#include "qemu/bitmap.h"
#define SLICE_TIME 100000000ULL /* ns */
#define MAX_IN_FLIGHT 16
+#define MAX_IO_SECTORS ((1 << 20) >> BDRV_SECTOR_BITS) /* 1 Mb */
+#define DEFAULT_MIRROR_BUF_SIZE \
+ (MAX_IN_FLIGHT * MAX_IO_SECTORS * BDRV_SECTOR_SIZE)
/* The mirroring buffer is a list of granularity-sized chunks.
* Free chunks are organized in a list.
@@ -30,7 +37,7 @@
typedef struct MirrorBlockJob {
BlockJob common;
RateLimit limit;
- BlockDriverState *target;
+ BlockBackend *target;
BlockDriverState *base;
/* The name of the graph node to replace */
char *replaces;
@@ -39,10 +46,10 @@
/* Used to block operations on the drive-mirror-replace target */
Error *replace_blocker;
bool is_none_mode;
+ BlockMirrorBackingMode backing_mode;
BlockdevOnError on_source_error, on_target_error;
bool synced;
bool should_complete;
- int64_t sector_num;
int64_t granularity;
size_t buf_size;
int64_t bdev_length;
@@ -53,10 +60,15 @@
QSIMPLEQ_HEAD(, MirrorBuffer) buf_free;
int buf_free_count;
+ uint64_t last_pause_ns;
unsigned long *in_flight_bitmap;
int in_flight;
- int sectors_in_flight;
+ int64_t sectors_in_flight;
int ret;
+ bool unmap;
+ bool waiting_for_io;
+ int target_cluster_sectors;
+ int max_iov;
} MirrorBlockJob;
typedef struct MirrorOp {
@@ -71,11 +83,11 @@
{
s->synced = false;
if (read) {
- return block_job_error_action(&s->common, s->common.bs,
- s->on_source_error, true, error);
+ return block_job_error_action(&s->common, s->on_source_error,
+ true, error);
} else {
- return block_job_error_action(&s->common, s->target,
- s->on_target_error, false, error);
+ return block_job_error_action(&s->common, s->on_target_error,
+ false, error);
}
}
@@ -99,7 +111,7 @@
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
chunk_num = op->sector_num / sectors_per_chunk;
- nb_chunks = op->nb_sectors / sectors_per_chunk;
+ nb_chunks = DIV_ROUND_UP(op->nb_sectors, sectors_per_chunk);
bitmap_clear(s->in_flight_bitmap, chunk_num, nb_chunks);
if (ret >= 0) {
if (s->cow_bitmap) {
@@ -109,14 +121,10 @@
}
qemu_iovec_destroy(&op->qiov);
- g_slice_free(MirrorOp, op);
+ g_free(op);
- /* Enter coroutine when it is not sleeping. The coroutine sleeps to
- * rate-limit itself. The coroutine will eventually resume since there is
- * a sleep timeout so don't wake it early.
- */
- if (s->common.busy) {
- qemu_coroutine_enter(s->common.co, NULL);
+ if (s->waiting_for_io) {
+ qemu_coroutine_enter(s->common.co);
}
}
@@ -125,10 +133,9 @@
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
if (ret < 0) {
- BlockDriverState *source = s->common.bs;
BlockErrorAction action;
- bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
action = mirror_error_action(s, false, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -142,10 +149,9 @@
MirrorOp *op = opaque;
MirrorBlockJob *s = op->s;
if (ret < 0) {
- BlockDriverState *source = s->common.bs;
BlockErrorAction action;
- bdrv_set_dirty(source, op->sector_num, op->nb_sectors);
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, op->sector_num, op->nb_sectors);
action = mirror_error_action(s, true, -ret);
if (action == BLOCK_ERROR_ACTION_REPORT && s->ret >= 0) {
s->ret = ret;
@@ -154,110 +160,108 @@
mirror_iteration_done(op, ret);
return;
}
- bdrv_aio_writev(s->target, op->sector_num, &op->qiov, op->nb_sectors,
- mirror_write_complete, op);
+ blk_aio_pwritev(s->target, op->sector_num * BDRV_SECTOR_SIZE, &op->qiov,
+ 0, mirror_write_complete, op);
}
-static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+static inline void mirror_clip_sectors(MirrorBlockJob *s,
+ int64_t sector_num,
+ int *nb_sectors)
{
- BlockDriverState *source = s->common.bs;
- int nb_sectors, sectors_per_chunk, nb_chunks;
- int64_t end, sector_num, next_chunk, next_sector, hbitmap_next_sector;
- uint64_t delay_ns = 0;
+ *nb_sectors = MIN(*nb_sectors,
+ s->bdev_length / BDRV_SECTOR_SIZE - sector_num);
+}
+
+/* Round sector_num and/or nb_sectors to target cluster if COW is needed, and
+ * return the offset of the adjusted tail sector against original. */
+static int mirror_cow_align(MirrorBlockJob *s,
+ int64_t *sector_num,
+ int *nb_sectors)
+{
+ bool need_cow;
+ int ret = 0;
+ int chunk_sectors = s->granularity >> BDRV_SECTOR_BITS;
+ int64_t align_sector_num = *sector_num;
+ int align_nb_sectors = *nb_sectors;
+ int max_sectors = chunk_sectors * s->max_iov;
+
+ need_cow = !test_bit(*sector_num / chunk_sectors, s->cow_bitmap);
+ need_cow |= !test_bit((*sector_num + *nb_sectors - 1) / chunk_sectors,
+ s->cow_bitmap);
+ if (need_cow) {
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), *sector_num,
+ *nb_sectors, &align_sector_num,
+ &align_nb_sectors);
+ }
+
+ if (align_nb_sectors > max_sectors) {
+ align_nb_sectors = max_sectors;
+ if (need_cow) {
+ align_nb_sectors = QEMU_ALIGN_DOWN(align_nb_sectors,
+ s->target_cluster_sectors);
+ }
+ }
+ /* Clipping may result in align_nb_sectors unaligned to chunk boundary, but
+ * that doesn't matter because it's already the end of source image. */
+ mirror_clip_sectors(s, align_sector_num, &align_nb_sectors);
+
+ ret = align_sector_num + align_nb_sectors - (*sector_num + *nb_sectors);
+ *sector_num = align_sector_num;
+ *nb_sectors = align_nb_sectors;
+ assert(ret >= 0);
+ return ret;
+}
+
+static inline void mirror_wait_for_io(MirrorBlockJob *s)
+{
+ assert(!s->waiting_for_io);
+ s->waiting_for_io = true;
+ qemu_coroutine_yield();
+ s->waiting_for_io = false;
+}
+
+/* Submit async read while handling COW.
+ * Returns: The number of sectors copied after and including sector_num,
+ * excluding any sectors copied prior to sector_num due to alignment.
+ * This will be nb_sectors if no alignment is necessary, or
+ * (new_end - sector_num) if tail is rounded up or down due to
+ * alignment or buffer limit.
+ */
+static int mirror_do_read(MirrorBlockJob *s, int64_t sector_num,
+ int nb_sectors)
+{
+ BlockBackend *source = s->common.blk;
+ int sectors_per_chunk, nb_chunks;
+ int ret;
MirrorOp *op;
+ int max_sectors;
- s->sector_num = hbitmap_iter_next(&s->hbi);
- if (s->sector_num < 0) {
- bdrv_dirty_iter_init(source, s->dirty_bitmap, &s->hbi);
- s->sector_num = hbitmap_iter_next(&s->hbi);
- trace_mirror_restart_iter(s,
- bdrv_get_dirty_count(source, s->dirty_bitmap));
- assert(s->sector_num >= 0);
- }
-
- hbitmap_next_sector = s->sector_num;
- sector_num = s->sector_num;
sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
- end = s->bdev_length / BDRV_SECTOR_SIZE;
+ max_sectors = sectors_per_chunk * s->max_iov;
- /* Extend the QEMUIOVector to include all adjacent blocks that will
- * be copied in this operation.
- *
- * We have to do this if we have no backing file yet in the destination,
- * and the cluster size is very large. Then we need to do COW ourselves.
- * The first time a cluster is copied, copy it entirely. Note that,
- * because both the granularity and the cluster size are powers of two,
- * the number of sectors to copy cannot exceed one cluster.
- *
- * We also want to extend the QEMUIOVector to include more adjacent
- * dirty blocks if possible, to limit the number of I/O operations and
- * run efficiently even with a small granularity.
- */
- nb_chunks = 0;
- nb_sectors = 0;
- next_sector = sector_num;
- next_chunk = sector_num / sectors_per_chunk;
+ /* We can only handle as much as buf_size at a time. */
+ nb_sectors = MIN(s->buf_size >> BDRV_SECTOR_BITS, nb_sectors);
+ nb_sectors = MIN(max_sectors, nb_sectors);
+ assert(nb_sectors);
+ ret = nb_sectors;
- /* Wait for I/O to this cluster (from a previous iteration) to be done. */
- while (test_bit(next_chunk, s->in_flight_bitmap)) {
- trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
- qemu_coroutine_yield();
+ if (s->cow_bitmap) {
+ ret += mirror_cow_align(s, &sector_num, &nb_sectors);
}
+ assert(nb_sectors << BDRV_SECTOR_BITS <= s->buf_size);
+ /* The sector range must meet granularity because:
+ * 1) Caller passes in aligned values;
+ * 2) mirror_cow_align is used only when target cluster is larger. */
+ assert(!(sector_num % sectors_per_chunk));
+ nb_chunks = DIV_ROUND_UP(nb_sectors, sectors_per_chunk);
- do {
- int added_sectors, added_chunks;
-
- if (!bdrv_get_dirty(source, s->dirty_bitmap, next_sector) ||
- test_bit(next_chunk, s->in_flight_bitmap)) {
- assert(nb_sectors > 0);
- break;
- }
-
- added_sectors = sectors_per_chunk;
- if (s->cow_bitmap && !test_bit(next_chunk, s->cow_bitmap)) {
- bdrv_round_to_clusters(s->target,
- next_sector, added_sectors,
- &next_sector, &added_sectors);
-
- /* On the first iteration, the rounding may make us copy
- * sectors before the first dirty one.
- */
- if (next_sector < sector_num) {
- assert(nb_sectors == 0);
- sector_num = next_sector;
- next_chunk = next_sector / sectors_per_chunk;
- }
- }
-
- added_sectors = MIN(added_sectors, end - (sector_num + nb_sectors));
- added_chunks = (added_sectors + sectors_per_chunk - 1) / sectors_per_chunk;
-
- /* When doing COW, it may happen that there is not enough space for
- * a full cluster. Wait if that is the case.
- */
- while (nb_chunks == 0 && s->buf_free_count < added_chunks) {
- trace_mirror_yield_buf_busy(s, nb_chunks, s->in_flight);
- qemu_coroutine_yield();
- }
- if (s->buf_free_count < nb_chunks + added_chunks) {
- trace_mirror_break_buf_busy(s, nb_chunks, s->in_flight);
- break;
- }
-
- /* We have enough free space to copy these sectors. */
- bitmap_set(s->in_flight_bitmap, next_chunk, added_chunks);
-
- nb_sectors += added_sectors;
- nb_chunks += added_chunks;
- next_sector += added_sectors;
- next_chunk += added_chunks;
- if (!s->synced && s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, added_sectors);
- }
- } while (delay_ns == 0 && next_sector < end);
+ while (s->buf_free_count < nb_chunks) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
/* Allocate a MirrorOp that is used as an AIO callback. */
- op = g_slice_new(MirrorOp);
+ op = g_new(MirrorOp, 1);
op->s = s;
op->sector_num = sector_num;
op->nb_sectors = nb_sectors;
@@ -266,34 +270,185 @@
* from s->buf_free.
*/
qemu_iovec_init(&op->qiov, nb_chunks);
- next_sector = sector_num;
while (nb_chunks-- > 0) {
MirrorBuffer *buf = QSIMPLEQ_FIRST(&s->buf_free);
- size_t remaining = (nb_sectors * BDRV_SECTOR_SIZE) - op->qiov.size;
+ size_t remaining = nb_sectors * BDRV_SECTOR_SIZE - op->qiov.size;
QSIMPLEQ_REMOVE_HEAD(&s->buf_free, next);
s->buf_free_count--;
qemu_iovec_add(&op->qiov, buf, MIN(s->granularity, remaining));
-
- /* Advance the HBitmapIter in parallel, so that we do not examine
- * the same sector twice.
- */
- if (next_sector > hbitmap_next_sector
- && bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
- hbitmap_next_sector = hbitmap_iter_next(&s->hbi);
- }
-
- next_sector += sectors_per_chunk;
}
- bdrv_reset_dirty(source, sector_num, nb_sectors);
-
/* Copy the dirty cluster. */
s->in_flight++;
s->sectors_in_flight += nb_sectors;
trace_mirror_one_iteration(s, sector_num, nb_sectors);
- bdrv_aio_readv(source, sector_num, &op->qiov, nb_sectors,
+
+ blk_aio_preadv(source, sector_num * BDRV_SECTOR_SIZE, &op->qiov, 0,
mirror_read_complete, op);
+ return ret;
+}
+
+static void mirror_do_zero_or_discard(MirrorBlockJob *s,
+ int64_t sector_num,
+ int nb_sectors,
+ bool is_discard)
+{
+ MirrorOp *op;
+
+ /* Allocate a MirrorOp that is used as an AIO callback. The qiov is zeroed
+ * so the freeing in mirror_iteration_done is a no-op. */
+ op = g_new0(MirrorOp, 1);
+ op->s = s;
+ op->sector_num = sector_num;
+ op->nb_sectors = nb_sectors;
+
+ s->in_flight++;
+ s->sectors_in_flight += nb_sectors;
+ if (is_discard) {
+ blk_aio_pdiscard(s->target, sector_num << BDRV_SECTOR_BITS,
+ op->nb_sectors << BDRV_SECTOR_BITS,
+ mirror_write_complete, op);
+ } else {
+ blk_aio_pwrite_zeroes(s->target, sector_num * BDRV_SECTOR_SIZE,
+ op->nb_sectors * BDRV_SECTOR_SIZE,
+ s->unmap ? BDRV_REQ_MAY_UNMAP : 0,
+ mirror_write_complete, op);
+ }
+}
+
+static uint64_t coroutine_fn mirror_iteration(MirrorBlockJob *s)
+{
+ BlockDriverState *source = blk_bs(s->common.blk);
+ int64_t sector_num, first_chunk;
+ uint64_t delay_ns = 0;
+ /* At least the first dirty chunk is mirrored in one iteration. */
+ int nb_chunks = 1;
+ int64_t end = s->bdev_length / BDRV_SECTOR_SIZE;
+ int sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
+ bool write_zeroes_ok = bdrv_can_write_zeroes_with_unmap(blk_bs(s->target));
+ int max_io_sectors = MAX((s->buf_size >> BDRV_SECTOR_BITS) / MAX_IN_FLIGHT,
+ MAX_IO_SECTORS);
+
+ sector_num = hbitmap_iter_next(&s->hbi);
+ if (sector_num < 0) {
+ bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
+ sector_num = hbitmap_iter_next(&s->hbi);
+ trace_mirror_restart_iter(s, bdrv_get_dirty_count(s->dirty_bitmap));
+ assert(sector_num >= 0);
+ }
+
+ first_chunk = sector_num / sectors_per_chunk;
+ while (test_bit(first_chunk, s->in_flight_bitmap)) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
+
+ block_job_pause_point(&s->common);
+
+ /* Find the number of consecutive dirty chunks following the first dirty
+ * one, and wait for in flight requests in them. */
+ while (nb_chunks * sectors_per_chunk < (s->buf_size >> BDRV_SECTOR_BITS)) {
+ int64_t hbitmap_next;
+ int64_t next_sector = sector_num + nb_chunks * sectors_per_chunk;
+ int64_t next_chunk = next_sector / sectors_per_chunk;
+ if (next_sector >= end ||
+ !bdrv_get_dirty(source, s->dirty_bitmap, next_sector)) {
+ break;
+ }
+ if (test_bit(next_chunk, s->in_flight_bitmap)) {
+ break;
+ }
+
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ if (hbitmap_next > next_sector || hbitmap_next < 0) {
+ /* The bitmap iterator's cache is stale, refresh it */
+ bdrv_set_dirty_iter(&s->hbi, next_sector);
+ hbitmap_next = hbitmap_iter_next(&s->hbi);
+ }
+ assert(hbitmap_next == next_sector);
+ nb_chunks++;
+ }
+
+ /* Clear dirty bits before querying the block status, because
+ * calling bdrv_get_block_status_above could yield - if some blocks are
+ * marked dirty in this window, we need to know.
+ */
+ bdrv_reset_dirty_bitmap(s->dirty_bitmap, sector_num,
+ nb_chunks * sectors_per_chunk);
+ bitmap_set(s->in_flight_bitmap, sector_num / sectors_per_chunk, nb_chunks);
+ while (nb_chunks > 0 && sector_num < end) {
+ int ret;
+ int io_sectors, io_sectors_acct;
+ BlockDriverState *file;
+ enum MirrorMethod {
+ MIRROR_METHOD_COPY,
+ MIRROR_METHOD_ZERO,
+ MIRROR_METHOD_DISCARD
+ } mirror_method = MIRROR_METHOD_COPY;
+
+ assert(!(sector_num % sectors_per_chunk));
+ ret = bdrv_get_block_status_above(source, NULL, sector_num,
+ nb_chunks * sectors_per_chunk,
+ &io_sectors, &file);
+ if (ret < 0) {
+ io_sectors = MIN(nb_chunks * sectors_per_chunk, max_io_sectors);
+ } else if (ret & BDRV_BLOCK_DATA) {
+ io_sectors = MIN(io_sectors, max_io_sectors);
+ }
+
+ io_sectors -= io_sectors % sectors_per_chunk;
+ if (io_sectors < sectors_per_chunk) {
+ io_sectors = sectors_per_chunk;
+ } else if (ret >= 0 && !(ret & BDRV_BLOCK_DATA)) {
+ int64_t target_sector_num;
+ int target_nb_sectors;
+ bdrv_round_sectors_to_clusters(blk_bs(s->target), sector_num,
+ io_sectors, &target_sector_num,
+ &target_nb_sectors);
+ if (target_sector_num == sector_num &&
+ target_nb_sectors == io_sectors) {
+ mirror_method = ret & BDRV_BLOCK_ZERO ?
+ MIRROR_METHOD_ZERO :
+ MIRROR_METHOD_DISCARD;
+ }
+ }
+
+ while (s->in_flight >= MAX_IN_FLIGHT) {
+ trace_mirror_yield_in_flight(s, sector_num, s->in_flight);
+ mirror_wait_for_io(s);
+ }
+
+ if (s->ret < 0) {
+ return 0;
+ }
+
+ mirror_clip_sectors(s, sector_num, &io_sectors);
+ switch (mirror_method) {
+ case MIRROR_METHOD_COPY:
+ io_sectors = mirror_do_read(s, sector_num, io_sectors);
+ io_sectors_acct = io_sectors;
+ break;
+ case MIRROR_METHOD_ZERO:
+ case MIRROR_METHOD_DISCARD:
+ mirror_do_zero_or_discard(s, sector_num, io_sectors,
+ mirror_method == MIRROR_METHOD_DISCARD);
+ if (write_zeroes_ok) {
+ io_sectors_acct = 0;
+ } else {
+ io_sectors_acct = io_sectors;
+ }
+ break;
+ default:
+ abort();
+ }
+ assert(io_sectors);
+ sector_num += io_sectors;
+ nb_chunks -= DIV_ROUND_UP(io_sectors, sectors_per_chunk);
+ if (s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, io_sectors_acct);
+ }
+ }
return delay_ns;
}
@@ -317,7 +472,7 @@
static void mirror_drain(MirrorBlockJob *s)
{
while (s->in_flight > 0) {
- qemu_coroutine_yield();
+ mirror_wait_for_io(s);
}
}
@@ -330,6 +485,12 @@
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
MirrorExitData *data = opaque;
AioContext *replace_aio_context = NULL;
+ BlockDriverState *src = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
+
+ /* Make sure that the source BDS doesn't go away before we call
+ * block_job_completed(). */
+ bdrv_ref(src);
if (s->to_replace) {
replace_aio_context = bdrv_get_aio_context(s->to_replace);
@@ -337,21 +498,24 @@
}
if (s->should_complete && data->ret == 0) {
- BlockDriverState *to_replace = s->common.bs;
+ BlockDriverState *to_replace = src;
if (s->to_replace) {
to_replace = s->to_replace;
}
- if (bdrv_get_flags(s->target) != bdrv_get_flags(to_replace)) {
- bdrv_reopen(s->target, bdrv_get_flags(to_replace), NULL);
+
+ if (bdrv_get_flags(target_bs) != bdrv_get_flags(to_replace)) {
+ bdrv_reopen(target_bs, bdrv_get_flags(to_replace), NULL);
}
- bdrv_swap(s->target, to_replace);
- if (s->common.driver->job_type == BLOCK_JOB_TYPE_COMMIT) {
- /* drop the bs loop chain formed by the swap: break the loop then
- * trigger the unref from the top one */
- BlockDriverState *p = s->base->backing_hd;
- bdrv_set_backing_hd(s->base, NULL);
- bdrv_unref(p);
- }
+
+ /* The mirror job has no requests in flight any more, but we need to
+ * drain potential other users of the BDS before changing the graph. */
+ bdrv_drained_begin(target_bs);
+ bdrv_replace_in_backing_chain(to_replace, target_bs);
+ bdrv_drained_end(target_bs);
+
+ /* We just changed the BDS the job BB refers to */
+ blk_remove_bs(job->blk);
+ blk_insert_bs(job->blk, src);
}
if (s->to_replace) {
bdrv_op_unblock_all(s->to_replace, s->replace_blocker);
@@ -362,22 +526,103 @@
aio_context_release(replace_aio_context);
}
g_free(s->replaces);
- bdrv_unref(s->target);
+ bdrv_op_unblock_all(target_bs, s->common.blocker);
+ blk_unref(s->target);
block_job_completed(&s->common, data->ret);
g_free(data);
+ bdrv_drained_end(src);
+ bdrv_unref(src);
+}
+
+static void mirror_throttle(MirrorBlockJob *s)
+{
+ int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+
+ if (now - s->last_pause_ns > SLICE_TIME) {
+ s->last_pause_ns = now;
+ block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, 0);
+ } else {
+ block_job_pause_point(&s->common);
+ }
+}
+
+static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
+{
+ int64_t sector_num, end;
+ BlockDriverState *base = s->base;
+ BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
+ int ret, n;
+
+ end = s->bdev_length / BDRV_SECTOR_SIZE;
+
+ if (base == NULL && !bdrv_has_zero_init(target_bs)) {
+ if (!bdrv_can_write_zeroes_with_unmap(target_bs)) {
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, 0, end);
+ return 0;
+ }
+
+ for (sector_num = 0; sector_num < end; ) {
+ int nb_sectors = MIN(end - sector_num,
+ QEMU_ALIGN_DOWN(INT_MAX, s->granularity) >> BDRV_SECTOR_BITS);
+
+ mirror_throttle(s);
+
+ if (block_job_is_cancelled(&s->common)) {
+ return 0;
+ }
+
+ if (s->in_flight >= MAX_IN_FLIGHT) {
+ trace_mirror_yield(s, s->in_flight, s->buf_free_count, -1);
+ mirror_wait_for_io(s);
+ continue;
+ }
+
+ mirror_do_zero_or_discard(s, sector_num, nb_sectors, false);
+ sector_num += nb_sectors;
+ }
+
+ mirror_drain(s);
+ }
+
+ /* First part, loop on the sectors and initialize the dirty bitmap. */
+ for (sector_num = 0; sector_num < end; ) {
+ /* Just to make sure we are not exceeding int limit. */
+ int nb_sectors = MIN(INT_MAX >> BDRV_SECTOR_BITS,
+ end - sector_num);
+
+ mirror_throttle(s);
+
+ if (block_job_is_cancelled(&s->common)) {
+ return 0;
+ }
+
+ ret = bdrv_is_allocated_above(bs, base, sector_num, nb_sectors, &n);
+ if (ret < 0) {
+ return ret;
+ }
+
+ assert(n > 0);
+ if (ret == 1) {
+ bdrv_set_dirty_bitmap(s->dirty_bitmap, sector_num, n);
+ }
+ sector_num += n;
+ }
+ return 0;
}
static void coroutine_fn mirror_run(void *opaque)
{
MirrorBlockJob *s = opaque;
MirrorExitData *data;
- BlockDriverState *bs = s->common.bs;
- int64_t sector_num, end, sectors_per_chunk, length;
- uint64_t last_pause_ns;
+ BlockDriverState *bs = blk_bs(s->common.blk);
+ BlockDriverState *target_bs = blk_bs(s->target);
+ int64_t length;
BlockDriverInfo bdi;
- char backing_filename[1024];
+ char backing_filename[2]; /* we only need 2 characters because we are only
+ checking for an empty string */
int ret = 0;
- int n;
+ int target_cluster_size = BDRV_SECTOR_SIZE;
if (block_job_is_cancelled(&s->common)) {
goto immediate_exit;
@@ -405,56 +650,39 @@
* the destination do COW. Instead, we copy sectors around the
* dirty data if needed. We need a bitmap to do that.
*/
- bdrv_get_backing_filename(s->target, backing_filename,
+ bdrv_get_backing_filename(target_bs, backing_filename,
sizeof(backing_filename));
- if (backing_filename[0] && !s->target->backing_hd) {
- ret = bdrv_get_info(s->target, &bdi);
- if (ret < 0) {
- goto immediate_exit;
- }
- if (s->granularity < bdi.cluster_size) {
- s->buf_size = MAX(s->buf_size, bdi.cluster_size);
- s->cow_bitmap = bitmap_new(length);
- }
+ if (!bdrv_get_info(target_bs, &bdi) && bdi.cluster_size) {
+ target_cluster_size = bdi.cluster_size;
}
+ if (backing_filename[0] && !target_bs->backing
+ && s->granularity < target_cluster_size) {
+ s->buf_size = MAX(s->buf_size, target_cluster_size);
+ s->cow_bitmap = bitmap_new(length);
+ }
+ s->target_cluster_sectors = target_cluster_size >> BDRV_SECTOR_BITS;
+ s->max_iov = MIN(bs->bl.max_iov, target_bs->bl.max_iov);
- end = s->bdev_length / BDRV_SECTOR_SIZE;
s->buf = qemu_try_blockalign(bs, s->buf_size);
if (s->buf == NULL) {
ret = -ENOMEM;
goto immediate_exit;
}
- sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
mirror_free_init(s);
+ s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
if (!s->is_none_mode) {
- /* First part, loop on the sectors and initialize the dirty bitmap. */
- BlockDriverState *base = s->base;
- for (sector_num = 0; sector_num < end; ) {
- int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
- ret = bdrv_is_allocated_above(bs, base,
- sector_num, next - sector_num, &n);
-
- if (ret < 0) {
- goto immediate_exit;
- }
-
- assert(n > 0);
- if (ret == 1) {
- bdrv_set_dirty(bs, sector_num, n);
- sector_num = next;
- } else {
- sector_num += n;
- }
+ ret = mirror_dirty_init(s);
+ if (ret < 0 || block_job_is_cancelled(&s->common)) {
+ goto immediate_exit;
}
}
- bdrv_dirty_iter_init(bs, s->dirty_bitmap, &s->hbi);
- last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ bdrv_dirty_iter_init(s->dirty_bitmap, &s->hbi);
for (;;) {
uint64_t delay_ns = 0;
- int64_t cnt;
+ int64_t cnt, delta;
bool should_complete;
if (s->ret < 0) {
@@ -462,7 +690,9 @@
goto immediate_exit;
}
- cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+ block_job_pause_point(&s->common);
+
+ cnt = bdrv_get_dirty_count(s->dirty_bitmap);
/* s->common.offset contains the number of bytes already processed so
* far, cnt is the number of dirty sectors remaining and
* s->sectors_in_flight is the number of sectors currently being
@@ -471,29 +701,27 @@
(cnt + s->sectors_in_flight) * BDRV_SECTOR_SIZE;
/* Note that even when no rate limit is applied we need to yield
- * periodically with no pending I/O so that qemu_aio_flush() returns.
+ * periodically with no pending I/O so that bdrv_drain_all() returns.
* We do so every SLICE_TIME nanoseconds, or when there is an error,
* or when the source is clean, whichever comes first.
*/
- if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - last_pause_ns < SLICE_TIME &&
+ delta = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->last_pause_ns;
+ if (delta < SLICE_TIME &&
s->common.iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
- if (s->in_flight == MAX_IN_FLIGHT || s->buf_free_count == 0 ||
+ if (s->in_flight >= MAX_IN_FLIGHT || s->buf_free_count == 0 ||
(cnt == 0 && s->in_flight > 0)) {
trace_mirror_yield(s, s->in_flight, s->buf_free_count, cnt);
- qemu_coroutine_yield();
+ mirror_wait_for_io(s);
continue;
} else if (cnt != 0) {
delay_ns = mirror_iteration(s);
- if (delay_ns == 0) {
- continue;
- }
}
}
should_complete = false;
if (s->in_flight == 0 && cnt == 0) {
trace_mirror_before_flush(s);
- ret = bdrv_flush(s->target);
+ ret = blk_flush(s->target);
if (ret < 0) {
if (mirror_error_action(s, false, -ret) ==
BLOCK_ERROR_ACTION_REPORT) {
@@ -512,7 +740,7 @@
should_complete = s->should_complete ||
block_job_is_cancelled(&s->common);
- cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+ cnt = bdrv_get_dirty_count(s->dirty_bitmap);
}
}
@@ -526,8 +754,8 @@
* mirror_populate runs.
*/
trace_mirror_before_drain(s, cnt);
- bdrv_drain(bs);
- cnt = bdrv_get_dirty_count(bs, s->dirty_bitmap);
+ bdrv_co_drain(bs);
+ cnt = bdrv_get_dirty_count(s->dirty_bitmap);
}
ret = 0;
@@ -548,7 +776,7 @@
s->common.cancelled = false;
break;
}
- last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
}
immediate_exit:
@@ -566,10 +794,12 @@
g_free(s->cow_bitmap);
g_free(s->in_flight_bitmap);
bdrv_release_dirty_bitmap(bs, s->dirty_bitmap);
- bdrv_iostatus_disable(s->target);
data = g_malloc(sizeof(*data));
data->ret = ret;
+ /* Before we switch to target in mirror_exit, make sure data doesn't
+ * change. */
+ bdrv_drained_begin(bs);
block_job_defer_to_main_loop(&s->common, mirror_exit, data);
}
@@ -578,43 +808,43 @@
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
if (speed < 0) {
- error_set(errp, QERR_INVALID_PARAMETER, "speed");
+ error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}
-static void mirror_iostatus_reset(BlockJob *job)
-{
- MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-
- bdrv_iostatus_reset(s->target);
-}
-
static void mirror_complete(BlockJob *job, Error **errp)
{
MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
- Error *local_err = NULL;
- int ret;
+ BlockDriverState *src, *target;
- ret = bdrv_open_backing_file(s->target, NULL, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- return;
- }
+ src = blk_bs(job->blk);
+ target = blk_bs(s->target);
+
if (!s->synced) {
- error_set(errp, QERR_BLOCK_JOB_NOT_READY,
- bdrv_get_device_name(job->bs));
+ error_setg(errp, "The active block job '%s' cannot be completed",
+ job->id);
return;
}
- /* check the target bs is not blocked and block all operations on it */
+ if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
+ int ret;
+
+ assert(!target->backing);
+ ret = bdrv_open_backing_file(target, NULL, "backing", errp);
+ if (ret < 0) {
+ return;
+ }
+ }
+
+ /* block all operations on to_replace bs */
if (s->replaces) {
AioContext *replace_aio_context;
- s->to_replace = check_to_replace_node(s->replaces, &local_err);
+ s->to_replace = bdrv_find_node(s->replaces);
if (!s->to_replace) {
- error_propagate(errp, local_err);
+ error_setg(errp, "Node name '%s' not found", s->replaces);
return;
}
@@ -629,33 +859,60 @@
aio_context_release(replace_aio_context);
}
+ if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
+ BlockDriverState *backing = s->is_none_mode ? src : s->base;
+ if (backing_bs(target) != backing) {
+ bdrv_set_backing_hd(target, backing);
+ }
+ }
+
s->should_complete = true;
- block_job_resume(job);
+ block_job_enter(&s->common);
+}
+
+/* There is no matching mirror_resume() because mirror_run() will begin
+ * iterating again when the job is resumed.
+ */
+static void coroutine_fn mirror_pause(BlockJob *job)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+ mirror_drain(s);
+}
+
+static void mirror_attached_aio_context(BlockJob *job, AioContext *new_context)
+{
+ MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
+
+ blk_set_aio_context(s->target, new_context);
}
static const BlockJobDriver mirror_job_driver = {
- .instance_size = sizeof(MirrorBlockJob),
- .job_type = BLOCK_JOB_TYPE_MIRROR,
- .set_speed = mirror_set_speed,
- .iostatus_reset= mirror_iostatus_reset,
- .complete = mirror_complete,
+ .instance_size = sizeof(MirrorBlockJob),
+ .job_type = BLOCK_JOB_TYPE_MIRROR,
+ .set_speed = mirror_set_speed,
+ .complete = mirror_complete,
+ .pause = mirror_pause,
+ .attached_aio_context = mirror_attached_aio_context,
};
static const BlockJobDriver commit_active_job_driver = {
- .instance_size = sizeof(MirrorBlockJob),
- .job_type = BLOCK_JOB_TYPE_COMMIT,
- .set_speed = mirror_set_speed,
- .iostatus_reset
- = mirror_iostatus_reset,
- .complete = mirror_complete,
+ .instance_size = sizeof(MirrorBlockJob),
+ .job_type = BLOCK_JOB_TYPE_COMMIT,
+ .set_speed = mirror_set_speed,
+ .complete = mirror_complete,
+ .pause = mirror_pause,
+ .attached_aio_context = mirror_attached_aio_context,
};
-static void mirror_start_job(BlockDriverState *bs, BlockDriverState *target,
- const char *replaces,
- int64_t speed, int64_t granularity,
+static void mirror_start_job(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
+ int64_t speed, uint32_t granularity,
int64_t buf_size,
+ BlockMirrorBackingMode backing_mode,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
+ bool unmap,
BlockCompletionFunc *cb,
void *opaque, Error **errp,
const BlockJobDriver *driver,
@@ -664,74 +921,80 @@
MirrorBlockJob *s;
if (granularity == 0) {
- /* Choose the default granularity based on the target file's cluster
- * size, clamped between 4k and 64k. */
- BlockDriverInfo bdi;
- if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
- granularity = MAX(4096, bdi.cluster_size);
- granularity = MIN(65536, granularity);
- } else {
- granularity = 65536;
- }
+ granularity = bdrv_get_default_bitmap_granularity(target);
}
assert ((granularity & (granularity - 1)) == 0);
- if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
- on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
- error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
+ if (buf_size < 0) {
+ error_setg(errp, "Invalid parameter 'buf-size'");
return;
}
+ if (buf_size == 0) {
+ buf_size = DEFAULT_MIRROR_BUF_SIZE;
+ }
- s = block_job_create(driver, bs, speed, cb, opaque, errp);
+ s = block_job_create(job_id, driver, bs, speed, cb, opaque, errp);
if (!s) {
return;
}
+ s->target = blk_new();
+ blk_insert_bs(s->target, target);
+
s->replaces = g_strdup(replaces);
s->on_source_error = on_source_error;
s->on_target_error = on_target_error;
- s->target = target;
s->is_none_mode = is_none_mode;
+ s->backing_mode = backing_mode;
s->base = base;
s->granularity = granularity;
- s->buf_size = MAX(buf_size, granularity);
+ s->buf_size = ROUND_UP(buf_size, granularity);
+ s->unmap = unmap;
- s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, errp);
+ s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL, errp);
if (!s->dirty_bitmap) {
+ g_free(s->replaces);
+ blk_unref(s->target);
+ block_job_unref(&s->common);
return;
}
- bdrv_set_enable_write_cache(s->target, true);
- bdrv_set_on_error(s->target, on_target_error, on_target_error);
- bdrv_iostatus_enable(s->target);
- s->common.co = qemu_coroutine_create(mirror_run);
+
+ bdrv_op_block_all(target, s->common.blocker);
+
+ s->common.co = qemu_coroutine_create(mirror_run, s);
trace_mirror_start(bs, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
}
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- const char *replaces,
- int64_t speed, int64_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
+ int64_t speed, uint32_t granularity, int64_t buf_size,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
+ bool unmap,
BlockCompletionFunc *cb,
void *opaque, Error **errp)
{
bool is_none_mode;
BlockDriverState *base;
+ if (mode == MIRROR_SYNC_MODE_INCREMENTAL) {
+ error_setg(errp, "Sync mode 'incremental' not supported");
+ return;
+ }
is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
- base = mode == MIRROR_SYNC_MODE_TOP ? bs->backing_hd : NULL;
- mirror_start_job(bs, target, replaces,
- speed, granularity, buf_size,
- on_source_error, on_target_error, cb, opaque, errp,
+ base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL;
+ mirror_start_job(job_id, bs, target, replaces,
+ speed, granularity, buf_size, backing_mode,
+ on_source_error, on_target_error, unmap, cb, opaque, errp,
&mirror_job_driver, is_none_mode, base);
}
-void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
- int64_t speed,
+void commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int64_t speed,
BlockdevOnError on_error,
BlockCompletionFunc *cb,
void *opaque, Error **errp)
@@ -772,9 +1035,9 @@
}
}
- bdrv_ref(base);
- mirror_start_job(bs, base, NULL, speed, 0, 0,
- on_error, on_error, cb, opaque, &local_err,
+ mirror_start_job(job_id, bs, base, NULL, speed, 0, 0,
+ MIRROR_LEAVE_BACKING_CHAIN,
+ on_error, on_error, false, cb, opaque, &local_err,
&commit_active_job_driver, false, base);
if (local_err) {
error_propagate(errp, local_err);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 6e1c97c..2cf3237 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -26,8 +26,8 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "nbd-client.h"
-#include "qemu/sockets.h"
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)bs))
#define INDEX_TO_HANDLE(bs, index) ((index) ^ ((uint64_t)(intptr_t)bs))
@@ -38,34 +38,49 @@
for (i = 0; i < MAX_NBD_REQUESTS; i++) {
if (s->recv_coroutine[i]) {
- qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+ qemu_coroutine_enter(s->recv_coroutine[i]);
}
}
}
-static void nbd_teardown_connection(NbdClientSession *client)
+static void nbd_teardown_connection(BlockDriverState *bs)
{
+ NbdClientSession *client = nbd_get_client_session(bs);
+
+ if (!client->ioc) { /* Already closed */
+ return;
+ }
+
/* finish any pending coroutines */
- shutdown(client->sock, 2);
+ qio_channel_shutdown(client->ioc,
+ QIO_CHANNEL_SHUTDOWN_BOTH,
+ NULL);
nbd_recv_coroutines_enter_all(client);
- nbd_client_session_detach_aio_context(client);
- closesocket(client->sock);
- client->sock = -1;
+ nbd_client_detach_aio_context(bs);
+ object_unref(OBJECT(client->sioc));
+ client->sioc = NULL;
+ object_unref(OBJECT(client->ioc));
+ client->ioc = NULL;
}
static void nbd_reply_ready(void *opaque)
{
- NbdClientSession *s = opaque;
+ BlockDriverState *bs = opaque;
+ NbdClientSession *s = nbd_get_client_session(bs);
uint64_t i;
int ret;
+ if (!s->ioc) { /* Already closed */
+ return;
+ }
+
if (s->reply.handle == 0) {
/* No reply already in flight. Fetch a header. It is possible
* that another thread has done the same thing in parallel, so
* the socket is not readable anymore.
*/
- ret = nbd_receive_reply(s->sock, &s->reply);
+ ret = nbd_receive_reply(s->ioc, &s->reply);
if (ret == -EAGAIN) {
return;
}
@@ -84,60 +99,77 @@
}
if (s->recv_coroutine[i]) {
- qemu_coroutine_enter(s->recv_coroutine[i], NULL);
+ qemu_coroutine_enter(s->recv_coroutine[i]);
return;
}
fail:
- nbd_teardown_connection(s);
+ nbd_teardown_connection(bs);
}
static void nbd_restart_write(void *opaque)
{
- NbdClientSession *s = opaque;
+ BlockDriverState *bs = opaque;
- qemu_coroutine_enter(s->send_coroutine, NULL);
+ qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine);
}
-static int nbd_co_send_request(NbdClientSession *s,
- struct nbd_request *request,
- QEMUIOVector *qiov, int offset)
+static int nbd_co_send_request(BlockDriverState *bs,
+ struct nbd_request *request,
+ QEMUIOVector *qiov)
{
+ NbdClientSession *s = nbd_get_client_session(bs);
AioContext *aio_context;
- int rc, ret;
+ int rc, ret, i;
qemu_co_mutex_lock(&s->send_mutex);
- s->send_coroutine = qemu_coroutine_self();
- aio_context = bdrv_get_aio_context(s->bs);
- aio_set_fd_handler(aio_context, s->sock,
- nbd_reply_ready, nbd_restart_write, s);
- if (qiov) {
- if (!s->is_unix) {
- socket_set_cork(s->sock, 1);
+
+ for (i = 0; i < MAX_NBD_REQUESTS; i++) {
+ if (s->recv_coroutine[i] == NULL) {
+ s->recv_coroutine[i] = qemu_coroutine_self();
+ break;
}
- rc = nbd_send_request(s->sock, request);
+ }
+
+ g_assert(qemu_in_coroutine());
+ assert(i < MAX_NBD_REQUESTS);
+ request->handle = INDEX_TO_HANDLE(s, i);
+
+ if (!s->ioc) {
+ qemu_co_mutex_unlock(&s->send_mutex);
+ return -EPIPE;
+ }
+
+ s->send_coroutine = qemu_coroutine_self();
+ aio_context = bdrv_get_aio_context(bs);
+
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
+ nbd_reply_ready, nbd_restart_write, bs);
+ if (qiov) {
+ qio_channel_set_cork(s->ioc, true);
+ rc = nbd_send_request(s->ioc, request);
if (rc >= 0) {
- ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+ false);
if (ret != request->len) {
rc = -EIO;
}
}
- if (!s->is_unix) {
- socket_set_cork(s->sock, 0);
- }
+ qio_channel_set_cork(s->ioc, false);
} else {
- rc = nbd_send_request(s->sock, request);
+ rc = nbd_send_request(s->ioc, request);
}
- aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, s);
+ aio_set_fd_handler(aio_context, s->sioc->fd, false,
+ nbd_reply_ready, NULL, bs);
s->send_coroutine = NULL;
qemu_co_mutex_unlock(&s->send_mutex);
return rc;
}
static void nbd_co_receive_reply(NbdClientSession *s,
- struct nbd_request *request, struct nbd_reply *reply,
- QEMUIOVector *qiov, int offset)
+ struct nbd_request *request,
+ struct nbd_reply *reply,
+ QEMUIOVector *qiov)
{
int ret;
@@ -145,12 +177,13 @@
* peek at the next reply and avoid yielding if it's ours? */
qemu_coroutine_yield();
*reply = s->reply;
- if (reply->handle != request->handle) {
+ if (reply->handle != request->handle ||
+ !s->ioc) {
reply->error = EIO;
} else {
if (qiov && reply->error == 0) {
- ret = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
- offset, request->len);
+ ret = nbd_wr_syncv(s->ioc, qiov->iov, qiov->niov, request->len,
+ true);
if (ret != request->len) {
reply->error = EIO;
}
@@ -164,8 +197,6 @@
static void nbd_coroutine_start(NbdClientSession *s,
struct nbd_request *request)
{
- int i;
-
/* Poor man semaphore. The free_sema is locked when no other request
* can be accepted, and unlocked after receiving one reply. */
if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
@@ -174,15 +205,7 @@
}
s->in_flight++;
- for (i = 0; i < MAX_NBD_REQUESTS; i++) {
- if (s->recv_coroutine[i] == NULL) {
- s->recv_coroutine[i] = qemu_coroutine_self();
- break;
- }
- }
-
- assert(i < MAX_NBD_REQUESTS);
- request->handle = INDEX_TO_HANDLE(s, i);
+ /* s->recv_coroutine[i] is set by nbd_co_send_request() under send_mutex. */
}
static void nbd_coroutine_end(NbdClientSession *s,
@@ -195,98 +218,65 @@
}
}
-static int nbd_co_readv_1(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov,
- int offset)
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- struct nbd_request request = { .type = NBD_CMD_READ };
+ NbdClientSession *client = nbd_get_client_session(bs);
+ struct nbd_request request = {
+ .type = NBD_CMD_READ,
+ .from = offset,
+ .len = bytes,
+ };
struct nbd_reply reply;
ssize_t ret;
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
+ assert(bytes <= NBD_MAX_BUFFER_SIZE);
+ assert(!flags);
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(client, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, qiov, offset);
+ nbd_co_receive_reply(client, &request, &reply, qiov);
}
nbd_coroutine_end(client, &request);
return -reply.error;
-
}
-static int nbd_co_writev_1(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov,
- int offset)
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags)
{
- struct nbd_request request = { .type = NBD_CMD_WRITE };
+ NbdClientSession *client = nbd_get_client_session(bs);
+ struct nbd_request request = {
+ .type = NBD_CMD_WRITE,
+ .from = offset,
+ .len = bytes,
+ };
struct nbd_reply reply;
ssize_t ret;
- if (!bdrv_enable_write_cache(client->bs) &&
- (client->nbdflags & NBD_FLAG_SEND_FUA)) {
+ if (flags & BDRV_REQ_FUA) {
+ assert(client->nbdflags & NBD_FLAG_SEND_FUA);
request.type |= NBD_CMD_FLAG_FUA;
}
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
+ assert(bytes <= NBD_MAX_BUFFER_SIZE);
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(client, &request, qiov, offset);
+ ret = nbd_co_send_request(bs, &request, qiov);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
-/* qemu-nbd has a limit of slightly less than 1M per request. Try to
- * remain aligned to 4K. */
-#define NBD_MAX_SECTORS 2040
-
-int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+int nbd_client_co_flush(BlockDriverState *bs)
{
- int offset = 0;
- int ret;
- while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_readv_1(client, sector_num,
- NBD_MAX_SECTORS, qiov, offset);
- if (ret < 0) {
- return ret;
- }
- offset += NBD_MAX_SECTORS * 512;
- sector_num += NBD_MAX_SECTORS;
- nb_sectors -= NBD_MAX_SECTORS;
- }
- return nbd_co_readv_1(client, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- int offset = 0;
- int ret;
- while (nb_sectors > NBD_MAX_SECTORS) {
- ret = nbd_co_writev_1(client, sector_num,
- NBD_MAX_SECTORS, qiov, offset);
- if (ret < 0) {
- return ret;
- }
- offset += NBD_MAX_SECTORS * 512;
- sector_num += NBD_MAX_SECTORS;
- nb_sectors -= NBD_MAX_SECTORS;
- }
- return nbd_co_writev_1(client, sector_num, nb_sectors, qiov, offset);
-}
-
-int nbd_client_session_co_flush(NbdClientSession *client)
-{
+ NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = { .type = NBD_CMD_FLUSH };
struct nbd_reply reply;
ssize_t ret;
@@ -295,109 +285,121 @@
return 0;
}
- if (client->nbdflags & NBD_FLAG_SEND_FUA) {
- request.type |= NBD_CMD_FLAG_FUA;
- }
-
request.from = 0;
request.len = 0;
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(client, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
-int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
- int nb_sectors)
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
- struct nbd_request request = { .type = NBD_CMD_TRIM };
+ NbdClientSession *client = nbd_get_client_session(bs);
+ struct nbd_request request = {
+ .type = NBD_CMD_TRIM,
+ .from = offset,
+ .len = count,
+ };
struct nbd_reply reply;
ssize_t ret;
if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
return 0;
}
- request.from = sector_num * 512;
- request.len = nb_sectors * 512;
nbd_coroutine_start(client, &request);
- ret = nbd_co_send_request(client, &request, NULL, 0);
+ ret = nbd_co_send_request(bs, &request, NULL);
if (ret < 0) {
reply.error = -ret;
} else {
- nbd_co_receive_reply(client, &request, &reply, NULL, 0);
+ nbd_co_receive_reply(client, &request, &reply, NULL);
}
nbd_coroutine_end(client, &request);
return -reply.error;
}
-void nbd_client_session_detach_aio_context(NbdClientSession *client)
+void nbd_client_detach_aio_context(BlockDriverState *bs)
{
- aio_set_fd_handler(bdrv_get_aio_context(client->bs), client->sock,
- NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs),
+ nbd_get_client_session(bs)->sioc->fd,
+ false, NULL, NULL, NULL);
}
-void nbd_client_session_attach_aio_context(NbdClientSession *client,
- AioContext *new_context)
+void nbd_client_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
{
- aio_set_fd_handler(new_context, client->sock,
- nbd_reply_ready, NULL, client);
+ aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sioc->fd,
+ false, nbd_reply_ready, NULL, bs);
}
-void nbd_client_session_close(NbdClientSession *client)
+void nbd_client_close(BlockDriverState *bs)
{
+ NbdClientSession *client = nbd_get_client_session(bs);
struct nbd_request request = {
.type = NBD_CMD_DISC,
.from = 0,
.len = 0
};
- if (!client->bs) {
- return;
- }
- if (client->sock == -1) {
+ if (client->ioc == NULL) {
return;
}
- nbd_send_request(client->sock, &request);
+ nbd_send_request(client->ioc, &request);
- nbd_teardown_connection(client);
- client->bs = NULL;
+ nbd_teardown_connection(bs);
}
-int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
- int sock, const char *export)
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sioc,
+ const char *export,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
+ Error **errp)
{
+ NbdClientSession *client = nbd_get_client_session(bs);
int ret;
/* NBD handshake */
logout("session init %s\n", export);
- qemu_set_block(sock);
- ret = nbd_receive_negotiate(sock, export,
- &client->nbdflags, &client->size,
- &client->blocksize);
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);
+
+ ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
+ &client->nbdflags,
+ tlscreds, hostname,
+ &client->ioc,
+ &client->size, errp);
if (ret < 0) {
logout("Failed to negotiate with the NBD server\n");
- closesocket(sock);
return ret;
}
+ if (client->nbdflags & NBD_FLAG_SEND_FUA) {
+ bs->supported_write_flags = BDRV_REQ_FUA;
+ }
qemu_co_mutex_init(&client->send_mutex);
qemu_co_mutex_init(&client->free_sema);
- client->bs = bs;
- client->sock = sock;
+ client->sioc = sioc;
+ object_ref(OBJECT(client->sioc));
+
+ if (!client->ioc) {
+ client->ioc = QIO_CHANNEL(sioc);
+ object_ref(OBJECT(client->ioc));
+ }
/* Now that we're connected, set the socket to be non-blocking and
* kick the reply mechanism. */
- qemu_set_nonblock(sock);
- nbd_client_session_attach_aio_context(client, bdrv_get_aio_context(bs));
+ qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
+
+ nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));
logout("Established connection with NBD server\n");
return 0;
diff --git a/block/nbd-client.h b/block/nbd-client.h
index cd478f3..044aca4 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -4,6 +4,7 @@
#include "qemu-common.h"
#include "block/nbd.h"
#include "block/block_int.h"
+#include "io/channel-socket.h"
/* #define DEBUG_NBD */
@@ -17,10 +18,10 @@
#define MAX_NBD_REQUESTS 16
typedef struct NbdClientSession {
- int sock;
- uint32_t nbdflags;
+ QIOChannelSocket *sioc; /* The master data channel */
+ QIOChannel *ioc; /* The current I/O channel which may differ (e.g. TLS) */
+ uint16_t nbdflags;
off_t size;
- size_t blocksize;
CoMutex send_mutex;
CoMutex free_sema;
@@ -31,24 +32,27 @@
struct nbd_reply reply;
bool is_unix;
-
- BlockDriverState *bs;
} NbdClientSession;
-int nbd_client_session_init(NbdClientSession *client, BlockDriverState *bs,
- int sock, const char *export_name);
-void nbd_client_session_close(NbdClientSession *client);
+NbdClientSession *nbd_get_client_session(BlockDriverState *bs);
-int nbd_client_session_co_discard(NbdClientSession *client, int64_t sector_num,
- int nb_sectors);
-int nbd_client_session_co_flush(NbdClientSession *client);
-int nbd_client_session_co_writev(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-int nbd_client_session_co_readv(NbdClientSession *client, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
+int nbd_client_init(BlockDriverState *bs,
+ QIOChannelSocket *sock,
+ const char *export_name,
+ QCryptoTLSCreds *tlscreds,
+ const char *hostname,
+ Error **errp);
+void nbd_client_close(BlockDriverState *bs);
-void nbd_client_session_detach_aio_context(NbdClientSession *client);
-void nbd_client_session_attach_aio_context(NbdClientSession *client,
- AioContext *new_context);
+int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count);
+int nbd_client_co_flush(BlockDriverState *bs);
+int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags);
+int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int flags);
+
+void nbd_client_detach_aio_context(BlockDriverState *bs);
+void nbd_client_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context);
#endif /* NBD_CLIENT_H */
diff --git a/block/nbd.c b/block/nbd.c
index 04cc845..6bc06d6 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -26,24 +26,25 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/nbd-client.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "qemu/sockets.h"
#include "qapi/qmp/qdict.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
-
-#include <sys/types.h>
-#include <unistd.h>
+#include "qemu/cutils.h"
#define EN_OPTSTR ":exportname="
typedef struct BDRVNBDState {
NbdClientSession client;
- QemuOpts *socket_opts;
+
+ /* For nbd_refresh_filename() */
+ char *path, *host, *port, *export, *tlscredsid;
} BDRVNBDState;
static int nbd_parse_uri(const char *filename, QDict *options)
@@ -190,132 +191,232 @@
g_free(file);
}
-static void nbd_config(BDRVNBDState *s, QDict *options, char **export,
- Error **errp)
+static SocketAddress *nbd_config(BDRVNBDState *s, QemuOpts *opts, Error **errp)
{
- Error *local_err = NULL;
+ SocketAddress *saddr;
- if (qdict_haskey(options, "path") == qdict_haskey(options, "host")) {
- if (qdict_haskey(options, "path")) {
+ s->path = g_strdup(qemu_opt_get(opts, "path"));
+ s->host = g_strdup(qemu_opt_get(opts, "host"));
+
+ if (!s->path == !s->host) {
+ if (s->path) {
error_setg(errp, "path and host may not be used at the same time.");
} else {
error_setg(errp, "one of path and host must be specified.");
}
- return;
+ return NULL;
}
- s->client.is_unix = qdict_haskey(options, "path");
- s->socket_opts = qemu_opts_create(&socket_optslist, NULL, 0,
- &error_abort);
+ saddr = g_new0(SocketAddress, 1);
- qemu_opts_absorb_qdict(s->socket_opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
- if (!qemu_opt_get(s->socket_opts, "port")) {
- qemu_opt_set_number(s->socket_opts, "port", NBD_DEFAULT_PORT);
- }
-
- *export = g_strdup(qdict_get_try_str(options, "export"));
- if (*export) {
- qdict_del(options, "export");
- }
-}
-
-static int nbd_establish_connection(BlockDriverState *bs, Error **errp)
-{
- BDRVNBDState *s = bs->opaque;
- int sock;
-
- if (s->client.is_unix) {
- sock = unix_connect_opts(s->socket_opts, errp, NULL, NULL);
+ if (s->path) {
+ UnixSocketAddress *q_unix;
+ saddr->type = SOCKET_ADDRESS_KIND_UNIX;
+ q_unix = saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
+ q_unix->path = g_strdup(s->path);
} else {
- sock = inet_connect_opts(s->socket_opts, errp, NULL, NULL);
- if (sock >= 0) {
- socket_set_nodelay(sock);
+ InetSocketAddress *inet;
+
+ s->port = g_strdup(qemu_opt_get(opts, "port"));
+
+ saddr->type = SOCKET_ADDRESS_KIND_INET;
+ inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
+ inet->host = g_strdup(s->host);
+ inet->port = g_strdup(s->port);
+ if (!inet->port) {
+ inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
}
}
- /* Failed to establish connection */
- if (sock < 0) {
- logout("Failed to establish connection to NBD server\n");
- return -errno;
+ s->client.is_unix = saddr->type == SOCKET_ADDRESS_KIND_UNIX;
+
+ s->export = g_strdup(qemu_opt_get(opts, "export"));
+
+ return saddr;
+}
+
+NbdClientSession *nbd_get_client_session(BlockDriverState *bs)
+{
+ BDRVNBDState *s = bs->opaque;
+ return &s->client;
+}
+
+static QIOChannelSocket *nbd_establish_connection(SocketAddress *saddr,
+ Error **errp)
+{
+ QIOChannelSocket *sioc;
+ Error *local_err = NULL;
+
+ sioc = qio_channel_socket_new();
+
+ qio_channel_socket_connect_sync(sioc,
+ saddr,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return NULL;
}
- return sock;
+ qio_channel_set_delay(QIO_CHANNEL(sioc), false);
+
+ return sioc;
}
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+ Object *obj;
+ QCryptoTLSCreds *creds;
+
+ obj = object_resolve_path_component(
+ object_get_objects_root(), id);
+ if (!obj) {
+ error_setg(errp, "No TLS credentials with id '%s'",
+ id);
+ return NULL;
+ }
+ creds = (QCryptoTLSCreds *)
+ object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+ if (!creds) {
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
+ id);
+ return NULL;
+ }
+
+ if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_CLIENT) {
+ error_setg(errp,
+ "Expecting TLS credentials with a client endpoint");
+ return NULL;
+ }
+ object_ref(obj);
+ return creds;
+}
+
+
+static QemuOptsList nbd_runtime_opts = {
+ .name = "nbd",
+ .head = QTAILQ_HEAD_INITIALIZER(nbd_runtime_opts.head),
+ .desc = {
+ {
+ .name = "host",
+ .type = QEMU_OPT_STRING,
+ .help = "TCP host to connect to",
+ },
+ {
+ .name = "port",
+ .type = QEMU_OPT_STRING,
+ .help = "TCP port to connect to",
+ },
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ .help = "Unix socket path to connect to",
+ },
+ {
+ .name = "export",
+ .type = QEMU_OPT_STRING,
+ .help = "Name of the NBD export to open",
+ },
+ {
+ .name = "tls-creds",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of the TLS credentials to use",
+ },
+ },
+};
+
static int nbd_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVNBDState *s = bs->opaque;
- char *export = NULL;
- int result, sock;
+ QemuOpts *opts = NULL;
Error *local_err = NULL;
+ QIOChannelSocket *sioc = NULL;
+ SocketAddress *saddr = NULL;
+ QCryptoTLSCreds *tlscreds = NULL;
+ const char *hostname = NULL;
+ int ret = -EINVAL;
- /* Pop the config into our state object. Exit if invalid. */
- nbd_config(s, options, &export, &local_err);
+ opts = qemu_opts_create(&nbd_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return -EINVAL;
+ goto error;
+ }
+
+ /* Pop the config into our state object. Exit if invalid. */
+ saddr = nbd_config(s, opts, errp);
+ if (!saddr) {
+ goto error;
+ }
+
+ s->tlscredsid = g_strdup(qemu_opt_get(opts, "tls-creds"));
+ if (s->tlscredsid) {
+ tlscreds = nbd_get_tls_creds(s->tlscredsid, errp);
+ if (!tlscreds) {
+ goto error;
+ }
+
+ if (saddr->type != SOCKET_ADDRESS_KIND_INET) {
+ error_setg(errp, "TLS only supported over IP sockets");
+ goto error;
+ }
+ hostname = saddr->u.inet.data->host;
}
/* establish TCP connection, return error if it fails
* TODO: Configurable retry-until-timeout behaviour.
*/
- sock = nbd_establish_connection(bs, errp);
- if (sock < 0) {
- return sock;
+ sioc = nbd_establish_connection(saddr, errp);
+ if (!sioc) {
+ ret = -ECONNREFUSED;
+ goto error;
}
/* NBD handshake */
- result = nbd_client_session_init(&s->client, bs, sock, export);
- g_free(export);
- return result;
-}
-
-static int nbd_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVNBDState *s = bs->opaque;
-
- return nbd_client_session_co_readv(&s->client, sector_num,
- nb_sectors, qiov);
-}
-
-static int nbd_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
-{
- BDRVNBDState *s = bs->opaque;
-
- return nbd_client_session_co_writev(&s->client, sector_num,
- nb_sectors, qiov);
+ ret = nbd_client_init(bs, sioc, s->export,
+ tlscreds, hostname, errp);
+ error:
+ if (sioc) {
+ object_unref(OBJECT(sioc));
+ }
+ if (tlscreds) {
+ object_unref(OBJECT(tlscreds));
+ }
+ if (ret < 0) {
+ g_free(s->path);
+ g_free(s->host);
+ g_free(s->port);
+ g_free(s->export);
+ g_free(s->tlscredsid);
+ }
+ qapi_free_SocketAddress(saddr);
+ qemu_opts_del(opts);
+ return ret;
}
static int nbd_co_flush(BlockDriverState *bs)
{
- BDRVNBDState *s = bs->opaque;
-
- return nbd_client_session_co_flush(&s->client);
+ return nbd_client_co_flush(bs);
}
-static int nbd_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+static void nbd_refresh_limits(BlockDriverState *bs, Error **errp)
{
- BDRVNBDState *s = bs->opaque;
-
- return nbd_client_session_co_discard(&s->client, sector_num,
- nb_sectors);
+ bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE;
+ bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE;
}
static void nbd_close(BlockDriverState *bs)
{
BDRVNBDState *s = bs->opaque;
- qemu_opts_del(s->socket_opts);
- nbd_client_session_close(&s->client);
+ nbd_client_close(bs);
+
+ g_free(s->path);
+ g_free(s->host);
+ g_free(s->port);
+ g_free(s->export);
+ g_free(s->tlscredsid);
}
static int64_t nbd_getlength(BlockDriverState *bs)
@@ -327,59 +428,56 @@
static void nbd_detach_aio_context(BlockDriverState *bs)
{
- BDRVNBDState *s = bs->opaque;
-
- nbd_client_session_detach_aio_context(&s->client);
+ nbd_client_detach_aio_context(bs);
}
static void nbd_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
- BDRVNBDState *s = bs->opaque;
-
- nbd_client_session_attach_aio_context(&s->client, new_context);
+ nbd_client_attach_aio_context(bs, new_context);
}
-static void nbd_refresh_filename(BlockDriverState *bs)
+static void nbd_refresh_filename(BlockDriverState *bs, QDict *options)
{
+ BDRVNBDState *s = bs->opaque;
QDict *opts = qdict_new();
- const char *path = qdict_get_try_str(bs->options, "path");
- const char *host = qdict_get_try_str(bs->options, "host");
- const char *port = qdict_get_try_str(bs->options, "port");
- const char *export = qdict_get_try_str(bs->options, "export");
qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("nbd")));
- if (path && export) {
+ if (s->path && s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd+unix:///%s?socket=%s", export, path);
- } else if (path && !export) {
+ "nbd+unix:///%s?socket=%s", s->export, s->path);
+ } else if (s->path && !s->export) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd+unix://?socket=%s", path);
- } else if (!path && export && port) {
+ "nbd+unix://?socket=%s", s->path);
+ } else if (!s->path && s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s:%s/%s", host, port, export);
- } else if (!path && export && !port) {
+ "nbd://%s:%s/%s", s->host, s->port, s->export);
+ } else if (!s->path && s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s/%s", host, export);
- } else if (!path && !export && port) {
+ "nbd://%s/%s", s->host, s->export);
+ } else if (!s->path && !s->export && s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s:%s", host, port);
- } else if (!path && !export && !port) {
+ "nbd://%s:%s", s->host, s->port);
+ } else if (!s->path && !s->export && !s->port) {
snprintf(bs->exact_filename, sizeof(bs->exact_filename),
- "nbd://%s", host);
+ "nbd://%s", s->host);
}
- if (path) {
- qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(path)));
- } else if (port) {
- qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
- qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(port)));
+ if (s->path) {
+ qdict_put_obj(opts, "path", QOBJECT(qstring_from_str(s->path)));
+ } else if (s->port) {
+ qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
+ qdict_put_obj(opts, "port", QOBJECT(qstring_from_str(s->port)));
} else {
- qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(host)));
+ qdict_put_obj(opts, "host", QOBJECT(qstring_from_str(s->host)));
}
- if (export) {
- qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(export)));
+ if (s->export) {
+ qdict_put_obj(opts, "export", QOBJECT(qstring_from_str(s->export)));
+ }
+ if (s->tlscredsid) {
+ qdict_put_obj(opts, "tls-creds",
+ QOBJECT(qstring_from_str(s->tlscredsid)));
}
bs->full_open_options = opts;
@@ -391,11 +489,12 @@
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
@@ -408,11 +507,12 @@
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
@@ -425,11 +525,12 @@
.instance_size = sizeof(BDRVNBDState),
.bdrv_parse_filename = nbd_parse_filename,
.bdrv_file_open = nbd_open,
- .bdrv_co_readv = nbd_co_readv,
- .bdrv_co_writev = nbd_co_writev,
+ .bdrv_co_preadv = nbd_client_co_preadv,
+ .bdrv_co_pwritev = nbd_client_co_pwritev,
.bdrv_close = nbd_close,
.bdrv_co_flush_to_os = nbd_co_flush,
- .bdrv_co_discard = nbd_co_discard,
+ .bdrv_co_pdiscard = nbd_client_co_pdiscard,
+ .bdrv_refresh_limits = nbd_refresh_limits,
.bdrv_getlength = nbd_getlength,
.bdrv_detach_aio_context = nbd_detach_aio_context,
.bdrv_attach_aio_context = nbd_attach_aio_context,
diff --git a/block/nfs.c b/block/nfs.c
index c76e368..8602a44 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -1,7 +1,7 @@
/*
* QEMU Block driver for native access to files on NFS shares
*
- * Copyright (c) 2014 Peter Lieven <pl@kamp.de>
+ * Copyright (c) 2014-2016 Peter Lieven <pl@kamp.de>
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,25 +22,33 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include <poll.h>
#include "qemu-common.h"
#include "qemu/config-file.h"
#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "trace.h"
#include "qemu/iov.h"
#include "qemu/uri.h"
+#include "qemu/cutils.h"
#include "sysemu/sysemu.h"
#include <nfsc/libnfs.h>
+#define QEMU_NFS_MAX_READAHEAD_SIZE 1048576
+#define QEMU_NFS_MAX_PAGECACHE_SIZE (8388608 / NFS_BLKSIZE)
+#define QEMU_NFS_MAX_DEBUG_LEVEL 2
+
typedef struct NFSClient {
struct nfs_context *context;
struct nfsfh *fh;
int events;
bool has_zero_init;
AioContext *aio_context;
+ blkcnt_t st_blocks;
+ bool cache_used;
} NFSClient;
typedef struct NFSRPC {
@@ -60,11 +68,10 @@
{
int ev = nfs_which_events(client->context);
if (ev != client->events) {
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false,
(ev & POLLIN) ? nfs_process_read : NULL,
- (ev & POLLOUT) ? nfs_process_write : NULL,
- client);
+ (ev & POLLOUT) ? nfs_process_write : NULL, client);
}
client->events = ev;
@@ -97,7 +104,7 @@
NFSRPC *task = opaque;
task->complete = 1;
qemu_bh_delete(task->bh);
- qemu_coroutine_enter(task->co, NULL);
+ qemu_coroutine_enter(task->co);
}
static void
@@ -239,9 +246,8 @@
{
NFSClient *client = bs->opaque;
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
client->events = 0;
}
@@ -260,9 +266,8 @@
if (client->fh) {
nfs_close(client->context, client->fh);
}
- aio_set_fd_handler(client->aio_context,
- nfs_get_fd(client->context),
- NULL, NULL, NULL);
+ aio_set_fd_handler(client->aio_context, nfs_get_fd(client->context),
+ false, NULL, NULL, NULL);
nfs_destroy_context(client->context);
}
memset(client, 0, sizeof(NFSClient));
@@ -275,7 +280,7 @@
}
static int64_t nfs_client_open(NFSClient *client, const char *filename,
- int flags, Error **errp)
+ int flags, Error **errp, int open_flags)
{
int ret = -EINVAL, i;
struct stat st;
@@ -327,7 +332,49 @@
nfs_set_tcp_syncnt(client->context, val);
#ifdef LIBNFS_FEATURE_READAHEAD
} else if (!strcmp(qp->p[i].name, "readahead")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS readahead "
+ "if cache.direct = on");
+ goto fail;
+ }
+ if (val > QEMU_NFS_MAX_READAHEAD_SIZE) {
+ error_report("NFS Warning: Truncating NFS readahead"
+ " size to %d", QEMU_NFS_MAX_READAHEAD_SIZE);
+ val = QEMU_NFS_MAX_READAHEAD_SIZE;
+ }
nfs_set_readahead(client->context, val);
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ nfs_set_pagecache_ttl(client->context, 0);
+#endif
+ client->cache_used = true;
+#endif
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ nfs_set_pagecache_ttl(client->context, 0);
+ } else if (!strcmp(qp->p[i].name, "pagecache")) {
+ if (open_flags & BDRV_O_NOCACHE) {
+ error_setg(errp, "Cannot enable NFS pagecache "
+ "if cache.direct = on");
+ goto fail;
+ }
+ if (val > QEMU_NFS_MAX_PAGECACHE_SIZE) {
+ error_report("NFS Warning: Truncating NFS pagecache"
+ " size to %d pages", QEMU_NFS_MAX_PAGECACHE_SIZE);
+ val = QEMU_NFS_MAX_PAGECACHE_SIZE;
+ }
+ nfs_set_pagecache(client->context, val);
+ nfs_set_pagecache_ttl(client->context, 0);
+ client->cache_used = true;
+#endif
+#ifdef LIBNFS_FEATURE_DEBUG
+ } else if (!strcmp(qp->p[i].name, "debug")) {
+ /* limit the maximum debug level to avoid potential flooding
+ * of our log files. */
+ if (val > QEMU_NFS_MAX_DEBUG_LEVEL) {
+ error_report("NFS Warning: Limiting NFS debug level"
+ " to %d", QEMU_NFS_MAX_DEBUG_LEVEL);
+ val = QEMU_NFS_MAX_DEBUG_LEVEL;
+ }
+ nfs_set_debug(client->context, val);
#endif
} else {
error_setg(errp, "Unknown NFS parameter name: %s",
@@ -367,6 +414,7 @@
}
ret = DIV_ROUND_UP(st.st_size, BDRV_SECTOR_SIZE);
+ client->st_blocks = st.st_blocks;
client->has_zero_init = S_ISREG(st.st_mode);
goto out;
fail:
@@ -398,7 +446,7 @@
}
ret = nfs_client_open(client, qemu_opt_get(opts, "filename"),
(flags & BDRV_O_RDWR) ? O_RDWR : O_RDONLY,
- errp);
+ errp, bs->open_flags);
if (ret < 0) {
goto out;
}
@@ -409,6 +457,19 @@
return ret;
}
+static QemuOptsList nfs_create_opts = {
+ .name = "nfs-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(nfs_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size"
+ },
+ { /* end of list */ }
+ }
+};
+
static int nfs_file_create(const char *url, QemuOpts *opts, Error **errp)
{
int ret = 0;
@@ -421,7 +482,7 @@
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
- ret = nfs_client_open(client, url, O_CREAT, errp);
+ ret = nfs_client_open(client, url, O_CREAT, errp, 0);
if (ret < 0) {
goto out;
}
@@ -444,6 +505,11 @@
NFSRPC task = {0};
struct stat st;
+ if (bdrv_is_read_only(bs) &&
+ !(bs->open_flags & BDRV_O_NOCACHE)) {
+ return client->st_blocks * 512;
+ }
+
task.st = &st;
if (nfs_fstat_async(client->context, client->fh, nfs_co_generic_cb,
&task) != 0) {
@@ -455,7 +521,7 @@
aio_poll(client->aio_context, true);
}
- return (task.ret < 0 ? task.ret : st.st_blocks * st.st_blksize);
+ return (task.ret < 0 ? task.ret : st.st_blocks * 512);
}
static int nfs_file_truncate(BlockDriverState *bs, int64_t offset)
@@ -464,12 +530,57 @@
return nfs_ftruncate(client->context, client->fh, offset);
}
+/* Note that this will not re-establish a connection with the NFS server
+ * - it is effectively a NOP. */
+static int nfs_reopen_prepare(BDRVReopenState *state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ NFSClient *client = state->bs->opaque;
+ struct stat st;
+ int ret = 0;
+
+ if (state->flags & BDRV_O_RDWR && bdrv_is_read_only(state->bs)) {
+ error_setg(errp, "Cannot open a read-only mount as read-write");
+ return -EACCES;
+ }
+
+ if ((state->flags & BDRV_O_NOCACHE) && client->cache_used) {
+ error_setg(errp, "Cannot disable cache if libnfs readahead or"
+ " pagecache is enabled");
+ return -EINVAL;
+ }
+
+ /* Update cache for read-only reopens */
+ if (!(state->flags & BDRV_O_RDWR)) {
+ ret = nfs_fstat(client->context, client->fh, &st);
+ if (ret < 0) {
+ error_setg(errp, "Failed to fstat file: %s",
+ nfs_get_error(client->context));
+ return ret;
+ }
+ client->st_blocks = st.st_blocks;
+ }
+
+ return 0;
+}
+
+#ifdef LIBNFS_FEATURE_PAGECACHE
+static void nfs_invalidate_cache(BlockDriverState *bs,
+ Error **errp)
+{
+ NFSClient *client = bs->opaque;
+ nfs_pagecache_invalidate(client->context, client->fh);
+}
+#endif
+
static BlockDriver bdrv_nfs = {
.format_name = "nfs",
.protocol_name = "nfs",
.instance_size = sizeof(NFSClient),
.bdrv_needs_filename = true,
+ .create_opts = &nfs_create_opts,
+
.bdrv_has_zero_init = nfs_has_zero_init,
.bdrv_get_allocated_file_size = nfs_get_allocated_file_size,
.bdrv_truncate = nfs_file_truncate,
@@ -477,6 +588,7 @@
.bdrv_file_open = nfs_file_open,
.bdrv_close = nfs_file_close,
.bdrv_create = nfs_file_create,
+ .bdrv_reopen_prepare = nfs_reopen_prepare,
.bdrv_co_readv = nfs_co_readv,
.bdrv_co_writev = nfs_co_writev,
@@ -484,6 +596,10 @@
.bdrv_detach_aio_context = nfs_detach_aio_context,
.bdrv_attach_aio_context = nfs_attach_aio_context,
+
+#ifdef LIBNFS_FEATURE_PAGECACHE
+ .bdrv_invalidate_cache = nfs_invalidate_cache,
+#endif
};
static void nfs_block_init(void)
diff --git a/block/null.c b/block/null.c
index ec2bd27..b511010 100644
--- a/block/null.c
+++ b/block/null.c
@@ -10,10 +10,19 @@
* See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qstring.h"
#include "block/block_int.h"
+#define NULL_OPT_LATENCY "latency-ns"
+#define NULL_OPT_ZEROES "read-zeroes"
+
typedef struct {
int64_t length;
+ int64_t latency_ns;
+ bool read_zeroes;
} BDRVNullState;
static QemuOptsList runtime_opts = {
@@ -30,6 +39,17 @@
.type = QEMU_OPT_SIZE,
.help = "size of the null block",
},
+ {
+ .name = NULL_OPT_LATENCY,
+ .type = QEMU_OPT_NUMBER,
+ .help = "nanoseconds (approximated) to wait "
+ "before completing request",
+ },
+ {
+ .name = NULL_OPT_ZEROES,
+ .type = QEMU_OPT_BOOL,
+ .help = "return zeroes when read",
+ },
{ /* end of list */ }
},
};
@@ -39,13 +59,21 @@
{
QemuOpts *opts;
BDRVNullState *s = bs->opaque;
+ int ret = 0;
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
qemu_opts_absorb_qdict(opts, options, &error_abort);
s->length =
qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 1 << 30);
+ s->latency_ns =
+ qemu_opt_get_number(opts, NULL_OPT_LATENCY, 0);
+ if (s->latency_ns < 0) {
+ error_setg(errp, "latency-ns is invalid");
+ ret = -EINVAL;
+ }
+ s->read_zeroes = qemu_opt_get_bool(opts, NULL_OPT_ZEROES, false);
qemu_opts_del(opts);
- return 0;
+ return ret;
}
static void null_close(BlockDriverState *bs)
@@ -58,28 +86,46 @@
return s->length;
}
+static coroutine_fn int null_co_common(BlockDriverState *bs)
+{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->latency_ns) {
+ co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
+ s->latency_ns);
+ }
+ return 0;
+}
+
static coroutine_fn int null_co_readv(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
{
- return 0;
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
+ return null_co_common(bs);
}
static coroutine_fn int null_co_writev(BlockDriverState *bs,
int64_t sector_num, int nb_sectors,
QEMUIOVector *qiov)
{
- return 0;
+ return null_co_common(bs);
}
static coroutine_fn int null_co_flush(BlockDriverState *bs)
{
- return 0;
+ return null_co_common(bs);
}
typedef struct {
BlockAIOCB common;
QEMUBH *bh;
+ QEMUTimer timer;
} NullAIOCB;
static const AIOCBInfo null_aiocb_info = {
@@ -94,15 +140,33 @@
qemu_aio_unref(acb);
}
+static void null_timer_cb(void *opaque)
+{
+ NullAIOCB *acb = opaque;
+ acb->common.cb(acb->common.opaque, 0);
+ timer_deinit(&acb->timer);
+ qemu_aio_unref(acb);
+}
+
static inline BlockAIOCB *null_aio_common(BlockDriverState *bs,
BlockCompletionFunc *cb,
void *opaque)
{
NullAIOCB *acb;
+ BDRVNullState *s = bs->opaque;
acb = qemu_aio_get(&null_aiocb_info, bs, cb, opaque);
- acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
- qemu_bh_schedule(acb->bh);
+ /* Only emulate latency after vcpu is running. */
+ if (s->latency_ns) {
+ aio_timer_init(bdrv_get_aio_context(bs), &acb->timer,
+ QEMU_CLOCK_REALTIME, SCALE_NS,
+ null_timer_cb, acb);
+ timer_mod_ns(&acb->timer,
+ qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + s->latency_ns);
+ } else {
+ acb->bh = aio_bh_new(bdrv_get_aio_context(bs), null_bh_cb, acb);
+ qemu_bh_schedule(acb->bh);
+ }
return &acb->common;
}
@@ -112,6 +176,12 @@
BlockCompletionFunc *cb,
void *opaque)
{
+ BDRVNullState *s = bs->opaque;
+
+ if (s->read_zeroes) {
+ qemu_iovec_memset(qiov, 0, 0, nb_sectors * BDRV_SECTOR_SIZE);
+ }
+
return null_aio_common(bs, cb, opaque);
}
@@ -131,6 +201,44 @@
return null_aio_common(bs, cb, opaque);
}
+static int null_reopen_prepare(BDRVReopenState *reopen_state,
+ BlockReopenQueue *queue, Error **errp)
+{
+ return 0;
+}
+
+static int64_t coroutine_fn null_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num,
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
+{
+ BDRVNullState *s = bs->opaque;
+ off_t start = sector_num * BDRV_SECTOR_SIZE;
+
+ *pnum = nb_sectors;
+ *file = bs;
+
+ if (s->read_zeroes) {
+ return BDRV_BLOCK_OFFSET_VALID | start | BDRV_BLOCK_ZERO;
+ } else {
+ return BDRV_BLOCK_OFFSET_VALID | start;
+ }
+}
+
+static void null_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+ QINCREF(opts);
+ qdict_del(opts, "filename");
+
+ if (!qdict_size(opts)) {
+ snprintf(bs->exact_filename, sizeof(bs->exact_filename), "%s://",
+ bs->drv->format_name);
+ }
+
+ qdict_put(opts, "driver", qstring_from_str(bs->drv->format_name));
+ bs->full_open_options = opts;
+}
+
static BlockDriver bdrv_null_co = {
.format_name = "null-co",
.protocol_name = "null-co",
@@ -143,6 +251,11 @@
.bdrv_co_readv = null_co_readv,
.bdrv_co_writev = null_co_writev,
.bdrv_co_flush_to_disk = null_co_flush,
+ .bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static BlockDriver bdrv_null_aio = {
@@ -157,6 +270,11 @@
.bdrv_aio_readv = null_aio_readv,
.bdrv_aio_writev = null_aio_writev,
.bdrv_aio_flush = null_aio_flush,
+ .bdrv_reopen_prepare = null_reopen_prepare,
+
+ .bdrv_co_get_block_status = null_co_get_block_status,
+
+ .bdrv_refresh_filename = null_refresh_filename,
};
static void bdrv_null_init(void)
diff --git a/block/parallels.c b/block/parallels.c
index 4f9cd8d..2ccefa7 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -2,8 +2,12 @@
* Block driver for Parallels disk image format
*
* Copyright (c) 2007 Alex Beregszaszi
+ * Copyright (c) 2015 Denis V. Lunev <den@openvz.org>
*
- * This code is based on comparing different disk images created by Parallels.
+ * This code was originally based on comparing different disk images created
+ * by Parallels. Currently it is based on opened OpenVZ sources
+ * available at
+ * http://git.openvz.org/?p=ploop;a=summary
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,66 +27,559 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
+#include "qemu/bitmap.h"
+#include "qapi/util.h"
/**************************************************************/
#define HEADER_MAGIC "WithoutFreeSpace"
#define HEADER_MAGIC2 "WithouFreSpacExt"
#define HEADER_VERSION 2
-#define HEADER_SIZE 64
+#define HEADER_INUSE_MAGIC (0x746F6E59)
+#define MAX_PARALLELS_IMAGE_FACTOR (1ull << 32)
+
+#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */
+
// always little-endian
-struct parallels_header {
+typedef struct ParallelsHeader {
char magic[16]; // "WithoutFreeSpace"
uint32_t version;
uint32_t heads;
uint32_t cylinders;
uint32_t tracks;
- uint32_t catalog_entries;
+ uint32_t bat_entries;
uint64_t nb_sectors;
uint32_t inuse;
uint32_t data_off;
char padding[12];
-} QEMU_PACKED;
+} QEMU_PACKED ParallelsHeader;
+
+
+typedef enum ParallelsPreallocMode {
+ PRL_PREALLOC_MODE_FALLOCATE = 0,
+ PRL_PREALLOC_MODE_TRUNCATE = 1,
+ PRL_PREALLOC_MODE__MAX = 2,
+} ParallelsPreallocMode;
+
+static const char *prealloc_mode_lookup[] = {
+ "falloc",
+ "truncate",
+ NULL,
+};
+
typedef struct BDRVParallelsState {
+ /** Locking is conservative, the lock protects
+ * - image file extending (truncate, fallocate)
+ * - any access to block allocation table
+ */
CoMutex lock;
- uint32_t *catalog_bitmap;
- unsigned int catalog_size;
+ ParallelsHeader *header;
+ uint32_t header_size;
+ bool header_unclean;
+
+ unsigned long *bat_dirty_bmap;
+ unsigned int bat_dirty_block;
+
+ uint32_t *bat_bitmap;
+ unsigned int bat_size;
+
+ int64_t data_end;
+ uint64_t prealloc_size;
+ ParallelsPreallocMode prealloc_mode;
unsigned int tracks;
unsigned int off_multiplier;
} BDRVParallelsState;
-static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename)
+
+#define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode"
+#define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size"
+
+static QemuOptsList parallels_runtime_opts = {
+ .name = "parallels",
+ .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head),
+ .desc = {
+ {
+ .name = PARALLELS_OPT_PREALLOC_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Preallocation size on image expansion",
+ .def_value_str = "128MiB",
+ },
+ {
+ .name = PARALLELS_OPT_PREALLOC_MODE,
+ .type = QEMU_OPT_STRING,
+ .help = "Preallocation mode on image expansion "
+ "(allowed values: falloc, truncate)",
+ .def_value_str = "falloc",
+ },
+ { /* end of list */ },
+ },
+};
+
+
+static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx)
{
- const struct parallels_header *ph = (const void *)buf;
+ return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier;
+}
- if (buf_size < HEADER_SIZE)
+static uint32_t bat_entry_off(uint32_t idx)
+{
+ return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx;
+}
+
+static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num)
+{
+ uint32_t index, offset;
+
+ index = sector_num / s->tracks;
+ offset = sector_num % s->tracks;
+
+ /* not allocated */
+ if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) {
+ return -1;
+ }
+ return bat2sect(s, index) + offset;
+}
+
+static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num,
+ int nb_sectors)
+{
+ int ret = s->tracks - sector_num % s->tracks;
+ return MIN(nb_sectors, ret);
+}
+
+static int64_t block_status(BDRVParallelsState *s, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ int64_t start_off = -2, prev_end_off = -2;
+
+ *pnum = 0;
+ while (nb_sectors > 0 || start_off == -2) {
+ int64_t offset = seek_to_sector(s, sector_num);
+ int to_end;
+
+ if (start_off == -2) {
+ start_off = offset;
+ prev_end_off = offset;
+ } else if (offset != prev_end_off) {
+ break;
+ }
+
+ to_end = cluster_remainder(s, sector_num, nb_sectors);
+ nb_sectors -= to_end;
+ sector_num += to_end;
+ *pnum += to_end;
+
+ if (offset > 0) {
+ prev_end_off += to_end;
+ }
+ }
+ return start_off;
+}
+
+static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num,
+ int nb_sectors, int *pnum)
+{
+ BDRVParallelsState *s = bs->opaque;
+ uint32_t idx, to_allocate, i;
+ int64_t pos, space;
+
+ pos = block_status(s, sector_num, nb_sectors, pnum);
+ if (pos > 0) {
+ return pos;
+ }
+
+ idx = sector_num / s->tracks;
+ if (idx >= s->bat_size) {
+ return -EINVAL;
+ }
+
+ to_allocate = DIV_ROUND_UP(sector_num + *pnum, s->tracks) - idx;
+ space = to_allocate * s->tracks;
+ if (s->data_end + space > bdrv_getlength(bs->file->bs) >> BDRV_SECTOR_BITS) {
+ int ret;
+ space += s->prealloc_size;
+ if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) {
+ ret = bdrv_pwrite_zeroes(bs->file,
+ s->data_end << BDRV_SECTOR_BITS,
+ space << BDRV_SECTOR_BITS, 0);
+ } else {
+ ret = bdrv_truncate(bs->file->bs,
+ (s->data_end + space) << BDRV_SECTOR_BITS);
+ }
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
+ for (i = 0; i < to_allocate; i++) {
+ s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier);
+ s->data_end += s->tracks;
+ bitmap_set(s->bat_dirty_bmap,
+ bat_entry_off(idx + i) / s->bat_dirty_block, 1);
+ }
+
+ return bat2sect(s, idx) + sector_num % s->tracks;
+}
+
+
+static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs)
+{
+ BDRVParallelsState *s = bs->opaque;
+ unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block);
+ unsigned long bit;
+
+ qemu_co_mutex_lock(&s->lock);
+
+ bit = find_first_bit(s->bat_dirty_bmap, size);
+ while (bit < size) {
+ uint32_t off = bit * s->bat_dirty_block;
+ uint32_t to_write = s->bat_dirty_block;
+ int ret;
+
+ if (off + to_write > s->header_size) {
+ to_write = s->header_size - off;
+ }
+ ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off,
+ to_write);
+ if (ret < 0) {
+ qemu_co_mutex_unlock(&s->lock);
+ return ret;
+ }
+ bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1);
+ }
+ bitmap_zero(s->bat_dirty_bmap, size);
+
+ qemu_co_mutex_unlock(&s->lock);
+ return 0;
+}
+
+
+static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t offset;
+
+ qemu_co_mutex_lock(&s->lock);
+ offset = block_status(s, sector_num, nb_sectors, pnum);
+ qemu_co_mutex_unlock(&s->lock);
+
+ if (offset < 0) {
return 0;
+ }
- if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
- !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
- (le32_to_cpu(ph->version) == HEADER_VERSION))
- return 100;
+ *file = bs->file->bs;
+ return (offset << BDRV_SECTOR_BITS) |
+ BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
+}
+
+static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVParallelsState *s = bs->opaque;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ while (nb_sectors > 0) {
+ int64_t position;
+ int n, nbytes;
+
+ qemu_co_mutex_lock(&s->lock);
+ position = allocate_clusters(bs, sector_num, nb_sectors, &n);
+ qemu_co_mutex_unlock(&s->lock);
+ if (position < 0) {
+ ret = (int)position;
+ break;
+ }
+
+ nbytes = n << BDRV_SECTOR_BITS;
+
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
+
+ ret = bdrv_co_writev(bs->file, position, n, &hd_qiov);
+ if (ret < 0) {
+ break;
+ }
+
+ nb_sectors -= n;
+ sector_num += n;
+ bytes_done += nbytes;
+ }
+
+ qemu_iovec_destroy(&hd_qiov);
+ return ret;
+}
+
+static coroutine_fn int parallels_co_readv(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+ BDRVParallelsState *s = bs->opaque;
+ uint64_t bytes_done = 0;
+ QEMUIOVector hd_qiov;
+ int ret = 0;
+
+ qemu_iovec_init(&hd_qiov, qiov->niov);
+
+ while (nb_sectors > 0) {
+ int64_t position;
+ int n, nbytes;
+
+ qemu_co_mutex_lock(&s->lock);
+ position = block_status(s, sector_num, nb_sectors, &n);
+ qemu_co_mutex_unlock(&s->lock);
+
+ nbytes = n << BDRV_SECTOR_BITS;
+
+ if (position < 0) {
+ qemu_iovec_memset(qiov, bytes_done, 0, nbytes);
+ } else {
+ qemu_iovec_reset(&hd_qiov);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes);
+
+ ret = bdrv_co_readv(bs->file, position, n, &hd_qiov);
+ if (ret < 0) {
+ break;
+ }
+ }
+
+ nb_sectors -= n;
+ sector_num += n;
+ bytes_done += nbytes;
+ }
+
+ qemu_iovec_destroy(&hd_qiov);
+ return ret;
+}
+
+
+static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix)
+{
+ BDRVParallelsState *s = bs->opaque;
+ int64_t size, prev_off, high_off;
+ int ret;
+ uint32_t i;
+ bool flush_bat = false;
+ int cluster_size = s->tracks << BDRV_SECTOR_BITS;
+
+ size = bdrv_getlength(bs->file->bs);
+ if (size < 0) {
+ res->check_errors++;
+ return size;
+ }
+
+ if (s->header_unclean) {
+ fprintf(stderr, "%s image was not closed correctly\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR");
+ res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
+ /* parallels_close will do the job right */
+ res->corruptions_fixed++;
+ s->header_unclean = false;
+ }
+ }
+
+ res->bfi.total_clusters = s->bat_size;
+ res->bfi.compressed_clusters = 0; /* compression is not supported */
+
+ high_off = 0;
+ prev_off = 0;
+ for (i = 0; i < s->bat_size; i++) {
+ int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS;
+ if (off == 0) {
+ prev_off = 0;
+ continue;
+ }
+
+ /* cluster outside the image */
+ if (off > size) {
+ fprintf(stderr, "%s cluster %u is outside image\n",
+ fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
+ res->corruptions++;
+ if (fix & BDRV_FIX_ERRORS) {
+ prev_off = 0;
+ s->bat_bitmap[i] = 0;
+ res->corruptions_fixed++;
+ flush_bat = true;
+ continue;
+ }
+ }
+
+ res->bfi.allocated_clusters++;
+ if (off > high_off) {
+ high_off = off;
+ }
+
+ if (prev_off != 0 && (prev_off + cluster_size) != off) {
+ res->bfi.fragmented_clusters++;
+ }
+ prev_off = off;
+ }
+
+ if (flush_bat) {
+ ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size);
+ if (ret < 0) {
+ res->check_errors++;
+ return ret;
+ }
+ }
+
+ res->image_end_offset = high_off + cluster_size;
+ if (size > res->image_end_offset) {
+ int64_t count;
+ count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size);
+ fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n",
+ fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR",
+ size - res->image_end_offset);
+ res->leaks += count;
+ if (fix & BDRV_FIX_LEAKS) {
+ ret = bdrv_truncate(bs->file->bs, res->image_end_offset);
+ if (ret < 0) {
+ res->check_errors++;
+ return ret;
+ }
+ res->leaks_fixed += count;
+ }
+ }
return 0;
}
+
+static int parallels_create(const char *filename, QemuOpts *opts, Error **errp)
+{
+ int64_t total_size, cl_size;
+ uint8_t tmp[BDRV_SECTOR_SIZE];
+ Error *local_err = NULL;
+ BlockBackend *file;
+ uint32_t bat_entries, bat_sectors;
+ ParallelsHeader header;
+ int ret;
+
+ total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
+ BDRV_SECTOR_SIZE);
+ cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE,
+ DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE);
+ if (total_size >= MAX_PARALLELS_IMAGE_FACTOR * cl_size) {
+ error_propagate(errp, local_err);
+ return -E2BIG;
+ }
+
+ ret = bdrv_create_file(filename, opts, &local_err);
+ if (ret < 0) {
+ error_propagate(errp, local_err);
+ return ret;
+ }
+
+ file = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (file == NULL) {
+ error_propagate(errp, local_err);
+ return -EIO;
+ }
+
+ blk_set_allow_write_beyond_eof(file, true);
+
+ ret = blk_truncate(file, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+
+ bat_entries = DIV_ROUND_UP(total_size, cl_size);
+ bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size);
+ bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS;
+
+ memset(&header, 0, sizeof(header));
+ memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic));
+ header.version = cpu_to_le32(HEADER_VERSION);
+ /* don't care much about geometry, it is not used on image level */
+ header.heads = cpu_to_le32(16);
+ header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32);
+ header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS);
+ header.bat_entries = cpu_to_le32(bat_entries);
+ header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE));
+ header.data_off = cpu_to_le32(bat_sectors);
+
+ /* write all the data */
+ memset(tmp, 0, sizeof(tmp));
+ memcpy(tmp, &header, sizeof(header));
+
+ ret = blk_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+ ret = blk_pwrite_zeroes(file, BDRV_SECTOR_SIZE,
+ (bat_sectors - 1) << BDRV_SECTOR_BITS, 0);
+ if (ret < 0) {
+ goto exit;
+ }
+ ret = 0;
+
+done:
+ blk_unref(file);
+ return ret;
+
+exit:
+ error_setg_errno(errp, -ret, "Failed to create Parallels image");
+ goto done;
+}
+
+
+static int parallels_probe(const uint8_t *buf, int buf_size,
+ const char *filename)
+{
+ const ParallelsHeader *ph = (const void *)buf;
+
+ if (buf_size < sizeof(ParallelsHeader)) {
+ return 0;
+ }
+
+ if ((!memcmp(ph->magic, HEADER_MAGIC, 16) ||
+ !memcmp(ph->magic, HEADER_MAGIC2, 16)) &&
+ (le32_to_cpu(ph->version) == HEADER_VERSION)) {
+ return 100;
+ }
+
+ return 0;
+}
+
+static int parallels_update_header(BlockDriverState *bs)
+{
+ BDRVParallelsState *s = bs->opaque;
+ unsigned size = MAX(bdrv_opt_mem_align(bs->file->bs),
+ sizeof(ParallelsHeader));
+
+ if (size > s->header_size) {
+ size = s->header_size;
+ }
+ return bdrv_pwrite_sync(bs->file, 0, s->header, size);
+}
+
static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVParallelsState *s = bs->opaque;
- int i;
- struct parallels_header ph;
- int ret;
-
- bs->read_only = 1; // no write support yet
+ ParallelsHeader ph;
+ int ret, size, i;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ char *buf;
ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph));
if (ret < 0) {
@@ -115,25 +612,90 @@
goto fail;
}
- s->catalog_size = le32_to_cpu(ph.catalog_entries);
- if (s->catalog_size > INT_MAX / 4) {
+ s->bat_size = le32_to_cpu(ph.bat_entries);
+ if (s->bat_size > INT_MAX / sizeof(uint32_t)) {
error_setg(errp, "Catalog too large");
ret = -EFBIG;
goto fail;
}
- s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size);
- if (s->catalog_size && s->catalog_bitmap == NULL) {
+
+ size = bat_entry_off(s->bat_size);
+ s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs));
+ s->header = qemu_try_blockalign(bs->file->bs, s->header_size);
+ if (s->header == NULL) {
ret = -ENOMEM;
goto fail;
}
+ s->data_end = le32_to_cpu(ph.data_off);
+ if (s->data_end == 0) {
+ s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE);
+ }
+ if (s->data_end < s->header_size) {
+ /* there is not enough unused space to fit to block align between BAT
+ and actual data. We can't avoid read-modify-write... */
+ s->header_size = size;
+ }
- ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4);
+ ret = bdrv_pread(bs->file, 0, s->header, s->header_size);
if (ret < 0) {
goto fail;
}
+ s->bat_bitmap = (uint32_t *)(s->header + 1);
- for (i = 0; i < s->catalog_size; i++)
- le32_to_cpus(&s->catalog_bitmap[i]);
+ for (i = 0; i < s->bat_size; i++) {
+ int64_t off = bat2sect(s, i);
+ if (off >= s->data_end) {
+ s->data_end = off + s->tracks;
+ }
+ }
+
+ if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) {
+ /* Image was not closed correctly. The check is mandatory */
+ s->header_unclean = true;
+ if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
+ error_setg(errp, "parallels: Image was not closed correctly; "
+ "cannot be opened read/write");
+ ret = -EACCES;
+ goto fail;
+ }
+ }
+
+ opts = qemu_opts_create(&parallels_runtime_opts, NULL, 0, &local_err);
+ if (local_err != NULL) {
+ goto fail_options;
+ }
+
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err != NULL) {
+ goto fail_options;
+ }
+
+ s->prealloc_size =
+ qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0);
+ s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS);
+ buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE);
+ s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf,
+ PRL_PREALLOC_MODE__MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err);
+ g_free(buf);
+ if (local_err != NULL) {
+ goto fail_options;
+ }
+ if (!bdrv_has_zero_init(bs->file->bs) ||
+ bdrv_truncate(bs->file->bs, bdrv_getlength(bs->file->bs)) != 0) {
+ s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
+ }
+
+ if (flags & BDRV_O_RDWR) {
+ s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
+ ret = parallels_update_header(bs);
+ if (ret < 0) {
+ goto fail;
+ }
+ }
+
+ s->bat_dirty_block = 4 * getpagesize();
+ s->bat_dirty_bmap =
+ bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
qemu_co_mutex_init(&s->lock);
return 0;
@@ -142,67 +704,67 @@
error_setg(errp, "Image not in Parallels format");
ret = -EINVAL;
fail:
- g_free(s->catalog_bitmap);
+ qemu_vfree(s->header);
return ret;
+
+fail_options:
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
}
-static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
-{
- BDRVParallelsState *s = bs->opaque;
- uint32_t index, offset;
-
- index = sector_num / s->tracks;
- offset = sector_num % s->tracks;
-
- /* not allocated */
- if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0))
- return -1;
- return
- ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512;
-}
-
-static int parallels_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- while (nb_sectors > 0) {
- int64_t position = seek_to_sector(bs, sector_num);
- if (position >= 0) {
- if (bdrv_pread(bs->file, position, buf, 512) != 512)
- return -1;
- } else {
- memset(buf, 0, 512);
- }
- nb_sectors--;
- sector_num++;
- buf += 512;
- }
- return 0;
-}
-
-static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVParallelsState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = parallels_read(bs, sector_num, buf, nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
-}
static void parallels_close(BlockDriverState *bs)
{
BDRVParallelsState *s = bs->opaque;
- g_free(s->catalog_bitmap);
+
+ if (bs->open_flags & BDRV_O_RDWR) {
+ s->header->inuse = 0;
+ parallels_update_header(bs);
+ }
+
+ if (bs->open_flags & BDRV_O_RDWR) {
+ bdrv_truncate(bs->file->bs, s->data_end << BDRV_SECTOR_BITS);
+ }
+
+ g_free(s->bat_dirty_bmap);
+ qemu_vfree(s->header);
}
+static QemuOptsList parallels_create_opts = {
+ .name = "parallels-create-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head),
+ .desc = {
+ {
+ .name = BLOCK_OPT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Virtual disk size",
+ },
+ {
+ .name = BLOCK_OPT_CLUSTER_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Parallels image cluster size",
+ .def_value_str = stringify(DEFAULT_CLUSTER_SIZE),
+ },
+ { /* end of list */ }
+ }
+};
+
static BlockDriver bdrv_parallels = {
.format_name = "parallels",
.instance_size = sizeof(BDRVParallelsState),
.bdrv_probe = parallels_probe,
.bdrv_open = parallels_open,
- .bdrv_read = parallels_co_read,
.bdrv_close = parallels_close,
+ .bdrv_co_get_block_status = parallels_co_get_block_status,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
+ .bdrv_co_flush_to_os = parallels_co_flush_to_os,
+ .bdrv_co_readv = parallels_co_readv,
+ .bdrv_co_writev = parallels_co_writev,
+
+ .bdrv_create = parallels_create,
+ .bdrv_check = parallels_check,
+ .create_opts = &parallels_create_opts,
};
static void bdrv_parallels_init(void)
diff --git a/block/qapi.c b/block/qapi.c
index a87a34a..6f947e3 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -22,16 +22,23 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/qapi.h"
#include "block/block_int.h"
+#include "block/throttle-groups.h"
+#include "block/write-threshold.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/qmp/types.h"
#include "sysemu/block-backend.h"
+#include "qemu/cutils.h"
-BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs)
+BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
+ BlockDriverState *bs, Error **errp)
{
+ ImageInfo **p_image_info;
+ BlockDriverState *bs0;
BlockDeviceInfo *info = g_malloc0(sizeof(*info));
info->file = g_strdup(bs->filename);
@@ -40,6 +47,13 @@
info->encrypted = bs->encrypted;
info->encryption_key_missing = bdrv_key_required(bs);
+ info->cache = g_new(BlockdevCacheInfo, 1);
+ *info->cache = (BlockdevCacheInfo) {
+ .writeback = blk ? blk_enable_write_cache(blk) : true,
+ .direct = !!(bs->open_flags & BDRV_O_NOCACHE),
+ .no_flush = !!(bs->open_flags & BDRV_O_NO_FLUSH),
+ };
+
if (bs->node_name[0]) {
info->has_node_name = true;
info->node_name = g_strdup(bs->node_name);
@@ -53,9 +67,11 @@
info->backing_file_depth = bdrv_get_backing_file_depth(bs);
info->detect_zeroes = bs->detect_zeroes;
- if (bs->io_limits_enabled) {
+ if (blk && blk_get_public(blk)->throttle_state) {
ThrottleConfig cfg;
- throttle_get_config(&bs->throttle_state, &cfg);
+
+ throttle_group_get_config(blk, &cfg);
+
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
@@ -78,8 +94,52 @@
info->has_iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
info->iops_wr_max = cfg.buckets[THROTTLE_OPS_WRITE].max;
+ info->has_bps_max_length = info->has_bps_max;
+ info->bps_max_length =
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length;
+ info->has_bps_rd_max_length = info->has_bps_rd_max;
+ info->bps_rd_max_length =
+ cfg.buckets[THROTTLE_BPS_READ].burst_length;
+ info->has_bps_wr_max_length = info->has_bps_wr_max;
+ info->bps_wr_max_length =
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length;
+
+ info->has_iops_max_length = info->has_iops_max;
+ info->iops_max_length =
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length;
+ info->has_iops_rd_max_length = info->has_iops_rd_max;
+ info->iops_rd_max_length =
+ cfg.buckets[THROTTLE_OPS_READ].burst_length;
+ info->has_iops_wr_max_length = info->has_iops_wr_max;
+ info->iops_wr_max_length =
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length;
+
info->has_iops_size = cfg.op_size;
info->iops_size = cfg.op_size;
+
+ info->has_group = true;
+ info->group = g_strdup(throttle_group_get_name(blk));
+ }
+
+ info->write_threshold = bdrv_write_threshold_get(bs);
+
+ bs0 = bs;
+ p_image_info = &info->image;
+ while (1) {
+ Error *local_err = NULL;
+ bdrv_query_image_info(bs0, p_image_info, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qapi_free_BlockDeviceInfo(info);
+ return NULL;
+ }
+ if (bs0->drv && bs0->backing) {
+ bs0 = bs0->backing->bs;
+ (*p_image_info)->has_backing_image = true;
+ p_image_info = &((*p_image_info)->backing_image);
+ } else {
+ break;
+ }
}
return info;
@@ -168,17 +228,18 @@
{
int64_t size;
const char *backing_filename;
- char backing_filename2[1024];
BlockDriverInfo bdi;
int ret;
Error *err = NULL;
ImageInfo *info;
+ aio_context_acquire(bdrv_get_aio_context(bs));
+
size = bdrv_getlength(bs);
if (size < 0) {
error_setg_errno(errp, -size, "Can't get size of device '%s'",
bdrv_get_device_name(bs));
- return;
+ goto out;
}
info = g_new0(ImageInfo, 1);
@@ -204,14 +265,23 @@
backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') {
+ char *backing_filename2 = g_malloc0(PATH_MAX);
info->backing_filename = g_strdup(backing_filename);
info->has_backing_filename = true;
- bdrv_get_full_backing_filename(bs, backing_filename2,
- sizeof(backing_filename2));
+ bdrv_get_full_backing_filename(bs, backing_filename2, PATH_MAX, &err);
+ if (err) {
+ /* Can't reconstruct the full backing filename, so we must omit
+ * this field and apply a Best Effort to this query. */
+ g_free(backing_filename2);
+ backing_filename2 = NULL;
+ error_free(err);
+ err = NULL;
+ }
- if (strcmp(backing_filename, backing_filename2) != 0) {
- info->full_backing_filename =
- g_strdup(backing_filename2);
+ /* Always report the full_backing_filename if present, even if it's the
+ * same as backing_filename. That they are the same is useful info. */
+ if (backing_filename2) {
+ info->full_backing_filename = g_strdup(backing_filename2);
info->has_full_backing_filename = true;
}
@@ -219,6 +289,7 @@
info->backing_filename_format = g_strdup(bs->backing_format);
info->has_backing_filename_format = true;
}
+ g_free(backing_filename2);
}
ret = bdrv_query_snapshot_info_list(bs, &info->snapshots, &err);
@@ -236,10 +307,13 @@
default:
error_propagate(errp, err);
qapi_free_ImageInfo(info);
- return;
+ goto out;
}
*p_info = info;
+
+out:
+ aio_context_release(bdrv_get_aio_context(bs));
}
/* @p_info will be set only on success. */
@@ -248,48 +322,31 @@
{
BlockInfo *info = g_malloc0(sizeof(*info));
BlockDriverState *bs = blk_bs(blk);
- BlockDriverState *bs0;
- ImageInfo **p_image_info;
- Error *local_err = NULL;
info->device = g_strdup(blk_name(blk));
info->type = g_strdup("unknown");
info->locked = blk_dev_is_medium_locked(blk);
info->removable = blk_dev_has_removable_media(blk);
- if (blk_dev_has_removable_media(blk)) {
+ if (blk_dev_has_tray(blk)) {
info->has_tray_open = true;
info->tray_open = blk_dev_is_tray_open(blk);
}
- if (bdrv_iostatus_is_enabled(bs)) {
+ if (blk_iostatus_is_enabled(blk)) {
info->has_io_status = true;
- info->io_status = bs->iostatus;
+ info->io_status = blk_iostatus(blk);
}
- if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
+ if (bs && !QLIST_EMPTY(&bs->dirty_bitmaps)) {
info->has_dirty_bitmaps = true;
info->dirty_bitmaps = bdrv_query_dirty_bitmaps(bs);
}
- if (bs->drv) {
+ if (bs && bs->drv) {
info->has_inserted = true;
- info->inserted = bdrv_block_device_info(bs);
-
- bs0 = bs;
- p_image_info = &info->inserted->image;
- while (1) {
- bdrv_query_image_info(bs0, p_image_info, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- goto err;
- }
- if (bs0->drv && bs0->backing_hd) {
- bs0 = bs0->backing_hd;
- (*p_image_info)->has_backing_image = true;
- p_image_info = &((*p_image_info)->backing_image);
- } else {
- break;
- }
+ info->inserted = bdrv_block_device_info(blk, bs, errp);
+ if (info->inserted == NULL) {
+ goto err;
}
}
@@ -300,37 +357,115 @@
qapi_free_BlockInfo(info);
}
-static BlockStats *bdrv_query_stats(const BlockDriverState *bs)
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing);
+
+static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
+{
+ BlockAcctStats *stats = blk_get_stats(blk);
+ BlockAcctTimedStats *ts = NULL;
+
+ ds->rd_bytes = stats->nr_bytes[BLOCK_ACCT_READ];
+ ds->wr_bytes = stats->nr_bytes[BLOCK_ACCT_WRITE];
+ ds->rd_operations = stats->nr_ops[BLOCK_ACCT_READ];
+ ds->wr_operations = stats->nr_ops[BLOCK_ACCT_WRITE];
+
+ ds->failed_rd_operations = stats->failed_ops[BLOCK_ACCT_READ];
+ ds->failed_wr_operations = stats->failed_ops[BLOCK_ACCT_WRITE];
+ ds->failed_flush_operations = stats->failed_ops[BLOCK_ACCT_FLUSH];
+
+ ds->invalid_rd_operations = stats->invalid_ops[BLOCK_ACCT_READ];
+ ds->invalid_wr_operations = stats->invalid_ops[BLOCK_ACCT_WRITE];
+ ds->invalid_flush_operations =
+ stats->invalid_ops[BLOCK_ACCT_FLUSH];
+
+ ds->rd_merged = stats->merged[BLOCK_ACCT_READ];
+ ds->wr_merged = stats->merged[BLOCK_ACCT_WRITE];
+ ds->flush_operations = stats->nr_ops[BLOCK_ACCT_FLUSH];
+ ds->wr_total_time_ns = stats->total_time_ns[BLOCK_ACCT_WRITE];
+ ds->rd_total_time_ns = stats->total_time_ns[BLOCK_ACCT_READ];
+ ds->flush_total_time_ns = stats->total_time_ns[BLOCK_ACCT_FLUSH];
+
+ ds->has_idle_time_ns = stats->last_access_time_ns > 0;
+ if (ds->has_idle_time_ns) {
+ ds->idle_time_ns = block_acct_idle_time_ns(stats);
+ }
+
+ ds->account_invalid = stats->account_invalid;
+ ds->account_failed = stats->account_failed;
+
+ while ((ts = block_acct_interval_next(stats, ts))) {
+ BlockDeviceTimedStatsList *timed_stats =
+ g_malloc0(sizeof(*timed_stats));
+ BlockDeviceTimedStats *dev_stats = g_malloc0(sizeof(*dev_stats));
+ timed_stats->next = ds->timed_stats;
+ timed_stats->value = dev_stats;
+ ds->timed_stats = timed_stats;
+
+ TimedAverage *rd = &ts->latency[BLOCK_ACCT_READ];
+ TimedAverage *wr = &ts->latency[BLOCK_ACCT_WRITE];
+ TimedAverage *fl = &ts->latency[BLOCK_ACCT_FLUSH];
+
+ dev_stats->interval_length = ts->interval_length;
+
+ dev_stats->min_rd_latency_ns = timed_average_min(rd);
+ dev_stats->max_rd_latency_ns = timed_average_max(rd);
+ dev_stats->avg_rd_latency_ns = timed_average_avg(rd);
+
+ dev_stats->min_wr_latency_ns = timed_average_min(wr);
+ dev_stats->max_wr_latency_ns = timed_average_max(wr);
+ dev_stats->avg_wr_latency_ns = timed_average_avg(wr);
+
+ dev_stats->min_flush_latency_ns = timed_average_min(fl);
+ dev_stats->max_flush_latency_ns = timed_average_max(fl);
+ dev_stats->avg_flush_latency_ns = timed_average_avg(fl);
+
+ dev_stats->avg_rd_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_READ);
+ dev_stats->avg_wr_queue_depth =
+ block_acct_queue_depth(ts, BLOCK_ACCT_WRITE);
+ }
+}
+
+static void bdrv_query_bds_stats(BlockStats *s, const BlockDriverState *bs,
+ bool query_backing)
+{
+ if (bdrv_get_node_name(bs)[0]) {
+ s->has_node_name = true;
+ s->node_name = g_strdup(bdrv_get_node_name(bs));
+ }
+
+ s->stats->wr_highest_offset = bs->wr_highest_offset;
+
+ if (bs->file) {
+ s->has_parent = true;
+ s->parent = bdrv_query_stats(NULL, bs->file->bs, query_backing);
+ }
+
+ if (query_backing && bs->backing) {
+ s->has_backing = true;
+ s->backing = bdrv_query_stats(NULL, bs->backing->bs, query_backing);
+ }
+
+}
+
+static BlockStats *bdrv_query_stats(BlockBackend *blk,
+ const BlockDriverState *bs,
+ bool query_backing)
{
BlockStats *s;
s = g_malloc0(sizeof(*s));
-
- if (bdrv_get_device_name(bs)[0]) {
- s->has_device = true;
- s->device = g_strdup(bdrv_get_device_name(bs));
- }
-
s->stats = g_malloc0(sizeof(*s->stats));
- s->stats->rd_bytes = bs->stats.nr_bytes[BLOCK_ACCT_READ];
- s->stats->wr_bytes = bs->stats.nr_bytes[BLOCK_ACCT_WRITE];
- s->stats->rd_operations = bs->stats.nr_ops[BLOCK_ACCT_READ];
- s->stats->wr_operations = bs->stats.nr_ops[BLOCK_ACCT_WRITE];
- s->stats->wr_highest_offset =
- bs->stats.wr_highest_sector * BDRV_SECTOR_SIZE;
- s->stats->flush_operations = bs->stats.nr_ops[BLOCK_ACCT_FLUSH];
- s->stats->wr_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_WRITE];
- s->stats->rd_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_READ];
- s->stats->flush_total_time_ns = bs->stats.total_time_ns[BLOCK_ACCT_FLUSH];
- if (bs->file) {
- s->has_parent = true;
- s->parent = bdrv_query_stats(bs->file);
+ if (blk) {
+ s->has_device = true;
+ s->device = g_strdup(blk_name(blk));
+ bdrv_query_blk_stats(s->stats, blk);
}
-
- if (bs->backing_hd) {
- s->has_backing = true;
- s->backing = bdrv_query_stats(bs->backing_hd);
+ if (bs) {
+ bdrv_query_bds_stats(s, bs, query_backing);
}
return s;
@@ -347,7 +482,9 @@
bdrv_query_info(blk, &info->value, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- goto err;
+ g_free(info);
+ qapi_free_BlockInfoList(head);
+ return NULL;
}
*p_next = info;
@@ -355,23 +492,40 @@
}
return head;
-
- err:
- qapi_free_BlockInfoList(head);
- return NULL;
}
-BlockStatsList *qmp_query_blockstats(Error **errp)
+static bool next_query_bds(BlockBackend **blk, BlockDriverState **bs,
+ bool query_nodes)
+{
+ if (query_nodes) {
+ *bs = bdrv_next_node(*bs);
+ return !!*bs;
+ }
+
+ *blk = blk_next(*blk);
+ *bs = *blk ? blk_bs(*blk) : NULL;
+
+ return !!*blk;
+}
+
+BlockStatsList *qmp_query_blockstats(bool has_query_nodes,
+ bool query_nodes,
+ Error **errp)
{
BlockStatsList *head = NULL, **p_next = &head;
+ BlockBackend *blk = NULL;
BlockDriverState *bs = NULL;
- while ((bs = bdrv_next(bs))) {
+ /* Just to be safe if query_nodes is not always initialized */
+ query_nodes = has_query_nodes && query_nodes;
+
+ while (next_query_bds(&blk, &bs, query_nodes)) {
BlockStatsList *info = g_malloc0(sizeof(*info));
- AioContext *ctx = bdrv_get_aio_context(bs);
+ AioContext *ctx = blk ? blk_get_aio_context(blk)
+ : bdrv_get_aio_context(bs);
aio_context_acquire(ctx);
- info->value = bdrv_query_stats(bs);
+ info->value = bdrv_query_stats(blk, bs, !query_nodes);
aio_context_release(ctx);
*p_next = info;
@@ -385,7 +539,7 @@
static char *get_human_readable_size(char *buf, int buf_size, int64_t size)
{
- static const char suffixes[NB_SUFFIXES] = "KMGT";
+ static const char suffixes[NB_SUFFIXES] = {'K', 'M', 'G', 'T'};
int64_t base;
int i;
@@ -481,18 +635,9 @@
}
case QTYPE_QBOOL: {
QBool *value = qobject_to_qbool(obj);
- func_fprintf(f, "%s", qbool_get_int(value) ? "true" : "false");
+ func_fprintf(f, "%s", qbool_get_bool(value) ? "true" : "false");
break;
}
- case QTYPE_QERROR: {
- QString *value = qerror_human((QError *)obj);
- func_fprintf(f, "%s", qstring_get_str(value));
- QDECREF(value);
- break;
- }
- case QTYPE_NONE:
- break;
- case QTYPE_MAX:
default:
abort();
}
@@ -505,11 +650,10 @@
int i = 0;
for (entry = qlist_first(list); entry; entry = qlist_next(entry), i++) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s[%i]:\n" : "%*s[%i]: ";
-
- func_fprintf(f, format, indentation * 4, "", i);
+ func_fprintf(f, "%*s[%i]:%c", indentation * 4, "", i,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
@@ -523,10 +667,9 @@
const QDictEntry *entry;
for (entry = qdict_first(dict); entry; entry = qdict_next(dict, entry)) {
- qtype_code type = qobject_type(entry->value);
+ QType type = qobject_type(entry->value);
bool composite = (type == QTYPE_QDICT || type == QTYPE_QLIST);
- const char *format = composite ? "%*s%s:\n" : "%*s%s: ";
- char key[strlen(entry->key) + 1];
+ char *key = g_malloc(strlen(entry->key) + 1);
int i;
/* replace dashes with spaces in key (variable) names */
@@ -534,28 +677,28 @@
key[i] = entry->key[i] == '-' ? ' ' : entry->key[i];
}
key[i] = 0;
-
- func_fprintf(f, format, indentation * 4, "", key);
+ func_fprintf(f, "%*s%s:%c", indentation * 4, "", key,
+ composite ? '\n' : ' ');
dump_qobject(func_fprintf, f, indentation + 1, entry->value);
if (!composite) {
func_fprintf(f, "\n");
}
+ g_free(key);
}
}
void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f,
ImageInfoSpecific *info_spec)
{
- QmpOutputVisitor *ov = qmp_output_visitor_new();
QObject *obj, *data;
+ Visitor *v = qmp_output_visitor_new(&obj);
- visit_type_ImageInfoSpecific(qmp_output_get_visitor(ov), &info_spec, NULL,
- &error_abort);
- obj = qmp_output_get_qobject(ov);
+ visit_type_ImageInfoSpecific(v, NULL, &info_spec, &error_abort);
+ visit_complete(v, &obj);
assert(qobject_type(obj) == QTYPE_QDICT);
data = qdict_get(qobject_to_qdict(obj), "data");
dump_qobject(func_fprintf, f, 1, data);
- qmp_output_visitor_cleanup(ov);
+ visit_free(v);
}
void bdrv_image_info_dump(fprintf_function func_fprintf, void *f,
@@ -593,7 +736,10 @@
if (info->has_backing_filename) {
func_fprintf(f, "backing file: %s", info->backing_filename);
- if (info->has_full_backing_filename) {
+ if (!info->has_full_backing_filename) {
+ func_fprintf(f, " (cannot determine actual path)");
+ } else if (strcmp(info->backing_filename,
+ info->full_backing_filename) != 0) {
func_fprintf(f, " (actual path: %s)", info->full_backing_filename);
}
func_fprintf(f, "\n");
diff --git a/block/qcow.c b/block/qcow.c
index ece2269..6f9b2e2 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -21,11 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
+#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include <zlib.h>
-#include "qemu/aes.h"
+#include "qapi/qmp/qerror.h"
+#include "crypto/cipher.h"
#include "migration/migration.h"
/**************************************************************/
@@ -71,10 +77,8 @@
uint8_t *cluster_cache;
uint8_t *cluster_data;
uint64_t cluster_cache_offset;
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ QCryptoCipher *cipher; /* NULL if no key yet */
uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
CoMutex lock;
Error *migration_blocker;
} BDRVQcowState;
@@ -120,11 +124,7 @@
goto fail;
}
if (header.version != QCOW_VERSION) {
- char version[64];
- snprintf(version, sizeof(version), "QCOW version %" PRIu32,
- header.version);
- error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_name(bs), "qcow", version);
+ error_setg(errp, "Unsupported qcow version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -153,9 +153,28 @@
ret = -EINVAL;
goto fail;
}
+ if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
+ error_setg(errp, "AES cipher not available");
+ ret = -EINVAL;
+ goto fail;
+ }
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
- bs->encrypted = 1;
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
+ }
+
+ bs->encrypted = true;
}
s->cluster_bits = header.cluster_bits;
s->cluster_size = 1 << s->cluster_bits;
@@ -201,7 +220,7 @@
/* alloc L2 cache (max. 64k * 16 * 8 = 8 MB) */
s->l2_cache =
- qemu_try_blockalign(bs->file,
+ qemu_try_blockalign(bs->file->bs,
s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
if (s->l2_cache == NULL) {
error_setg(errp, "Could not allocate L2 table cache");
@@ -215,7 +234,7 @@
/* read the backing file name */
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
- if (len > 1023) {
+ if (len > 1023 || len >= sizeof(bs->backing_file)) {
error_setg(errp, "Backing file name too long");
ret = -EINVAL;
goto fail;
@@ -229,9 +248,9 @@
}
/* Disable migration when qcow images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "qcow", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker, "The qcow format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
qemu_co_mutex_init(&s->lock);
@@ -259,6 +278,7 @@
BDRVQcowState *s = bs->opaque;
uint8_t keybuf[16];
int len, i;
+ Error *err = NULL; /* Error API requires *errp == NULL on entry */
memset(keybuf, 0, 16);
len = strlen(key);
@@ -269,38 +289,68 @@
for(i = 0;i < len;i++) {
keybuf[i] = key[i];
}
- s->crypt_method = s->crypt_method_header;
+ assert(bs->encrypted);
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ qcrypto_cipher_free(s->cipher);
+ s->cipher = qcrypto_cipher_new(
+ QCRYPTO_CIPHER_ALG_AES_128,
+ QCRYPTO_CIPHER_MODE_CBC,
+ keybuf, G_N_ELEMENTS(keybuf),
+ &err);
+
+ if (!s->cipher) {
+ /* XXX would be nice if errors in this method could
+ * be properly propagated to the caller. Would need
+ * the bdrv_set_key() API signature to be fixed. */
+ error_free(err);
return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
+ }
return 0;
}
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
-static void encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
+static int encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, bool enc, Error **errp)
{
union {
uint64_t ll[2];
uint8_t b[16];
} ivec;
int i;
+ int ret;
for(i = 0; i < nb_sectors; i++) {
ivec.ll[0] = cpu_to_le64(sector_num);
ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
+ if (qcrypto_cipher_setiv(s->cipher,
+ ivec.b, G_N_ELEMENTS(ivec.b),
+ errp) < 0) {
+ return -1;
+ }
+ if (enc) {
+ ret = qcrypto_cipher_encrypt(s->cipher,
+ in_buf,
+ out_buf,
+ 512,
+ errp);
+ } else {
+ ret = qcrypto_cipher_decrypt(s->cipher,
+ in_buf,
+ out_buf,
+ 512,
+ errp);
+ }
+ if (ret < 0) {
+ return -1;
+ }
sector_num++;
in_buf += 512;
out_buf += 512;
}
+ return 0;
}
/* 'allocate' is:
@@ -334,7 +384,7 @@
if (!allocate)
return 0;
/* allocate a new l2 entry */
- l2_offset = bdrv_getlength(bs->file);
+ l2_offset = bdrv_getlength(bs->file->bs);
/* round to cluster size */
l2_offset = (l2_offset + s->cluster_size - 1) & ~(s->cluster_size - 1);
/* update the L1 entry */
@@ -374,7 +424,8 @@
s->l2_size * sizeof(uint64_t)) < 0)
return 0;
} else {
- if (bdrv_pread(bs->file, l2_offset, l2_table, s->l2_size * sizeof(uint64_t)) !=
+ if (bdrv_pread(bs->file, l2_offset, l2_table,
+ s->l2_size * sizeof(uint64_t)) !=
s->l2_size * sizeof(uint64_t))
return 0;
}
@@ -395,34 +446,42 @@
overwritten */
if (decompress_cluster(bs, cluster_offset) < 0)
return 0;
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
/* write the cluster content */
- if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache, s->cluster_size) !=
+ if (bdrv_pwrite(bs->file, cluster_offset, s->cluster_cache,
+ s->cluster_size) !=
s->cluster_size)
return -1;
} else {
- cluster_offset = bdrv_getlength(bs->file);
+ cluster_offset = bdrv_getlength(bs->file->bs);
if (allocate == 1) {
/* round to cluster size */
cluster_offset = (cluster_offset + s->cluster_size - 1) &
~(s->cluster_size - 1);
- bdrv_truncate(bs->file, cluster_offset + s->cluster_size);
+ bdrv_truncate(bs->file->bs, cluster_offset + s->cluster_size);
/* if encrypted, we must initialize the cluster
content which won't be written */
- if (s->crypt_method &&
+ if (bs->encrypted &&
(n_end - n_start) < s->cluster_sectors) {
uint64_t start_sect;
+ assert(s->cipher);
start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
memset(s->cluster_data + 512, 0x00, 512);
for(i = 0; i < s->cluster_sectors; i++) {
if (i < n_start || i >= n_end) {
- encrypt_sectors(s, start_sect + i,
- s->cluster_data,
- s->cluster_data + 512, 1, 1,
- &s->aes_encrypt_key);
- if (bdrv_pwrite(bs->file, cluster_offset + i * 512,
+ Error *err = NULL;
+ if (encrypt_sectors(s, start_sect + i,
+ s->cluster_data,
+ s->cluster_data + 512, 1,
+ true, &err) < 0) {
+ error_free(err);
+ errno = EIO;
+ return -1;
+ }
+ if (bdrv_pwrite(bs->file,
+ cluster_offset + i * 512,
s->cluster_data, 512) != 512)
return -1;
}
@@ -444,7 +503,7 @@
}
static int64_t coroutine_fn qcow_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVQcowState *s = bs->opaque;
int index_in_cluster, n;
@@ -461,10 +520,11 @@
if (!cluster_offset) {
return 0;
}
- if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->crypt_method) {
+ if ((cluster_offset & QCOW_OFLAG_COMPRESSED) || s->cipher) {
return BDRV_BLOCK_DATA;
}
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
@@ -528,6 +588,7 @@
QEMUIOVector hd_qiov;
uint8_t *buf;
void *orig_buf;
+ Error *err = NULL;
if (qiov->niov > 1) {
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
@@ -552,14 +613,13 @@
}
if (!cluster_offset) {
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
hd_iov.iov_base = (void *)buf;
hd_iov.iov_len = n * 512;
qemu_iovec_init_external(&hd_qiov, &hd_iov, 1);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n, &hd_qiov);
+ ret = bdrv_co_readv(bs->backing, sector_num, n, &hd_qiov);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -590,10 +650,12 @@
if (ret < 0) {
break;
}
- if (s->crypt_method) {
- encrypt_sectors(s, sector_num, buf, buf,
- n, 0,
- &s->aes_decrypt_key);
+ if (bs->encrypted) {
+ assert(s->cipher);
+ if (encrypt_sectors(s, sector_num, buf, buf,
+ n, false, &err) < 0) {
+ goto fail;
+ }
}
}
ret = 0;
@@ -614,6 +676,7 @@
return ret;
fail:
+ error_free(err);
ret = -EIO;
goto done;
}
@@ -661,12 +724,18 @@
ret = -EIO;
break;
}
- if (s->crypt_method) {
+ if (bs->encrypted) {
+ Error *err = NULL;
+ assert(s->cipher);
if (!cluster_data) {
cluster_data = g_malloc0(s->cluster_size);
}
- encrypt_sectors(s, sector_num, cluster_data, buf,
- n, 1, &s->aes_encrypt_key);
+ if (encrypt_sectors(s, sector_num, cluster_data, buf,
+ n, true, &err) < 0) {
+ error_free(err);
+ ret = -EIO;
+ break;
+ }
src_buf = cluster_data;
} else {
src_buf = buf;
@@ -703,6 +772,8 @@
{
BDRVQcowState *s = bs->opaque;
+ qcrypto_cipher_free(s->cipher);
+ s->cipher = NULL;
g_free(s->l1_table);
qemu_vfree(s->l2_cache);
g_free(s->cluster_cache);
@@ -722,7 +793,7 @@
int flags = 0;
Error *local_err = NULL;
int ret;
- BlockDriverState *qcow_bs;
+ BlockBackend *qcow_blk;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -738,15 +809,17 @@
goto cleanup;
}
- qcow_bs = NULL;
- ret = bdrv_open(&qcow_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
+ qcow_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (qcow_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto cleanup;
}
- ret = bdrv_truncate(qcow_bs, 0);
+ blk_set_allow_write_beyond_eof(qcow_blk, true);
+
+ ret = blk_truncate(qcow_blk, 0);
if (ret < 0) {
goto exit;
}
@@ -786,24 +859,24 @@
}
/* write all the data */
- ret = bdrv_pwrite(qcow_bs, 0, &header, sizeof(header));
+ ret = blk_pwrite(qcow_blk, 0, &header, sizeof(header), 0);
if (ret != sizeof(header)) {
goto exit;
}
if (backing_file) {
- ret = bdrv_pwrite(qcow_bs, sizeof(header),
- backing_file, backing_filename_len);
+ ret = blk_pwrite(qcow_blk, sizeof(header),
+ backing_file, backing_filename_len, 0);
if (ret != backing_filename_len) {
goto exit;
}
}
tmp = g_malloc0(BDRV_SECTOR_SIZE);
- for (i = 0; i < ((sizeof(uint64_t)*l1_size + BDRV_SECTOR_SIZE - 1)/
- BDRV_SECTOR_SIZE); i++) {
- ret = bdrv_pwrite(qcow_bs, header_size +
- BDRV_SECTOR_SIZE*i, tmp, BDRV_SECTOR_SIZE);
+ for (i = 0; i < DIV_ROUND_UP(sizeof(uint64_t) * l1_size, BDRV_SECTOR_SIZE);
+ i++) {
+ ret = blk_pwrite(qcow_blk, header_size + BDRV_SECTOR_SIZE * i,
+ tmp, BDRV_SECTOR_SIZE, 0);
if (ret != BDRV_SECTOR_SIZE) {
g_free(tmp);
goto exit;
@@ -813,7 +886,7 @@
g_free(tmp);
ret = 0;
exit:
- bdrv_unref(qcow_bs);
+ blk_unref(qcow_blk);
cleanup:
g_free(backing_file);
return ret;
@@ -829,7 +902,7 @@
if (bdrv_pwrite_sync(bs->file, s->l1_table_offset, s->l1_table,
l1_length) < 0)
return -1;
- ret = bdrv_truncate(bs->file, s->l1_table_offset + l1_length);
+ ret = bdrv_truncate(bs->file->bs, s->l1_table_offset + l1_length);
if (ret < 0)
return ret;
@@ -840,6 +913,49 @@
return 0;
}
+typedef struct QcowWriteCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} QcowWriteCo;
+
+static void qcow_write_co_entry(void *opaque)
+{
+ QcowWriteCo *co = opaque;
+ QEMUIOVector qiov;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = co->nb_sectors * BDRV_SECTOR_SIZE,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ co->ret = qcow_co_writev(co->bs, co->sector_num, co->nb_sectors, &qiov);
+}
+
+/* Wrapper for non-coroutine contexts */
+static int qcow_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ QcowWriteCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(qcow_write_co_entry, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow_write_compressed(BlockDriverState *bs, int64_t sector_num,
@@ -867,7 +983,7 @@
return ret;
}
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+ out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -896,7 +1012,7 @@
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ ret = qcow_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
diff --git a/block/qcow2-cache.c b/block/qcow2-cache.c
index 904f6b1..6eaefed 100644
--- a/block/qcow2-cache.c
+++ b/block/qcow2-cache.c
@@ -22,68 +22,127 @@
* THE SOFTWARE.
*/
+/* Needed for CONFIG_MADVISE */
+#include "qemu/osdep.h"
#include "block/block_int.h"
#include "qemu-common.h"
#include "qcow2.h"
#include "trace.h"
typedef struct Qcow2CachedTable {
- void* table;
- int64_t offset;
- bool dirty;
- int cache_hits;
- int ref;
+ int64_t offset;
+ uint64_t lru_counter;
+ int ref;
+ bool dirty;
} Qcow2CachedTable;
struct Qcow2Cache {
- Qcow2CachedTable* entries;
- struct Qcow2Cache* depends;
+ Qcow2CachedTable *entries;
+ struct Qcow2Cache *depends;
int size;
bool depends_on_flush;
+ void *table_array;
+ uint64_t lru_counter;
+ uint64_t cache_clean_lru_counter;
};
+static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
+ Qcow2Cache *c, int table)
+{
+ BDRVQcow2State *s = bs->opaque;
+ return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
+}
+
+static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
+ Qcow2Cache *c, void *table)
+{
+ BDRVQcow2State *s = bs->opaque;
+ ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
+ int idx = table_offset / s->cluster_size;
+ assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
+ return idx;
+}
+
+static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
+ int i, int num_tables)
+{
+#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
+ BDRVQcow2State *s = bs->opaque;
+ void *t = qcow2_cache_get_table_addr(bs, c, i);
+ int align = getpagesize();
+ size_t mem_size = (size_t) s->cluster_size * num_tables;
+ size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
+ size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
+ if (length > 0) {
+ qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
+ }
+#endif
+}
+
+static inline bool can_clean_entry(Qcow2Cache *c, int i)
+{
+ Qcow2CachedTable *t = &c->entries[i];
+ return t->ref == 0 && !t->dirty && t->offset != 0 &&
+ t->lru_counter <= c->cache_clean_lru_counter;
+}
+
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int i = 0;
+ while (i < c->size) {
+ int to_clean = 0;
+
+ /* Skip the entries that we don't need to clean */
+ while (i < c->size && !can_clean_entry(c, i)) {
+ i++;
+ }
+
+ /* And count how many we can clean in a row */
+ while (i < c->size && can_clean_entry(c, i)) {
+ c->entries[i].offset = 0;
+ c->entries[i].lru_counter = 0;
+ i++;
+ to_clean++;
+ }
+
+ if (to_clean > 0) {
+ qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
+ }
+ }
+
+ c->cache_clean_lru_counter = c->lru_counter;
+}
+
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2Cache *c;
- int i;
c = g_new0(Qcow2Cache, 1);
c->size = num_tables;
c->entries = g_try_new0(Qcow2CachedTable, num_tables);
- if (!c->entries) {
- goto fail;
- }
+ c->table_array = qemu_try_blockalign(bs->file->bs,
+ (size_t) num_tables * s->cluster_size);
- for (i = 0; i < c->size; i++) {
- c->entries[i].table = qemu_try_blockalign(bs->file, s->cluster_size);
- if (c->entries[i].table == NULL) {
- goto fail;
- }
+ if (!c->entries || !c->table_array) {
+ qemu_vfree(c->table_array);
+ g_free(c->entries);
+ g_free(c);
+ c = NULL;
}
return c;
-
-fail:
- if (c->entries) {
- for (i = 0; i < c->size; i++) {
- qemu_vfree(c->entries[i].table);
- }
- }
- g_free(c->entries);
- g_free(c);
- return NULL;
}
-int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c)
+int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
{
int i;
for (i = 0; i < c->size; i++) {
assert(c->entries[i].ref == 0);
- qemu_vfree(c->entries[i].table);
}
+ qemu_vfree(c->table_array);
g_free(c->entries);
g_free(c);
@@ -107,7 +166,7 @@
static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret = 0;
if (!c->entries[i].dirty || !c->entries[i].offset) {
@@ -120,7 +179,7 @@
if (c->depends) {
ret = qcow2_cache_flush_dependency(bs, c);
} else if (c->depends_on_flush) {
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret >= 0) {
c->depends_on_flush = false;
}
@@ -151,8 +210,8 @@
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
}
- ret = bdrv_pwrite(bs->file, c->entries[i].offset, c->entries[i].table,
- s->cluster_size);
+ ret = bdrv_pwrite(bs->file, c->entries[i].offset,
+ qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
if (ret < 0) {
return ret;
}
@@ -162,9 +221,9 @@
return 0;
}
-int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int result = 0;
int ret;
int i;
@@ -178,8 +237,15 @@
}
}
+ return result;
+}
+
+int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
+{
+ int result = qcow2_cache_write(bs, c);
+
if (result == 0) {
- ret = bdrv_flush(bs->file);
+ int ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
result = ret;
}
@@ -228,66 +294,55 @@
for (i = 0; i < c->size; i++) {
assert(c->entries[i].ref == 0);
c->entries[i].offset = 0;
- c->entries[i].cache_hits = 0;
+ c->entries[i].lru_counter = 0;
}
+ qcow2_cache_table_release(bs, c, 0, c->size);
+
+ c->lru_counter = 0;
+
return 0;
}
-static int qcow2_cache_find_entry_to_replace(Qcow2Cache *c)
-{
- int i;
- int min_count = INT_MAX;
- int min_index = -1;
-
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].ref) {
- continue;
- }
-
- if (c->entries[i].cache_hits < min_count) {
- min_index = i;
- min_count = c->entries[i].cache_hits;
- }
-
- /* Give newer hits priority */
- /* TODO Check how to optimize the replacement strategy */
- c->entries[i].cache_hits /= 2;
- }
-
- if (min_index == -1) {
- /* This can't happen in current synchronous code, but leave the check
- * here as a reminder for whoever starts using AIO with the cache */
- abort();
- }
- return min_index;
-}
-
static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
uint64_t offset, void **table, bool read_from_disk)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
int ret;
+ int lookup_index;
+ uint64_t min_lru_counter = UINT64_MAX;
+ int min_lru_index = -1;
trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
offset, read_from_disk);
/* Check if the table is already cached */
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].offset == offset) {
+ i = lookup_index = (offset / s->cluster_size * 4) % c->size;
+ do {
+ const Qcow2CachedTable *t = &c->entries[i];
+ if (t->offset == offset) {
goto found;
}
+ if (t->ref == 0 && t->lru_counter < min_lru_counter) {
+ min_lru_counter = t->lru_counter;
+ min_lru_index = i;
+ }
+ if (++i == c->size) {
+ i = 0;
+ }
+ } while (i != lookup_index);
+
+ if (min_lru_index == -1) {
+ /* This can't happen in current synchronous code, but leave the check
+ * here as a reminder for whoever starts using AIO with the cache */
+ abort();
}
- /* If not, write a table back and replace it */
- i = qcow2_cache_find_entry_to_replace(c);
+ /* Cache miss: write a table back and replace it */
+ i = min_lru_index;
trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
c == s->l2_table_cache, i);
- if (i < 0) {
- return i;
- }
ret = qcow2_cache_entry_flush(bs, c, i);
if (ret < 0) {
@@ -302,22 +357,20 @@
BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
}
- ret = bdrv_pread(bs->file, offset, c->entries[i].table, s->cluster_size);
+ ret = bdrv_pread(bs->file, offset,
+ qcow2_cache_get_table_addr(bs, c, i),
+ s->cluster_size);
if (ret < 0) {
return ret;
}
}
- /* Give the table some hits for the start so that it won't be replaced
- * immediately. The number 32 is completely arbitrary. */
- c->entries[i].cache_hits = 32;
c->entries[i].offset = offset;
/* And return the right table */
found:
- c->entries[i].cache_hits++;
c->entries[i].ref++;
- *table = c->entries[i].table;
+ *table = qcow2_cache_get_table_addr(bs, c, i);
trace_qcow2_cache_get_done(qemu_coroutine_self(),
c == s->l2_table_cache, i);
@@ -337,36 +390,24 @@
return qcow2_cache_do_get(bs, c, offset, table, false);
}
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
+void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
{
- int i;
+ int i = qcow2_cache_get_table_idx(bs, c, *table);
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == *table) {
- goto found;
- }
- }
- return -ENOENT;
-
-found:
c->entries[i].ref--;
*table = NULL;
+ if (c->entries[i].ref == 0) {
+ c->entries[i].lru_counter = ++c->lru_counter;
+ }
+
assert(c->entries[i].ref >= 0);
- return 0;
}
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+ void *table)
{
- int i;
-
- for (i = 0; i < c->size; i++) {
- if (c->entries[i].table == table) {
- goto found;
- }
- }
- abort();
-
-found:
+ int i = qcow2_cache_get_table_idx(bs, c, table);
+ assert(c->entries[i].offset != 0);
c->entries[i].dirty = true;
}
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index df0b2c9..f941835 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -22,17 +22,20 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include <zlib.h>
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
+#include "qemu/bswap.h"
#include "trace.h"
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int new_l1_size2, ret, i;
uint64_t *new_l1_table;
int64_t old_l1_table_offset, old_l1_size;
@@ -62,7 +65,8 @@
}
}
- if (new_l1_size > INT_MAX / sizeof(uint64_t)) {
+ QEMU_BUILD_BUG_ON(QCOW_MAX_L1_SIZE > INT_MAX);
+ if (new_l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
return -EFBIG;
}
@@ -72,7 +76,7 @@
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_size2, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
@@ -105,7 +109,8 @@
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
- ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+ ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset,
+ new_l1_table, new_l1_size2);
if (ret < 0)
goto fail;
for(i = 0; i < s->l1_size; i++)
@@ -113,9 +118,10 @@
/* set new table */
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
- cpu_to_be32w((uint32_t*)data, new_l1_size);
+ stl_be_p(data, new_l1_size);
stq_be_p(data + 4, new_l1_table_offset);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size),
+ data, sizeof(data));
if (ret < 0) {
goto fail;
}
@@ -148,12 +154,10 @@
static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
uint64_t **l2_table)
{
- BDRVQcowState *s = bs->opaque;
- int ret;
+ BDRVQcow2State *s = bs->opaque;
- ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
-
- return ret;
+ return qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
+ (void **)l2_table);
}
/*
@@ -163,7 +167,7 @@
#define L1_ENTRIES_PER_SECTOR (512 / 8)
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
int l1_start_index;
int i, ret;
@@ -182,8 +186,9 @@
}
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
- ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
- buf, sizeof(buf));
+ ret = bdrv_pwrite_sync(bs->file,
+ s->l1_table_offset + 8 * l1_start_index,
+ buf, sizeof(buf));
if (ret < 0) {
return ret;
}
@@ -203,7 +208,7 @@
static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t old_l2_offset;
uint64_t *l2_table = NULL;
int64_t l2_offset;
@@ -253,17 +258,14 @@
memcpy(l2_table, old_table, s->cluster_size);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
- if (ret < 0) {
- goto fail;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
}
/* write the l2 table to the file */
BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
trace_qcow2_l2_allocate_write_l2(bs, l1_index);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
ret = qcow2_cache_flush(bs, s->l2_table_cache);
if (ret < 0) {
goto fail;
@@ -301,7 +303,7 @@
* as contiguous. (This allows it, for example, to stop at the first compressed
* cluster which may require a different handling)
*/
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+static int count_contiguous_clusters(int nb_clusters, int cluster_size,
uint64_t *l2_table, uint64_t stop_flags)
{
int i;
@@ -312,7 +314,7 @@
if (!offset)
return 0;
- assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
+ assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
for (i = 0; i < nb_clusters; i++) {
uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -324,14 +326,16 @@
return i;
}
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+ uint64_t *l2_table,
+ int wanted_type)
{
int i;
for (i = 0; i < nb_clusters; i++) {
int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
- if (type != QCOW2_CLUSTER_UNALLOCATED) {
+ if (type != wanted_type) {
break;
}
}
@@ -342,44 +346,61 @@
/* The crypt function is compatible with the linux cryptoloop
algorithm for < 4 GB images. NOTE: out_buf == in_buf is
supported */
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key)
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, bool enc,
+ Error **errp)
{
union {
uint64_t ll[2];
uint8_t b[16];
} ivec;
int i;
+ int ret;
for(i = 0; i < nb_sectors; i++) {
ivec.ll[0] = cpu_to_le64(sector_num);
ivec.ll[1] = 0;
- AES_cbc_encrypt(in_buf, out_buf, 512, key,
- ivec.b, enc);
+ if (qcrypto_cipher_setiv(s->cipher,
+ ivec.b, G_N_ELEMENTS(ivec.b),
+ errp) < 0) {
+ return -1;
+ }
+ if (enc) {
+ ret = qcrypto_cipher_encrypt(s->cipher,
+ in_buf,
+ out_buf,
+ 512,
+ errp);
+ } else {
+ ret = qcrypto_cipher_decrypt(s->cipher,
+ in_buf,
+ out_buf,
+ 512,
+ errp);
+ }
+ if (ret < 0) {
+ return -1;
+ }
sector_num++;
in_buf += 512;
out_buf += 512;
}
+ return 0;
}
-static int coroutine_fn copy_sectors(BlockDriverState *bs,
- uint64_t start_sect,
- uint64_t cluster_offset,
- int n_start, int n_end)
+static int coroutine_fn do_perform_cow(BlockDriverState *bs,
+ uint64_t src_cluster_offset,
+ uint64_t cluster_offset,
+ int offset_in_cluster,
+ int bytes)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUIOVector qiov;
struct iovec iov;
- int n, ret;
+ int ret;
- n = n_end - n_start;
- if (n <= 0) {
- return 0;
- }
-
- iov.iov_len = n * BDRV_SECTOR_SIZE;
+ iov.iov_len = bytes;
iov.iov_base = qemu_try_blockalign(bs, iov.iov_len);
if (iov.iov_base == NULL) {
return -ENOMEM;
@@ -398,25 +419,36 @@
* interface. This avoids double I/O throttling and request tracking,
* which can lead to deadlock when block layer copy-on-read is enabled.
*/
- ret = bs->drv->bdrv_co_readv(bs, start_sect + n_start, n, &qiov);
+ ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, start_sect + n_start,
- iov.iov_base, iov.iov_base, n, 1,
- &s->aes_encrypt_key);
+ if (bs->encrypted) {
+ Error *err = NULL;
+ int64_t sector = (cluster_offset + offset_in_cluster)
+ >> BDRV_SECTOR_BITS;
+ assert(s->cipher);
+ assert((offset_in_cluster & ~BDRV_SECTOR_MASK) == 0);
+ assert((bytes & ~BDRV_SECTOR_MASK) == 0);
+ if (qcow2_encrypt_sectors(s, sector, iov.iov_base, iov.iov_base,
+ bytes >> BDRV_SECTOR_BITS, true, &err) < 0) {
+ ret = -EIO;
+ error_free(err);
+ goto out;
+ }
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, bytes);
if (ret < 0) {
goto out;
}
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
- ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+ ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
+ bytes, &qiov, 0);
if (ret < 0) {
goto out;
}
@@ -431,50 +463,47 @@
/*
* get_cluster_offset
*
- * For a given offset of the disk image, find the cluster offset in
- * qcow2 file. The offset is stored in *cluster_offset.
+ * For a given offset of the virtual disk, find the cluster type and offset in
+ * the qcow2 file. The offset is stored in *cluster_offset.
*
- * on entry, *num is the number of contiguous sectors we'd like to
- * access following offset.
+ * On entry, *bytes is the maximum number of contiguous bytes starting at
+ * offset that we are interested in.
*
- * on exit, *num is the number of contiguous sectors we can read.
+ * On exit, *bytes is the number of bytes starting at offset that have the same
+ * cluster type and (if applicable) are stored contiguously in the image file.
+ * Compressed clusters are always returned one by one.
*
* Returns the cluster type (QCOW2_CLUSTER_*) on success, -errno in error
* cases.
*/
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset)
+ unsigned int *bytes, uint64_t *cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
- unsigned int index_in_cluster, nb_clusters;
- uint64_t nb_available, nb_needed;
+ unsigned int offset_in_cluster;
+ uint64_t bytes_available, bytes_needed, nb_clusters;
int ret;
- index_in_cluster = (offset >> 9) & (s->cluster_sectors - 1);
- nb_needed = *num + index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ bytes_needed = (uint64_t) *bytes + offset_in_cluster;
l1_bits = s->l2_bits + s->cluster_bits;
- /* compute how many bytes there are between the offset and
- * the end of the l1 entry
- */
+ /* compute how many bytes there are between the start of the cluster
+ * containing offset and the end of the l1 entry */
+ bytes_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1))
+ + offset_in_cluster;
- nb_available = (1ULL << l1_bits) - (offset & ((1ULL << l1_bits) - 1));
-
- /* compute the number of available sectors */
-
- nb_available = (nb_available >> 9) + index_in_cluster;
-
- if (nb_needed > nb_available) {
- nb_needed = nb_available;
+ if (bytes_needed > bytes_available) {
+ bytes_needed = bytes_available;
}
*cluster_offset = 0;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> l1_bits;
if (l1_index >= s->l1_size) {
@@ -506,7 +535,12 @@
l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
*cluster_offset = be64_to_cpu(l2_table[l2_index]);
- nb_clusters = size_to_clusters(s, nb_needed << 9);
+
+ nb_clusters = size_to_clusters(s, bytes_needed);
+ /* bytes_needed <= *bytes + offset_in_cluster, both of which are unsigned
+ * integers; the minimum cluster size is 512, so this assertion is always
+ * true */
+ assert(nb_clusters <= INT_MAX);
ret = qcow2_get_cluster_type(*cluster_offset);
switch (ret) {
@@ -523,13 +557,14 @@
ret = -EIO;
goto fail;
}
- c = count_contiguous_clusters(nb_clusters, s->cluster_size,
- &l2_table[l2_index], QCOW_OFLAG_ZERO);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_ZERO);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_UNALLOCATED:
/* how many empty clusters ? */
- c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+ c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+ QCOW2_CLUSTER_UNALLOCATED);
*cluster_offset = 0;
break;
case QCOW2_CLUSTER_NORMAL:
@@ -552,13 +587,18 @@
qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- nb_available = (c * s->cluster_sectors);
+ bytes_available = (int64_t)c * s->cluster_size;
out:
- if (nb_available > nb_needed)
- nb_available = nb_needed;
+ if (bytes_available > bytes_needed) {
+ bytes_available = bytes_needed;
+ }
- *num = nb_available - index_in_cluster;
+ /* bytes_available <= bytes_needed <= *bytes + offset_in_cluster;
+ * subtracting offset_in_cluster will therefore definitely yield something
+ * not exceeding UINT_MAX */
+ assert(bytes_available - offset_in_cluster <= UINT_MAX);
+ *bytes = bytes_available - offset_in_cluster;
return ret;
@@ -582,13 +622,13 @@
uint64_t **new_l2_table,
int *new_l2_index)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int l2_index;
uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
- /* seek the the l2 offset in the l1 table */
+ /* seek to the l2 offset in the l1 table */
l1_index = offset >> (s->l2_bits + s->cluster_bits);
if (l1_index >= s->l1_size) {
@@ -656,7 +696,7 @@
uint64_t offset,
int compressed_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index, ret;
uint64_t *l2_table;
int64_t cluster_offset;
@@ -692,29 +732,24 @@
/* compressed clusters never have the copied flag */
BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
l2_table[l2_index] = cpu_to_be64(cluster_offset);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return 0;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
return cluster_offset;
}
static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
- if (r->nb_sectors == 0) {
+ if (r->nb_bytes == 0) {
return 0;
}
qemu_co_mutex_unlock(&s->lock);
- ret = copy_sectors(bs, m->offset / BDRV_SECTOR_SIZE, m->alloc_offset,
- r->offset / BDRV_SECTOR_SIZE,
- r->offset / BDRV_SECTOR_SIZE + r->nb_sectors);
+ ret = do_perform_cow(bs, m->offset, m->alloc_offset, r->offset, r->nb_bytes);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
@@ -733,7 +768,7 @@
int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i, j = 0, l2_index, ret;
uint64_t *old_cluster, *l2_table;
uint64_t cluster_offset = m->alloc_offset;
@@ -771,32 +806,29 @@
if (ret < 0) {
goto err;
}
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
assert(l2_index + m->nb_clusters <= s->l2_size);
for (i = 0; i < m->nb_clusters; i++) {
/* if two concurrent writes happen to the same unallocated cluster
- * each write allocates separate cluster and writes data concurrently.
- * The first one to complete updates l2 table with pointer to its
- * cluster the second one has to do RMW (which is done above by
- * copy_sectors()), update l2 table with its cluster pointer and free
- * old cluster. This is what this loop does */
- if(l2_table[l2_index + i] != 0)
+ * each write allocates separate cluster and writes data concurrently.
+ * The first one to complete updates l2 table with pointer to its
+ * cluster the second one has to do RMW (which is done above by
+ * perform_cow()), update l2 table with its cluster pointer and free
+ * old cluster. This is what this loop does */
+ if (l2_table[l2_index + i] != 0) {
old_cluster[j++] = l2_table[l2_index + i];
+ }
l2_table[l2_index + i] = cpu_to_be64((cluster_offset +
(i << s->cluster_bits)) | QCOW_OFLAG_COPIED);
}
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto err;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
- * Also flush bs->file to get the right order for L2 and refcount update.
*
* Don't discard clusters that reach a refcount of 0 (e.g. compressed
* clusters), the next write will reuse them anyway.
@@ -819,7 +851,7 @@
* write, but require COW to be performed (this includes yet unallocated space,
* which must copy from the backing file)
*/
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
uint64_t *l2_table, int l2_index)
{
int i;
@@ -865,7 +897,7 @@
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *cur_bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowL2Meta *old_alloc;
uint64_t bytes = *cur_bytes;
@@ -938,13 +970,13 @@
static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t cluster_offset;
uint64_t *l2_table;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
unsigned int keep_clusters;
- int ret, pret;
+ int ret;
trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
*bytes);
@@ -961,6 +993,7 @@
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1011,10 +1044,7 @@
/* Cleanup */
out:
- pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (pret < 0) {
- return pret;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
/* Only return a host offset if we actually made progress. Otherwise we
* would make requirements for handle_alloc() that it can't fulfill */
@@ -1046,9 +1076,9 @@
* restarted, but the whole request should not be failed.
*/
static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *host_offset, unsigned int *nb_clusters)
+ uint64_t *host_offset, uint64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
*host_offset, *nb_clusters);
@@ -1064,7 +1094,7 @@
*host_offset = cluster_offset;
return 0;
} else {
- int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+ int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
if (ret < 0) {
return ret;
}
@@ -1096,11 +1126,11 @@
static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int l2_index;
uint64_t *l2_table;
uint64_t entry;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
int ret;
uint64_t alloc_cluster_offset;
@@ -1118,6 +1148,7 @@
l2_index = offset_to_l2_index(s, guest_offset);
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
/* Find L2 entry for the first involved cluster */
ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1139,10 +1170,7 @@
* wrong with our code. */
assert(nb_clusters > 0);
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
/* Allocate, if necessary at a given offset in the image file */
alloc_cluster_offset = start_of_cluster(s, *host_offset);
@@ -1172,25 +1200,20 @@
/*
* Save info needed for meta data update.
*
- * requested_sectors: Number of sectors from the start of the first
+ * requested_bytes: Number of bytes from the start of the first
* newly allocated cluster to the end of the (possibly shortened
* before) write request.
*
- * avail_sectors: Number of sectors from the start of the first
+ * avail_bytes: Number of bytes from the start of the first
* newly allocated to the end of the last newly allocated cluster.
*
- * nb_sectors: The number of sectors from the start of the first
+ * nb_bytes: The number of bytes from the start of the first
* newly allocated cluster to the end of the area that the write
* request actually writes to (excluding COW at the end)
*/
- int requested_sectors =
- (*bytes + offset_into_cluster(s, guest_offset))
- >> BDRV_SECTOR_BITS;
- int avail_sectors = nb_clusters
- << (s->cluster_bits - BDRV_SECTOR_BITS);
- int alloc_n_start = offset_into_cluster(s, guest_offset)
- >> BDRV_SECTOR_BITS;
- int nb_sectors = MIN(requested_sectors, avail_sectors);
+ uint64_t requested_bytes = *bytes + offset_into_cluster(s, guest_offset);
+ int avail_bytes = MIN(INT_MAX, nb_clusters << s->cluster_bits);
+ int nb_bytes = MIN(requested_bytes, avail_bytes);
QCowL2Meta *old_m = *m;
*m = g_malloc0(sizeof(**m));
@@ -1201,23 +1224,21 @@
.alloc_offset = alloc_cluster_offset,
.offset = start_of_cluster(s, guest_offset),
.nb_clusters = nb_clusters,
- .nb_available = nb_sectors,
.cow_start = {
.offset = 0,
- .nb_sectors = alloc_n_start,
+ .nb_bytes = offset_into_cluster(s, guest_offset),
},
.cow_end = {
- .offset = nb_sectors * BDRV_SECTOR_SIZE,
- .nb_sectors = avail_sectors - nb_sectors,
+ .offset = nb_bytes,
+ .nb_bytes = avail_bytes - nb_bytes,
},
};
qemu_co_queue_init(&(*m)->dependent_requests);
QLIST_INSERT_HEAD(&s->cluster_allocs, *m, next_in_flight);
*host_offset = alloc_cluster_offset + offset_into_cluster(s, guest_offset);
- *bytes = MIN(*bytes, (nb_sectors * BDRV_SECTOR_SIZE)
- - offset_into_cluster(s, guest_offset));
+ *bytes = MIN(*bytes, nb_bytes - offset_into_cluster(s, guest_offset));
assert(*bytes != 0);
return 1;
@@ -1249,21 +1270,20 @@
* Return 0 on success and -errno in error cases
*/
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m)
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start, remaining;
uint64_t cluster_offset;
uint64_t cur_bytes;
int ret;
- trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *num);
-
- assert((offset & ~BDRV_SECTOR_MASK) == 0);
+ trace_qcow2_alloc_clusters_offset(qemu_coroutine_self(), offset, *bytes);
again:
start = offset;
- remaining = *num << BDRV_SECTOR_BITS;
+ remaining = *bytes;
cluster_offset = 0;
*host_offset = 0;
cur_bytes = 0;
@@ -1349,8 +1369,8 @@
}
}
- *num -= remaining >> BDRV_SECTOR_BITS;
- assert(*num > 0);
+ *bytes -= remaining;
+ assert(*bytes > 0);
assert(*host_offset != 0);
return 0;
@@ -1385,7 +1405,7 @@
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, csize, nb_csectors, sector_offset;
uint64_t coffset;
@@ -1395,7 +1415,8 @@
sector_offset = coffset & 511;
csize = nb_csectors * 512 - sector_offset;
BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
- ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+ ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
+ nb_csectors);
if (ret < 0) {
return ret;
}
@@ -1414,9 +1435,10 @@
* clusters.
*/
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters, enum qcow2_discard_type type, bool full_discard)
+ uint64_t nb_clusters, enum qcow2_discard_type type,
+ bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1429,6 +1451,7 @@
/* Limit nb_clusters to one L2 table */
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_l2_entry;
@@ -1450,7 +1473,7 @@
*/
switch (qcow2_get_cluster_type(old_l2_entry)) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (full_discard || !bs->backing_hd) {
+ if (full_discard || !bs->backing) {
continue;
}
break;
@@ -1470,7 +1493,7 @@
}
/* First remove L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
if (!full_discard && s->qcow_version >= 3) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
} else {
@@ -1481,10 +1504,7 @@
qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
}
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
return nb_clusters;
}
@@ -1492,9 +1512,9 @@
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
int nb_sectors, enum qcow2_discard_type type, bool full_discard)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t end_offset;
- unsigned int nb_clusters;
+ uint64_t nb_clusters;
int ret;
end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
@@ -1536,9 +1556,9 @@
* clusters.
*/
static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
+ uint64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table;
int l2_index;
int ret;
@@ -1551,6 +1571,7 @@
/* Limit nb_clusters to one L2 table */
nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+ assert(nb_clusters <= INT_MAX);
for (i = 0; i < nb_clusters; i++) {
uint64_t old_offset;
@@ -1558,7 +1579,7 @@
old_offset = be64_to_cpu(l2_table[l2_index + i]);
/* Update L2 entries */
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
if (old_offset & QCOW_OFLAG_COMPRESSED) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1567,18 +1588,15 @@
}
}
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- return ret;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
return nb_clusters;
}
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
- unsigned int nb_clusters;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t nb_clusters;
int ret;
/* The zero flag is only supported by version 3 and newer */
@@ -1620,9 +1638,10 @@
static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
int l1_size, int64_t *visited_l1_entries,
int64_t l1_entries,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bool is_active_l1 = (l1_table == s->l1_table);
uint64_t *l2_table = NULL;
int ret;
@@ -1631,7 +1650,7 @@
if (!is_active_l1) {
/* inactive L2 tables require a buffer to be stored in when loading
* them from disk */
- l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+ l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
if (l2_table == NULL) {
return -ENOMEM;
}
@@ -1640,17 +1659,25 @@
for (i = 0; i < l1_size; i++) {
uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
bool l2_dirty = false;
- int l2_refcount;
+ uint64_t l2_refcount;
if (!l2_offset) {
/* unallocated */
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
continue;
}
+ if (offset_into_cluster(s, l2_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
+ PRIx64 " unaligned (L1 index: %#x)",
+ l2_offset, i);
+ ret = -EIO;
+ goto fail;
+ }
+
if (is_active_l1) {
/* get active L2 tables from cache */
ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
@@ -1658,15 +1685,15 @@
} else {
/* load inactive L2 tables from disk */
ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ (void *)l2_table, s->cluster_sectors);
}
if (ret < 0) {
goto fail;
}
- l2_refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
- if (l2_refcount < 0) {
- ret = l2_refcount;
+ ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+ &l2_refcount);
+ if (ret < 0) {
goto fail;
}
@@ -1681,7 +1708,7 @@
}
if (!preallocated) {
- if (!bs->backing_hd) {
+ if (!bs->backing) {
/* not backed; therefore we can simply deallocate the
* cluster */
l2_table[j] = 0;
@@ -1699,7 +1726,8 @@
/* For shared L2 tables, set the refcount accordingly (it is
* already 1 and needs to be l2_refcount) */
ret = qcow2_update_cluster_refcount(bs,
- offset >> s->cluster_bits, l2_refcount - 1,
+ offset >> s->cluster_bits,
+ refcount_diff(1, l2_refcount), false,
QCOW2_DISCARD_OTHER);
if (ret < 0) {
qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1709,6 +1737,19 @@
}
}
+ if (offset_into_cluster(s, offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
+ "%#" PRIx64 " unaligned (L2 offset: %#"
+ PRIx64 ", L2 index: %#x)", offset,
+ l2_offset, j);
+ if (!preallocated) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_ALWAYS);
+ }
+ ret = -EIO;
+ goto fail;
+ }
+
ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
if (ret < 0) {
if (!preallocated) {
@@ -1718,8 +1759,7 @@
goto fail;
}
- ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
- s->cluster_sectors, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
if (ret < 0) {
if (!preallocated) {
qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1738,14 +1778,10 @@
if (is_active_l1) {
if (l2_dirty) {
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
qcow2_cache_depends_on_flush(s->l2_table_cache);
}
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
- if (ret < 0) {
- l2_table = NULL;
- goto fail;
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
} else {
if (l2_dirty) {
ret = qcow2_pre_write_overlap_check(bs,
@@ -1756,7 +1792,7 @@
}
ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
- (void *)l2_table, s->cluster_sectors);
+ (void *)l2_table, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -1765,7 +1801,7 @@
(*visited_l1_entries)++;
if (status_cb) {
- status_cb(bs, *visited_l1_entries, l1_entries);
+ status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
}
}
@@ -1776,12 +1812,7 @@
if (!is_active_l1) {
qemu_vfree(l2_table);
} else {
- if (ret < 0) {
- qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
- } else {
- ret = qcow2_cache_put(bs, s->l2_table_cache,
- (void **)&l2_table);
- }
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
}
}
return ret;
@@ -1794,9 +1825,10 @@
* qcow2 version which doesn't yet support metadata zero clusters.
*/
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL;
int64_t l1_entries = 0, visited_l1_entries = 0;
int ret;
@@ -1811,7 +1843,7 @@
ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
@@ -1829,13 +1861,14 @@
}
for (i = 0; i < s->nb_snapshots; i++) {
- int l1_sectors = (s->snapshots[i].l1_size * sizeof(uint64_t) +
- BDRV_SECTOR_SIZE - 1) / BDRV_SECTOR_SIZE;
+ int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size *
+ sizeof(uint64_t), BDRV_SECTOR_SIZE);
l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
- ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
- BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+ ret = bdrv_read(bs->file,
+ s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+ (void *)l1_table, l1_sectors);
if (ret < 0) {
goto fail;
}
@@ -1846,7 +1879,7 @@
ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
&visited_l1_entries, l1_entries,
- status_cb);
+ status_cb, cb_opaque);
if (ret < 0) {
goto fail;
}
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 9afdb40..cbfb3fe 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -22,15 +22,62 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
#include "block/qcow2.h"
#include "qemu/range.h"
+#include "qemu/bswap.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size);
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length,
- int addend, enum qcow2_discard_type type);
+ int64_t offset, int64_t length, uint64_t addend,
+ bool decrease, enum qcow2_discard_type type);
+
+static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index);
+static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index);
+
+static void set_refcount_ro0(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro1(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro2(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro3(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro4(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro5(void *refcount_array, uint64_t index,
+ uint64_t value);
+static void set_refcount_ro6(void *refcount_array, uint64_t index,
+ uint64_t value);
+
+
+static Qcow2GetRefcountFunc *const get_refcount_funcs[] = {
+ &get_refcount_ro0,
+ &get_refcount_ro1,
+ &get_refcount_ro2,
+ &get_refcount_ro3,
+ &get_refcount_ro4,
+ &get_refcount_ro5,
+ &get_refcount_ro6
+};
+
+static Qcow2SetRefcountFunc *const set_refcount_funcs[] = {
+ &set_refcount_ro0,
+ &set_refcount_ro1,
+ &set_refcount_ro2,
+ &set_refcount_ro3,
+ &set_refcount_ro4,
+ &set_refcount_ro5,
+ &set_refcount_ro6
+};
/*********************************************************/
@@ -38,10 +85,15 @@
int qcow2_refcount_init(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_size2, i;
int ret;
+ assert(s->refcount_order >= 0 && s->refcount_order <= 6);
+
+ s->get_refcount = get_refcount_funcs[s->refcount_order];
+ s->set_refcount = set_refcount_funcs[s->refcount_order];
+
assert(s->refcount_table_size <= INT_MAX / sizeof(uint64_t));
refcount_table_size2 = s->refcount_table_size * sizeof(uint64_t);
s->refcount_table = g_try_malloc(refcount_table_size2);
@@ -67,46 +119,135 @@
void qcow2_refcount_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
g_free(s->refcount_table);
}
+static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index)
+{
+ return (((const uint8_t *)refcount_array)[index / 8] >> (index % 8)) & 0x1;
+}
+
+static void set_refcount_ro0(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 1));
+ ((uint8_t *)refcount_array)[index / 8] &= ~(0x1 << (index % 8));
+ ((uint8_t *)refcount_array)[index / 8] |= value << (index % 8);
+}
+
+static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index)
+{
+ return (((const uint8_t *)refcount_array)[index / 4] >> (2 * (index % 4)))
+ & 0x3;
+}
+
+static void set_refcount_ro1(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 2));
+ ((uint8_t *)refcount_array)[index / 4] &= ~(0x3 << (2 * (index % 4)));
+ ((uint8_t *)refcount_array)[index / 4] |= value << (2 * (index % 4));
+}
+
+static uint64_t get_refcount_ro2(const void *refcount_array, uint64_t index)
+{
+ return (((const uint8_t *)refcount_array)[index / 2] >> (4 * (index % 2)))
+ & 0xf;
+}
+
+static void set_refcount_ro2(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 4));
+ ((uint8_t *)refcount_array)[index / 2] &= ~(0xf << (4 * (index % 2)));
+ ((uint8_t *)refcount_array)[index / 2] |= value << (4 * (index % 2));
+}
+
+static uint64_t get_refcount_ro3(const void *refcount_array, uint64_t index)
+{
+ return ((const uint8_t *)refcount_array)[index];
+}
+
+static void set_refcount_ro3(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 8));
+ ((uint8_t *)refcount_array)[index] = value;
+}
+
+static uint64_t get_refcount_ro4(const void *refcount_array, uint64_t index)
+{
+ return be16_to_cpu(((const uint16_t *)refcount_array)[index]);
+}
+
+static void set_refcount_ro4(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 16));
+ ((uint16_t *)refcount_array)[index] = cpu_to_be16(value);
+}
+
+static uint64_t get_refcount_ro5(const void *refcount_array, uint64_t index)
+{
+ return be32_to_cpu(((const uint32_t *)refcount_array)[index]);
+}
+
+static void set_refcount_ro5(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ assert(!(value >> 32));
+ ((uint32_t *)refcount_array)[index] = cpu_to_be32(value);
+}
+
+static uint64_t get_refcount_ro6(const void *refcount_array, uint64_t index)
+{
+ return be64_to_cpu(((const uint64_t *)refcount_array)[index]);
+}
+
+static void set_refcount_ro6(void *refcount_array, uint64_t index,
+ uint64_t value)
+{
+ ((uint64_t *)refcount_array)[index] = cpu_to_be64(value);
+}
+
+
static int load_refcount_block(BlockDriverState *bs,
int64_t refcount_block_offset,
void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
- int ret;
+ BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_LOAD);
- ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- refcount_block);
-
- return ret;
+ return qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
+ refcount_block);
}
/*
- * Returns the refcount of the cluster given by its index. Any non-negative
- * return value is the refcount of the cluster, negative values are -errno
- * and indicate an error.
+ * Retrieves the refcount of the cluster given by its index and stores it in
+ * *refcount. Returns 0 on success and -errno on failure.
*/
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index)
+int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
+ uint64_t *refcount)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t refcount_table_index, block_index;
int64_t refcount_block_offset;
int ret;
- uint16_t *refcount_block;
- uint16_t refcount;
+ void *refcount_block;
refcount_table_index = cluster_index >> s->refcount_block_bits;
- if (refcount_table_index >= s->refcount_table_size)
+ if (refcount_table_index >= s->refcount_table_size) {
+ *refcount = 0;
return 0;
+ }
refcount_block_offset =
s->refcount_table[refcount_table_index] & REFT_OFFSET_MASK;
- if (!refcount_block_offset)
+ if (!refcount_block_offset) {
+ *refcount = 0;
return 0;
+ }
if (offset_into_cluster(s, refcount_block_offset)) {
qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#" PRIx64
@@ -116,28 +257,24 @@
}
ret = qcow2_cache_get(bs, s->refcount_block_cache, refcount_block_offset,
- (void**) &refcount_block);
+ &refcount_block);
if (ret < 0) {
return ret;
}
block_index = cluster_index & (s->refcount_block_size - 1);
- refcount = be16_to_cpu(refcount_block[block_index]);
+ *refcount = s->get_refcount(refcount_block, block_index);
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- return ret;
- }
+ qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
- return refcount;
+ return 0;
}
/*
* Rounds the refcount table size up to avoid growing the table for each single
* refcount block that is allocated.
*/
-static unsigned int next_refcount_table_size(BDRVQcowState *s,
+static unsigned int next_refcount_table_size(BDRVQcow2State *s,
unsigned int min_size)
{
unsigned int min_clusters = (min_size >> (s->cluster_bits - 3)) + 1;
@@ -153,7 +290,7 @@
/* Checks if two offsets are described by the same refcount block */
-static int in_same_refcount_block(BDRVQcowState *s, uint64_t offset_a,
+static int in_same_refcount_block(BDRVQcow2State *s, uint64_t offset_a,
uint64_t offset_b)
{
uint64_t block_a = offset_a >> (s->cluster_bits + s->refcount_block_bits);
@@ -169,9 +306,9 @@
* Returns 0 on success or -errno in error case
*/
static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, uint16_t **refcount_block)
+ int64_t cluster_index, void **refcount_block)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
int ret;
@@ -196,7 +333,7 @@
}
return load_refcount_block(bs, refcount_block_offset,
- (void**) refcount_block);
+ refcount_block);
}
}
@@ -246,7 +383,7 @@
if (in_same_refcount_block(s, new_block, cluster_index << s->cluster_bits)) {
/* Zero the new refcount block before updating it */
ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
+ refcount_block);
if (ret < 0) {
goto fail_block;
}
@@ -256,11 +393,11 @@
/* The block describes itself, need to update the cache */
int block_index = (new_block >> s->cluster_bits) &
(s->refcount_block_size - 1);
- (*refcount_block)[block_index] = cpu_to_be16(1);
+ s->set_refcount(*refcount_block, block_index, 1);
} else {
/* Described somewhere else. This can recurse at most twice before we
* arrive at a block that describes itself. */
- ret = update_refcount(bs, new_block, s->cluster_size, 1,
+ ret = update_refcount(bs, new_block, s->cluster_size, 1, false,
QCOW2_DISCARD_NEVER);
if (ret < 0) {
goto fail_block;
@@ -274,7 +411,7 @@
/* Initialize the new refcount block only after updating its refcount,
* update_refcount uses the refcount cache itself */
ret = qcow2_cache_get_empty(bs, s->refcount_block_cache, new_block,
- (void**) refcount_block);
+ refcount_block);
if (ret < 0) {
goto fail_block;
}
@@ -284,7 +421,7 @@
/* Now the new refcount block needs to be written to disk */
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE);
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, *refcount_block);
+ qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache, *refcount_block);
ret = qcow2_cache_flush(bs, s->refcount_block_cache);
if (ret < 0) {
goto fail_block;
@@ -308,10 +445,7 @@
return -EAGAIN;
}
- ret = qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
- if (ret < 0) {
- goto fail_block;
- }
+ qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
/*
* If we come here, we need to grow the refcount table. Again, a new
@@ -326,8 +460,20 @@
*/
BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_GROW);
- /* Calculate the number of refcount blocks needed so far */
- uint64_t blocks_used = DIV_ROUND_UP(cluster_index, s->refcount_block_size);
+ /* Calculate the number of refcount blocks needed so far; this will be the
+ * basis for calculating the index of the first cluster used for the
+ * self-describing refcount structures which we are about to create.
+ *
+ * Because we reached this point, there cannot be any refcount entries for
+ * cluster_index or higher indices yet. However, because new_block has been
+ * allocated to describe that cluster (and it will assume this role later
+ * on), we cannot use that index; also, new_block may actually have a higher
+ * cluster index than cluster_index, so it needs to be taken into account
+ * here (and 1 needs to be added to its value because that cluster is used).
+ */
+ uint64_t blocks_used = DIV_ROUND_UP(MAX(cluster_index + 1,
+ (new_block >> s->cluster_bits) + 1),
+ s->refcount_block_size);
if (blocks_used > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
return -EFBIG;
@@ -341,14 +487,12 @@
uint64_t table_clusters =
size_to_clusters(s, table_size * sizeof(uint64_t));
blocks_clusters = 1 +
- ((table_clusters + s->refcount_block_size - 1)
- / s->refcount_block_size);
+ DIV_ROUND_UP(table_clusters, s->refcount_block_size);
uint64_t meta_clusters = table_clusters + blocks_clusters;
last_table_size = table_size;
table_size = next_refcount_table_size(s, blocks_used +
- ((meta_clusters + s->refcount_block_size - 1)
- / s->refcount_block_size));
+ DIV_ROUND_UP(meta_clusters, s->refcount_block_size));
} while (last_table_size != table_size);
@@ -362,7 +506,7 @@
s->cluster_size;
uint64_t table_offset = meta_offset + blocks_clusters * s->cluster_size;
uint64_t *new_table = g_try_new0(uint64_t, table_size);
- uint16_t *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
+ void *new_blocks = g_try_malloc0(blocks_clusters * s->cluster_size);
assert(table_size > 0 && blocks_clusters > 0);
if (new_table == NULL || new_blocks == NULL) {
@@ -384,7 +528,7 @@
uint64_t table_clusters = size_to_clusters(s, table_size * sizeof(uint64_t));
int block = 0;
for (i = 0; i < table_clusters + blocks_clusters; i++) {
- new_blocks[block++] = cpu_to_be16(1);
+ s->set_refcount(new_blocks, block++, 1);
}
/* Write refcount blocks to disk */
@@ -414,12 +558,16 @@
}
/* Hook up the new refcount table in the qcow2 header */
- uint8_t data[12];
- cpu_to_be64w((uint64_t*)data, table_offset);
- cpu_to_be32w((uint32_t*)(data + 8), table_clusters);
+ struct QEMU_PACKED {
+ uint64_t d64;
+ uint32_t d32;
+ } data;
+ data.d64 = cpu_to_be64(table_offset);
+ data.d32 = cpu_to_be32(table_clusters);
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, refcount_table_offset),
- data, sizeof(data));
+ ret = bdrv_pwrite_sync(bs->file,
+ offsetof(QCowHeader, refcount_table_offset),
+ &data, sizeof(data));
if (ret < 0) {
goto fail_table;
}
@@ -437,7 +585,7 @@
qcow2_free_clusters(bs, old_table_offset, old_table_size * sizeof(uint64_t),
QCOW2_DISCARD_OTHER);
- ret = load_refcount_block(bs, new_block, (void**) refcount_block);
+ ret = load_refcount_block(bs, new_block, refcount_block);
if (ret < 0) {
return ret;
}
@@ -452,14 +600,14 @@
g_free(new_table);
fail_block:
if (*refcount_block != NULL) {
- qcow2_cache_put(bs, s->refcount_block_cache, (void**) refcount_block);
+ qcow2_cache_put(bs, s->refcount_block_cache, refcount_block);
}
return ret;
}
void qcow2_process_discards(BlockDriverState *bs, int ret)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *next;
QTAILQ_FOREACH_SAFE(d, &s->discards, next, next) {
@@ -467,9 +615,7 @@
/* Discard is optional, ignore the return value */
if (ret >= 0) {
- bdrv_discard(bs->file,
- d->offset >> BDRV_SECTOR_BITS,
- d->bytes >> BDRV_SECTOR_BITS);
+ bdrv_pdiscard(bs->file->bs, d->offset, d->bytes);
}
g_free(d);
@@ -479,7 +625,7 @@
static void update_refcount_discard(BlockDriverState *bs,
uint64_t offset, uint64_t length)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2DiscardRegion *d, *p, *next;
QTAILQ_FOREACH(d, &s->discards, next) {
@@ -527,18 +673,25 @@
}
/* XXX: cache several refcount block clusters ? */
+/* @addend is the absolute value of the addend; if @decrease is set, @addend
+ * will be subtracted from the current refcount, otherwise it will be added */
static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length, int addend, enum qcow2_discard_type type)
+ int64_t offset,
+ int64_t length,
+ uint64_t addend,
+ bool decrease,
+ enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t start, last, cluster_offset;
- uint16_t *refcount_block = NULL;
+ void *refcount_block = NULL;
int64_t old_table_index = -1;
int ret;
#ifdef DEBUG_ALLOC2
- fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64 " addend=%d\n",
- offset, length, addend);
+ fprintf(stderr, "update_refcount: offset=%" PRId64 " size=%" PRId64
+ " addend=%s%" PRIu64 "\n", offset, length, decrease ? "-" : "",
+ addend);
#endif
if (length < 0) {
return -EINVAL;
@@ -546,7 +699,7 @@
return 0;
}
- if (addend < 0) {
+ if (decrease) {
qcow2_cache_set_dependency(bs, s->refcount_block_cache,
s->l2_table_cache);
}
@@ -556,20 +709,16 @@
for(cluster_offset = start; cluster_offset <= last;
cluster_offset += s->cluster_size)
{
- int block_index, refcount;
+ int block_index;
+ uint64_t refcount;
int64_t cluster_index = cluster_offset >> s->cluster_bits;
int64_t table_index = cluster_index >> s->refcount_block_bits;
/* Load the refcount block and allocate it if needed */
if (table_index != old_table_index) {
if (refcount_block) {
- ret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (ret < 0) {
- goto fail;
- }
+ qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
}
-
ret = alloc_refcount_block(bs, cluster_index, &refcount_block);
if (ret < 0) {
goto fail;
@@ -577,21 +726,29 @@
}
old_table_index = table_index;
- qcow2_cache_entry_mark_dirty(s->refcount_block_cache, refcount_block);
+ qcow2_cache_entry_mark_dirty(bs, s->refcount_block_cache,
+ refcount_block);
/* we can update the count and save it */
block_index = cluster_index & (s->refcount_block_size - 1);
- refcount = be16_to_cpu(refcount_block[block_index]);
- refcount += addend;
- if (refcount < 0 || refcount > 0xffff) {
+ refcount = s->get_refcount(refcount_block, block_index);
+ if (decrease ? (refcount - addend > refcount)
+ : (refcount + addend < refcount ||
+ refcount + addend > s->refcount_max))
+ {
ret = -EINVAL;
goto fail;
}
+ if (decrease) {
+ refcount -= addend;
+ } else {
+ refcount += addend;
+ }
if (refcount == 0 && cluster_index < s->free_cluster_index) {
s->free_cluster_index = cluster_index;
}
- refcount_block[block_index] = cpu_to_be16(refcount);
+ s->set_refcount(refcount_block, block_index, refcount);
if (refcount == 0 && s->discard_passthrough[type]) {
update_refcount_discard(bs, cluster_offset, s->cluster_size);
@@ -606,12 +763,7 @@
/* Write last changed block to disk */
if (refcount_block) {
- int wret;
- wret = qcow2_cache_put(bs, s->refcount_block_cache,
- (void**) &refcount_block);
- if (wret < 0) {
- return ret < 0 ? ret : wret;
- }
+ qcow2_cache_put(bs, s->refcount_block_cache, &refcount_block);
}
/*
@@ -620,8 +772,8 @@
*/
if (ret < 0) {
int dummy;
- dummy = update_refcount(bs, offset, cluster_offset - offset, -addend,
- QCOW2_DISCARD_NEVER);
+ dummy = update_refcount(bs, offset, cluster_offset - offset, addend,
+ !decrease, QCOW2_DISCARD_NEVER);
(void)dummy;
}
@@ -631,24 +783,26 @@
/*
* Increases or decreases the refcount of a given cluster.
*
- * If the return value is non-negative, it is the new refcount of the cluster.
- * If it is negative, it is -errno and indicates an error.
+ * @addend is the absolute value of the addend; if @decrease is set, @addend
+ * will be subtracted from the current refcount, otherwise it will be added.
+ *
+ * On success 0 is returned; on failure -errno is returned.
*/
int qcow2_update_cluster_refcount(BlockDriverState *bs,
int64_t cluster_index,
- int addend,
+ uint64_t addend, bool decrease,
enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
ret = update_refcount(bs, cluster_index << s->cluster_bits, 1, addend,
- type);
+ decrease, type);
if (ret < 0) {
return ret;
}
- return qcow2_get_refcount(bs, cluster_index);
+ return 0;
}
@@ -661,18 +815,23 @@
/* return < 0 if error */
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size)
{
- BDRVQcowState *s = bs->opaque;
- uint64_t i, nb_clusters;
- int refcount;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t i, nb_clusters, refcount;
+ int ret;
+
+ /* We can't allocate clusters if they may still be queued for discard. */
+ if (s->cache_discards) {
+ qcow2_process_discards(bs, 0);
+ }
nb_clusters = size_to_clusters(s, size);
retry:
for(i = 0; i < nb_clusters; i++) {
uint64_t next_cluster_index = s->free_cluster_index++;
- refcount = qcow2_get_refcount(bs, next_cluster_index);
+ ret = qcow2_get_refcount(bs, next_cluster_index, &refcount);
- if (refcount < 0) {
- return refcount;
+ if (ret < 0) {
+ return ret;
} else if (refcount != 0) {
goto retry;
}
@@ -706,7 +865,7 @@
return offset;
}
- ret = update_refcount(bs, offset, size, 1, QCOW2_DISCARD_NEVER);
+ ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
if (ret < 0) {
@@ -716,13 +875,13 @@
return offset;
}
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters)
+int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
- uint64_t cluster_index;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t cluster_index, refcount;
uint64_t i;
- int refcount, ret;
+ int ret;
assert(nb_clusters >= 0);
if (nb_clusters == 0) {
@@ -733,17 +892,16 @@
/* Check how many clusters there are free */
cluster_index = offset >> s->cluster_bits;
for(i = 0; i < nb_clusters; i++) {
- refcount = qcow2_get_refcount(bs, cluster_index++);
-
- if (refcount < 0) {
- return refcount;
+ ret = qcow2_get_refcount(bs, cluster_index++, &refcount);
+ if (ret < 0) {
+ return ret;
} else if (refcount != 0) {
break;
}
}
/* And then allocate them */
- ret = update_refcount(bs, offset, i << s->cluster_bits, 1,
+ ret = update_refcount(bs, offset, i << s->cluster_bits, 1, false,
QCOW2_DISCARD_NEVER);
} while (ret == -EAGAIN);
@@ -758,55 +916,64 @@
contiguous sectors. size must be <= cluster_size */
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
- BDRVQcowState *s = bs->opaque;
- int64_t offset, cluster_offset;
- int free_in_cluster;
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+ size_t free_in_cluster;
+ int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
assert(size > 0 && size <= s->cluster_size);
- if (s->free_byte_offset == 0) {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
+ assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
+
+ offset = s->free_byte_offset;
+
+ if (offset) {
+ uint64_t refcount;
+ ret = qcow2_get_refcount(bs, offset >> s->cluster_bits, &refcount);
+ if (ret < 0) {
+ return ret;
}
- s->free_byte_offset = offset;
- }
- redo:
- free_in_cluster = s->cluster_size -
- offset_into_cluster(s, s->free_byte_offset);
- if (size <= free_in_cluster) {
- /* enough space in current cluster */
- offset = s->free_byte_offset;
- s->free_byte_offset += size;
- free_in_cluster -= size;
- if (free_in_cluster == 0)
- s->free_byte_offset = 0;
- if (offset_into_cluster(s, offset) != 0)
- qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- } else {
- offset = qcow2_alloc_clusters(bs, s->cluster_size);
- if (offset < 0) {
- return offset;
- }
- cluster_offset = start_of_cluster(s, s->free_byte_offset);
- if ((cluster_offset + s->cluster_size) == offset) {
- /* we are lucky: contiguous data */
- offset = s->free_byte_offset;
- qcow2_update_cluster_refcount(bs, offset >> s->cluster_bits, 1,
- QCOW2_DISCARD_NEVER);
- s->free_byte_offset += size;
- } else {
- s->free_byte_offset = offset;
- goto redo;
+
+ if (refcount == s->refcount_max) {
+ offset = 0;
}
}
- /* The cluster refcount was incremented, either by qcow2_alloc_clusters()
- * or explicitly by qcow2_update_cluster_refcount(). Refcount blocks must
- * be flushed before the caller's L2 table updates.
- */
+ free_in_cluster = s->cluster_size - offset_into_cluster(s, offset);
+ do {
+ if (!offset || free_in_cluster < size) {
+ int64_t new_cluster = alloc_clusters_noref(bs, s->cluster_size);
+ if (new_cluster < 0) {
+ return new_cluster;
+ }
+
+ if (!offset || ROUND_UP(offset, s->cluster_size) != new_cluster) {
+ offset = new_cluster;
+ free_in_cluster = s->cluster_size;
+ } else {
+ free_in_cluster += s->cluster_size;
+ }
+ }
+
+ assert(offset);
+ ret = update_refcount(bs, offset, size, 1, false, QCOW2_DISCARD_NEVER);
+ if (ret < 0) {
+ offset = 0;
+ }
+ } while (ret == -EAGAIN);
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* The cluster refcount was incremented; refcount blocks must be flushed
+ * before the caller's L2 table updates. */
qcow2_cache_set_dependency(bs, s->l2_table_cache, s->refcount_block_cache);
+
+ s->free_byte_offset = offset + size;
+ if (!offset_into_cluster(s, s->free_byte_offset)) {
+ s->free_byte_offset = 0;
+ }
+
return offset;
}
@@ -817,7 +984,7 @@
int ret;
BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_FREE);
- ret = update_refcount(bs, offset, size, -1, type);
+ ret = update_refcount(bs, offset, size, 1, true, type);
if (ret < 0) {
fprintf(stderr, "qcow2_free_clusters failed: %s\n", strerror(-ret));
/* TODO Remember the clusters to free them later and avoid leaking */
@@ -831,7 +998,7 @@
void qcow2_free_any_clusters(BlockDriverState *bs, uint64_t l2_entry,
int nb_clusters, enum qcow2_discard_type type)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
switch (qcow2_get_cluster_type(l2_entry)) {
case QCOW2_CLUSTER_COMPRESSED:
@@ -875,13 +1042,15 @@
int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int64_t l1_table_offset, int l1_size, int addend)
{
- BDRVQcowState *s = bs->opaque;
- uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t *l1_table, *l2_table, l2_offset, offset, l1_size2, refcount;
bool l1_allocated = false;
int64_t old_offset, old_l2_offset;
- int i, j, l1_modified = 0, nb_csectors, refcount;
+ int i, j, l1_modified = 0, nb_csectors;
int ret;
+ assert(addend >= -1 && addend <= 1);
+
l2_table = NULL;
l1_table = NULL;
l1_size2 = l1_size * sizeof(uint64_t);
@@ -946,7 +1115,7 @@
if (addend != 0) {
ret = update_refcount(bs,
(offset & s->cluster_offset_mask) & ~511,
- nb_csectors * 512, addend,
+ nb_csectors * 512, abs(addend), addend < 0,
QCOW2_DISCARD_SNAPSHOT);
if (ret < 0) {
goto fail;
@@ -976,15 +1145,16 @@
break;
}
if (addend != 0) {
- refcount = qcow2_update_cluster_refcount(bs,
- cluster_index, addend,
+ ret = qcow2_update_cluster_refcount(bs,
+ cluster_index, abs(addend), addend < 0,
QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = qcow2_get_refcount(bs, cluster_index);
+ if (ret < 0) {
+ goto fail;
+ }
}
- if (refcount < 0) {
- ret = refcount;
+ ret = qcow2_get_refcount(bs, cluster_index, &refcount);
+ if (ret < 0) {
goto fail;
}
break;
@@ -1006,24 +1176,25 @@
s->refcount_block_cache);
}
l2_table[j] = cpu_to_be64(offset);
- qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+ qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache,
+ l2_table);
}
}
- ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
- if (ret < 0) {
- goto fail;
- }
-
+ qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
if (addend != 0) {
- refcount = qcow2_update_cluster_refcount(bs, l2_offset >>
- s->cluster_bits, addend, QCOW2_DISCARD_SNAPSHOT);
- } else {
- refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
+ ret = qcow2_update_cluster_refcount(bs, l2_offset >>
+ s->cluster_bits,
+ abs(addend), addend < 0,
+ QCOW2_DISCARD_SNAPSHOT);
+ if (ret < 0) {
+ goto fail;
+ }
}
- if (refcount < 0) {
- ret = refcount;
+ ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+ &refcount);
+ if (ret < 0) {
goto fail;
} else if (refcount == 1) {
l2_offset |= QCOW_OFLAG_COPIED;
@@ -1050,7 +1221,8 @@
cpu_to_be64s(&l1_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
+ l1_table, l1_size2);
for (i = 0; i < l1_size; i++) {
be64_to_cpus(&l1_table[i]);
@@ -1068,22 +1240,84 @@
/* refcount checking functions */
+static uint64_t refcount_array_byte_size(BDRVQcow2State *s, uint64_t entries)
+{
+ /* This assertion holds because there is no way we can address more than
+ * 2^(64 - 9) clusters at once (with cluster size 512 = 2^9, and because
+ * offsets have to be representable in bytes); due to every cluster
+ * corresponding to one refcount entry, we are well below that limit */
+ assert(entries < (UINT64_C(1) << (64 - 9)));
+
+ /* Thanks to the assertion this will not overflow, because
+ * s->refcount_order < 7.
+ * (note: x << s->refcount_order == x * s->refcount_bits) */
+ return DIV_ROUND_UP(entries << s->refcount_order, 8);
+}
+
+/**
+ * Reallocates *array so that it can hold new_size entries. *size must contain
+ * the current number of entries in *array. If the reallocation fails, *array
+ * and *size will not be modified and -errno will be returned. If the
+ * reallocation is successful, *array will be set to the new buffer, *size
+ * will be set to new_size and 0 will be returned. The size of the reallocated
+ * refcount array buffer will be aligned to a cluster boundary, and the newly
+ * allocated area will be zeroed.
+ */
+static int realloc_refcount_array(BDRVQcow2State *s, void **array,
+ int64_t *size, int64_t new_size)
+{
+ int64_t old_byte_size, new_byte_size;
+ void *new_ptr;
+
+ /* Round to clusters so the array can be directly written to disk */
+ old_byte_size = size_to_clusters(s, refcount_array_byte_size(s, *size))
+ * s->cluster_size;
+ new_byte_size = size_to_clusters(s, refcount_array_byte_size(s, new_size))
+ * s->cluster_size;
+
+ if (new_byte_size == old_byte_size) {
+ *size = new_size;
+ return 0;
+ }
+
+ assert(new_byte_size > 0);
+
+ if (new_byte_size > SIZE_MAX) {
+ return -ENOMEM;
+ }
+
+ new_ptr = g_try_realloc(*array, new_byte_size);
+ if (!new_ptr) {
+ return -ENOMEM;
+ }
+
+ if (new_byte_size > old_byte_size) {
+ memset((char *)new_ptr + old_byte_size, 0,
+ new_byte_size - old_byte_size);
+ }
+
+ *array = new_ptr;
+ *size = new_size;
+
+ return 0;
+}
/*
* Increases the refcount for a range of clusters in a given refcount table.
* This is used to construct a temporary refcount table out of L1 and L2 tables
- * which can be compared the the refcount table saved in the image.
+ * which can be compared to the refcount table saved in the image.
*
* Modifies the number of errors in res.
*/
static int inc_refcounts(BlockDriverState *bs,
BdrvCheckResult *res,
- uint16_t **refcount_table,
+ void **refcount_table,
int64_t *refcount_table_size,
int64_t offset, int64_t size)
{
- BDRVQcowState *s = bs->opaque;
- uint64_t start, last, cluster_offset, k;
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t start, last, cluster_offset, k, refcount;
+ int ret;
if (size <= 0) {
return 0;
@@ -1095,30 +1329,25 @@
cluster_offset += s->cluster_size) {
k = cluster_offset >> s->cluster_bits;
if (k >= *refcount_table_size) {
- int64_t old_refcount_table_size = *refcount_table_size;
- uint16_t *new_refcount_table;
-
- *refcount_table_size = k + 1;
- new_refcount_table = g_try_realloc(*refcount_table,
- *refcount_table_size *
- sizeof(**refcount_table));
- if (!new_refcount_table) {
- *refcount_table_size = old_refcount_table_size;
+ ret = realloc_refcount_array(s, refcount_table,
+ refcount_table_size, k + 1);
+ if (ret < 0) {
res->check_errors++;
- return -ENOMEM;
+ return ret;
}
- *refcount_table = new_refcount_table;
-
- memset(*refcount_table + old_refcount_table_size, 0,
- (*refcount_table_size - old_refcount_table_size) *
- sizeof(**refcount_table));
}
- if (++(*refcount_table)[k] == 0) {
+ refcount = s->get_refcount(*refcount_table, k);
+ if (refcount == s->refcount_max) {
fprintf(stderr, "ERROR: overflow cluster offset=0x%" PRIx64
"\n", cluster_offset);
+ fprintf(stderr, "Use qemu-img amend to increase the refcount entry "
+ "width or qemu-img convert to create a clean copy if the "
+ "image cannot be opened for writing\n");
res->corruptions++;
+ continue;
}
+ s->set_refcount(*refcount_table, k, refcount + 1);
}
return 0;
@@ -1138,10 +1367,11 @@
* error occurred.
*/
static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
- uint16_t **refcount_table, int64_t *refcount_table_size, int64_t l2_offset,
- int flags)
+ void **refcount_table,
+ int64_t *refcount_table_size, int64_t l2_offset,
+ int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table, l2_entry;
uint64_t next_contiguous_offset = 0;
int i, l2_size, nb_csectors, ret;
@@ -1256,12 +1486,12 @@
*/
static int check_refcounts_l1(BlockDriverState *bs,
BdrvCheckResult *res,
- uint16_t **refcount_table,
+ void **refcount_table,
int64_t *refcount_table_size,
int64_t l1_table_offset, int l1_size,
int flags)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l1_table = NULL, l2_offset, l1_size2;
int i, ret;
@@ -1338,10 +1568,10 @@
static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
int ret;
- int refcount;
+ uint64_t refcount;
int i, j;
for (i = 0; i < s->l1_size; i++) {
@@ -1353,14 +1583,15 @@
continue;
}
- refcount = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits);
- if (refcount < 0) {
+ ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+ &refcount);
+ if (ret < 0) {
/* don't print message nor increment check_errors */
continue;
}
if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_index=%d "
- "l1_entry=%" PRIx64 " refcount=%d\n",
+ "l1_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
fix & BDRV_FIX_ERRORS ? "Repairing" :
"ERROR",
i, l1_entry, refcount);
@@ -1395,15 +1626,16 @@
if ((cluster_type == QCOW2_CLUSTER_NORMAL) ||
((cluster_type == QCOW2_CLUSTER_ZERO) && (data_offset != 0))) {
- refcount = qcow2_get_refcount(bs,
- data_offset >> s->cluster_bits);
- if (refcount < 0) {
+ ret = qcow2_get_refcount(bs,
+ data_offset >> s->cluster_bits,
+ &refcount);
+ if (ret < 0) {
/* don't print message nor increment check_errors */
continue;
}
if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) {
fprintf(stderr, "%s OFLAG_COPIED data cluster: "
- "l2_entry=%" PRIx64 " refcount=%d\n",
+ "l2_entry=%" PRIx64 " refcount=%" PRIu64 "\n",
fix & BDRV_FIX_ERRORS ? "Repairing" :
"ERROR",
l2_entry, refcount);
@@ -1430,7 +1662,8 @@
goto fail;
}
- ret = bdrv_pwrite(bs->file, l2_offset, l2_table, s->cluster_size);
+ ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
+ s->cluster_size);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
strerror(-ret));
@@ -1453,9 +1686,9 @@
*/
static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
- uint16_t **refcount_table, int64_t *nb_clusters)
+ void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i, size;
int ret;
@@ -1478,40 +1711,32 @@
fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i);
if (fix & BDRV_FIX_ERRORS) {
- int64_t old_nb_clusters = *nb_clusters;
- uint16_t *new_refcount_table;
+ int64_t new_nb_clusters;
if (offset > INT64_MAX - s->cluster_size) {
ret = -EINVAL;
goto resize_fail;
}
- ret = bdrv_truncate(bs->file, offset + s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, offset + s->cluster_size);
if (ret < 0) {
goto resize_fail;
}
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
ret = size;
goto resize_fail;
}
- *nb_clusters = size_to_clusters(s, size);
- assert(*nb_clusters >= old_nb_clusters);
+ new_nb_clusters = size_to_clusters(s, size);
+ assert(new_nb_clusters >= *nb_clusters);
- new_refcount_table = g_try_realloc(*refcount_table,
- *nb_clusters *
- sizeof(**refcount_table));
- if (!new_refcount_table) {
- *nb_clusters = old_nb_clusters;
+ ret = realloc_refcount_array(s, refcount_table,
+ nb_clusters, new_nb_clusters);
+ if (ret < 0) {
res->check_errors++;
- return -ENOMEM;
+ return ret;
}
- *refcount_table = new_refcount_table;
-
- memset(*refcount_table + old_nb_clusters, 0,
- (*nb_clusters - old_nb_clusters) *
- sizeof(**refcount_table));
if (cluster >= *nb_clusters) {
ret = -EINVAL;
@@ -1546,9 +1771,10 @@
if (ret < 0) {
return ret;
}
- if ((*refcount_table)[cluster] != 1) {
+ if (s->get_refcount(*refcount_table, cluster) != 1) {
fprintf(stderr, "ERROR refcount block %" PRId64
- " refcount=%d\n", i, (*refcount_table)[cluster]);
+ " refcount=%" PRIu64 "\n", i,
+ s->get_refcount(*refcount_table, cluster));
res->corruptions++;
*rebuild = true;
}
@@ -1563,18 +1789,20 @@
*/
static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
- uint16_t **refcount_table, int64_t *nb_clusters)
+ void **refcount_table, int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
QCowSnapshot *sn;
int ret;
if (!*refcount_table) {
- *refcount_table = g_try_new0(uint16_t, *nb_clusters);
- if (*nb_clusters && *refcount_table == NULL) {
+ int64_t old_size = 0;
+ ret = realloc_refcount_array(s, refcount_table,
+ &old_size, *nb_clusters);
+ if (ret < 0) {
res->check_errors++;
- return -ENOMEM;
+ return ret;
}
}
@@ -1625,22 +1853,23 @@
static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix, bool *rebuild,
int64_t *highest_cluster,
- uint16_t *refcount_table, int64_t nb_clusters)
+ void *refcount_table, int64_t nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t i;
- int refcount1, refcount2, ret;
+ uint64_t refcount1, refcount2;
+ int ret;
for (i = 0, *highest_cluster = 0; i < nb_clusters; i++) {
- refcount1 = qcow2_get_refcount(bs, i);
- if (refcount1 < 0) {
+ ret = qcow2_get_refcount(bs, i, &refcount1);
+ if (ret < 0) {
fprintf(stderr, "Can't get refcount for cluster %" PRId64 ": %s\n",
- i, strerror(-refcount1));
+ i, strerror(-ret));
res->check_errors++;
continue;
}
- refcount2 = refcount_table[i];
+ refcount2 = s->get_refcount(refcount_table, i);
if (refcount1 > 0 || refcount2 > 0) {
*highest_cluster = i;
@@ -1657,7 +1886,8 @@
num_fixed = &res->corruptions_fixed;
}
- fprintf(stderr, "%s cluster %" PRId64 " refcount=%d reference=%d\n",
+ fprintf(stderr, "%s cluster %" PRId64 " refcount=%" PRIu64
+ " reference=%" PRIu64 "\n",
num_fixed != NULL ? "Repairing" :
refcount1 < refcount2 ? "ERROR" :
"Leaked",
@@ -1665,7 +1895,8 @@
if (num_fixed) {
ret = update_refcount(bs, i << s->cluster_bits, 1,
- refcount2 - refcount1,
+ refcount_diff(refcount1, refcount2),
+ refcount1 > refcount2,
QCOW2_DISCARD_ALWAYS);
if (ret >= 0) {
(*num_fixed)++;
@@ -1697,14 +1928,15 @@
*/
static int64_t alloc_clusters_imrt(BlockDriverState *bs,
int cluster_count,
- uint16_t **refcount_table,
+ void **refcount_table,
int64_t *imrt_nb_clusters,
int64_t *first_free_cluster)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t cluster = *first_free_cluster, i;
bool first_gap = true;
int contiguous_free_clusters;
+ int ret;
/* Starting at *first_free_cluster, find a range of at least cluster_count
* continuously free clusters */
@@ -1713,7 +1945,7 @@
contiguous_free_clusters < cluster_count;
cluster++)
{
- if (!(*refcount_table)[cluster]) {
+ if (!s->get_refcount(*refcount_table, cluster)) {
contiguous_free_clusters++;
if (first_gap) {
/* If this is the first free cluster found, update
@@ -1734,34 +1966,24 @@
/* If no such range could be found, grow the in-memory refcount table
* accordingly to append free clusters at the end of the image */
if (contiguous_free_clusters < cluster_count) {
- int64_t old_imrt_nb_clusters = *imrt_nb_clusters;
- uint16_t *new_refcount_table;
-
/* contiguous_free_clusters clusters are already empty at the image end;
* we need cluster_count clusters; therefore, we have to allocate
* cluster_count - contiguous_free_clusters new clusters at the end of
* the image (which is the current value of cluster; note that cluster
* may exceed old_imrt_nb_clusters if *first_free_cluster pointed beyond
* the image end) */
- *imrt_nb_clusters = cluster + cluster_count - contiguous_free_clusters;
- new_refcount_table = g_try_realloc(*refcount_table,
- *imrt_nb_clusters *
- sizeof(**refcount_table));
- if (!new_refcount_table) {
- *imrt_nb_clusters = old_imrt_nb_clusters;
- return -ENOMEM;
+ ret = realloc_refcount_array(s, refcount_table, imrt_nb_clusters,
+ cluster + cluster_count
+ - contiguous_free_clusters);
+ if (ret < 0) {
+ return ret;
}
- *refcount_table = new_refcount_table;
-
- memset(*refcount_table + old_imrt_nb_clusters, 0,
- (*imrt_nb_clusters - old_imrt_nb_clusters) *
- sizeof(**refcount_table));
}
/* Go back to the first free cluster */
cluster -= contiguous_free_clusters;
for (i = 0; i < cluster_count; i++) {
- (*refcount_table)[cluster + i] = 1;
+ s->set_refcount(*refcount_table, cluster + i, 1);
}
return cluster << s->cluster_bits;
@@ -1777,16 +1999,16 @@
*/
static int rebuild_refcount_structure(BlockDriverState *bs,
BdrvCheckResult *res,
- uint16_t **refcount_table,
+ void **refcount_table,
int64_t *nb_clusters)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
int64_t refblock_offset, refblock_start, refblock_index;
uint32_t reftable_size = 0;
uint64_t *on_disk_reftable = NULL;
- uint16_t *on_disk_refblock;
- int i, ret = 0;
+ void *on_disk_refblock;
+ int ret = 0;
struct {
uint64_t reftable_offset;
uint32_t reftable_clusters;
@@ -1796,7 +2018,7 @@
write_refblocks:
for (; cluster < *nb_clusters; cluster++) {
- if (!(*refcount_table)[cluster]) {
+ if (!s->get_refcount(*refcount_table, cluster)) {
continue;
}
@@ -1869,17 +2091,13 @@
goto fail;
}
- on_disk_refblock = qemu_blockalign0(bs->file, s->cluster_size);
- for (i = 0; i < s->refcount_block_size &&
- refblock_start + i < *nb_clusters; i++)
- {
- on_disk_refblock[i] =
- cpu_to_be16((*refcount_table)[refblock_start + i]);
- }
+ /* The size of *refcount_table is always cluster-aligned, therefore the
+ * write operation will not overflow */
+ on_disk_refblock = (void *)((char *) *refcount_table +
+ refblock_index * s->cluster_size);
ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
- (void *)on_disk_refblock, s->cluster_sectors);
- qemu_vfree(on_disk_refblock);
+ on_disk_refblock, s->cluster_sectors);
if (ret < 0) {
fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
goto fail;
@@ -1935,12 +2153,11 @@
}
/* Enter new reftable into the image header */
- cpu_to_be64w(&reftable_offset_and_clusters.reftable_offset,
- reftable_offset);
- cpu_to_be32w(&reftable_offset_and_clusters.reftable_clusters,
- size_to_clusters(s, reftable_size * sizeof(uint64_t)));
- ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader,
- refcount_table_offset),
+ reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
+ reftable_offset_and_clusters.reftable_clusters =
+ cpu_to_be32(size_to_clusters(s, reftable_size * sizeof(uint64_t)));
+ ret = bdrv_pwrite_sync(bs->file,
+ offsetof(QCowHeader, refcount_table_offset),
&reftable_offset_and_clusters,
sizeof(reftable_offset_and_clusters));
if (ret < 0) {
@@ -1971,14 +2188,14 @@
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
BdrvCheckResult pre_compare_res;
int64_t size, highest_cluster, nb_clusters;
- uint16_t *refcount_table = NULL;
+ void *refcount_table = NULL;
bool rebuild = false;
int ret;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
@@ -2023,7 +2240,7 @@
/* Because the old reftable has been exchanged for a new one the
* references have to be recalculated */
rebuild = false;
- memset(refcount_table, 0, nb_clusters * sizeof(uint16_t));
+ memset(refcount_table, 0, refcount_array_byte_size(s, nb_clusters));
ret = calculate_refcounts(bs, res, 0, &rebuild, &refcount_table,
&nb_clusters);
if (ret < 0) {
@@ -2108,7 +2325,7 @@
int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
int64_t size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int chk = s->overlap_check & ~ign;
int i, j;
@@ -2237,7 +2454,7 @@
if (ret < 0) {
return ret;
} else if (ret > 0) {
- int metadata_ol_bitnr = ffs(ret) - 1;
+ int metadata_ol_bitnr = ctz32(ret);
assert(metadata_ol_bitnr < QCOW2_OL_MAX_BITNR);
qcow2_signal_corruption(bs, true, offset, size, "Preventing invalid "
@@ -2248,3 +2465,450 @@
return 0;
}
+
+/* Callback type invoked by walk_over_reftable() each time it has filled a new
+ * refblock. @refblock is that refblock; @refblock_empty is true if none of the
+ * refcounts collected for it was non-zero. @reftable_index is the index of
+ * the refblock's entry in the new reftable *@reftable, which currently holds
+ * *@reftable_size entries and may be reallocated by the callback.
+ *
+ * The callback should set *@allocated to true if it allocated a new cluster.
+ * The walk is aborted once the callback returns a negative value (-errno);
+ * on failure, the callback is expected to have set @errp.
+ */
+typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty,
+ bool *allocated, Error **errp);
+
+/**
+ * RefblockFinishOp that reserves disk space: for a non-empty refblock lacking
+ * a reftable entry, it allocates cluster_size bytes, records the offset in the
+ * new reftable (growing it as required) and sets *allocated to true.
+ */
+static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+
+ if (!refblock_empty && reftable_index >= *reftable_size) {
+ uint64_t *new_reftable;
+ uint64_t new_reftable_size;
+
+ new_reftable_size = ROUND_UP(reftable_index + 1,
+ s->cluster_size / sizeof(uint64_t));
+ if (new_reftable_size > QCOW_MAX_REFTABLE_SIZE / sizeof(uint64_t)) {
+ error_setg(errp,
+ "This operation would make the refcount table grow "
+ "beyond the maximum size supported by QEMU, aborting");
+ return -ENOTSUP;
+ }
+
+ new_reftable = g_try_realloc(*reftable, new_reftable_size *
+ sizeof(uint64_t));
+ if (!new_reftable) {
+ error_setg(errp, "Failed to increase reftable buffer size");
+ return -ENOMEM;
+ }
+
+ memset(new_reftable + *reftable_size, 0,
+ (new_reftable_size - *reftable_size) * sizeof(uint64_t));
+
+ *reftable = new_reftable;
+ *reftable_size = new_reftable_size;
+ }
+
+ if (!refblock_empty && !(*reftable)[reftable_index]) {
+ offset = qcow2_alloc_clusters(bs, s->cluster_size);
+ if (offset < 0) {
+ error_setg_errno(errp, -offset, "Failed to allocate refblock");
+ return offset;
+ }
+ (*reftable)[reftable_index] = offset;
+ *allocated = true;
+ }
+
+ return 0;
+}
+
+/**
+ * RefblockFinishOp that writes @refblock (after an overlap check) to the
+ * offset stored in its new reftable entry. A refblock without such an entry
+ * must be empty (asserted). The reftable and *allocated are not modified.
+ */
+static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t offset;
+ int ret;
+
+ if (reftable_index < *reftable_size && (*reftable)[reftable_index]) {
+ offset = (*reftable)[reftable_index];
+
+ ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ return ret;
+ }
+
+ ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write refblock");
+ return ret;
+ }
+ } else {
+ assert(refblock_empty);
+ }
+
+ return 0;
+}
+
+/**
+ * Walks over the existing reftable and every refblock it references. If
+ * @new_set_refcount is non-NULL, each refcount is copied through it into
+ * @new_refblock (absent refblocks count as all-zero). After every
+ * @new_refblock_size entries, and once more for a partly filled last refblock
+ * (zero-padded when copying), @operation is called and *@new_reftable_index is
+ * incremented. @allocated is passed through to @operation.
+ *
+ * Returns 0 on success, -EIO for an unaligned refblock offset, -EINVAL if a
+ * refcount needs more than @new_refcount_bits bits, or a callee's error code.
+ * @status_cb is called with @cb_opaque; @index (number of this walk) and
+ * @total (total number of walks) are used to compute its progress arguments.
+ */
+static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+ uint64_t *new_reftable_index,
+ uint64_t *new_reftable_size,
+ void *new_refblock, int new_refblock_size,
+ int new_refcount_bits,
+ RefblockFinishOp *operation, bool *allocated,
+ Qcow2SetRefcountFunc *new_set_refcount,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, int index, int total,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ uint64_t reftable_index;
+ bool new_refblock_empty = true;
+ int refblock_index;
+ int new_refblock_index = 0;
+ int ret;
+
+ for (reftable_index = 0; reftable_index < s->refcount_table_size;
+ reftable_index++)
+ {
+ uint64_t refblock_offset = s->refcount_table[reftable_index]
+ & REFT_OFFSET_MASK;
+
+ status_cb(bs, (uint64_t)index * s->refcount_table_size + reftable_index,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ if (refblock_offset) {
+ void *refblock;
+
+ if (offset_into_cluster(s, refblock_offset)) {
+ qcow2_signal_corruption(bs, true, -1, -1, "Refblock offset %#"
+ PRIx64 " unaligned (reftable index: %#"
+ PRIx64 ")", refblock_offset,
+ reftable_index);
+ error_setg(errp,
+ "Image is corrupt (unaligned refblock offset)");
+ return -EIO;
+ }
+
+ ret = qcow2_cache_get(bs, s->refcount_block_cache, refblock_offset,
+ &refblock);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to retrieve refblock");
+ return ret;
+ }
+
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ uint64_t refcount;
+
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ refcount = s->get_refcount(refblock, refblock_index);
+ if (new_refcount_bits < 64 && refcount >> new_refcount_bits) {
+ uint64_t offset;
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+
+ offset = ((reftable_index << s->refcount_block_bits)
+ + refblock_index) << s->cluster_bits;
+
+ error_setg(errp, "Cannot decrease refcount entry width to "
+ "%i bits: Cluster at offset %#" PRIx64 " has a "
+ "refcount of %" PRIu64, new_refcount_bits,
+ offset, refcount);
+ return -EINVAL;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++,
+ refcount);
+ } else {
+ new_refblock_index++;
+ }
+ new_refblock_empty = new_refblock_empty && refcount == 0;
+ }
+
+ qcow2_cache_put(bs, s->refcount_block_cache, &refblock);
+ } else {
+ /* No refblock means every refcount is 0 */
+ for (refblock_index = 0; refblock_index < s->refcount_block_size;
+ refblock_index++)
+ {
+ if (new_refblock_index >= new_refblock_size) {
+ /* new_refblock is now complete */
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock,
+ new_refblock_empty, allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ new_refblock_index = 0;
+ new_refblock_empty = true;
+ }
+
+ if (new_set_refcount) {
+ new_set_refcount(new_refblock, new_refblock_index++, 0);
+ } else {
+ new_refblock_index++;
+ }
+ }
+ }
+ }
+
+ if (new_refblock_index > 0) {
+ /* Complete the potentially existing partially filled final refblock */
+ if (new_set_refcount) {
+ for (; new_refblock_index < new_refblock_size;
+ new_refblock_index++)
+ {
+ new_set_refcount(new_refblock, new_refblock_index, 0);
+ }
+ }
+
+ ret = operation(bs, new_reftable, *new_reftable_index,
+ new_reftable_size, new_refblock, new_refblock_empty,
+ allocated, errp);
+ if (ret < 0) {
+ return ret;
+ }
+
+ (*new_reftable_index)++;
+ }
+
+ status_cb(bs, (uint64_t)(index + 1) * s->refcount_table_size,
+ (uint64_t)total * s->refcount_table_size, cb_opaque);
+
+ return 0;
+}
+
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ Qcow2GetRefcountFunc *new_get_refcount;
+ Qcow2SetRefcountFunc *new_set_refcount;
+ void *new_refblock = qemu_blockalign(bs->file->bs, s->cluster_size);
+ uint64_t *new_reftable = NULL, new_reftable_size = 0;
+ uint64_t *old_reftable, old_reftable_size, old_reftable_offset;
+ uint64_t new_reftable_index = 0;
+ uint64_t i;
+ int64_t new_reftable_offset = 0, allocated_reftable_size = 0;
+ int new_refblock_size, new_refcount_bits = 1 << refcount_order;
+ int old_refcount_order;
+ int walk_index = 0;
+ int ret;
+ bool new_allocation;
+
+ assert(s->qcow_version >= 3);
+ assert(refcount_order >= 0 && refcount_order <= 6);
+
+ /* see qcow2_open() */
+ new_refblock_size = 1 << (s->cluster_bits - (refcount_order - 3));
+
+ new_get_refcount = get_refcount_funcs[refcount_order];
+ new_set_refcount = set_refcount_funcs[refcount_order];
+
+
+ do {
+ int total_walks;
+
+ new_allocation = false;
+
+ /* At least three walks are needed: this allocating walk has to run
+ * at least twice (normally) - once to do the allocations and once
+ * more to confirm that nothing else needs to be allocated - and it
+ * is followed by the walk which writes the refblocks. Every extra
+ * iteration of this loop adds one more walk to the total */
+ total_walks = MAX(walk_index + 2, 3);
+
+ /* First, allocate the structures so they are present in the refcount
+ * structures */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, NULL, new_refblock_size,
+ new_refcount_bits, &alloc_refblock,
+ &new_allocation, NULL, status_cb, cb_opaque,
+ walk_index++, total_walks, errp);
+ if (ret < 0) {
+ goto done;
+ }
+
+ new_reftable_index = 0;
+
+ if (new_allocation) {
+ if (new_reftable_offset) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ allocated_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_NEVER);
+ }
+
+ new_reftable_offset = qcow2_alloc_clusters(bs, new_reftable_size *
+ sizeof(uint64_t));
+ if (new_reftable_offset < 0) {
+ error_setg_errno(errp, -new_reftable_offset,
+ "Failed to allocate the new reftable");
+ ret = new_reftable_offset;
+ goto done;
+ }
+ allocated_reftable_size = new_reftable_size;
+ }
+ } while (new_allocation);
+
+ /* Second, write the new refblocks */
+ ret = walk_over_reftable(bs, &new_reftable, &new_reftable_index,
+ &new_reftable_size, new_refblock,
+ new_refblock_size, new_refcount_bits,
+ &flush_refblock, &new_allocation, new_set_refcount,
+ status_cb, cb_opaque, walk_index, walk_index + 1,
+ errp);
+ if (ret < 0) {
+ goto done;
+ }
+ assert(!new_allocation);
+
+
+ /* Write the new reftable */
+ ret = qcow2_pre_write_overlap_check(bs, 0, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Overlap check failed");
+ goto done;
+ }
+
+ for (i = 0; i < new_reftable_size; i++) {
+ cpu_to_be64s(&new_reftable[i]);
+ }
+
+ ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
+ new_reftable_size * sizeof(uint64_t));
+
+ for (i = 0; i < new_reftable_size; i++) {
+ be64_to_cpus(&new_reftable[i]);
+ }
+
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to write the new reftable");
+ goto done;
+ }
+
+
+ /* Flush the refcount block cache */
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to flush the refblock cache");
+ goto done;
+ }
+
+ /* Update the image header to point to the new reftable; this only updates
+ * the fields which are relevant to qcow2_update_header(); other fields
+ * such as s->refcount_table or s->refcount_bits stay stale for now
+ * (because we have to restore everything if qcow2_update_header() fails) */
+ old_refcount_order = s->refcount_order;
+ old_reftable_size = s->refcount_table_size;
+ old_reftable_offset = s->refcount_table_offset;
+
+ s->refcount_order = refcount_order;
+ s->refcount_table_size = new_reftable_size;
+ s->refcount_table_offset = new_reftable_offset;
+
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->refcount_order = old_refcount_order;
+ s->refcount_table_size = old_reftable_size;
+ s->refcount_table_offset = old_reftable_offset;
+ error_setg_errno(errp, -ret, "Failed to update the qcow2 header");
+ goto done;
+ }
+
+ /* Now update the rest of the in-memory information */
+ old_reftable = s->refcount_table;
+ s->refcount_table = new_reftable;
+
+ s->refcount_bits = 1 << refcount_order;
+ s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
+ s->refcount_max += s->refcount_max - 1;
+
+ s->refcount_block_bits = s->cluster_bits - (refcount_order - 3);
+ s->refcount_block_size = 1 << s->refcount_block_bits;
+
+ s->get_refcount = new_get_refcount;
+ s->set_refcount = new_set_refcount;
+
+ /* For cleaning up all old refblocks and the old reftable below the "done"
+ * label */
+ new_reftable = old_reftable;
+ new_reftable_size = old_reftable_size;
+ new_reftable_offset = old_reftable_offset;
+
+done:
+ if (new_reftable) {
+ /* On success, new_reftable/new_reftable_size refer to the old
+ * reftable here, which is fine. NOTE(review): on failure, should
+ * the last free below use allocated_reftable_size instead? */
+ for (i = 0; i < new_reftable_size; i++) {
+ uint64_t offset = new_reftable[i] & REFT_OFFSET_MASK;
+ if (offset) {
+ qcow2_free_clusters(bs, offset, s->cluster_size,
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+ g_free(new_reftable);
+
+ if (new_reftable_offset > 0) {
+ qcow2_free_clusters(bs, new_reftable_offset,
+ new_reftable_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
+ }
+ }
+
+ qemu_vfree(new_refblock);
+ return ret;
+}
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 5b3903c..0324243 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -22,13 +22,17 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "block/qcow2.h"
+#include "qemu/bswap.h"
+#include "qemu/error-report.h"
+#include "qemu/cutils.h"
void qcow2_free_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
for(i = 0; i < s->nb_snapshots; i++) {
@@ -42,7 +46,7 @@
int qcow2_read_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
QCowSnapshot *sn;
@@ -135,7 +139,7 @@
/* add at the end of the file a new list of snapshots */
static int qcow2_write_snapshots(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
QCowSnapshotHeader h;
QCowSnapshotExtraData extra;
@@ -277,7 +281,7 @@
static void find_new_snapshot_id(BlockDriverState *bs,
char *id_str, int id_str_size)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i;
unsigned long id, id_max = 0;
@@ -295,7 +299,7 @@
const char *id,
const char *name)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int i;
if (id && name) {
@@ -337,7 +341,7 @@
/* if no id is provided, a new one is constructed */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *new_snapshot_list = NULL;
QCowSnapshot *old_snapshot_list = NULL;
QCowSnapshot sn1, *sn = &sn1;
@@ -351,10 +355,8 @@
memset(sn, 0, sizeof(*sn));
- /* Generate an ID if it wasn't passed */
- if (sn_info->id_str[0] == '\0') {
- find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
- }
+ /* Generate an ID */
+ find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
/* Check that the ID is unique */
if (find_snapshot_by_id_and_name(bs, sn_info->id_str, NULL) >= 0) {
@@ -462,7 +464,7 @@
/* copy the snapshot 'snapshot_name' into the current disk image */
int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
int i, snapshot_index;
int cur_l1_bytes, sn_l1_bytes;
@@ -510,7 +512,8 @@
goto fail;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
+ ret = bdrv_pread(bs->file, sn->l1_table_offset,
+ sn_l1_table, sn_l1_bytes);
if (ret < 0) {
goto fail;
}
@@ -588,7 +591,7 @@
const char *name,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot sn;
int snapshot_index, ret;
@@ -651,7 +654,7 @@
int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QEMUSnapshotInfo *sn_tab, *sn_info;
QCowSnapshot *sn;
int i;
@@ -684,7 +687,7 @@
Error **errp)
{
int i, snapshot_index;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowSnapshot *sn;
uint64_t *new_l1_table;
int new_l1_bytes;
@@ -702,18 +705,19 @@
sn = &s->snapshots[snapshot_index];
/* Allocate and read in the snapshot's L1 table */
- if (sn->l1_size > QCOW_MAX_L1_SIZE) {
+ if (sn->l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
error_setg(errp, "Snapshot L1 table too large");
return -EFBIG;
}
new_l1_bytes = sn->l1_size * sizeof(uint64_t);
- new_l1_table = qemu_try_blockalign(bs->file,
+ new_l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(new_l1_bytes, 512));
if (new_l1_table == NULL) {
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
+ ret = bdrv_pread(bs->file, sn->l1_table_offset,
+ new_l1_table, new_l1_bytes);
if (ret < 0) {
error_setg(errp, "Failed to read l1 table for snapshot");
qemu_vfree(new_l1_table);
diff --git a/block/qcow2.c b/block/qcow2.c
index d120494..91ef4df 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include <zlib.h>
-#include "qemu/aes.h"
#include "block/qcow2.h"
#include "qemu/error-report.h"
#include "qapi/qmp/qerror.h"
@@ -35,6 +35,8 @@
#include "qapi-event.h"
#include "trace.h"
#include "qemu/option_int.h"
+#include "qemu/cutils.h"
+#include "qemu/bswap.h"
/*
Differences with QCOW:
@@ -86,7 +88,7 @@
uint64_t end_offset, void **p_feature_table,
Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowExtension ext;
uint64_t offset;
int ret;
@@ -117,7 +119,7 @@
#ifdef DEBUG_EXT
printf("ext.magic = 0x%x\n", ext.magic);
#endif
- if (ext.len > end_offset - offset) {
+ if (offset > end_offset || ext.len > end_offset - offset) {
error_setg(errp, "Header extension too large");
return -EINVAL;
}
@@ -140,6 +142,7 @@
return 3;
}
bs->backing_format[ext.len] = '\0';
+ s->image_backing_format = g_strdup(bs->backing_format);
#ifdef DEBUG_EXT
printf("Qcow2: Got format extension %s\n", bs->backing_format);
#endif
@@ -187,7 +190,7 @@
static void cleanup_unknown_header_ext(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
Qcow2UnknownHeaderExtension *uext, *next;
QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
@@ -196,22 +199,8 @@
}
}
-static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
- Error **errp, const char *fmt, ...)
-{
- char msg[64];
- va_list ap;
-
- va_start(ap, fmt);
- vsnprintf(msg, sizeof(msg), fmt, ap);
- va_end(ap);
-
- error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_name(bs), "qcow2", msg);
-}
-
-static void report_unsupported_feature(BlockDriverState *bs,
- Error **errp, Qcow2Feature *table, uint64_t mask)
+static void report_unsupported_feature(Error **errp, Qcow2Feature *table,
+ uint64_t mask)
{
char *features = g_strdup("");
char *old;
@@ -236,7 +225,7 @@
g_free(old);
}
- report_unsupported(bs, errp, "%s", features);
+ error_setg(errp, "Unsupported qcow2 feature(s): %s", features);
g_free(features);
}
@@ -249,7 +238,7 @@
*/
int qcow2_mark_dirty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t val;
int ret;
@@ -265,7 +254,7 @@
if (ret < 0) {
return ret;
}
- ret = bdrv_flush(bs->file);
+ ret = bdrv_flush(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -282,7 +271,7 @@
*/
static int qcow2_mark_clean(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
int ret;
@@ -304,7 +293,7 @@
*/
int qcow2_mark_corrupt(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
return qcow2_update_header(bs);
@@ -316,7 +305,7 @@
*/
int qcow2_mark_consistent(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
int ret = bdrv_flush(bs);
@@ -351,7 +340,7 @@
static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
uint64_t entries, size_t entry_len)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t size;
/* Use signed INT64_MAX as the maximum even for uint64_t header fields,
@@ -467,6 +456,11 @@
.type = QEMU_OPT_SIZE,
.help = "Maximum refcount block cache size",
},
+ {
+ .name = QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Clean unused cache entries after this time (in seconds)",
+ },
{ /* end of list */ }
},
};
@@ -482,9 +476,54 @@
[QCOW2_OL_INACTIVE_L2_BITNR] = QCOW2_OPT_OVERLAP_INACTIVE_L2,
};
-static void read_cache_sizes(QemuOpts *opts, uint64_t *l2_cache_size,
+static void cache_clean_timer_cb(void *opaque)
+{
+ BlockDriverState *bs = opaque;
+ BDRVQcow2State *s = bs->opaque;
+ qcow2_cache_clean_unused(bs, s->l2_table_cache);
+ qcow2_cache_clean_unused(bs, s->refcount_block_cache);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+}
+
+static void cache_clean_timer_init(BlockDriverState *bs, AioContext *context)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_interval > 0) {
+ s->cache_clean_timer = aio_timer_new(context, QEMU_CLOCK_VIRTUAL,
+ SCALE_MS, cache_clean_timer_cb,
+ bs);
+ timer_mod(s->cache_clean_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
+ (int64_t) s->cache_clean_interval * 1000);
+ }
+}
+
+static void cache_clean_timer_del(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ if (s->cache_clean_timer) {
+ timer_del(s->cache_clean_timer);
+ timer_free(s->cache_clean_timer);
+ s->cache_clean_timer = NULL;
+ }
+}
+
+static void qcow2_detach_aio_context(BlockDriverState *bs)
+{
+ cache_clean_timer_del(bs);
+}
+
+static void qcow2_attach_aio_context(BlockDriverState *bs,
+ AioContext *new_context)
+{
+ cache_clean_timer_init(bs, new_context);
+}
+
+static void read_cache_sizes(BlockDriverState *bs, QemuOpts *opts,
+ uint64_t *l2_cache_size,
uint64_t *refcount_cache_size, Error **errp)
{
+ BDRVQcow2State *s = bs->opaque;
uint64_t combined_cache_size;
bool l2_cache_size_set, refcount_cache_size_set, combined_cache_size_set;
@@ -524,7 +563,9 @@
}
} else {
if (!l2_cache_size_set && !refcount_cache_size_set) {
- *l2_cache_size = DEFAULT_L2_CACHE_BYTE_SIZE;
+ *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
+ (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
+ * s->cluster_size);
*refcount_cache_size = *l2_cache_size
/ DEFAULT_L2_REFCOUNT_SIZE_RATIO;
} else if (!l2_cache_size_set) {
@@ -537,20 +578,244 @@
}
}
-static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
+typedef struct Qcow2ReopenState {
+ Qcow2Cache *l2_table_cache;
+ Qcow2Cache *refcount_block_cache;
+ bool use_lazy_refcounts;
+ int overlap_check;
+ bool discard_passthrough[QCOW2_DISCARD_MAX];
+ uint64_t cache_clean_interval;
+} Qcow2ReopenState;
+
+static int qcow2_update_options_prepare(BlockDriverState *bs,
+ Qcow2ReopenState *r,
+ QDict *options, int flags,
+ Error **errp)
{
- BDRVQcowState *s = bs->opaque;
- unsigned int len, i;
- int ret = 0;
- QCowHeader header;
+ BDRVQcow2State *s = bs->opaque;
QemuOpts *opts = NULL;
- Error *local_err = NULL;
- uint64_t ext_end;
- uint64_t l1_vm_state_index;
const char *opt_overlap_check, *opt_overlap_check_template;
int overlap_check_template = 0;
uint64_t l2_cache_size, refcount_cache_size;
+ int i;
+ Error *local_err = NULL;
+ int ret;
+
+ opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* get L2 table/refcount block cache size from command line options */
+ read_cache_sizes(bs, opts, &l2_cache_size, &refcount_cache_size,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ l2_cache_size /= s->cluster_size;
+ if (l2_cache_size < MIN_L2_CACHE_SIZE) {
+ l2_cache_size = MIN_L2_CACHE_SIZE;
+ }
+ if (l2_cache_size > INT_MAX) {
+ error_setg(errp, "L2 cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ refcount_cache_size /= s->cluster_size;
+ if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
+ refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
+ }
+ if (refcount_cache_size > INT_MAX) {
+ error_setg(errp, "Refcount cache size too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* alloc new L2 table/refcount block cache, flush old one */
+ if (s->l2_table_cache) {
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret, "Failed to flush the L2 table cache");
+ goto fail;
+ }
+ }
+
+ if (s->refcount_block_cache) {
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ error_setg_errno(errp, -ret,
+ "Failed to flush the refcount block cache");
+ goto fail;
+ }
+ }
+
+ r->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
+ r->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
+ if (r->l2_table_cache == NULL || r->refcount_block_cache == NULL) {
+ error_setg(errp, "Could not allocate metadata caches");
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ /* New interval for cache cleanup timer */
+ r->cache_clean_interval =
+ qemu_opt_get_number(opts, QCOW2_OPT_CACHE_CLEAN_INTERVAL,
+ s->cache_clean_interval);
+ if (r->cache_clean_interval > UINT_MAX) {
+ error_setg(errp, "Cache clean interval too big");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ /* lazy-refcounts; flush if going from enabled to disabled */
+ r->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
+ (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
+ if (r->use_lazy_refcounts && s->qcow_version < 3) {
+ error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
+ "qemu 1.1 compatibility level");
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ if (s->use_lazy_refcounts && !r->use_lazy_refcounts) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to disable lazy refcounts");
+ goto fail;
+ }
+ }
+
+ /* Overlap check options */
+ opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
+ opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
+ if (opt_overlap_check_template && opt_overlap_check &&
+ strcmp(opt_overlap_check_template, opt_overlap_check))
+ {
+ error_setg(errp, "Conflicting values for qcow2 options '"
+ QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
+ "' ('%s')", opt_overlap_check, opt_overlap_check_template);
+ ret = -EINVAL;
+ goto fail;
+ }
+ if (!opt_overlap_check) {
+ opt_overlap_check = opt_overlap_check_template ?: "cached";
+ }
+
+ if (!strcmp(opt_overlap_check, "none")) {
+ overlap_check_template = 0;
+ } else if (!strcmp(opt_overlap_check, "constant")) {
+ overlap_check_template = QCOW2_OL_CONSTANT;
+ } else if (!strcmp(opt_overlap_check, "cached")) {
+ overlap_check_template = QCOW2_OL_CACHED;
+ } else if (!strcmp(opt_overlap_check, "all")) {
+ overlap_check_template = QCOW2_OL_ALL;
+ } else {
+ error_setg(errp, "Unsupported value '%s' for qcow2 option "
+ "'overlap-check'. Allowed are any of the following: "
+ "none, constant, cached, all", opt_overlap_check);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ r->overlap_check = 0;
+ for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
+ /* overlap-check defines a template bitmask, but every flag may be
+ * overwritten through the associated boolean option */
+ r->overlap_check |=
+ qemu_opt_get_bool(opts, overlap_bool_option_names[i],
+ overlap_check_template & (1 << i)) << i;
+ }
+
+ r->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
+ r->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
+ r->discard_passthrough[QCOW2_DISCARD_REQUEST] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
+ flags & BDRV_O_UNMAP);
+ r->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
+ r->discard_passthrough[QCOW2_DISCARD_OTHER] =
+ qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
+
+ ret = 0;
+fail:
+ qemu_opts_del(opts);
+ opts = NULL;
+ return ret;
+}
+
+static void qcow2_update_options_commit(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int i;
+
+ if (s->l2_table_cache) {
+ qcow2_cache_destroy(bs, s->l2_table_cache);
+ }
+ if (s->refcount_block_cache) {
+ qcow2_cache_destroy(bs, s->refcount_block_cache);
+ }
+ s->l2_table_cache = r->l2_table_cache;
+ s->refcount_block_cache = r->refcount_block_cache;
+
+ s->overlap_check = r->overlap_check;
+ s->use_lazy_refcounts = r->use_lazy_refcounts;
+
+ for (i = 0; i < QCOW2_DISCARD_MAX; i++) {
+ s->discard_passthrough[i] = r->discard_passthrough[i];
+ }
+
+ if (s->cache_clean_interval != r->cache_clean_interval) {
+ cache_clean_timer_del(bs);
+ s->cache_clean_interval = r->cache_clean_interval;
+ cache_clean_timer_init(bs, bdrv_get_aio_context(bs));
+ }
+}
+
+static void qcow2_update_options_abort(BlockDriverState *bs,
+ Qcow2ReopenState *r)
+{
+ if (r->l2_table_cache) {
+ qcow2_cache_destroy(bs, r->l2_table_cache);
+ }
+ if (r->refcount_block_cache) {
+ qcow2_cache_destroy(bs, r->refcount_block_cache);
+ }
+}
+
+static int qcow2_update_options(BlockDriverState *bs, QDict *options,
+ int flags, Error **errp)
+{
+ Qcow2ReopenState r = {};
+ int ret;
+
+ ret = qcow2_update_options_prepare(bs, &r, options, flags, errp);
+ if (ret >= 0) {
+ qcow2_update_options_commit(bs, &r);
+ } else {
+ qcow2_update_options_abort(bs, &r);
+ }
+
+ return ret;
+}
+
+static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ unsigned int len, i;
+ int ret = 0;
+ QCowHeader header;
+ Error *local_err = NULL;
+ uint64_t ext_end;
+ uint64_t l1_vm_state_index;
ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
if (ret < 0) {
@@ -577,7 +842,7 @@
goto fail;
}
if (header.version < 2 || header.version > 3) {
- report_unsupported(bs, errp, "QCOW version %" PRIu32, header.version);
+ error_setg(errp, "Unsupported qcow2 version %" PRIu32, header.version);
ret = -ENOTSUP;
goto fail;
}
@@ -657,7 +922,7 @@
void *feature_table = NULL;
qcow2_read_extensions(bs, header.header_length, ext_end,
&feature_table, NULL);
- report_unsupported_feature(bs, errp, feature_table,
+ report_unsupported_feature(errp, feature_table,
s->incompatible_features &
~QCOW2_INCOMPAT_MASK);
ret = -ENOTSUP;
@@ -677,13 +942,16 @@
}
/* Check support for various header values */
- if (header.refcount_order != 4) {
- report_unsupported(bs, errp, "%d bit reference counts",
- 1 << header.refcount_order);
- ret = -ENOTSUP;
+ if (header.refcount_order > 6) {
+ error_setg(errp, "Reference count entry width too large; may not "
+ "exceed 64 bits");
+ ret = -EINVAL;
goto fail;
}
s->refcount_order = header.refcount_order;
+ s->refcount_bits = 1 << s->refcount_order;
+ s->refcount_max = UINT64_C(1) << (s->refcount_bits - 1);
+ s->refcount_max += s->refcount_max - 1;
if (header.crypt_method > QCOW_CRYPT_AES) {
error_setg(errp, "Unsupported encryption method: %" PRIu32,
@@ -691,9 +959,28 @@
ret = -EINVAL;
goto fail;
}
+ if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
+ error_setg(errp, "AES cipher not available");
+ ret = -EINVAL;
+ goto fail;
+ }
s->crypt_method_header = header.crypt_method;
if (s->crypt_method_header) {
- bs->encrypted = 1;
+ if (bdrv_uses_whitelist() &&
+ s->crypt_method_header == QCOW_CRYPT_AES) {
+ error_setg(errp,
+ "Use of AES-CBC encrypted qcow2 images is no longer "
+ "supported in system emulators");
+ error_append_hint(errp,
+ "You can use 'qemu-img convert' to convert your "
+ "image to an alternative supported format, such "
+ "as unencrypted qcow2, or raw with the LUKS "
+ "format instead.\n");
+ ret = -ENOSYS;
+ goto fail;
+ }
+
+ bs->encrypted = true;
}
s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -739,7 +1026,7 @@
}
/* read the level 1 table */
- if (header.l1_size > QCOW_MAX_L1_SIZE) {
+ if (header.l1_size > QCOW_MAX_L1_SIZE / sizeof(uint64_t)) {
error_setg(errp, "Active L1 table too large");
ret = -EFBIG;
goto fail;
@@ -772,7 +1059,7 @@
if (s->l1_size > 0) {
- s->l1_table = qemu_try_blockalign(bs->file,
+ s->l1_table = qemu_try_blockalign(bs->file->bs,
align_offset(s->l1_size * sizeof(uint64_t), 512));
if (s->l1_table == NULL) {
error_setg(errp, "Could not allocate L1 table");
@@ -790,54 +1077,15 @@
}
}
- /* get L2 table/refcount block cache size from command line options */
- opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
- qemu_opts_absorb_qdict(opts, options, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- read_cache_sizes(opts, &l2_cache_size, &refcount_cache_size, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
-
- l2_cache_size /= s->cluster_size;
- if (l2_cache_size < MIN_L2_CACHE_SIZE) {
- l2_cache_size = MIN_L2_CACHE_SIZE;
- }
- if (l2_cache_size > INT_MAX) {
- error_setg(errp, "L2 cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- refcount_cache_size /= s->cluster_size;
- if (refcount_cache_size < MIN_REFCOUNT_CACHE_SIZE) {
- refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE;
- }
- if (refcount_cache_size > INT_MAX) {
- error_setg(errp, "Refcount cache size too big");
- ret = -EINVAL;
- goto fail;
- }
-
- /* alloc L2 table/refcount block cache */
- s->l2_table_cache = qcow2_cache_create(bs, l2_cache_size);
- s->refcount_block_cache = qcow2_cache_create(bs, refcount_cache_size);
- if (s->l2_table_cache == NULL || s->refcount_block_cache == NULL) {
- error_setg(errp, "Could not allocate metadata caches");
- ret = -ENOMEM;
+ /* Parse driver-specific options */
+ ret = qcow2_update_options(bs, options, flags, errp);
+ if (ret < 0) {
goto fail;
}
s->cluster_cache = g_malloc(s->cluster_size);
/* one more sector for decompressed data alignment */
- s->cluster_data = qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
+ s->cluster_data = qemu_try_blockalign(bs->file->bs, QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size + 512);
if (s->cluster_data == NULL) {
error_setg(errp, "Could not allocate temporary cluster buffer");
@@ -868,7 +1116,8 @@
/* read the backing file name */
if (header.backing_file_offset != 0) {
len = header.backing_file_size;
- if (len > MIN(1023, s->cluster_size - header.backing_file_offset)) {
+ if (len > MIN(1023, s->cluster_size - header.backing_file_offset) ||
+ len >= sizeof(bs->backing_file)) {
error_setg(errp, "Backing file name too long");
ret = -EINVAL;
goto fail;
@@ -880,6 +1129,7 @@
goto fail;
}
bs->backing_file[len] = '\0';
+ s->image_backing_file = g_strdup(bs->backing_file);
}
/* Internal snapshots */
@@ -893,7 +1143,7 @@
}
/* Clear unknown autoclear feature bits */
- if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) {
+ if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) {
s->autoclear_features = 0;
ret = qcow2_update_header(bs);
if (ret < 0) {
@@ -906,7 +1156,7 @@
qemu_co_mutex_init(&s->lock);
/* Repair image if dirty */
- if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only &&
+ if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
(s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
BdrvCheckResult result = {0};
@@ -917,70 +1167,6 @@
}
}
- /* Enable lazy_refcounts according to image and command line options */
- s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
- (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
-
- s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
- s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
- s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
- flags & BDRV_O_UNMAP);
- s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
- s->discard_passthrough[QCOW2_DISCARD_OTHER] =
- qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
-
- opt_overlap_check = qemu_opt_get(opts, QCOW2_OPT_OVERLAP);
- opt_overlap_check_template = qemu_opt_get(opts, QCOW2_OPT_OVERLAP_TEMPLATE);
- if (opt_overlap_check_template && opt_overlap_check &&
- strcmp(opt_overlap_check_template, opt_overlap_check))
- {
- error_setg(errp, "Conflicting values for qcow2 options '"
- QCOW2_OPT_OVERLAP "' ('%s') and '" QCOW2_OPT_OVERLAP_TEMPLATE
- "' ('%s')", opt_overlap_check, opt_overlap_check_template);
- ret = -EINVAL;
- goto fail;
- }
- if (!opt_overlap_check) {
- opt_overlap_check = opt_overlap_check_template ?: "cached";
- }
-
- if (!strcmp(opt_overlap_check, "none")) {
- overlap_check_template = 0;
- } else if (!strcmp(opt_overlap_check, "constant")) {
- overlap_check_template = QCOW2_OL_CONSTANT;
- } else if (!strcmp(opt_overlap_check, "cached")) {
- overlap_check_template = QCOW2_OL_CACHED;
- } else if (!strcmp(opt_overlap_check, "all")) {
- overlap_check_template = QCOW2_OL_ALL;
- } else {
- error_setg(errp, "Unsupported value '%s' for qcow2 option "
- "'overlap-check'. Allowed are either of the following: "
- "none, constant, cached, all", opt_overlap_check);
- ret = -EINVAL;
- goto fail;
- }
-
- s->overlap_check = 0;
- for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
- /* overlap-check defines a template bitmask, but every flag may be
- * overwritten through the associated boolean option */
- s->overlap_check |=
- qemu_opt_get_bool(opts, overlap_bool_option_names[i],
- overlap_check_template & (1 << i)) << i;
- }
-
- qemu_opts_del(opts);
- opts = NULL;
-
- if (s->use_lazy_refcounts && s->qcow_version < 3) {
- error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
- "qemu 1.1 compatibility level");
- ret = -EINVAL;
- goto fail;
- }
-
#ifdef DEBUG_ALLOC
{
BdrvCheckResult result = {0};
@@ -990,7 +1176,6 @@
return ret;
fail:
- qemu_opts_del(opts);
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
qcow2_free_snapshots(bs);
@@ -998,6 +1183,7 @@
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
+ cache_clean_timer_del(bs);
if (s->l2_table_cache) {
qcow2_cache_destroy(bs, s->l2_table_cache);
}
@@ -1011,16 +1197,21 @@
static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
- bs->bl.write_zeroes_alignment = s->cluster_sectors;
+ if (bs->encrypted) {
+ /* Encryption works on a sector granularity */
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+ }
+ bs->bl.pwrite_zeroes_alignment = s->cluster_size;
}
static int qcow2_set_key(BlockDriverState *bs, const char *key)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint8_t keybuf[16];
int len, i;
+ Error *err = NULL;
memset(keybuf, 0, 16);
len = strlen(key);
@@ -1031,75 +1222,144 @@
for(i = 0;i < len;i++) {
keybuf[i] = key[i];
}
- s->crypt_method = s->crypt_method_header;
+ assert(bs->encrypted);
- if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
+ qcrypto_cipher_free(s->cipher);
+ s->cipher = qcrypto_cipher_new(
+ QCRYPTO_CIPHER_ALG_AES_128,
+ QCRYPTO_CIPHER_MODE_CBC,
+ keybuf, G_N_ELEMENTS(keybuf),
+ &err);
+
+ if (!s->cipher) {
+ /* XXX would be nice if errors in this method could
+ * be properly propagated to the caller. Would need
+ * the bdrv_set_key() API signature to be fixed. */
+ error_free(err);
return -1;
- if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
- return -1;
-#if 0
- /* test */
- {
- uint8_t in[16];
- uint8_t out[16];
- uint8_t tmp[16];
- for(i=0;i<16;i++)
- in[i] = i;
- AES_encrypt(in, tmp, &s->aes_encrypt_key);
- AES_decrypt(tmp, out, &s->aes_decrypt_key);
- for(i = 0; i < 16; i++)
- printf(" %02x", tmp[i]);
- printf("\n");
- for(i = 0; i < 16; i++)
- printf(" %02x", out[i]);
- printf("\n");
}
-#endif
return 0;
}
-/* We have no actual commit/abort logic for qcow2, but we need to write out any
- * unwritten data if we reopen read-only. */
static int qcow2_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
+ Qcow2ReopenState *r;
int ret;
+ r = g_new0(Qcow2ReopenState, 1);
+ state->opaque = r;
+
+ ret = qcow2_update_options_prepare(state->bs, r, state->options,
+ state->flags, errp);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* We need to write out any unwritten data if we reopen read-only. */
if ((state->flags & BDRV_O_RDWR) == 0) {
ret = bdrv_flush(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
ret = qcow2_mark_clean(state->bs);
if (ret < 0) {
- return ret;
+ goto fail;
}
}
return 0;
+
+fail:
+ qcow2_update_options_abort(state->bs, r);
+ g_free(r);
+ return ret;
+}
+
+static void qcow2_reopen_commit(BDRVReopenState *state)
+{
+ qcow2_update_options_commit(state->bs, state->opaque);
+ g_free(state->opaque);
+}
+
+static void qcow2_reopen_abort(BDRVReopenState *state)
+{
+ qcow2_update_options_abort(state->bs, state->opaque);
+ g_free(state->opaque);
+}
+
+/* Merge @old_options into @options (entries already in @options are kept),
+ * first dropping old overlap/cache-size entries that new ones supersede. */
+static void qcow2_join_options(QDict *options, QDict *old_options)
+{
+    bool has_new_overlap_template =
+        qdict_haskey(options, QCOW2_OPT_OVERLAP) ||
+        qdict_haskey(options, QCOW2_OPT_OVERLAP_TEMPLATE);
+    bool has_new_total_cache_size =
+        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE);
+    bool has_all_cache_options;
+
+    /* New overlap template overrides all old overlap options */
+    if (has_new_overlap_template) {
+        qdict_del(old_options, QCOW2_OPT_OVERLAP);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_TEMPLATE);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_MAIN_HEADER);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L1);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_ACTIVE_L2);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_TABLE);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L1);
+        qdict_del(old_options, QCOW2_OPT_OVERLAP_INACTIVE_L2);
+    }
+
+    /* New total cache size overrides all old options */
+    if (has_new_total_cache_size) {
+        qdict_del(old_options, QCOW2_OPT_L2_CACHE_SIZE);
+        qdict_del(old_options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+    }
+
+    qdict_join(options, old_options, false);
+
+    /* If all three cache size options are set after merging, an old total
+     * size is dropped in favour of the two specific sizes. Keep all three
+     * if the total is new: the resulting error message is intended. */
+    has_all_cache_options =
+        qdict_haskey(options, QCOW2_OPT_CACHE_SIZE) &&
+        qdict_haskey(options, QCOW2_OPT_L2_CACHE_SIZE) &&
+        qdict_haskey(options, QCOW2_OPT_REFCOUNT_CACHE_SIZE);
+
+    if (has_all_cache_options && !has_new_total_cache_size) {
+        qdict_del(options, QCOW2_OPT_CACHE_SIZE);
+    }
+}
static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
int index_in_cluster, ret;
+ unsigned int bytes;
int64_t status = 0;
- *pnum = nb_sectors;
+ bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+ &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
return ret;
}
+ *pnum = bytes >> BDRV_SECTOR_BITS;
+
if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
- !s->crypt_method) {
+ !s->cipher) {
index_in_cluster = sector_num & (s->cluster_sectors - 1);
cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
+ *file = bs->file->bs;
status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
}
if (ret == QCOW2_CLUSTER_ZERO) {
@@ -1112,28 +1372,34 @@
/* handle reading after the end of the backing file */
int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t sector_num, int nb_sectors)
+ int64_t offset, int bytes)
{
+ uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
int n1;
- if ((sector_num + nb_sectors) <= bs->total_sectors)
- return nb_sectors;
- if (sector_num >= bs->total_sectors)
- n1 = 0;
- else
- n1 = bs->total_sectors - sector_num;
- qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+ if ((offset + bytes) <= bs_size) {
+ return bytes;
+ }
+
+ if (offset >= bs_size) {
+ n1 = 0;
+ } else {
+ n1 = bs_size - offset;
+ }
+
+ qemu_iovec_memset(qiov, n1, 0, bytes - n1);
return n1;
}
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
- int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster, n1;
+ BDRVQcow2State *s = bs->opaque;
+ int offset_in_cluster, n1;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
uint64_t bytes_done = 0;
QEMUIOVector hd_qiov;
@@ -1143,45 +1409,42 @@
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
/* prepare next request */
- cur_nr_sectors = remaining_sectors;
- if (s->crypt_method) {
- cur_nr_sectors = MIN(cur_nr_sectors,
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ cur_bytes = MIN(bytes, INT_MAX);
+ if (s->cipher) {
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
}
- ret = qcow2_get_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset);
+ ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
if (ret < 0) {
goto fail;
}
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
+ offset_in_cluster = offset_into_cluster(s, offset);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
- if (bs->backing_hd) {
+ if (bs->backing) {
/* read from the base image */
- n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
- sector_num, cur_nr_sectors);
+ n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
+ offset, cur_bytes);
if (n1 > 0) {
QEMUIOVector local_qiov;
qemu_iovec_init(&local_qiov, hd_qiov.niov);
- qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
- n1 * BDRV_SECTOR_SIZE);
+ qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->backing_hd, sector_num,
- n1, &local_qiov);
+ ret = bdrv_co_preadv(bs->backing, offset, n1,
+ &local_qiov, 0);
qemu_co_mutex_lock(&s->lock);
qemu_iovec_destroy(&local_qiov);
@@ -1192,12 +1455,12 @@
}
} else {
/* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
}
break;
case QCOW2_CLUSTER_ZERO:
- qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+ qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
break;
case QCOW2_CLUSTER_COMPRESSED:
@@ -1208,8 +1471,8 @@
}
qemu_iovec_from_buf(&hd_qiov, 0,
- s->cluster_cache + index_in_cluster * 512,
- 512 * cur_nr_sectors);
+ s->cluster_cache + offset_in_cluster,
+ cur_bytes);
break;
case QCOW2_CLUSTER_NORMAL:
@@ -1218,42 +1481,52 @@
goto fail;
}
- if (s->crypt_method) {
+ if (bs->encrypted) {
+ assert(s->cipher);
+
/*
* For encrypted images, read everything into a temporary
* contiguous buffer on which the AES functions can work.
*/
if (!cluster_data) {
cluster_data =
- qemu_try_blockalign(bs->file, QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
+ qemu_try_blockalign(bs->file->bs,
+ QCOW_MAX_CRYPT_CLUSTERS
+ * s->cluster_size);
if (cluster_data == NULL) {
ret = -ENOMEM;
goto fail;
}
}
- assert(cur_nr_sectors <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+ assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- 512 * cur_nr_sectors);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
qemu_co_mutex_unlock(&s->lock);
- ret = bdrv_co_readv(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ ret = bdrv_co_preadv(bs->file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
}
- if (s->crypt_method) {
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
- qemu_iovec_from_buf(qiov, bytes_done,
- cluster_data, 512 * cur_nr_sectors);
+ if (bs->encrypted) {
+ assert(s->cipher);
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+ Error *err = NULL;
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >> BDRV_SECTOR_BITS,
+ false, &err) < 0) {
+ error_free(err);
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
}
break;
@@ -1263,9 +1536,9 @@
goto fail;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
}
ret = 0;
@@ -1278,23 +1551,21 @@
return ret;
}
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
- int64_t sector_num,
- int remaining_sectors,
- QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
- BDRVQcowState *s = bs->opaque;
- int index_in_cluster;
+ BDRVQcow2State *s = bs->opaque;
+ int offset_in_cluster;
int ret;
- int cur_nr_sectors; /* number of sectors in current iteration */
+ unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset;
QEMUIOVector hd_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
- trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
- remaining_sectors);
+ trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
qemu_iovec_init(&hd_qiov, qiov->niov);
@@ -1302,22 +1573,21 @@
qemu_co_mutex_lock(&s->lock);
- while (remaining_sectors != 0) {
+ while (bytes != 0) {
l2meta = NULL;
trace_qcow2_writev_start_part(qemu_coroutine_self());
- index_in_cluster = sector_num & (s->cluster_sectors - 1);
- cur_nr_sectors = remaining_sectors;
- if (s->crypt_method &&
- cur_nr_sectors >
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
- cur_nr_sectors =
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
+ offset_in_cluster = offset_into_cluster(s, offset);
+ cur_bytes = MIN(bytes, INT_MAX);
+ if (bs->encrypted) {
+ cur_bytes = MIN(cur_bytes,
+ QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+ - offset_in_cluster);
}
- ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
- &cur_nr_sectors, &cluster_offset, &l2meta);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
+ &cluster_offset, &l2meta);
if (ret < 0) {
goto fail;
}
@@ -1325,12 +1595,13 @@
assert((cluster_offset & 511) == 0);
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
- cur_nr_sectors * 512);
+ qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
- if (s->crypt_method) {
+ if (bs->encrypted) {
+ Error *err = NULL;
+ assert(s->cipher);
if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file,
+ cluster_data = qemu_try_blockalign(bs->file->bs,
QCOW_MAX_CRYPT_CLUSTERS
* s->cluster_size);
if (cluster_data == NULL) {
@@ -1343,17 +1614,21 @@
QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
- qcow2_encrypt_sectors(s, sector_num, cluster_data,
- cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);
+ if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+ cluster_data, cluster_data,
+ cur_bytes >>BDRV_SECTOR_BITS,
+ true, &err) < 0) {
+ error_free(err);
+ ret = -EIO;
+ goto fail;
+ }
qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data,
- cur_nr_sectors * 512);
+ qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
}
ret = qcow2_pre_write_overlap_check(bs, 0,
- cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
- cur_nr_sectors * BDRV_SECTOR_SIZE);
+ cluster_offset + offset_in_cluster, cur_bytes);
if (ret < 0) {
goto fail;
}
@@ -1361,10 +1636,10 @@
qemu_co_mutex_unlock(&s->lock);
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
- (cluster_offset >> 9) + index_in_cluster);
- ret = bdrv_co_writev(bs->file,
- (cluster_offset >> 9) + index_in_cluster,
- cur_nr_sectors, &hd_qiov);
+ cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev(bs->file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, &hd_qiov, 0);
qemu_co_mutex_lock(&s->lock);
if (ret < 0) {
goto fail;
@@ -1390,10 +1665,10 @@
l2meta = next;
}
- remaining_sectors -= cur_nr_sectors;
- sector_num += cur_nr_sectors;
- bytes_done += cur_nr_sectors * 512;
- trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+ bytes -= cur_bytes;
+ offset += cur_bytes;
+ bytes_done += cur_bytes;
+ trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
@@ -1420,26 +1695,56 @@
return ret;
}
+static int qcow2_inactivate(BlockDriverState *bs)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int ret, result = 0;
+
+ ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the L2 table cache: %s",
+ strerror(-ret));
+ }
+
+ ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ if (ret) {
+ result = ret;
+ error_report("Failed to flush the refcount block cache: %s",
+ strerror(-ret));
+ }
+
+ if (result == 0) {
+ qcow2_mark_clean(bs);
+ }
+
+ return result;
+}
+
static void qcow2_close(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_vfree(s->l1_table);
/* else pre-write overlap checks in cache_destroy may crash */
s->l1_table = NULL;
- if (!(bs->open_flags & BDRV_O_INCOMING)) {
- qcow2_cache_flush(bs, s->l2_table_cache);
- qcow2_cache_flush(bs, s->refcount_block_cache);
-
- qcow2_mark_clean(bs);
+ if (!(s->flags & BDRV_O_INACTIVE)) {
+ qcow2_inactivate(bs);
}
+ cache_clean_timer_del(bs);
qcow2_cache_destroy(bs, s->l2_table_cache);
qcow2_cache_destroy(bs, s->refcount_block_cache);
+ qcrypto_cipher_free(s->cipher);
+ s->cipher = NULL;
+
g_free(s->unknown_header_fields);
cleanup_unknown_header_ext(bs);
+ g_free(s->image_backing_file);
+ g_free(s->image_backing_format);
+
g_free(s->cluster_cache);
qemu_vfree(s->cluster_data);
qcow2_refcount_close(bs);
@@ -1448,11 +1753,9 @@
static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int flags = s->flags;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
- uint32_t crypt_method = 0;
+ QCryptoCipher *cipher = NULL;
QDict *options;
Error *local_err = NULL;
int ret;
@@ -1462,40 +1765,29 @@
* that means we don't have to worry about reopening them here.
*/
- if (s->crypt_method) {
- crypt_method = s->crypt_method;
- memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
- }
+ cipher = s->cipher;
+ s->cipher = NULL;
qcow2_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
- memset(s, 0, sizeof(BDRVQcowState));
+ memset(s, 0, sizeof(BDRVQcow2State));
options = qdict_clone_shallow(bs->options);
+ flags &= ~BDRV_O_INACTIVE;
ret = qcow2_open(bs, options, flags, &local_err);
QDECREF(options);
if (local_err) {
- error_setg(errp, "Could not reopen qcow2 layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qcow2 layer: ");
+ bs->drv = NULL;
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
+ bs->drv = NULL;
return;
}
- if (crypt_method) {
- s->crypt_method = crypt_method;
- memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
- memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
- }
+ s->cipher = cipher;
}
static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
@@ -1527,7 +1819,7 @@
*/
int qcow2_update_header(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
QCowHeader *header;
char *buf;
size_t buflen = s->cluster_size;
@@ -1605,9 +1897,10 @@
}
/* Backing file format header extension */
- if (*bs->backing_format) {
+ if (s->image_backing_format) {
ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
- bs->backing_format, strlen(bs->backing_format),
+ s->image_backing_format,
+ strlen(s->image_backing_format),
buflen);
if (ret < 0) {
goto fail;
@@ -1618,31 +1911,33 @@
}
/* Feature table */
- Qcow2Feature features[] = {
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
- .name = "dirty bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
- .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
- .name = "corrupt bit",
- },
- {
- .type = QCOW2_FEAT_TYPE_COMPATIBLE,
- .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
- .name = "lazy refcounts",
- },
- };
+ if (s->qcow_version >= 3) {
+ Qcow2Feature features[] = {
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_DIRTY_BITNR,
+ .name = "dirty bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
+ .bit = QCOW2_INCOMPAT_CORRUPT_BITNR,
+ .name = "corrupt bit",
+ },
+ {
+ .type = QCOW2_FEAT_TYPE_COMPATIBLE,
+ .bit = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
+ .name = "lazy refcounts",
+ },
+ };
- ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
- features, sizeof(features), buflen);
- if (ret < 0) {
- goto fail;
+ ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
+ features, sizeof(features), buflen);
+ if (ret < 0) {
+ goto fail;
+ }
+ buf += ret;
+ buflen -= ret;
}
- buf += ret;
- buflen -= ret;
/* Keep unknown header extensions */
QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
@@ -1665,8 +1960,8 @@
buflen -= ret;
/* Backing file name */
- if (*bs->backing_file) {
- size_t backing_file_len = strlen(bs->backing_file);
+ if (s->image_backing_file) {
+ size_t backing_file_len = strlen(s->image_backing_file);
if (buflen < backing_file_len) {
ret = -ENOSPC;
@@ -1674,7 +1969,7 @@
}
/* Using strncpy is ok here, since buf is not NUL-terminated. */
- strncpy(buf, bs->backing_file, buflen);
+ strncpy(buf, s->image_backing_file, buflen);
header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
header->backing_file_size = cpu_to_be32(backing_file_len);
@@ -1695,27 +1990,39 @@
static int qcow2_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt)
{
+ BDRVQcow2State *s = bs->opaque;
+
+ if (backing_file && strlen(backing_file) > 1023) {
+ return -EINVAL;
+ }
+
pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
+ g_free(s->image_backing_file);
+ g_free(s->image_backing_format);
+
+ s->image_backing_file = backing_file ? g_strdup(bs->backing_file) : NULL;
+ s->image_backing_format = backing_fmt ? g_strdup(bs->backing_format) : NULL;
+
return qcow2_update_header(bs);
}
static int preallocate(BlockDriverState *bs)
{
- uint64_t nb_sectors;
+ uint64_t bytes;
uint64_t offset;
uint64_t host_offset = 0;
- int num;
+ unsigned int cur_bytes;
int ret;
QCowL2Meta *meta;
- nb_sectors = bdrv_nb_sectors(bs);
+ bytes = bdrv_getlength(bs);
offset = 0;
- while (nb_sectors) {
- num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
- ret = qcow2_alloc_cluster_offset(bs, offset, &num,
+ while (bytes) {
+ cur_bytes = MIN(bytes, INT_MAX);
+ ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&host_offset, &meta);
if (ret < 0) {
return ret;
@@ -1741,8 +2048,8 @@
/* TODO Preallocate data if requested */
- nb_sectors -= num;
- offset += num << BDRV_SECTOR_BITS;
+ bytes -= cur_bytes;
+ offset += cur_bytes;
}
/*
@@ -1751,10 +2058,9 @@
* EOF). Extend the image to the last allocated sector.
*/
if (host_offset != 0) {
- uint8_t buf[BDRV_SECTOR_SIZE];
- memset(buf, 0, BDRV_SECTOR_SIZE);
- ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
- buf, 1);
+ uint8_t data = 0;
+ ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
+ &data, 1);
if (ret < 0) {
return ret;
}
@@ -1766,12 +2072,14 @@
static int qcow2_create2(const char *filename, int64_t total_size,
const char *backing_file, const char *backing_format,
int flags, size_t cluster_size, PreallocMode prealloc,
- QemuOpts *opts, int version,
+ QemuOpts *opts, int version, int refcount_order,
Error **errp)
{
- /* Calculate cluster_bits */
int cluster_bits;
- cluster_bits = ffs(cluster_size) - 1;
+ QDict *options;
+
+ /* Calculate cluster_bits */
+ cluster_bits = ctz32(cluster_size);
if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
(1 << cluster_bits) != cluster_size)
{
@@ -1792,16 +2100,28 @@
* 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
* size for any qcow2 image.
*/
- BlockDriverState* bs;
+ BlockBackend *blk;
QCowHeader *header;
uint64_t* refcount_table;
Error *local_err = NULL;
int ret;
if (prealloc == PREALLOC_MODE_FULL || prealloc == PREALLOC_MODE_FALLOC) {
+ /* Note: The following calculation does not need to be exact; if it is a
+ * bit off, either some bytes will be "leaked" (which is fine) or we
+ * will need to increase the file size by some bytes (which is fine,
+ * too, as long as the bulk is allocated here). Therefore, using
+ * floating point arithmetic is fine. */
int64_t meta_size = 0;
uint64_t nreftablee, nrefblocke, nl1e, nl2e;
int64_t aligned_total_size = align_offset(total_size, cluster_size);
+ int refblock_bits, refblock_size;
+ /* refcount entry size in bytes */
+ double rces = (1 << refcount_order) / 8.;
+
+ /* see qcow2_open() */
+ refblock_bits = cluster_bits - (refcount_order - 3);
+ refblock_size = 1 << refblock_bits;
/* header: 1 cluster */
meta_size += cluster_size;
@@ -1826,26 +2146,27 @@
* c = cluster size
* y1 = number of refcount blocks entries
* y2 = meta size including everything
+ * rces = refcount entry size in bytes
* then,
* y1 = (y2 + a)/c
- * y2 = y1 * sizeof(u16) + y1 * sizeof(u16) * sizeof(u64) / c + m
+ * y2 = y1 * rces + y1 * rces * sizeof(u64) / c + m
* we can get y1:
- * y1 = (a + m) / (c - sizeof(u16) - sizeof(u16) * sizeof(u64) / c)
+ * y1 = (a + m) / (c - rces - rces * sizeof(u64) / c)
*/
- nrefblocke = (aligned_total_size + meta_size + cluster_size) /
- (cluster_size - sizeof(uint16_t) -
- 1.0 * sizeof(uint16_t) * sizeof(uint64_t) / cluster_size);
- nrefblocke = align_offset(nrefblocke, cluster_size / sizeof(uint16_t));
- meta_size += nrefblocke * sizeof(uint16_t);
+ nrefblocke = (aligned_total_size + meta_size + cluster_size)
+ / (cluster_size - rces - rces * sizeof(uint64_t)
+ / cluster_size);
+ meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;
/* total size of refcount tables */
- nreftablee = nrefblocke * sizeof(uint16_t) / cluster_size;
+ nreftablee = nrefblocke / refblock_size;
nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
meta_size += nreftablee * sizeof(uint64_t);
qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
- aligned_total_size + meta_size);
- qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc]);
+ aligned_total_size + meta_size, &error_abort);
+ qemu_opt_set(opts, BLOCK_OPT_PREALLOC, PreallocMode_lookup[prealloc],
+ &error_abort);
}
ret = bdrv_create_file(filename, opts, &local_err);
@@ -1854,14 +2175,15 @@
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
@@ -1874,7 +2196,7 @@
.l1_size = cpu_to_be32(0),
.refcount_table_offset = cpu_to_be64(cluster_size),
.refcount_table_clusters = cpu_to_be32(1),
- .refcount_order = cpu_to_be32(4),
+ .refcount_order = cpu_to_be32(refcount_order),
.header_length = cpu_to_be32(sizeof(*header)),
};
@@ -1889,7 +2211,7 @@
cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
}
- ret = bdrv_pwrite(bs, 0, header, cluster_size);
+ ret = blk_pwrite(blk, 0, header, cluster_size, 0);
g_free(header);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -1899,7 +2221,7 @@
/* Write a refcount table with one refcount block */
refcount_table = g_malloc0(2 * cluster_size);
refcount_table[0] = cpu_to_be64(2 * cluster_size);
- ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
+ ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
g_free(refcount_table);
if (ret < 0) {
@@ -1907,24 +2229,25 @@
goto out;
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/*
* And now open the image and make it consistent first (i.e. increase the
* refcount of the cluster that is occupied by the header and the refcount
* table)
*/
- BlockDriver* drv = bdrv_find_format("qcow2");
- assert(drv != NULL);
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err);
- if (ret < 0) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
- ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
+ ret = qcow2_alloc_clusters(blk_bs(blk), 3 * cluster_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
"header and refcount table");
@@ -1935,8 +2258,15 @@
abort();
}
+ /* Create a full header (including things like feature table) */
+ ret = qcow2_update_header(blk_bs(blk));
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not update qcow2 header");
+ goto out;
+ }
+
/* Okay, now that we have a valid image, let's give it the right size */
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not resize image");
goto out;
@@ -1944,7 +2274,7 @@
/* Want a backing file? There you go.*/
if (backing_file) {
- ret = bdrv_change_backing_file(bs, backing_file, backing_format);
+ ret = bdrv_change_backing_file(blk_bs(blk), backing_file, backing_format);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
"with format '%s'", backing_file, backing_format);
@@ -1954,9 +2284,9 @@
/* And if we're supposed to preallocate metadata, do that now */
if (prealloc != PREALLOC_MODE_OFF) {
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = blk_bs(blk)->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = preallocate(bs);
+ ret = preallocate(blk_bs(blk));
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not preallocate metadata");
@@ -1964,22 +2294,24 @@
}
}
- bdrv_unref(bs);
- bs = NULL;
+ blk_unref(blk);
+ blk = NULL;
/* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
- drv, &local_err);
- if (local_err) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str("qcow2"));
+ blk = blk_new_open(filename, NULL, options,
+ BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
ret = 0;
out:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
return ret;
}
@@ -1994,6 +2326,8 @@
size_t cluster_size = DEFAULT_CLUSTER_SIZE;
PreallocMode prealloc;
int version = 3;
+ uint64_t refcount_bits = 16;
+ int refcount_order;
Error *local_err = NULL;
int ret;
@@ -2009,7 +2343,7 @@
DEFAULT_CLUSTER_SIZE);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
@@ -2048,12 +2382,30 @@
goto finish;
}
- ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
- cluster_size, prealloc, opts, version, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ refcount_bits = qemu_opt_get_number_del(opts, BLOCK_OPT_REFCOUNT_BITS,
+ refcount_bits);
+ if (refcount_bits > 64 || !is_power_of_2(refcount_bits)) {
+ error_setg(errp, "Refcount width must be a power of two and may not "
+ "exceed 64 bits");
+ ret = -EINVAL;
+ goto finish;
}
+ if (version < 3 && refcount_bits != 16) {
+ error_setg(errp, "Different refcount widths than 16 bits require "
+ "compatibility level 1.1 or above (use compat=1.1 or "
+ "greater)");
+ ret = -EINVAL;
+ goto finish;
+ }
+
+ refcount_order = ctz32(refcount_bits);
+
+ ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
+ cluster_size, prealloc, opts, version, refcount_order,
+ &local_err);
+ error_propagate(errp, local_err);
+
finish:
g_free(backing_file);
g_free(backing_fmt);
@@ -2061,42 +2413,88 @@
return ret;
}
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+
+static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
+ uint32_t count)
+{
+ int nr;
+ BlockDriverState *file;
+ int64_t res;
+
+ if (!count) {
+ return true;
+ }
+ res = bdrv_get_block_status_above(bs, NULL, start, count,
+ &nr, &file);
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
+}
+
+static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
- /* Emulate misaligned zero writes */
- if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
- return -ENOTSUP;
+ uint32_t head = offset % s->cluster_size;
+ uint32_t tail = (offset + count) % s->cluster_size;
+
+ trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+
+ if (head || tail) {
+ int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
+ uint64_t off;
+ unsigned int nr;
+
+ assert(head + count <= s->cluster_size);
+
+ /* check whether remainder of cluster already reads as zero */
+ if (!(is_zero_sectors(bs, cl_start,
+ DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
+ is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS,
+ DIV_ROUND_UP(-tail & (s->cluster_size - 1),
+ BDRV_SECTOR_SIZE)))) {
+ return -ENOTSUP;
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+ /* A new write may have landed since the check above; re-check under the lock */
+ offset = cl_start << BDRV_SECTOR_BITS;
+ count = s->cluster_size;
+ nr = s->cluster_size;
+ ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
+ if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
+ qemu_co_mutex_unlock(&s->lock);
+ return -ENOTSUP;
+ }
+ } else {
+ qemu_co_mutex_lock(&s->lock);
}
+ trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
+
/* Whatever is left can use real zero clusters */
- qemu_co_mutex_lock(&s->lock);
- ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors);
+ ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
int ret;
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
- nb_sectors, QCOW2_DISCARD_REQUEST, false);
+ ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
+ QCOW2_DISCARD_REQUEST, false);
qemu_co_mutex_unlock(&s->lock);
return ret;
}
static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t new_l1_size;
int ret;
@@ -2135,12 +2533,57 @@
return 0;
}
+typedef struct Qcow2WriteCo {
+ BlockDriverState *bs;
+ int64_t sector_num;
+ const uint8_t *buf;
+ int nb_sectors;
+ int ret;
+} Qcow2WriteCo;
+
+static void qcow2_write_co_entry(void *opaque)
+{
+ Qcow2WriteCo *co = opaque;
+ QEMUIOVector qiov;
+ uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE;
+ uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+ struct iovec iov = (struct iovec) {
+ .iov_base = (uint8_t*) co->buf,
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&qiov, &iov, 1);
+
+ co->ret = qcow2_co_pwritev(co->bs, offset, bytes, &qiov, 0);
+}
+
+/* Wrapper for non-coroutine contexts */
+static int qcow2_write(BlockDriverState *bs, int64_t sector_num,
+ const uint8_t *buf, int nb_sectors)
+{
+ Coroutine *co;
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ Qcow2WriteCo data = {
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS,
+ };
+ co = qemu_coroutine_create(qcow2_write_co_entry, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) {
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
+}
+
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
const uint8_t *buf, int nb_sectors)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
z_stream strm;
int ret, out_len;
uint8_t *out_buf;
@@ -2149,9 +2592,8 @@
if (nb_sectors == 0) {
/* align end of file to a sector boundary to ease reading with
sector based I/Os */
- cluster_offset = bdrv_getlength(bs->file);
- bdrv_truncate(bs->file, cluster_offset);
- return 0;
+ cluster_offset = bdrv_getlength(bs->file->bs);
+ return bdrv_truncate(bs->file->bs, cluster_offset);
}
if (nb_sectors != s->cluster_sectors) {
@@ -2170,7 +2612,7 @@
return ret;
}
- out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+ out_buf = g_malloc(s->cluster_size);
/* best compression, small window, no zlib header */
memset(&strm, 0, sizeof(strm));
@@ -2199,7 +2641,7 @@
if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
/* could not compress: write normal cluster */
- ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+ ret = qcow2_write(bs, sector_num, buf, s->cluster_sectors);
if (ret < 0) {
goto fail;
}
@@ -2232,7 +2674,7 @@
static int make_completely_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret, l1_clusters;
int64_t offset;
uint64_t *new_reftable = NULL;
@@ -2267,8 +2709,8 @@
/* After this call, neither the in-memory nor the on-disk refcount
* information accurately describe the actual references */
- ret = bdrv_write_zeroes(bs->file, s->l1_table_offset / BDRV_SECTOR_SIZE,
- l1_clusters * s->cluster_sectors, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
+ l1_clusters * s->cluster_size, 0);
if (ret < 0) {
goto fail_broken_refcounts;
}
@@ -2281,9 +2723,8 @@
* overwrite parts of the existing refcount and L1 table, which is not
* an issue because the dirty flag is set, complete data loss is in fact
* desired and partial data loss is consequently fine as well */
- ret = bdrv_write_zeroes(bs->file, s->cluster_size / BDRV_SECTOR_SIZE,
- (2 + l1_clusters) * s->cluster_size /
- BDRV_SECTOR_SIZE, 0);
+ ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
+ (2 + l1_clusters) * s->cluster_size, 0);
/* This call (even if it failed overall) may have overwritten on-disk
* refcount structures; in that case, the in-memory refcount information
* will probably differ from the on-disk information which makes the BDS
@@ -2298,9 +2739,9 @@
/* "Create" an empty reftable (one cluster) directly after the image
* header and an empty L1 table three clusters after the image header;
* the cluster between those two will be used as the first refblock */
- cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
- cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
- cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
+ l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
+ l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
+ l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
&l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
if (ret < 0) {
@@ -2357,7 +2798,7 @@
goto fail;
}
- ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size);
+ ret = bdrv_truncate(bs->file->bs, (3 + l1_clusters) * s->cluster_size);
if (ret < 0) {
goto fail;
}
@@ -2380,7 +2821,7 @@
static int qcow2_make_empty(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
uint64_t start_sector;
int sector_step = INT_MAX / BDRV_SECTOR_SIZE;
int l1_clusters, ret = 0;
@@ -2421,18 +2862,18 @@
static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int ret;
qemu_co_mutex_lock(&s->lock);
- ret = qcow2_cache_flush(bs, s->l2_table_cache);
+ ret = qcow2_cache_write(bs, s->l2_table_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
}
if (qcow2_need_accurate_refcounts(s)) {
- ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+ ret = qcow2_cache_write(bs, s->refcount_block_cache);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
return ret;
@@ -2445,7 +2886,7 @@
static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
bdi->unallocated_blocks_are_zero = true;
bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3);
bdi->cluster_size = s->cluster_size;
@@ -2455,21 +2896,20 @@
static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
- {
- .qcow2 = g_new(ImageInfoSpecificQCow2, 1),
- },
+ .type = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
+ .u.qcow2.data = g_new(ImageInfoSpecificQCow2, 1),
};
if (s->qcow_version == 2) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
- .compat = g_strdup("0.10"),
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
+ .compat = g_strdup("0.10"),
+ .refcount_bits = s->refcount_bits,
};
} else if (s->qcow_version == 3) {
- *spec_info->qcow2 = (ImageInfoSpecificQCow2){
+ *spec_info->u.qcow2.data = (ImageInfoSpecificQCow2){
.compat = g_strdup("1.1"),
.lazy_refcounts = s->compatible_features &
QCOW2_COMPAT_LAZY_REFCOUNTS,
@@ -2477,7 +2917,12 @@
.corrupt = s->incompatible_features &
QCOW2_INCOMPAT_CORRUPT,
.has_corrupt = true,
+ .refcount_bits = s->refcount_bits,
};
+ } else {
+ /* if this assertion fails, this probably means a new version was
+ * added without having it covered here */
+ assert(false);
}
return spec_info;
@@ -2486,11 +2931,11 @@
#if 0
static void dump_refcounts(BlockDriverState *bs)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int64_t nb_clusters, k, k1, size;
int refcount;
- size = bdrv_getlength(bs->file);
+ size = bdrv_getlength(bs->file->bs);
nb_clusters = size_to_clusters(s, size);
for(k = 0; k < nb_clusters;) {
k1 = k;
@@ -2507,43 +2952,21 @@
static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
int64_t pos)
{
- BDRVQcowState *s = bs->opaque;
- int64_t total_sectors = bs->total_sectors;
- int growable = bs->growable;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
+ BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- bs->growable = 1;
- bs->zero_beyond_eof = false;
- ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
- bs->growable = growable;
- bs->zero_beyond_eof = zero_beyond_eof;
-
- /* bdrv_co_do_writev will have increased the total_sectors value to include
- * the VM state - the VM state is however not an actual part of the block
- * device, therefore, we need to restore the old value. */
- bs->total_sectors = total_sectors;
-
- return ret;
+ return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size)
+static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+ int64_t pos)
{
- BDRVQcowState *s = bs->opaque;
- int growable = bs->growable;
- bool zero_beyond_eof = bs->zero_beyond_eof;
- int ret;
+ BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- bs->growable = 1;
- bs->zero_beyond_eof = false;
- ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
- bs->growable = growable;
- bs->zero_beyond_eof = zero_beyond_eof;
-
- return ret;
+ return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0);
}
/*
@@ -2551,9 +2974,9 @@
* have to be removed.
*/
static int qcow2_downgrade(BlockDriverState *bs, int target_version,
- BlockDriverAmendStatusCB *status_cb)
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
int current_version = s->qcow_version;
int ret;
@@ -2566,13 +2989,7 @@
}
if (s->refcount_order != 4) {
- /* we would have to convert the image to a refcount_order == 4 image
- * here; however, since qemu (at the time of writing this) does not
- * support anything different than 4 anyway, there is no point in doing
- * so right now; however, we should error out (if qemu supports this in
- * the future and this code has not been adapted) */
- error_report("qcow2_downgrade: Image refcount orders other than 4 are "
- "currently not supported.");
+ error_report("compat=0.10 requires refcount_bits=16");
return -ENOTSUP;
}
@@ -2600,7 +3017,7 @@
/* clearing autoclear features is trivial */
s->autoclear_features = 0;
- ret = qcow2_expand_zero_clusters(bs, status_cb);
+ ret = qcow2_expand_zero_clusters(bs, status_cb, cb_opaque);
if (ret < 0) {
return ret;
}
@@ -2614,10 +3031,81 @@
return 0;
}
-static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb)
+typedef enum Qcow2AmendOperation {
+ /* This is the value Qcow2AmendHelperCBInfo::last_operation will be
+ * statically initialized to so that the helper CB can discern the first
+ * invocation from an operation change */
+ QCOW2_NO_OPERATION = 0,
+
+ QCOW2_CHANGING_REFCOUNT_ORDER,
+ QCOW2_DOWNGRADING,
+} Qcow2AmendOperation;
+
+typedef struct Qcow2AmendHelperCBInfo {
+ /* The code coordinating the amend operations should only modify
+ * these four fields; the rest will be managed by the CB */
+ BlockDriverAmendStatusCB *original_status_cb;
+ void *original_cb_opaque;
+
+ Qcow2AmendOperation current_operation;
+
+ /* Total number of operations to perform (only set once) */
+ int total_operations;
+
+ /* The following fields are managed by the CB */
+
+ /* Number of operations completed */
+ int operations_completed;
+
+ /* Cumulative offset of all completed operations */
+ int64_t offset_completed;
+
+ Qcow2AmendOperation last_operation;
+ int64_t last_work_size;
+} Qcow2AmendHelperCBInfo;
+
+static void qcow2_amend_helper_cb(BlockDriverState *bs,
+ int64_t operation_offset,
+ int64_t operation_work_size, void *opaque)
{
- BDRVQcowState *s = bs->opaque;
+ Qcow2AmendHelperCBInfo *info = opaque;
+ int64_t current_work_size;
+ int64_t projected_work_size;
+
+ if (info->current_operation != info->last_operation) {
+ if (info->last_operation != QCOW2_NO_OPERATION) {
+ info->offset_completed += info->last_work_size;
+ info->operations_completed++;
+ }
+
+ info->last_operation = info->current_operation;
+ }
+
+ assert(info->total_operations > 0);
+ assert(info->operations_completed < info->total_operations);
+
+ info->last_work_size = operation_work_size;
+
+ current_work_size = info->offset_completed + operation_work_size;
+
+ /* current_work_size is the total work size for (operations_completed + 1)
+ * operations (which includes this one), so multiply it by the number of
+ * operations not covered and divide it by the number of operations
+ * covered to get a projection for the operations not covered */
+ projected_work_size = current_work_size * (info->total_operations -
+ info->operations_completed - 1)
+ / (info->operations_completed + 1);
+
+ info->original_status_cb(bs, info->offset_completed + operation_offset,
+ current_work_size + projected_work_size,
+ info->original_cb_opaque);
+}
+
+static int qcow2_amend_options(BlockDriverState *bs, QemuOpts *opts,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque)
+{
+ BDRVQcow2State *s = bs->opaque;
int old_version = s->qcow_version, new_version = old_version;
uint64_t new_size = 0;
const char *backing_file = NULL, *backing_format = NULL;
@@ -2625,8 +3113,10 @@
const char *compat = NULL;
uint64_t cluster_size = s->cluster_size;
bool encrypt;
+ int refcount_bits = s->refcount_bits;
int ret;
QemuOptDesc *desc = opts->list->desc;
+ Qcow2AmendHelperCBInfo helper_cb_info;
while (desc && desc->name) {
if (!qemu_opt_find(opts, desc->name)) {
@@ -2635,8 +3125,8 @@
continue;
}
- if (!strcmp(desc->name, "compat")) {
- compat = qemu_opt_get(opts, "compat");
+ if (!strcmp(desc->name, BLOCK_OPT_COMPAT_LEVEL)) {
+ compat = qemu_opt_get(opts, BLOCK_OPT_COMPAT_LEVEL);
if (!compat) {
/* preserve default */
} else if (!strcmp(compat, "0.10")) {
@@ -2644,65 +3134,98 @@
} else if (!strcmp(compat, "1.1")) {
new_version = 3;
} else {
- fprintf(stderr, "Unknown compatibility level %s.\n", compat);
+ error_report("Unknown compatibility level %s", compat);
return -EINVAL;
}
- } else if (!strcmp(desc->name, "preallocation")) {
- fprintf(stderr, "Cannot change preallocation mode.\n");
+ } else if (!strcmp(desc->name, BLOCK_OPT_PREALLOC)) {
+ error_report("Cannot change preallocation mode");
return -ENOTSUP;
- } else if (!strcmp(desc->name, "size")) {
- new_size = qemu_opt_get_size(opts, "size", 0);
- } else if (!strcmp(desc->name, "backing_file")) {
- backing_file = qemu_opt_get(opts, "backing_file");
- } else if (!strcmp(desc->name, "backing_fmt")) {
- backing_format = qemu_opt_get(opts, "backing_fmt");
- } else if (!strcmp(desc->name, "encryption")) {
- encrypt = qemu_opt_get_bool(opts, "encryption", s->crypt_method);
- if (encrypt != !!s->crypt_method) {
- fprintf(stderr, "Changing the encryption flag is not "
- "supported.\n");
+ } else if (!strcmp(desc->name, BLOCK_OPT_SIZE)) {
+ new_size = qemu_opt_get_size(opts, BLOCK_OPT_SIZE, 0);
+ } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FILE)) {
+ backing_file = qemu_opt_get(opts, BLOCK_OPT_BACKING_FILE);
+ } else if (!strcmp(desc->name, BLOCK_OPT_BACKING_FMT)) {
+ backing_format = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
+ } else if (!strcmp(desc->name, BLOCK_OPT_ENCRYPT)) {
+ encrypt = qemu_opt_get_bool(opts, BLOCK_OPT_ENCRYPT,
+ !!s->cipher);
+
+ if (encrypt != !!s->cipher) {
+ error_report("Changing the encryption flag is not supported");
return -ENOTSUP;
}
- } else if (!strcmp(desc->name, "cluster_size")) {
- cluster_size = qemu_opt_get_size(opts, "cluster_size",
+ } else if (!strcmp(desc->name, BLOCK_OPT_CLUSTER_SIZE)) {
+ cluster_size = qemu_opt_get_size(opts, BLOCK_OPT_CLUSTER_SIZE,
cluster_size);
if (cluster_size != s->cluster_size) {
- fprintf(stderr, "Changing the cluster size is not "
- "supported.\n");
+ error_report("Changing the cluster size is not supported");
return -ENOTSUP;
}
- } else if (!strcmp(desc->name, "lazy_refcounts")) {
- lazy_refcounts = qemu_opt_get_bool(opts, "lazy_refcounts",
+ } else if (!strcmp(desc->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
+ lazy_refcounts = qemu_opt_get_bool(opts, BLOCK_OPT_LAZY_REFCOUNTS,
lazy_refcounts);
+ } else if (!strcmp(desc->name, BLOCK_OPT_REFCOUNT_BITS)) {
+ refcount_bits = qemu_opt_get_number(opts, BLOCK_OPT_REFCOUNT_BITS,
+ refcount_bits);
+
+ if (refcount_bits <= 0 || refcount_bits > 64 ||
+ !is_power_of_2(refcount_bits))
+ {
+ error_report("Refcount width must be a power of two and may "
+ "not exceed 64 bits");
+ return -EINVAL;
+ }
} else {
- /* if this assertion fails, this probably means a new option was
+ /* if this point is reached, this probably means a new option was
* added without having it covered here */
- assert(false);
+ abort();
}
desc++;
}
- if (new_version != old_version) {
- if (new_version > old_version) {
- /* Upgrade */
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
- if (ret < 0) {
- s->qcow_version = old_version;
- return ret;
- }
- } else {
- ret = qcow2_downgrade(bs, new_version, status_cb);
- if (ret < 0) {
- return ret;
- }
+ helper_cb_info = (Qcow2AmendHelperCBInfo){
+ .original_status_cb = status_cb,
+ .original_cb_opaque = cb_opaque,
+ .total_operations = (new_version < old_version)
+ + (s->refcount_bits != refcount_bits)
+ };
+
+ /* Upgrade first (some features may require compat=1.1) */
+ if (new_version > old_version) {
+ s->qcow_version = new_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = old_version;
+ return ret;
+ }
+ }
+
+ if (s->refcount_bits != refcount_bits) {
+ int refcount_order = ctz32(refcount_bits);
+ Error *local_error = NULL;
+
+ if (new_version < 3 && refcount_bits != 16) {
+ error_report("Different refcount widths than 16 bits require "
+ "compatibility level 1.1 or above (use compat=1.1 or "
+ "greater)");
+ return -EINVAL;
+ }
+
+ helper_cb_info.current_operation = QCOW2_CHANGING_REFCOUNT_ORDER;
+ ret = qcow2_change_refcount_order(bs, refcount_order,
+ &qcow2_amend_helper_cb,
+ &helper_cb_info, &local_error);
+ if (ret < 0) {
+ error_report_err(local_error);
+ return ret;
}
}
if (backing_file || backing_format) {
- ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
- backing_format ?: bs->backing_format);
+ ret = qcow2_change_backing_file(bs,
+ backing_file ?: s->image_backing_file,
+ backing_format ?: s->image_backing_format);
if (ret < 0) {
return ret;
}
@@ -2710,9 +3233,9 @@
if (s->use_lazy_refcounts != lazy_refcounts) {
if (lazy_refcounts) {
- if (s->qcow_version < 3) {
- fprintf(stderr, "Lazy refcounts only supported with compatibility "
- "level 1.1 and above (use compat=1.1 or greater)\n");
+ if (new_version < 3) {
+ error_report("Lazy refcounts only supported with compatibility "
+ "level 1.1 and above (use compat=1.1 or greater)");
return -EINVAL;
}
s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
@@ -2746,6 +3269,16 @@
}
}
+ /* Downgrade last (so unsupported features can be removed before) */
+ if (new_version < old_version) {
+ helper_cb_info.current_operation = QCOW2_DOWNGRADING;
+ ret = qcow2_downgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
return 0;
}
@@ -2758,7 +3291,8 @@
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
int64_t size, const char *message_format, ...)
{
- BDRVQcowState *s = bs->opaque;
+ BDRVQcow2State *s = bs->opaque;
+ const char *node_name;
char *message;
va_list ap;
@@ -2782,8 +3316,11 @@
"corruption events will be suppressed\n", message);
}
- qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs), message,
- offset >= 0, offset, size >= 0, size,
+ node_name = bdrv_get_node_name(bs);
+ qapi_event_send_block_image_corrupted(bdrv_get_device_name(bs),
+ *node_name != '\0', node_name,
+ message, offset >= 0, offset,
+ size >= 0, size,
fatal, &error_abort);
g_free(message);
@@ -2843,28 +3380,37 @@
.help = "Postpone refcount updates",
.def_value_str = "off"
},
+ {
+ .name = BLOCK_OPT_REFCOUNT_BITS,
+ .type = QEMU_OPT_NUMBER,
+ .help = "Width of a reference count entry in bits",
+ .def_value_str = "16"
+ },
{ /* end of list */ }
}
};
-static BlockDriver bdrv_qcow2 = {
+BlockDriver bdrv_qcow2 = {
.format_name = "qcow2",
- .instance_size = sizeof(BDRVQcowState),
+ .instance_size = sizeof(BDRVQcow2State),
.bdrv_probe = qcow2_probe,
.bdrv_open = qcow2_open,
.bdrv_close = qcow2_close,
.bdrv_reopen_prepare = qcow2_reopen_prepare,
+ .bdrv_reopen_commit = qcow2_reopen_commit,
+ .bdrv_reopen_abort = qcow2_reopen_abort,
+ .bdrv_join_options = qcow2_join_options,
.bdrv_create = qcow2_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = qcow2_co_get_block_status,
.bdrv_set_key = qcow2_set_key,
- .bdrv_co_readv = qcow2_co_readv,
- .bdrv_co_writev = qcow2_co_writev,
+ .bdrv_co_preadv = qcow2_co_preadv,
+ .bdrv_co_pwritev = qcow2_co_pwritev,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
- .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
- .bdrv_co_discard = qcow2_co_discard,
+ .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = qcow2_co_pdiscard,
.bdrv_truncate = qcow2_truncate,
.bdrv_write_compressed = qcow2_write_compressed,
.bdrv_make_empty = qcow2_make_empty,
@@ -2885,10 +3431,14 @@
.bdrv_refresh_limits = qcow2_refresh_limits,
.bdrv_invalidate_cache = qcow2_invalidate_cache,
+ .bdrv_inactivate = qcow2_inactivate,
.create_opts = &qcow2_create_opts,
.bdrv_check = qcow2_check,
.bdrv_amend_options = qcow2_amend_options,
+
+ .bdrv_detach_aio_context = qcow2_detach_aio_context,
+ .bdrv_attach_aio_context = qcow2_attach_aio_context,
};
static void bdrv_qcow2_init(void)
diff --git a/block/qcow2.h b/block/qcow2.h
index 6e39a1b..b36a7bf 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -25,8 +25,8 @@
#ifndef BLOCK_QCOW2_H
#define BLOCK_QCOW2_H
-#include "qemu/aes.h"
-#include "block/coroutine.h"
+#include "crypto/cipher.h"
+#include "qemu/coroutine.h"
//#define DEBUG_ALLOC
//#define DEBUG_ALLOC2
@@ -62,11 +62,14 @@
#define MIN_CLUSTER_BITS 9
#define MAX_CLUSTER_BITS 21
-#define MIN_L2_CACHE_SIZE 1 /* cluster */
+/* Must be at least 2 to cover COW */
+#define MIN_L2_CACHE_SIZE 2 /* clusters */
/* Must be at least 4 to cover all cases of refcount table growth */
#define MIN_REFCOUNT_CACHE_SIZE 4 /* clusters */
+/* Default L2 cache size: whichever of the two values below is more */
+#define DEFAULT_L2_CACHE_CLUSTERS 8 /* clusters */
#define DEFAULT_L2_CACHE_BYTE_SIZE 1048576 /* bytes */
/* The refblock cache needs only a fourth of the L2 cache size to cover as many
@@ -93,6 +96,7 @@
#define QCOW2_OPT_CACHE_SIZE "cache-size"
#define QCOW2_OPT_L2_CACHE_SIZE "l2-cache-size"
#define QCOW2_OPT_REFCOUNT_CACHE_SIZE "refcount-cache-size"
+#define QCOW2_OPT_CACHE_CLEAN_INTERVAL "cache-clean-interval"
typedef struct QCowHeader {
uint32_t magic;
@@ -213,7 +217,12 @@
QTAILQ_ENTRY(Qcow2DiscardRegion) next;
} Qcow2DiscardRegion;
-typedef struct BDRVQcowState {
+typedef uint64_t Qcow2GetRefcountFunc(const void *refcount_array,
+ uint64_t index);
+typedef void Qcow2SetRefcountFunc(void *refcount_array,
+ uint64_t index, uint64_t value);
+
+typedef struct BDRVQcow2State {
int cluster_bits;
int cluster_size;
int cluster_sectors;
@@ -231,6 +240,8 @@
Qcow2Cache* l2_table_cache;
Qcow2Cache* refcount_block_cache;
+ QEMUTimer *cache_clean_timer;
+ unsigned cache_clean_interval;
uint8_t *cluster_cache;
uint8_t *cluster_data;
@@ -245,10 +256,8 @@
CoMutex lock;
- uint32_t crypt_method; /* current crypt method, 0 if no key yet */
+ QCryptoCipher *cipher; /* current cipher, NULL if no key yet */
uint32_t crypt_method_header;
- AES_KEY aes_encrypt_key;
- AES_KEY aes_decrypt_key;
uint64_t snapshots_offset;
int snapshots_size;
unsigned int nb_snapshots;
@@ -258,6 +267,11 @@
int qcow_version;
bool use_lazy_refcounts;
int refcount_order;
+ int refcount_bits;
+ uint64_t refcount_max;
+
+ Qcow2GetRefcountFunc *get_refcount;
+ Qcow2SetRefcountFunc *set_refcount;
bool discard_passthrough[QCOW2_DISCARD_MAX];
@@ -273,20 +287,13 @@
QLIST_HEAD(, Qcow2UnknownHeaderExtension) unknown_header_ext;
QTAILQ_HEAD (, Qcow2DiscardRegion) discards;
bool cache_discards;
-} BDRVQcowState;
-/* XXX: use std qcow open function ? */
-typedef struct QCowCreateState {
- int cluster_size;
- int cluster_bits;
- uint16_t *refcount_block;
- uint64_t *refcount_table;
- int64_t l1_table_offset;
- int64_t refcount_table_offset;
- int64_t refcount_block_offset;
-} QCowCreateState;
-
-struct QCowAIOCB;
+ /* Backing file path and format as stored in the image (this is not the
+ * effective path/format, which may be the result of a runtime option
+ * override) */
+ char *image_backing_file;
+ char *image_backing_format;
+} BDRVQcow2State;
typedef struct Qcow2COWRegion {
/**
@@ -295,8 +302,8 @@
*/
uint64_t offset;
- /** Number of sectors to copy */
- int nb_sectors;
+ /** Number of bytes to copy */
+ int nb_bytes;
} Qcow2COWRegion;
/**
@@ -311,12 +318,6 @@
/** Host offset of the first newly allocated cluster */
uint64_t alloc_offset;
- /**
- * Number of sectors from the start of the first allocated cluster to
- * the end of the (possibly shortened) request
- */
- int nb_available;
-
/** Number of newly allocated clusters */
int nb_clusters;
@@ -396,28 +397,28 @@
#define REFT_OFFSET_MASK 0xfffffffffffffe00ULL
-static inline int64_t start_of_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t start_of_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & ~(s->cluster_size - 1);
}
-static inline int64_t offset_into_cluster(BDRVQcowState *s, int64_t offset)
+static inline int64_t offset_into_cluster(BDRVQcow2State *s, int64_t offset)
{
return offset & (s->cluster_size - 1);
}
-static inline int size_to_clusters(BDRVQcowState *s, int64_t size)
+static inline uint64_t size_to_clusters(BDRVQcow2State *s, uint64_t size)
{
return (size + (s->cluster_size - 1)) >> s->cluster_bits;
}
-static inline int64_t size_to_l1(BDRVQcowState *s, int64_t size)
+static inline int64_t size_to_l1(BDRVQcow2State *s, int64_t size)
{
int shift = s->cluster_bits + s->l2_bits;
return (size + (1ULL << shift) - 1) >> shift;
}
-static inline int offset_to_l2_index(BDRVQcowState *s, int64_t offset)
+static inline int offset_to_l2_index(BDRVQcow2State *s, int64_t offset)
{
return (offset >> s->cluster_bits) & (s->l2_size - 1);
}
@@ -428,12 +429,12 @@
return offset;
}
-static inline int64_t qcow2_vm_state_offset(BDRVQcowState *s)
+static inline int64_t qcow2_vm_state_offset(BDRVQcow2State *s)
{
return (int64_t)s->l1_vm_state_index << (s->cluster_bits + s->l2_bits);
}
-static inline uint64_t qcow2_max_refcount_clusters(BDRVQcowState *s)
+static inline uint64_t qcow2_max_refcount_clusters(BDRVQcow2State *s)
{
return QCOW_MAX_REFTABLE_SIZE >> s->cluster_bits;
}
@@ -452,7 +453,7 @@
}
/* Check whether refcounts are eager or lazy */
-static inline bool qcow2_need_accurate_refcounts(BDRVQcowState *s)
+static inline bool qcow2_need_accurate_refcounts(BDRVQcow2State *s)
{
return !(s->incompatible_features & QCOW2_INCOMPAT_DIRTY);
}
@@ -464,8 +465,12 @@
static inline uint64_t l2meta_cow_end(QCowL2Meta *m)
{
- return m->offset + m->cow_end.offset
- + (m->cow_end.nb_sectors << BDRV_SECTOR_BITS);
+ return m->offset + m->cow_end.offset + m->cow_end.nb_bytes;
+}
+
+static inline uint64_t refcount_diff(uint64_t r1, uint64_t r2)
+{
+ return r1 > r2 ? r1 - r2 : r2 - r1;
}
// FIXME Need qcow2_ prefix to global functions
@@ -487,14 +492,16 @@
int qcow2_refcount_init(BlockDriverState *bs);
void qcow2_refcount_close(BlockDriverState *bs);
-int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index);
+int qcow2_get_refcount(BlockDriverState *bs, int64_t cluster_index,
+ uint64_t *refcount);
int qcow2_update_cluster_refcount(BlockDriverState *bs, int64_t cluster_index,
- int addend, enum qcow2_discard_type type);
+ uint64_t addend, bool decrease,
+ enum qcow2_discard_type type);
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
-int qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int nb_clusters);
+int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters);
int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size);
void qcow2_free_clusters(BlockDriverState *bs,
int64_t offset, int64_t size,
@@ -515,21 +522,25 @@
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
int64_t size);
+int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, Error **errp);
+
/* qcow2-cluster.c functions */
int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
bool exact_size);
int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index);
void qcow2_l2_cache_reset(BlockDriverState *bs);
int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset);
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
- uint8_t *out_buf, const uint8_t *in_buf,
- int nb_sectors, int enc,
- const AES_KEY *key);
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
+ uint8_t *out_buf, const uint8_t *in_buf,
+ int nb_sectors, bool enc, Error **errp);
int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *cluster_offset);
+ unsigned int *bytes, uint64_t *cluster_offset);
int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
- int *num, uint64_t *host_offset, QCowL2Meta **m);
+ unsigned int *bytes, uint64_t *host_offset,
+ QCowL2Meta **m);
uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
uint64_t offset,
int compressed_size);
@@ -540,7 +551,8 @@
int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors);
int qcow2_expand_zero_clusters(BlockDriverState *bs,
- BlockDriverAmendStatusCB *status_cb);
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque);
/* qcow2-snapshot.c functions */
int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info);
@@ -562,18 +574,21 @@
Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
-void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table);
+void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
+ void *table);
int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c);
+int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
Qcow2Cache *dependency);
void qcow2_cache_depends_on_flush(Qcow2Cache *c);
+void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c);
int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
void **table);
-int qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
+void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table);
#endif
diff --git a/block/qed-check.c b/block/qed-check.c
index 36ecd29..dcd4f03 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
typedef struct {
@@ -233,8 +234,7 @@
}
check.result->bfi.total_clusters =
- (s->header.image_size + s->header.cluster_size - 1) /
- s->header.cluster_size;
+ DIV_ROUND_UP(s->header.image_size, s->header.cluster_size);
ret = qed_check_l1_table(&check, s->l1_table);
if (ret == 0) {
/* Only check for leaks if entire image was scanned successfully */
diff --git a/block/qed-cluster.c b/block/qed-cluster.c
index f64b2af..c24e756 100644
--- a/block/qed-cluster.c
+++ b/block/qed-cluster.c
@@ -12,6 +12,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
/**
diff --git a/block/qed-gencb.c b/block/qed-gencb.c
index b817a8b..faf8ecc 100644
--- a/block/qed-gencb.c
+++ b/block/qed-gencb.c
@@ -11,6 +11,7 @@
*
*/
+#include "qemu/osdep.h"
#include "qed.h"
void *gencb_alloc(size_t len, BlockCompletionFunc *cb, void *opaque)
diff --git a/block/qed-l2-cache.c b/block/qed-l2-cache.c
index e9b2aae..5cba794 100644
--- a/block/qed-l2-cache.c
+++ b/block/qed-l2-cache.c
@@ -50,6 +50,7 @@
* table will be deleted in favor of the existing cache entry.
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qed.h"
diff --git a/block/qed-table.c b/block/qed-table.c
index 513aa87..1a731df 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -12,9 +12,11 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
#include "qed.h"
+#include "qemu/bswap.h"
typedef struct {
GenericCB gencb;
diff --git a/block/qed.c b/block/qed.c
index 80f18d8..426f3cb 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -12,11 +12,15 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/timer.h"
+#include "qemu/bswap.h"
#include "trace.h"
#include "qed.h"
#include "qapi/qmp/qerror.h"
#include "migration/migration.h"
+#include "sysemu/block-backend.h"
static const AIOCBInfo qed_aiocb_info = {
.aiocb_size = sizeof(QEDAIOCB),
@@ -139,8 +143,7 @@
* them, and write back.
*/
- int nsectors = (sizeof(QEDHeader) + BDRV_SECTOR_SIZE - 1) /
- BDRV_SECTOR_SIZE;
+ int nsectors = DIV_ROUND_UP(sizeof(QEDHeader), BDRV_SECTOR_SIZE);
size_t len = nsectors * BDRV_SECTOR_SIZE;
QEDWriteHeaderCB *write_header_cb = gencb_alloc(sizeof(*write_header_cb),
cb, opaque);
@@ -215,7 +218,7 @@
*
* The string is NUL-terminated.
*/
-static int qed_read_string(BlockDriverState *file, uint64_t offset, size_t n,
+static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
char *buf, size_t buflen)
{
int ret;
@@ -344,7 +347,7 @@
* migration.
*/
timer_mod(s->need_check_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() * QED_NEED_CHECK_TIMEOUT);
+ NANOSECONDS_PER_SECOND * QED_NEED_CHECK_TIMEOUT);
}
/* It's okay to call this multiple times or when no timer is started */
@@ -354,12 +357,6 @@
timer_del(s->need_check_timer);
}
-static void bdrv_qed_rebind(BlockDriverState *bs)
-{
- BDRVQEDState *s = bs->opaque;
- s->bs = bs;
-}
-
static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
{
BDRVQEDState *s = bs->opaque;
@@ -404,11 +401,8 @@
}
if (s->header.features & ~QED_FEATURE_MASK) {
/* image uses unsupported feature bits */
- char buf[64];
- snprintf(buf, sizeof(buf), "%" PRIx64,
- s->header.features & ~QED_FEATURE_MASK);
- error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_name(bs), "QED", buf);
+ error_setg(errp, "Unsupported QED features: %" PRIx64,
+ s->header.features & ~QED_FEATURE_MASK);
return -ENOTSUP;
}
if (!qed_is_cluster_size_valid(s->header.cluster_size)) {
@@ -416,7 +410,7 @@
}
/* Round down file size to the last cluster */
- file_size = bdrv_getlength(bs->file);
+ file_size = bdrv_getlength(bs->file->bs);
if (file_size < 0) {
return file_size;
}
@@ -436,9 +430,14 @@
s->table_nelems = (s->header.cluster_size * s->header.table_size) /
sizeof(uint64_t);
- s->l2_shift = ffs(s->header.cluster_size) - 1;
+ s->l2_shift = ctz32(s->header.cluster_size);
s->l2_mask = s->table_nelems - 1;
- s->l1_shift = s->l2_shift + ffs(s->table_nelems) - 1;
+ s->l1_shift = s->l2_shift + ctz32(s->table_nelems);
+
+ /* Header size calculation must not overflow uint32_t */
+ if (s->header.header_size > UINT32_MAX / s->header.cluster_size) {
+ return -EINVAL;
+ }
if ((s->header.features & QED_F_BACKING_FILE)) {
if ((uint64_t)s->header.backing_filename_offset +
@@ -466,7 +465,7 @@
* feature is no longer valid.
*/
if ((s->header.autoclear_features & ~QED_AUTOCLEAR_FEATURE_MASK) != 0 &&
- !bdrv_is_read_only(bs->file) && !(flags & BDRV_O_INCOMING)) {
+ !bdrv_is_read_only(bs->file->bs) && !(flags & BDRV_O_INACTIVE)) {
s->header.autoclear_features &= QED_AUTOCLEAR_FEATURE_MASK;
ret = qed_write_header_sync(s);
@@ -475,7 +474,7 @@
}
/* From here on only known autoclear feature bits are valid */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
}
s->l1_table = qed_alloc_table(s);
@@ -493,8 +492,8 @@
* potentially inconsistent images to be opened read-only. This can
* aid data recovery from an otherwise inconsistent image.
*/
- if (!bdrv_is_read_only(bs->file) &&
- !(flags & BDRV_O_INCOMING)) {
+ if (!bdrv_is_read_only(bs->file->bs) &&
+ !(flags & BDRV_O_INACTIVE)) {
BdrvCheckResult result = {0};
ret = qed_check(s, &result, true);
@@ -518,7 +517,7 @@
{
BDRVQEDState *s = bs->opaque;
- bs->bl.write_zeroes_alignment = s->header.cluster_size >> BDRV_SECTOR_BITS;
+ bs->bl.pwrite_zeroes_alignment = s->header.cluster_size;
}
/* We have nothing to do for QED reopen, stubs just return
@@ -536,7 +535,7 @@
bdrv_qed_detach_aio_context(bs);
/* Ensure writes reach stable storage */
- bdrv_flush(bs->file);
+ bdrv_flush(bs->file->bs);
/* Clean shutdown, no check required on next open */
if (s->header.features & QED_F_NEED_CHECK) {
@@ -568,7 +567,7 @@
size_t l1_size = header.cluster_size * header.table_size;
Error *local_err = NULL;
int ret = 0;
- BlockDriverState *bs;
+ BlockBackend *blk;
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
@@ -576,17 +575,17 @@
return ret;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL, NULL,
- &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
- return ret;
+ return -EIO;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* File must start empty and grow, check truncate is supported */
- ret = bdrv_truncate(bs, 0);
+ ret = blk_truncate(blk, 0);
if (ret < 0) {
goto out;
}
@@ -602,18 +601,18 @@
}
qed_header_cpu_to_le(&header, &le_header);
- ret = bdrv_pwrite(bs, 0, &le_header, sizeof(le_header));
+ ret = blk_pwrite(blk, 0, &le_header, sizeof(le_header), 0);
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, sizeof(le_header), backing_file,
- header.backing_filename_size);
+ ret = blk_pwrite(blk, sizeof(le_header), backing_file,
+ header.backing_filename_size, 0);
if (ret < 0) {
goto out;
}
l1_table = g_malloc0(l1_size);
- ret = bdrv_pwrite(bs, header.l1_table_offset, l1_table, l1_size);
+ ret = blk_pwrite(blk, header.l1_table_offset, l1_table, l1_size, 0);
if (ret < 0) {
goto out;
}
@@ -621,7 +620,7 @@
ret = 0; /* success */
out:
g_free(l1_table);
- bdrv_unref(bs);
+ blk_unref(blk);
return ret;
}
@@ -681,6 +680,7 @@
uint64_t pos;
int64_t status;
int *pnum;
+ BlockDriverState **file;
} QEDIsAllocatedCB;
static void qed_is_allocated_cb(void *opaque, int ret, uint64_t offset, size_t len)
@@ -692,6 +692,7 @@
case QED_CLUSTER_FOUND:
offset |= qed_offset_into_cluster(s, cb->pos);
cb->status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+ *cb->file = cb->bs->file->bs;
break;
case QED_CLUSTER_ZERO:
cb->status = BDRV_BLOCK_ZERO;
@@ -707,13 +708,14 @@
}
if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
+ qemu_coroutine_enter(cb->co);
}
}
static int64_t coroutine_fn bdrv_qed_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
BDRVQEDState *s = bs->opaque;
size_t len = (size_t)nb_sectors * BDRV_SECTOR_SIZE;
@@ -722,6 +724,7 @@
.pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE,
.status = BDRV_BLOCK_OFFSET_MASK,
.pnum = pnum,
+ .file = file,
};
QEDRequest request = { .l2_table = NULL };
@@ -767,8 +770,8 @@
/* If there is a backing file, get its length. Treat the absence of a
* backing file like a zero length backing file.
*/
- if (s->bs->backing_hd) {
- int64_t l = bdrv_getlength(s->bs->backing_hd);
+ if (s->bs->backing) {
+ int64_t l = bdrv_getlength(s->bs->backing->bs);
if (l < 0) {
cb(opaque, l);
return;
@@ -797,7 +800,7 @@
qemu_iovec_concat(*backing_qiov, qiov, 0, size);
BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
- bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE,
+ bdrv_aio_readv(s->bs->backing, pos / BDRV_SECTOR_SIZE,
*backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque);
}
@@ -1050,7 +1053,7 @@
QEDAIOCB *acb = opaque;
BDRVQEDState *s = acb_to_s(acb);
- if (!bdrv_aio_flush(s->bs->file, qed_aio_write_l2_update_cb, opaque)) {
+ if (!bdrv_aio_flush(s->bs->file->bs, qed_aio_write_l2_update_cb, opaque)) {
qed_aio_complete(acb, -EIO);
}
}
@@ -1076,7 +1079,7 @@
if (acb->find_cluster_ret == QED_CLUSTER_FOUND) {
next_fn = qed_aio_next_io;
} else {
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
next_fn = qed_aio_write_flush_before_l2_update;
} else {
next_fn = qed_aio_write_l2_update_cb;
@@ -1134,7 +1137,7 @@
static bool qed_should_set_need_check(BDRVQEDState *s)
{
/* The flush before L2 update path ensures consistency */
- if (s->bs->backing_hd) {
+ if (s->bs->backing) {
return false;
}
@@ -1415,21 +1418,21 @@
bool done;
} QEDWriteZeroesCB;
-static void coroutine_fn qed_co_write_zeroes_cb(void *opaque, int ret)
+static void coroutine_fn qed_co_pwrite_zeroes_cb(void *opaque, int ret)
{
QEDWriteZeroesCB *cb = opaque;
cb->done = true;
cb->ret = ret;
if (cb->co) {
- qemu_coroutine_enter(cb->co, NULL);
+ qemu_coroutine_enter(cb->co);
}
}
-static int coroutine_fn bdrv_qed_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn bdrv_qed_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset,
+ int count,
+ BdrvRequestFlags flags)
{
BlockAIOCB *blockacb;
BDRVQEDState *s = bs->opaque;
@@ -1437,25 +1440,22 @@
QEMUIOVector qiov;
struct iovec iov;
- /* Refuse if there are untouched backing file sectors */
- if (bs->backing_hd) {
- if (qed_offset_into_cluster(s, sector_num * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
- if (qed_offset_into_cluster(s, nb_sectors * BDRV_SECTOR_SIZE) != 0) {
- return -ENOTSUP;
- }
+ /* Fall back if the request is not aligned */
+ if (qed_offset_into_cluster(s, offset) ||
+ qed_offset_into_cluster(s, count)) {
+ return -ENOTSUP;
}
/* Zero writes start without an I/O buffer. If a buffer becomes necessary
* then it will be allocated during request processing.
*/
- iov.iov_base = NULL,
- iov.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
+ iov.iov_base = NULL;
+ iov.iov_len = count;
qemu_iovec_init_external(&qiov, &iov, 1);
- blockacb = qed_aio_setup(bs, sector_num, &qiov, nb_sectors,
- qed_co_write_zeroes_cb, &cb,
+ blockacb = qed_aio_setup(bs, offset >> BDRV_SECTOR_BITS, &qiov,
+ count >> BDRV_SECTOR_BITS,
+ qed_co_pwrite_zeroes_cb, &cb,
QED_AIOCB_WRITE | QED_AIOCB_ZERO);
if (!blockacb) {
return -EIO;
@@ -1591,18 +1591,11 @@
bdrv_qed_close(bs);
- bdrv_invalidate_cache(bs->file, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
-
memset(s, 0, sizeof(BDRVQEDState));
ret = bdrv_qed_open(bs, NULL, bs->open_flags, &local_err);
if (local_err) {
- error_setg(errp, "Could not reopen qed layer: %s",
- error_get_pretty(local_err));
- error_free(local_err);
+ error_propagate(errp, local_err);
+ error_prepend(errp, "Could not reopen qed layer: ");
return;
} else if (ret < 0) {
error_setg_errno(errp, -ret, "Could not reopen qed layer");
@@ -1659,7 +1652,6 @@
.supports_backing = true,
.bdrv_probe = bdrv_qed_probe,
- .bdrv_rebind = bdrv_qed_rebind,
.bdrv_open = bdrv_qed_open,
.bdrv_close = bdrv_qed_close,
.bdrv_reopen_prepare = bdrv_qed_reopen_prepare,
@@ -1668,7 +1660,7 @@
.bdrv_co_get_block_status = bdrv_qed_co_get_block_status,
.bdrv_aio_readv = bdrv_qed_aio_readv,
.bdrv_aio_writev = bdrv_qed_aio_writev,
- .bdrv_co_write_zeroes = bdrv_qed_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = bdrv_qed_co_pwrite_zeroes,
.bdrv_truncate = bdrv_qed_truncate,
.bdrv_getlength = bdrv_qed_getlength,
.bdrv_get_info = bdrv_qed_get_info,
diff --git a/block/qed.h b/block/qed.h
index d3934a0..22b3198 100644
--- a/block/qed.h
+++ b/block/qed.h
@@ -16,6 +16,7 @@
#define BLOCK_QED_H
#include "block/block_int.h"
+#include "qemu/cutils.h"
/* The layout of a QED file is as follows:
*
@@ -133,7 +134,6 @@
int bh_ret; /* final return status for completion bh */
QSIMPLEQ_ENTRY(QEDAIOCB) next; /* next request */
int flags; /* QED_AIOCB_* bits ORed together */
- bool *finished; /* signal for cancel completion */
uint64_t end_pos; /* request end on block device, in bytes */
/* User scatter-gather list */
diff --git a/block/quorum.c b/block/quorum.c
index 437b122..9cf876f 100644
--- a/block/quorum.c
+++ b/block/quorum.c
@@ -13,16 +13,18 @@
* See the COPYING file in the top-level directory.
*/
-#include <gnutls/gnutls.h>
-#include <gnutls/crypto.h>
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "block/block_int.h"
#include "qapi/qmp/qbool.h"
#include "qapi/qmp/qdict.h"
+#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qlist.h"
#include "qapi/qmp/qstring.h"
#include "qapi-event.h"
+#include "crypto/hash.h"
#define HASH_LENGTH 32
@@ -33,7 +35,7 @@
/* This union holds a vote hash value */
typedef union QuorumVoteValue {
- char h[HASH_LENGTH]; /* SHA-256 hash */
+ uint8_t h[HASH_LENGTH]; /* SHA-256 hash */
int64_t l; /* simpler 64 bits hash */
} QuorumVoteValue;
@@ -64,8 +66,11 @@
/* the following structure holds the state of one quorum instance */
typedef struct BDRVQuorumState {
- BlockDriverState **bs; /* children BlockDriverStates */
+ BdrvChild **children; /* children BlockDriverStates */
int num_children; /* children count */
+ unsigned next_child_index; /* the index of the next child that should
+ * be added
+ */
int threshold; /* if less than threshold children reads gave the
* same result a quorum error occurs.
*/
@@ -214,22 +219,21 @@
return acb;
}
-static void quorum_report_bad(QuorumAIOCB *acb, char *node_name, int ret)
+static void quorum_report_bad(QuorumOpType type, uint64_t sector_num,
+ int nb_sectors, char *node_name, int ret)
{
const char *msg = NULL;
if (ret < 0) {
msg = strerror(-ret);
}
- qapi_event_send_quorum_report_bad(!!msg, msg, node_name,
- acb->sector_num, acb->nb_sectors, &error_abort);
+
+ qapi_event_send_quorum_report_bad(type, !!msg, msg, node_name,
+ sector_num, nb_sectors, &error_abort);
}
static void quorum_report_failure(QuorumAIOCB *acb)
{
- const char *reference = bdrv_get_device_name(acb->common.bs)[0] ?
- bdrv_get_device_name(acb->common.bs) :
- acb->common.bs->node_name;
-
+ const char *reference = bdrv_get_device_or_node_name(acb->common.bs);
qapi_event_send_quorum_failure(reference, acb->sector_num,
acb->nb_sectors, &error_abort);
}
@@ -286,9 +290,19 @@
BDRVQuorumState *s = acb->common.bs->opaque;
bool rewrite = false;
+ if (ret == 0) {
+ acb->success_count++;
+ } else {
+ QuorumOpType type;
+ type = acb->is_read ? QUORUM_OP_TYPE_READ : QUORUM_OP_TYPE_WRITE;
+ quorum_report_bad(type, acb->sector_num, acb->nb_sectors,
+ sacb->aiocb->bs->node_name, ret);
+ }
+
if (acb->is_read && s->read_pattern == QUORUM_READ_PATTERN_FIFO) {
/* We try to read next child in FIFO order if we fail to read */
- if (ret < 0 && ++acb->child_iter < s->num_children) {
+ if (ret < 0 && (acb->child_iter + 1) < s->num_children) {
+ acb->child_iter++;
read_fifo_child(acb);
return;
}
@@ -303,11 +317,6 @@
sacb->ret = ret;
acb->count++;
- if (ret == 0) {
- acb->success_count++;
- } else {
- quorum_report_bad(acb, sacb->aiocb->bs->node_name, ret);
- }
assert(acb->count <= s->num_children);
assert(acb->success_count <= s->num_children);
if (acb->count < s->num_children) {
@@ -339,7 +348,9 @@
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- quorum_report_bad(acb, s->bs[item->index]->node_name, 0);
+ quorum_report_bad(QUORUM_OP_TYPE_READ, acb->sector_num,
+ acb->nb_sectors,
+ s->children[item->index]->bs->node_name, 0);
}
}
}
@@ -372,8 +383,9 @@
continue;
}
QLIST_FOREACH(item, &version->items, next) {
- bdrv_aio_writev(s->bs[item->index], acb->sector_num, acb->qiov,
- acb->nb_sectors, quorum_rewrite_aio_cb, acb);
+ bdrv_aio_writev(s->children[item->index], acb->sector_num,
+ acb->qiov, acb->nb_sectors, quorum_rewrite_aio_cb,
+ acb);
}
}
@@ -430,25 +442,21 @@
static int quorum_compute_hash(QuorumAIOCB *acb, int i, QuorumVoteValue *hash)
{
- int j, ret;
- gnutls_hash_hd_t dig;
QEMUIOVector *qiov = &acb->qcrs[i].qiov;
+ size_t len = sizeof(hash->h);
+ uint8_t *data = hash->h;
- ret = gnutls_hash_init(&dig, GNUTLS_DIG_SHA256);
-
- if (ret < 0) {
- return ret;
+ /* XXX - would be nice if we could pass in the Error **
+ * and propagate that back, but this quorum code is
+ * restricted to just errno values currently */
+ if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256,
+ qiov->iov, qiov->niov,
+ &data, &len,
+ NULL) < 0) {
+ return -EINVAL;
}
- for (j = 0; j < qiov->niov; j++) {
- ret = gnutls_hash(dig, qiov->iov[j].iov_base, qiov->iov[j].iov_len);
- if (ret < 0) {
- break;
- }
- }
-
- gnutls_hash_deinit(dig, (void *) hash);
- return ret;
+ return 0;
}
static QuorumVoteVersion *quorum_get_vote_winner(QuorumVotes *votes)
@@ -646,14 +654,15 @@
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].buf = qemu_blockalign(s->bs[i], acb->qiov->size);
+ acb->qcrs[i].buf = qemu_blockalign(s->children[i]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[i].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[i].qiov, acb->qiov, acb->qcrs[i].buf);
}
for (i = 0; i < s->num_children; i++) {
- bdrv_aio_readv(s->bs[i], acb->sector_num, &acb->qcrs[i].qiov,
- acb->nb_sectors, quorum_aio_cb, &acb->qcrs[i]);
+ acb->qcrs[i].aiocb = bdrv_aio_readv(s->children[i], acb->sector_num,
+ &acb->qcrs[i].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[i]);
}
return &acb->common;
@@ -663,14 +672,15 @@
{
BDRVQuorumState *s = acb->common.bs->opaque;
- acb->qcrs[acb->child_iter].buf = qemu_blockalign(s->bs[acb->child_iter],
- acb->qiov->size);
+ acb->qcrs[acb->child_iter].buf =
+ qemu_blockalign(s->children[acb->child_iter]->bs, acb->qiov->size);
qemu_iovec_init(&acb->qcrs[acb->child_iter].qiov, acb->qiov->niov);
qemu_iovec_clone(&acb->qcrs[acb->child_iter].qiov, acb->qiov,
acb->qcrs[acb->child_iter].buf);
- bdrv_aio_readv(s->bs[acb->child_iter], acb->sector_num,
- &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
- quorum_aio_cb, &acb->qcrs[acb->child_iter]);
+ acb->qcrs[acb->child_iter].aiocb =
+ bdrv_aio_readv(s->children[acb->child_iter], acb->sector_num,
+ &acb->qcrs[acb->child_iter].qiov, acb->nb_sectors,
+ quorum_aio_cb, &acb->qcrs[acb->child_iter]);
return &acb->common;
}
@@ -709,8 +719,8 @@
int i;
for (i = 0; i < s->num_children; i++) {
- acb->qcrs[i].aiocb = bdrv_aio_writev(s->bs[i], sector_num, qiov,
- nb_sectors, &quorum_aio_cb,
+ acb->qcrs[i].aiocb = bdrv_aio_writev(s->children[i], sector_num,
+ qiov, nb_sectors, &quorum_aio_cb,
&acb->qcrs[i]);
}
@@ -724,12 +734,12 @@
int i;
/* check that all file have the same length */
- result = bdrv_getlength(s->bs[0]);
+ result = bdrv_getlength(s->children[0]->bs);
if (result < 0) {
return result;
}
for (i = 1; i < s->num_children; i++) {
- int64_t value = bdrv_getlength(s->bs[i]);
+ int64_t value = bdrv_getlength(s->children[i]->bs);
if (value < 0) {
return value;
}
@@ -741,21 +751,6 @@
return result;
}
-static void quorum_invalidate_cache(BlockDriverState *bs, Error **errp)
-{
- BDRVQuorumState *s = bs->opaque;
- Error *local_err = NULL;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_invalidate_cache(s->bs[i], &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- return;
- }
- }
-}
-
static coroutine_fn int quorum_co_flush(BlockDriverState *bs)
{
BDRVQuorumState *s = bs->opaque;
@@ -764,19 +759,30 @@
QuorumVoteValue result_value;
int i;
int result = 0;
+ int success_count = 0;
QLIST_INIT(&error_votes.vote_list);
error_votes.compare = quorum_64bits_compare;
for (i = 0; i < s->num_children; i++) {
- result = bdrv_co_flush(s->bs[i]);
- result_value.l = result;
- quorum_count_vote(&error_votes, &result_value, i);
+ result = bdrv_co_flush(s->children[i]->bs);
+ if (result) {
+ quorum_report_bad(QUORUM_OP_TYPE_FLUSH, 0,
+ bdrv_nb_sectors(s->children[i]->bs),
+ s->children[i]->bs->node_name, result);
+ result_value.l = result;
+ quorum_count_vote(&error_votes, &result_value, i);
+ } else {
+ success_count++;
+ }
}
- winner = quorum_get_vote_winner(&error_votes);
- result = winner->value.l;
-
+ if (success_count >= s->threshold) {
+ result = 0;
+ } else {
+ winner = quorum_get_vote_winner(&error_votes);
+ result = winner->value.l;
+ }
quorum_free_vote_list(&error_votes);
return result;
@@ -789,7 +795,7 @@
int i;
for (i = 0; i < s->num_children; i++) {
- bool perm = bdrv_recurse_is_first_non_filter(s->bs[i],
+ bool perm = bdrv_recurse_is_first_non_filter(s->children[i]->bs,
candidate);
if (perm) {
return true;
@@ -803,8 +809,8 @@
{
if (threshold < 1) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE,
- "vote-threshold", "value >= 1");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ "vote-threshold", "value >= 1");
return -ERANGE;
}
@@ -853,7 +859,7 @@
return QUORUM_READ_PATTERN_QUORUM;
}
- for (i = 0; i < QUORUM_READ_PATTERN_MAX; i++) {
+ for (i = 0; i < QUORUM_READ_PATTERN__MAX; i++) {
if (!strcmp(opt, QuorumReadPattern_lookup[i])) {
return i;
}
@@ -869,28 +875,21 @@
Error *local_err = NULL;
QemuOpts *opts = NULL;
bool *opened;
- QDict *sub = NULL;
- QList *list = NULL;
- const QListEntry *lentry;
int i;
int ret = 0;
qdict_flatten(options);
- qdict_extract_subqdict(options, &sub, "children.");
- qdict_array_split(sub, &list);
- if (qdict_size(sub)) {
- error_setg(&local_err, "Invalid option children.%s",
- qdict_first(sub)->key);
+ /* count how many different children are present */
+ s->num_children = qdict_array_entries(options, "children.");
+ if (s->num_children < 0) {
+ error_setg(&local_err, "Option children is not a valid array");
ret = -EINVAL;
goto exit;
}
-
- /* count how many different children are present */
- s->num_children = qlist_size(list);
- if (s->num_children < 2) {
+ if (s->num_children < 1) {
error_setg(&local_err,
- "Number of provided children must be greater than 1");
+ "Number of provided children must be 1 or more");
ret = -EINVAL;
goto exit;
}
@@ -903,6 +902,12 @@
}
s->threshold = qemu_opt_get_number(opts, QUORUM_OPT_VOTE_THRESHOLD, 0);
+ /* and validate it against s->num_children */
+ ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
+ if (ret < 0) {
+ goto exit;
+ }
+
ret = parse_read_pattern(qemu_opt_get(opts, QUORUM_OPT_READ_PATTERN));
if (ret < 0) {
error_setg(&local_err, "Please set read-pattern as fifo or quorum");
@@ -911,12 +916,6 @@
s->read_pattern = ret;
if (s->read_pattern == QUORUM_READ_PATTERN_QUORUM) {
- /* and validate it against s->num_children */
- ret = quorum_valid_threshold(s->threshold, s->num_children, &local_err);
- if (ret < 0) {
- goto exit;
- }
-
/* is the driver in blkverify mode */
if (qemu_opt_get_bool(opts, QUORUM_OPT_BLKVERIFY, false) &&
s->num_children == 2 && s->threshold == 2) {
@@ -936,43 +935,25 @@
}
}
- /* allocate the children BlockDriverState array */
- s->bs = g_new0(BlockDriverState *, s->num_children);
+ /* allocate the children array */
+ s->children = g_new0(BdrvChild *, s->num_children);
opened = g_new0(bool, s->num_children);
- for (i = 0, lentry = qlist_first(list); lentry;
- lentry = qlist_next(lentry), i++) {
- QDict *d;
- QString *string;
+ for (i = 0; i < s->num_children; i++) {
+ char indexstr[32];
+ ret = snprintf(indexstr, 32, "children.%d", i);
+ assert(ret < 32);
- switch (qobject_type(lentry->value))
- {
- /* List of options */
- case QTYPE_QDICT:
- d = qobject_to_qdict(lentry->value);
- QINCREF(d);
- ret = bdrv_open(&s->bs[i], NULL, NULL, d, flags, NULL,
- &local_err);
- break;
-
- /* QMP reference */
- case QTYPE_QSTRING:
- string = qobject_to_qstring(lentry->value);
- ret = bdrv_open(&s->bs[i], NULL, qstring_get_str(string), NULL,
- flags, NULL, &local_err);
- break;
-
- default:
- error_setg(&local_err, "Specification of child block device %i "
- "is invalid", i);
- ret = -EINVAL;
- }
-
- if (ret < 0) {
+ s->children[i] = bdrv_open_child(NULL, options, indexstr, bs,
+ &child_format, false, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
goto close_exit;
}
+
opened[i] = true;
}
+ s->next_child_index = s->num_children;
g_free(opened);
goto exit;
@@ -983,18 +964,14 @@
if (!opened[i]) {
continue;
}
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
g_free(opened);
exit:
qemu_opts_del(opts);
/* propagate error */
- if (local_err) {
- error_propagate(errp, local_err);
- }
- QDECREF(list);
- QDECREF(sub);
+ error_propagate(errp, local_err);
return ret;
}
@@ -1004,34 +981,79 @@
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_unref(s->bs[i]);
+ bdrv_unref_child(bs, s->children[i]);
}
- g_free(s->bs);
+ g_free(s->children);
}
-static void quorum_detach_aio_context(BlockDriverState *bs)
+static void quorum_add_child(BlockDriverState *bs, BlockDriverState *child_bs,
+ Error **errp)
+{
+ BDRVQuorumState *s = bs->opaque;
+ BdrvChild *child;
+ char indexstr[32];
+ int ret;
+
+ assert(s->num_children <= INT_MAX / sizeof(BdrvChild *));
+ if (s->num_children == INT_MAX / sizeof(BdrvChild *) ||
+ s->next_child_index == UINT_MAX) {
+ error_setg(errp, "Too many children");
+ return;
+ }
+
+ ret = snprintf(indexstr, 32, "children.%u", s->next_child_index);
+ if (ret < 0 || ret >= 32) {
+ error_setg(errp, "cannot generate child name");
+ return;
+ }
+ s->next_child_index++;
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely add the child now */
+ bdrv_ref(child_bs);
+ child = bdrv_attach_child(bs, child_bs, indexstr, &child_format);
+ s->children = g_renew(BdrvChild *, s->children, s->num_children + 1);
+ s->children[s->num_children++] = child;
+
+ bdrv_drained_end(bs);
+}
+
+static void quorum_del_child(BlockDriverState *bs, BdrvChild *child,
+ Error **errp)
{
BDRVQuorumState *s = bs->opaque;
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_detach_aio_context(s->bs[i]);
+ if (s->children[i] == child) {
+ break;
+ }
}
+
+ /* we have checked it in bdrv_del_child() */
+ assert(i < s->num_children);
+
+ if (s->num_children <= s->threshold) {
+ error_setg(errp,
+ "The number of children cannot be lower than the vote threshold %d",
+ s->threshold);
+ return;
+ }
+
+ bdrv_drained_begin(bs);
+
+ /* We can safely remove this child now */
+ memmove(&s->children[i], &s->children[i + 1],
+ (s->num_children - i - 1) * sizeof(BdrvChild *));
+ s->children = g_renew(BdrvChild *, s->children, --s->num_children);
+ bdrv_unref_child(bs, child);
+
+ bdrv_drained_end(bs);
}
-static void quorum_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVQuorumState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_children; i++) {
- bdrv_attach_aio_context(s->bs[i], new_context);
- }
-}
-
-static void quorum_refresh_filename(BlockDriverState *bs)
+static void quorum_refresh_filename(BlockDriverState *bs, QDict *options)
{
BDRVQuorumState *s = bs->opaque;
QDict *opts;
@@ -1039,16 +1061,17 @@
int i;
for (i = 0; i < s->num_children; i++) {
- bdrv_refresh_filename(s->bs[i]);
- if (!s->bs[i]->full_open_options) {
+ bdrv_refresh_filename(s->children[i]->bs);
+ if (!s->children[i]->bs->full_open_options) {
return;
}
}
children = qlist_new();
for (i = 0; i < s->num_children; i++) {
- QINCREF(s->bs[i]->full_open_options);
- qlist_append_obj(children, QOBJECT(s->bs[i]->full_open_options));
+ QINCREF(s->children[i]->bs->full_open_options);
+ qlist_append_obj(children,
+ QOBJECT(s->children[i]->bs->full_open_options));
}
opts = qdict_new();
@@ -1056,9 +1079,9 @@
qdict_put_obj(opts, QUORUM_OPT_VOTE_THRESHOLD,
QOBJECT(qint_from_int(s->threshold)));
qdict_put_obj(opts, QUORUM_OPT_BLKVERIFY,
- QOBJECT(qbool_from_int(s->is_blkverify)));
+ QOBJECT(qbool_from_bool(s->is_blkverify)));
qdict_put_obj(opts, QUORUM_OPT_REWRITE,
- QOBJECT(qbool_from_int(s->rewrite_corrupted)));
+ QOBJECT(qbool_from_bool(s->rewrite_corrupted)));
qdict_put_obj(opts, "children", QOBJECT(children));
bs->full_open_options = opts;
@@ -1080,10 +1103,9 @@
.bdrv_aio_readv = quorum_aio_readv,
.bdrv_aio_writev = quorum_aio_writev,
- .bdrv_invalidate_cache = quorum_invalidate_cache,
- .bdrv_detach_aio_context = quorum_detach_aio_context,
- .bdrv_attach_aio_context = quorum_attach_aio_context,
+ .bdrv_add_child = quorum_add_child,
+ .bdrv_del_child = quorum_del_child,
.is_filter = true,
.bdrv_recurse_is_first_non_filter = quorum_recurse_is_first_non_filter,
@@ -1091,6 +1113,10 @@
static void bdrv_quorum_init(void)
{
+ if (!qcrypto_hash_supports(QCRYPTO_HASH_ALG_SHA256)) {
+ /* SHA256 hash support is required for quorum device */
+ return;
+ }
bdrv_register(&bdrv_quorum);
}
diff --git a/block/raw-aio.h b/block/raw-aio.h
deleted file mode 100644
index 80681ce..0000000
--- a/block/raw-aio.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Declarations for AIO in the raw protocol
- *
- * Copyright IBM, Corp. 2008
- *
- * Authors:
- * Anthony Liguori <aliguori@us.ibm.com>
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- * Contributions after 2012-01-13 are licensed under the terms of the
- * GNU GPL, version 2 or (at your option) any later version.
- */
-#ifndef QEMU_RAW_AIO_H
-#define QEMU_RAW_AIO_H
-
-/* AIO request types */
-#define QEMU_AIO_READ 0x0001
-#define QEMU_AIO_WRITE 0x0002
-#define QEMU_AIO_IOCTL 0x0004
-#define QEMU_AIO_FLUSH 0x0008
-#define QEMU_AIO_DISCARD 0x0010
-#define QEMU_AIO_WRITE_ZEROES 0x0020
-#define QEMU_AIO_TYPE_MASK \
- (QEMU_AIO_READ|QEMU_AIO_WRITE|QEMU_AIO_IOCTL|QEMU_AIO_FLUSH| \
- QEMU_AIO_DISCARD|QEMU_AIO_WRITE_ZEROES)
-
-/* AIO flags */
-#define QEMU_AIO_MISALIGNED 0x1000
-#define QEMU_AIO_BLKDEV 0x2000
-
-
-/* linux-aio.c - Linux native implementation */
-#ifdef CONFIG_LINUX_AIO
-void *laio_init(void);
-void laio_cleanup(void *s);
-BlockAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type);
-void laio_detach_aio_context(void *s, AioContext *old_context);
-void laio_attach_aio_context(void *s, AioContext *new_context);
-void laio_io_plug(BlockDriverState *bs, void *aio_ctx);
-int laio_io_unplug(BlockDriverState *bs, void *aio_ctx, bool unplug);
-#endif
-
-#ifdef _WIN32
-typedef struct QEMUWin32AIOState QEMUWin32AIOState;
-QEMUWin32AIOState *win32_aio_init(void);
-void win32_aio_cleanup(QEMUWin32AIOState *aio);
-int win32_aio_attach(QEMUWin32AIOState *aio, HANDLE hfile);
-BlockAIOCB *win32_aio_submit(BlockDriverState *bs,
- QEMUWin32AIOState *aio, HANDLE hfile,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type);
-void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
- AioContext *old_context);
-void win32_aio_attach_aio_context(QEMUWin32AIOState *aio,
- AioContext *new_context);
-#endif
-
-#endif /* QEMU_RAW_AIO_H */
diff --git a/block/raw-posix.c b/block/raw-posix.c
index b1af77e..cabc4f3 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -21,7 +21,10 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+#include "qemu/error-report.h"
#include "qemu/timer.h"
#include "qemu/log.h"
#include "block/block_int.h"
@@ -29,8 +32,9 @@
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "qapi/util.h"
+#include "qapi/qmp/qstring.h"
#if defined(__APPLE__) && (__MACH__)
#include <paths.h>
@@ -41,6 +45,7 @@
#include <IOKit/storage/IOMedia.h>
#include <IOKit/storage/IOCDMedia.h>
//#include <IOKit/storage/IOCDTypes.h>
+#include <IOKit/storage/IODVDMedia.h>
#include <CoreFoundation/CoreFoundation.h>
#endif
@@ -49,18 +54,21 @@
#include <sys/dkio.h>
#endif
#ifdef __linux__
-#include <sys/types.h>
-#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/param.h>
#include <linux/cdrom.h>
#include <linux/fd.h>
#include <linux/fs.h>
+#include <linux/hdreg.h>
+#include <scsi/sg.h>
+#ifdef __s390__
+#include <asm/dasd.h>
+#endif
#ifndef FS_NOCOW_FL
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
#endif
#endif
-#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+#if defined(CONFIG_FALLOCATE_PUNCH_HOLE) || defined(CONFIG_FALLOCATE_ZERO_RANGE)
#include <linux/falloc.h>
#endif
#if defined (__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -90,15 +98,19 @@
#include <xfs/xfs.h>
#endif
-//#define DEBUG_FLOPPY
-
//#define DEBUG_BLOCK
-#if defined(DEBUG_BLOCK)
-#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \
- { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0)
+
+#ifdef DEBUG_BLOCK
+# define DEBUG_BLOCK_PRINT 1
#else
-#define DEBUG_BLOCK_PRINT(formatCstr, ...)
+# define DEBUG_BLOCK_PRINT 0
#endif
+#define DPRINTF(fmt, ...) \
+do { \
+ if (DEBUG_BLOCK_PRINT) { \
+ printf(fmt, ## __VA_ARGS__); \
+ } \
+} while (0)
/* OS X does not have O_DSYNC */
#ifndef O_DSYNC
@@ -116,11 +128,6 @@
#define FTYPE_FILE 0
#define FTYPE_CD 1
-#define FTYPE_FD 2
-
-/* if the FD is not accessed during that time (in ns), we try to
- reopen it to see if the disk has been changed */
-#define FD_OPEN_TIMEOUT (1000000000)
#define MAX_BLOCKSIZE 4096
@@ -130,32 +137,19 @@
int open_flags;
size_t buf_align;
-#if defined(__linux__)
- /* linux floppy specific */
- int64_t fd_open_time;
- int64_t fd_error_time;
- int fd_got_error;
- int fd_media_changed;
-#endif
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
- void *aio_ctx;
-#endif
#ifdef CONFIG_XFS
bool is_xfs:1;
#endif
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
+ bool has_fallocate;
bool needs_alignment;
} BDRVRawState;
typedef struct BDRVRawReopenState {
int fd;
int open_flags;
-#ifdef CONFIG_LINUX_AIO
- int use_aio;
-#endif
} BDRVRawReopenState;
static int fd_open(BlockDriverState *bs);
@@ -188,7 +182,7 @@
fname = *filename;
dp = strrchr(fname, '/');
- if (lstat(fname, &sb) < 0) {
+ if (qemu_lstat(fname, &sb) < 0) {
fprintf(stderr, "%s: stat failed: %s\n",
fname, strerror(errno));
return -errno;
@@ -217,44 +211,106 @@
}
#endif
-static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
+/*
+ * Get logical block size via ioctl. On success store it in @sector_size_p.
+ */
+static int probe_logical_blocksize(int fd, unsigned int *sector_size_p)
{
- BDRVRawState *s = bs->opaque;
- char *buf;
unsigned int sector_size;
+ bool success = false;
- /* For /dev/sg devices the alignment is not really used.
- With buffered I/O, we don't have any restrictions. */
- if (bs->sg || !s->needs_alignment) {
- bs->request_alignment = 1;
- s->buf_align = 1;
- return;
- }
+ errno = ENOTSUP;
/* Try a few ioctls to get the right size */
- bs->request_alignment = 0;
- s->buf_align = 0;
-
#ifdef BLKSSZGET
if (ioctl(fd, BLKSSZGET, &sector_size) >= 0) {
- bs->request_alignment = sector_size;
+ *sector_size_p = sector_size;
+ success = true;
}
#endif
#ifdef DKIOCGETBLOCKSIZE
if (ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) >= 0) {
- bs->request_alignment = sector_size;
+ *sector_size_p = sector_size;
+ success = true;
}
#endif
#ifdef DIOCGSECTORSIZE
if (ioctl(fd, DIOCGSECTORSIZE, &sector_size) >= 0) {
- bs->request_alignment = sector_size;
+ *sector_size_p = sector_size;
+ success = true;
}
#endif
+
+ return success ? 0 : -errno;
+}
+
+/**
+ * Get physical block size of @fd.
+ * On success, store it in @blk_size and return 0.
+ * On failure, return -errno.
+ */
+static int probe_physical_blocksize(int fd, unsigned int *blk_size)
+{
+#ifdef BLKPBSZGET
+ if (ioctl(fd, BLKPBSZGET, blk_size) < 0) {
+ return -errno;
+ }
+ return 0;
+#else
+ return -ENOTSUP;
+#endif
+}
+
+/* Check if read is allowed with given memory buffer and length.
+ *
+ * This function is used to check O_DIRECT memory buffer and request alignment.
+ */
+static bool raw_is_io_aligned(int fd, void *buf, size_t len)
+{
+ ssize_t ret = pread(fd, buf, len, 0);
+
+ if (ret >= 0) {
+ return true;
+ }
+
+#ifdef __linux__
+ /* The Linux kernel returns EINVAL for misaligned O_DIRECT reads. Ignore
+ * other errors (e.g. real I/O error), which could happen on a failed
+ * drive, since we only care about probing alignment.
+ */
+ if (errno != EINVAL) {
+ return true;
+ }
+#endif
+
+ return false;
+}
+
+static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp)
+{
+ BDRVRawState *s = bs->opaque;
+ char *buf;
+ size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize());
+
+ /* For SCSI generic devices the alignment is not really used.
+ With buffered I/O, we don't have any restrictions. */
+ if (bdrv_is_sg(bs) || !s->needs_alignment) {
+ bs->bl.request_alignment = 1;
+ s->buf_align = 1;
+ return;
+ }
+
+ bs->bl.request_alignment = 0;
+ s->buf_align = 0;
+ /* Let's try to use the logical blocksize for the alignment. */
+ if (probe_logical_blocksize(fd, &bs->bl.request_alignment) < 0) {
+ bs->bl.request_alignment = 0;
+ }
#ifdef CONFIG_XFS
if (s->is_xfs) {
struct dioattr da;
if (xfsctl(NULL, fd, XFS_IOC_DIOINFO, &da) >= 0) {
- bs->request_alignment = da.d_miniosz;
+ bs->bl.request_alignment = da.d_miniosz;
/* The kernel returns wrong information for d_mem */
/* s->buf_align = da.d_mem; */
}
@@ -264,9 +320,9 @@
/* If we could not get the sizes so far, we can only guess them */
if (!s->buf_align) {
size_t align;
- buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
- for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
- if (pread(fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ buf = qemu_memalign(max_align, 2 * max_align);
+ for (align = 512; align <= max_align; align <<= 1) {
+ if (raw_is_io_aligned(fd, buf + align, max_align)) {
s->buf_align = align;
break;
}
@@ -274,21 +330,21 @@
qemu_vfree(buf);
}
- if (!bs->request_alignment) {
+ if (!bs->bl.request_alignment) {
size_t align;
- buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
- for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
- if (pread(fd, buf, align, 0) >= 0) {
- bs->request_alignment = align;
+ buf = qemu_memalign(s->buf_align, max_align);
+ for (align = 512; align <= max_align; align <<= 1) {
+ if (raw_is_io_aligned(fd, buf, align)) {
+ bs->bl.request_alignment = align;
break;
}
}
qemu_vfree(buf);
}
- if (!s->buf_align || !bs->request_alignment) {
- error_setg(errp, "Could not find working O_DIRECT alignment. "
- "Try cache.direct=off.");
+ if (!s->buf_align || !bs->bl.request_alignment) {
+ error_setg(errp, "Could not find working O_DIRECT alignment");
+ error_append_hint(errp, "Try cache.direct=off\n");
}
}
@@ -311,58 +367,15 @@
}
}
-static void raw_detach_aio_context(BlockDriverState *bs)
-{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_detach_aio_context(s->aio_ctx, bdrv_get_aio_context(bs));
- }
-#endif
-}
-
-static void raw_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
+static bool raw_use_aio(int bdrv_flags)
{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
-
- if (s->use_aio) {
- laio_attach_aio_context(s->aio_ctx, new_context);
- }
-#endif
-}
-
-#ifdef CONFIG_LINUX_AIO
-static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
-{
- int ret = -1;
- assert(aio_ctx != NULL);
- assert(use_aio != NULL);
/*
* Currently Linux do AIO only for files opened with O_DIRECT
* specified so check NOCACHE flag too
*/
- if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
- (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
- /* if non-NULL, laio_init() has already been run */
- if (*aio_ctx == NULL) {
- *aio_ctx = laio_init();
- if (!*aio_ctx) {
- goto error;
- }
- }
- *use_aio = 1;
- } else {
- *use_aio = 0;
- }
-
- ret = 0;
-
-error:
- return ret;
+ return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+ (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO);
}
#endif
@@ -431,26 +444,36 @@
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
- if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
- qemu_close(fd);
- ret = -errno;
- error_setg_errno(errp, -ret, "Could not set AIO state");
+ if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
+ error_setg(errp, "aio=native was specified, but it requires "
+ "cache.direct=on, which was not specified.");
+ ret = -EINVAL;
goto fail;
}
-#endif
+#else
+ if (bdrv_flags & BDRV_O_NATIVE_AIO) {
+ error_setg(errp, "aio=native was specified, but is not supported "
+ "in this build.");
+ ret = -EINVAL;
+ goto fail;
+ }
+#endif /* !defined(CONFIG_LINUX_AIO) */
s->has_discard = true;
s->has_write_zeroes = true;
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
s->needs_alignment = true;
}
if (fstat(s->fd, &st) < 0) {
+ ret = -errno;
error_setg_errno(errp, errno, "Could not stat file");
goto fail;
}
if (S_ISREG(st.st_mode)) {
s->discard_zeroes = true;
+ s->has_fallocate = true;
}
if (S_ISBLK(st.st_mode)) {
#ifdef BLKDISCARDZEROES
@@ -488,8 +511,6 @@
}
#endif
- raw_attach_aio_context(bs, bdrv_get_aio_context(bs));
-
ret = 0;
fail:
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
@@ -503,15 +524,9 @@
Error **errp)
{
BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
s->type = FTYPE_FILE;
- ret = raw_open_common(bs, options, flags, 0, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return raw_open_common(bs, options, flags, 0, errp);
}
static int raw_reopen_prepare(BDRVReopenState *state,
@@ -530,19 +545,7 @@
state->opaque = g_new0(BDRVRawReopenState, 1);
raw_s = state->opaque;
-#ifdef CONFIG_LINUX_AIO
- raw_s->use_aio = s->use_aio;
-
- /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
- * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
- * won't override aio_ctx if aio_ctx is non-NULL */
- if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
- error_setg(errp, "Could not set AIO state");
- return -1;
- }
-#endif
-
- if (s->type == FTYPE_FD || s->type == FTYPE_CD) {
+ if (s->type == FTYPE_CD) {
raw_s->open_flags |= O_NONBLOCK;
}
@@ -566,15 +569,7 @@
if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
/* dup the original fd */
- /* TODO: use qemu fcntl wrapper */
-#ifdef F_DUPFD_CLOEXEC
- raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
-#else
- raw_s->fd = dup(s->fd);
- if (raw_s->fd != -1) {
- qemu_set_cloexec(raw_s->fd);
- }
-#endif
+ raw_s->fd = qemu_dup(s->fd);
if (raw_s->fd >= 0) {
ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
if (ret) {
@@ -586,11 +581,17 @@
/* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
if (raw_s->fd == -1) {
- assert(!(raw_s->open_flags & O_CREAT));
- raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
- if (raw_s->fd == -1) {
- error_setg_errno(errp, errno, "Could not reopen file");
- ret = -1;
+ const char *normalized_filename = state->bs->filename;
+ ret = raw_normalize_devicepath(&normalized_filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ } else {
+ assert(!(raw_s->open_flags & O_CREAT));
+ raw_s->fd = qemu_open(normalized_filename, raw_s->open_flags);
+ if (raw_s->fd == -1) {
+ error_setg_errno(errp, errno, "Could not reopen file");
+ ret = -1;
+ }
}
}
@@ -618,9 +619,6 @@
qemu_close(s->fd);
s->fd = raw_s->fd;
-#ifdef CONFIG_LINUX_AIO
- s->use_aio = raw_s->use_aio;
-#endif
g_free(state->opaque);
state->opaque = NULL;
@@ -644,14 +642,112 @@
state->opaque = NULL;
}
+static int hdev_get_max_transfer_length(int fd)
+{
+#ifdef BLKSECTGET
+ int max_sectors = 0;
+ if (ioctl(fd, BLKSECTGET, &max_sectors) == 0) {
+ return max_sectors;
+ } else {
+ return -errno;
+ }
+#else
+ return -ENOSYS;
+#endif
+}
+
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
+ struct stat st;
+
+ if (!fstat(s->fd, &st)) {
+ if (S_ISBLK(st.st_mode)) {
+ int ret = hdev_get_max_transfer_length(s->fd);
+ if (ret > 0 && ret <= BDRV_REQUEST_MAX_SECTORS) {
+ bs->bl.max_transfer = pow2floor(ret << BDRV_SECTOR_BITS);
+ }
+ }
+ }
raw_probe_alignment(bs, s->fd, errp);
- bs->bl.opt_mem_alignment = s->buf_align;
+ bs->bl.min_mem_alignment = s->buf_align;
+ bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
}
+static int check_for_dasd(int fd)
+{
+#ifdef BIODASDINFO2
+ struct dasd_information2_t info = {0};
+
+ return ioctl(fd, BIODASDINFO2, &info);
+#else
+ return -1;
+#endif
+}
+
+/**
+ * Try to get @bs's logical and physical block size.
+ * On success, store them in @bsz and return zero.
+ * On failure, return negative errno.
+ */
+static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+ BDRVRawState *s = bs->opaque;
+ int ret;
+
+ /* If DASD, get blocksizes */
+ if (check_for_dasd(s->fd) < 0) {
+ return -ENOTSUP;
+ }
+ ret = probe_logical_blocksize(s->fd, &bsz->log);
+ if (ret < 0) {
+ return ret;
+ }
+ return probe_physical_blocksize(s->fd, &bsz->phys);
+}
+
+/**
+ * Try to get @bs's geometry: cyls, heads, sectors.
+ * On success, store them in @geo and return 0.
+ * On failure return -errno.
+ * (Allows block driver to assign default geometry values that guest sees)
+ */
+#ifdef __linux__
+static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
+{
+ BDRVRawState *s = bs->opaque;
+ struct hd_geometry ioctl_geo = {0};
+
+ /* If DASD, get its geometry */
+ if (check_for_dasd(s->fd) < 0) {
+ return -ENOTSUP;
+ }
+ if (ioctl(s->fd, HDIO_GETGEO, &ioctl_geo) < 0) {
+ return -errno;
+ }
+ /* HDIO_GETGEO may return success even though geo contains zeros
+ (e.g. certain multipath setups) */
+ if (!ioctl_geo.heads || !ioctl_geo.sectors || !ioctl_geo.cylinders) {
+ return -ENOTSUP;
+ }
+ /* Do not return a geometry for partition */
+ if (ioctl_geo.start != 0) {
+ return -ENOTSUP;
+ }
+ geo->heads = ioctl_geo.heads;
+ geo->sectors = ioctl_geo.sectors;
+ geo->cylinders = ioctl_geo.cylinders;
+
+ return 0;
+}
+#else /* __linux__ */
+static int hdev_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
+{
+ return -ENOTSUP;
+}
+#endif
+
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
int ret;
@@ -860,6 +956,7 @@
static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
+ int err;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
@@ -867,8 +964,9 @@
fl.l_len = bytes;
if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) {
- DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno));
- return -errno;
+ err = errno;
+ DPRINTF("cannot write zero range (%s)\n", strerror(errno));
+ return -err;
}
return 0;
@@ -877,6 +975,7 @@
static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes)
{
struct xfs_flock64 fl;
+ int err;
memset(&fl, 0, sizeof(fl));
fl.l_whence = SEEK_SET;
@@ -884,50 +983,121 @@
fl.l_len = bytes;
if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) {
- DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno));
- return -errno;
+ err = errno;
+ DPRINTF("cannot punch hole (%s)\n", strerror(errno));
+ return -err;
}
return 0;
}
#endif
-static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+static int translate_err(int err)
{
- int ret = -EOPNOTSUPP;
+ if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
+ err == -ENOTTY) {
+ err = -ENOTSUP;
+ }
+ return err;
+}
+
+#ifdef CONFIG_FALLOCATE
+static int do_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+ do {
+ if (fallocate(fd, mode, offset, len) == 0) {
+ return 0;
+ }
+ } while (errno == EINTR);
+ return translate_err(-errno);
+}
+#endif
+
+static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
+{
+ int ret = -ENOTSUP;
BDRVRawState *s = aiocb->bs->opaque;
- if (s->has_write_zeroes == 0) {
+ if (!s->has_write_zeroes) {
return -ENOTSUP;
}
- if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
#ifdef BLKZEROOUT
- do {
- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
- return 0;
- }
- } while (errno == EINTR);
-
- ret = -errno;
-#endif
- } else {
-#ifdef CONFIG_XFS
- if (s->is_xfs) {
- return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ do {
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
+ return 0;
}
-#endif
- }
+ } while (errno == EINTR);
- if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
- ret == -ENOTTY) {
+ ret = translate_err(-errno);
+#endif
+
+ if (ret == -ENOTSUP) {
s->has_write_zeroes = false;
- ret = -ENOTSUP;
}
return ret;
}
+static ssize_t handle_aiocb_write_zeroes(RawPosixAIOData *aiocb)
+{
+#if defined(CONFIG_FALLOCATE) || defined(CONFIG_XFS)
+ BDRVRawState *s = aiocb->bs->opaque;
+#endif
+
+ if (aiocb->aio_type & QEMU_AIO_BLKDEV) {
+ return handle_aiocb_write_zeroes_block(aiocb);
+ }
+
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ return xfs_write_zeroes(s, aiocb->aio_offset, aiocb->aio_nbytes);
+ }
+#endif
+
+#ifdef CONFIG_FALLOCATE_ZERO_RANGE
+ if (s->has_write_zeroes) {
+ int ret = do_fallocate(s->fd, FALLOC_FL_ZERO_RANGE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
+ }
+ s->has_write_zeroes = false;
+ }
+#endif
+
+#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
+ if (s->has_discard && s->has_fallocate) {
+ int ret = do_fallocate(s->fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0) {
+ ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
+ }
+ s->has_fallocate = false;
+ } else if (ret != -ENOTSUP) {
+ return ret;
+ } else {
+ s->has_discard = false;
+ }
+ }
+#endif
+
+#ifdef CONFIG_FALLOCATE
+ if (s->has_fallocate && aiocb->aio_offset >= bdrv_getlength(aiocb->bs)) {
+ int ret = do_fallocate(s->fd, 0, aiocb->aio_offset, aiocb->aio_nbytes);
+ if (ret == 0 || ret != -ENOTSUP) {
+ return ret;
+ }
+ s->has_fallocate = false;
+ }
+#endif
+
+ return -ENOTSUP;
+}
+
static ssize_t handle_aiocb_discard(RawPosixAIOData *aiocb)
{
int ret = -EOPNOTSUPP;
@@ -956,21 +1126,14 @@
#endif
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
- do {
- if (fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- aiocb->aio_offset, aiocb->aio_nbytes) == 0) {
- return 0;
- }
- } while (errno == EINTR);
-
- ret = -errno;
+ ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ aiocb->aio_offset, aiocb->aio_nbytes);
#endif
}
- if (ret == -ENODEV || ret == -ENOSYS || ret == -EOPNOTSUPP ||
- ret == -ENOTTY) {
+ ret = translate_err(ret);
+ if (ret == -ENOTSUP) {
s->has_discard = false;
- ret = -ENOTSUP;
}
return ret;
}
@@ -983,7 +1146,7 @@
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
case QEMU_AIO_READ:
ret = handle_aiocb_rw(aiocb);
- if (ret >= 0 && ret < aiocb->aio_nbytes && aiocb->bs->growable) {
+ if (ret >= 0 && ret < aiocb->aio_nbytes) {
iov_memset(aiocb->aio_iov, aiocb->aio_niov, ret,
0, aiocb->aio_nbytes - ret);
@@ -1021,48 +1184,48 @@
break;
}
- g_slice_free(RawPosixAIOData, aiocb);
+ g_free(aiocb);
return ret;
}
static int paio_submit_co(BlockDriverState *bs, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- int type)
+ int64_t offset, QEMUIOVector *qiov,
+ int count, int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
acb->aio_fildes = fd;
- acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
- acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
- assert(qiov->size == acb->aio_nbytes);
+ assert(qiov->size == count);
}
- trace_paio_submit_co(sector_num, nb_sectors, type);
+ trace_paio_submit_co(offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_co(pool, aio_worker, acb);
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, int fd,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawPosixAIOData *acb = g_slice_new(RawPosixAIOData);
+ RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
ThreadPool *pool;
acb->bs = bs;
acb->aio_type = type;
acb->aio_fildes = fd;
- acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE;
- acb->aio_offset = sector_num * BDRV_SECTOR_SIZE;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
if (qiov) {
acb->aio_iov = qiov->iov;
@@ -1070,19 +1233,18 @@
assert(qiov->size == acb->aio_nbytes);
}
- trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+ trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
-static BlockAIOCB *raw_aio_submit(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque, int type)
+static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov, int type)
{
BDRVRawState *s = bs->opaque;
if (fd_open(bs) < 0)
- return NULL;
+ return -EIO;
/*
* Check if the underlying device requires requests to be aligned,
@@ -1094,23 +1256,38 @@
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
- } else if (s->use_aio) {
- return laio_submit(bs, s->aio_ctx, s->fd, sector_num, qiov,
- nb_sectors, cb, opaque, type);
+ } else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ assert(qiov->size == bytes);
+ return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
#endif
}
}
- return paio_submit(bs, s->fd, sector_num, qiov, nb_sectors,
- cb, opaque, type);
+ return paio_submit_co(bs, s->fd, offset, qiov, bytes, type);
+}
+
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_READ);
+}
+
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
+{
+ assert(flags == 0);
+ return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
}
static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_plug(bs, s->aio_ctx);
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_plug(bs, aio);
}
#endif
}
@@ -1118,39 +1295,13 @@
static void raw_aio_unplug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, true);
+ if (bs->open_flags & BDRV_O_NATIVE_AIO) {
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
+ laio_io_unplug(bs, aio);
}
#endif
}
-static void raw_aio_flush_io_queue(BlockDriverState *bs)
-{
-#ifdef CONFIG_LINUX_AIO
- BDRVRawState *s = bs->opaque;
- if (s->use_aio) {
- laio_io_unplug(bs, s->aio_ctx, false);
- }
-#endif
-}
-
-static BlockAIOCB *raw_aio_readv(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_READ);
-}
-
-static BlockAIOCB *raw_aio_writev(BlockDriverState *bs,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
- BlockCompletionFunc *cb, void *opaque)
-{
- return raw_aio_submit(bs, sector_num, qiov, nb_sectors,
- cb, opaque, QEMU_AIO_WRITE);
-}
-
static BlockAIOCB *raw_aio_flush(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque)
{
@@ -1166,13 +1317,6 @@
{
BDRVRawState *s = bs->opaque;
- raw_detach_aio_context(bs);
-
-#ifdef CONFIG_LINUX_AIO
- if (s->use_aio) {
- laio_cleanup(s->aio_ctx);
- }
-#endif
if (s->fd >= 0) {
qemu_close(s->fd);
s->fd = -1;
@@ -1311,7 +1455,20 @@
if (size == 0)
#endif
#if defined(__APPLE__) && defined(__MACH__)
- size = LLONG_MAX;
+ {
+ uint64_t sectors = 0;
+ uint32_t sector_size = 0;
+
+ if (ioctl(fd, DKIOCGETBLOCKCOUNT, &sectors) == 0
+ && ioctl(fd, DKIOCGETBLOCKSIZE, &sector_size) == 0) {
+ size = sectors * sector_size;
+ } else {
+ size = lseek(fd, 0LL, SEEK_END);
+ if (size < 0) {
+ return -errno;
+ }
+ }
+ }
#else
size = lseek(fd, 0LL, SEEK_END);
if (size < 0) {
@@ -1388,7 +1545,7 @@
nocow = qemu_opt_get_bool(opts, BLOCK_OPT_NOCOW, false);
buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
prealloc = qapi_enum_parse(PreallocMode_lookup, buf,
- PREALLOC_MODE_MAX, PREALLOC_MODE_OFF,
+ PREALLOC_MODE__MAX, PREALLOC_MODE_OFF,
&local_err);
g_free(buf);
if (local_err) {
@@ -1397,7 +1554,7 @@
goto out;
}
- fd = qemu_open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY,
+ fd = qemu_open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY,
0644);
if (fd < 0) {
result = -errno;
@@ -1582,7 +1739,8 @@
*/
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
off_t start, data = 0, hole = 0;
int64_t total_size;
@@ -1614,8 +1772,9 @@
*pnum = nb_sectors;
ret = BDRV_BLOCK_DATA;
} else if (data == start) {
- /* On a data extent, compute sectors to the end of the extent. */
- *pnum = MIN(nb_sectors, (hole - start) / BDRV_SECTOR_SIZE);
+ /* On a data extent, compute sectors to the end of the extent,
+ * possibly including a partial sector at EOF. */
+ *pnum = MIN(nb_sectors, DIV_ROUND_UP(hole - start, BDRV_SECTOR_SIZE));
ret = BDRV_BLOCK_DATA;
} else {
/* On a hole, compute sectors to the beginning of the next extent. */
@@ -1623,30 +1782,31 @@
*pnum = MIN(nb_sectors, (data - start) / BDRV_SECTOR_SIZE);
ret = BDRV_BLOCK_ZERO;
}
+ *file = bs;
return ret | BDRV_BLOCK_OFFSET_VALID | start;
}
-static coroutine_fn BlockAIOCB *raw_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *raw_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
- return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD);
}
-static int coroutine_fn raw_co_write_zeroes(
- BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, BdrvRequestFlags flags)
+static int coroutine_fn raw_co_pwrite_zeroes(
+ BlockDriverState *bs, int64_t offset,
+ int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES);
} else if (s->discard_zeroes) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD);
}
return -ENOTSUP;
@@ -1684,7 +1844,7 @@
}
};
-static BlockDriver bdrv_file = {
+BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
@@ -1699,16 +1859,15 @@
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
.bdrv_co_get_block_status = raw_co_get_block_status,
- .bdrv_co_write_zeroes = raw_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = raw_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_aio_discard = raw_aio_discard,
+ .bdrv_aio_pdiscard = raw_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1716,9 +1875,6 @@
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
.create_opts = &raw_create_opts,
};
@@ -1726,36 +1882,51 @@
/* host device */
#if defined(__APPLE__) && defined(__MACH__)
-static kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator );
-static kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize );
-
-kern_return_t FindEjectableCDMedia( io_iterator_t *mediaIterator )
+static kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags);
+static char *FindEjectableOpticalMedia(io_iterator_t *mediaIterator)
{
- kern_return_t kernResult;
+ kern_return_t kernResult = KERN_FAILURE;
mach_port_t masterPort;
CFMutableDictionaryRef classesToMatch;
+ const char *matching_array[] = {kIODVDMediaClass, kIOCDMediaClass};
+ char *mediaType = NULL;
kernResult = IOMasterPort( MACH_PORT_NULL, &masterPort );
if ( KERN_SUCCESS != kernResult ) {
printf( "IOMasterPort returned %d\n", kernResult );
}
- classesToMatch = IOServiceMatching( kIOCDMediaClass );
- if ( classesToMatch == NULL ) {
- printf( "IOServiceMatching returned a NULL dictionary.\n" );
- } else {
- CFDictionarySetValue( classesToMatch, CFSTR( kIOMediaEjectableKey ), kCFBooleanTrue );
- }
- kernResult = IOServiceGetMatchingServices( masterPort, classesToMatch, mediaIterator );
- if ( KERN_SUCCESS != kernResult )
- {
- printf( "IOServiceGetMatchingServices returned %d\n", kernResult );
- }
+ int index;
+ for (index = 0; index < ARRAY_SIZE(matching_array); index++) {
+ classesToMatch = IOServiceMatching(matching_array[index]);
+ if (classesToMatch == NULL) {
+ error_report("IOServiceMatching returned NULL for %s",
+ matching_array[index]);
+ continue;
+ }
+ CFDictionarySetValue(classesToMatch, CFSTR(kIOMediaEjectableKey),
+ kCFBooleanTrue);
+ kernResult = IOServiceGetMatchingServices(masterPort, classesToMatch,
+ mediaIterator);
+ if (kernResult != KERN_SUCCESS) {
+ error_report("Note: IOServiceGetMatchingServices returned %d",
+ kernResult);
+ continue;
+ }
- return kernResult;
+ /* If a match was found, leave the loop */
+ if (*mediaIterator != 0) {
+ DPRINTF("Matching using %s\n", matching_array[index]);
+ mediaType = g_strdup(matching_array[index]);
+ break;
+ }
+ }
+ return mediaType;
}
-kern_return_t GetBSDPath( io_iterator_t mediaIterator, char *bsdPath, CFIndex maxPathSize )
+kern_return_t GetBSDPath(io_iterator_t mediaIterator, char *bsdPath,
+ CFIndex maxPathSize, int flags)
{
io_object_t nextMedia;
kern_return_t kernResult = KERN_FAILURE;
@@ -1768,7 +1939,9 @@
if ( bsdPathAsCFString ) {
size_t devPathLength;
strcpy( bsdPath, _PATH_DEV );
- strcat( bsdPath, "r" );
+ if (flags & BDRV_O_NOCACHE) {
+ strcat(bsdPath, "r");
+ }
devPathLength = strlen( bsdPath );
if ( CFStringGetCString( bsdPathAsCFString, bsdPath + devPathLength, maxPathSize - devPathLength, kCFStringEncodingASCII ) ) {
kernResult = KERN_SUCCESS;
@@ -1781,7 +1954,46 @@
return kernResult;
}
-#endif
+/* Sets up a real cdrom for use in QEMU */
+static bool setup_cdrom(char *bsd_path, Error **errp)
+{
+ int index, num_of_test_partitions = 2, fd;
+ char test_partition[MAXPATHLEN];
+ bool partition_found = false;
+
+ /* look for a working partition */
+ for (index = 0; index < num_of_test_partitions; index++) {
+ snprintf(test_partition, sizeof(test_partition), "%ss%d", bsd_path,
+ index);
+ fd = qemu_open(test_partition, O_RDONLY | O_BINARY | O_LARGEFILE);
+ if (fd >= 0) {
+ partition_found = true;
+ qemu_close(fd);
+ break;
+ }
+ }
+
+ /* if a working partition on the device was not found */
+ if (partition_found == false) {
+ error_setg(errp, "Failed to find a working partition on disc");
+ } else {
+ DPRINTF("Using %s as optical disc\n", test_partition);
+ pstrcpy(bsd_path, MAXPATHLEN, test_partition);
+ }
+ return partition_found;
+}
+
+/* Prints directions on mounting and unmounting a device */
+static void print_unmounting_directions(const char *file_name)
+{
+ error_report("If device %s is mounted on the desktop, unmount"
+ " it first before using it in QEMU", file_name);
+ error_report("Command to unmount device: diskutil unmountDisk %s",
+ file_name);
+ error_report("Command to mount device: diskutil mountDisk %s", file_name);
+}
+
+#endif /* defined(__APPLE__) && defined(__MACH__) */
static int hdev_probe_device(const char *filename)
{
@@ -1791,7 +2003,7 @@
if (strstart(filename, "/dev/cdrom", NULL))
return 50;
- if (stat(filename, &st) >= 0 &&
+ if (qemu_stat(filename, &st) >= 0 &&
(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode))) {
return 100;
}
@@ -1841,62 +2053,109 @@
qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
}
+static bool hdev_is_sg(BlockDriverState *bs)
+{
+
+#if defined(__linux__)
+
+ struct stat st;
+ struct sg_scsi_id scsiid;
+ int sg_version;
+
+ if (qemu_stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) &&
+ !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) &&
+ !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) {
+ DPRINTF("SG device found: type=%d, version=%d\n",
+ scsiid.scsi_type, sg_version);
+ return true;
+ }
+
+#endif
+
+ return false;
+}
+
static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
BDRVRawState *s = bs->opaque;
Error *local_err = NULL;
int ret;
- const char *filename = qdict_get_str(options, "filename");
#if defined(__APPLE__) && defined(__MACH__)
- if (strstart(filename, "/dev/cdrom", NULL)) {
- kern_return_t kernResult;
- io_iterator_t mediaIterator;
- char bsdPath[ MAXPATHLEN ];
- int fd;
+ const char *filename = qdict_get_str(options, "filename");
+ char bsd_path[MAXPATHLEN] = "";
+ bool error_occurred = false;
- kernResult = FindEjectableCDMedia( &mediaIterator );
- kernResult = GetBSDPath( mediaIterator, bsdPath, sizeof( bsdPath ) );
+ /* If using a real cdrom */
+ if (strcmp(filename, "/dev/cdrom") == 0) {
+ char *mediaType = NULL;
+ kern_return_t ret_val;
+ io_iterator_t mediaIterator = 0;
- if ( bsdPath[ 0 ] != '\0' ) {
- strcat(bsdPath,"s0");
- /* some CDs don't have a partition 0 */
- fd = qemu_open(bsdPath, O_RDONLY | O_BINARY | O_LARGEFILE);
- if (fd < 0) {
- bsdPath[strlen(bsdPath)-1] = '1';
- } else {
- qemu_close(fd);
- }
- filename = bsdPath;
- qdict_put(options, "filename", qstring_from_str(filename));
+ mediaType = FindEjectableOpticalMedia(&mediaIterator);
+ if (mediaType == NULL) {
+ error_setg(errp, "Please make sure your CD/DVD is in the optical"
+ " drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
}
- if ( mediaIterator )
- IOObjectRelease( mediaIterator );
+ ret_val = GetBSDPath(mediaIterator, bsd_path, sizeof(bsd_path), flags);
+ if (ret_val != KERN_SUCCESS) {
+ error_setg(errp, "Could not get BSD path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ /* If a real optical drive was not found */
+ if (bsd_path[0] == '\0') {
+ error_setg(errp, "Failed to obtain bsd path for optical drive");
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ /* If using a cdrom disc and finding a partition on the disc failed */
+ if (strncmp(mediaType, kIOCDMediaClass, 9) == 0 &&
+ setup_cdrom(bsd_path, errp) == false) {
+ print_unmounting_directions(bsd_path);
+ error_occurred = true;
+ goto hdev_open_Mac_error;
+ }
+
+ qdict_put(options, "filename", qstring_from_str(bsd_path));
+
+hdev_open_Mac_error:
+ g_free(mediaType);
+ if (mediaIterator) {
+ IOObjectRelease(mediaIterator);
+ }
+ if (error_occurred) {
+ return -ENOENT;
+ }
}
-#endif
+#endif /* defined(__APPLE__) && defined(__MACH__) */
s->type = FTYPE_FILE;
-#if defined(__linux__)
- {
- char resolved_path[ MAXPATHLEN ], *temp;
-
- temp = realpath(filename, resolved_path);
- if (temp && strstart(temp, "/dev/sg", NULL)) {
- bs->sg = 1;
- }
- }
-#endif
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret < 0) {
- if (local_err) {
- error_propagate(errp, local_err);
+ error_propagate(errp, local_err);
+#if defined(__APPLE__) && defined(__MACH__)
+ if (*bsd_path) {
+ filename = bsd_path;
}
+ /* if a physical device experienced an error while being opened */
+ if (strncmp(filename, "/dev/", 5) == 0) {
+ print_unmounting_directions(filename);
+ }
+#endif /* defined(__APPLE__) && defined(__MACH__) */
return ret;
}
+ /* Since this does ioctl the device must be already opened */
+ bs->sg = hdev_is_sg(bs);
+
if (flags & BDRV_O_RDWR) {
ret = check_hdev_writable(s);
if (ret < 0) {
@@ -1910,61 +2169,6 @@
}
#if defined(__linux__)
-/* Note: we do not have a reliable method to detect if the floppy is
- present. The current method is to try to open the floppy at every
- I/O and to keep it opened during a few hundreds of ms. */
-static int fd_open(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int last_media_present;
-
- if (s->type != FTYPE_FD)
- return 0;
- last_media_present = (s->fd >= 0);
- if (s->fd >= 0 &&
- (get_clock() - s->fd_open_time) >= FD_OPEN_TIMEOUT) {
- qemu_close(s->fd);
- s->fd = -1;
-#ifdef DEBUG_FLOPPY
- printf("Floppy closed\n");
-#endif
- }
- if (s->fd < 0) {
- if (s->fd_got_error &&
- (get_clock() - s->fd_error_time) < FD_OPEN_TIMEOUT) {
-#ifdef DEBUG_FLOPPY
- printf("No floppy (open delayed)\n");
-#endif
- return -EIO;
- }
- s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK);
- if (s->fd < 0) {
- s->fd_error_time = get_clock();
- s->fd_got_error = 1;
- if (last_media_present)
- s->fd_media_changed = 1;
-#ifdef DEBUG_FLOPPY
- printf("No floppy\n");
-#endif
- return -EIO;
- }
-#ifdef DEBUG_FLOPPY
- printf("Floppy opened\n");
-#endif
- }
- if (!last_media_present)
- s->fd_media_changed = 1;
- s->fd_open_time = get_clock();
- s->fd_got_error = 0;
- return 0;
-}
-
-static int hdev_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- BDRVRawState *s = bs->opaque;
-
- return ioctl(s->fd, req, buf);
-}
static BlockAIOCB *hdev_aio_ioctl(BlockDriverState *bs,
unsigned long int req, void *buf,
@@ -1977,7 +2181,7 @@
if (fd_open(bs) < 0)
return NULL;
- acb = g_slice_new(RawPosixAIOData);
+ acb = g_new(RawPosixAIOData, 1);
acb->bs = bs;
acb->aio_type = QEMU_AIO_IOCTL;
acb->aio_fildes = s->fd;
@@ -1987,8 +2191,8 @@
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
+#endif /* linux */
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static int fd_open(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
@@ -1998,17 +2202,9 @@
return 0;
return -EIO;
}
-#else /* !linux && !FreeBSD */
-static int fd_open(BlockDriverState *bs)
-{
- return 0;
-}
-
-#endif /* !linux && !FreeBSD */
-
-static coroutine_fn BlockAIOCB *hdev_aio_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
+static coroutine_fn BlockAIOCB *hdev_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque)
{
BDRVRawState *s = bs->opaque;
@@ -2016,12 +2212,12 @@
if (fd_open(bs) < 0) {
return NULL;
}
- return paio_submit(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit(bs, s->fd, offset, NULL, count,
cb, opaque, QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
-static coroutine_fn int hdev_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+static coroutine_fn int hdev_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags)
{
BDRVRawState *s = bs->opaque;
int rc;
@@ -2031,10 +2227,10 @@
return rc;
}
if (!(flags & BDRV_REQ_MAY_UNMAP)) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_WRITE_ZEROES|QEMU_AIO_BLKDEV);
} else if (s->discard_zeroes) {
- return paio_submit_co(bs, s->fd, sector_num, NULL, nb_sectors,
+ return paio_submit_co(bs, s->fd, offset, NULL, count,
QEMU_AIO_DISCARD|QEMU_AIO_BLKDEV);
}
return -ENOTSUP;
@@ -2049,17 +2245,22 @@
int64_t total_size = 0;
bool has_prefix;
- /* This function is used by all three protocol block drivers and therefore
- * any of these three prefixes may be given.
+ /* This function is used by both protocol block drivers and therefore either
+ * of these prefixes may be given.
* The return value has to be stored somewhere, otherwise this is an error
* due to -Werror=unused-value. */
has_prefix =
strstart(filename, "host_device:", &filename) ||
- strstart(filename, "host_cdrom:" , &filename) ||
- strstart(filename, "host_floppy:", &filename);
+ strstart(filename, "host_cdrom:" , &filename);
(void)has_prefix;
+ ret = raw_normalize_devicepath(&filename);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Could not normalize device path");
+ return ret;
+ }
+
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
BDRV_SECTOR_SIZE);
@@ -2101,181 +2302,30 @@
.bdrv_reopen_abort = raw_reopen_abort,
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_co_write_zeroes = hdev_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = hdev_co_pwrite_zeroes,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
- .bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_aio_pdiscard = hdev_aio_pdiscard,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
.bdrv_get_info = raw_get_info,
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
-
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
+ .bdrv_probe_blocksizes = hdev_probe_blocksizes,
+ .bdrv_probe_geometry = hdev_probe_geometry,
/* generic scsi device */
#ifdef __linux__
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
#endif
};
-#ifdef __linux__
-static void floppy_parse_filename(const char *filename, QDict *options,
- Error **errp)
-{
- /* The prefix is optional, just as for "file". */
- strstart(filename, "host_floppy:", &filename);
-
- qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename)));
-}
-
-static int floppy_open(BlockDriverState *bs, QDict *options, int flags,
- Error **errp)
-{
- BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
-
- s->type = FTYPE_FD;
-
- /* open will not fail even if no floppy is inserted, so add O_NONBLOCK */
- ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
- if (ret) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
- }
-
- /* close fd so that we can reopen it as needed */
- qemu_close(s->fd);
- s->fd = -1;
- s->fd_media_changed = 1;
-
- return 0;
-}
-
-static int floppy_probe_device(const char *filename)
-{
- int fd, ret;
- int prio = 0;
- struct floppy_struct fdparam;
- struct stat st;
-
- if (strstart(filename, "/dev/fd", NULL) &&
- !strstart(filename, "/dev/fdset/", NULL)) {
- prio = 50;
- }
-
- fd = qemu_open(filename, O_RDONLY | O_NONBLOCK);
- if (fd < 0) {
- goto out;
- }
- ret = fstat(fd, &st);
- if (ret == -1 || !S_ISBLK(st.st_mode)) {
- goto outc;
- }
-
- /* Attempt to detect via a floppy specific ioctl */
- ret = ioctl(fd, FDGETPRM, &fdparam);
- if (ret >= 0)
- prio = 100;
-
-outc:
- qemu_close(fd);
-out:
- return prio;
-}
-
-
-static int floppy_is_inserted(BlockDriverState *bs)
-{
- return fd_open(bs) >= 0;
-}
-
-static int floppy_media_changed(BlockDriverState *bs)
-{
- BDRVRawState *s = bs->opaque;
- int ret;
-
- /*
- * XXX: we do not have a true media changed indication.
- * It does not work if the floppy is changed without trying to read it.
- */
- fd_open(bs);
- ret = s->fd_media_changed;
- s->fd_media_changed = 0;
-#ifdef DEBUG_FLOPPY
- printf("Floppy changed=%d\n", ret);
-#endif
- return ret;
-}
-
-static void floppy_eject(BlockDriverState *bs, bool eject_flag)
-{
- BDRVRawState *s = bs->opaque;
- int fd;
-
- if (s->fd >= 0) {
- qemu_close(s->fd);
- s->fd = -1;
- }
- fd = qemu_open(bs->filename, s->open_flags | O_NONBLOCK);
- if (fd >= 0) {
- if (ioctl(fd, FDEJECT, 0) < 0)
- perror("FDEJECT");
- qemu_close(fd);
- }
-}
-
-static BlockDriver bdrv_host_floppy = {
- .format_name = "host_floppy",
- .protocol_name = "host_floppy",
- .instance_size = sizeof(BDRVRawState),
- .bdrv_needs_filename = true,
- .bdrv_probe_device = floppy_probe_device,
- .bdrv_parse_filename = floppy_parse_filename,
- .bdrv_file_open = floppy_open,
- .bdrv_close = raw_close,
- .bdrv_reopen_prepare = raw_reopen_prepare,
- .bdrv_reopen_commit = raw_reopen_commit,
- .bdrv_reopen_abort = raw_reopen_abort,
- .bdrv_create = hdev_create,
- .create_opts = &raw_create_opts,
-
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
- .bdrv_aio_flush = raw_aio_flush,
- .bdrv_refresh_limits = raw_refresh_limits,
- .bdrv_io_plug = raw_aio_plug,
- .bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
-
- .bdrv_truncate = raw_truncate,
- .bdrv_getlength = raw_getlength,
- .has_variable_length = true,
- .bdrv_get_allocated_file_size
- = raw_get_allocated_file_size,
-
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
- /* removable device support */
- .bdrv_is_inserted = floppy_is_inserted,
- .bdrv_media_changed = floppy_media_changed,
- .bdrv_eject = floppy_eject,
-};
-#endif
-
#if defined(__linux__) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
static void cdrom_parse_filename(const char *filename, QDict *options,
Error **errp)
@@ -2292,17 +2342,11 @@
Error **errp)
{
BDRVRawState *s = bs->opaque;
- Error *local_err = NULL;
- int ret;
s->type = FTYPE_CD;
/* open will not fail even if no CD is inserted, so add O_NONBLOCK */
- ret = raw_open_common(bs, options, flags, O_NONBLOCK, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return raw_open_common(bs, options, flags, O_NONBLOCK, errp);
}
static int cdrom_probe_device(const char *filename)
@@ -2331,15 +2375,13 @@
return prio;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
BDRVRawState *s = bs->opaque;
int ret;
ret = ioctl(s->fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
- if (ret == CDS_DISC_OK)
- return 1;
- return 0;
+ return ret == CDS_DISC_OK;
}
static void cdrom_eject(BlockDriverState *bs, bool eject_flag)
@@ -2383,13 +2425,13 @@
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2397,16 +2439,12 @@
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
.bdrv_lock_medium = cdrom_lock_medium,
/* generic scsi device */
- .bdrv_ioctl = hdev_ioctl,
.bdrv_aio_ioctl = hdev_aio_ioctl,
};
#endif /* __linux__ */
@@ -2423,9 +2461,7 @@
ret = raw_open_common(bs, options, flags, 0, &local_err);
if (ret) {
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
@@ -2465,7 +2501,7 @@
return 0;
}
-static int cdrom_is_inserted(BlockDriverState *bs)
+static bool cdrom_is_inserted(BlockDriverState *bs)
{
return raw_getlength(bs) > 0;
}
@@ -2520,13 +2556,12 @@
.bdrv_create = hdev_create,
.create_opts = &raw_create_opts,
- .bdrv_aio_readv = raw_aio_readv,
- .bdrv_aio_writev = raw_aio_writev,
+ .bdrv_co_preadv = raw_co_preadv,
+ .bdrv_co_pwritev = raw_co_pwritev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
- .bdrv_flush_io_queue = raw_aio_flush_io_queue,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2534,9 +2569,6 @@
.bdrv_get_allocated_file_size
= raw_get_allocated_file_size,
- .bdrv_detach_aio_context = raw_detach_aio_context,
- .bdrv_attach_aio_context = raw_attach_aio_context,
-
/* removable device support */
.bdrv_is_inserted = cdrom_is_inserted,
.bdrv_eject = cdrom_eject,
@@ -2553,7 +2585,6 @@
bdrv_register(&bdrv_file);
bdrv_register(&bdrv_host_device);
#ifdef __linux__
- bdrv_register(&bdrv_host_floppy);
bdrv_register(&bdrv_host_cdrom);
#endif
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
diff --git a/block/raw-win32.c b/block/raw-win32.c
index bf264d4..b445a2c 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -21,19 +21,19 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "trace.h"
#include "block/thread-pool.h"
#include "qemu/iov.h"
+#include "qapi/qmp/qstring.h"
#include <windows.h>
#include <winioctl.h>
-#ifdef CONFIG_ANDROID
-#include "android/utils/win32_unicode.h"
-#endif
#define FTYPE_FILE 0
#define FTYPE_CD 1
@@ -104,7 +104,7 @@
switch (aiocb->aio_type & QEMU_AIO_TYPE_MASK) {
case QEMU_AIO_READ:
count = handle_aiocb_rw(aiocb);
- if (count < aiocb->aio_nbytes && aiocb->bs->growable) {
+ if (count < aiocb->aio_nbytes) {
/* A short read means that we have reached EOF. Pad the buffer
* with zeros for bytes after EOF. */
iov_memset(aiocb->aio_iov, aiocb->aio_niov, count,
@@ -121,9 +121,9 @@
case QEMU_AIO_WRITE:
count = handle_aiocb_rw(aiocb);
if (count == aiocb->aio_nbytes) {
- count = 0;
+ ret = 0;
} else {
- count = -EINVAL;
+ ret = -EINVAL;
}
break;
case QEMU_AIO_FLUSH:
@@ -137,15 +137,15 @@
break;
}
- g_slice_free(RawWin32AIOData, aiocb);
+ g_free(aiocb);
return ret;
}
static BlockAIOCB *paio_submit(BlockDriverState *bs, HANDLE hfile,
- int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+ int64_t offset, QEMUIOVector *qiov, int count,
BlockCompletionFunc *cb, void *opaque, int type)
{
- RawWin32AIOData *acb = g_slice_new(RawWin32AIOData);
+ RawWin32AIOData *acb = g_new(RawWin32AIOData, 1);
ThreadPool *pool;
acb->bs = bs;
@@ -155,11 +155,12 @@
if (qiov) {
acb->aio_iov = qiov->iov;
acb->aio_niov = qiov->niov;
+ assert(qiov->size == count);
}
- acb->aio_nbytes = nb_sectors * 512;
- acb->aio_offset = sector_num * 512;
+ acb->aio_nbytes = count;
+ acb->aio_offset = offset;
- trace_paio_submit(acb, opaque, sector_num, nb_sectors, type);
+ trace_paio_submit(acb, opaque, offset, count, type);
pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
return thread_pool_submit_aio(pool, aio_worker, acb, cb, opaque);
}
@@ -222,7 +223,7 @@
}
}
-static void raw_probe_alignment(BlockDriverState *bs)
+static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
{
BDRVRawState *s = bs->opaque;
DWORD sectorsPerCluster, freeClusters, totalClusters, count;
@@ -230,14 +231,14 @@
BOOL status;
if (s->type == FTYPE_CD) {
- bs->request_alignment = 2048;
+ bs->bl.request_alignment = 2048;
return;
}
if (s->type == FTYPE_HARDDISK) {
status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
NULL, 0, &dg, sizeof(dg), &count, NULL);
if (status != 0) {
- bs->request_alignment = dg.Geometry.BytesPerSector;
+ bs->bl.request_alignment = dg.Geometry.BytesPerSector;
return;
}
/* try GetDiskFreeSpace too */
@@ -247,7 +248,7 @@
GetDiskFreeSpace(s->drive_path, &sectorsPerCluster,
&dg.Geometry.BytesPerSector,
&freeClusters, &totalClusters);
- bs->request_alignment = dg.Geometry.BytesPerSector;
+ bs->bl.request_alignment = dg.Geometry.BytesPerSector;
}
}
@@ -327,26 +328,13 @@
} else {
/* Relative path. */
char buf[MAX_PATH];
- GetCurrentDirectory(MAX_PATH, buf);
+ win32GetCurrentDirectory(MAX_PATH, buf);
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
}
-#ifdef CONFIG_ANDROID
- wchar_t* wideName = win32_utf8_to_utf16_str(filename);
- if (wideName == NULL) {
- ret = -EINVAL;
- goto fail;
- }
-
- s->hfile = CreateFileW(wideName, access_flags,
- FILE_SHARE_READ, NULL,
- OPEN_EXISTING, overlapped, NULL);
- free(wideName);
-#else
- s->hfile = CreateFile(filename, access_flags,
+ s->hfile = win32CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
-#endif
if (s->hfile == INVALID_HANDLE_VALUE) {
int err = GetLastError();
@@ -378,7 +366,6 @@
win32_aio_attach_aio_context(s->aio, bdrv_get_aio_context(bs));
}
- raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
@@ -392,9 +379,10 @@
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
- nb_sectors, cb, opaque, QEMU_AIO_READ);
+ nb_sectors, cb, opaque, QEMU_AIO_READ);
} else {
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_READ);
}
}
@@ -406,9 +394,10 @@
BDRVRawState *s = bs->opaque;
if (s->aio) {
return win32_aio_submit(bs, s->aio, s->hfile, sector_num, qiov,
- nb_sectors, cb, opaque, QEMU_AIO_WRITE);
+ nb_sectors, cb, opaque, QEMU_AIO_WRITE);
} else {
- return paio_submit(bs, s->hfile, sector_num, qiov, nb_sectors,
+ return paio_submit(bs, s->hfile, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS,
cb, opaque, QEMU_AIO_WRITE);
}
}
@@ -556,13 +545,14 @@
}
};
-static BlockDriver bdrv_file = {
+BlockDriver bdrv_file = {
.format_name = "file",
.protocol_name = "file",
.instance_size = sizeof(BDRVRawState),
.bdrv_needs_filename = true,
.bdrv_parse_filename = raw_parse_filename,
.bdrv_file_open = raw_open,
+ .bdrv_refresh_limits = raw_probe_alignment,
.bdrv_close = raw_close,
.bdrv_create = raw_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -691,7 +681,7 @@
create_flags = OPEN_EXISTING;
- s->hfile = CreateFile(filename, access_flags,
+ s->hfile = win32CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
create_flags, overlapped, NULL);
if (s->hfile == INVALID_HANDLE_VALUE) {
diff --git a/block/raw_bsd.c b/block/raw_bsd.c
index 401b967..588d408 100644
--- a/block/raw_bsd.c
+++ b/block/raw_bsd.c
@@ -1,6 +1,6 @@
/* BlockDriver implementation for "raw"
*
- * Copyright (C) 2010, 2013, Red Hat, Inc.
+ * Copyright (C) 2010-2016 Red Hat, Inc.
* Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com>
* Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com>
*
@@ -26,7 +26,9 @@
* IN THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/block_int.h"
+#include "qapi/error.h"
#include "qemu/option.h"
static QemuOptsList raw_create_opts = {
@@ -48,85 +50,130 @@
return 0;
}
-static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- return bdrv_co_readv(bs->file, sector_num, nb_sectors, qiov);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
}
-static int coroutine_fn raw_co_writev(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov)
+static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ int flags)
{
+ void *buf = NULL;
+ BlockDriver *drv;
+ QEMUIOVector local_qiov;
+ int ret;
+
+ if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) {
+ /* Handling partial writes would be a pain - so we just
+ * require that guests have 512-byte request alignment if
+ * probing occurred */
+ QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512);
+ QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512);
+ assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE);
+
+ buf = qemu_try_blockalign(bs->file->bs, 512);
+ if (!buf) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ ret = qemu_iovec_to_buf(qiov, 0, buf, 512);
+ if (ret != 512) {
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ drv = bdrv_probe_all(buf, 512, NULL);
+ if (drv != bs->drv) {
+ ret = -EPERM;
+ goto fail;
+ }
+
+ /* Use the checked buffer, a malicious guest might be overwriting its
+ * original buffer in the background. */
+ qemu_iovec_init(&local_qiov, qiov->niov + 1);
+ qemu_iovec_add(&local_qiov, buf, 512);
+ qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
+ qiov = &local_qiov;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- return bdrv_co_writev(bs->file, sector_num, nb_sectors, qiov);
+ ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
+
+fail:
+ if (qiov == &local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ }
+ qemu_vfree(buf);
+ return ret;
}
static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs,
int64_t sector_num,
- int nb_sectors, int *pnum)
+ int nb_sectors, int *pnum,
+ BlockDriverState **file)
{
*pnum = nb_sectors;
+ *file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
(sector_num << BDRV_SECTOR_BITS);
}
-static int coroutine_fn raw_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs,
+ int64_t offset, int count,
+ BdrvRequestFlags flags)
{
- return bdrv_co_write_zeroes(bs->file, sector_num, nb_sectors, flags);
+ return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
}
-static int coroutine_fn raw_co_discard(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors)
+static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs,
+ int64_t offset, int count)
{
- return bdrv_co_discard(bs->file, sector_num, nb_sectors);
+ return bdrv_co_pdiscard(bs->file->bs, offset, count);
}
static int64_t raw_getlength(BlockDriverState *bs)
{
- return bdrv_getlength(bs->file);
+ return bdrv_getlength(bs->file->bs);
}
static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
{
- return bdrv_get_info(bs->file, bdi);
+ return bdrv_get_info(bs->file->bs, bdi);
}
static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
{
- bs->bl = bs->file->bl;
+ if (bs->probed) {
+ /* To make it easier to protect the first sector, any probed
+ * image is restricted to read-modify-write on sub-sector
+ * operations. */
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+ }
}
static int raw_truncate(BlockDriverState *bs, int64_t offset)
{
- return bdrv_truncate(bs->file, offset);
-}
-
-static int raw_is_inserted(BlockDriverState *bs)
-{
- return bdrv_is_inserted(bs->file);
+ return bdrv_truncate(bs->file->bs, offset);
}
static int raw_media_changed(BlockDriverState *bs)
{
- return bdrv_media_changed(bs->file);
+ return bdrv_media_changed(bs->file->bs);
}
static void raw_eject(BlockDriverState *bs, bool eject_flag)
{
- bdrv_eject(bs->file, eject_flag);
+ bdrv_eject(bs->file->bs, eject_flag);
}
static void raw_lock_medium(BlockDriverState *bs, bool locked)
{
- bdrv_lock_medium(bs->file, locked);
-}
-
-static int raw_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
-{
- return bdrv_ioctl(bs->file, req, buf);
+ bdrv_lock_medium(bs->file->bs, locked);
}
static BlockAIOCB *raw_aio_ioctl(BlockDriverState *bs,
@@ -134,30 +181,39 @@
BlockCompletionFunc *cb,
void *opaque)
{
- return bdrv_aio_ioctl(bs->file, req, buf, cb, opaque);
+ return bdrv_aio_ioctl(bs->file->bs, req, buf, cb, opaque);
}
static int raw_has_zero_init(BlockDriverState *bs)
{
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
}
static int raw_create(const char *filename, QemuOpts *opts, Error **errp)
{
- Error *local_err = NULL;
- int ret;
-
- ret = bdrv_create_file(filename, opts, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- }
- return ret;
+ return bdrv_create_file(filename, opts, errp);
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- bs->sg = bs->file->sg;
+ bs->sg = bs->file->bs->sg;
+ bs->supported_write_flags = BDRV_REQ_FUA &
+ bs->file->bs->supported_write_flags;
+ bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+ bs->file->bs->supported_zero_flags;
+
+ if (bs->probed && !bdrv_is_read_only(bs)) {
+ fprintf(stderr,
+ "WARNING: Image format was not specified for '%s' and probing "
+ "guessed raw.\n"
+ " Automatically detecting the format is dangerous for "
+ "raw images, write operations on block 0 will be restricted.\n"
+ " Specify the 'raw' format explicitly to remove the "
+ "restrictions.\n",
+ bs->file->bs->filename);
+ }
+
return 0;
}
@@ -173,28 +229,38 @@
return 1;
}
-static BlockDriver bdrv_raw = {
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+ return bdrv_probe_blocksizes(bs->file->bs, bsz);
+}
+
+static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
+{
+ return bdrv_probe_geometry(bs->file->bs, geo);
+}
+
+BlockDriver bdrv_raw = {
.format_name = "raw",
.bdrv_probe = &raw_probe,
.bdrv_reopen_prepare = &raw_reopen_prepare,
.bdrv_open = &raw_open,
.bdrv_close = &raw_close,
.bdrv_create = &raw_create,
- .bdrv_co_readv = &raw_co_readv,
- .bdrv_co_writev = &raw_co_writev,
- .bdrv_co_write_zeroes = &raw_co_write_zeroes,
- .bdrv_co_discard = &raw_co_discard,
+ .bdrv_co_preadv = &raw_co_preadv,
+ .bdrv_co_pwritev = &raw_co_pwritev,
+ .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
+ .bdrv_co_pdiscard = &raw_co_pdiscard,
.bdrv_co_get_block_status = &raw_co_get_block_status,
.bdrv_truncate = &raw_truncate,
.bdrv_getlength = &raw_getlength,
.has_variable_length = true,
.bdrv_get_info = &raw_get_info,
.bdrv_refresh_limits = &raw_refresh_limits,
- .bdrv_is_inserted = &raw_is_inserted,
+ .bdrv_probe_blocksizes = &raw_probe_blocksizes,
+ .bdrv_probe_geometry = &raw_probe_geometry,
.bdrv_media_changed = &raw_media_changed,
.bdrv_eject = &raw_eject,
.bdrv_lock_medium = &raw_lock_medium,
- .bdrv_ioctl = &raw_ioctl,
.bdrv_aio_ioctl = &raw_aio_ioctl,
.create_opts = &raw_create_opts,
.bdrv_has_zero_init = &raw_has_zero_init
diff --git a/block/rbd.c b/block/rbd.c
index 5b5a64a..0106fea 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -11,11 +11,13 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include <inttypes.h>
+#include "qemu/osdep.h"
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "qemu/error-report.h"
#include "block/block_int.h"
+#include "crypto/secret.h"
+#include "qemu/cutils.h"
#include <rbd/librbd.h>
@@ -74,25 +76,18 @@
QEMUIOVector *qiov;
char *bounce;
RBDAIOCmd cmd;
- int64_t sector_num;
int error;
struct BDRVRBDState *s;
- int status;
} RBDAIOCB;
typedef struct RADOSCB {
- int rcbid;
RBDAIOCB *acb;
struct BDRVRBDState *s;
- int done;
int64_t size;
char *buf;
int64_t ret;
} RADOSCB;
-#define RBD_FD_READ 0
-#define RBD_FD_WRITE 1
-
typedef struct BDRVRBDState {
rados_t cluster;
rados_ioctx_t io_ctx;
@@ -235,7 +230,30 @@
return NULL;
}
-static int qemu_rbd_set_conf(rados_t cluster, const char *conf, Error **errp)
+
+static int qemu_rbd_set_auth(rados_t cluster, const char *secretid,
+ Error **errp)
+{
+ if (secretid == 0) {
+ return 0;
+ }
+
+ gchar *secret = qcrypto_secret_lookup_as_base64(secretid,
+ errp);
+ if (!secret) {
+ return -1;
+ }
+
+ rados_conf_set(cluster, "key", secret);
+ g_free(secret);
+
+ return 0;
+}
+
+
+static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
+ bool only_read_conf_file,
+ Error **errp)
{
char *p, *buf;
char name[RBD_MAX_CONF_NAME_SIZE];
@@ -267,17 +285,22 @@
qemu_rbd_unescape(value);
if (strcmp(name, "conf") == 0) {
- ret = rados_conf_read_file(cluster, value);
- if (ret < 0) {
- error_setg(errp, "error reading conf file %s", value);
- break;
+ /* read the conf file alone, so it doesn't override more
+ specific settings for a particular device */
+ if (only_read_conf_file) {
+ ret = rados_conf_read_file(cluster, value);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error reading conf file %s",
+ value);
+ break;
+ }
}
} else if (strcmp(name, "id") == 0) {
/* ignore, this is parsed by qemu_rbd_parse_clientname() */
- } else {
+ } else if (!only_read_conf_file) {
ret = rados_conf_set(cluster, name, value);
if (ret < 0) {
- error_setg(errp, "invalid conf option %s", name);
+ error_setg_errno(errp, -ret, "invalid conf option %s", name);
ret = -EINVAL;
break;
}
@@ -300,10 +323,13 @@
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
rados_t cluster;
rados_ioctx_t io_ctx;
int ret;
+ secretid = qemu_opt_get(opts, "password-secret");
+
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
name, sizeof(name),
@@ -325,42 +351,59 @@
error_setg(errp, "obj size too small");
return -EINVAL;
}
- obj_order = ffs(objsize) - 1;
+ obj_order = ctz32(objsize);
}
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
- if (rados_create(&cluster, clientname) < 0) {
- error_setg(errp, "error initializing");
- return -EIO;
+ ret = rados_create(&cluster, clientname);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error initializing");
+ return ret;
}
if (strstr(conf, "conf=") == NULL) {
/* try default location, but ignore failure */
rados_conf_read_file(cluster, NULL);
- }
-
- if (conf[0] != '\0' &&
- qemu_rbd_set_conf(cluster, conf, &local_err) < 0) {
+ } else if (conf[0] != '\0' &&
+ qemu_rbd_set_conf(cluster, conf, true, &local_err) < 0) {
rados_shutdown(cluster);
error_propagate(errp, local_err);
return -EIO;
}
- if (rados_connect(cluster) < 0) {
- error_setg(errp, "error connecting");
+ if (conf[0] != '\0' &&
+ qemu_rbd_set_conf(cluster, conf, false, &local_err) < 0) {
+ rados_shutdown(cluster);
+ error_propagate(errp, local_err);
+ return -EIO;
+ }
+
+ if (qemu_rbd_set_auth(cluster, secretid, errp) < 0) {
rados_shutdown(cluster);
return -EIO;
}
- if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
- error_setg(errp, "error opening pool %s", pool);
+ ret = rados_connect(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error connecting");
rados_shutdown(cluster);
- return -EIO;
+ return ret;
+ }
+
+ ret = rados_ioctx_create(cluster, pool, &io_ctx);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error opening pool %s", pool);
+ rados_shutdown(cluster);
+ return ret;
}
ret = rbd_create(io_ctx, name, bytes, &obj_order);
rados_ioctx_destroy(io_ctx);
rados_shutdown(cluster);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "error rbd create");
+ return ret;
+ }
return ret;
}
@@ -405,7 +448,6 @@
}
qemu_vfree(acb->bounce);
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
- acb->status = 0;
qemu_aio_unref(acb);
}
@@ -420,6 +462,11 @@
.type = QEMU_OPT_STRING,
.help = "Specification of the rbd image",
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
},
};
@@ -433,6 +480,7 @@
char conf[RBD_MAX_CONF_SIZE];
char clientname_buf[RBD_MAX_CONF_SIZE];
char *clientname;
+ const char *secretid;
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
@@ -447,6 +495,7 @@
}
filename = qemu_opt_get(opts, "filename");
+ secretid = qemu_opt_get(opts, "password-secret");
if (qemu_rbd_parsename(filename, pool, sizeof(pool),
snap_buf, sizeof(snap_buf),
@@ -459,7 +508,7 @@
clientname = qemu_rbd_parse_clientname(conf, clientname_buf);
r = rados_create(&s->cluster, clientname);
if (r < 0) {
- error_setg(&local_err, "error initializing");
+ error_setg_errno(errp, -r, "error initializing");
goto failed_opts;
}
@@ -468,6 +517,28 @@
s->snap = g_strdup(snap_buf);
}
+ if (strstr(conf, "conf=") == NULL) {
+ /* try default location, but ignore failure */
+ rados_conf_read_file(s->cluster, NULL);
+ } else if (conf[0] != '\0') {
+ r = qemu_rbd_set_conf(s->cluster, conf, true, errp);
+ if (r < 0) {
+ goto failed_shutdown;
+ }
+ }
+
+ if (conf[0] != '\0') {
+ r = qemu_rbd_set_conf(s->cluster, conf, false, errp);
+ if (r < 0) {
+ goto failed_shutdown;
+ }
+ }
+
+ if (qemu_rbd_set_auth(s->cluster, secretid, errp) < 0) {
+ r = -EIO;
+ goto failed_shutdown;
+ }
+
/*
* Fallback to more conservative semantics if setting cache
* options fails. Ignore errors from setting rbd_cache because the
@@ -481,33 +552,21 @@
rados_conf_set(s->cluster, "rbd_cache", "true");
}
- if (strstr(conf, "conf=") == NULL) {
- /* try default location, but ignore failure */
- rados_conf_read_file(s->cluster, NULL);
- }
-
- if (conf[0] != '\0') {
- r = qemu_rbd_set_conf(s->cluster, conf, errp);
- if (r < 0) {
- goto failed_shutdown;
- }
- }
-
r = rados_connect(s->cluster);
if (r < 0) {
- error_setg(&local_err, "error connecting");
+ error_setg_errno(errp, -r, "error connecting");
goto failed_shutdown;
}
r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
if (r < 0) {
- error_setg(&local_err, "error opening pool %s", pool);
+ error_setg_errno(errp, -r, "error opening pool %s", pool);
goto failed_shutdown;
}
r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
if (r < 0) {
- error_setg(&local_err, "error reading header from %s", s->name);
+ error_setg_errno(errp, -r, "error reading header from %s", s->name);
goto failed_open;
}
@@ -590,9 +649,9 @@
}
static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
- int64_t sector_num,
+ int64_t off,
QEMUIOVector *qiov,
- int nb_sectors,
+ int64_t size,
BlockCompletionFunc *cb,
void *opaque,
RBDAIOCmd cmd)
@@ -600,7 +659,6 @@
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
- int64_t off, size;
char *buf;
int r;
@@ -609,6 +667,7 @@
acb = qemu_aio_get(&rbd_aiocb_info, bs, cb, opaque);
acb->cmd = cmd;
acb->qiov = qiov;
+ assert(!qiov || qiov->size == size);
if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
acb->bounce = NULL;
} else {
@@ -621,7 +680,6 @@
acb->error = 0;
acb->s = s;
acb->bh = NULL;
- acb->status = -EINPROGRESS;
if (cmd == RBD_AIO_WRITE) {
qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
@@ -629,11 +687,7 @@
buf = acb->bounce;
- off = sector_num * BDRV_SECTOR_SIZE;
- size = nb_sectors * BDRV_SECTOR_SIZE;
-
rcb = g_new(RADOSCB, 1);
- rcb->done = 0;
rcb->acb = acb;
rcb->buf = buf;
rcb->s = acb->s;
@@ -682,7 +736,8 @@
BlockCompletionFunc *cb,
void *opaque)
{
- return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_READ);
}
@@ -693,7 +748,8 @@
BlockCompletionFunc *cb,
void *opaque)
{
- return rbd_start_aio(bs, sector_num, qiov, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, sector_num << BDRV_SECTOR_BITS, qiov,
+ nb_sectors << BDRV_SECTOR_BITS, cb, opaque,
RBD_AIO_WRITE);
}
@@ -826,10 +882,8 @@
const char *snapshot_name)
{
BDRVRBDState *s = bs->opaque;
- int r;
- r = rbd_snap_rollback(s->image, snapshot_name);
- return r;
+ return rbd_snap_rollback(s->image, snapshot_name);
}
static int qemu_rbd_snap_list(BlockDriverState *bs,
@@ -876,13 +930,13 @@
}
#ifdef LIBRBD_SUPPORTS_DISCARD
-static BlockAIOCB* qemu_rbd_aio_discard(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BlockCompletionFunc *cb,
- void *opaque)
+static BlockAIOCB *qemu_rbd_aio_pdiscard(BlockDriverState *bs,
+ int64_t offset,
+ int count,
+ BlockCompletionFunc *cb,
+ void *opaque)
{
- return rbd_start_aio(bs, sector_num, NULL, nb_sectors, cb, opaque,
+ return rbd_start_aio(bs, offset, NULL, count, cb, opaque,
RBD_AIO_DISCARD);
}
#endif
@@ -913,6 +967,11 @@
.type = QEMU_OPT_SIZE,
.help = "RBD object size"
},
+ {
+ .name = "password-secret",
+ .type = QEMU_OPT_STRING,
+ .help = "ID of secret providing the password",
+ },
{ /* end of list */ }
}
};
@@ -941,7 +1000,7 @@
#endif
#ifdef LIBRBD_SUPPORTS_DISCARD
- .bdrv_aio_discard = qemu_rbd_aio_discard,
+ .bdrv_aio_pdiscard = qemu_rbd_aio_pdiscard,
#endif
.bdrv_snapshot_create = qemu_rbd_snap_create,
diff --git a/block/sheepdog.c b/block/sheepdog.c
index be3176f..66e1cb2 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -12,12 +12,15 @@
* GNU GPL, version 2 or (at your option) any later version.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu/uri.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/bitops.h"
+#include "qemu/cutils.h"
#define SD_PROTO_VER 0x01
@@ -28,7 +31,6 @@
#define SD_OP_READ_OBJ 0x02
#define SD_OP_WRITE_OBJ 0x03
/* 0x04 is used internally by Sheepdog */
-#define SD_OP_DISCARD_OBJ 0x05
#define SD_OP_NEW_VDI 0x11
#define SD_OP_LOCK_VDI 0x12
@@ -37,6 +39,7 @@
#define SD_OP_READ_VDIS 0x15
#define SD_OP_FLUSH_VDI 0x16
#define SD_OP_DEL_VDI 0x17
+#define SD_OP_GET_CLUSTER_DEFAULT 0x18
#define SD_FLAG_CMD_WRITE 0x01
#define SD_FLAG_CMD_COW 0x02
@@ -91,6 +94,7 @@
#define SD_NR_VDIS (1U << 24)
#define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22)
#define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS)
+#define SD_DEFAULT_BLOCK_SIZE_SHIFT 22
/*
* For erasure coding, we use at most SD_EC_MAX_STRIP for data strips and
* (SD_EC_MAX_STRIP - 1) for parity strips
@@ -167,7 +171,8 @@
uint32_t base_vdi_id;
uint8_t copies;
uint8_t copy_policy;
- uint8_t reserved[2];
+ uint8_t store_policy;
+ uint8_t block_size_shift;
uint32_t snapid;
uint32_t type;
uint32_t pad[2];
@@ -186,6 +191,21 @@
uint32_t pad[5];
} SheepdogVdiRsp;
+typedef struct SheepdogClusterRsp {
+ uint8_t proto_ver;
+ uint8_t opcode;
+ uint16_t flags;
+ uint32_t epoch;
+ uint32_t id;
+ uint32_t data_length;
+ uint32_t result;
+ uint8_t nr_copies;
+ uint8_t copy_policy;
+ uint8_t block_size_shift;
+ uint8_t __pad1;
+ uint32_t __pad2[6];
+} SheepdogClusterRsp;
+
typedef struct SheepdogInode {
char name[SD_MAX_VDI_LEN];
char tag[SD_MAX_VDI_TAG_LEN];
@@ -266,15 +286,24 @@
return !!inode->snap_ctime;
}
+/*
+ * Number of data objects of (1 << block_size_shift) bytes needed to cover
+ * inode->vdi_size bytes, rounding up.
+ */
+static inline size_t count_data_objs(const struct SheepdogInode *inode)
+{
+    const unsigned long obj_bytes = 1UL << inode->block_size_shift;
+
+    return DIV_ROUND_UP(inode->vdi_size, obj_bytes);
+}
+
#undef DPRINTF
#ifdef DEBUG_SDOG
-#define DPRINTF(fmt, args...) \
- do { \
- fprintf(stdout, "%s %d: " fmt, __func__, __LINE__, ##args); \
- } while (0)
+#define DEBUG_SDOG_PRINT 1
#else
-#define DPRINTF(fmt, args...)
+#define DEBUG_SDOG_PRINT 0
#endif
+/*
+ * Debug trace to stderr, prefixed with the calling function and line.
+ * The fprintf() is guarded by the constant DEBUG_SDOG_PRINT instead of
+ * being preprocessed away, so its arguments remain visible to the
+ * compiler even when DEBUG_SDOG is not defined.
+ */
+#define DPRINTF(fmt, args...) \
+ do { \
+ if (DEBUG_SDOG_PRINT) { \
+ fprintf(stderr, "%s %d: " fmt, __func__, __LINE__, ##args); \
+ } \
+ } while (0)
typedef struct SheepdogAIOCB SheepdogAIOCB;
@@ -300,6 +329,10 @@
AIOCB_DISCARD_OBJ,
};
+/*
+ * True when the inclusive object-index ranges
+ * [min_affect_data_idx, max_affect_data_idx] of the two SheepdogAIOCB
+ * pointers x and y intersect.
+ *
+ * Both arguments are parenthesized so that any pointer-valued expression
+ * can be passed safely.  Each argument is evaluated twice, so it must not
+ * have side effects.
+ */
+#define AIOCBOverlapping(x, y)                                       \
+    (!((x)->max_affect_data_idx < (y)->min_affect_data_idx           \
+       || (y)->max_affect_data_idx < (x)->min_affect_data_idx))
+
struct SheepdogAIOCB {
BlockAIOCB common;
@@ -316,6 +349,20 @@
bool cancelable;
int nr_pending;
+
+ uint32_t min_affect_data_idx;
+ uint32_t max_affect_data_idx;
+
+ /*
+ * The difference between affect_data_idx and dirty_data_idx:
+ * affect_data_idx represents range of index of all request types.
+ * dirty_data_idx represents range of index updated by COW requests.
+ * dirty_data_idx is used for updating an inode object.
+ */
+ uint32_t min_dirty_data_idx;
+ uint32_t max_dirty_data_idx;
+
+ QLIST_ENTRY(SheepdogAIOCB) aiocb_siblings;
};
typedef struct BDRVSheepdogState {
@@ -324,9 +371,6 @@
SheepdogInode inode;
- uint32_t min_dirty_data_idx;
- uint32_t max_dirty_data_idx;
-
char name[SD_MAX_VDI_LEN];
bool is_snapshot;
uint32_t cache_flags;
@@ -344,10 +388,17 @@
/* Every aio request must be linked to either of these queues. */
QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
- QLIST_HEAD(pending_aio_head, AIOReq) pending_aio_head;
QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
+
+ CoQueue overlapping_queue;
+ QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
} BDRVSheepdogState;
+/*
+ * Values staged by sd_reopen_prepare() and installed into
+ * BDRVSheepdogState by sd_reopen_commit().
+ */
+typedef struct BDRVSheepdogReopenState {
+ int fd; /* result of get_sheep_fd(); negative when it failed */
+ int cache_flags; /* SD_FLAG_CMD_CACHE or SD_FLAG_CMD_DIRECT */
+} BDRVSheepdogReopenState;
+
static const char * sd_strerror(int err)
{
int i;
@@ -444,7 +495,7 @@
static void coroutine_fn sd_finish_aiocb(SheepdogAIOCB *acb)
{
- qemu_coroutine_enter(acb->coroutine, NULL);
+ qemu_coroutine_enter(acb->coroutine);
qemu_aio_unref(acb);
}
@@ -480,13 +531,7 @@
AIOReq *aioreq, *next;
if (sd_acb_cancelable(acb)) {
- /* Remove outstanding requests from pending and failed queues. */
- QLIST_FOREACH_SAFE(aioreq, &s->pending_aio_head, aio_siblings,
- next) {
- if (aioreq->aiocb == acb) {
- free_aio_req(s, aioreq);
- }
- }
+ /* Remove outstanding requests from failed queue. */
QLIST_FOREACH_SAFE(aioreq, &s->failed_aio_head, aio_siblings,
next) {
if (aioreq->aiocb == acb) {
@@ -511,6 +556,10 @@
int64_t sector_num, int nb_sectors)
{
SheepdogAIOCB *acb;
+ uint32_t object_size;
+ BDRVSheepdogState *s = bs->opaque;
+
+ object_size = (UINT32_C(1) << s->inode.block_size_shift);
acb = qemu_aio_get(&sd_aiocb_info, bs, NULL, NULL);
@@ -524,9 +573,18 @@
acb->coroutine = qemu_coroutine_self();
acb->ret = 0;
acb->nr_pending = 0;
+
+ acb->min_affect_data_idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
+ acb->max_affect_data_idx = (acb->sector_num * BDRV_SECTOR_SIZE +
+ acb->nb_sectors * BDRV_SECTOR_SIZE) / object_size;
+
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
+
return acb;
}
+/* Return -EIO in case of error, file descriptor on success */
static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
{
int fd;
@@ -546,11 +604,14 @@
if (fd >= 0) {
qemu_set_nonblock(fd);
+ } else {
+ fd = -EIO;
}
return fd;
}
+/* Return 0 on success and -errno in case of error */
static coroutine_fn int send_co_req(int sockfd, SheepdogReq *hdr, void *data,
unsigned int *wlen)
{
@@ -559,12 +620,13 @@
ret = qemu_co_send(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
error_report("failed to send a req, %s", strerror(errno));
- return ret;
+ return -errno;
}
ret = qemu_co_send(sockfd, data, *wlen);
if (ret != *wlen) {
error_report("failed to send a req, %s", strerror(errno));
+ return -errno;
}
return ret;
@@ -574,7 +636,7 @@
{
Coroutine *co = opaque;
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
typedef struct SheepdogReqCo {
@@ -600,14 +662,16 @@
unsigned int *rlen = srco->rlen;
co = qemu_coroutine_self();
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, restart_co_req, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, restart_co_req, co);
ret = send_co_req(sockfd, hdr, data, wlen);
if (ret < 0) {
goto out;
}
- aio_set_fd_handler(srco->aio_context, sockfd, restart_co_req, NULL, co);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ restart_co_req, NULL, co);
ret = qemu_co_recv(sockfd, hdr, sizeof(*hdr));
if (ret != sizeof(*hdr)) {
@@ -632,12 +696,18 @@
out:
/* there is at most one request for this sockfd, so it is safe to
* set each handler to NULL. */
- aio_set_fd_handler(srco->aio_context, sockfd, NULL, NULL, NULL);
+ aio_set_fd_handler(srco->aio_context, sockfd, false,
+ NULL, NULL, NULL);
srco->ret = ret;
srco->finished = true;
}
+/*
+ * Send the request to the sheep in a synchronous manner.
+ *
+ * Return 0 on success, -errno in case of error.
+ */
static int do_req(int sockfd, AioContext *aio_context, SheepdogReq *hdr,
void *data, unsigned int *wlen, unsigned int *rlen)
{
@@ -656,8 +726,8 @@
if (qemu_in_coroutine()) {
do_co_req(&srco);
} else {
- co = qemu_coroutine_create(do_co_req);
- qemu_coroutine_enter(co, &srco);
+ co = qemu_coroutine_create(do_co_req, &srco);
+ qemu_coroutine_enter(co);
while (!srco.finished) {
aio_poll(aio_context, true);
}
@@ -674,44 +744,13 @@
static int get_sheep_fd(BDRVSheepdogState *s, Error **errp);
static void co_write_request(void *opaque);
-static AIOReq *find_pending_req(BDRVSheepdogState *s, uint64_t oid)
-{
- AIOReq *aio_req;
-
- QLIST_FOREACH(aio_req, &s->pending_aio_head, aio_siblings) {
- if (aio_req->oid == oid) {
- return aio_req;
- }
- }
-
- return NULL;
-}
-
-/*
- * This function searchs pending requests to the object `oid', and
- * sends them.
- */
-static void coroutine_fn send_pending_req(BDRVSheepdogState *s, uint64_t oid)
-{
- AIOReq *aio_req;
- SheepdogAIOCB *acb;
-
- while ((aio_req = find_pending_req(s, oid)) != NULL) {
- acb = aio_req->aiocb;
- /* move aio_req from pending list to inflight one */
- QLIST_REMOVE(aio_req, aio_siblings);
- QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
- acb->aiocb_type);
- }
-}
-
static coroutine_fn void reconnect_to_sdog(void *opaque)
{
BDRVSheepdogState *s = opaque;
AIOReq *aio_req, *next;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
close(s->fd);
s->fd = -1;
@@ -726,8 +765,7 @@
s->fd = get_sheep_fd(s, &local_err);
if (s->fd < 0) {
DPRINTF("Wait for connection to be established\n");
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
1000000000ULL);
}
@@ -809,15 +847,9 @@
*/
if (rsp.result == SD_RES_SUCCESS) {
s->inode.data_vdi_id[idx] = s->inode.vdi_id;
- s->max_dirty_data_idx = MAX(idx, s->max_dirty_data_idx);
- s->min_dirty_data_idx = MIN(idx, s->min_dirty_data_idx);
+ acb->max_dirty_data_idx = MAX(idx, acb->max_dirty_data_idx);
+ acb->min_dirty_data_idx = MIN(idx, acb->min_dirty_data_idx);
}
- /*
- * Some requests may be blocked because simultaneous
- * create requests are not allowed, so we search the
- * pending requests here.
- */
- send_pending_req(s, aio_req->oid);
}
break;
case AIOCB_READ_UDATA:
@@ -843,10 +875,6 @@
rsp.result = SD_RES_SUCCESS;
s->discard_supported = false;
break;
- case SD_RES_SUCCESS:
- idx = data_oid_to_idx(aio_req->oid);
- s->inode.data_vdi_id[idx] = 0;
- break;
default:
break;
}
@@ -897,17 +925,17 @@
BDRVSheepdogState *s = opaque;
if (!s->co_recv) {
- s->co_recv = qemu_coroutine_create(aio_read_response);
+ s->co_recv = qemu_coroutine_create(aio_read_response, opaque);
}
- qemu_coroutine_enter(s->co_recv, opaque);
+ qemu_coroutine_enter(s->co_recv);
}
static void co_write_request(void *opaque)
{
BDRVSheepdogState *s = opaque;
- qemu_coroutine_enter(s->co_send, NULL);
+ qemu_coroutine_enter(s->co_send);
}
/*
@@ -925,7 +953,8 @@
return fd;
}
- aio_set_fd_handler(s->aio_context, fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, fd, false,
+ co_read_response, NULL, s);
return fd;
}
@@ -1161,7 +1190,13 @@
hdr.flags = SD_FLAG_CMD_WRITE | flags;
break;
case AIOCB_DISCARD_OBJ:
- hdr.opcode = SD_OP_DISCARD_OBJ;
+ hdr.opcode = SD_OP_WRITE_OBJ;
+ hdr.flags = SD_FLAG_CMD_WRITE | flags;
+ s->inode.data_vdi_id[data_oid_to_idx(oid)] = 0;
+ offset = offsetof(SheepdogInode,
+ data_vdi_id[data_oid_to_idx(oid)]);
+ oid = vid_to_vdi_oid(s->inode.vdi_id);
+ wlen = datalen = sizeof(uint32_t);
break;
}
@@ -1180,7 +1215,7 @@
qemu_co_mutex_lock(&s->lock);
s->co_send = qemu_coroutine_self();
- aio_set_fd_handler(s->aio_context, s->fd,
+ aio_set_fd_handler(s->aio_context, s->fd, false,
co_read_response, co_write_request, s);
socket_set_cork(s->fd, 1);
@@ -1199,7 +1234,8 @@
}
out:
socket_set_cork(s->fd, 0);
- aio_set_fd_handler(s->aio_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(s->aio_context, s->fd, false,
+ co_read_response, NULL, s);
s->co_send = NULL;
qemu_co_mutex_unlock(&s->lock);
}
@@ -1283,8 +1319,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
return -EIO;
}
@@ -1292,8 +1327,7 @@
ret = find_vdi_name(s, s->name, snapid, tag, &vid, false, &local_err);
if (ret) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
goto out;
}
@@ -1315,30 +1349,6 @@
return ret;
}
-/* Return true if the specified request is linked to the pending list. */
-static bool check_simultaneous_create(BDRVSheepdogState *s, AIOReq *aio_req)
-{
- AIOReq *areq;
- QLIST_FOREACH(areq, &s->inflight_aio_head, aio_siblings) {
- if (areq != aio_req && areq->oid == aio_req->oid) {
- /*
- * Sheepdog cannot handle simultaneous create requests to the same
- * object, so we cannot send the request until the previous request
- * finishes.
- */
- DPRINTF("simultaneous create to %" PRIx64 "\n", aio_req->oid);
- aio_req->flags = 0;
- aio_req->base_oid = 0;
- aio_req->create = false;
- QLIST_REMOVE(aio_req, aio_siblings);
- QLIST_INSERT_HEAD(&s->pending_aio_head, aio_req, aio_siblings);
- return true;
- }
- }
-
- return false;
-}
-
static void coroutine_fn resend_aioreq(BDRVSheepdogState *s, AIOReq *aio_req)
{
SheepdogAIOCB *acb = aio_req->aiocb;
@@ -1353,10 +1363,6 @@
goto out;
}
- if (check_simultaneous_create(s, aio_req)) {
- return;
- }
-
if (s->inode.data_vdi_id[idx]) {
aio_req->base_oid = vid_to_data_oid(s->inode.data_vdi_id[idx], idx);
aio_req->flags |= SD_FLAG_CMD_COW;
@@ -1379,7 +1385,8 @@
{
BDRVSheepdogState *s = bs->opaque;
- aio_set_fd_handler(s->aio_context, s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(s->aio_context, s->fd, false, NULL,
+ NULL, NULL);
}
static void sd_attach_aio_context(BlockDriverState *bs,
@@ -1388,7 +1395,8 @@
BDRVSheepdogState *s = bs->opaque;
s->aio_context = new_context;
- aio_set_fd_handler(new_context, s->fd, co_read_response, NULL, s);
+ aio_set_fd_handler(new_context, s->fd, false,
+ co_read_response, NULL, s);
}
/* TODO Convert to fine grained options */
@@ -1432,8 +1440,8 @@
filename = qemu_opt_get(opts, "filename");
QLIST_INIT(&s->inflight_aio_head);
- QLIST_INIT(&s->pending_aio_head);
QLIST_INIT(&s->failed_aio_head);
+ QLIST_INIT(&s->inflight_aiocb_head);
s->fd = -1;
memset(vdi, 0, sizeof(vdi));
@@ -1492,17 +1500,17 @@
}
memcpy(&s->inode, buf, sizeof(s->inode));
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
bs->total_sectors = s->inode.vdi_size / BDRV_SECTOR_SIZE;
pstrcpy(s->name, sizeof(s->name), vdi);
qemu_co_mutex_init(&s->lock);
+ qemu_co_queue_init(&s->overlapping_queue);
qemu_opts_del(opts);
g_free(buf);
return 0;
out:
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
if (s->fd >= 0) {
closesocket(s->fd);
}
@@ -1511,6 +1519,70 @@
return ret;
}
+/*
+ * Reopen, phase 1: stage the requested cache mode and a fresh sheep
+ * connection in a BDRVSheepdogReopenState attached to state->opaque.
+ *
+ * Returns 0 on success, or the negative value from get_sheep_fd() on
+ * failure.  The staged state stays attached to state->opaque either way.
+ */
+static int sd_reopen_prepare(BDRVReopenState *state, BlockReopenQueue *queue,
+                             Error **errp)
+{
+    BDRVSheepdogState *s = state->bs->opaque;
+    BDRVSheepdogReopenState *staged = g_new0(BDRVSheepdogReopenState, 1);
+
+    state->opaque = staged;
+
+    if (state->flags & BDRV_O_NOCACHE) {
+        staged->cache_flags = SD_FLAG_CMD_DIRECT;
+    } else {
+        staged->cache_flags = SD_FLAG_CMD_CACHE;
+    }
+
+    staged->fd = get_sheep_fd(s, errp);
+
+    return staged->fd < 0 ? staged->fd : 0;
+}
+
+/*
+ * Reopen, phase 2: switch to the connection and cache mode staged in
+ * state->opaque by sd_reopen_prepare(), then free the staged state.
+ *
+ * The previous descriptor is unregistered and closed only when it is
+ * valid.  s->fd uses -1, not 0, as its "no connection" value, so the
+ * check is ">= 0" rather than a truthiness test.
+ */
+static void sd_reopen_commit(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (s->fd >= 0) {
+        aio_set_fd_handler(s->aio_context, s->fd, false,
+                           NULL, NULL, NULL);
+        closesocket(s->fd);
+    }
+
+    s->fd = re_s->fd;
+    s->cache_flags = re_s->cache_flags;
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+/*
+ * Reopen rollback: discard whatever sd_reopen_prepare() staged.
+ *
+ * state->opaque may be NULL, in which case there is nothing to undo.
+ * When prepare failed in get_sheep_fd() the staged fd is negative, so
+ * the handler removal and close are done only for a valid (>= 0)
+ * descriptor.
+ */
+static void sd_reopen_abort(BDRVReopenState *state)
+{
+    BDRVSheepdogReopenState *re_s = state->opaque;
+    BDRVSheepdogState *s = state->bs->opaque;
+
+    if (re_s == NULL) {
+        return;
+    }
+
+    if (re_s->fd >= 0) {
+        aio_set_fd_handler(s->aio_context, re_s->fd, false,
+                           NULL, NULL, NULL);
+        closesocket(re_s->fd);
+    }
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
Error **errp)
{
@@ -1544,6 +1616,7 @@
hdr.vdi_size = s->inode.vdi_size;
hdr.copy_policy = s->inode.copy_policy;
hdr.copies = s->inode.nr_copies;
+ hdr.block_size_shift = s->inode.block_size_shift;
ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr, buf, &wlen, &rlen);
@@ -1568,47 +1641,60 @@
static int sd_prealloc(const char *filename, Error **errp)
{
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
+ BDRVSheepdogState *base = NULL;
+ unsigned long buf_size;
uint32_t idx, max_idx;
+ uint32_t object_size;
int64_t vdi_size;
- void *buf = g_malloc0(SD_DATA_OBJ_SIZE);
+ void *buf = NULL;
int ret;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, errp);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out_with_err_set;
}
- vdi_size = bdrv_getlength(bs);
+ blk_set_allow_write_beyond_eof(blk, true);
+
+ vdi_size = blk_getlength(blk);
if (vdi_size < 0) {
ret = vdi_size;
goto out;
}
- max_idx = DIV_ROUND_UP(vdi_size, SD_DATA_OBJ_SIZE);
+
+ base = blk_bs(blk)->opaque;
+ object_size = (UINT32_C(1) << base->inode.block_size_shift);
+ buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
+ buf = g_malloc0(buf_size);
+
+ max_idx = DIV_ROUND_UP(vdi_size, buf_size);
for (idx = 0; idx < max_idx; idx++) {
/*
* The created image can be a cloned image, so we need to read
* a data from the source image.
*/
- ret = bdrv_pread(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ ret = blk_pread(blk, idx * buf_size, buf, buf_size);
if (ret < 0) {
goto out;
}
- ret = bdrv_pwrite(bs, idx * SD_DATA_OBJ_SIZE, buf, SD_DATA_OBJ_SIZE);
+ ret = blk_pwrite(blk, idx * buf_size, buf, buf_size, 0);
if (ret < 0) {
goto out;
}
}
+ ret = 0;
out:
if (ret < 0) {
error_setg_errno(errp, -ret, "Can't pre-allocate");
}
out_with_err_set:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(buf);
@@ -1669,6 +1755,27 @@
return 0;
}
+/*
+ * Read and remove BLOCK_OPT_OBJECT_SIZE from opt and, when it is set,
+ * store its base-2 logarithm in s->inode.block_size_shift.
+ *
+ * A value of 0 (the default when the option is absent) leaves
+ * block_size_shift untouched.  Otherwise the size must be a power of two
+ * between 2^20 and 2^31 inclusive.
+ *
+ * Returns 0 on success, -EINVAL for an unacceptable size.
+ */
+static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
+{
+    struct SheepdogInode *inode = &s->inode;
+    uint64_t object_size;
+
+    object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE, 0);
+    if (object_size == 0) {
+        return 0;
+    }
+
+    /*
+     * Range-check the full 64-bit value first, so that the narrowing
+     * needed for ctz32() below can never drop set bits.
+     */
+    if (object_size < (UINT64_C(1) << 20) ||
+        object_size > (UINT64_C(1) << 31)) {
+        return -EINVAL;
+    }
+    if ((object_size - 1) & object_size) { /* not a power of 2? */
+        return -EINVAL;
+    }
+
+    inode->block_size_shift = (uint8_t)ctz32((uint32_t)object_size);
+
+    return 0;
+}
+
static int sd_create(const char *filename, QemuOpts *opts,
Error **errp)
{
@@ -1679,6 +1786,7 @@
BDRVSheepdogState *s;
char tag[SD_MAX_VDI_TAG_LEN];
uint32_t snapid;
+ uint64_t max_vdi_size;
bool prealloc = false;
s = g_new0(BDRVSheepdogState, 1);
@@ -1717,46 +1825,91 @@
goto out;
}
}
-
- if (s->inode.vdi_size > SD_MAX_VDI_SIZE) {
- error_setg(errp, "too big image size");
- ret = -EINVAL;
+ ret = parse_block_size_shift(s, opts);
+ if (ret < 0) {
+ error_setg(errp, "Invalid object_size."
+ " obect_size needs to be power of 2"
+ " and be limited from 2^20 to 2^31");
goto out;
}
if (backing_file) {
- BlockDriverState *bs;
+ BlockBackend *blk;
BDRVSheepdogState *base;
BlockDriver *drv;
/* Currently, only Sheepdog backing image is supported. */
- drv = bdrv_find_protocol(backing_file, true);
+ drv = bdrv_find_protocol(backing_file, true, NULL);
if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
error_setg(errp, "backing_file must be a sheepdog image");
ret = -EINVAL;
goto out;
}
- bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_PROTOCOL, NULL,
- errp);
- if (ret < 0) {
+ blk = blk_new_open(backing_file, NULL, NULL,
+ BDRV_O_PROTOCOL, errp);
+ if (blk == NULL) {
+ ret = -EIO;
goto out;
}
- base = bs->opaque;
+ base = blk_bs(blk)->opaque;
if (!is_snapshot(&base->inode)) {
error_setg(errp, "cannot clone from a non snapshot vdi");
- bdrv_unref(bs);
+ blk_unref(blk);
ret = -EINVAL;
goto out;
}
s->inode.vdi_id = base->inode.vdi_id;
- bdrv_unref(bs);
+ blk_unref(blk);
}
s->aio_context = qemu_get_aio_context();
+
+ /* if block_size_shift is not specified, get cluster default value */
+ if (s->inode.block_size_shift == 0) {
+ SheepdogVdiReq hdr;
+ SheepdogClusterRsp *rsp = (SheepdogClusterRsp *)&hdr;
+ Error *local_err = NULL;
+ int fd;
+ unsigned int wlen = 0, rlen = 0;
+
+ fd = connect_to_sdog(s, &local_err);
+ if (fd < 0) {
+ error_report_err(local_err);
+ ret = -EIO;
+ goto out;
+ }
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.opcode = SD_OP_GET_CLUSTER_DEFAULT;
+ hdr.proto_ver = SD_PROTO_VER;
+
+ ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+ NULL, &wlen, &rlen);
+ closesocket(fd);
+ if (ret) {
+ error_setg_errno(errp, -ret, "failed to get cluster default");
+ goto out;
+ }
+ if (rsp->result == SD_RES_SUCCESS) {
+ s->inode.block_size_shift = rsp->block_size_shift;
+ } else {
+ s->inode.block_size_shift = SD_DEFAULT_BLOCK_SIZE_SHIFT;
+ }
+ }
+
+ max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
+
+ if (s->inode.vdi_size > max_vdi_size) {
+ error_setg(errp, "An image is too large."
+ " The maximum image size is %"PRIu64 "GB",
+ max_vdi_size / 1024 / 1024 / 1024);
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = do_sd_create(s, &vid, 0, errp);
if (ret) {
goto out;
@@ -1785,8 +1938,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
return;
}
@@ -1809,7 +1961,8 @@
error_report("%s, %s", sd_strerror(rsp->result), s->name);
}
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->fd,
+ false, NULL, NULL, NULL);
closesocket(s->fd);
g_free(s->host_spec);
}
@@ -1827,19 +1980,20 @@
BDRVSheepdogState *s = bs->opaque;
int ret, fd;
unsigned int datalen;
+ uint64_t max_vdi_size;
+ max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
if (offset < s->inode.vdi_size) {
error_report("shrinking is not supported");
return -EINVAL;
- } else if (offset > SD_MAX_VDI_SIZE) {
+ } else if (offset > max_vdi_size) {
error_report("too big image size");
return -EINVAL;
}
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
return fd;
}
@@ -1870,16 +2024,16 @@
AIOReq *aio_req;
uint32_t offset, data_len, mn, mx;
- mn = s->min_dirty_data_idx;
- mx = s->max_dirty_data_idx;
+ mn = acb->min_dirty_data_idx;
+ mx = acb->max_dirty_data_idx;
if (mn <= mx) {
/* we need to update the vdi object. */
offset = sizeof(s->inode) - sizeof(s->inode.data_vdi_id) +
mn * sizeof(s->inode.data_vdi_id[0]);
data_len = (mx - mn + 1) * sizeof(s->inode.data_vdi_id[0]);
- s->min_dirty_data_idx = UINT32_MAX;
- s->max_dirty_data_idx = 0;
+ acb->min_dirty_data_idx = UINT32_MAX;
+ acb->max_dirty_data_idx = 0;
iov.iov_base = &s->inode;
iov.iov_len = sizeof(s->inode);
@@ -1912,8 +2066,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
return false;
}
@@ -1960,8 +2113,7 @@
deleted = sd_delete(s);
ret = do_sd_create(s, &vid, !deleted, &local_err);
if (ret) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
goto out;
}
@@ -1969,8 +2121,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
ret = fd;
goto out;
}
@@ -2013,9 +2164,10 @@
SheepdogAIOCB *acb = p;
int ret = 0;
unsigned long len, done = 0, total = acb->nb_sectors * BDRV_SECTOR_SIZE;
- unsigned long idx = acb->sector_num * BDRV_SECTOR_SIZE / SD_DATA_OBJ_SIZE;
+ unsigned long idx;
+ uint32_t object_size;
uint64_t oid;
- uint64_t offset = (acb->sector_num * BDRV_SECTOR_SIZE) % SD_DATA_OBJ_SIZE;
+ uint64_t offset;
BDRVSheepdogState *s = acb->common.bs->opaque;
SheepdogInode *inode = &s->inode;
AIOReq *aio_req;
@@ -2032,6 +2184,10 @@
}
}
+ object_size = (UINT32_C(1) << inode->block_size_shift);
+ idx = acb->sector_num * BDRV_SECTOR_SIZE / object_size;
+ offset = (acb->sector_num * BDRV_SECTOR_SIZE) % object_size;
+
/*
* Make sure we don't free the aiocb before we are done with all requests.
* This additional reference is dropped at the end of this function.
@@ -2045,7 +2201,7 @@
oid = vid_to_data_oid(inode->data_vdi_id[idx], idx);
- len = MIN(total - done, SD_DATA_OBJ_SIZE - offset);
+ len = MIN(total - done, object_size - offset);
switch (acb->aiocb_type) {
case AIOCB_READ_UDATA:
@@ -2069,7 +2225,7 @@
* We discard the object only when the whole object is
* 1) allocated 2) trimmed. Otherwise, simply skip it.
*/
- if (len != SD_DATA_OBJ_SIZE || inode->data_vdi_id[idx] == 0) {
+ if (len != object_size || inode->data_vdi_id[idx] == 0) {
goto done;
}
break;
@@ -2086,15 +2242,11 @@
}
aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
- old_oid, done);
+ old_oid,
+ acb->aiocb_type == AIOCB_DISCARD_OBJ ?
+ 0 : done);
QLIST_INSERT_HEAD(&s->inflight_aio_head, aio_req, aio_siblings);
- if (create) {
- if (check_simultaneous_create(s, aio_req)) {
- goto done;
- }
- }
-
add_aio_request(s, aio_req, acb->qiov->iov, acb->qiov->niov,
acb->aiocb_type);
done:
@@ -2109,6 +2261,20 @@
return 1;
}
+/*
+ * Look for an in-flight AIOCB whose affected object range overlaps that
+ * of aiocb.
+ *
+ * Returns true when one is found; aiocb is then NOT added to the list.
+ * Otherwise links aiocb at the head of s->inflight_aiocb_head and
+ * returns false.
+ */
+static bool check_overlapping_aiocb(BDRVSheepdogState *s, SheepdogAIOCB *aiocb)
+{
+    SheepdogAIOCB *inflight;
+    bool overlaps = false;
+
+    QLIST_FOREACH(inflight, &s->inflight_aiocb_head, aiocb_siblings) {
+        if (AIOCBOverlapping(aiocb, inflight)) {
+            overlaps = true;
+            break;
+        }
+    }
+
+    if (!overlaps) {
+        QLIST_INSERT_HEAD(&s->inflight_aiocb_head, aiocb, aiocb_siblings);
+    }
+
+    return overlaps;
+}
+
static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
int nb_sectors, QEMUIOVector *qiov)
{
@@ -2117,7 +2283,7 @@
int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
BDRVSheepdogState *s = bs->opaque;
- if (bs->growable && offset > s->inode.vdi_size) {
+ if (offset > s->inode.vdi_size) {
ret = sd_truncate(bs, offset);
if (ret < 0) {
return ret;
@@ -2128,14 +2294,25 @@
acb->aio_done_func = sd_write_done;
acb->aiocb_type = AIOCB_WRITE_UDATA;
+retry:
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
+ goto retry;
+ }
+
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
+ QLIST_REMOVE(acb, aiocb_siblings);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
+ QLIST_REMOVE(acb, aiocb_siblings);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
+
return acb->ret;
}
@@ -2144,19 +2321,30 @@
{
SheepdogAIOCB *acb;
int ret;
+ BDRVSheepdogState *s = bs->opaque;
acb = sd_aio_setup(bs, qiov, sector_num, nb_sectors);
acb->aiocb_type = AIOCB_READ_UDATA;
acb->aio_done_func = sd_finish_aiocb;
+retry:
+ if (check_overlapping_aiocb(s, acb)) {
+ qemu_co_queue_wait(&s->overlapping_queue);
+ goto retry;
+ }
+
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
+ QLIST_REMOVE(acb, aiocb_siblings);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
+ QLIST_REMOVE(acb, aiocb_siblings);
+ qemu_co_queue_restart_all(&s->overlapping_queue);
return acb->ret;
}
@@ -2218,8 +2406,7 @@
/* refresh inode. */
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
ret = fd;
goto cleanup;
}
@@ -2234,10 +2421,8 @@
ret = do_sd_create(s, &new_vid, 1, &local_err);
if (ret < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
- error_report("failed to create inode for snapshot. %s",
- strerror(errno));
+ error_reportf_err(local_err,
+ "failed to create inode for snapshot: ");
goto cleanup;
}
@@ -2308,13 +2493,131 @@
return ret;
}
+#define NR_BATCHED_DISCARD 128
+
+/*
+ * Zero every allocated entry of s->inode.data_vdi_id[] and write the
+ * cleared ranges to the object vid_to_vdi_oid(s->inode.vdi_id), covering
+ * at most NR_BATCHED_DISCARD allocated entries per write.
+ *
+ * Returns true on success, false when connecting or any write fails.
+ */
+static bool remove_objects(BDRVSheepdogState *s)
+{
+    int fd, i = 0, nr_objs = 0;
+    Error *local_err = NULL;
+    int ret;
+    bool result = true;
+    SheepdogInode *inode = &s->inode;
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return false;
+    }
+
+    nr_objs = count_data_objs(inode);
+    while (i < nr_objs) {
+        int start_idx, nr_filled_idx;
+
+        /* Skip the run of entries that are already unallocated. */
+        while (i < nr_objs && !inode->data_vdi_id[i]) {
+            i++;
+        }
+        start_idx = i;
+
+        nr_filled_idx = 0;
+        while (i < nr_objs && nr_filled_idx < NR_BATCHED_DISCARD) {
+            if (inode->data_vdi_id[i]) {
+                inode->data_vdi_id[i] = 0;
+                nr_filled_idx++;
+            }
+
+            i++;
+        }
+
+        if (nr_filled_idx == 0) {
+            /*
+             * Only unallocated entries were left, so start_idx == nr_objs.
+             * Do not issue a zero-length write at the end of the table.
+             */
+            break;
+        }
+
+        ret = write_object(fd, s->aio_context,
+                           (char *)&inode->data_vdi_id[start_idx],
+                           vid_to_vdi_oid(s->inode.vdi_id), inode->nr_copies,
+                           (i - start_idx) * sizeof(uint32_t),
+                           offsetof(struct SheepdogInode,
+                                    data_vdi_id[start_idx]),
+                           false, s->cache_flags);
+        if (ret < 0) {
+            error_report("failed to discard snapshot inode.");
+            result = false;
+            break;
+        }
+    }
+
+    closesocket(fd);
+    return result;
+}
+
+/*
+ * Delete a snapshot of the VDI named s->name.
+ *
+ * remove_objects() is run first, then snapshot_id is parsed as a decimal
+ * number.  A non-zero number is sent as hdr.snapid; otherwise snapshot_id
+ * is sent as the tag.  The 'name' argument is not used.
+ *
+ * Returns 0 on success (including when the sheep reports SD_RES_NO_VDI),
+ * -EINVAL for an unparsable or out-of-range snapshot_id, the error from
+ * find_vdi_name()/do_req(), or -1 for other failures.
+ */
static int sd_snapshot_delete(BlockDriverState *bs,
const char *snapshot_id,
const char *name,
Error **errp)
{
- /* FIXME: Delete specified snapshot id. */
- return 0;
+    unsigned long snap_id = 0;
+    char snap_tag[SD_MAX_VDI_TAG_LEN];
+    Error *local_err = NULL;
+    int fd, ret;
+    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+    BDRVSheepdogState *s = bs->opaque;
+    unsigned int wlen = SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN, rlen = 0;
+    uint32_t vid;
+    SheepdogVdiReq hdr = {
+        .opcode = SD_OP_DEL_VDI,
+        .data_length = wlen,
+        .flags = SD_FLAG_CMD_WRITE,
+    };
+    SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
+
+    if (!remove_objects(s)) {
+        return -1;
+    }
+
+    memset(buf, 0, sizeof(buf));
+    memset(snap_tag, 0, sizeof(snap_tag));
+    pstrcpy(buf, SD_MAX_VDI_LEN, s->name);
+    ret = qemu_strtoul(snapshot_id, NULL, 10, &snap_id);
+    if (ret || snap_id > UINT32_MAX) {
+        error_setg(errp, "Invalid snapshot ID: %s",
+                   snapshot_id ? snapshot_id : "<null>");
+        return -EINVAL;
+    }
+
+    if (snap_id) {
+        hdr.snapid = (uint32_t) snap_id;
+    } else {
+        pstrcpy(snap_tag, sizeof(snap_tag), snapshot_id);
+        pstrcpy(buf + SD_MAX_VDI_LEN, SD_MAX_VDI_TAG_LEN, snap_tag);
+    }
+
+    ret = find_vdi_name(s, s->name, snap_id, snap_tag, &vid, true,
+                        &local_err);
+    if (ret) {
+        /* Report and free the error object instead of leaking it. */
+        error_report_err(local_err);
+        return ret;
+    }
+
+    fd = connect_to_sdog(s, &local_err);
+    if (fd < 0) {
+        error_report_err(local_err);
+        return -1;
+    }
+
+    ret = do_req(fd, s->aio_context, (SheepdogReq *)&hdr,
+                 buf, &wlen, &rlen);
+    closesocket(fd);
+    if (ret) {
+        return ret;
+    }
+
+    switch (rsp->result) {
+    case SD_RES_NO_VDI:
+        error_report("%s was already deleted", s->name);
+        /* fall through: an already-deleted VDI is not treated as failure */
+    case SD_RES_SUCCESS:
+        break;
+    default:
+        error_report("%s, %s", sd_strerror(rsp->result), s->name);
+        return -1;
+    }
+
+    return ret;
}
static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
@@ -2336,8 +2639,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
ret = fd;
goto out;
}
@@ -2366,8 +2668,7 @@
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
ret = fd;
goto out;
}
@@ -2426,19 +2727,19 @@
uint64_t offset;
uint32_t vdi_index;
uint32_t vdi_id = load ? s->inode.parent_vdi_id : s->inode.vdi_id;
+ uint32_t object_size = (UINT32_C(1) << s->inode.block_size_shift);
fd = connect_to_sdog(s, &local_err);
if (fd < 0) {
- error_report("%s", error_get_pretty(local_err));;
- error_free(local_err);
+ error_report_err(local_err);
return fd;
}
while (remaining) {
- vdi_index = pos / SD_DATA_OBJ_SIZE;
- offset = pos % SD_DATA_OBJ_SIZE;
+ vdi_index = pos / object_size;
+ offset = pos % object_size;
- data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);
+ data_len = MIN(remaining, object_size - offset);
vmstate_oid = vid_to_vmstate_oid(vdi_id, vdi_index);
@@ -2483,52 +2784,82 @@
return ret;
}
-static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
- int64_t pos, int size)
+/*
+ * Load qiov->size bytes of VM state starting at pos.  The data is read
+ * into a temporary buffer from qemu_blockalign() and then copied into
+ * qiov.  Returns the value of do_load_save_vmstate().
+ */
+static int sd_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                           int64_t pos)
{
BDRVSheepdogState *s = bs->opaque;
- return do_load_save_vmstate(s, data, pos, size, 1);
+    void *bounce = qemu_blockalign(bs, qiov->size);
+    int ret = do_load_save_vmstate(s, bounce, pos, qiov->size, 1);
+
+    qemu_iovec_from_buf(qiov, 0, bounce, qiov->size);
+    qemu_vfree(bounce);
+
+    return ret;
}
-static coroutine_fn int sd_co_discard(BlockDriverState *bs, int64_t sector_num,
- int nb_sectors)
+/*
+ * Discard count bytes starting at byte offset; both must be multiples of
+ * BDRV_SECTOR_SIZE (asserted).  Returns 0 at once when the driver has
+ * recorded that discard is unsupported.
+ *
+ * The request carries a one-element iovec pointing at a zero uint32_t.
+ * It waits on s->overlapping_queue until no in-flight AIOCB overlaps its
+ * object range, and restarts that queue after leaving the in-flight list.
+ */
+static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
+                                       int count)
{
SheepdogAIOCB *acb;
- QEMUIOVector dummy;
BDRVSheepdogState *s = bs->opaque;
int ret;
+    uint32_t zero = 0;
+    struct iovec iov = {
+        .iov_base = &zero,
+        .iov_len = sizeof(zero),
+    };
+    QEMUIOVector discard_iov = {
+        .iov = &iov,
+        .niov = 1,
+    };
if (!s->discard_supported) {
- return 0;
+        return 0;
}
- acb = sd_aio_setup(bs, &dummy, sector_num, nb_sectors);
+    assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+    assert((count & (BDRV_SECTOR_SIZE - 1)) == 0);
+    acb = sd_aio_setup(bs, &discard_iov, offset >> BDRV_SECTOR_BITS,
+                       count >> BDRV_SECTOR_BITS);
acb->aiocb_type = AIOCB_DISCARD_OBJ;
acb->aio_done_func = sd_finish_aiocb;
+    /* Block until this request no longer overlaps an in-flight one. */
+    while (check_overlapping_aiocb(s, acb)) {
+        qemu_co_queue_wait(&s->overlapping_queue);
+    }
+
ret = sd_co_rw_vector(acb);
if (ret <= 0) {
+        QLIST_REMOVE(acb, aiocb_siblings);
+        qemu_co_queue_restart_all(&s->overlapping_queue);
qemu_aio_unref(acb);
return ret;
}
qemu_coroutine_yield();
+    QLIST_REMOVE(acb, aiocb_siblings);
+    qemu_co_queue_restart_all(&s->overlapping_queue);
+
return acb->ret;
}
static coroutine_fn int64_t
sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
- int *pnum)
+ int *pnum, BlockDriverState **file)
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
+ uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
- unsigned long start = offset / SD_DATA_OBJ_SIZE,
+ unsigned long start = offset / object_size,
end = DIV_ROUND_UP((sector_num + nb_sectors) *
- BDRV_SECTOR_SIZE, SD_DATA_OBJ_SIZE);
+ BDRV_SECTOR_SIZE, object_size);
unsigned long idx;
int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
@@ -2547,10 +2878,13 @@
}
}
- *pnum = (idx - start) * SD_DATA_OBJ_SIZE / BDRV_SECTOR_SIZE;
+ *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
if (*pnum > nb_sectors) {
*pnum = nb_sectors;
}
+ if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
+ *file = bs;
+ }
return ret;
}
@@ -2558,14 +2892,15 @@
{
BDRVSheepdogState *s = bs->opaque;
SheepdogInode *inode = &s->inode;
- unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, SD_DATA_OBJ_SIZE);
+ uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
+ unsigned long i, last = DIV_ROUND_UP(inode->vdi_size, object_size);
uint64_t size = 0;
for (i = 0; i < last; i++) {
if (inode->data_vdi_id[i] == 0) {
continue;
}
- size += SD_DATA_OBJ_SIZE;
+ size += object_size;
}
return size;
}
@@ -2594,6 +2929,11 @@
.type = QEMU_OPT_STRING,
.help = "Redundancy of the image"
},
+ {
+ .name = BLOCK_OPT_OBJECT_SIZE,
+ .type = QEMU_OPT_SIZE,
+ .help = "Object size of the image"
+ },
{ /* end of list */ }
}
};
@@ -2604,6 +2944,9 @@
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2614,7 +2957,7 @@
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -2637,6 +2980,9 @@
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2647,7 +2993,7 @@
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
@@ -2670,6 +3016,9 @@
.instance_size = sizeof(BDRVSheepdogState),
.bdrv_needs_filename = true,
.bdrv_file_open = sd_open,
+ .bdrv_reopen_prepare = sd_reopen_prepare,
+ .bdrv_reopen_commit = sd_reopen_commit,
+ .bdrv_reopen_abort = sd_reopen_abort,
.bdrv_close = sd_close,
.bdrv_create = sd_create,
.bdrv_has_zero_init = bdrv_has_zero_init_1,
@@ -2680,7 +3029,7 @@
.bdrv_co_readv = sd_co_readv,
.bdrv_co_writev = sd_co_writev,
.bdrv_co_flush_to_disk = sd_co_flush_to_disk,
- .bdrv_co_discard = sd_co_discard,
+ .bdrv_co_pdiscard = sd_co_pdiscard,
.bdrv_co_get_block_status = sd_co_get_block_status,
.bdrv_snapshot_create = sd_snapshot_create,
diff --git a/block/snapshot.c b/block/snapshot.c
index 698e1a1..bf5c2ca 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -22,8 +22,11 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "block/snapshot.h"
#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
QemuOptsList internal_snapshot_opts = {
.name = "snapshot",
@@ -148,7 +151,7 @@
if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) {
- return bdrv_can_snapshot(bs->file);
+ return bdrv_can_snapshot(bs->file->bs);
}
return 0;
}
@@ -167,7 +170,7 @@
return drv->bdrv_snapshot_create(bs, sn_info);
}
if (bs->file) {
- return bdrv_snapshot_create(bs->file, sn_info);
+ return bdrv_snapshot_create(bs->file->bs, sn_info);
}
return -ENOTSUP;
}
@@ -187,10 +190,10 @@
if (bs->file) {
drv->bdrv_close(bs);
- ret = bdrv_snapshot_goto(bs->file, snapshot_id);
+ ret = bdrv_snapshot_goto(bs->file->bs, snapshot_id);
open_ret = drv->bdrv_open(bs, NULL, bs->open_flags, NULL);
if (open_ret < 0) {
- bdrv_unref(bs->file);
+ bdrv_unref(bs->file->bs);
bs->drv = NULL;
return open_ret;
}
@@ -228,8 +231,10 @@
Error **errp)
{
BlockDriver *drv = bs->drv;
+ int ret;
+
if (!drv) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
@@ -238,23 +243,26 @@
}
/* drain all pending i/o before deleting snapshot */
- bdrv_drain_all();
+ bdrv_drained_begin(bs);
if (drv->bdrv_snapshot_delete) {
- return drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
+ ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
+ } else if (bs->file) {
+ ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp);
+ } else {
+ error_setg(errp, "Block format '%s' used by device '%s' "
+ "does not support internal snapshot deletion",
+ drv->format_name, bdrv_get_device_name(bs));
+ ret = -ENOTSUP;
}
- if (bs->file) {
- return bdrv_snapshot_delete(bs->file, snapshot_id, name, errp);
- }
- error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- drv->format_name, bdrv_get_device_name(bs),
- "internal snapshot deletion");
- return -ENOTSUP;
+
+ bdrv_drained_end(bs);
+ return ret;
}
-void bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
- const char *id_or_name,
- Error **errp)
+int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs,
+ const char *id_or_name,
+ Error **errp)
{
int ret;
Error *local_err = NULL;
@@ -269,6 +277,7 @@
if (ret < 0) {
error_propagate(errp, local_err);
}
+ return ret;
}
int bdrv_snapshot_list(BlockDriverState *bs,
@@ -282,7 +291,7 @@
return drv->bdrv_snapshot_list(bs, psn_info);
}
if (bs->file) {
- return bdrv_snapshot_list(bs->file, psn_info);
+ return bdrv_snapshot_list(bs->file->bs, psn_info);
}
return -ENOTSUP;
}
@@ -315,7 +324,7 @@
BlockDriver *drv = bs->drv;
if (!drv) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
}
if (!snapshot_id && !name) {
@@ -329,9 +338,9 @@
if (drv->bdrv_snapshot_load_tmp) {
return drv->bdrv_snapshot_load_tmp(bs, snapshot_id, name, errp);
}
- error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- drv->format_name, bdrv_get_device_name(bs),
- "temporarily load internal snapshot");
+ error_setg(errp, "Block format '%s' used by device '%s' "
+ "does not support temporarily loading internal snapshots",
+ drv->format_name, bdrv_get_device_name(bs));
return -ENOTSUP;
}
@@ -349,9 +358,164 @@
ret = bdrv_snapshot_load_tmp(bs, NULL, id_or_name, &local_err);
}
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
return ret;
}
+
+
+/* Group operations: each function below walks every BlockDriverState
+ * returned by bdrv_first()/bdrv_next(). They are dataplane-aware: the
+ * AioContext of each device is acquired around the per-device call. */
+
+/* Check that every inserted, writable device supports internal snapshots.
+ *
+ * The walk stops at the first device for which bdrv_can_snapshot() fails.
+ *
+ * @first_bad_bs: receives the device that failed the check, or NULL when
+ *                the walk reached the end of the list
+ * Returns true if no device failed the check.
+ */
+bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur = bdrv_first(&iter);
+    bool all_ok = true;
+
+    while (cur) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+
+        aio_context_acquire(aio);
+        if (bdrv_is_inserted(cur) && !bdrv_is_read_only(cur)) {
+            all_ok = bdrv_can_snapshot(cur);
+        }
+        aio_context_release(aio);
+
+        if (!all_ok) {
+            break;
+        }
+        cur = bdrv_next(&iter);
+    }
+
+    *first_bad_bs = cur;
+    return all_ok;
+}
+
+/* Delete the snapshot called @name from every snapshot-capable device
+ * on which bdrv_snapshot_find() locates it.
+ *
+ * The walk stops at the first deletion that returns a negative value.
+ *
+ * @first_bad_bs: receives the device whose deletion failed, or NULL when
+ *                the walk reached the end of the list
+ * @err: passed through to bdrv_snapshot_delete_by_id_or_name()
+ * Returns the result of the last deletion attempted (0 if none was).
+ */
+int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bad_bs,
+                             Error **err)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur = bdrv_first(&iter);
+    QEMUSnapshotInfo info;
+    int rc = 0;
+
+    while (cur) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+
+        aio_context_acquire(aio);
+        if (bdrv_can_snapshot(cur) &&
+            bdrv_snapshot_find(cur, &info, name) >= 0) {
+            rc = bdrv_snapshot_delete_by_id_or_name(cur, name, err);
+        }
+        aio_context_release(aio);
+
+        if (rc < 0) {
+            break;
+        }
+        cur = bdrv_next(&iter);
+    }
+
+    *first_bad_bs = cur;
+    return rc;
+}
+
+
+/* Call bdrv_snapshot_goto() with @name on every snapshot-capable device.
+ *
+ * The walk stops at the first call that returns a negative value.
+ *
+ * @first_bad_bs: receives the device whose call failed, or NULL when the
+ *                walk reached the end of the list
+ * Returns the result of the last bdrv_snapshot_goto() call (0 if none).
+ */
+int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur = bdrv_first(&iter);
+    int rc = 0;
+
+    while (cur) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+
+        aio_context_acquire(aio);
+        if (bdrv_can_snapshot(cur)) {
+            rc = bdrv_snapshot_goto(cur, name);
+        }
+        aio_context_release(aio);
+
+        if (rc < 0) {
+            break;
+        }
+        cur = bdrv_next(&iter);
+    }
+
+    *first_bad_bs = cur;
+    return rc;
+}
+
+/* Look up the snapshot called @name on every snapshot-capable device.
+ *
+ * The walk stops at the first device for which bdrv_snapshot_find()
+ * returns a negative value.
+ *
+ * @first_bad_bs: receives the device whose lookup failed, or NULL when
+ *                the walk reached the end of the list
+ * Returns the result of the last bdrv_snapshot_find() call (0 if none).
+ */
+int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur = bdrv_first(&iter);
+    QEMUSnapshotInfo info;
+    int rc = 0;
+
+    while (cur) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+
+        aio_context_acquire(aio);
+        if (bdrv_can_snapshot(cur)) {
+            rc = bdrv_snapshot_find(cur, &info, name);
+        }
+        aio_context_release(aio);
+
+        if (rc < 0) {
+            break;
+        }
+        cur = bdrv_next(&iter);
+    }
+
+    *first_bad_bs = cur;
+    return rc;
+}
+
+/* Create the snapshot described by @sn on @vm_state_bs and on every other
+ * snapshot-capable device.
+ *
+ * sn->vm_state_size is overwritten before each call: @vm_state_size for
+ * @vm_state_bs, 0 for any other device. The walk stops at the first
+ * bdrv_snapshot_create() call that returns a negative value.
+ *
+ * @first_bad_bs: receives the device whose call failed, or NULL when the
+ *                walk reached the end of the list
+ * Returns the result of the last bdrv_snapshot_create() call (0 if none).
+ */
+int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
+                             BlockDriverState *vm_state_bs,
+                             uint64_t vm_state_size,
+                             BlockDriverState **first_bad_bs)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur = bdrv_first(&iter);
+    int rc = 0;
+
+    while (cur) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+        bool holds_vmstate = (cur == vm_state_bs);
+
+        aio_context_acquire(aio);
+        /* The vmstate device is snapshotted without a capability check */
+        if (holds_vmstate || bdrv_can_snapshot(cur)) {
+            sn->vm_state_size = holds_vmstate ? vm_state_size : 0;
+            rc = bdrv_snapshot_create(cur, sn);
+        }
+        aio_context_release(aio);
+
+        if (rc < 0) {
+            break;
+        }
+        cur = bdrv_next(&iter);
+    }
+
+    *first_bad_bs = cur;
+    return rc;
+}
+
+/* Return the first device in bdrv_first()/bdrv_next() order for which
+ * bdrv_can_snapshot() is true, or NULL if there is none.
+ */
+BlockDriverState *bdrv_all_find_vmstate_bs(void)
+{
+    BdrvNextIterator iter;
+    BlockDriverState *cur;
+
+    for (cur = bdrv_first(&iter); cur; cur = bdrv_next(&iter)) {
+        AioContext *aio = bdrv_get_aio_context(cur);
+        bool usable;
+
+        aio_context_acquire(aio);
+        usable = bdrv_can_snapshot(cur);
+        aio_context_release(aio);
+
+        if (usable) {
+            return cur;
+        }
+    }
+    return NULL;
+}
diff --git a/block/ssh.c b/block/ssh.c
index f466cbf..5ce12b6 100644
--- a/block/ssh.c
+++ b/block/ssh.c
@@ -22,17 +22,18 @@
* THE SOFTWARE.
*/
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
+#include "qemu/osdep.h"
#include <libssh2.h>
#include <libssh2_sftp.h>
#include "block/block_int.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "qemu/uri.h"
#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qstring.h"
/* DEBUG_SSH=1 enables the DPRINTF (debugging printf) statements in
* this block driver code.
@@ -191,7 +192,7 @@
static int parse_uri(const char *filename, QDict *options, Error **errp)
{
URI *uri = NULL;
- QueryParams *qp = NULL;
+ QueryParams *qp;
int i;
uri = uri_parse(filename);
@@ -247,9 +248,6 @@
return 0;
err:
- if (qp) {
- query_params_free(qp);
- }
if (uri) {
uri_free(uri);
}
@@ -510,36 +508,73 @@
return ret;
}
+/* Runtime options accepted by the ssh block driver. connect_to_ssh()
+ * absorbs these keys from the options QDict into a QemuOpts instance.
+ *
+ * The descriptor array must end with an empty entry: consumers of a
+ * QemuOptsList walk desc[] until they hit a NULL name, so without the
+ * terminator they read past the end of the array.
+ */
+static QemuOptsList ssh_runtime_opts = {
+    .name = "ssh",
+    .head = QTAILQ_HEAD_INITIALIZER(ssh_runtime_opts.head),
+    .desc = {
+        {
+            .name = "host",
+            .type = QEMU_OPT_STRING,
+            .help = "Host to connect to",
+        },
+        {
+            .name = "port",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Port to connect to",
+        },
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+            .help = "Path of the image on the host",
+        },
+        {
+            .name = "user",
+            .type = QEMU_OPT_STRING,
+            .help = "User as which to connect",
+        },
+        {
+            .name = "host_key_check",
+            .type = QEMU_OPT_STRING,
+            .help = "Defines how and what to check the host key against",
+        },
+        { /* end of list */ }
+    },
+};
+
static int connect_to_ssh(BDRVSSHState *s, QDict *options,
int ssh_flags, int creat_mode, Error **errp)
{
int r, ret;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
const char *host, *user, *path, *host_key_check;
int port;
- if (!qdict_haskey(options, "host")) {
+ opts = qemu_opts_create(&ssh_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ ret = -EINVAL;
+ error_propagate(errp, local_err);
+ goto err;
+ }
+
+ host = qemu_opt_get(opts, "host");
+ if (!host) {
ret = -EINVAL;
error_setg(errp, "No hostname was specified");
goto err;
}
- host = qdict_get_str(options, "host");
- if (qdict_haskey(options, "port")) {
- port = qdict_get_int(options, "port");
- } else {
- port = 22;
- }
+ port = qemu_opt_get_number(opts, "port", 22);
- if (!qdict_haskey(options, "path")) {
+ path = qemu_opt_get(opts, "path");
+ if (!path) {
ret = -EINVAL;
error_setg(errp, "No path was specified");
goto err;
}
- path = qdict_get_str(options, "path");
- if (qdict_haskey(options, "user")) {
- user = qdict_get_str(options, "user");
- } else {
+ user = qemu_opt_get(opts, "user");
+ if (!user) {
user = g_get_user_name();
if (!user) {
error_setg_errno(errp, errno, "Can't get user name");
@@ -548,9 +583,8 @@
}
}
- if (qdict_haskey(options, "host_key_check")) {
- host_key_check = qdict_get_str(options, "host_key_check");
- } else {
+ host_key_check = qemu_opt_get(opts, "host_key_check");
+ if (!host_key_check) {
host_key_check = "yes";
}
@@ -561,7 +595,7 @@
/* Open the socket and connect. */
s->sock = inet_connect(s->hostport, errp);
if (s->sock < 0) {
- ret = -errno;
+ ret = -EIO;
goto err;
}
@@ -614,21 +648,14 @@
goto err;
}
+ qemu_opts_del(opts);
+
r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
if (r < 0) {
sftp_error_setg(errp, s, "failed to read file attributes");
return -EINVAL;
}
- /* Delete the options we've used; any not deleted will cause the
- * block layer to give an error about unused options.
- */
- qdict_del(options, "host");
- qdict_del(options, "port");
- qdict_del(options, "user");
- qdict_del(options, "path");
- qdict_del(options, "host_key_check");
-
return 0;
err:
@@ -648,6 +675,8 @@
}
s->session = NULL;
+ qemu_opts_del(opts);
+
return ret;
}
@@ -779,7 +808,7 @@
DPRINTF("co=%p", co);
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
}
static coroutine_fn void set_fd_handler(BDRVSSHState *s, BlockDriverState *bs)
@@ -801,14 +830,15 @@
rd_handler, wr_handler);
aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
- rd_handler, wr_handler, co);
+ false, rd_handler, wr_handler, co);
}
static coroutine_fn void clear_fd_handler(BDRVSSHState *s,
BlockDriverState *bs)
{
DPRINTF("s->sock=%d", s->sock);
- aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock, NULL, NULL, NULL);
+ aio_set_fd_handler(bdrv_get_aio_context(bs), s->sock,
+ false, NULL, NULL, NULL);
}
/* A non-blocking call returned EAGAIN, so yield, ensuring the
diff --git a/block/stream.c b/block/stream.c
index a628901..3187481 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -11,10 +11,14 @@
*
*/
+#include "qemu/osdep.h"
#include "trace.h"
#include "block/block_int.h"
#include "block/blockjob.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
#include "qemu/ratelimit.h"
+#include "sysemu/block-backend.h"
enum {
/*
@@ -35,7 +39,7 @@
char *backing_file_str;
} StreamBlockJob;
-static int coroutine_fn stream_populate(BlockDriverState *bs,
+static int coroutine_fn stream_populate(BlockBackend *blk,
int64_t sector_num, int nb_sectors,
void *buf)
{
@@ -48,35 +52,8 @@
qemu_iovec_init_external(&qiov, &iov, 1);
/* Copy-on-read the unallocated clusters */
- return bdrv_co_copy_on_readv(bs, sector_num, nb_sectors, &qiov);
-}
-
-static void close_unused_images(BlockDriverState *top, BlockDriverState *base,
- const char *base_id)
-{
- BlockDriverState *intermediate;
- intermediate = top->backing_hd;
-
- /* Must assign before bdrv_delete() to prevent traversing dangling pointer
- * while we delete backing image instances.
- */
- bdrv_set_backing_hd(top, base);
-
- while (intermediate) {
- BlockDriverState *unused;
-
- /* reached base */
- if (intermediate == base) {
- break;
- }
-
- unused = intermediate;
- intermediate = intermediate->backing_hd;
- bdrv_set_backing_hd(unused, NULL);
- bdrv_unref(unused);
- }
-
- bdrv_refresh_limits(top, NULL);
+ return blk_co_preadv(blk, sector_num * BDRV_SECTOR_SIZE, qiov.size, &qiov,
+ BDRV_REQ_COPY_ON_READ);
}
typedef struct {
@@ -88,6 +65,7 @@
{
StreamBlockJob *s = container_of(job, StreamBlockJob, common);
StreamCompleteData *data = opaque;
+ BlockDriverState *bs = blk_bs(job->blk);
BlockDriverState *base = s->base;
if (!block_job_is_cancelled(&s->common) && data->reached_end &&
@@ -99,8 +77,8 @@
base_fmt = base->drv->format_name;
}
}
- data->ret = bdrv_change_backing_file(job->bs, base_id, base_fmt);
- close_unused_images(job->bs, base, base_id);
+ data->ret = bdrv_change_backing_file(bs, base_id, base_fmt);
+ bdrv_set_backing_hd(bs, base);
}
g_free(s->backing_file_str);
@@ -112,23 +90,25 @@
{
StreamBlockJob *s = opaque;
StreamCompleteData *data;
- BlockDriverState *bs = s->common.bs;
+ BlockBackend *blk = s->common.blk;
+ BlockDriverState *bs = blk_bs(blk);
BlockDriverState *base = s->base;
- int64_t sector_num, end;
+ int64_t sector_num = 0;
+ int64_t end = -1;
+ uint64_t delay_ns = 0;
int error = 0;
int ret = 0;
int n = 0;
void *buf;
- if (!bs->backing_hd) {
- block_job_completed(&s->common, 0);
- return;
+ if (!bs->backing) {
+ goto out;
}
s->common.len = bdrv_getlength(bs);
if (s->common.len < 0) {
- block_job_completed(&s->common, s->common.len);
- return;
+ ret = s->common.len;
+ goto out;
}
end = s->common.len >> BDRV_SECTOR_BITS;
@@ -144,10 +124,8 @@
}
for (sector_num = 0; sector_num < end; sector_num += n) {
- uint64_t delay_ns = 0;
bool copy;
-wait:
/* Note that even when no rate limit is applied we need to yield
* with no pending I/O here so that bdrv_drain_all() returns.
*/
@@ -165,7 +143,7 @@
} else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [sector_num, sector_num+n). */
- ret = bdrv_is_allocated_above(bs->backing_hd, base,
+ ret = bdrv_is_allocated_above(backing_bs(bs), base,
sector_num, n, &n);
/* Finish early if end of backing file has been reached */
@@ -177,18 +155,11 @@
}
trace_stream_one_iteration(s, sector_num, n, ret);
if (copy) {
- if (s->common.speed) {
- delay_ns = ratelimit_calculate_delay(&s->limit, n);
- if (delay_ns > 0) {
- goto wait;
- }
- }
- ret = stream_populate(bs, sector_num, n, buf);
+ ret = stream_populate(blk, sector_num, n, buf);
}
if (ret < 0) {
BlockErrorAction action =
- block_job_error_action(&s->common, s->common.bs, s->on_error,
- true, -ret);
+ block_job_error_action(&s->common, s->on_error, true, -ret);
if (action == BLOCK_ERROR_ACTION_STOP) {
n = 0;
continue;
@@ -204,6 +175,9 @@
/* Publish progress */
s->common.offset += n * BDRV_SECTOR_SIZE;
+ if (copy && s->common.speed) {
+ delay_ns = ratelimit_calculate_delay(&s->limit, n);
+ }
}
if (!base) {
@@ -215,6 +189,7 @@
qemu_vfree(buf);
+out:
/* Modify backing chain and close BDSes in main loop */
data = g_malloc(sizeof(*data));
data->ret = ret;
@@ -227,7 +202,7 @@
StreamBlockJob *s = container_of(job, StreamBlockJob, common);
if (speed < 0) {
- error_set(errp, QERR_INVALID_PARAMETER, "speed");
+ error_setg(errp, QERR_INVALID_PARAMETER, "speed");
return;
}
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
@@ -239,22 +214,15 @@
.set_speed = stream_set_speed,
};
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *backing_file_str, int64_t speed,
- BlockdevOnError on_error,
- BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ int64_t speed, BlockdevOnError on_error,
+ BlockCompletionFunc *cb, void *opaque, Error **errp)
{
StreamBlockJob *s;
- if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
- on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
- !bdrv_iostatus_is_enabled(bs)) {
- error_set(errp, QERR_INVALID_PARAMETER, "on-error");
- return;
- }
-
- s = block_job_create(&stream_job_driver, bs, speed, cb, opaque, errp);
+ s = block_job_create(job_id, &stream_job_driver, bs, speed,
+ cb, opaque, errp);
if (!s) {
return;
}
@@ -263,7 +231,7 @@
s->backing_file_str = g_strdup(backing_file_str);
s->on_error = on_error;
- s->common.co = qemu_coroutine_create(stream_run);
+ s->common.co = qemu_coroutine_create(stream_run, s);
trace_stream_start(bs, base, s, s->common.co, opaque);
- qemu_coroutine_enter(s->common.co, s);
+ qemu_coroutine_enter(s->common.co);
}
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
new file mode 100644
index 0000000..59545e2
--- /dev/null
+++ b/block/throttle-groups.c
@@ -0,0 +1,509 @@
+/*
+ * QEMU block throttling group infrastructure
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ * Benoît Canet <benoit.canet@nodalink.com>
+ * Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "sysemu/block-backend.h"
+#include "block/throttle-groups.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "sysemu/qtest.h"
+
+/* The ThrottleGroup structure (with its ThrottleState) is shared
+ * among different BlockBackends and it's independent from
+ * AioContext, so in order to use it from different threads it needs
+ * its own locking.
+ *
+ * This locking is however handled internally in this file, so it's
+ * transparent to outside users.
+ *
+ * The whole ThrottleGroup structure is private and invisible to
+ * outside users, that only use it through its ThrottleState.
+ *
+ * In addition to the ThrottleGroup structure, BlockBackendPublic has
+ * fields that need to be accessed by other members of the group and
+ * therefore also need to be protected by this lock. Once a
+ * BlockBackend is registered in a group those fields can be accessed
+ * by other threads any time.
+ *
+ * Again, all this is handled internally and is mostly transparent to
+ * the outside. The 'throttle_timers' field however has an additional
+ * constraint because it may be temporarily invalid (see for example
+ * bdrv_set_aio_context()). Therefore in this file a thread will
+ * access some other BlockBackend's timers only after verifying that
+ * that BlockBackend has throttled requests in the queue.
+ */
+typedef struct ThrottleGroup {
+ char *name; /* This is constant during the lifetime of the group */
+
+ QemuMutex lock; /* This lock protects the following four fields */
+ ThrottleState ts; /* its address is what throttle_group_incref() returns */
+ QLIST_HEAD(, BlockBackendPublic) head; /* members, linked via round_robin */
+ BlockBackend *tokens[2]; /* round-robin token, indexed by is_write */
+ bool any_timer_armed[2]; /* timer-armed flag, indexed by is_write */
+
+ /* These two are protected by the global throttle_groups_lock */
+ unsigned refcount; /* raised by throttle_group_incref(), dropped by unref */
+ QTAILQ_ENTRY(ThrottleGroup) list; /* link in the throttle_groups list */
+} ThrottleGroup;
+
+static QemuMutex throttle_groups_lock;
+static QTAILQ_HEAD(, ThrottleGroup) throttle_groups =
+ QTAILQ_HEAD_INITIALIZER(throttle_groups);
+
+/* Take a reference on the ThrottleGroup called @name.
+ *
+ * The group is created and added to the global list first if no group
+ * with that name is registered yet. The whole operation runs under
+ * throttle_groups_lock.
+ *
+ * @name: the name of the ThrottleGroup
+ * @ret: the ThrottleState member of the ThrottleGroup
+ */
+ThrottleState *throttle_group_incref(const char *name)
+{
+    ThrottleGroup *group = NULL;
+    ThrottleGroup *cand;
+
+    qemu_mutex_lock(&throttle_groups_lock);
+
+    /* Reuse a registered group when the name matches */
+    QTAILQ_FOREACH(cand, &throttle_groups, list) {
+        if (strcmp(name, cand->name) == 0) {
+            group = cand;
+            break;
+        }
+    }
+
+    /* Otherwise build an empty group and register it */
+    if (group == NULL) {
+        group = g_new0(ThrottleGroup, 1);
+        group->name = g_strdup(name);
+        qemu_mutex_init(&group->lock);
+        throttle_init(&group->ts);
+        QLIST_INIT(&group->head);
+
+        QTAILQ_INSERT_TAIL(&throttle_groups, group, list);
+    }
+
+    group->refcount += 1;
+
+    qemu_mutex_unlock(&throttle_groups_lock);
+
+    return &group->ts;
+}
+
+/* Drop one reference to a ThrottleGroup.
+ *
+ * Once the count reaches zero the group is unlinked from the global list
+ * and its lock, name and storage are released. Runs under
+ * throttle_groups_lock.
+ *
+ * @ts: The ThrottleGroup to unref, given by its ThrottleState member
+ */
+void throttle_group_unref(ThrottleState *ts)
+{
+    ThrottleGroup *group = container_of(ts, ThrottleGroup, ts);
+
+    qemu_mutex_lock(&throttle_groups_lock);
+
+    group->refcount -= 1;
+    if (group->refcount == 0) {
+        QTAILQ_REMOVE(&throttle_groups, group, list);
+        qemu_mutex_destroy(&group->lock);
+        g_free(group->name);
+        g_free(group);
+    }
+
+    qemu_mutex_unlock(&throttle_groups_lock);
+}
+
+/* Return the name of the ThrottleGroup a BlockBackend belongs to. The
+ * name (and the pointer) stays constant for the lifetime of the group.
+ *
+ * @blk: a BlockBackend that is member of a throttling group
+ * @ret: the name of the group.
+ */
+const char *throttle_group_get_name(BlockBackend *blk)
+{
+    ThrottleState *ts = blk_get_public(blk)->throttle_state;
+    ThrottleGroup *group = container_of(ts, ThrottleGroup, ts);
+
+    return group->name;
+}
+
+/* Return the successor of a BlockBackend in its group's member list,
+ * wrapping around to the first member after the last one so that the
+ * list behaves like a ring.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @blk: the current BlockBackend
+ * @ret: the next BlockBackend in the sequence
+ */
+static BlockBackend *throttle_group_next_blk(BlockBackend *blk)
+{
+    BlockBackendPublic *cur = blk_get_public(blk);
+    BlockBackendPublic *succ = QLIST_NEXT(cur, round_robin);
+
+    if (succ == NULL) {
+        /* End of the list reached: restart from its head */
+        ThrottleGroup *group = container_of(cur->throttle_state,
+                                            ThrottleGroup, ts);
+        succ = QLIST_FIRST(&group->head);
+    }
+
+    return blk_by_public(succ);
+}
+
+/* Return the next BlockBackend in the round-robin sequence with pending I/O
+ * requests.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @blk: the current BlockBackend
+ * @is_write: the type of operation (read/write)
+ * @ret: the next BlockBackend with pending requests, or blk if there is
+ * none.
+ */
+static BlockBackend *next_throttle_token(BlockBackend *blk, bool is_write)
+{
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+    BlockBackend *token, *start;
+
+    start = token = tg->tokens[is_write];
+
+    /* Walk the ring starting right after the current token. Each candidate
+     * is judged by its own pending_reqs counter; testing the caller's
+     * counter instead would make the walk ignore the members it visits.
+     */
+    token = throttle_group_next_blk(token);
+    while (token != start &&
+           !blk_get_public(token)->pending_reqs[is_write]) {
+        token = throttle_group_next_blk(token);
+    }
+
+    /* If no IO are queued for scheduling on the next round robin token
+     * then decide the token is the current bs because chances are
+     * the current bs get the current request queued.
+     */
+    if (token == start && !blk_get_public(token)->pending_reqs[is_write]) {
+        token = blk;
+    }
+
+    return token;
+}
+
+/* Decide whether the next I/O request of a BlockBackend must wait.
+ *
+ * Nothing waits while the BlockBackend has its I/O limits disabled. If a
+ * timer of this direction is already armed in the group the request must
+ * wait. Otherwise throttle_schedule_timer() decides, and when it reports
+ * that the request must wait, blk becomes the group's token for this
+ * direction and the group is marked as having a timer armed.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @blk: the current BlockBackend
+ * @is_write: the type of operation (read/write)
+ * @ret: whether the I/O request needs to be throttled or not
+ */
+static bool throttle_group_schedule_timer(BlockBackend *blk, bool is_write)
+{
+    BlockBackendPublic *pub = blk_get_public(blk);
+    ThrottleGroup *group = container_of(pub->throttle_state,
+                                        ThrottleGroup, ts);
+
+    if (pub->io_limits_disabled) {
+        return false;
+    }
+
+    /* Some timer of this direction is already armed in the group */
+    if (group->any_timer_armed[is_write]) {
+        return true;
+    }
+
+    if (!throttle_schedule_timer(pub->throttle_state, &pub->throttle_timers,
+                                 is_write)) {
+        return false;
+    }
+
+    /* A timer has just been armed: blk becomes the current token */
+    group->tokens[is_write] = blk;
+    group->any_timer_armed[is_write] = true;
+    return true;
+}
+
+/* Look for the next pending I/O request and schedule it.
+ *
+ * The request to schedule belongs to the BlockBackend selected by
+ * next_throttle_token(), which is not necessarily blk. Both the
+ * pending-request check and the timer used for immediate execution
+ * therefore refer to that token, not to blk.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @blk: the current BlockBackend
+ * @is_write: the type of operation (read/write)
+ */
+static void schedule_next_request(BlockBackend *blk, bool is_write)
+{
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+    bool must_wait;
+    BlockBackend *token;
+
+    /* Check if there's any pending request to schedule next */
+    token = next_throttle_token(blk, is_write);
+    if (!blk_get_public(token)->pending_reqs[is_write]) {
+        return;
+    }
+
+    /* Set a timer for the request if it needs to be throttled */
+    must_wait = throttle_group_schedule_timer(token, is_write);
+
+    /* If it doesn't have to wait, queue it for immediate execution */
+    if (!must_wait) {
+        /* Give preference to requests from the current blk */
+        if (qemu_in_coroutine() &&
+            qemu_co_queue_next(&blkp->throttled_reqs[is_write])) {
+            token = blk;
+        } else {
+            /* Fire the token's own timer: its callback is the one that
+             * wakes a request queued on the token. */
+            ThrottleTimers *tt = &blk_get_public(token)->throttle_timers;
+            int64_t now = qemu_clock_get_ns(tt->clock_type);
+            timer_mod(tt->timers[is_write], now + 1);
+            tg->any_timer_armed[is_write] = true;
+        }
+        tg->tokens[is_write] = token;
+    }
+}
+
+/* Check if an I/O request needs to be throttled, wait and set a timer
+ * if necessary, and schedule the next request using a round robin
+ * algorithm.
+ *
+ * The group lock is taken and released inside this function; it is not
+ * held while the request sleeps in the throttled_reqs queue.
+ *
+ * @blk: the current BlockBackend
+ * @bytes: the number of bytes for this I/O
+ * @is_write: the type of operation (read/write)
+ */
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockBackend *blk,
+ unsigned int bytes,
+ bool is_write)
+{
+ bool must_wait;
+ BlockBackend *token;
+
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleGroup *tg = container_of(blkp->throttle_state, ThrottleGroup, ts);
+ qemu_mutex_lock(&tg->lock);
+
+ /* First we check if this I/O has to be throttled. */
+ token = next_throttle_token(blk, is_write);
+ must_wait = throttle_group_schedule_timer(token, is_write);
+
+ /* Wait if there's a timer set or queued requests of this type */
+ if (must_wait || blkp->pending_reqs[is_write]) {
+ /* pending_reqs counts this request for as long as it is queued */
+ blkp->pending_reqs[is_write]++;
+ /* Drop the group lock across the wait and retake it afterwards */
+ qemu_mutex_unlock(&tg->lock);
+ qemu_co_queue_wait(&blkp->throttled_reqs[is_write]);
+ qemu_mutex_lock(&tg->lock);
+ blkp->pending_reqs[is_write]--;
+ }
+
+ /* The I/O will be executed, so do the accounting */
+ throttle_account(blkp->throttle_state, is_write, bytes);
+
+ /* Schedule the next request */
+ schedule_next_request(blk, is_write);
+
+ qemu_mutex_unlock(&tg->lock);
+}
+
+/* Enter every request waiting in the read and write throttled queues of
+ * a BlockBackend, reads first, until qemu_co_enter_next() reports that
+ * there is nothing left to enter.
+ *
+ * @blk: the BlockBackend whose throttled requests are restarted
+ */
+void throttle_group_restart_blk(BlockBackend *blk)
+{
+    BlockBackendPublic *pub = blk_get_public(blk);
+    int dir;
+
+    for (dir = 0; dir < 2; dir++) {
+        bool entered;
+
+        do {
+            entered = qemu_co_enter_next(&pub->throttled_reqs[dir]);
+        } while (entered);
+    }
+}
+
+/* Apply a new throttle configuration to a group. Works like
+ * throttle_config(), but is performed under the group lock so that it is
+ * atomic within the throttling group.
+ *
+ * After the lock has been released, one waiting request per direction is
+ * entered on blk's throttled queues.
+ *
+ * @blk: a BlockBackend that is a member of the group
+ * @cfg: the configuration to set
+ */
+void throttle_group_config(BlockBackend *blk, ThrottleConfig *cfg)
+{
+    BlockBackendPublic *pub = blk_get_public(blk);
+    ThrottleGroup *group = container_of(pub->throttle_state,
+                                        ThrottleGroup, ts);
+    int dir;
+
+    qemu_mutex_lock(&group->lock);
+    /* throttle_config() cancels the timers */
+    for (dir = 0; dir < 2; dir++) {
+        if (timer_pending(pub->throttle_timers.timers[dir])) {
+            group->any_timer_armed[dir] = false;
+        }
+    }
+    throttle_config(pub->throttle_state, &pub->throttle_timers, cfg);
+    qemu_mutex_unlock(&group->lock);
+
+    for (dir = 0; dir < 2; dir++) {
+        qemu_co_enter_next(&pub->throttled_reqs[dir]);
+    }
+}
+
+/* Read the throttle configuration of a group. Works like
+ * throttle_get_config(), but is performed under the group lock so that
+ * it is atomic within the throttling group.
+ *
+ * @blk: a BlockBackend that is a member of the group
+ * @cfg: the configuration will be written here
+ */
+void throttle_group_get_config(BlockBackend *blk, ThrottleConfig *cfg)
+{
+    ThrottleState *state = blk_get_public(blk)->throttle_state;
+    ThrottleGroup *group = container_of(state, ThrottleGroup, ts);
+
+    qemu_mutex_lock(&group->lock);
+    throttle_get_config(state, cfg);
+    qemu_mutex_unlock(&group->lock);
+}
+
+/* ThrottleTimers callback. This wakes up a request that was waiting
+ * because it had been throttled.
+ *
+ * The group lock is only held while the group state is touched; it is
+ * released before the waiting request is entered.
+ *
+ * @blk: the BlockBackend whose request had been throttled
+ * @is_write: the type of operation (read/write)
+ */
+static void timer_cb(BlockBackend *blk, bool is_write)
+{
+ BlockBackendPublic *blkp = blk_get_public(blk);
+ ThrottleState *ts = blkp->throttle_state;
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+ bool empty_queue;
+
+ /* The timer has just been fired, so we can update the flag */
+ qemu_mutex_lock(&tg->lock);
+ tg->any_timer_armed[is_write] = false;
+ qemu_mutex_unlock(&tg->lock);
+
+ /* Run the request that was waiting for this timer */
+ /* qemu_co_enter_next() returning false means nothing was queued here */
+ empty_queue = !qemu_co_enter_next(&blkp->throttled_reqs[is_write]);
+
+ /* If the request queue was empty then we have to take care of
+ * scheduling the next one */
+ if (empty_queue) {
+ /* schedule_next_request() expects tg->lock to be held */
+ qemu_mutex_lock(&tg->lock);
+ schedule_next_request(blk, is_write);
+ qemu_mutex_unlock(&tg->lock);
+ }
+}
+
+/* Read-direction timer callback: forwards @opaque to timer_cb() as the
+ * BlockBackend, with is_write set to false. */
+static void read_timer_cb(void *opaque)
+{
+ timer_cb(opaque, false);
+}
+
+/* Write-direction timer callback: forwards @opaque to timer_cb() as the
+ * BlockBackend, with is_write set to true. */
+static void write_timer_cb(void *opaque)
+{
+ timer_cb(opaque, true);
+}
+
+/* Add a BlockBackend to the throttling group called @groupname.
+ *
+ * A reference is taken on the group (creating it if needed), the
+ * BlockBackend's throttle_state pointer is set to the group's state, the
+ * BlockBackend is linked into the member list and its throttle timers
+ * are initialized. Any token slot of the group that is still empty is
+ * set to this BlockBackend.
+ *
+ * @blk: the BlockBackend to insert
+ * @groupname: the name of the group
+ */
+void throttle_group_register_blk(BlockBackend *blk, const char *groupname)
+{
+    BlockBackendPublic *pub = blk_get_public(blk);
+    ThrottleState *state = throttle_group_incref(groupname);
+    ThrottleGroup *group = container_of(state, ThrottleGroup, ts);
+    /* Under qtest the virtual clock is used (block IO throttling tests) */
+    int clock_type = qtest_enabled() ? QEMU_CLOCK_VIRTUAL
+                                     : QEMU_CLOCK_REALTIME;
+    int dir;
+
+    pub->throttle_state = state;
+
+    qemu_mutex_lock(&group->lock);
+
+    /* If the ThrottleGroup is new set this BlockBackend as the token */
+    for (dir = 0; dir < 2; dir++) {
+        if (group->tokens[dir] == NULL) {
+            group->tokens[dir] = blk;
+        }
+    }
+
+    QLIST_INSERT_HEAD(&group->head, pub, round_robin);
+
+    throttle_timers_init(&pub->throttle_timers, blk_get_aio_context(blk),
+                         clock_type, read_timer_cb, write_timer_cb, blk);
+
+    qemu_mutex_unlock(&group->lock);
+}
+
+/* Remove a BlockBackend from its throttling group: hand over any token
+ * it holds, unlink it from the member list, destroy its timers, drop the
+ * group reference and reset its throttle_state pointer to NULL.
+ *
+ * The BlockBackend must not have pending throttled requests, so the caller
+ * has to drain them first; this is asserted on entry.
+ *
+ * The group will be destroyed if it's empty after this operation.
+ *
+ * @blk: the BlockBackend to remove
+ */
+void throttle_group_unregister_blk(BlockBackend *blk)
+{
+    BlockBackendPublic *blkp = blk_get_public(blk);
+    ThrottleGroup *group = container_of(blkp->throttle_state,
+                                        ThrottleGroup, ts);
+    int dir;
+
+    assert(blkp->pending_reqs[0] == 0 && blkp->pending_reqs[1] == 0);
+    assert(qemu_co_queue_empty(&blkp->throttled_reqs[0]));
+    assert(qemu_co_queue_empty(&blkp->throttled_reqs[1]));
+
+    qemu_mutex_lock(&group->lock);
+    for (dir = 0; dir < 2; dir++) {
+        BlockBackend *succ;
+
+        if (group->tokens[dir] != blk) {
+            continue;
+        }
+        /* Pass the token on; a blk that is its own successor is the last
+         * member, in which case the slot is cleared */
+        succ = throttle_group_next_blk(blk);
+        group->tokens[dir] = (succ == blk) ? NULL : succ;
+    }
+
+    /* remove the current blk from the list */
+    QLIST_REMOVE(blkp, round_robin);
+    throttle_timers_destroy(&blkp->throttle_timers);
+    qemu_mutex_unlock(&group->lock);
+
+    throttle_group_unref(&group->ts);
+    blkp->throttle_state = NULL;
+}
+
+/* Initialize throttle_groups_lock, the global mutex taken by
+ * throttle_group_incref() and throttle_group_unref(). */
+static void throttle_groups_init(void)
+{
+ qemu_mutex_init(&throttle_groups_lock);
+}
+
+/* Registers throttle_groups_init() through the block_init() hook */
+block_init(throttle_groups_init);
diff --git a/block/trace-events b/block/trace-events
new file mode 100644
index 0000000..05fa13c
--- /dev/null
+++ b/block/trace-events
@@ -0,0 +1,116 @@
+# See docs/tracing.txt for syntax documentation.
+
+# block.c
+bdrv_open_common(void *bs, const char *filename, int flags, const char *format_name) "bs %p filename \"%s\" flags %#x format_name \"%s\""
+bdrv_lock_medium(void *bs, bool locked) "bs %p locked %d"
+
+# block/block-backend.c
+blk_co_preadv(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+blk_co_pwritev(void *blk, void *bs, int64_t offset, unsigned int bytes, int flags) "blk %p bs %p offset %"PRId64" bytes %u flags %x"
+
+# block/io.c
+bdrv_aio_pdiscard(void *bs, int64_t offset, int count, void *opaque) "bs %p offset %"PRId64" count %d opaque %p"
+bdrv_aio_flush(void *bs, void *opaque) "bs %p opaque %p"
+bdrv_aio_readv(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_aio_writev(void *bs, int64_t sector_num, int nb_sectors, void *opaque) "bs %p sector_num %"PRId64" nb_sectors %d opaque %p"
+bdrv_co_readv(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_writev(void *bs, int64_t sector_num, int nb_sector) "bs %p sector_num %"PRId64" nb_sectors %d"
+bdrv_co_pwrite_zeroes(void *bs, int64_t offset, int count, int flags) "bs %p offset %"PRId64" count %d flags %#x"
+bdrv_co_do_copy_on_readv(void *bs, int64_t offset, unsigned int bytes, int64_t cluster_offset, unsigned int cluster_bytes) "bs %p offset %"PRId64" bytes %u cluster_offset %"PRId64" cluster_bytes %u"
+
+# block/stream.c
+stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+
+# block/commit.c
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
+
+# block/mirror.c
+mirror_start(void *bs, void *s, void *co, void *opaque) "bs %p s %p co %p opaque %p"
+mirror_restart_iter(void *s, int64_t cnt) "s %p dirty count %"PRId64
+mirror_before_flush(void *s) "s %p"
+mirror_before_drain(void *s, int64_t cnt) "s %p dirty count %"PRId64
+mirror_before_sleep(void *s, int64_t cnt, int synced, uint64_t delay_ns) "s %p dirty count %"PRId64" synced %d delay %"PRIu64"ns"
+mirror_one_iteration(void *s, int64_t sector_num, int nb_sectors) "s %p sector_num %"PRId64" nb_sectors %d"
+mirror_iteration_done(void *s, int64_t sector_num, int nb_sectors, int ret) "s %p sector_num %"PRId64" nb_sectors %d ret %d"
+mirror_yield(void *s, int64_t cnt, int buf_free_count, int in_flight) "s %p dirty count %"PRId64" free buffers %d in_flight %d"
+mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_num %"PRId64" in_flight %d"
+mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d"
+
+# block/backup.c
+backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d"
+backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d"
+backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64
+backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d"
+
+# blockdev.c
+qmp_block_job_cancel(void *job) "job %p"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
+qmp_block_job_complete(void *job) "job %p"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_stream(void *bs, void *job) "bs %p job %p"
+
+# block/raw-win32.c
+# block/raw-posix.c
+paio_submit_co(int64_t offset, int count, int type) "offset %"PRId64" count %d type %d"
+paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
+
+# block/qcow2.c
+qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
+qcow2_writev_done_req(void *co, int ret) "co %p ret %d"
+qcow2_writev_start_part(void *co) "co %p"
+qcow2_writev_done_part(void *co, int cur_bytes) "co %p cur_bytes %d"
+qcow2_writev_data(void *co, uint64_t offset) "co %p offset %" PRIx64
+qcow2_pwrite_zeroes_start_req(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
+qcow2_pwrite_zeroes(void *co, int64_t offset, int count) "co %p offset %" PRIx64 " count %d"
+
+# block/qcow2-cluster.c
+qcow2_alloc_clusters_offset(void *co, uint64_t offset, int bytes) "co %p offset %" PRIx64 " bytes %d"
+qcow2_handle_copied(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
+qcow2_handle_alloc(void *co, uint64_t guest_offset, uint64_t host_offset, uint64_t bytes) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " bytes %" PRIx64
+qcow2_do_alloc_clusters_offset(void *co, uint64_t guest_offset, uint64_t host_offset, int nb_clusters) "co %p guest_offset %" PRIx64 " host_offset %" PRIx64 " nb_clusters %d"
+qcow2_cluster_alloc_phys(void *co) "co %p"
+qcow2_cluster_link_l2(void *co, int nb_clusters) "co %p nb_clusters %d"
+
+qcow2_l2_allocate(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_get_empty(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l2(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_write_l1(void *bs, int l1_index) "bs %p l1_index %d"
+qcow2_l2_allocate_done(void *bs, int l1_index, int ret) "bs %p l1_index %d ret %d"
+
+# block/qcow2-cache.c
+qcow2_cache_get(void *co, int c, uint64_t offset, bool read_from_disk) "co %p is_l2_cache %d offset %" PRIx64 " read_from_disk %d"
+qcow2_cache_get_replace_entry(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_read(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_get_done(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+qcow2_cache_flush(void *co, int c) "co %p is_l2_cache %d"
+qcow2_cache_entry_flush(void *co, int c, int i) "co %p is_l2_cache %d index %d"
+
+# block/qed-l2-cache.c
+qed_alloc_l2_cache_entry(void *l2_cache, void *entry) "l2_cache %p entry %p"
+qed_unref_l2_cache_entry(void *entry, int ref) "entry %p ref %d"
+qed_find_l2_cache_entry(void *l2_cache, void *entry, uint64_t offset, int ref) "l2_cache %p entry %p offset %"PRIu64" ref %d"
+
+# block/qed-table.c
+qed_read_table(void *s, uint64_t offset, void *table) "s %p offset %"PRIu64" table %p"
+qed_read_table_cb(void *s, void *table, int ret) "s %p table %p ret %d"
+qed_write_table(void *s, uint64_t offset, void *table, unsigned int index, unsigned int n) "s %p offset %"PRIu64" table %p index %u n %u"
+qed_write_table_cb(void *s, void *table, int flush, int ret) "s %p table %p flush %d ret %d"
+
+# block/qed.c
+qed_need_check_timer_cb(void *s) "s %p"
+qed_start_need_check_timer(void *s) "s %p"
+qed_cancel_need_check_timer(void *s) "s %p"
+qed_aio_complete(void *s, void *acb, int ret) "s %p acb %p ret %d"
+qed_aio_setup(void *s, void *acb, int64_t sector_num, int nb_sectors, void *opaque, int flags) "s %p acb %p sector_num %"PRId64" nb_sectors %d opaque %p flags %#x"
+qed_aio_next_io(void *s, void *acb, int ret, uint64_t cur_pos) "s %p acb %p ret %d cur_pos %"PRIu64
+qed_aio_read_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+qed_aio_write_data(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
+qed_aio_write_prefill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
+qed_aio_write_postfill(void *s, void *acb, uint64_t start, size_t len, uint64_t offset) "s %p acb %p start %"PRIu64" len %zu offset %"PRIu64
+qed_aio_write_main(void *s, void *acb, int ret, uint64_t offset, size_t len) "s %p acb %p ret %d offset %"PRIu64" len %zu"
diff --git a/block/vdi.c b/block/vdi.c
index 39070b7..8a1cf97 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -49,10 +49,15 @@
* so this seems to be reasonable.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
+#include "qemu/coroutine.h"
+#include "qemu/cutils.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
@@ -196,6 +201,8 @@
/* VDI header (converted to host endianness). */
VdiHeader header;
+ CoMutex write_lock;
+
Error *migration_blocker;
} BDRVVdiState;
@@ -487,23 +494,26 @@
bmap_size = header.blocks_in_image * sizeof(uint32_t);
bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
- s->bmap = qemu_try_blockalign(bs->file, bmap_size * SECTOR_SIZE);
+ s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
if (s->bmap == NULL) {
ret = -ENOMEM;
goto fail;
}
- ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap, bmap_size);
+ ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
+ bmap_size);
if (ret < 0) {
goto fail_free_bmap;
}
/* Disable migration when vdi images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "vdi", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker, "The vdi format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
+ qemu_co_mutex_init(&s->write_lock);
+
return 0;
fail_free_bmap:
@@ -520,7 +530,7 @@
}
static int64_t coroutine_fn vdi_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
/* TODO: Check for too large sector_num (in bdrv_is_allocated or here). */
BDRVVdiState *s = (BDRVVdiState *)bs->opaque;
@@ -544,116 +554,155 @@
offset = s->header.offset_data +
(uint64_t)bmap_entry * s->block_size +
sector_in_block * SECTOR_SIZE;
+ *file = bs->file->bs;
return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
}
-static int vdi_co_read(BlockDriverState *bs,
- int64_t sector_num, uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
- logout("will read %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
+
+ logout("will read %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Block not allocated, return zeros, no need to wait. */
- memset(buf, 0, n_sectors * SECTOR_SIZE);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
ret = 0;
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- ret = bdrv_read(bs->file, offset, buf, n_sectors);
- }
- logout("%u sectors read\n", n_sectors);
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file, data_offset, n_bytes,
+ &local_qiov, 0);
+ }
+ logout("%u bytes read\n", n_bytes);
+
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
-static int vdi_co_write(BlockDriverState *bs,
- int64_t sector_num, const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vdi_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVdiState *s = bs->opaque;
+ QEMUIOVector local_qiov;
uint32_t bmap_entry;
uint32_t block_index;
- uint32_t sector_in_block;
- uint32_t n_sectors;
+ uint32_t offset_in_block;
+ uint32_t n_bytes;
uint32_t bmap_first = VDI_UNALLOCATED;
uint32_t bmap_last = VDI_UNALLOCATED;
uint8_t *block = NULL;
+ uint64_t bytes_done = 0;
int ret = 0;
logout("\n");
- while (ret >= 0 && nb_sectors > 0) {
- block_index = sector_num / s->block_sectors;
- sector_in_block = sector_num % s->block_sectors;
- n_sectors = s->block_sectors - sector_in_block;
- if (n_sectors > nb_sectors) {
- n_sectors = nb_sectors;
- }
+ qemu_iovec_init(&local_qiov, qiov->niov);
- logout("will write %u sectors starting at sector %" PRIu64 "\n",
- n_sectors, sector_num);
+ while (ret >= 0 && bytes > 0) {
+ block_index = offset / s->block_size;
+ offset_in_block = offset % s->block_size;
+ n_bytes = MIN(bytes, s->block_size - offset_in_block);
+
+ logout("will write %u bytes starting at offset %" PRIu64 "\n",
+ n_bytes, offset);
/* prepare next AIO request */
bmap_entry = le32_to_cpu(s->bmap[block_index]);
if (!VDI_IS_ALLOCATED(bmap_entry)) {
/* Allocate new block and write to it. */
- uint64_t offset;
+ uint64_t data_offset;
bmap_entry = s->header.blocks_allocated;
s->bmap[block_index] = cpu_to_le32(bmap_entry);
s->header.blocks_allocated++;
- offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors;
+ data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size;
if (block == NULL) {
block = g_malloc(s->block_size);
bmap_first = block_index;
}
bmap_last = block_index;
/* Copy data to be written to new block and zero unused parts. */
- memset(block, 0, sector_in_block * SECTOR_SIZE);
- memcpy(block + sector_in_block * SECTOR_SIZE,
- buf, n_sectors * SECTOR_SIZE);
- memset(block + (sector_in_block + n_sectors) * SECTOR_SIZE, 0,
- (s->block_sectors - n_sectors - sector_in_block) * SECTOR_SIZE);
- ret = bdrv_write(bs->file, offset, block, s->block_sectors);
+ memset(block, 0, offset_in_block);
+ qemu_iovec_to_buf(qiov, bytes_done, block + offset_in_block,
+ n_bytes);
+ memset(block + offset_in_block + n_bytes, 0,
+ s->block_size - n_bytes - offset_in_block);
+
+ /* Note that this coroutine does not yield anywhere from reading the
+ * bmap entry until here, so in regards to all the coroutines trying
+ * to write to this cluster, the one doing the allocation will
+ * always be the first to try to acquire the lock.
+ * Therefore, it is also the first that will actually be able to
+ * acquire the lock and thus the padded cluster is written before
+ * the other coroutines can write to the affected area. */
+ qemu_co_mutex_lock(&s->write_lock);
+ ret = bdrv_pwrite(bs->file, data_offset, block, s->block_size);
+ qemu_co_mutex_unlock(&s->write_lock);
} else {
- uint64_t offset = s->header.offset_data / SECTOR_SIZE +
- (uint64_t)bmap_entry * s->block_sectors +
- sector_in_block;
- ret = bdrv_write(bs->file, offset, buf, n_sectors);
+ uint64_t data_offset = s->header.offset_data +
+ (uint64_t)bmap_entry * s->block_size +
+ offset_in_block;
+ qemu_co_mutex_lock(&s->write_lock);
+ /* This lock is only used to make sure the following write operation
+ * is executed after the write issued by the coroutine allocating
+ * this cluster, therefore we do not need to keep it locked.
+ * As stated above, the allocating coroutine will always try to lock
+ * the mutex before all the other concurrent accesses to that
+ * cluster, therefore at this point we can be absolutely certain
+ * that that write operation has returned (there may be other writes
+ * in flight, but they do not concern this very operation). */
+ qemu_co_mutex_unlock(&s->write_lock);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file, data_offset, n_bytes,
+ &local_qiov, 0);
}
- nb_sectors -= n_sectors;
- sector_num += n_sectors;
- buf += n_sectors * SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
- logout("%u sectors written\n", n_sectors);
+ logout("%u bytes written\n", n_bytes);
}
+ qemu_iovec_destroy(&local_qiov);
+
logout("finished data write\n");
if (ret < 0) {
return ret;
@@ -664,6 +713,7 @@
VdiHeader *header = (VdiHeader *) block;
uint8_t *base;
uint64_t offset;
+ uint32_t n_sectors;
logout("now writing modified header\n");
assert(VDI_IS_ALLOCATED(bmap_first));
@@ -705,7 +755,7 @@
size_t bmap_size;
int64_t offset = 0;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
uint32_t *bmap = NULL;
logout("\n");
@@ -738,13 +788,17 @@
error_propagate(errp, local_err);
goto exit;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* We need enough blocks to store the given disk size,
so always round up. */
blocks = DIV_ROUND_UP(bytes, block_size);
@@ -774,7 +828,7 @@
vdi_header_print(&header);
#endif
vdi_header_to_le(&header);
- ret = bdrv_pwrite_sync(bs, offset, &header, sizeof(header));
+ ret = blk_pwrite(blk, offset, &header, sizeof(header), 0);
if (ret < 0) {
error_setg(errp, "Error writing header to %s", filename);
goto exit;
@@ -795,7 +849,7 @@
bmap[i] = VDI_UNALLOCATED;
}
}
- ret = bdrv_pwrite_sync(bs, offset, bmap, bmap_size);
+ ret = blk_pwrite(blk, offset, bmap, bmap_size, 0);
if (ret < 0) {
error_setg(errp, "Error writing bmap to %s", filename);
goto exit;
@@ -804,7 +858,7 @@
}
if (image_type == VDI_TYPE_STATIC) {
- ret = bdrv_truncate(bs, offset + blocks * block_size);
+ ret = blk_truncate(blk, offset + blocks * block_size);
if (ret < 0) {
error_setg(errp, "Failed to statically allocate %s", filename);
goto exit;
@@ -812,7 +866,7 @@
}
exit:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(bmap);
return ret;
}
@@ -852,11 +906,6 @@
.def_value_str = "off"
},
#endif
- {
- .name = BLOCK_OPT_NOCOW,
- .type = QEMU_OPT_BOOL,
- .help = "Turn off copy-on-write (valid only on btrfs)"
- },
/* TODO: An additional option to set UUID values might be useful. */
{ /* end of list */ }
}
@@ -874,9 +923,9 @@
.bdrv_co_get_block_status = vdi_co_get_block_status,
.bdrv_make_empty = vdi_make_empty,
- .bdrv_read = vdi_co_read,
+ .bdrv_co_preadv = vdi_co_preadv,
#if defined(CONFIG_VDI_WRITE)
- .bdrv_write = vdi_co_write,
+ .bdrv_co_pwritev = vdi_co_pwritev,
#endif
.bdrv_get_info = vdi_get_info,
diff --git a/block/vhdx-endian.c b/block/vhdx-endian.c
index 0640d3f..c306b90 100644
--- a/block/vhdx-endian.c
+++ b/block/vhdx-endian.c
@@ -15,8 +15,10 @@
*
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
#include <uuid/uuid.h>
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
index 6547bec..02eb104 100644
--- a/block/vhdx-log.c
+++ b/block/vhdx-log.c
@@ -17,9 +17,13 @@
* See the COPYING.LIB file in the top-level directory.
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
@@ -190,7 +194,8 @@
/* full */
break;
}
- ret = bdrv_pwrite(bs->file, offset, buffer_tmp, VHDX_LOG_SECTOR_SIZE);
+ ret = bdrv_pwrite(bs->file, offset, buffer_tmp,
+ VHDX_LOG_SECTOR_SIZE);
if (ret < 0) {
goto exit;
}
@@ -352,7 +357,7 @@
}
desc_sectors = vhdx_compute_desc_sectors(hdr.descriptor_count);
- desc_entries = qemu_try_blockalign(bs->file,
+ desc_entries = qemu_try_blockalign(bs->file->bs,
desc_sectors * VHDX_LOG_SECTOR_SIZE);
if (desc_entries == NULL) {
ret = -ENOMEM;
@@ -508,7 +513,7 @@
/* if the log shows a FlushedFileOffset larger than our current file
* size, then that means the file has been truncated / corrupted, and
* we must refused to open it / use it */
- if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file)) {
+ if (hdr_tmp.flushed_file_offset > bdrv_getlength(bs->file->bs)) {
ret = -EINVAL;
goto exit;
}
@@ -538,12 +543,12 @@
goto exit;
}
}
- if (bdrv_getlength(bs->file) < desc_entries->hdr.last_file_offset) {
+ if (bdrv_getlength(bs->file->bs) < desc_entries->hdr.last_file_offset) {
new_file_size = desc_entries->hdr.last_file_offset;
if (new_file_size % (1024*1024)) {
/* round up to nearest 1MB boundary */
new_file_size = ((new_file_size >> 20) + 1) << 20;
- bdrv_truncate(bs->file, new_file_size);
+ bdrv_truncate(bs->file->bs, new_file_size);
}
}
qemu_vfree(desc_entries);
@@ -782,12 +787,13 @@
if (logs.valid) {
if (bs->read_only) {
ret = -EPERM;
- error_setg_errno(errp, EPERM,
- "VHDX image file '%s' opened read-only, but "
- "contains a log that needs to be replayed. To "
- "replay the log, execute:\n qemu-img check -r "
- "all '%s'",
- bs->filename, bs->filename);
+ error_setg(errp,
+ "VHDX image file '%s' opened read-only, but "
+ "contains a log that needs to be replayed",
+ bs->filename);
+ error_append_hint(errp, "To replay the log, run:\n"
+ "qemu-img check -r all '%s'\n",
+ bs->filename);
goto exit;
}
/* now flush the log */
@@ -907,8 +913,8 @@
.sequence_number = s->log.sequence,
.descriptor_count = sectors,
.reserved = 0,
- .flushed_file_offset = bdrv_getlength(bs->file),
- .last_file_offset = bdrv_getlength(bs->file),
+ .flushed_file_offset = bdrv_getlength(bs->file->bs),
+ .last_file_offset = bdrv_getlength(bs->file->bs),
};
new_hdr.log_guid = header->log_guid;
diff --git a/block/vhdx.c b/block/vhdx.c
index 12bfe75..75ef2b1 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -15,15 +15,18 @@
*
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "qemu/crc32c.h"
+#include "qemu/bswap.h"
#include "block/vhdx.h"
#include "migration/migration.h"
#include <uuid/uuid.h>
-#include <glib.h>
/* Options for VHDX creation */
@@ -263,10 +266,10 @@
static void vhdx_set_shift_bits(BDRVVHDXState *s)
{
- s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
- s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
- s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
- s->block_size_bits = 31 - clz32(s->block_size);
+ s->logical_sector_size_bits = ctz32(s->logical_sector_size);
+ s->sectors_per_block_bits = ctz32(s->sectors_per_block);
+ s->chunk_ratio_bits = ctz64(s->chunk_ratio);
+ s->block_size_bits = ctz32(s->block_size);
}
/*
@@ -295,9 +298,10 @@
* and then update the header checksum. Header is converted to proper
* endianness before being written to the specified file offset
*/
-static int vhdx_write_header(BlockDriverState *bs_file, VHDXHeader *hdr,
+static int vhdx_write_header(BdrvChild *file, VHDXHeader *hdr,
uint64_t offset, bool read)
{
+ BlockDriverState *bs_file = file->bs;
uint8_t *buffer = NULL;
int ret;
VHDXHeader *header_le;
@@ -312,7 +316,7 @@
buffer = qemu_blockalign(bs_file, VHDX_HEADER_SIZE);
if (read) {
/* if true, we can't assume the extra reserved bytes are 0 */
- ret = bdrv_pread(bs_file, offset, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(file, offset, buffer, VHDX_HEADER_SIZE);
if (ret < 0) {
goto exit;
}
@@ -326,7 +330,7 @@
vhdx_header_le_export(hdr, header_le);
vhdx_update_checksum(buffer, VHDX_HEADER_SIZE,
offsetof(VHDXHeader, checksum));
- ret = bdrv_pwrite_sync(bs_file, offset, header_le, sizeof(VHDXHeader));
+ ret = bdrv_pwrite_sync(file, offset, header_le, sizeof(VHDXHeader));
exit:
qemu_vfree(buffer);
@@ -427,7 +431,8 @@
/* We have to read the whole VHDX_HEADER_SIZE instead of
* sizeof(VHDXHeader), because the checksum is over the whole
* region */
- ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -443,7 +448,8 @@
}
}
- ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
+ ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer,
+ VHDX_HEADER_SIZE);
if (ret < 0) {
goto fail;
}
@@ -854,14 +860,8 @@
{
uint32_t data_blocks_cnt, bitmap_blocks_cnt;
- data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
- if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
- data_blocks_cnt++;
- }
- bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
- if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
- bitmap_blocks_cnt++;
- }
+ data_blocks_cnt = DIV_ROUND_UP(s->virtual_disk_size, s->block_size);
+ bitmap_blocks_cnt = DIV_ROUND_UP(data_blocks_cnt, s->chunk_ratio);
if (s->parent_entries) {
s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
@@ -959,7 +959,7 @@
}
/* s->bat is freed in vhdx_close() */
- s->bat = qemu_try_blockalign(bs->file, s->bat_rt.length);
+ s->bat = qemu_try_blockalign(bs->file->bs, s->bat_rt.length);
if (s->bat == NULL) {
ret = -ENOMEM;
goto fail;
@@ -1002,9 +1002,9 @@
/* TODO: differencing files */
/* Disable migration when VHDX images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "vhdx", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker, "The vhdx format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
return 0;
@@ -1109,8 +1109,9 @@
/* check the payload block state */
switch (s->bat[sinfo.bat_idx] & VHDX_BAT_STATE_BIT_MASK) {
case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
- case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
- case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
+ case PAYLOAD_BLOCK_UNDEFINED:
+ case PAYLOAD_BLOCK_UNMAPPED:
+ case PAYLOAD_BLOCK_UNMAPPED_v095:
case PAYLOAD_BLOCK_ZERO:
/* return zero */
qemu_iovec_memset(&hd_qiov, 0, 0, sinfo.bytes_avail);
@@ -1155,12 +1156,12 @@
static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
uint64_t *new_offset)
{
- *new_offset = bdrv_getlength(bs->file);
+ *new_offset = bdrv_getlength(bs->file->bs);
/* per the spec, the address for a block is in units of 1MB */
*new_offset = ROUND_UP(*new_offset, 1024 * 1024);
- return bdrv_truncate(bs->file, *new_offset + s->block_size);
+ return bdrv_truncate(bs->file->bs, *new_offset + s->block_size);
}
/*
@@ -1173,7 +1174,18 @@
{
/* The BAT entry is a uint64, with 44 bits for the file offset in units of
* 1MB, and 3 bits for the block state. */
- s->bat[sinfo->bat_idx] = sinfo->file_offset;
+ if ((state == PAYLOAD_BLOCK_ZERO) ||
+ (state == PAYLOAD_BLOCK_UNDEFINED) ||
+ (state == PAYLOAD_BLOCK_NOT_PRESENT) ||
+ (state == PAYLOAD_BLOCK_UNMAPPED)) {
+ s->bat[sinfo->bat_idx] = 0; /* For PAYLOAD_BLOCK_ZERO, the
+ FileOffsetMB field is denoted as
+ 'reserved' in the v1.0 spec. If it is
+ non-zero, MS Hyper-V will fail to read
+ the disk image */
+ } else {
+ s->bat[sinfo->bat_idx] = sinfo->file_offset;
+ }
s->bat[sinfo->bat_idx] |= state & VHDX_BAT_STATE_BIT_MASK;
@@ -1248,7 +1260,7 @@
/* Queue another write of zero buffers if the underlying file
* does not zero-fill on file extension */
- if (bdrv_has_zero_init(bs->file) == 0) {
+ if (bdrv_has_zero_init(bs->file->bs) == 0) {
use_zero_buffers = true;
/* zero fill the front, if any */
@@ -1257,7 +1269,7 @@
iov1.iov_base = qemu_blockalign(bs, iov1.iov_len);
memset(iov1.iov_base, 0, iov1.iov_len);
qemu_iovec_concat_iov(&hd_qiov, &iov1, 1, 0,
- sinfo.block_offset);
+ iov1.iov_len);
sectors_to_write += iov1.iov_len >> BDRV_SECTOR_BITS;
}
@@ -1273,15 +1285,15 @@
iov2.iov_base = qemu_blockalign(bs, iov2.iov_len);
memset(iov2.iov_base, 0, iov2.iov_len);
qemu_iovec_concat_iov(&hd_qiov, &iov2, 1, 0,
- sinfo.block_offset);
+ iov2.iov_len);
sectors_to_write += iov2.iov_len >> BDRV_SECTOR_BITS;
}
}
-
/* fall through */
case PAYLOAD_BLOCK_NOT_PRESENT: /* fall through */
- case PAYLOAD_BLOCK_UNMAPPED: /* fall through */
- case PAYLOAD_BLOCK_UNDEFINED: /* fall through */
+ case PAYLOAD_BLOCK_UNMAPPED:
+ case PAYLOAD_BLOCK_UNMAPPED_v095:
+ case PAYLOAD_BLOCK_UNDEFINED:
bat_prior_offset = sinfo.file_offset;
ret = vhdx_allocate_block(bs, s, &sinfo.file_offset);
if (ret < 0) {
@@ -1376,9 +1388,11 @@
* There are 2 headers, and the highest sequence number will represent
* the active header
*/
-static int vhdx_create_new_headers(BlockDriverState *bs, uint64_t image_size,
+static int vhdx_create_new_headers(BlockBackend *blk, uint64_t image_size,
uint32_t log_size)
{
+ BlockDriverState *bs = blk_bs(blk);
+ BdrvChild *child;
int ret = 0;
VHDXHeader *hdr = NULL;
@@ -1393,12 +1407,18 @@
vhdx_guid_generate(&hdr->file_write_guid);
vhdx_guid_generate(&hdr->data_write_guid);
- ret = vhdx_write_header(bs, hdr, VHDX_HEADER1_OFFSET, false);
+ /* XXX Ugly way to get blk->root, but that's a feature, not a bug. This
+ * hack makes it obvious that vhdx_write_header() bypasses the BlockBackend
+ * here, which it really shouldn't be doing. */
+ child = QLIST_FIRST(&bs->parents);
+ assert(!QLIST_NEXT(child, next_parent));
+
+ ret = vhdx_write_header(child, hdr, VHDX_HEADER1_OFFSET, false);
if (ret < 0) {
goto exit;
}
hdr->sequence_number++;
- ret = vhdx_write_header(bs, hdr, VHDX_HEADER2_OFFSET, false);
+ ret = vhdx_write_header(child, hdr, VHDX_HEADER2_OFFSET, false);
if (ret < 0) {
goto exit;
}
@@ -1431,7 +1451,7 @@
* The first 64KB of the Metadata section is reserved for the metadata
* header and entries; beyond that, the metadata items themselves reside.
*/
-static int vhdx_create_new_metadata(BlockDriverState *bs,
+static int vhdx_create_new_metadata(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1442,7 +1462,7 @@
uint32_t offset = 0;
void *buffer = NULL;
void *entry_buffer;
- VHDXMetadataTableHeader *md_table;;
+ VHDXMetadataTableHeader *md_table;
VHDXMetadataTableEntry *md_table_entry;
/* Metadata entries */
@@ -1527,13 +1547,13 @@
VHDX_META_FLAGS_IS_VIRTUAL_DISK;
vhdx_metadata_entry_le_export(&md_table_entry[4]);
- ret = bdrv_pwrite(bs, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, metadata_offset, buffer, VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = bdrv_pwrite(bs, metadata_offset + (64 * KiB), entry_buffer,
- VHDX_METADATA_ENTRY_BUFFER_SIZE);
+ ret = blk_pwrite(blk, metadata_offset + (64 * KiB), entry_buffer,
+ VHDX_METADATA_ENTRY_BUFFER_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1553,7 +1573,7 @@
* Fixed images: default state of the BAT is fully populated, with
* file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
*/
-static int vhdx_create_bat(BlockDriverState *bs, BDRVVHDXState *s,
+static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
uint64_t image_size, VHDXImageType type,
bool use_zero_blocks, uint64_t file_offset,
uint32_t length)
@@ -1577,12 +1597,12 @@
if (type == VHDX_TYPE_DYNAMIC) {
/* All zeroes, so we can just extend the file - the end of the BAT
* is the furthest thing we have written yet */
- ret = bdrv_truncate(bs, data_file_offset);
+ ret = blk_truncate(blk, data_file_offset);
if (ret < 0) {
goto exit;
}
} else if (type == VHDX_TYPE_FIXED) {
- ret = bdrv_truncate(bs, data_file_offset + image_size);
+ ret = blk_truncate(blk, data_file_offset + image_size);
if (ret < 0) {
goto exit;
}
@@ -1593,7 +1613,7 @@
if (type == VHDX_TYPE_FIXED ||
use_zero_blocks ||
- bdrv_has_zero_init(bs) == 0) {
+ bdrv_has_zero_init(blk_bs(blk)) == 0) {
/* for a fixed file, the default BAT entry is not zero */
s->bat = g_try_malloc0(length);
if (length && s->bat == NULL) {
@@ -1609,12 +1629,12 @@
sinfo.file_offset = data_file_offset +
(sector_num << s->logical_sector_size_bits);
sinfo.file_offset = ROUND_UP(sinfo.file_offset, MiB);
- vhdx_update_bat_table_entry(bs, s, &sinfo, &unused, &unused,
+ vhdx_update_bat_table_entry(blk_bs(blk), s, &sinfo, &unused, &unused,
block_state);
cpu_to_le64s(&s->bat[sinfo.bat_idx]);
sector_num += s->sectors_per_block;
}
- ret = bdrv_pwrite(bs, file_offset, s->bat, length);
+ ret = blk_pwrite(blk, file_offset, s->bat, length, 0);
if (ret < 0) {
goto exit;
}
@@ -1634,7 +1654,7 @@
* to create the BAT itself, we will also cause the BAT to be
* created.
*/
-static int vhdx_create_new_region_table(BlockDriverState *bs,
+static int vhdx_create_new_region_table(BlockBackend *blk,
uint64_t image_size,
uint32_t block_size,
uint32_t sector_size,
@@ -1709,21 +1729,21 @@
/* The region table gives us the data we need to create the BAT,
* so do that now */
- ret = vhdx_create_bat(bs, s, image_size, type, use_zero_blocks,
+ ret = vhdx_create_bat(blk, s, image_size, type, use_zero_blocks,
bat_file_offset, bat_length);
if (ret < 0) {
goto exit;
}
/* Now write out the region headers to disk */
- ret = bdrv_pwrite(bs, VHDX_REGION_TABLE_OFFSET, buffer,
- VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
- ret = bdrv_pwrite(bs, VHDX_REGION_TABLE2_OFFSET, buffer,
- VHDX_HEADER_BLOCK_SIZE);
+ ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, buffer,
+ VHDX_HEADER_BLOCK_SIZE, 0);
if (ret < 0) {
goto exit;
}
@@ -1763,7 +1783,7 @@
gunichar2 *creator = NULL;
glong creator_items;
- BlockDriverState *bs;
+ BlockBackend *blk;
char *type = NULL;
VHDXImageType image_type;
Error *local_err = NULL;
@@ -1773,7 +1793,7 @@
log_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_LOG_SIZE, 0);
block_size = qemu_opt_get_size_del(opts, VHDX_BLOCK_OPT_BLOCK_SIZE, 0);
type = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
- use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, false);
+ use_zero_blocks = qemu_opt_get_bool_del(opts, VHDX_BLOCK_OPT_ZERO, true);
if (image_size > VHDX_MAX_IMAGE_SIZE) {
error_setg_errno(errp, EINVAL, "Image size too large; max of 64TB");
@@ -1828,14 +1848,16 @@
goto exit;
}
- bs = NULL;
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/* Create (A) */
/* The creator field is optional, but may be useful for
@@ -1843,13 +1865,14 @@
creator = g_utf8_to_utf16("QEMU v" QEMU_VERSION, -1, NULL,
&creator_items, NULL);
signature = cpu_to_le64(VHDX_FILE_SIGNATURE);
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET, &signature, sizeof(signature),
+ 0);
if (ret < 0) {
goto delete_and_exit;
}
if (creator) {
- ret = bdrv_pwrite(bs, VHDX_FILE_ID_OFFSET + sizeof(signature),
- creator, creator_items * sizeof(gunichar2));
+ ret = blk_pwrite(blk, VHDX_FILE_ID_OFFSET + sizeof(signature),
+ creator, creator_items * sizeof(gunichar2), 0);
if (ret < 0) {
goto delete_and_exit;
}
@@ -1857,13 +1880,13 @@
/* Creates (B),(C) */
- ret = vhdx_create_new_headers(bs, image_size, log_size);
+ ret = vhdx_create_new_headers(blk, image_size, log_size);
if (ret < 0) {
goto delete_and_exit;
}
/* Creates (D),(E),(G) explicitly. (F) created as by-product */
- ret = vhdx_create_new_region_table(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_region_table(blk, image_size, block_size, 512,
log_size, use_zero_blocks, image_type,
&metadata_offset);
if (ret < 0) {
@@ -1871,7 +1894,7 @@
}
/* Creates (H) */
- ret = vhdx_create_new_metadata(bs, image_size, block_size, 512,
+ ret = vhdx_create_new_metadata(blk, image_size, block_size, 512,
metadata_offset, image_type);
if (ret < 0) {
goto delete_and_exit;
@@ -1879,7 +1902,7 @@
delete_and_exit:
- bdrv_unref(bs);
+ blk_unref(blk);
exit:
g_free(type);
g_free(creator);
@@ -1935,7 +1958,9 @@
{
.name = VHDX_BLOCK_OPT_ZERO,
.type = QEMU_OPT_BOOL,
- .help = "Force use of payload blocks of type 'ZERO'. Non-standard."
+ .help = "Force use of payload blocks of type 'ZERO'. "\
+ "Non-standard, but default. Do not set to 'off' when "\
+ "using 'qemu-img convert' with subformat=dynamic."
},
{ NULL }
}
@@ -1953,6 +1978,7 @@
.bdrv_create = vhdx_create,
.bdrv_get_info = vhdx_get_info,
.bdrv_check = vhdx_check,
+ .bdrv_has_zero_init = bdrv_has_zero_init_1,
.create_opts = &vhdx_create_opts,
};
diff --git a/block/vhdx.h b/block/vhdx.h
index b4a12a0..7003ab7 100644
--- a/block/vhdx.h
+++ b/block/vhdx.h
@@ -226,7 +226,8 @@
#define PAYLOAD_BLOCK_NOT_PRESENT 0
#define PAYLOAD_BLOCK_UNDEFINED 1
#define PAYLOAD_BLOCK_ZERO 2
-#define PAYLOAD_BLOCK_UNMAPPED 5
+#define PAYLOAD_BLOCK_UNMAPPED 3
+#define PAYLOAD_BLOCK_UNMAPPED_v095 5
#define PAYLOAD_BLOCK_FULLY_PRESENT 6
#define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7
diff --git a/block/vmdk.c b/block/vmdk.c
index 2cbfd3e..46d474e 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -23,10 +23,16 @@
* THE SOFTWARE.
*/
-#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
+#include "qemu/cutils.h"
#include <zlib.h>
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
@@ -84,7 +90,7 @@
#define L2_CACHE_SIZE 16
typedef struct VmdkExtent {
- BlockDriverState *file;
+ BdrvChild *file;
bool flat;
bool compressed;
bool has_marker;
@@ -219,7 +225,7 @@
g_free(e->l1_backup_table);
g_free(e->type);
if (e->file != bs->file) {
- bdrv_unref(e->file);
+ bdrv_unref_child(bs, e->file);
}
}
g_free(s->extents);
@@ -238,15 +244,17 @@
static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
{
- char desc[DESC_SIZE];
+ char *desc;
uint32_t cid = 0xffffffff;
const char *p_name, *cid_str;
size_t cid_str_size;
BDRVVmdkState *s = bs->opaque;
int ret;
+ desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
+ g_free(desc);
return 0;
}
@@ -265,50 +273,55 @@
sscanf(p_name, "%" SCNx32, &cid);
}
+ g_free(desc);
return cid;
}
static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
{
- char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
+ char *desc, *tmp_desc;
char *p_name, *tmp_str;
BDRVVmdkState *s = bs->opaque;
- int ret;
+ int ret = 0;
+ desc = g_malloc0(DESC_SIZE);
+ tmp_desc = g_malloc0(DESC_SIZE);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
desc[DESC_SIZE - 1] = '\0';
tmp_str = strstr(desc, "parentCID");
if (tmp_str == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
- pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
+ pstrcpy(tmp_desc, DESC_SIZE, tmp_str);
p_name = strstr(desc, "CID");
if (p_name != NULL) {
p_name += sizeof("CID");
- snprintf(p_name, sizeof(desc) - (p_name - desc), "%" PRIx32 "\n", cid);
- pstrcat(desc, sizeof(desc), tmp_desc);
+ snprintf(p_name, DESC_SIZE - (p_name - desc), "%" PRIx32 "\n", cid);
+ pstrcat(desc, DESC_SIZE, tmp_desc);
}
ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
- if (ret < 0) {
- return ret;
- }
- return 0;
+out:
+ g_free(desc);
+ g_free(tmp_desc);
+ return ret;
}
static int vmdk_is_cid_valid(BlockDriverState *bs)
{
BDRVVmdkState *s = bs->opaque;
- BlockDriverState *p_bs = bs->backing_hd;
uint32_t cur_pcid;
- if (!s->cid_checked && p_bs) {
+ if (!s->cid_checked && bs->backing) {
+ BlockDriverState *p_bs = bs->backing->bs;
+
cur_pcid = vmdk_read_cid(p_bs, 0);
if (s->parent_cid != cur_pcid) {
/* CID not valid */
@@ -320,51 +333,28 @@
return 1;
}
-/* Queue extents, if any, for reopen() */
+/* We have nothing to do for VMDK reopen, stubs just return success */
static int vmdk_reopen_prepare(BDRVReopenState *state,
BlockReopenQueue *queue, Error **errp)
{
- BDRVVmdkState *s;
- int ret = -1;
- int i;
- VmdkExtent *e;
-
assert(state != NULL);
assert(state->bs != NULL);
-
- if (queue == NULL) {
- error_setg(errp, "No reopen queue for VMDK extents");
- goto exit;
- }
-
- s = state->bs->opaque;
-
- assert(s != NULL);
-
- for (i = 0; i < s->num_extents; i++) {
- e = &s->extents[i];
- if (e->file != state->bs->file) {
- bdrv_reopen_queue(queue, e->file, state->flags);
- }
- }
- ret = 0;
-
-exit:
- return ret;
+ return 0;
}
static int vmdk_parent_open(BlockDriverState *bs)
{
char *p_name;
- char desc[DESC_SIZE + 1];
+ char *desc;
BDRVVmdkState *s = bs->opaque;
int ret;
- desc[DESC_SIZE] = '\0';
+ desc = g_malloc0(DESC_SIZE + 1);
ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
if (ret < 0) {
- return ret;
+ goto out;
}
+ ret = 0;
p_name = strstr(desc, "parentFileNameHint");
if (p_name != NULL) {
@@ -373,22 +363,26 @@
p_name += sizeof("parentFileNameHint") + 1;
end_name = strchr(p_name, '\"');
if (end_name == NULL) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
if ((end_name - p_name) > sizeof(bs->backing_file) - 1) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
}
- return 0;
+out:
+ g_free(desc);
+ return ret;
}
/* Create and append extent to the extent array. Return the added VmdkExtent
* address. return NULL if allocation failed. */
static int vmdk_add_extent(BlockDriverState *bs,
- BlockDriverState *file, bool flat, int64_t sectors,
+ BdrvChild *file, bool flat, int64_t sectors,
int64_t l1_offset, int64_t l1_backup_offset,
uint32_t l1_size,
int l2_size, uint64_t cluster_sectors,
@@ -413,7 +407,7 @@
return -EFBIG;
}
- nb_sectors = bdrv_nb_sectors(file);
+ nb_sectors = bdrv_nb_sectors(file->bs);
if (nb_sectors < 0) {
return nb_sectors;
}
@@ -450,7 +444,8 @@
Error **errp)
{
int ret;
- int l1_size, i;
+ size_t l1_size;
+ int i;
/* read the L1 table */
l1_size = extent->l1_size * sizeof(uint32_t);
@@ -466,7 +461,7 @@
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -486,7 +481,7 @@
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read l1 backup table from extent '%s'",
- extent->file->filename);
+ extent->file->bs->filename);
goto fail_l1b;
}
for (i = 0; i < extent->l1_size; i++) {
@@ -505,7 +500,7 @@
}
static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
- BlockDriverState *file,
+ BdrvChild *file,
int flags, Error **errp)
{
int ret;
@@ -517,12 +512,12 @@
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return ret;
}
ret = vmdk_add_extent(bs, file, false,
le32_to_cpu(header.disk_sectors),
- le32_to_cpu(header.l1dir_offset) << 9,
+ (int64_t)le32_to_cpu(header.l1dir_offset) << 9,
0,
le32_to_cpu(header.l1dir_size),
4096,
@@ -541,23 +536,30 @@
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
- Error **errp);
+ QDict *options, Error **errp);
-static char *vmdk_read_desc(BlockDriverState *file, uint64_t desc_offset,
- Error **errp)
+static char *vmdk_read_desc(BdrvChild *file, uint64_t desc_offset, Error **errp)
{
int64_t size;
char *buf;
int ret;
- size = bdrv_getlength(file);
+ size = bdrv_getlength(file->bs);
if (size < 0) {
error_setg_errno(errp, -size, "Could not access file");
return NULL;
}
- size = MIN(size, 1 << 20); /* avoid unbounded allocation */
- buf = g_malloc0(size + 1);
+ if (size < 4) {
+ /* Both descriptor file and sparse image must be much larger than 4
+ * bytes, also callers of vmdk_read_desc want to compare the first 4
+ * bytes with VMDK4_MAGIC, let's error out if less is read. */
+ error_setg(errp, "File is too small, not a valid image");
+ return NULL;
+ }
+
+ size = MIN(size, (1 << 20) - 1); /* avoid unbounded allocation */
+ buf = g_malloc(size + 1);
ret = bdrv_pread(file, desc_offset, buf, size);
if (ret < 0) {
@@ -565,13 +567,14 @@
g_free(buf);
return NULL;
}
+ buf[ret] = 0;
return buf;
}
static int vmdk_open_vmdk4(BlockDriverState *bs,
- BlockDriverState *file,
- int flags, Error **errp)
+ BdrvChild *file,
+ int flags, QDict *options, Error **errp)
{
int ret;
uint32_t magic;
@@ -580,12 +583,13 @@
VmdkExtent *extent;
BDRVVmdkState *s = bs->opaque;
int64_t l1_backup_offset = 0;
+ bool compressed;
ret = bdrv_pread(file, sizeof(magic), &header, sizeof(header));
if (ret < 0) {
error_setg_errno(errp, -ret,
"Could not read header from file '%s'",
- file->filename);
+ file->bs->filename);
return -EINVAL;
}
if (header.capacity == 0) {
@@ -595,7 +599,7 @@
if (!buf) {
return -EINVAL;
}
- ret = vmdk_open_desc_file(bs, flags, buf, errp);
+ ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
g_free(buf);
return ret;
}
@@ -632,9 +636,10 @@
} QEMU_PACKED footer;
ret = bdrv_pread(file,
- bs->file->total_sectors * 512 - 1536,
+ bs->file->bs->total_sectors * 512 - 1536,
&footer, sizeof(footer));
if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to read footer");
return ret;
}
@@ -646,20 +651,21 @@
le32_to_cpu(footer.eos_marker.size) != 0 ||
le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
{
+ error_setg(errp, "Invalid footer");
return -EINVAL;
}
header = footer.header;
}
+ compressed =
+ le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
if (le32_to_cpu(header.version) > 3) {
- char buf[64];
- snprintf(buf, sizeof(buf), "VMDK version %" PRId32,
- le32_to_cpu(header.version));
- error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE,
- bdrv_get_device_name(bs), "vmdk", buf);
+ error_setg(errp, "Unsupported VMDK version %" PRIu32,
+ le32_to_cpu(header.version));
return -ENOTSUP;
- } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR)) {
+ } else if (le32_to_cpu(header.version) == 3 && (flags & BDRV_O_RDWR) &&
+ !compressed) {
/* VMware KB 2064959 explains that version 3 added support for
* persistent changed block tracking (CBT), and backup software can
* read it as version=1 if it doesn't care about the changed area
@@ -676,6 +682,7 @@
l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gt)
* le64_to_cpu(header.granularity);
if (l1_entry_sectors == 0) {
+ error_setg(errp, "L1 entry size is invalid");
return -EINVAL;
}
l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
@@ -683,7 +690,7 @@
if (le32_to_cpu(header.flags) & VMDK4_FLAG_RGD) {
l1_backup_offset = le64_to_cpu(header.rgd_offset) << 9;
}
- if (bdrv_nb_sectors(file) < le64_to_cpu(header.grain_offset)) {
+ if (bdrv_nb_sectors(file->bs) < le64_to_cpu(header.grain_offset)) {
error_setg(errp, "File truncated, expecting at least %" PRId64 " bytes",
(int64_t)(le64_to_cpu(header.grain_offset)
* BDRV_SECTOR_SIZE));
@@ -747,9 +754,8 @@
}
/* Open an extent file and append to bs array */
-static int vmdk_open_sparse(BlockDriverState *bs,
- BlockDriverState *file, int flags,
- char *buf, Error **errp)
+static int vmdk_open_sparse(BlockDriverState *bs, BdrvChild *file, int flags,
+ char *buf, QDict *options, Error **errp)
{
uint32_t magic;
@@ -759,7 +765,7 @@
return vmdk_open_vmfs_sparse(bs, file, flags, errp);
break;
case VMDK4_MAGIC:
- return vmdk_open_vmdk4(bs, file, flags, errp);
+ return vmdk_open_vmdk4(bs, file, flags, options, errp);
break;
default:
error_setg(errp, "Image not in VMDK format");
@@ -768,63 +774,90 @@
}
}
+static const char *next_line(const char *s)
+{
+ while (*s) {
+ if (*s == '\n') {
+ return s + 1;
+ }
+ s++;
+ }
+ return s;
+}
+
static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
- const char *desc_file_path, Error **errp)
+ const char *desc_file_path, QDict *options,
+ Error **errp)
{
int ret;
+ int matches;
char access[11];
char type[11];
char fname[512];
- const char *p = desc;
+ const char *p, *np;
int64_t sectors = 0;
int64_t flat_offset;
- char extent_path[PATH_MAX];
- BlockDriverState *extent_file;
+ char *extent_path;
+ BdrvChild *extent_file;
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent;
+ char extent_opt_prefix[32];
+ Error *local_err = NULL;
- while (*p) {
- /* parse extent line:
+ for (p = desc; *p; p = next_line(p)) {
+ /* parse extent line in one of below formats:
+ *
* RW [size in sectors] FLAT "file-name.vmdk" OFFSET
- * or
* RW [size in sectors] SPARSE "file-name.vmdk"
+ * RW [size in sectors] VMFS "file-name.vmdk"
+ * RW [size in sectors] VMFSSPARSE "file-name.vmdk"
*/
flat_offset = -1;
- ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
- access, &sectors, type, fname, &flat_offset);
- if (ret < 4 || strcmp(access, "RW")) {
- goto next_line;
+ matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
+ access, &sectors, type, fname, &flat_offset);
+ if (matches < 4 || strcmp(access, "RW")) {
+ continue;
} else if (!strcmp(type, "FLAT")) {
- if (ret != 5 || flat_offset < 0) {
- error_setg(errp, "Invalid extent lines: \n%s", p);
- return -EINVAL;
+ if (matches != 5 || flat_offset < 0) {
+ goto invalid;
}
} else if (!strcmp(type, "VMFS")) {
- if (ret == 4) {
+ if (matches == 4) {
flat_offset = 0;
} else {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ goto invalid;
}
- } else if (ret != 4) {
- error_setg(errp, "Invalid extent lines:\n%s", p);
- return -EINVAL;
+ } else if (matches != 4) {
+ goto invalid;
}
if (sectors <= 0 ||
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
(strcmp(access, "RW"))) {
- goto next_line;
+ continue;
}
- path_combine(extent_path, sizeof(extent_path),
- desc_file_path, fname);
- extent_file = NULL;
- ret = bdrv_open(&extent_file, extent_path, NULL, NULL,
- bs->open_flags | BDRV_O_PROTOCOL, NULL, errp);
- if (ret) {
- return ret;
+ if (!path_is_absolute(fname) && !path_has_protocol(fname) &&
+ !desc_file_path[0])
+ {
+ error_setg(errp, "Cannot use relative extent paths with VMDK "
+ "descriptor file '%s'", bs->file->bs->filename);
+ return -EINVAL;
+ }
+
+ extent_path = g_malloc0(PATH_MAX);
+ path_combine(extent_path, PATH_MAX, desc_file_path, fname);
+
+ ret = snprintf(extent_opt_prefix, 32, "extents.%d", s->num_extents);
+ assert(ret < 32);
+
+ extent_file = bdrv_open_child(extent_path, options, extent_opt_prefix,
+ bs, &child_file, false, &local_err);
+ g_free(extent_path);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return -EINVAL;
}
/* save to extents array */
@@ -834,7 +867,7 @@
ret = vmdk_add_extent(bs, extent_file, true, sectors,
0, 0, 0, 0, 0, &extent, errp);
if (ret < 0) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent->flat_start_offset = flat_offset << 9;
@@ -844,35 +877,36 @@
if (!buf) {
ret = -EINVAL;
} else {
- ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf, errp);
+ ret = vmdk_open_sparse(bs, extent_file, bs->open_flags, buf,
+ options, errp);
}
g_free(buf);
if (ret) {
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return ret;
}
extent = &s->extents[s->num_extents - 1];
} else {
error_setg(errp, "Unsupported extent type '%s'", type);
- bdrv_unref(extent_file);
+ bdrv_unref_child(bs, extent_file);
return -ENOTSUP;
}
extent->type = g_strdup(type);
-next_line:
- /* move to next line */
- while (*p) {
- if (*p == '\n') {
- p++;
- break;
- }
- p++;
- }
}
return 0;
+
+invalid:
+ np = next_line(p);
+ assert(np != p);
+ if (np[-1] == '\n') {
+ np--;
+ }
+ error_setg(errp, "Invalid extent line: %.*s", (int)(np - p), p);
+ return -EINVAL;
}
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
- Error **errp)
+ QDict *options, Error **errp)
{
int ret;
char ct[128];
@@ -894,7 +928,8 @@
}
s->create_type = g_strdup(ct);
s->desc_offset = 0;
- ret = vmdk_parse_extents(buf, bs, bs->file->filename, errp);
+ ret = vmdk_parse_extents(buf, bs, bs->file->bs->exact_filename, options,
+ errp);
exit:
return ret;
}
@@ -902,7 +937,7 @@
static int vmdk_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
- char *buf = NULL;
+ char *buf;
int ret;
BDRVVmdkState *s = bs->opaque;
uint32_t magic;
@@ -916,11 +951,12 @@
switch (magic) {
case VMDK3_MAGIC:
case VMDK4_MAGIC:
- ret = vmdk_open_sparse(bs, bs->file, flags, buf, errp);
+ ret = vmdk_open_sparse(bs, bs->file, flags, buf, options,
+ errp);
s->desc_offset = 0x200;
break;
default:
- ret = vmdk_open_desc_file(bs, flags, buf, errp);
+ ret = vmdk_open_desc_file(bs, flags, buf, options, errp);
break;
}
if (ret) {
@@ -937,9 +973,9 @@
qemu_co_mutex_init(&s->lock);
/* Disable migration when VMDK images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "vmdk", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker, "The vmdk format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
g_free(buf);
return 0;
@@ -960,9 +996,9 @@
for (i = 0; i < s->num_extents; i++) {
if (!s->extents[i].flat) {
- bs->bl.write_zeroes_alignment =
- MAX(bs->bl.write_zeroes_alignment,
- s->extents[i].cluster_sectors);
+ bs->bl.pwrite_zeroes_alignment =
+ MAX(bs->bl.pwrite_zeroes_alignment,
+ s->extents[i].cluster_sectors << BDRV_SECTOR_BITS);
}
}
}
@@ -979,71 +1015,71 @@
*/
static int get_whole_cluster(BlockDriverState *bs,
VmdkExtent *extent,
- uint64_t cluster_sector_num,
- uint64_t sector_num,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t cluster_offset,
+ uint64_t offset,
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
int ret = VMDK_OK;
int64_t cluster_bytes;
uint8_t *whole_grain;
/* For COW, align request sector_num to cluster start */
- sector_num = QEMU_ALIGN_DOWN(sector_num, extent->cluster_sectors);
cluster_bytes = extent->cluster_sectors << BDRV_SECTOR_BITS;
+ offset = QEMU_ALIGN_DOWN(offset, cluster_bytes);
whole_grain = qemu_blockalign(bs, cluster_bytes);
- if (!bs->backing_hd) {
- memset(whole_grain, 0, skip_start_sector << BDRV_SECTOR_BITS);
- memset(whole_grain + (skip_end_sector << BDRV_SECTOR_BITS), 0,
- cluster_bytes - (skip_end_sector << BDRV_SECTOR_BITS));
+ if (!bs->backing) {
+ memset(whole_grain, 0, skip_start_bytes);
+ memset(whole_grain + skip_end_bytes, 0, cluster_bytes - skip_end_bytes);
}
- assert(skip_end_sector <= extent->cluster_sectors);
+ assert(skip_end_bytes <= cluster_bytes);
/* we will be here if it's first write on non-exist grain(cluster).
* try to read from parent image, if exist */
- if (bs->backing_hd && !vmdk_is_cid_valid(bs)) {
+ if (bs->backing && !vmdk_is_cid_valid(bs)) {
ret = VMDK_ERROR;
goto exit;
}
/* Read backing data before skip range */
- if (skip_start_sector > 0) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num,
- whole_grain, skip_start_sector);
+ if (skip_start_bytes > 0) {
+ if (bs->backing) {
+ ret = bdrv_pread(bs->backing, offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num, whole_grain,
- skip_start_sector);
+ ret = bdrv_pwrite(extent->file, cluster_offset, whole_grain,
+ skip_start_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
/* Read backing data after skip range */
- if (skip_end_sector < extent->cluster_sectors) {
- if (bs->backing_hd) {
- ret = bdrv_read(bs->backing_hd, sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ if (skip_end_bytes < cluster_bytes) {
+ if (bs->backing) {
+ ret = bdrv_pread(bs->backing, offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
- ret = bdrv_write(extent->file, cluster_sector_num + skip_end_sector,
- whole_grain + (skip_end_sector << BDRV_SECTOR_BITS),
- extent->cluster_sectors - skip_end_sector);
+ ret = bdrv_pwrite(extent->file, cluster_offset + skip_end_bytes,
+ whole_grain + skip_end_bytes,
+ cluster_bytes - skip_end_bytes);
if (ret < 0) {
ret = VMDK_ERROR;
goto exit;
}
}
+ ret = VMDK_OK;
exit:
qemu_vfree(whole_grain);
return ret;
@@ -1054,8 +1090,7 @@
{
offset = cpu_to_le32(offset);
/* update L2 table */
- if (bdrv_pwrite_sync(
- extent->file,
+ if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1064,8 +1099,7 @@
/* update backup L2 table */
if (extent->l1_backup_table_offset != 0) {
m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
- if (bdrv_pwrite_sync(
- extent->file,
+ if (bdrv_pwrite_sync(extent->file,
((int64_t)m_data->l2_offset * 512)
+ (m_data->l2_index * sizeof(offset)),
&offset, sizeof(offset)) < 0) {
@@ -1105,8 +1139,8 @@
uint64_t offset,
bool allocate,
uint64_t *cluster_offset,
- uint64_t skip_start_sector,
- uint64_t skip_end_sector)
+ uint64_t skip_start_bytes,
+ uint64_t skip_end_bytes)
{
unsigned int l1_index, l2_offset, l2_index;
int min_index, i, j;
@@ -1154,8 +1188,7 @@
}
}
l2_table = extent->l2_cache + (min_index * extent->l2_size);
- if (bdrv_pread(
- extent->file,
+ if (bdrv_pread(extent->file,
(int64_t)l2_offset * 512,
l2_table,
extent->l2_size * sizeof(uint32_t)
@@ -1169,13 +1202,6 @@
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
cluster_sector = le32_to_cpu(l2_table[l2_index]);
- if (m_data) {
- m_data->valid = 1;
- m_data->l1_index = l1_index;
- m_data->l2_index = l2_index;
- m_data->l2_offset = l2_offset;
- m_data->l2_cache_entry = &l2_table[l2_index];
- }
if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
zeroed = true;
}
@@ -1193,13 +1219,18 @@
* This problem may occur because of insufficient space on host disk
* or inappropriate VM shutdown.
*/
- ret = get_whole_cluster(bs, extent,
- cluster_sector,
- offset >> BDRV_SECTOR_BITS,
- skip_start_sector, skip_end_sector);
+ ret = get_whole_cluster(bs, extent, cluster_sector * BDRV_SECTOR_SIZE,
+ offset, skip_start_bytes, skip_end_bytes);
if (ret) {
return ret;
}
+ if (m_data) {
+ m_data->valid = 1;
+ m_data->l1_index = l1_index;
+ m_data->l2_index = l2_index;
+ m_data->l2_offset = l2_offset;
+ m_data->l2_cache_entry = &l2_table[l2_index];
+ }
}
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
return VMDK_OK;
@@ -1222,8 +1253,28 @@
return NULL;
}
+static inline uint64_t vmdk_find_offset_in_cluster(VmdkExtent *extent,
+ int64_t offset)
+{
+ uint64_t extent_begin_offset, extent_relative_offset;
+ uint64_t cluster_size = extent->cluster_sectors * BDRV_SECTOR_SIZE;
+
+ extent_begin_offset =
+ (extent->end_sector - extent->sectors) * BDRV_SECTOR_SIZE;
+ extent_relative_offset = offset - extent_begin_offset;
+ return extent_relative_offset % cluster_size;
+}
+
+static inline uint64_t vmdk_find_index_in_cluster(VmdkExtent *extent,
+ int64_t sector_num)
+{
+ uint64_t offset;
+ offset = vmdk_find_offset_in_cluster(extent, sector_num * BDRV_SECTOR_SIZE);
+ return offset / BDRV_SECTOR_SIZE;
+}
+
static int64_t coroutine_fn vmdk_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int *pnum)
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
BDRVVmdkState *s = bs->opaque;
int64_t index_in_cluster, n, ret;
@@ -1240,6 +1291,7 @@
0, 0);
qemu_co_mutex_unlock(&s->lock);
+ index_in_cluster = vmdk_find_index_in_cluster(extent, sector_num);
switch (ret) {
case VMDK_ERROR:
ret = -EIO;
@@ -1252,14 +1304,15 @@
break;
case VMDK_OK:
ret = BDRV_BLOCK_DATA;
- if (extent->file == bs->file && !extent->compressed) {
- ret |= BDRV_BLOCK_OFFSET_VALID | offset;
+ if (!extent->compressed) {
+ ret |= BDRV_BLOCK_OFFSET_VALID;
+ ret |= (offset + (index_in_cluster << BDRV_SECTOR_BITS))
+ & BDRV_BLOCK_OFFSET_MASK;
}
-
+ *file = extent->file->bs;
break;
}
- index_in_cluster = sector_num % extent->cluster_sectors;
n = extent->cluster_sectors - index_in_cluster;
if (n > nb_sectors) {
n = nb_sectors;
@@ -1269,49 +1322,80 @@
}
static int vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, const uint8_t *buf,
- int nb_sectors, int64_t sector_num)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ uint64_t qiov_offset, uint64_t n_bytes,
+ uint64_t offset)
{
int ret;
VmdkGrainMarker *data = NULL;
uLongf buf_len;
- const uint8_t *write_buf = buf;
- int write_len = nb_sectors * 512;
+ QEMUIOVector local_qiov;
+ struct iovec iov;
+ int64_t write_offset;
+ int64_t write_end_sector;
if (extent->compressed) {
+ void *compressed_data;
+
if (!extent->has_marker) {
ret = -EINVAL;
goto out;
}
buf_len = (extent->cluster_sectors << 9) * 2;
data = g_malloc(buf_len + sizeof(VmdkGrainMarker));
- if (compress(data->data, &buf_len, buf, nb_sectors << 9) != Z_OK ||
- buf_len == 0) {
+
+ compressed_data = g_malloc(n_bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, compressed_data, n_bytes);
+ ret = compress(data->data, &buf_len, compressed_data, n_bytes);
+ g_free(compressed_data);
+
+ if (ret != Z_OK || buf_len == 0) {
ret = -EINVAL;
goto out;
}
- data->lba = sector_num;
+
+ data->lba = offset >> BDRV_SECTOR_BITS;
data->size = buf_len;
- write_buf = (uint8_t *)data;
- write_len = buf_len + sizeof(VmdkGrainMarker);
+
+ n_bytes = buf_len + sizeof(VmdkGrainMarker);
+ iov = (struct iovec) {
+ .iov_base = data,
+ .iov_len = n_bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+ } else {
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
}
- ret = bdrv_pwrite(extent->file,
- cluster_offset + offset_in_cluster,
- write_buf,
- write_len);
- if (ret != write_len) {
- ret = ret < 0 ? ret : -EIO;
+
+ write_offset = cluster_offset + offset_in_cluster,
+ ret = bdrv_co_pwritev(extent->file, write_offset, n_bytes,
+ &local_qiov, 0);
+
+ write_end_sector = DIV_ROUND_UP(write_offset + n_bytes, BDRV_SECTOR_SIZE);
+
+ if (extent->compressed) {
+ extent->next_cluster_sector = write_end_sector;
+ } else {
+ extent->next_cluster_sector = MAX(extent->next_cluster_sector,
+ write_end_sector);
+ }
+
+ if (ret < 0) {
goto out;
}
ret = 0;
out:
g_free(data);
+ if (!extent->compressed) {
+ qemu_iovec_destroy(&local_qiov);
+ }
return ret;
}
static int vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
- int64_t offset_in_cluster, uint8_t *buf,
- int nb_sectors)
+ int64_t offset_in_cluster, QEMUIOVector *qiov,
+ int bytes)
{
int ret;
int cluster_bytes, buf_bytes;
@@ -1323,14 +1407,13 @@
if (!extent->compressed) {
- ret = bdrv_pread(extent->file,
- cluster_offset + offset_in_cluster,
- buf, nb_sectors * 512);
- if (ret == nb_sectors * 512) {
- return 0;
- } else {
- return -EIO;
+ ret = bdrv_co_preadv(extent->file,
+ cluster_offset + offset_in_cluster, bytes,
+ qiov, 0);
+ if (ret < 0) {
+ return ret;
}
+ return 0;
}
cluster_bytes = extent->cluster_sectors * 512;
/* Read two clusters in case GrainMarker + compressed data > one cluster */
@@ -1362,11 +1445,11 @@
}
if (offset_in_cluster < 0 ||
- offset_in_cluster + nb_sectors * 512 > buf_len) {
+ offset_in_cluster + bytes > buf_len) {
ret = -EINVAL;
goto out;
}
- memcpy(buf, uncomp_buf + offset_in_cluster, nb_sectors * 512);
+ qemu_iovec_from_buf(qiov, 0, uncomp_buf + offset_in_cluster, bytes);
ret = 0;
out:
@@ -1375,67 +1458,73 @@
return ret;
}
-static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn /* byte-granularity read; returns 0 on success, an error code otherwise */
+vmdk_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags) /* flags is not used in this function */
{
BDRVVmdkState *s = bs->opaque;
int ret;
- uint64_t n, index_in_cluster;
- uint64_t extent_begin_sector, extent_relative_sector_num;
+ uint64_t n_bytes, offset_in_cluster; /* length of the current piece and its offset inside the cluster */
VmdkExtent *extent = NULL;
+ QEMUIOVector local_qiov; /* scratch view into qiov, rebuilt for each piece */
uint64_t cluster_offset;
+ uint64_t bytes_done = 0; /* bytes of qiov already filled */
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+ qemu_co_mutex_lock(&s->lock); /* held until the fail: label below */
+
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent); /* lookup is by sector number */
if (!extent) {
- return -EIO;
+ ret = -EIO;
+ goto fail;
}
ret = get_cluster_offset(bs, extent, NULL,
- sector_num << 9, false, &cluster_offset,
- 0, 0);
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
+ offset, false, &cluster_offset, 0, 0);
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE /* never cross the end of the current cluster */
+ - offset_in_cluster);
+
if (ret != VMDK_OK) {
/* if not allocated, try to read from parent image, if exist */
- if (bs->backing_hd && ret != VMDK_ZEROED) {
+ if (bs->backing && ret != VMDK_ZEROED) {
if (!vmdk_is_cid_valid(bs)) {
- return -EINVAL;
+ ret = -EINVAL;
+ goto fail;
}
- ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); /* select the n_bytes of qiov starting at bytes_done */
+
+ ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
+ &local_qiov, 0);
if (ret < 0) {
- return ret;
+ goto fail;
}
} else {
- memset(buf, 0, 512 * n);
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes); /* zeroed cluster or no backing file: fill with zeros */
}
} else {
- ret = vmdk_read_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n);
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes); /* select the n_bytes of qiov starting at bytes_done */
+
+ ret = vmdk_read_extent(extent, cluster_offset, offset_in_cluster,
+ &local_qiov, n_bytes);
if (ret) {
- return ret;
+ goto fail;
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVmdkState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vmdk_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail: /* common exit path, also reached on success */
qemu_co_mutex_unlock(&s->lock);
+ qemu_iovec_destroy(&local_qiov);
+
return ret;
}
@@ -1449,41 +1538,38 @@
*
* Returns: error code with 0 for success.
*/
-static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors,
- bool zeroed, bool zero_dry_run)
+static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *qiov,
+ bool zeroed, bool zero_dry_run)
{
BDRVVmdkState *s = bs->opaque;
VmdkExtent *extent = NULL;
int ret;
- int64_t index_in_cluster, n;
- uint64_t extent_begin_sector, extent_relative_sector_num;
+ int64_t offset_in_cluster, n_bytes;
uint64_t cluster_offset;
+ uint64_t bytes_done = 0;
VmdkMetaData m_data;
- if (sector_num > bs->total_sectors) {
- error_report("Wrong offset: sector_num=0x%" PRIx64
- " total_sectors=0x%" PRIx64 "\n",
- sector_num, bs->total_sectors);
+ if (DIV_ROUND_UP(offset, BDRV_SECTOR_SIZE) > bs->total_sectors) {
+ error_report("Wrong offset: offset=0x%" PRIx64
+ " total_sectors=0x%" PRIx64,
+ offset, bs->total_sectors);
return -EIO;
}
- while (nb_sectors > 0) {
- extent = find_extent(s, sector_num, extent);
+ while (bytes > 0) {
+ extent = find_extent(s, offset >> BDRV_SECTOR_BITS, extent);
if (!extent) {
return -EIO;
}
- extent_begin_sector = extent->end_sector - extent->sectors;
- extent_relative_sector_num = sector_num - extent_begin_sector;
- index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
- n = extent->cluster_sectors - index_in_cluster;
- if (n > nb_sectors) {
- n = nb_sectors;
- }
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
+ n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
+ - offset_in_cluster);
+
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
!(extent->compressed || zeroed),
- &cluster_offset,
- index_in_cluster, index_in_cluster + n);
+ &cluster_offset, offset_in_cluster,
+ offset_in_cluster + n_bytes);
if (extent->compressed) {
if (ret == VMDK_OK) {
/* Refuse write to allocated cluster for streamOptimized */
@@ -1492,7 +1578,7 @@
return -EIO;
} else {
/* allocate */
- ret = get_cluster_offset(bs, extent, &m_data, sector_num << 9,
+ ret = get_cluster_offset(bs, extent, &m_data, offset,
true, &cluster_offset, 0, 0);
}
}
@@ -1502,9 +1588,9 @@
if (zeroed) {
/* Do zeroed write, buf is ignored */
if (extent->has_zero_grain &&
- index_in_cluster == 0 &&
- n >= extent->cluster_sectors) {
- n = extent->cluster_sectors;
+ offset_in_cluster == 0 &&
+ n_bytes >= extent->cluster_sectors * BDRV_SECTOR_SIZE) {
+ n_bytes = extent->cluster_sectors * BDRV_SECTOR_SIZE;
if (!zero_dry_run) {
/* update L2 tables */
if (vmdk_L2update(extent, &m_data, VMDK_GTE_ZEROED)
@@ -1516,9 +1602,8 @@
return -ENOTSUP;
}
} else {
- ret = vmdk_write_extent(extent,
- cluster_offset, index_in_cluster * 512,
- buf, n, sector_num);
+ ret = vmdk_write_extent(extent, cluster_offset, offset_in_cluster,
+ qiov, bytes_done, n_bytes, offset);
if (ret) {
return ret;
}
@@ -1531,14 +1616,14 @@
}
}
}
- nb_sectors -= n;
- sector_num += n;
- buf += n * 512;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
/* update CID on the first write every time the virtual disk is
* opened */
if (!s->cid_updated) {
- ret = vmdk_write_cid(bs, time(NULL));
+ ret = vmdk_write_cid(bs, g_random_int());
if (ret < 0) {
return ret;
}
@@ -1548,43 +1633,84 @@
return 0;
}
-static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn /* write entry point: runs vmdk_pwritev() under s->lock */
+vmdk_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags) /* flags is not used in this function */
{
int ret;
BDRVVmdkState *s = bs->opaque;
qemu_co_mutex_lock(&s->lock);
- ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ ret = vmdk_pwritev(bs, offset, bytes, qiov, false, false); /* zeroed=false, zero_dry_run=false */
qemu_co_mutex_unlock(&s->lock);
return ret;
}
+typedef struct VmdkWriteCompressedCo { /* request/result record passed to vmdk_co_write_compressed() */
+ BlockDriverState *bs; /* image being written */
+ int64_t sector_num; /* first sector of the write */
+ const uint8_t *buf; /* data to write, nb_sectors sectors long */
+ int nb_sectors; /* length of the write in sectors */
+ int ret; /* result of the write; preset to -EINPROGRESS by vmdk_write_compressed() */
+} VmdkWriteCompressedCo;
+
+static void vmdk_co_write_compressed(void *opaque) /* coroutine entry point; opaque is a VmdkWriteCompressedCo */
+{
+ VmdkWriteCompressedCo *co = opaque;
+ QEMUIOVector local_qiov;
+ uint64_t offset = co->sector_num * BDRV_SECTOR_SIZE; /* sector-based request converted to bytes */
+ uint64_t bytes = co->nb_sectors * BDRV_SECTOR_SIZE;
+
+ struct iovec iov = (struct iovec) { /* single element wrapping the caller's buffer */
+ .iov_base = (uint8_t*) co->buf, /* cast drops the const qualifier of co->buf */
+ .iov_len = bytes,
+ };
+ qemu_iovec_init_external(&local_qiov, &iov, 1);
+
+ co->ret = vmdk_pwritev(co->bs, offset, bytes, &local_qiov, false, false); /* zeroed=false, zero_dry_run=false; vmdk_write_compressed() polls co->ret */
+}
+
static int vmdk_write_compressed(BlockDriverState *bs,
int64_t sector_num,
const uint8_t *buf,
int nb_sectors)
{
BDRVVmdkState *s = bs->opaque;
+
if (s->num_extents == 1 && s->extents[0].compressed) {
- return vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+ Coroutine *co; /* the write itself runs in vmdk_co_write_compressed() */
+ AioContext *aio_context = bdrv_get_aio_context(bs);
+ VmdkWriteCompressedCo data = { /* request handed to the coroutine by pointer */
+ .bs = bs,
+ .sector_num = sector_num,
+ .buf = buf,
+ .nb_sectors = nb_sectors,
+ .ret = -EINPROGRESS, /* sentinel, overwritten by vmdk_co_write_compressed() */
+ };
+ co = qemu_coroutine_create(vmdk_co_write_compressed, &data);
+ qemu_coroutine_enter(co);
+ while (data.ret == -EINPROGRESS) { /* wait until the coroutine has stored its result */
+ aio_poll(aio_context, true);
+ }
+ return data.ret;
} else {
return -ENOTSUP;
}
}
-static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
- int64_t sector_num,
- int nb_sectors,
- BdrvRequestFlags flags)
+static int coroutine_fn vmdk_co_pwrite_zeroes(BlockDriverState *bs, /* zero a byte range while holding s->lock */
+ int64_t offset,
+ int bytes,
+ BdrvRequestFlags flags) /* flags is not used in this function */
{
int ret;
BDRVVmdkState *s = bs->opaque;
+
qemu_co_mutex_lock(&s->lock);
/* write zeroes could fail if sectors not aligned to cluster, test it with
* dry_run == true before really updating image */
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, true); /* zeroed=true, zero_dry_run=true; no data vector is passed */
if (!ret) {
- ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+ ret = vmdk_pwritev(bs, offset, bytes, NULL, true, false); /* same request again with zero_dry_run=false */
}
qemu_co_mutex_unlock(&s->lock);
return ret;
}
@@ -1595,7 +1721,7 @@
QemuOpts *opts, Error **errp)
{
int ret, i;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
VMDK4Header header;
Error *local_err = NULL;
uint32_t tmp, magic, grains, gd_sectors, gt_size, gt_count;
@@ -1608,16 +1734,18 @@
goto exit;
}
- assert(bs == NULL);
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
if (flat) {
- ret = bdrv_truncate(bs, filesize);
+ ret = blk_truncate(blk, filesize);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
@@ -1625,7 +1753,13 @@
}
magic = cpu_to_be32(VMDK4_MAGIC);
memset(&header, 0, sizeof(header));
- header.version = zeroed_grain ? 2 : 1;
+ if (compress) {
+ header.version = 3;
+ } else if (zeroed_grain) {
+ header.version = 2;
+ } else {
+ header.version = 1;
+ }
header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
| (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
| (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
@@ -1666,18 +1800,18 @@
header.check_bytes[3] = 0xa;
/* write all the data */
- ret = bdrv_pwrite(bs, 0, &magic, sizeof(magic));
+ ret = blk_pwrite(blk, 0, &magic, sizeof(magic), 0);
if (ret < 0) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_pwrite(bs, sizeof(magic), &header, sizeof(header));
+ ret = blk_pwrite(blk, sizeof(magic), &header, sizeof(header), 0);
if (ret < 0) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
- ret = bdrv_truncate(bs, le64_to_cpu(header.grain_offset) << 9);
+ ret = blk_truncate(blk, le64_to_cpu(header.grain_offset) << 9);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
goto exit;
@@ -1690,10 +1824,10 @@
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.rgd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
@@ -1702,17 +1836,17 @@
i < gt_count; i++, tmp += gt_size) {
gd_buf[i] = cpu_to_le32(tmp);
}
- ret = bdrv_pwrite(bs, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
- gd_buf, gd_buf_size);
+ ret = blk_pwrite(blk, le64_to_cpu(header.gd_offset) * BDRV_SECTOR_SIZE,
+ gd_buf, gd_buf_size, 0);
if (ret < 0) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
ret = 0;
exit:
- if (bs) {
- bdrv_unref(bs);
+ if (blk) {
+ blk_unref(blk);
}
g_free(gd_buf);
return ret;
@@ -1761,21 +1895,26 @@
static int vmdk_create(const char *filename, QemuOpts *opts, Error **errp)
{
int idx = 0;
- BlockDriverState *new_bs = NULL;
+ BlockBackend *new_blk = NULL;
Error *local_err = NULL;
char *desc = NULL;
int64_t total_size = 0, filesize;
char *adapter_type = NULL;
char *backing_file = NULL;
+ char *hw_version = NULL;
char *fmt = NULL;
- int flags = 0;
int ret = 0;
bool flat, split, compress;
GString *ext_desc_lines;
- char path[PATH_MAX], prefix[PATH_MAX], postfix[PATH_MAX];
+ char *path = g_malloc0(PATH_MAX);
+ char *prefix = g_malloc0(PATH_MAX);
+ char *postfix = g_malloc0(PATH_MAX);
+ char *desc_line = g_malloc0(BUF_SIZE);
+ char *ext_filename = g_malloc0(PATH_MAX);
+ char *desc_filename = g_malloc0(PATH_MAX);
const int64_t split_size = 0x80000000; /* VMDK has constant split size */
const char *desc_extent_line;
- char parent_desc_line[BUF_SIZE] = "";
+ char *parent_desc_line = g_malloc0(BUF_SIZE);
uint32_t parent_cid = 0xffffffff;
uint32_t number_heads = 16;
bool zeroed_grain = false;
@@ -1794,7 +1933,7 @@
"# The Disk Data Base\n"
"#DDB\n"
"\n"
- "ddb.virtualHWVersion = \"%d\"\n"
+ "ddb.virtualHWVersion = \"%s\"\n"
"ddb.geometry.cylinders = \"%" PRId64 "\"\n"
"ddb.geometry.heads = \"%" PRIu32 "\"\n"
"ddb.geometry.sectors = \"63\"\n"
@@ -1811,8 +1950,20 @@
BDRV_SECTOR_SIZE);
adapter_type = qemu_opt_get_del(opts, BLOCK_OPT_ADAPTER_TYPE);
backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+ hw_version = qemu_opt_get_del(opts, BLOCK_OPT_HWVERSION);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_COMPAT6, false)) {
- flags |= BLOCK_FLAG_COMPAT6;
+ if (strcmp(hw_version, "undefined")) {
+ error_setg(errp,
+ "compat6 cannot be enabled with hwversion set");
+ ret = -EINVAL;
+ goto exit;
+ }
+ g_free(hw_version);
+ hw_version = g_strdup("6");
+ }
+ if (strcmp(hw_version, "undefined") == 0) {
+ g_free(hw_version);
+ hw_version = g_strdup("4");
}
fmt = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
if (qemu_opt_get_bool_del(opts, BLOCK_OPT_ZEROED_GRAIN, false)) {
@@ -1867,46 +2018,53 @@
goto exit;
}
if (backing_file) {
- BlockDriverState *bs = NULL;
- ret = bdrv_open(&bs, backing_file, NULL, NULL, BDRV_O_NO_BACKING, NULL,
- errp);
- if (ret != 0) {
+ BlockBackend *blk;
+ char *full_backing = g_new0(char, PATH_MAX);
+ bdrv_get_full_backing_filename_from_filename(filename, backing_file,
+ full_backing, PATH_MAX,
+ &local_err);
+ if (local_err) {
+ g_free(full_backing);
+ error_propagate(errp, local_err);
+ ret = -ENOENT;
goto exit;
}
- if (strcmp(bs->drv->format_name, "vmdk")) {
- bdrv_unref(bs);
+
+ blk = blk_new_open(full_backing, NULL, NULL,
+ BDRV_O_NO_BACKING, errp);
+ g_free(full_backing);
+ if (blk == NULL) {
+ ret = -EIO;
+ goto exit;
+ }
+ if (strcmp(blk_bs(blk)->drv->format_name, "vmdk")) {
+ blk_unref(blk);
ret = -EINVAL;
goto exit;
}
- parent_cid = vmdk_read_cid(bs, 0);
- bdrv_unref(bs);
- snprintf(parent_desc_line, sizeof(parent_desc_line),
+ parent_cid = vmdk_read_cid(blk_bs(blk), 0);
+ blk_unref(blk);
+ snprintf(parent_desc_line, BUF_SIZE,
"parentFileNameHint=\"%s\"", backing_file);
}
/* Create extents */
filesize = total_size;
while (filesize > 0) {
- char desc_line[BUF_SIZE];
- char ext_filename[PATH_MAX];
- char desc_filename[PATH_MAX];
int64_t size = filesize;
if (split && size > split_size) {
size = split_size;
}
if (split) {
- snprintf(desc_filename, sizeof(desc_filename), "%s-%c%03d%s",
+ snprintf(desc_filename, PATH_MAX, "%s-%c%03d%s",
prefix, flat ? 'f' : 's', ++idx, postfix);
} else if (flat) {
- snprintf(desc_filename, sizeof(desc_filename), "%s-flat%s",
- prefix, postfix);
+ snprintf(desc_filename, PATH_MAX, "%s-flat%s", prefix, postfix);
} else {
- snprintf(desc_filename, sizeof(desc_filename), "%s%s",
- prefix, postfix);
+ snprintf(desc_filename, PATH_MAX, "%s%s", prefix, postfix);
}
- snprintf(ext_filename, sizeof(ext_filename), "%s%s",
- path, desc_filename);
+ snprintf(ext_filename, PATH_MAX, "%s%s", path, desc_filename);
if (vmdk_create_extent(ext_filename, size,
flat, compress, zeroed_grain, opts, errp)) {
@@ -1916,18 +2074,18 @@
filesize -= size;
/* Format description line */
- snprintf(desc_line, sizeof(desc_line),
+ snprintf(desc_line, BUF_SIZE,
desc_extent_line, size / BDRV_SECTOR_SIZE, desc_filename);
g_string_append(ext_desc_lines, desc_line);
}
/* generate descriptor file */
desc = g_strdup_printf(desc_template,
- (uint32_t)time(NULL),
+ g_random_int(),
parent_cid,
fmt,
parent_desc_line,
ext_desc_lines->str,
- (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
+ hw_version,
total_size /
(int64_t)(63 * number_heads * BDRV_SECTOR_SIZE),
number_heads,
@@ -1943,14 +2101,18 @@
goto exit;
}
}
- assert(new_bs == NULL);
- ret = bdrv_open(&new_bs, filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err);
- if (ret < 0) {
+
+ new_blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (new_blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto exit;
}
- ret = bdrv_pwrite(new_bs, desc_offset, desc, desc_len);
+
+ blk_set_allow_write_beyond_eof(new_blk, true);
+
+ ret = blk_pwrite(new_blk, desc_offset, desc, desc_len, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not write description");
goto exit;
@@ -1958,19 +2120,27 @@
/* bdrv_pwrite write padding zeros to align to sector, we don't need that
* for description file */
if (desc_offset == 0) {
- ret = bdrv_truncate(new_bs, desc_len);
+ ret = blk_truncate(new_blk, desc_len);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not truncate file");
}
}
exit:
- if (new_bs) {
- bdrv_unref(new_bs);
+ if (new_blk) {
+ blk_unref(new_blk);
}
g_free(adapter_type);
g_free(backing_file);
+ g_free(hw_version);
g_free(fmt);
g_free(desc);
+ g_free(path);
+ g_free(prefix);
+ g_free(postfix);
+ g_free(desc_line);
+ g_free(ext_filename);
+ g_free(desc_filename);
+ g_free(parent_desc_line);
g_string_free(ext_desc_lines, true);
return ret;
}
@@ -1993,7 +2163,7 @@
int ret = 0;
for (i = 0; i < s->num_extents; i++) {
- err = bdrv_co_flush(s->extents[i].file);
+ err = bdrv_co_flush(s->extents[i].file->bs);
if (err < 0) {
ret = err;
}
@@ -2008,7 +2178,7 @@
int64_t r;
BDRVVmdkState *s = bs->opaque;
- ret = bdrv_get_allocated_file_size(bs->file);
+ ret = bdrv_get_allocated_file_size(bs->file->bs);
if (ret < 0) {
return ret;
}
@@ -2016,7 +2186,7 @@
if (s->extents[i].file == bs->file) {
continue;
}
- r = bdrv_get_allocated_file_size(s->extents[i].file);
+ r = bdrv_get_allocated_file_size(s->extents[i].file->bs);
if (r < 0) {
return r;
}
@@ -2034,7 +2204,7 @@
* return 0. */
for (i = 0; i < s->num_extents; i++) {
if (s->extents[i].flat) {
- if (!bdrv_has_zero_init(s->extents[i].file)) {
+ if (!bdrv_has_zero_init(s->extents[i].file->bs)) {
return 0;
}
}
@@ -2047,7 +2217,7 @@
ImageInfo *info = g_new0(ImageInfo, 1);
*info = (ImageInfo){
- .filename = g_strdup(extent->file->filename),
+ .filename = g_strdup(extent->file->bs->filename),
.format = g_strdup(extent->type),
.virtual_size = extent->sectors * BDRV_SECTOR_SIZE,
.compressed = extent->compressed,
@@ -2093,7 +2263,9 @@
PRId64 "\n", sector_num);
break;
}
- if (ret == VMDK_OK && cluster_offset >= bdrv_getlength(extent->file)) {
+ if (ret == VMDK_OK &&
+ cluster_offset >= bdrv_getlength(extent->file->bs))
+ {
fprintf(stderr,
"ERROR: cluster offset for sector %"
PRId64 " points after EOF\n", sector_num);
@@ -2114,19 +2286,19 @@
ImageInfoList **next;
*spec_info = (ImageInfoSpecific){
- .kind = IMAGE_INFO_SPECIFIC_KIND_VMDK,
- {
- .vmdk = g_new0(ImageInfoSpecificVmdk, 1),
+ .type = IMAGE_INFO_SPECIFIC_KIND_VMDK,
+ .u = {
+ .vmdk.data = g_new0(ImageInfoSpecificVmdk, 1),
},
};
- *spec_info->vmdk = (ImageInfoSpecificVmdk) {
+ *spec_info->u.vmdk.data = (ImageInfoSpecificVmdk) {
.create_type = g_strdup(s->create_type),
.cid = s->cid,
.parent_cid = s->parent_cid,
};
- next = &spec_info->vmdk->extents;
+ next = &spec_info->u.vmdk.data->extents;
for (i = 0; i < s->num_extents; i++) {
*next = g_new0(ImageInfoList, 1);
(*next)->value = vmdk_get_extent_info(&s->extents[i]);
@@ -2163,27 +2335,6 @@
return 0;
}
-static void vmdk_detach_aio_context(BlockDriverState *bs)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_detach_aio_context(s->extents[i].file);
- }
-}
-
-static void vmdk_attach_aio_context(BlockDriverState *bs,
- AioContext *new_context)
-{
- BDRVVmdkState *s = bs->opaque;
- int i;
-
- for (i = 0; i < s->num_extents; i++) {
- bdrv_attach_aio_context(s->extents[i].file, new_context);
- }
-}
-
static QemuOptsList vmdk_create_opts = {
.name = "vmdk-create-opts",
.head = QTAILQ_HEAD_INITIALIZER(vmdk_create_opts.head),
@@ -2211,6 +2362,12 @@
.def_value_str = "off"
},
{
+ .name = BLOCK_OPT_HWVERSION,
+ .type = QEMU_OPT_STRING,
+ .help = "VMDK hardware version",
+ .def_value_str = "undefined"
+ },
+ {
.name = BLOCK_OPT_SUBFMT,
.type = QEMU_OPT_STRING,
.help =
@@ -2234,10 +2391,10 @@
.bdrv_open = vmdk_open,
.bdrv_check = vmdk_check,
.bdrv_reopen_prepare = vmdk_reopen_prepare,
- .bdrv_read = vmdk_co_read,
- .bdrv_write = vmdk_co_write,
+ .bdrv_co_preadv = vmdk_co_preadv,
+ .bdrv_co_pwritev = vmdk_co_pwritev,
.bdrv_write_compressed = vmdk_write_compressed,
- .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
+ .bdrv_co_pwrite_zeroes = vmdk_co_pwrite_zeroes,
.bdrv_close = vmdk_close,
.bdrv_create = vmdk_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
@@ -2247,8 +2404,6 @@
.bdrv_get_specific_info = vmdk_get_specific_info,
.bdrv_refresh_limits = vmdk_refresh_limits,
.bdrv_get_info = vmdk_get_info,
- .bdrv_detach_aio_context = vmdk_detach_aio_context,
- .bdrv_attach_aio_context = vmdk_attach_aio_context,
.supports_backing = true,
.create_opts = &vmdk_create_opts,
diff --git a/block/vpc.c b/block/vpc.c
index 38c4f02..43707ed 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -22,10 +22,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "qemu-common.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
#include "qemu/module.h"
#include "migration/migration.h"
+#include "qemu/bswap.h"
#if defined(CONFIG_UUID)
#include <uuid/uuid.h>
#endif
@@ -42,30 +46,37 @@
VHD_DIFFERENCING = 4,
};
-// Seconds since Jan 1, 2000 0:00:00 (UTC)
+/* Seconds since Jan 1, 2000 0:00:00 (UTC) */
#define VHD_TIMESTAMP_BASE 946684800
-#define VHD_MAX_SECTORS (65535LL * 255 * 255)
+#define VHD_CHS_MAX_C 65535LL
+#define VHD_CHS_MAX_H 16
+#define VHD_CHS_MAX_S 255
-// always big-endian
+#define VHD_MAX_SECTORS 0xff000000 /* 2040 GiB max image size */
+#define VHD_MAX_GEOMETRY (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)
+
+#define VPC_OPT_FORCE_SIZE "force_size"
+
+/* always big-endian */
typedef struct vhd_footer {
- char creator[8]; // "conectix"
+ char creator[8]; /* "conectix" */
uint32_t features;
uint32_t version;
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Seconds since Jan 1, 2000 0:00:00 (UTC)
+ /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
uint32_t timestamp;
- char creator_app[4]; // "vpc "
+ char creator_app[4]; /* e.g., "vpc " */
uint16_t major;
uint16_t minor;
- char creator_os[4]; // "Wi2k"
+ char creator_os[4]; /* "Wi2k" */
uint64_t orig_size;
- uint64_t size;
+ uint64_t current_size;
uint16_t cyls;
uint8_t heads;
@@ -73,29 +84,29 @@
uint32_t type;
- // Checksum of the Hard Disk Footer ("one's complement of the sum of all
- // the bytes in the footer without the checksum field")
+ /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
+ the bytes in the footer without the checksum field") */
uint32_t checksum;
- // UUID used to identify a parent hard disk (backing file)
+ /* UUID used to identify a parent hard disk (backing file) */
uint8_t uuid[16];
uint8_t in_saved_state;
} QEMU_PACKED VHDFooter;
typedef struct vhd_dyndisk_header {
- char magic[8]; // "cxsparse"
+ char magic[8]; /* "cxsparse" */
- // Offset of next header structure, 0xFFFFFFFF if none
+ /* Offset of next header structure, 0xFFFFFFFF if none */
uint64_t data_offset;
- // Offset of the Block Allocation Table (BAT)
+ /* Offset of the Block Allocation Table (BAT) */
uint64_t table_offset;
uint32_t version;
- uint32_t max_table_entries; // 32bit/entry
+ uint32_t max_table_entries; /* 32bit/entry */
- // 2 MB by default, must be a power of two
+ /* 2 MB by default, must be a power of two */
uint32_t block_size;
uint32_t checksum;
@@ -103,7 +114,7 @@
uint32_t parent_timestamp;
uint32_t reserved;
- // Backing file name (in UTF-16)
+ /* Backing file name (in UTF-16) */
uint8_t parent_name[512];
struct {
@@ -126,6 +137,8 @@
uint32_t block_size;
uint32_t bitmap_size;
+ bool force_use_chs;
+ bool force_use_sz;
#ifdef CACHE
uint8_t *pageentry_u8;
@@ -138,6 +151,22 @@
Error *migration_blocker;
} BDRVVPCState;
+#define VPC_OPT_SIZE_CALC "force_size_calc" /* option key looked up in vpc_parse_options() */
+static QemuOptsList vpc_runtime_opts = { /* runtime options absorbed from the options QDict in vpc_open() */
+ .name = "vpc-runtime-opts",
+ .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
+ .desc = {
+ {
+ .name = VPC_OPT_SIZE_CALC,
+ .type = QEMU_OPT_STRING, /* vpc_parse_options() accepts "chs" or "current_size" */
+ .help = "Force disk size calculation to use either CHS geometry, "
+ "or use the disk current_size specified in the VHD footer. "
+ "{chs, current_size}"
+ },
+ { /* end of list */ }
+ }
+};
+
static uint32_t vpc_checksum(uint8_t* buf, size_t size)
{
uint32_t res = 0;
@@ -157,6 +186,25 @@
return 0;
}
+static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts, /* translate the size-calculation option into state flags */
+ Error **errp) /* set only for an unrecognized option value */
+{
+ BDRVVPCState *s = bs->opaque;
+ const char *size_calc;
+
+ size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC); /* a NULL result is treated as "option not given" */
+
+ if (!size_calc) {
+ /* no override, use autodetect only */
+ } else if (!strcmp(size_calc, "current_size")) {
+ s->force_use_sz = true; /* vpc_open() then takes the size from footer->current_size */
+ } else if (!strcmp(size_calc, "chs")) {
+ s->force_use_chs = true; /* vpc_open() then prefers the CHS geometry */
+ } else {
+ error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
+ }
+}
+
static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@@ -164,25 +212,47 @@
int i;
VHDFooter *footer;
VHDDynDiskHeader *dyndisk_header;
+ QemuOpts *opts = NULL;
+ Error *local_err = NULL;
+ bool use_chs;
uint8_t buf[HEADER_SIZE];
uint32_t checksum;
uint64_t computed_size;
+ uint64_t pagetable_size;
int disk_type = VHD_DYNAMIC;
int ret;
+ opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
+ qemu_opts_absorb_qdict(opts, options, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ vpc_parse_options(bs, opts, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
+
ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Unable to read VHD header");
goto fail;
}
footer = (VHDFooter *) s->footer_buf;
if (strncmp(footer->creator, "conectix", 8)) {
- int64_t offset = bdrv_getlength(bs->file);
+ int64_t offset = bdrv_getlength(bs->file->bs);
if (offset < 0) {
ret = offset;
+ error_setg(errp, "Invalid file size");
goto fail;
} else if (offset < HEADER_SIZE) {
ret = -EINVAL;
+ error_setg(errp, "File too small for a VHD header");
goto fail;
}
@@ -209,23 +279,50 @@
/* Write 'checksum' back to footer, or else will leave it with zero. */
footer->checksum = cpu_to_be32(checksum);
- // The visible size of a image in Virtual PC depends on the geometry
- // rather than on the size stored in the footer (the size in the footer
- // is too large usually)
+ /* The visible size of an image in Virtual PC depends on the geometry
+ rather than on the size stored in the footer (the size in the footer
+ is too large usually) */
bs->total_sectors = (int64_t)
be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
- /* images created with disk2vhd report a far higher virtual size
- * than expected with the cyls * heads * sectors_per_cyl formula.
- * use the footer->size instead if the image was created with
- * disk2vhd.
+ /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
+ * VHD image sizes differently. VPC will rely on CHS geometry,
+ * while Hyper-V and disk2vhd use the size specified in the footer.
+ *
+ * We use a couple of approaches to try and determine the correct method:
+ * look at the Creator App field, and look for images that have CHS
+ * geometry that is the maximum value.
+ *
+ * If the CHS geometry is the maximum CHS geometry, then we assume that
+ * the size is the footer->current_size to avoid truncation. Otherwise,
+ * we follow the table based on footer->creator_app:
+ *
+ * Known creator apps:
+ * 'vpc ' : CHS Virtual PC (uses disk geometry)
+ * 'qemu' : CHS QEMU (uses disk geometry)
+ * 'qem2' : current_size QEMU (uses current_size)
+ * 'win ' : current_size Hyper-V
+ * 'd2v ' : current_size Disk2vhd
+ * 'tap\0' : current_size XenServer
+ * 'CTXS' : current_size XenConverter
+ *
+ * The user can override the table values via drive options, however
+ * even with an override we will still use current_size for images
+ * that have CHS geometry of the maximum size.
*/
- if (!strncmp(footer->creator_app, "d2v", 4)) {
- bs->total_sectors = be64_to_cpu(footer->size) / BDRV_SECTOR_SIZE;
+ use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
+ !!strncmp(footer->creator_app, "qem2", 4) &&
+ !!strncmp(footer->creator_app, "d2v ", 4) &&
+ !!strncmp(footer->creator_app, "CTXS", 4) &&
+ !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
+
+ if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
+ bs->total_sectors = be64_to_cpu(footer->current_size) /
+ BDRV_SECTOR_SIZE;
}
- /* Allow a maximum disk size of approximately 2 TB */
- if (bs->total_sectors >= VHD_MAX_SECTORS) {
+ /* Allow a maximum disk size of 2040 GiB */
+ if (bs->total_sectors > VHD_MAX_SECTORS) {
ret = -EFBIG;
goto fail;
}
@@ -234,12 +331,14 @@
ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
HEADER_SIZE);
if (ret < 0) {
+ error_setg(errp, "Error reading dynamic VHD header");
goto fail;
}
dyndisk_header = (VHDDynDiskHeader *) buf;
if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
+ error_setg(errp, "Invalid header magic");
ret = -EINVAL;
goto fail;
}
@@ -255,22 +354,31 @@
s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
- ret = -EINVAL;
- goto fail;
- }
- if (s->max_table_entries > (VHD_MAX_SECTORS * 512) / s->block_size) {
+ error_setg(errp, "Too many blocks");
ret = -EINVAL;
goto fail;
}
computed_size = (uint64_t) s->max_table_entries * s->block_size;
if (computed_size < bs->total_sectors * 512) {
+ error_setg(errp, "Page table too small");
ret = -EINVAL;
goto fail;
}
- s->pagetable = qemu_try_blockalign(bs->file, s->max_table_entries * 4);
+ if (s->max_table_entries > SIZE_MAX / 4 ||
+ s->max_table_entries > (int) INT_MAX / 4) {
+ error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
+ s->max_table_entries);
+ ret = -EINVAL;
+ goto fail;
+ }
+
+ pagetable_size = (uint64_t) s->max_table_entries * 4;
+
+ s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
if (s->pagetable == NULL) {
+ error_setg(errp, "Unable to allocate memory for page table");
ret = -ENOMEM;
goto fail;
}
@@ -278,13 +386,14 @@
s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
- s->max_table_entries * 4);
+ pagetable_size);
if (ret < 0) {
+ error_setg(errp, "Error reading pagetable");
goto fail;
}
s->free_data_block_offset =
- (s->bat_offset + (s->max_table_entries * 4) + 511) & ~511;
+ ROUND_UP(s->bat_offset + pagetable_size, 512);
for (i = 0; i < s->max_table_entries; i++) {
be32_to_cpus(&s->pagetable[i]);
@@ -298,7 +407,7 @@
}
}
- if (s->free_data_block_offset > bdrv_getlength(bs->file)) {
+ if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
error_setg(errp, "block-vpc: free_data_block_offset points after "
"the end of file. The image has been truncated.");
ret = -EINVAL;
@@ -318,9 +427,9 @@
qemu_co_mutex_init(&s->lock);
/* Disable migration when VHD images are used */
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "vpc", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
return 0;
@@ -346,28 +455,27 @@
* The parameter write must be 1 if the offset will be used for a write
* operation (the block bitmaps is updated then), 0 otherwise.
*/
-static inline int64_t get_sector_offset(BlockDriverState *bs,
- int64_t sector_num, int write)
+static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
+ bool write)
{
BDRVVPCState *s = bs->opaque;
- uint64_t offset = sector_num * 512;
uint64_t bitmap_offset, block_offset;
- uint32_t pagetable_index, pageentry_index;
+ uint32_t pagetable_index, offset_in_block;
pagetable_index = offset / s->block_size;
- pageentry_index = (offset % s->block_size) / 512;
+ offset_in_block = offset % s->block_size;
if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
- return -1; // not allocated
+ return -1; /* not allocated */
bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
- block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
+ block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
- // We must ensure that we don't write to any sectors which are marked as
- // unused in the bitmap. We get away with setting all bits in the block
- // bitmap each time we write to a new block. This might cause Virtual PC to
- // miss sparse read optimization, but it's not a problem in terms of
- // correctness.
+ /* We must ensure that we don't write to any sectors which are marked as
+ unused in the bitmap. We get away with setting all bits in the block
+ bitmap each time we write to a new block. This might cause Virtual PC to
+ miss sparse read optimization, but it's not a problem in terms of
+ correctness. */
if (write && (s->last_bitmap_offset != bitmap_offset)) {
uint8_t bitmap[s->bitmap_size];
@@ -376,41 +484,15 @@
bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
}
-// printf("sector: %" PRIx64 ", index: %x, offset: %x, bioff: %" PRIx64 ", bloff: %" PRIx64 "\n",
-// sector_num, pagetable_index, pageentry_index,
-// bitmap_offset, block_offset);
-
-// disabled by reason
-#if 0
-#ifdef CACHE
- if (bitmap_offset != s->last_bitmap)
- {
- lseek(s->fd, bitmap_offset, SEEK_SET);
-
- s->last_bitmap = bitmap_offset;
-
- // Scary! Bitmap is stored as big endian 32bit entries,
- // while we used to look it up byte by byte
- read(s->fd, s->pageentry_u8, 512);
- for (i = 0; i < 128; i++)
- be32_to_cpus(&s->pageentry_u32[i]);
- }
-
- if ((s->pageentry_u8[pageentry_index / 8] >> (pageentry_index % 8)) & 1)
- return -1;
-#else
- lseek(s->fd, bitmap_offset + (pageentry_index / 8), SEEK_SET);
-
- read(s->fd, &bitmap_entry, 1);
-
- if ((bitmap_entry >> (pageentry_index % 8)) & 1)
- return -1; // not allocated
-#endif
-#endif
-
return block_offset;
}
+static inline int64_t get_sector_offset(BlockDriverState *bs,
+ int64_t sector_num, bool write)
+{
+ return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
+}
+
/*
* Writes the footer to the end of the image file. This is needed when the
* file grows as it overwrites the old footer
@@ -437,7 +519,7 @@
*
* Returns the sectors' offset in the image file on success and < 0 on error
*/
-static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
+static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
{
BDRVVPCState *s = bs->opaque;
int64_t bat_offset;
@@ -445,18 +527,17 @@
int ret;
uint8_t bitmap[s->bitmap_size];
- // Check if sector_num is valid
- if ((sector_num < 0) || (sector_num > bs->total_sectors))
- return -1;
+ /* Check if offset is valid */
+ if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
+ return -EINVAL;
+ }
- // Write entry into in-memory BAT
- index = (sector_num * 512) / s->block_size;
- if (s->pagetable[index] != 0xFFFFFFFF)
- return -1;
-
+ /* Write entry into in-memory BAT */
+ index = offset / s->block_size;
+ assert(s->pagetable[index] == 0xFFFFFFFF);
s->pagetable[index] = s->free_data_block_offset / 512;
- // Initialize the block's bitmap
+ /* Initialize the block's bitmap */
memset(bitmap, 0xff, s->bitmap_size);
ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
s->bitmap_size);
@@ -464,24 +545,24 @@
return ret;
}
- // Write new footer (the old one will be overwritten)
+ /* Write new footer (the old one will be overwritten) */
s->free_data_block_offset += s->block_size + s->bitmap_size;
ret = rewrite_footer(bs);
if (ret < 0)
goto fail;
- // Write BAT entry to disk
+ /* Write BAT entry to disk */
bat_offset = s->bat_offset + (4 * index);
bat_value = cpu_to_be32(s->pagetable[index]);
ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
if (ret < 0)
goto fail;
- return get_sector_offset(bs, sector_num, 0);
+ return get_image_offset(bs, offset, false);
fail:
s->free_data_block_offset -= (s->block_size + s->bitmap_size);
- return -1;
+ return ret;
}
static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
@@ -497,104 +578,151 @@
return 0;
}
-static int vpc_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
int ret;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_read(bs->file, sector_num, buf, nb_sectors);
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
}
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 0);
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
- if (offset == -1) {
- memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, false);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
+
+ if (image_offset == -1) {
+ qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
} else {
- ret = bdrv_pread(bs->file, offset, buf,
- sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
}
}
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
}
- return 0;
-}
-static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
-{
- int ret;
- BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_read(bs, sector_num, buf, nb_sectors);
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
qemu_co_mutex_unlock(&s->lock);
+
return ret;
}
-static int vpc_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
BDRVVPCState *s = bs->opaque;
- int64_t offset;
- int64_t sectors, sectors_per_block;
+ int64_t image_offset;
+ int64_t n_bytes;
+ int64_t bytes_done = 0;
int ret;
VHDFooter *footer = (VHDFooter *) s->footer_buf;
+ QEMUIOVector local_qiov;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_write(bs->file, sector_num, buf, nb_sectors);
- }
- while (nb_sectors > 0) {
- offset = get_sector_offset(bs, sector_num, 1);
-
- sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
- sectors = sectors_per_block - (sector_num % sectors_per_block);
- if (sectors > nb_sectors) {
- sectors = nb_sectors;
- }
-
- if (offset == -1) {
- offset = alloc_block(bs, sector_num);
- if (offset < 0)
- return -1;
- }
-
- ret = bdrv_pwrite(bs->file, offset, buf, sectors * BDRV_SECTOR_SIZE);
- if (ret != sectors * BDRV_SECTOR_SIZE) {
- return -1;
- }
-
- nb_sectors -= sectors;
- sector_num += sectors;
- buf += sectors * BDRV_SECTOR_SIZE;
+ return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
}
- return 0;
+ qemu_co_mutex_lock(&s->lock);
+ qemu_iovec_init(&local_qiov, qiov->niov);
+
+ while (bytes > 0) {
+ image_offset = get_image_offset(bs, offset, true);
+ n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
+
+ if (image_offset == -1) {
+ image_offset = alloc_block(bs, offset);
+ if (image_offset < 0) {
+ ret = image_offset;
+ goto fail;
+ }
+ }
+
+ qemu_iovec_reset(&local_qiov);
+ qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
+
+ ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
+ &local_qiov, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ bytes -= n_bytes;
+ offset += n_bytes;
+ bytes_done += n_bytes;
+ }
+
+ ret = 0;
+fail:
+ qemu_iovec_destroy(&local_qiov);
+ qemu_co_mutex_unlock(&s->lock);
+
+ return ret;
}
-static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
{
- int ret;
BDRVVPCState *s = bs->opaque;
- qemu_co_mutex_lock(&s->lock);
- ret = vpc_write(bs, sector_num, buf, nb_sectors);
- qemu_co_mutex_unlock(&s->lock);
- return ret;
+ VHDFooter *footer = (VHDFooter*) s->footer_buf;
+ int64_t start, offset;
+ bool allocated;
+ int n;
+
+ if (be32_to_cpu(footer->type) == VHD_FIXED) {
+ *pnum = nb_sectors;
+ *file = bs->file->bs;
+ return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
+ (sector_num << BDRV_SECTOR_BITS);
+ }
+
+ offset = get_sector_offset(bs, sector_num, 0);
+ start = offset;
+ allocated = (offset != -1);
+ *pnum = 0;
+
+ do {
+ /* All sectors in a block are contiguous (without using the bitmap) */
+ n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
+ - sector_num;
+ n = MIN(n, nb_sectors);
+
+ *pnum += n;
+ sector_num += n;
+ nb_sectors -= n;
+ /* *pnum can't be greater than one block for allocated
+ * sectors since there is always a bitmap in between. */
+ if (allocated) {
+ *file = bs->file->bs;
+ return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
+ }
+ if (nb_sectors == 0) {
+ break;
+ }
+ offset = get_sector_offset(bs, sector_num, 0);
+ } while (offset == -1);
+
+ return 0;
}
/*
@@ -605,7 +733,7 @@
* Note that the geometry doesn't always exactly match total_sectors but
* may round it down.
*
- * Returns 0 on success, -EFBIG if the size is larger than ~2 TB. Override
+ * Returns 0 on success; input is clamped to VHD_MAX_GEOMETRY sectors. Override
* the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
* and instead allow up to 255 heads.
*/
@@ -614,26 +742,20 @@
{
uint32_t cyls_times_heads;
- /* Allow a maximum disk size of approximately 2 TB */
- if (total_sectors > 65535LL * 255 * 255) {
- return -EFBIG;
- }
+ total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
- if (total_sectors > 65535 * 16 * 63) {
+ if (total_sectors >= 65535LL * 16 * 63) {
*secs_per_cyl = 255;
- if (total_sectors > 65535 * 16 * 255) {
- *heads = 255;
- } else {
- *heads = 16;
- }
+ *heads = 16;
cyls_times_heads = total_sectors / *secs_per_cyl;
} else {
*secs_per_cyl = 17;
cyls_times_heads = total_sectors / *secs_per_cyl;
*heads = (cyls_times_heads + 1023) / 1024;
- if (*heads < 4)
+ if (*heads < 4) {
*heads = 4;
+ }
if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
*secs_per_cyl = 31;
@@ -653,7 +775,7 @@
return 0;
}
-static int create_dynamic_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_sectors)
{
VHDDynDiskHeader *dyndisk_header =
@@ -663,34 +785,34 @@
int ret;
int64_t offset = 0;
- // Write the footer (twice: at the beginning and at the end)
+ /* Write the footer (twice: at the beginning and at the end) */
block_size = 0x200000;
num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
- if (ret) {
- goto fail;
- }
-
- offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
- ret = bdrv_pwrite_sync(bs, offset, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
if (ret < 0) {
goto fail;
}
- // Write the initial BAT
+ offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
+ ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ /* Write the initial BAT */
offset = 3 * 512;
memset(buf, 0xFF, 512);
for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
- ret = bdrv_pwrite_sync(bs, offset, buf, 512);
+ ret = blk_pwrite(blk, offset, buf, 512, 0);
if (ret < 0) {
goto fail;
}
offset += 512;
}
- // Prepare the Dynamic Disk Header
+ /* Prepare the Dynamic Disk Header */
memset(buf, 0, 1024);
memcpy(dyndisk_header->magic, "cxsparse", 8);
@@ -707,10 +829,10 @@
dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
- // Write the header
+ /* Write the header */
offset = 512;
- ret = bdrv_pwrite_sync(bs, offset, buf, 1024);
+ ret = blk_pwrite(blk, offset, buf, 1024, 0);
if (ret < 0) {
goto fail;
}
@@ -719,7 +841,7 @@
return ret;
}
-static int create_fixed_disk(BlockDriverState *bs, uint8_t *buf,
+static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
int64_t total_size)
{
int ret;
@@ -727,12 +849,12 @@
/* Add footer to total size */
total_size += HEADER_SIZE;
- ret = bdrv_truncate(bs, total_size);
+ ret = blk_truncate(blk, total_size);
if (ret < 0) {
return ret;
}
- ret = bdrv_pwrite_sync(bs, total_size - HEADER_SIZE, buf, HEADER_SIZE);
+ ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
if (ret < 0) {
return ret;
}
@@ -753,8 +875,9 @@
int64_t total_size;
int disk_type;
int ret = -EIO;
+ bool force_size;
Error *local_err = NULL;
- BlockDriverState *bs = NULL;
+ BlockBackend *blk = NULL;
/* Read out options */
total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
@@ -766,6 +889,7 @@
} else if (!strcmp(disk_type_param, "fixed")) {
disk_type = VHD_FIXED;
} else {
+ error_setg(errp, "Invalid disk type, %s", disk_type_param);
ret = -EINVAL;
goto out;
}
@@ -773,41 +897,67 @@
disk_type = VHD_DYNAMIC;
}
+ force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
+
ret = bdrv_create_file(filename, opts, &local_err);
if (ret < 0) {
error_propagate(errp, local_err);
goto out;
}
- ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
- NULL, &local_err);
- if (ret < 0) {
+
+ blk = blk_new_open(filename, NULL, NULL,
+ BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
+ if (blk == NULL) {
error_propagate(errp, local_err);
+ ret = -EIO;
goto out;
}
+ blk_set_allow_write_beyond_eof(blk, true);
+
/*
* Calculate matching total_size and geometry. Increase the number of
* sectors requested until we get enough (or fail). This ensures that
* qemu-img convert doesn't truncate images, but rather rounds up.
+ *
+ * If the image size can't be represented by a spec conformant CHS geometry,
+ * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
+ * the image size from the VHD footer to calculate total_sectors.
*/
- total_sectors = total_size / BDRV_SECTOR_SIZE;
- for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
- if (calculate_geometry(total_sectors + i, &cyls, &heads,
- &secs_per_cyl))
- {
- ret = -EFBIG;
- goto out;
+ if (force_size) {
+ /* This will force the use of total_size for sector count, below */
+ cyls = VHD_CHS_MAX_C;
+ heads = VHD_CHS_MAX_H;
+ secs_per_cyl = VHD_CHS_MAX_S;
+ } else {
+ total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
+ for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
+ calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
}
}
- total_sectors = (int64_t) cyls * heads * secs_per_cyl;
+ if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
+ total_sectors = total_size / BDRV_SECTOR_SIZE;
+ /* Allow a maximum disk size of 2040 GiB */
+ if (total_sectors > VHD_MAX_SECTORS) {
+ error_setg(errp, "Disk size is too large, max size is 2040 GiB");
+ ret = -EFBIG;
+ goto out;
+ }
+ } else {
+ total_sectors = (int64_t)cyls * heads * secs_per_cyl;
+ total_size = total_sectors * BDRV_SECTOR_SIZE;
+ }
/* Prepare the Hard Disk Footer */
memset(buf, 0, 1024);
memcpy(footer->creator, "conectix", 8);
- /* TODO Check if "qemu" creator_app is ok for VPC */
- memcpy(footer->creator_app, "qemu", 4);
+ if (force_size) {
+ memcpy(footer->creator_app, "qem2", 4);
+ } else {
+ memcpy(footer->creator_app, "qemu", 4);
+ }
memcpy(footer->creator_os, "Wi2k", 4);
footer->features = cpu_to_be32(0x02);
@@ -822,13 +972,8 @@
/* Version of Virtual PC 2007 */
footer->major = cpu_to_be16(0x0005);
footer->minor = cpu_to_be16(0x0003);
- if (disk_type == VHD_DYNAMIC) {
- footer->orig_size = cpu_to_be64(total_sectors * 512);
- footer->size = cpu_to_be64(total_sectors * 512);
- } else {
- footer->orig_size = cpu_to_be64(total_size);
- footer->size = cpu_to_be64(total_size);
- }
+ footer->orig_size = cpu_to_be64(total_size);
+ footer->current_size = cpu_to_be64(total_size);
footer->cyls = cpu_to_be16(cyls);
footer->heads = heads;
footer->secs_per_cyl = secs_per_cyl;
@@ -842,13 +987,16 @@
footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
if (disk_type == VHD_DYNAMIC) {
- ret = create_dynamic_disk(bs, buf, total_sectors);
+ ret = create_dynamic_disk(blk, buf, total_sectors);
} else {
- ret = create_fixed_disk(bs, buf, total_size);
+ ret = create_fixed_disk(blk, buf, total_size);
+ }
+ if (ret < 0) {
+ error_setg(errp, "Unable to create or write VHD header");
}
out:
- bdrv_unref(bs);
+ blk_unref(blk);
g_free(disk_type_param);
return ret;
}
@@ -859,7 +1007,7 @@
VHDFooter *footer = (VHDFooter *) s->footer_buf;
if (be32_to_cpu(footer->type) == VHD_FIXED) {
- return bdrv_has_zero_init(bs->file);
+ return bdrv_has_zero_init(bs->file->bs);
} else {
return 1;
}
@@ -894,9 +1042,11 @@
"{dynamic (default) | fixed} "
},
{
- .name = BLOCK_OPT_NOCOW,
+ .name = VPC_OPT_FORCE_SIZE,
.type = QEMU_OPT_BOOL,
- .help = "Turn off copy-on-write (valid only on btrfs)"
+ .help = "Force disk size calculation to use the actual size "
+ "specified, rather than using the nearest CHS-based "
+ "calculation"
},
{ /* end of list */ }
}
@@ -912,8 +1062,9 @@
.bdrv_reopen_prepare = vpc_reopen_prepare,
.bdrv_create = vpc_create,
- .bdrv_read = vpc_co_read,
- .bdrv_write = vpc_co_write,
+ .bdrv_co_preadv = vpc_co_preadv,
+ .bdrv_co_pwritev = vpc_co_pwritev,
+ .bdrv_co_get_block_status = vpc_co_get_block_status,
.bdrv_get_info = vpc_get_info,
diff --git a/block/vvfat.c b/block/vvfat.c
index e684991..a9c0716 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -22,17 +22,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-#include <sys/stat.h>
+#include "qemu/osdep.h"
#include <dirent.h>
-#include "qemu-common.h"
+#include "qapi/error.h"
#include "block/block_int.h"
#include "qemu/module.h"
+#include "qemu/bswap.h"
#include "migration/migration.h"
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qbool.h"
-#ifdef CONFIG_ANDROID
-#include "android/utils/file_io.h"
-#endif // CONFIG_ANDROID
+#include "qapi/qmp/qstring.h"
+#include "qemu/cutils.h"
#ifndef S_IWGRP
#define S_IWGRP 0
@@ -114,15 +114,12 @@
static inline void* array_get_next(array_t* array) {
unsigned int next = array->next;
- void* result;
if (array_ensure_allocated(array, next) < 0)
return NULL;
array->next = next + 1;
- result = array_get(array, next);
-
- return result;
+ return array_get(array, next);
}
static inline void* array_insert(array_t* array,unsigned int index,unsigned int count) {
@@ -325,6 +322,7 @@
int fat_type; /* 16 or 32 */
array_t fat,directory,mapping;
+ char volume_label[11];
unsigned int cluster_size;
unsigned int sectors_per_cluster;
@@ -343,9 +341,8 @@
unsigned int current_cluster;
/* write support */
- BlockDriverState* write_target;
char* qcow_filename;
- BlockDriverState* qcow;
+ BdrvChild* qcow;
void* fat2;
char* used_clusters;
array_t commits;
@@ -734,11 +731,7 @@
buffer = g_malloc(length);
snprintf(buffer,length,"%s/%s",dirname,entry->d_name);
-#ifdef CONFIG_ANDROID
- if(android_stat(buffer,&st)<0) {
-#else // !CONFIG_ANDROID
- if(stat(buffer,&st)<0) {
-#endif // CONFIG_ANDROID
+ if(qemu_stat(buffer,&st)<0) {
g_free(buffer);
continue;
}
@@ -866,7 +859,7 @@
{
direntry_t* entry=array_get_next(&(s->directory));
entry->attributes=0x28; /* archive | volume label */
- memcpy(entry->name, "QEMU VVFAT ", sizeof(entry->name));
+ memcpy(entry->name, s->volume_label, sizeof(entry->name));
}
/* Now build FAT, and write back information into directory */
@@ -975,7 +968,8 @@
bootsector->u.fat16.signature=0x29;
bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd);
- memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11);
+ memcpy(bootsector->u.fat16.volume_label, s->volume_label,
+ sizeof(bootsector->u.fat16.volume_label));
memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12 ":s->fat_type==16?"FAT16 ":"FAT32 "),8);
bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa;
@@ -986,15 +980,9 @@
static BDRVVVFATState *vvv = NULL;
#endif
-static int enable_write_target(BDRVVVFATState *s, Error **errp);
+static int enable_write_target(BlockDriverState *bs, Error **errp);
static int is_consistent(BDRVVVFATState *s);
-static void vvfat_rebind(BlockDriverState *bs)
-{
- BDRVVVFATState *s = bs->opaque;
- s->bs = bs;
-}
-
static QemuOptsList runtime_opts = {
.name = "vvfat",
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
@@ -1015,6 +1003,11 @@
.help = "Create a floppy rather than a hard disk image",
},
{
+ .name = "label",
+ .type = QEMU_OPT_STRING,
+ .help = "Use a volume label other than QEMU VVFAT",
+ },
+ {
.name = "rw",
.type = QEMU_OPT_BOOL,
.help = "Make the image writable",
@@ -1066,8 +1059,8 @@
/* Fill in the options QDict */
qdict_put(options, "dir", qstring_from_str(filename));
qdict_put(options, "fat-type", qint_from_int(fat_type));
- qdict_put(options, "floppy", qbool_from_int(floppy));
- qdict_put(options, "rw", qbool_from_int(rw));
+ qdict_put(options, "floppy", qbool_from_bool(floppy));
+ qdict_put(options, "rw", qbool_from_bool(rw));
}
static int vvfat_open(BlockDriverState *bs, QDict *options, int flags,
@@ -1076,7 +1069,7 @@
BDRVVVFATState *s = bs->opaque;
int cyls, heads, secs;
bool floppy;
- const char *dirname;
+ const char *dirname, *label;
QemuOpts *opts;
Error *local_err = NULL;
int ret;
@@ -1103,6 +1096,20 @@
s->fat_type = qemu_opt_get_number(opts, "fat-type", 0);
floppy = qemu_opt_get_bool(opts, "floppy", false);
+ memset(s->volume_label, ' ', sizeof(s->volume_label));
+ label = qemu_opt_get(opts, "label");
+ if (label) {
+ size_t label_length = strlen(label);
+ if (label_length > 11) {
+ error_setg(errp, "vvfat label cannot be longer than 11 bytes");
+ ret = -EINVAL;
+ goto fail;
+ }
+ memcpy(s->volume_label, label, label_length);
+ } else {
+ memcpy(s->volume_label, "QEMU VVFAT", 10);
+ }
+
if (floppy) {
/* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */
if (!s->fat_type) {
@@ -1150,8 +1157,8 @@
s->current_cluster=0xffffffff;
/* read only is the default for safety */
- bs->read_only = 1;
- s->qcow = s->write_target = NULL;
+ bs->read_only = true;
+ s->qcow = NULL;
s->qcow_filename = NULL;
s->fat2 = NULL;
s->downcase_short_names = 1;
@@ -1162,11 +1169,11 @@
s->sector_count = cyls * heads * secs - (s->first_sectors_number - 1);
if (qemu_opt_get_bool(opts, "rw", false)) {
- ret = enable_write_target(s, errp);
+ ret = enable_write_target(bs, errp);
if (ret < 0) {
goto fail;
}
- bs->read_only = 0;
+ bs->read_only = false;
}
bs->total_sectors = cyls * heads * secs;
@@ -1187,9 +1194,10 @@
/* Disable migration when vvfat is used rw */
if (s->qcow) {
- error_set(&s->migration_blocker,
- QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- "vvfat (rw)", bdrv_get_device_name(bs), "live migration");
+ error_setg(&s->migration_blocker,
+ "The vvfat (rw) format used by node '%s' "
+ "does not support live migration",
+ bdrv_get_device_or_node_name(bs));
migrate_add_blocker(s->migration_blocker);
}
@@ -1199,6 +1207,11 @@
return ret;
}
+static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+ bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+}
+
static inline void vvfat_close_current_file(BDRVVVFATState *s)
{
if(s->current_mapping) {
@@ -1377,9 +1390,10 @@
return -1;
if (s->qcow) {
int n;
- if (bdrv_is_allocated(s->qcow, sector_num, nb_sectors-i, &n)) {
-DLOG(fprintf(stderr, "sectors %d+%d allocated\n", (int)sector_num, n));
- if (bdrv_read(s->qcow, sector_num, buf + i*0x200, n)) {
+ if (bdrv_is_allocated(s->qcow->bs, sector_num, nb_sectors-i, &n)) {
+ DLOG(fprintf(stderr, "sectors %d+%d allocated\n",
+ (int)sector_num, n));
+ if (bdrv_read(s->qcow, sector_num, buf + i * 0x200, n)) {
return -1;
}
i += n - 1;
@@ -1410,14 +1424,31 @@
return 0;
}
-static coroutine_fn int vvfat_co_read(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_read(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ qemu_iovec_from_buf(qiov, 0, buf, bytes);
+ g_free(buf);
+
return ret;
}
@@ -1638,12 +1669,15 @@
int was_modified = 0;
int i, dummy;
- if (s->qcow == NULL)
- return 0;
+ if (s->qcow == NULL) {
+ return 0;
+ }
- for (i = 0; !was_modified && i < s->sectors_per_cluster; i++)
- was_modified = bdrv_is_allocated(s->qcow,
- cluster2sector(s, cluster_num) + i, 1, &dummy);
+ for (i = 0; !was_modified && i < s->sectors_per_cluster; i++) {
+ was_modified = bdrv_is_allocated(s->qcow->bs,
+ cluster2sector(s, cluster_num) + i,
+ 1, &dummy);
+ }
return was_modified;
}
@@ -1792,11 +1826,16 @@
vvfat_close_current_file(s);
for (i = 0; i < s->sectors_per_cluster; i++) {
- if (!bdrv_is_allocated(s->qcow, offset + i, 1, &dummy)) {
- if (vvfat_read(s->bs, offset, s->cluster_buffer, 1)) {
+ int res;
+
+ res = bdrv_is_allocated(s->qcow->bs, offset + i, 1, &dummy);
+ if (!res) {
+ res = vvfat_read(s->bs, offset, s->cluster_buffer, 1);
+ if (res) {
return -1;
}
- if (bdrv_write(s->qcow, offset, s->cluster_buffer, 1)) {
+ res = bdrv_write(s->qcow, offset, s->cluster_buffer, 1);
+ if (res) {
return -2;
}
}
@@ -1930,8 +1969,7 @@
/* check file size with FAT */
cluster_count = get_cluster_count_for_direntry(s, direntries + i, path2);
if (cluster_count !=
- (le32_to_cpu(direntries[i].size) + s->cluster_size
- - 1) / s->cluster_size) {
+ DIV_ROUND_UP(le32_to_cpu(direntries[i].size), s->cluster_size)) {
DLOG(fprintf(stderr, "Cluster count mismatch\n"));
goto fail;
}
@@ -2274,12 +2312,17 @@
factor * (old_cluster_count - new_cluster_count));
for (c = first_cluster; !fat_eof(s, c); c = modified_fat_get(s, c)) {
+ direntry_t *first_direntry;
void* direntry = array_get(&(s->directory), current_dir_index);
int ret = vvfat_read(s->bs, cluster2sector(s, c), direntry,
s->sectors_per_cluster);
if (ret)
return ret;
- assert(!strncmp(s->directory.pointer, "QEMU", 4));
+
+ /* The first directory entry on the filesystem is the volume name */
+ first_direntry = (direntry_t*) s->directory.pointer;
+ assert(!memcmp(first_direntry->name, s->volume_label, 11));
+
current_dir_index += factor;
}
@@ -2748,8 +2791,8 @@
return ret;
}
- if (s->qcow->drv->bdrv_make_empty) {
- s->qcow->drv->bdrv_make_empty(s->qcow);
+ if (s->qcow->bs->drv->bdrv_make_empty) {
+ s->qcow->bs->drv->bdrv_make_empty(s->qcow->bs);
}
memset(s->used_clusters, 0, sector2cluster(s, s->sector_count));
@@ -2864,19 +2907,36 @@
return 0;
}
-static coroutine_fn int vvfat_co_write(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors)
+static int coroutine_fn
+vvfat_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
{
int ret;
BDRVVVFATState *s = bs->opaque;
+ uint64_t sector_num = offset >> BDRV_SECTOR_BITS;
+ int nb_sectors = bytes >> BDRV_SECTOR_BITS;
+ void *buf;
+
+ assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+ assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
+
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+ qemu_iovec_to_buf(qiov, 0, buf, bytes);
+
qemu_co_mutex_lock(&s->lock);
ret = vvfat_write(bs, sector_num, buf, nb_sectors);
qemu_co_mutex_unlock(&s->lock);
+
+ g_free(buf);
+
return ret;
}
static int64_t coroutine_fn vvfat_co_get_block_status(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, int* n)
+ int64_t sector_num, int nb_sectors, int *n, BlockDriverState **file)
{
BDRVVVFATState* s = bs->opaque;
*n = s->sector_count - sector_num;
@@ -2888,45 +2948,68 @@
return BDRV_BLOCK_DATA;
}
-static int write_target_commit(BlockDriverState *bs, int64_t sector_num,
- const uint8_t* buffer, int nb_sectors) {
+static int coroutine_fn
+write_target_commit(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, int flags)
+{
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
return try_commit(s);
}
static void write_target_close(BlockDriverState *bs) {
BDRVVVFATState* s = *((BDRVVVFATState**) bs->opaque);
- bdrv_unref(s->qcow);
+ bdrv_unref_child(s->bs, s->qcow);
g_free(s->qcow_filename);
}
static BlockDriver vvfat_write_target = {
.format_name = "vvfat_write_target",
- .bdrv_write = write_target_commit,
+ .bdrv_co_pwritev = write_target_commit,
.bdrv_close = write_target_close,
};
-static int enable_write_target(BDRVVVFATState *s, Error **errp)
+static void vvfat_qcow_options(int *child_flags, QDict *child_options,
+ int parent_flags, QDict *parent_options)
{
+ *child_flags = BDRV_O_RDWR | BDRV_O_NO_FLUSH;
+}
+
+static const BdrvChildRole child_vvfat_qcow = {
+ .inherit_options = vvfat_qcow_options,
+};
+
+static int enable_write_target(BlockDriverState *bs, Error **errp)
+{
+ BDRVVVFATState *s = bs->opaque;
BlockDriver *bdrv_qcow = NULL;
+ BlockDriverState *backing;
QemuOpts *opts = NULL;
int ret;
int size = sector2cluster(s, s->sector_count);
+ QDict *options;
+
s->used_clusters = calloc(size, 1);
array_init(&(s->commits), sizeof(commit_t));
- s->qcow_filename = g_malloc(1024);
- ret = get_tmp_filename(s->qcow_filename, 1024);
+ s->qcow_filename = g_malloc(PATH_MAX);
+ ret = get_tmp_filename(s->qcow_filename, PATH_MAX);
if (ret < 0) {
error_setg_errno(errp, -ret, "can't create temporary file");
goto err;
}
bdrv_qcow = bdrv_find_format("qcow");
+ if (!bdrv_qcow) {
+ error_setg(errp, "Failed to locate qcow driver");
+ ret = -ENOENT;
+ goto err;
+ }
+
opts = qemu_opts_create(bdrv_qcow->create_opts, NULL, 0, &error_abort);
- qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512);
- qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:");
+ qemu_opt_set_number(opts, BLOCK_OPT_SIZE, s->sector_count * 512,
+ &error_abort);
+ qemu_opt_set(opts, BLOCK_OPT_BACKING_FILE, "fat:", &error_abort);
ret = bdrv_create(bdrv_qcow, s->qcow_filename, opts, errp);
qemu_opts_del(opts);
@@ -2934,11 +3017,13 @@
goto err;
}
- s->qcow = NULL;
- ret = bdrv_open(&s->qcow, s->qcow_filename, NULL, NULL,
- BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
- bdrv_qcow, errp);
- if (ret < 0) {
+ options = qdict_new();
+ qdict_put(options, "write-target.driver", qstring_from_str("qcow"));
+ s->qcow = bdrv_open_child(s->qcow_filename, options, "write-target", bs,
+ &child_vvfat_qcow, false, errp);
+ QDECREF(options);
+ if (!s->qcow) {
+ ret = -EINVAL;
goto err;
}
@@ -2946,10 +3031,13 @@
unlink(s->qcow_filename);
#endif
- bdrv_set_backing_hd(s->bs, bdrv_new());
- s->bs->backing_hd->drv = &vvfat_write_target;
- s->bs->backing_hd->opaque = g_new(void *, 1);
- *(void**)s->bs->backing_hd->opaque = s;
+ backing = bdrv_new();
+ bdrv_set_backing_hd(s->bs, backing);
+ bdrv_unref(backing);
+
+ s->bs->backing->bs->drv = &vvfat_write_target;
+ s->bs->backing->bs->opaque = g_new(void *, 1);
+ *(void**)s->bs->backing->bs->opaque = s;
return 0;
@@ -2982,11 +3070,11 @@
.bdrv_parse_filename = vvfat_parse_filename,
.bdrv_file_open = vvfat_open,
+ .bdrv_refresh_limits = vvfat_refresh_limits,
.bdrv_close = vvfat_close,
- .bdrv_rebind = vvfat_rebind,
- .bdrv_read = vvfat_co_read,
- .bdrv_write = vvfat_co_write,
+ .bdrv_co_preadv = vvfat_co_preadv,
+ .bdrv_co_pwritev = vvfat_co_pwritev,
.bdrv_co_get_block_status = vvfat_co_get_block_status,
};
diff --git a/block/win32-aio.c b/block/win32-aio.c
index 64e8682..95e3ab1 100644
--- a/block/win32-aio.c
+++ b/block/win32-aio.c
@@ -21,12 +21,13 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "qemu/timer.h"
#include "block/block_int.h"
#include "qemu/module.h"
#include "block/aio.h"
-#include "raw-aio.h"
+#include "block/raw-aio.h"
#include "qemu/event_notifier.h"
#include "qemu/iov.h"
#include <windows.h>
@@ -174,7 +175,7 @@
void win32_aio_detach_aio_context(QEMUWin32AIOState *aio,
AioContext *old_context)
{
- aio_set_event_notifier(old_context, &aio->e, NULL);
+ aio_set_event_notifier(old_context, &aio->e, false, NULL);
aio->is_aio_context_attached = false;
}
@@ -182,7 +183,8 @@
AioContext *new_context)
{
aio->is_aio_context_attached = true;
- aio_set_event_notifier(new_context, &aio->e, win32_aio_completion_cb);
+ aio_set_event_notifier(new_context, &aio->e, false,
+ win32_aio_completion_cb);
}
QEMUWin32AIOState *win32_aio_init(void)
diff --git a/block/write-threshold.c b/block/write-threshold.c
new file mode 100644
index 0000000..cc2ca71
--- /dev/null
+++ b/block/write-threshold.c
@@ -0,0 +1,126 @@
+/*
+ * QEMU System Emulator block write threshold notification
+ *
+ * Copyright Red Hat, Inc. 2014
+ *
+ * Authors:
+ * Francesco Romani <fromani@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block_int.h"
+#include "qemu/coroutine.h"
+#include "block/write-threshold.h"
+#include "qemu/notify.h"
+#include "qapi-event.h"
+#include "qmp-commands.h"
+
+
+uint64_t bdrv_write_threshold_get(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset;
+}
+
+bool bdrv_write_threshold_is_set(const BlockDriverState *bs)
+{
+ return bs->write_threshold_offset > 0;
+}
+
+static void write_threshold_disable(BlockDriverState *bs)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ notifier_with_return_remove(&bs->write_threshold_notifier);
+ bs->write_threshold_offset = 0;
+ }
+}
+
+uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs,
+ const BdrvTrackedRequest *req)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (req->offset > bs->write_threshold_offset) {
+ return (req->offset - bs->write_threshold_offset) + req->bytes;
+ }
+ if ((req->offset + req->bytes) > bs->write_threshold_offset) {
+ return (req->offset + req->bytes) - bs->write_threshold_offset;
+ }
+ }
+ return 0;
+}
+
+static int coroutine_fn before_write_notify(NotifierWithReturn *notifier,
+ void *opaque)
+{
+ BdrvTrackedRequest *req = opaque;
+ BlockDriverState *bs = req->bs;
+ uint64_t amount = 0;
+
+ amount = bdrv_write_threshold_exceeded(bs, req);
+ if (amount > 0) {
+ qapi_event_send_block_write_threshold(
+ bs->node_name,
+ amount,
+ bs->write_threshold_offset,
+ &error_abort);
+
+ /* autodisable to avoid flooding the monitor */
+ write_threshold_disable(bs);
+ }
+
+ return 0; /* should always let other notifiers run */
+}
+
+static void write_threshold_register_notifier(BlockDriverState *bs)
+{
+ bs->write_threshold_notifier.notify = before_write_notify;
+ notifier_with_return_list_add(&bs->before_write_notifiers,
+ &bs->write_threshold_notifier);
+}
+
+static void write_threshold_update(BlockDriverState *bs,
+ int64_t threshold_bytes)
+{
+ bs->write_threshold_offset = threshold_bytes;
+}
+
+void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes)
+{
+ if (bdrv_write_threshold_is_set(bs)) {
+ if (threshold_bytes > 0) {
+ write_threshold_update(bs, threshold_bytes);
+ } else {
+ write_threshold_disable(bs);
+ }
+ } else {
+ if (threshold_bytes > 0) {
+ /* avoid multiple registration */
+ write_threshold_register_notifier(bs);
+ write_threshold_update(bs, threshold_bytes);
+ }
+ /* discard bogus disable request */
+ }
+}
+
+void qmp_block_set_write_threshold(const char *node_name,
+ uint64_t threshold_bytes,
+ Error **errp)
+{
+ BlockDriverState *bs;
+ AioContext *aio_context;
+
+ bs = bdrv_find_node(node_name);
+ if (!bs) {
+ error_setg(errp, "Device '%s' not found", node_name);
+ return;
+ }
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_write_threshold_set(bs, threshold_bytes);
+
+ aio_context_release(aio_context);
+}
diff --git a/blockdev-nbd.c b/blockdev-nbd.c
index 4eed45d..12cae0e 100644
--- a/blockdev-nbd.c
+++ b/blockdev-nbd.c
@@ -9,75 +9,146 @@
* later. See the COPYING file in the top-level directory.
*/
+#include "qemu/osdep.h"
#include "sysemu/blockdev.h"
+#include "sysemu/block-backend.h"
#include "hw/block/block.h"
-#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
#include "sysemu/sysemu.h"
#include "qmp-commands.h"
#include "trace.h"
#include "block/nbd.h"
-#include "qemu/sockets.h"
+#include "io/channel-socket.h"
-static int server_fd = -1;
+typedef struct NBDServerData {
+ QIOChannelSocket *listen_ioc;
+ int watch;
+ QCryptoTLSCreds *tlscreds;
+} NBDServerData;
-static void nbd_accept(void *opaque)
+static NBDServerData *nbd_server;
+
+
+static gboolean nbd_accept(QIOChannel *ioc, GIOCondition condition,
+ gpointer opaque)
{
- struct sockaddr_in addr;
- socklen_t addr_len = sizeof(addr);
+ QIOChannelSocket *cioc;
- int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
- if (fd >= 0 && !nbd_client_new(NULL, fd, nbd_client_put)) {
- shutdown(fd, 2);
- close(fd);
+ if (!nbd_server) {
+ return FALSE;
}
+
+ cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
+ NULL);
+ if (!cioc) {
+ return TRUE;
+ }
+
+ nbd_client_new(NULL, cioc,
+ nbd_server->tlscreds, NULL,
+ nbd_client_put);
+ object_unref(OBJECT(cioc));
+ return TRUE;
}
-void qmp_nbd_server_start(SocketAddress *addr, Error **errp)
+
+static void nbd_server_free(NBDServerData *server)
{
- if (server_fd != -1) {
+ if (!server) {
+ return;
+ }
+
+ if (server->watch != -1) {
+ g_source_remove(server->watch);
+ }
+ object_unref(OBJECT(server->listen_ioc));
+ if (server->tlscreds) {
+ object_unref(OBJECT(server->tlscreds));
+ }
+
+ g_free(server);
+}
+
+static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
+{
+ Object *obj;
+ QCryptoTLSCreds *creds;
+
+ obj = object_resolve_path_component(
+ object_get_objects_root(), id);
+ if (!obj) {
+ error_setg(errp, "No TLS credentials with id '%s'",
+ id);
+ return NULL;
+ }
+ creds = (QCryptoTLSCreds *)
+ object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
+ if (!creds) {
+ error_setg(errp, "Object with id '%s' is not TLS credentials",
+ id);
+ return NULL;
+ }
+
+ if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
+ error_setg(errp,
+ "Expecting TLS credentials with a server endpoint");
+ return NULL;
+ }
+ object_ref(obj);
+ return creds;
+}
+
+
+void qmp_nbd_server_start(SocketAddress *addr,
+ bool has_tls_creds, const char *tls_creds,
+ Error **errp)
+{
+ if (nbd_server) {
error_setg(errp, "NBD server already running");
return;
}
- server_fd = socket_listen_addr(addr, errp);
- if (server_fd != -1) {
- qemu_set_fd_handler2(server_fd, NULL, nbd_accept, NULL, NULL);
+ nbd_server = g_new0(NBDServerData, 1);
+ nbd_server->watch = -1;
+ nbd_server->listen_ioc = qio_channel_socket_new();
+ if (qio_channel_socket_listen_sync(
+ nbd_server->listen_ioc, addr, errp) < 0) {
+ goto error;
}
-}
-/* Hook into the BlockDriverState notifiers to close the export when
- * the file is closed.
- */
-typedef struct NBDCloseNotifier {
- Notifier n;
- NBDExport *exp;
- QTAILQ_ENTRY(NBDCloseNotifier) next;
-} NBDCloseNotifier;
+ if (has_tls_creds) {
+ nbd_server->tlscreds = nbd_get_tls_creds(tls_creds, errp);
+ if (!nbd_server->tlscreds) {
+ goto error;
+ }
-static QTAILQ_HEAD(, NBDCloseNotifier) close_notifiers =
- QTAILQ_HEAD_INITIALIZER(close_notifiers);
+ if (addr->type != SOCKET_ADDRESS_KIND_INET) {
+ error_setg(errp, "TLS is only supported with IPv4/IPv6");
+ goto error;
+ }
+ }
-static void nbd_close_notifier(Notifier *n, void *data)
-{
- NBDCloseNotifier *cn = DO_UPCAST(NBDCloseNotifier, n, n);
+ nbd_server->watch = qio_channel_add_watch(
+ QIO_CHANNEL(nbd_server->listen_ioc),
+ G_IO_IN,
+ nbd_accept,
+ NULL,
+ NULL);
- notifier_remove(&cn->n);
- QTAILQ_REMOVE(&close_notifiers, cn, next);
+ return;
- nbd_export_close(cn->exp);
- nbd_export_put(cn->exp);
- g_free(cn);
+ error:
+ nbd_server_free(nbd_server);
+ nbd_server = NULL;
}
void qmp_nbd_server_add(const char *device, bool has_writable, bool writable,
Error **errp)
{
- BlockDriverState *bs;
+ BlockBackend *blk;
NBDExport *exp;
- NBDCloseNotifier *n;
- if (server_fd == -1) {
+ if (!nbd_server) {
error_setg(errp, "NBD server not running");
return;
}
@@ -87,44 +158,42 @@
return;
}
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
- if (!bdrv_is_inserted(bs)) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ if (!blk_is_inserted(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
return;
}
if (!has_writable) {
writable = false;
}
- if (bdrv_is_read_only(bs)) {
+ if (blk_is_read_only(blk)) {
writable = false;
}
- exp = nbd_export_new(bs, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL);
+ exp = nbd_export_new(blk, 0, -1, writable ? 0 : NBD_FLAG_READ_ONLY, NULL,
+ errp);
+ if (!exp) {
+ return;
+ }
nbd_export_set_name(exp, device);
- n = g_new0(NBDCloseNotifier, 1);
- n->n.notify = nbd_close_notifier;
- n->exp = exp;
- bdrv_add_close_notifier(bs, &n->n);
- QTAILQ_INSERT_TAIL(&close_notifiers, n, next);
+ /* The list of named exports has a strong reference to this export now and
+ * our only way of accessing it is through nbd_export_find(), so we can drop
+ * the strong reference that is @exp. */
+ nbd_export_put(exp);
}
void qmp_nbd_server_stop(Error **errp)
{
- while (!QTAILQ_EMPTY(&close_notifiers)) {
- NBDCloseNotifier *cn = QTAILQ_FIRST(&close_notifiers);
- nbd_close_notifier(&cn->n, nbd_export_get_blockdev(cn->exp));
- }
+ nbd_export_close_all();
- if (server_fd != -1) {
- qemu_set_fd_handler2(server_fd, NULL, NULL, NULL, NULL);
- close(server_fd);
- server_fd = -1;
- }
+ nbd_server_free(nbd_server);
+ nbd_server = NULL;
}
diff --git a/blockdev.c b/blockdev.c
index 57910b8..2161400 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -30,15 +30,19 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
#include "sysemu/block-backend.h"
#include "sysemu/blockdev.h"
#include "hw/block/block.h"
#include "block/blockjob.h"
+#include "block/throttle-groups.h"
#include "monitor/monitor.h"
+#include "qemu/error-report.h"
#include "qemu/option.h"
#include "qemu/config-file.h"
#include "qapi/qmp/types.h"
#include "qapi-visit.h"
+#include "qapi/qmp/qerror.h"
#include "qapi/qmp-output-visitor.h"
#include "qapi/util.h"
#include "sysemu/sysemu.h"
@@ -46,6 +50,13 @@
#include "qmp-commands.h"
#include "trace.h"
#include "sysemu/arch_init.h"
+#include "qemu/cutils.h"
+#include "qemu/help_option.h"
+
+static QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
+ QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
+
+static int do_open_tray(const char *device, bool force, Error **errp);
static const char *const if_name[IF_COUNT] = {
[IF_NONE] = "none",
@@ -64,7 +75,7 @@
* Do not change these numbers! They govern how drive option
* index maps to unit and bus. That mapping is ABI.
*
- * All controllers used to imlement if=T drives need to support
+ * All controllers used to implement if=T drives need to support
* if_max_devs[T] units, for any T with if_max_devs[T] != 0.
* Otherwise, some index values map to "impossible" bus, unit
* values.
@@ -121,15 +132,17 @@
return;
}
- aio_context = bdrv_get_aio_context(bs);
- aio_context_acquire(aio_context);
+ if (bs) {
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
- if (bs->job) {
- block_job_cancel(bs->job);
+ if (bs->job) {
+ block_job_cancel(bs->job);
+ }
+
+ aio_context_release(aio_context);
}
- aio_context_release(aio_context);
-
dinfo->auto_del = 1;
}
@@ -138,6 +151,7 @@
DriveInfo *dinfo = blk_legacy_dinfo(blk);
if (dinfo && dinfo->auto_del) {
+ monitor_remove_blk(blk);
blk_unref(blk);
}
}
@@ -173,28 +187,26 @@
QemuOpts *drive_def(const char *optstr)
{
- return qemu_opts_parse(qemu_find_opts("drive"), optstr, 0);
+ return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
}
QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
const char *optstr)
{
QemuOpts *opts;
- char buf[32];
opts = drive_def(optstr);
if (!opts) {
return NULL;
}
if (type != IF_DEFAULT) {
- qemu_opt_set(opts, "if", if_name[type]);
+ qemu_opt_set(opts, "if", if_name[type], &error_abort);
}
if (index >= 0) {
- snprintf(buf, sizeof(buf), "%d", index);
- qemu_opt_set(opts, "index", buf);
+ qemu_opt_set_number(opts, "index", index, &error_abort);
}
if (file)
- qemu_opt_set(opts, "file", file);
+ qemu_opt_set(opts, "file", file, &error_abort);
return opts;
}
@@ -228,8 +240,8 @@
dinfo->type != IF_NONE) {
fprintf(stderr, "Warning: Orphaned drive without device: "
"id=%s,file=%s,if=%s,bus=%d,unit=%d\n",
- blk_name(blk), blk_bs(blk)->filename, if_name[dinfo->type],
- dinfo->bus, dinfo->unit);
+ blk_name(blk), blk_bs(blk) ? blk_bs(blk)->filename : "",
+ if_name[dinfo->type], dinfo->bus, dinfo->unit);
rs = true;
}
}
@@ -280,32 +292,6 @@
BlockDriverState *bs;
} BDRVPutRefBH;
-static void bdrv_put_ref_bh(void *opaque)
-{
- BDRVPutRefBH *s = opaque;
-
- bdrv_unref(s->bs);
- qemu_bh_delete(s->bh);
- g_free(s);
-}
-
-/*
- * Release a BDS reference in a BH
- *
- * It is not safe to use bdrv_unref() from a callback function when the callers
- * still need the BlockDriverState. In such cases we schedule a BH to release
- * the reference.
- */
-static void bdrv_put_ref_bh_schedule(BlockDriverState *bs)
-{
- BDRVPutRefBH *s;
-
- s = g_new(BDRVPutRefBH, 1);
- s->bh = qemu_bh_new(bdrv_put_ref_bh, s);
- s->bs = bs;
- qemu_bh_schedule(s->bh);
-}
-
static int parse_block_error_action(const char *buf, bool is_read, Error **errp)
{
if (!strcmp(buf, "ignore")) {
@@ -323,44 +309,181 @@
}
}
-static bool check_throttle_config(ThrottleConfig *cfg, Error **errp)
+static bool parse_stats_intervals(BlockAcctStats *stats, QList *intervals,
+ Error **errp)
{
- if (throttle_conflicting(cfg)) {
- error_setg(errp, "bps/iops/max total values and read/write values"
- " cannot be used at the same time");
- return false;
- }
+ const QListEntry *entry;
+ for (entry = qlist_first(intervals); entry; entry = qlist_next(entry)) {
+ switch (qobject_type(entry->value)) {
- if (!throttle_is_valid(cfg)) {
- error_setg(errp, "bps/iops/maxs values must be 0 or greater");
- return false;
- }
+ case QTYPE_QSTRING: {
+ unsigned long long length;
+ const char *str = qstring_get_str(qobject_to_qstring(entry->value));
+ if (parse_uint_full(str, &length, 10) == 0 &&
+ length > 0 && length <= UINT_MAX) {
+ block_acct_add_interval(stats, (unsigned) length);
+ } else {
+ error_setg(errp, "Invalid interval length: %s", str);
+ return false;
+ }
+ break;
+ }
+ case QTYPE_QINT: {
+ int64_t length = qint_get_int(qobject_to_qint(entry->value));
+ if (length > 0 && length <= UINT_MAX) {
+ block_acct_add_interval(stats, (unsigned) length);
+ } else {
+ error_setg(errp, "Invalid interval length: %" PRId64, length);
+ return false;
+ }
+ break;
+ }
+
+ default:
+ error_setg(errp, "The specification of stats-intervals is invalid");
+ return false;
+ }
+ }
return true;
}
typedef enum { MEDIA_DISK, MEDIA_CDROM } DriveMediaType;
+/* All parameters but @opts are optional and may be set to NULL. */
+static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
+ const char **throttling_group, ThrottleConfig *throttle_cfg,
+ BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
+{
+ const char *discard;
+ Error *local_error = NULL;
+ const char *aio;
+
+ if (bdrv_flags) {
+ if (!qemu_opt_get_bool(opts, "read-only", false)) {
+ *bdrv_flags |= BDRV_O_RDWR;
+ }
+ if (qemu_opt_get_bool(opts, "copy-on-read", false)) {
+ *bdrv_flags |= BDRV_O_COPY_ON_READ;
+ }
+
+ if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
+ if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
+ error_setg(errp, "Invalid discard option");
+ return;
+ }
+ }
+
+ if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
+ if (!strcmp(aio, "native")) {
+ *bdrv_flags |= BDRV_O_NATIVE_AIO;
+ } else if (!strcmp(aio, "threads")) {
+ /* this is the default */
+ } else {
+ error_setg(errp, "invalid aio option");
+ return;
+ }
+ }
+ }
+
+ /* disk I/O throttling */
+ if (throttling_group) {
+ *throttling_group = qemu_opt_get(opts, "throttling.group");
+ }
+
+ if (throttle_cfg) {
+ throttle_config_init(throttle_cfg);
+ throttle_cfg->buckets[THROTTLE_BPS_TOTAL].avg =
+ qemu_opt_get_number(opts, "throttling.bps-total", 0);
+ throttle_cfg->buckets[THROTTLE_BPS_READ].avg =
+ qemu_opt_get_number(opts, "throttling.bps-read", 0);
+ throttle_cfg->buckets[THROTTLE_BPS_WRITE].avg =
+ qemu_opt_get_number(opts, "throttling.bps-write", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_TOTAL].avg =
+ qemu_opt_get_number(opts, "throttling.iops-total", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_READ].avg =
+ qemu_opt_get_number(opts, "throttling.iops-read", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_WRITE].avg =
+ qemu_opt_get_number(opts, "throttling.iops-write", 0);
+
+ throttle_cfg->buckets[THROTTLE_BPS_TOTAL].max =
+ qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
+ throttle_cfg->buckets[THROTTLE_BPS_READ].max =
+ qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
+ throttle_cfg->buckets[THROTTLE_BPS_WRITE].max =
+ qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_TOTAL].max =
+ qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_READ].max =
+ qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
+ throttle_cfg->buckets[THROTTLE_OPS_WRITE].max =
+ qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
+
+ throttle_cfg->buckets[THROTTLE_BPS_TOTAL].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-total-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_BPS_READ].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-read-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_BPS_WRITE].burst_length =
+ qemu_opt_get_number(opts, "throttling.bps-write-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_TOTAL].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-total-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_READ].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-read-max-length", 1);
+ throttle_cfg->buckets[THROTTLE_OPS_WRITE].burst_length =
+ qemu_opt_get_number(opts, "throttling.iops-write-max-length", 1);
+
+ throttle_cfg->op_size =
+ qemu_opt_get_number(opts, "throttling.iops-size", 0);
+
+ if (!throttle_is_valid(throttle_cfg, errp)) {
+ return;
+ }
+ }
+
+ if (detect_zeroes) {
+ *detect_zeroes =
+ qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
+ qemu_opt_get(opts, "detect-zeroes"),
+ BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
+ &local_error);
+ if (local_error) {
+ error_propagate(errp, local_error);
+ return;
+ }
+
+ if (bdrv_flags &&
+ *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
+ !(*bdrv_flags & BDRV_O_UNMAP))
+ {
+ error_setg(errp, "setting detect-zeroes to unmap is not allowed "
+ "without setting discard operation to unmap");
+ return;
+ }
+ }
+}
+
/* Takes the ownership of bs_opts */
static BlockBackend *blockdev_init(const char *file, QDict *bs_opts,
Error **errp)
{
const char *buf;
- int ro = 0;
int bdrv_flags = 0;
int on_read_error, on_write_error;
+ bool account_invalid, account_failed;
+ bool writethrough;
BlockBackend *blk;
BlockDriverState *bs;
ThrottleConfig cfg;
int snapshot = 0;
- bool copy_on_read;
- int ret;
Error *error = NULL;
QemuOpts *opts;
+ QDict *interval_dict = NULL;
+ QList *interval_list = NULL;
const char *id;
- bool has_driver_specific_opts;
- BlockdevDetectZeroesOptions detect_zeroes;
- BlockDriver *drv = NULL;
+ BlockdevDetectZeroesOptions detect_zeroes =
+ BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
+ const char *throttling_group = NULL;
/* Check common options by copying from bs_opts to opts, all other options
* stay in bs_opts for processing by bdrv_open(). */
@@ -381,42 +504,31 @@
qdict_del(bs_opts, "id");
}
- has_driver_specific_opts = !!qdict_size(bs_opts);
-
/* extract parameters */
snapshot = qemu_opt_get_bool(opts, "snapshot", 0);
- ro = qemu_opt_get_bool(opts, "read-only", 0);
- copy_on_read = qemu_opt_get_bool(opts, "copy-on-read", false);
- if ((buf = qemu_opt_get(opts, "discard")) != NULL) {
- if (bdrv_parse_discard_flags(buf, &bdrv_flags) != 0) {
- error_setg(errp, "invalid discard option");
- goto early_err;
- }
+ account_invalid = qemu_opt_get_bool(opts, "stats-account-invalid", true);
+ account_failed = qemu_opt_get_bool(opts, "stats-account-failed", true);
+
+ writethrough = !qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, true);
+
+ id = qemu_opts_id(opts);
+
+ qdict_extract_subqdict(bs_opts, &interval_dict, "stats-intervals.");
+ qdict_array_split(interval_dict, &interval_list);
+
+ if (qdict_size(interval_dict) != 0) {
+ error_setg(errp, "Invalid option stats-intervals.%s",
+ qdict_first(interval_dict)->key);
+ goto early_err;
}
- if (qemu_opt_get_bool(opts, "cache.writeback", true)) {
- bdrv_flags |= BDRV_O_CACHE_WB;
+ extract_common_blockdev_options(opts, &bdrv_flags, &throttling_group, &cfg,
+ &detect_zeroes, &error);
+ if (error) {
+ error_propagate(errp, error);
+ goto early_err;
}
- if (qemu_opt_get_bool(opts, "cache.direct", false)) {
- bdrv_flags |= BDRV_O_NOCACHE;
- }
- if (qemu_opt_get_bool(opts, "cache.no-flush", false)) {
- bdrv_flags |= BDRV_O_NO_FLUSH;
- }
-
-#ifdef CONFIG_LINUX_AIO
- if ((buf = qemu_opt_get(opts, "aio")) != NULL) {
- if (!strcmp(buf, "native")) {
- bdrv_flags |= BDRV_O_NATIVE_AIO;
- } else if (!strcmp(buf, "threads")) {
- /* this is the default */
- } else {
- error_setg(errp, "invalid aio option");
- goto early_err;
- }
- }
-#endif
if ((buf = qemu_opt_get(opts, "format")) != NULL) {
if (is_help_option(buf)) {
@@ -426,46 +538,11 @@
goto early_err;
}
- drv = bdrv_find_format(buf);
- if (!drv) {
- error_setg(errp, "'%s' invalid format", buf);
+ if (qdict_haskey(bs_opts, "driver")) {
+ error_setg(errp, "Cannot specify both 'driver' and 'format'");
goto early_err;
}
- }
-
- /* disk I/O throttling */
- memset(&cfg, 0, sizeof(cfg));
- cfg.buckets[THROTTLE_BPS_TOTAL].avg =
- qemu_opt_get_number(opts, "throttling.bps-total", 0);
- cfg.buckets[THROTTLE_BPS_READ].avg =
- qemu_opt_get_number(opts, "throttling.bps-read", 0);
- cfg.buckets[THROTTLE_BPS_WRITE].avg =
- qemu_opt_get_number(opts, "throttling.bps-write", 0);
- cfg.buckets[THROTTLE_OPS_TOTAL].avg =
- qemu_opt_get_number(opts, "throttling.iops-total", 0);
- cfg.buckets[THROTTLE_OPS_READ].avg =
- qemu_opt_get_number(opts, "throttling.iops-read", 0);
- cfg.buckets[THROTTLE_OPS_WRITE].avg =
- qemu_opt_get_number(opts, "throttling.iops-write", 0);
-
- cfg.buckets[THROTTLE_BPS_TOTAL].max =
- qemu_opt_get_number(opts, "throttling.bps-total-max", 0);
- cfg.buckets[THROTTLE_BPS_READ].max =
- qemu_opt_get_number(opts, "throttling.bps-read-max", 0);
- cfg.buckets[THROTTLE_BPS_WRITE].max =
- qemu_opt_get_number(opts, "throttling.bps-write-max", 0);
- cfg.buckets[THROTTLE_OPS_TOTAL].max =
- qemu_opt_get_number(opts, "throttling.iops-total-max", 0);
- cfg.buckets[THROTTLE_OPS_READ].max =
- qemu_opt_get_number(opts, "throttling.iops-read-max", 0);
- cfg.buckets[THROTTLE_OPS_WRITE].max =
- qemu_opt_get_number(opts, "throttling.iops-write-max", 0);
-
- cfg.op_size = qemu_opt_get_number(opts, "throttling.iops-size", 0);
-
- if (!check_throttle_config(&cfg, &error)) {
- error_propagate(errp, error);
- goto early_err;
+ qdict_put(bs_opts, "driver", qstring_from_str(buf));
}
on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
@@ -486,96 +563,167 @@
}
}
- detect_zeroes =
- qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
- qemu_opt_get(opts, "detect-zeroes"),
- BLOCKDEV_DETECT_ZEROES_OPTIONS_MAX,
- BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
- &error);
- if (error) {
- error_propagate(errp, error);
- goto early_err;
- }
-
- if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
- !(bdrv_flags & BDRV_O_UNMAP)) {
- error_setg(errp, "setting detect-zeroes to unmap is not allowed "
- "without setting discard operation to unmap");
- goto early_err;
+ if (snapshot) {
+ bdrv_flags |= BDRV_O_SNAPSHOT;
}
/* init */
- blk = blk_new_with_bs(qemu_opts_id(opts), errp);
- if (!blk) {
- goto early_err;
- }
- bs = blk_bs(blk);
- bs->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
- bs->read_only = ro;
- bs->detect_zeroes = detect_zeroes;
+ if ((!file || !*file) && !qdict_size(bs_opts)) {
+ BlockBackendRootState *blk_rs;
- bdrv_set_on_error(bs, on_read_error, on_write_error);
+ blk = blk_new();
+ blk_rs = blk_get_root_state(blk);
+ blk_rs->open_flags = bdrv_flags;
+ blk_rs->read_only = !(bdrv_flags & BDRV_O_RDWR);
+ blk_rs->detect_zeroes = detect_zeroes;
+
+ QDECREF(bs_opts);
+ } else {
+ if (file && !*file) {
+ file = NULL;
+ }
+
+ /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+ * with other callers) rather than what we want as the real defaults.
+ * Apply the defaults here instead. */
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+ assert((bdrv_flags & BDRV_O_CACHE_MASK) == 0);
+
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ bdrv_flags |= BDRV_O_INACTIVE;
+ }
+
+ blk = blk_new_open(file, NULL, bs_opts, bdrv_flags, errp);
+ if (!blk) {
+ goto err_no_bs_opts;
+ }
+ bs = blk_bs(blk);
+
+ bs->detect_zeroes = detect_zeroes;
+
+ if (bdrv_key_required(bs)) {
+ autostart = 0;
+ }
+
+ block_acct_init(blk_get_stats(blk), account_invalid, account_failed);
+
+ if (!parse_stats_intervals(blk_get_stats(blk), interval_list, errp)) {
+ blk_unref(blk);
+ blk = NULL;
+ goto err_no_bs_opts;
+ }
+ }
/* disk I/O throttling */
if (throttle_enabled(&cfg)) {
- bdrv_io_limits_enable(bs);
- bdrv_set_io_limits(bs, &cfg);
- }
-
- if (!file || !*file) {
- if (has_driver_specific_opts) {
- file = NULL;
- } else {
- QDECREF(bs_opts);
- qemu_opts_del(opts);
- return blk;
+ if (!throttling_group) {
+ throttling_group = id;
}
- }
- if (snapshot) {
- /* always use cache=unsafe with snapshot */
- bdrv_flags &= ~BDRV_O_CACHE_MASK;
- bdrv_flags |= (BDRV_O_SNAPSHOT|BDRV_O_CACHE_WB|BDRV_O_NO_FLUSH);
+ blk_io_limits_enable(blk, throttling_group);
+ blk_set_io_limits(blk, &cfg);
}
- if (copy_on_read) {
- bdrv_flags |= BDRV_O_COPY_ON_READ;
+ blk_set_enable_write_cache(blk, !writethrough);
+ blk_set_on_error(blk, on_read_error, on_write_error);
+
+ if (!monitor_add_blk(blk, id, errp)) {
+ blk_unref(blk);
+ blk = NULL;
+ goto err_no_bs_opts;
}
- if (runstate_check(RUN_STATE_INMIGRATE)) {
- bdrv_flags |= BDRV_O_INCOMING;
- }
-
- bdrv_flags |= ro ? 0 : BDRV_O_RDWR;
-
- QINCREF(bs_opts);
- ret = bdrv_open(&bs, file, NULL, bs_opts, bdrv_flags, drv, &error);
- assert(bs == blk_bs(blk));
-
- if (ret < 0) {
- error_setg(errp, "could not open disk image %s: %s",
- file ?: blk_name(blk), error_get_pretty(error));
- error_free(error);
- goto err;
- }
-
- if (bdrv_key_required(bs)) {
- autostart = 0;
- }
-
- QDECREF(bs_opts);
+err_no_bs_opts:
qemu_opts_del(opts);
-
+ QDECREF(interval_dict);
+ QDECREF(interval_list);
return blk;
-err:
- blk_unref(blk);
early_err:
qemu_opts_del(opts);
+ QDECREF(interval_dict);
+ QDECREF(interval_list);
err_no_opts:
QDECREF(bs_opts);
return NULL;
}
+static QemuOptsList qemu_root_bds_opts;
+
+/* Takes the ownership of bs_opts */
+static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
+{
+ BlockDriverState *bs;
+ QemuOpts *opts;
+ Error *local_error = NULL;
+ BlockdevDetectZeroesOptions detect_zeroes;
+ int bdrv_flags = 0;
+
+ opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
+ if (!opts) {
+ goto fail;
+ }
+
+ qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
+ if (local_error) {
+ error_propagate(errp, local_error);
+ goto fail;
+ }
+
+ extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
+ &detect_zeroes, &local_error);
+ if (local_error) {
+ error_propagate(errp, local_error);
+ goto fail;
+ }
+
+ /* bdrv_open() defaults to the values in bdrv_flags (for compatibility
+ * with other callers) rather than what we want as the real defaults.
+ * Apply the defaults here instead. */
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_DIRECT, "off");
+ qdict_set_default_str(bs_opts, BDRV_OPT_CACHE_NO_FLUSH, "off");
+
+ if (runstate_check(RUN_STATE_INMIGRATE)) {
+ bdrv_flags |= BDRV_O_INACTIVE;
+ }
+
+ bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
+ if (!bs) {
+ goto fail_no_bs_opts;
+ }
+
+ bs->detect_zeroes = detect_zeroes;
+
+fail_no_bs_opts:
+ qemu_opts_del(opts);
+ return bs;
+
+fail:
+ qemu_opts_del(opts);
+ QDECREF(bs_opts);
+ return NULL;
+}
+
+void blockdev_close_all_bdrv_states(void)
+{
+ BlockDriverState *bs, *next_bs;
+
+ QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
+ AioContext *ctx = bdrv_get_aio_context(bs);
+
+ aio_context_acquire(ctx);
+ bdrv_unref(bs);
+ aio_context_release(ctx);
+ }
+}
+
+/* Iterates over the list of monitor-owned BlockDriverStates */
+BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
+{
+ return bs ? QTAILQ_NEXT(bs, monitor_list)
+ : QTAILQ_FIRST(&monitor_bdrv_states);
+}
+
static void qemu_opt_rename(QemuOpts *opts, const char *from, const char *to,
Error **errp)
{
@@ -592,7 +740,7 @@
/* rename all items in opts */
while ((value = qemu_opt_get(opts, from))) {
- qemu_opt_set(opts, to, value);
+ qemu_opt_set(opts, to, value, &error_abort);
qemu_opt_unset(opts, from);
}
}
@@ -721,6 +869,8 @@
{ "iops_size", "throttling.iops-size" },
+ { "group", "throttling.group" },
+
{ "readonly", "read-only" },
};
@@ -728,8 +878,7 @@
qemu_opt_rename(all_opts, opt_renames[i].from, opt_renames[i].to,
&local_err);
if (local_err) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
return NULL;
}
}
@@ -737,24 +886,25 @@
value = qemu_opt_get(all_opts, "cache");
if (value) {
int flags = 0;
+ bool writethrough;
- if (bdrv_parse_cache_flags(value, &flags) != 0) {
+ if (bdrv_parse_cache_mode(value, &flags, &writethrough) != 0) {
error_report("invalid cache option");
return NULL;
}
/* Specific options take precedence */
- if (!qemu_opt_get(all_opts, "cache.writeback")) {
- qemu_opt_set_bool(all_opts, "cache.writeback",
- !!(flags & BDRV_O_CACHE_WB));
+ if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_WB)) {
+ qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_WB,
+ !writethrough, &error_abort);
}
- if (!qemu_opt_get(all_opts, "cache.direct")) {
- qemu_opt_set_bool(all_opts, "cache.direct",
- !!(flags & BDRV_O_NOCACHE));
+ if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_DIRECT)) {
+ qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_DIRECT,
+ !!(flags & BDRV_O_NOCACHE), &error_abort);
}
- if (!qemu_opt_get(all_opts, "cache.no-flush")) {
- qemu_opt_set_bool(all_opts, "cache.no-flush",
- !!(flags & BDRV_O_NO_FLUSH));
+ if (!qemu_opt_get(all_opts, BDRV_OPT_CACHE_NO_FLUSH)) {
+ qemu_opt_set_bool(all_opts, BDRV_OPT_CACHE_NO_FLUSH,
+ !!(flags & BDRV_O_NO_FLUSH), &error_abort);
}
qemu_opt_unset(all_opts, "cache");
}
@@ -767,8 +917,7 @@
&error_abort);
qemu_opts_absorb_qdict(legacy_opts, bs_opts, &local_err);
if (local_err) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
goto fail;
}
@@ -945,13 +1094,14 @@
devopts = qemu_opts_create(qemu_find_opts("device"), NULL, 0,
&error_abort);
if (arch_type == QEMU_ARCH_S390X) {
- qemu_opt_set(devopts, "driver", "virtio-blk-s390");
+ qemu_opt_set(devopts, "driver", "virtio-blk-ccw", &error_abort);
} else {
- qemu_opt_set(devopts, "driver", "virtio-blk-pci");
+ qemu_opt_set(devopts, "driver", "virtio-blk-pci", &error_abort);
}
- qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"));
+ qemu_opt_set(devopts, "drive", qdict_get_str(bs_opts, "id"),
+ &error_abort);
if (devaddr) {
- qemu_opt_set(devopts, "addr", devaddr);
+ qemu_opt_set(devopts, "addr", devaddr, &error_abort);
}
}
@@ -983,8 +1133,7 @@
bs_opts = NULL;
if (!blk) {
if (local_err) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
+ error_report_err(local_err);
}
goto fail;
} else {
@@ -1025,21 +1174,35 @@
return dinfo;
}
-void do_commit(Monitor *mon, const QDict *qdict)
+void hmp_commit(Monitor *mon, const QDict *qdict)
{
const char *device = qdict_get_str(qdict, "device");
- BlockDriverState *bs;
+ BlockBackend *blk;
int ret;
if (!strcmp(device, "all")) {
- ret = bdrv_commit_all();
+ ret = blk_commit_all();
} else {
- bs = bdrv_find(device);
- if (!bs) {
+ BlockDriverState *bs;
+ AioContext *aio_context;
+
+ blk = blk_by_name(device);
+ if (!blk) {
monitor_printf(mon, "Device '%s' not found\n", device);
return;
}
+ if (!blk_is_available(blk)) {
+ monitor_printf(mon, "Device '%s' has no medium\n", device);
+ return;
+ }
+
+ bs = blk_bs(blk);
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
ret = bdrv_commit(bs);
+
+ aio_context_release(aio_context);
}
if (ret < 0) {
monitor_printf(mon, "'commit' error for '%s': %s\n", device,
@@ -1047,16 +1210,13 @@
}
}
-static void blockdev_do_action(int kind, void *data, Error **errp)
+static void blockdev_do_action(TransactionAction *action, Error **errp)
{
- TransactionAction action;
TransactionActionList list;
- action.kind = kind;
- action.data = data;
- list.value = &action;
+ list.value = action;
list.next = NULL;
- qmp_transaction(&list, errp);
+ qmp_transaction(&list, false, NULL, errp);
}
void qmp_blockdev_snapshot_sync(bool has_device, const char *device,
@@ -1067,7 +1227,7 @@
bool has_format, const char *format,
bool has_mode, NewImageMode mode, Error **errp)
{
- BlockdevSnapshot snapshot = {
+ BlockdevSnapshotSync snapshot = {
.has_device = has_device,
.device = (char *) device,
.has_node_name = has_node_name,
@@ -1080,8 +1240,25 @@
.has_mode = has_mode,
.mode = mode,
};
- blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
- &snapshot, errp);
+ TransactionAction action = {
+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC,
+ .u.blockdev_snapshot_sync.data = &snapshot,
+ };
+ blockdev_do_action(&action, errp);
+}
+
+/*
+ * Take an external snapshot of @node using the already-existing node
+ * @overlay as the new top image.
+ *
+ * The two names are wrapped in a single BLOCKDEV_SNAPSHOT TransactionAction
+ * and executed through blockdev_do_action(); failures are reported in @errp.
+ */
+void qmp_blockdev_snapshot(const char *node, const char *overlay,
+ Error **errp)
+{
+ /* The struct members are plain char *, hence the const-dropping casts */
+ BlockdevSnapshot snapshot_data = {
+ .node = (char *) node,
+ .overlay = (char *) overlay
+ };
+ TransactionAction action = {
+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT,
+ .u.blockdev_snapshot.data = &snapshot_data,
+ };
+ blockdev_do_action(&action, errp);
}
void qmp_blockdev_snapshot_internal_sync(const char *device,
@@ -1092,9 +1269,11 @@
.device = (char *) device,
.name = (char *) name
};
-
- blockdev_do_action(TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
- &snapshot, errp);
+ TransactionAction action = {
+ .type = TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC,
+ .u.blockdev_snapshot_internal_sync.data = &snapshot,
+ };
+ blockdev_do_action(&action, errp);
}
SnapshotInfo *qmp_blockdev_snapshot_delete_internal_sync(const char *device,
@@ -1104,17 +1283,24 @@
const char *name,
Error **errp)
{
- BlockDriverState *bs = bdrv_find(device);
+ BlockDriverState *bs;
+ BlockBackend *blk;
+ AioContext *aio_context;
QEMUSnapshotInfo sn;
Error *local_err = NULL;
SnapshotInfo *info = NULL;
int ret;
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return NULL;
}
+ aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(aio_context);
+
if (!has_id) {
id = NULL;
}
@@ -1125,28 +1311,40 @@
if (!id && !name) {
error_setg(errp, "Name or id must be provided");
- return NULL;
+ goto out_aio_context;
+ }
+
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ goto out_aio_context;
+ }
+ bs = blk_bs(blk);
+
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, errp)) {
+ goto out_aio_context;
}
ret = bdrv_snapshot_find_by_id_and_name(bs, id, name, &sn, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return NULL;
+ goto out_aio_context;
}
if (!ret) {
error_setg(errp,
"Snapshot with id '%s' and name '%s' does not exist on "
"device '%s'",
STR_OR_NULL(id), STR_OR_NULL(name), device);
- return NULL;
+ goto out_aio_context;
}
bdrv_snapshot_delete(bs, id, name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return NULL;
+ goto out_aio_context;
}
+ aio_context_release(aio_context);
+
info = g_new0(SnapshotInfo, 1);
info->id = g_strdup(sn.id_str);
info->name = g_strdup(sn.name);
@@ -1157,51 +1355,151 @@
info->vm_clock_sec = sn.vm_clock_nsec / 1000000000;
return info;
+
+out_aio_context:
+ aio_context_release(aio_context);
+ return NULL;
}
-/* New and old BlockDriverState structs for group snapshots */
-
-typedef struct BlkTransactionState BlkTransactionState;
-
-/* Only prepare() may fail. In a single transaction, only one of commit() or
- abort() will be called, clean() will always be called if it present. */
-typedef struct BdrvActionOps {
- /* Size of state struct, in bytes. */
- size_t instance_size;
- /* Prepare the work, must NOT be NULL. */
- void (*prepare)(BlkTransactionState *common, Error **errp);
- /* Commit the changes, can be NULL. */
- void (*commit)(BlkTransactionState *common);
- /* Abort the changes on fail, can be NULL. */
- void (*abort)(BlkTransactionState *common);
- /* Clean up resource in the end, can be NULL. */
- void (*clean)(BlkTransactionState *common);
-} BdrvActionOps;
-
-/*
- * This structure must be arranged as first member in child type, assuming
- * that compiler will also arrange it to the same address with parent instance.
- * Later it will be used in free().
+/**
+ * block_dirty_bitmap_lookup:
+ * Return a dirty bitmap (if present), after validating
+ * the node reference and bitmap names.
+ *
+ * @node: The name of the BDS node to search for bitmaps
+ * @name: The name of the bitmap to search for
+ * @pbs: Output pointer for BDS lookup, if desired. Can be NULL.
+ * @paio: Output pointer for aio_context acquisition, if desired. Can be NULL.
+ * @errp: Output pointer for error information. Can be NULL.
+ *
+ * @return: A bitmap object on success, or NULL on failure.
*/
-struct BlkTransactionState {
+static BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
+ const char *name,
+ BlockDriverState **pbs,
+ AioContext **paio,
+ Error **errp)
+{
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+ AioContext *aio_context;
+
+ if (!node) {
+ error_setg(errp, "Node cannot be NULL");
+ return NULL;
+ }
+ if (!name) {
+ error_setg(errp, "Bitmap name cannot be NULL");
+ return NULL;
+ }
+ /* @node is passed as both lookup keys; the lookup's own error is dropped
+ * in favour of the message below */
+ bs = bdrv_lookup_bs(node, node, NULL);
+ if (!bs) {
+ error_setg(errp, "Node '%s' not found", node);
+ return NULL;
+ }
+
+ /* Hold the node's AioContext while its bitmaps are searched */
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bitmap = bdrv_find_dirty_bitmap(bs, name);
+ if (!bitmap) {
+ error_setg(errp, "Dirty bitmap '%s' not found", name);
+ goto fail;
+ }
+
+ if (pbs) {
+ *pbs = bs;
+ }
+ /* With @paio the context is handed to the caller still acquired, and the
+ * caller is responsible for releasing it; without @paio it is released
+ * here before returning */
+ if (paio) {
+ *paio = aio_context;
+ } else {
+ aio_context_release(aio_context);
+ }
+
+ return bitmap;
+
+ fail:
+ /* Failure path: neither *pbs nor *paio has been written at this point */
+ aio_context_release(aio_context);
+ return NULL;
+}
+
+/* New and old BlockDriverState structs for atomic group operations */
+
+typedef struct BlkActionState BlkActionState;
+
+/**
+ * BlkActionOps:
+ * Table of operations that define an Action.
+ *
+ * @instance_size: Size of state struct, in bytes.
+ * @prepare: Prepare the work, must NOT be NULL.
+ * @commit: Commit the changes, can be NULL.
+ * @abort: Abort the changes on failure, can be NULL.
+ * @clean: Clean up resources after all transaction actions have called
+ * commit() or abort(). Can be NULL.
+ *
+ * Only prepare() may fail. In a single transaction, only one of commit() or
+ * abort() will be called. clean() will always be called if it is present.
+ *
+ * Every callback receives the BlkActionState that sits at the start of the
+ * action's private state struct; implementations get back to that struct
+ * with DO_UPCAST().
+ */
+typedef struct BlkActionOps {
+ size_t instance_size;
+ void (*prepare)(BlkActionState *common, Error **errp);
+ void (*commit)(BlkActionState *common);
+ void (*abort)(BlkActionState *common);
+ void (*clean)(BlkActionState *common);
+} BlkActionOps;
+
+/**
+ * BlkActionState:
+ * Describes one Action's state within a Transaction.
+ *
+ * @action: QAPI-defined enum identifying which Action to perform.
+ * @ops: Table of ActionOps this Action can perform.
+ * @block_job_txn: Transaction which this action belongs to.
+ * @txn_props: Properties of the enclosing Transaction (e.g. completion-mode).
+ * @entry: List membership for all Actions in this Transaction.
+ *
+ * This structure must be arranged as first member in a subclassed type,
+ * assuming that the compiler will also arrange it to the same offsets as the
+ * base class.
+ */
+struct BlkActionState {
TransactionAction *action;
- const BdrvActionOps *ops;
- QSIMPLEQ_ENTRY(BlkTransactionState) entry;
+ const BlkActionOps *ops;
+ BlockJobTxn *block_job_txn;
+ TransactionProperties *txn_props;
+ QSIMPLEQ_ENTRY(BlkActionState) entry;
};
/* internal snapshot private data */
typedef struct InternalSnapshotState {
- BlkTransactionState common;
+ BlkActionState common;
BlockDriverState *bs;
+ AioContext *aio_context;
QEMUSnapshotInfo sn;
+ bool created;
} InternalSnapshotState;
-static void internal_snapshot_prepare(BlkTransactionState *common,
+
+/*
+ * Gate for actions that only work with the default completion mode.
+ *
+ * Returns 0 when the transaction's completion_mode is
+ * ACTION_COMPLETION_MODE_INDIVIDUAL; otherwise sets @errp, naming the action
+ * and the requested mode, and returns -1.
+ */
+static int action_check_completion_mode(BlkActionState *s, Error **errp)
+{
+    if (s->txn_props->completion_mode == ACTION_COMPLETION_MODE_INDIVIDUAL) {
+        return 0;
+    }
+
+    error_setg(errp,
+               "Action '%s' does not support Transaction property "
+               "completion-mode = %s",
+               TransactionActionKind_lookup[s->action->type],
+               ActionCompletionMode_lookup[s->txn_props->completion_mode]);
+    return -1;
+}
+
+static void internal_snapshot_prepare(BlkActionState *common,
Error **errp)
{
Error *local_err = NULL;
const char *device;
const char *name;
+ BlockBackend *blk;
BlockDriverState *bs;
QEMUSnapshotInfo old_sn, *sn;
bool ret;
@@ -1210,9 +1508,9 @@
InternalSnapshotState *state;
int ret1;
- g_assert(common->action->kind ==
+ g_assert(common->action->type ==
TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_INTERNAL_SYNC);
- internal = common->action->blockdev_snapshot_internal_sync;
+ internal = common->action->u.blockdev_snapshot_internal_sync.data;
state = DO_UPCAST(InternalSnapshotState, common, common);
/* 1. parse input */
@@ -1220,25 +1518,43 @@
name = internal->name;
/* 2. check for validation */
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ if (action_check_completion_mode(common, errp) < 0) {
return;
}
- if (!bdrv_is_inserted(bs)) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return;
+ }
+
+ /* AioContext is released in .clean() */
+ state->aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(state->aio_context);
+
+ if (!blk_is_available(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ return;
+ }
+ bs = blk_bs(blk);
+
+ state->bs = bs;
+ bdrv_drained_begin(bs);
+
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
return;
}
if (bdrv_is_read_only(bs)) {
- error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+ error_setg(errp, "Device '%s' is read only", device);
return;
}
if (!bdrv_can_snapshot(bs)) {
- error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
- bs->drv->format_name, device, "internal snapshot");
+ error_setg(errp, "Block format '%s' used by device '%s' "
+ "does not support internal snapshots",
+ bs->drv->format_name, device);
return;
}
@@ -1277,10 +1593,10 @@
}
/* 4. succeed, mark a snapshot is created */
- state->bs = bs;
+ state->created = true;
}
-static void internal_snapshot_abort(BlkTransactionState *common)
+static void internal_snapshot_abort(BlkActionState *common)
{
InternalSnapshotState *state =
DO_UPCAST(InternalSnapshotState, common, common);
@@ -1288,94 +1604,99 @@
QEMUSnapshotInfo *sn = &state->sn;
Error *local_error = NULL;
- if (!bs) {
+ if (!state->created) {
return;
}
if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
- error_report("Failed to delete snapshot with id '%s' and name '%s' on "
- "device '%s' in abort: %s",
- sn->id_str,
- sn->name,
- bdrv_get_device_name(bs),
- error_get_pretty(local_error));
- error_free(local_error);
+ error_reportf_err(local_error,
+ "Failed to delete snapshot with id '%s' and "
+ "name '%s' on device '%s' in abort: ",
+ sn->id_str, sn->name,
+ bdrv_get_device_name(bs));
+ }
+}
+
+/*
+ * .clean() for internal snapshots: gives back what
+ * internal_snapshot_prepare() took, i.e. the drained section on the node and
+ * the AioContext. Does nothing if no AioContext was recorded.
+ */
+static void internal_snapshot_clean(BlkActionState *common)
+{
+ InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
+ common, common);
+
+ if (state->aio_context) {
+ /* prepare() can bail out after acquiring the context but before
+ * recording and draining the node, so bs is checked separately */
+ if (state->bs) {
+ bdrv_drained_end(state->bs);
+ }
+ aio_context_release(state->aio_context);
}
}
/* external snapshot private data */
typedef struct ExternalSnapshotState {
- BlkTransactionState common;
+ BlkActionState common;
BlockDriverState *old_bs;
BlockDriverState *new_bs;
+ AioContext *aio_context;
} ExternalSnapshotState;
-static void external_snapshot_prepare(BlkTransactionState *common,
+static void external_snapshot_prepare(BlkActionState *common,
Error **errp)
{
- BlockDriver *drv;
- int flags, ret;
+ int flags = 0;
QDict *options = NULL;
Error *local_err = NULL;
- bool has_device = false;
+ /* Device and node name of the image to generate the snapshot from */
const char *device;
- bool has_node_name = false;
const char *node_name;
- bool has_snapshot_node_name = false;
- const char *snapshot_node_name;
+ /* Reference to the new image (for 'blockdev-snapshot') */
+ const char *snapshot_ref;
+ /* File name of the new image (for 'blockdev-snapshot-sync') */
const char *new_image_file;
- const char *format = "qcow2";
- enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
TransactionAction *action = common->action;
- /* get parameters */
- g_assert(action->kind == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC);
-
- has_device = action->blockdev_snapshot_sync->has_device;
- device = action->blockdev_snapshot_sync->device;
- has_node_name = action->blockdev_snapshot_sync->has_node_name;
- node_name = action->blockdev_snapshot_sync->node_name;
- has_snapshot_node_name =
- action->blockdev_snapshot_sync->has_snapshot_node_name;
- snapshot_node_name = action->blockdev_snapshot_sync->snapshot_node_name;
-
- new_image_file = action->blockdev_snapshot_sync->snapshot_file;
- if (action->blockdev_snapshot_sync->has_format) {
- format = action->blockdev_snapshot_sync->format;
- }
- if (action->blockdev_snapshot_sync->has_mode) {
- mode = action->blockdev_snapshot_sync->mode;
+ /* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
+ * purpose but a different set of parameters */
+ switch (action->type) {
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT:
+ {
+ BlockdevSnapshot *s = action->u.blockdev_snapshot.data;
+ device = s->node;
+ node_name = s->node;
+ new_image_file = NULL;
+ snapshot_ref = s->overlay;
+ }
+ break;
+ case TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC:
+ {
+ BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
+ device = s->has_device ? s->device : NULL;
+ node_name = s->has_node_name ? s->node_name : NULL;
+ new_image_file = s->snapshot_file;
+ snapshot_ref = NULL;
+ }
+ break;
+ default:
+ g_assert_not_reached();
}
/* start processing */
- drv = bdrv_find_format(format);
- if (!drv) {
- error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
+ if (action_check_completion_mode(common, errp) < 0) {
return;
}
- state->old_bs = bdrv_lookup_bs(has_device ? device : NULL,
- has_node_name ? node_name : NULL,
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
+ state->old_bs = bdrv_lookup_bs(device, node_name, errp);
+ if (!state->old_bs) {
return;
}
- if (has_node_name && !has_snapshot_node_name) {
- error_setg(errp, "New snapshot node name missing");
- return;
- }
-
- if (has_snapshot_node_name && bdrv_find_node(snapshot_node_name)) {
- error_setg(errp, "New snapshot node name already existing");
- return;
- }
+ /* Acquire AioContext now so any threads operating on old_bs stop */
+ state->aio_context = bdrv_get_aio_context(state->old_bs);
+ aio_context_acquire(state->aio_context);
+ bdrv_drained_begin(state->old_bs);
if (!bdrv_is_inserted(state->old_bs)) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
return;
}
@@ -1386,62 +1707,112 @@
if (!bdrv_is_read_only(state->old_bs)) {
if (bdrv_flush(state->old_bs)) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
return;
}
}
if (!bdrv_is_first_non_filter(state->old_bs)) {
- error_set(errp, QERR_FEATURE_DISABLED, "snapshot");
+ error_setg(errp, QERR_FEATURE_DISABLED, "snapshot");
return;
}
- flags = state->old_bs->open_flags;
+ if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
+ BlockdevSnapshotSync *s = action->u.blockdev_snapshot_sync.data;
+ const char *format = s->has_format ? s->format : "qcow2";
+ enum NewImageMode mode;
+ const char *snapshot_node_name =
+ s->has_snapshot_node_name ? s->snapshot_node_name : NULL;
- /* create new image w/backing file */
- if (mode != NEW_IMAGE_MODE_EXISTING) {
- bdrv_img_create(new_image_file, format,
- state->old_bs->filename,
- state->old_bs->drv->format_name,
- NULL, -1, flags, &local_err, false);
- if (local_err) {
- error_propagate(errp, local_err);
+ if (node_name && !snapshot_node_name) {
+ error_setg(errp, "New snapshot node name missing");
return;
}
- }
- if (has_snapshot_node_name) {
+ if (snapshot_node_name &&
+ bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
+ error_setg(errp, "New snapshot node name already in use");
+ return;
+ }
+
+ flags = state->old_bs->open_flags;
+ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ);
+
+ /* create new image w/backing file */
+ mode = s->has_mode ? s->mode : NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+ if (mode != NEW_IMAGE_MODE_EXISTING) {
+ int64_t size = bdrv_getlength(state->old_bs);
+ if (size < 0) {
+ error_setg_errno(errp, -size, "bdrv_getlength failed");
+ return;
+ }
+ bdrv_img_create(new_image_file, format,
+ state->old_bs->filename,
+ state->old_bs->drv->format_name,
+ NULL, size, flags, &local_err, false);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+
options = qdict_new();
- qdict_put(options, "node-name",
- qstring_from_str(snapshot_node_name));
+ if (s->has_snapshot_node_name) {
+ qdict_put(options, "node-name",
+ qstring_from_str(snapshot_node_name));
+ }
+ qdict_put(options, "driver", qstring_from_str(format));
+
+ flags |= BDRV_O_NO_BACKING;
}
- /* TODO Inherit bs->options or only take explicit options with an
- * extended QMP command? */
- assert(state->new_bs == NULL);
- ret = bdrv_open(&state->new_bs, new_image_file, NULL, options,
- flags | BDRV_O_NO_BACKING, drv, &local_err);
+ state->new_bs = bdrv_open(new_image_file, snapshot_ref, options, flags,
+ errp);
/* We will manually add the backing_hd field to the bs later */
- if (ret != 0) {
- error_propagate(errp, local_err);
+ if (!state->new_bs) {
+ return;
+ }
+
+ if (bdrv_has_blk(state->new_bs)) {
+ error_setg(errp, "The snapshot is already in use by %s",
+ bdrv_get_parent_name(state->new_bs));
+ return;
+ }
+
+ if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
+ errp)) {
+ return;
+ }
+
+ if (state->new_bs->backing != NULL) {
+ error_setg(errp, "The snapshot already has a backing image");
+ return;
+ }
+
+ if (!state->new_bs->drv->supports_backing) {
+ error_setg(errp, "The snapshot does not support backing images");
}
}
-static void external_snapshot_commit(BlkTransactionState *common)
+static void external_snapshot_commit(BlkActionState *common)
{
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
+ bdrv_set_aio_context(state->new_bs, state->aio_context);
+
/* This removes our old bs and adds the new bs */
bdrv_append(state->new_bs, state->old_bs);
/* We don't need (or want) to use the transactional
* bdrv_reopen_multiple() across all the entries at once, because we
* don't want to abort all of them if one of them fails the reopen */
- bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR,
- NULL);
+ if (!state->old_bs->copy_on_read) {
+ bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
+ NULL);
+ }
}
-static void external_snapshot_abort(BlkTransactionState *common)
+static void external_snapshot_abort(BlkActionState *common)
{
ExternalSnapshotState *state =
DO_UPCAST(ExternalSnapshotState, common, common);
@@ -1450,41 +1821,82 @@
}
}
+/*
+ * .clean() for external snapshots: ends the drained section on old_bs and
+ * releases the AioContext taken by external_snapshot_prepare(). Does nothing
+ * if no AioContext was recorded.
+ */
+static void external_snapshot_clean(BlkActionState *common)
+{
+    ExternalSnapshotState *snap =
+        DO_UPCAST(ExternalSnapshotState, common, common);
+
+    if (!snap->aio_context) {
+        return;
+    }
+
+    bdrv_drained_end(snap->old_bs);
+    aio_context_release(snap->aio_context);
+}
+
typedef struct DriveBackupState {
- BlkTransactionState common;
+ BlkActionState common;
BlockDriverState *bs;
+ AioContext *aio_context;
BlockJob *job;
} DriveBackupState;
-static void drive_backup_prepare(BlkTransactionState *common, Error **errp)
+static void do_drive_backup(const char *job_id, const char *device,
+ const char *target, bool has_format,
+ const char *format, enum MirrorSyncMode sync,
+ bool has_mode, enum NewImageMode mode,
+ bool has_speed, int64_t speed,
+ bool has_bitmap, const char *bitmap,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ BlockJobTxn *txn, Error **errp);
+
+static void drive_backup_prepare(BlkActionState *common, Error **errp)
{
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
+ BlockBackend *blk;
DriveBackup *backup;
Error *local_err = NULL;
- assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
- backup = common->action->drive_backup;
+ assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
+ backup = common->action->u.drive_backup.data;
- qmp_drive_backup(backup->device, backup->target,
- backup->has_format, backup->format,
- backup->sync,
- backup->has_mode, backup->mode,
- backup->has_speed, backup->speed,
- backup->has_on_source_error, backup->on_source_error,
- backup->has_on_target_error, backup->on_target_error,
- &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- state->bs = NULL;
- state->job = NULL;
+ blk = blk_by_name(backup->device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", backup->device);
return;
}
- state->bs = bdrv_find(backup->device);
+ if (!blk_is_available(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+ return;
+ }
+
+ /* AioContext is released in .clean() */
+ state->aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(state->aio_context);
+ bdrv_drained_begin(blk_bs(blk));
+ state->bs = blk_bs(blk);
+
+ do_drive_backup(backup->has_job_id ? backup->job_id : NULL,
+ backup->device, backup->target,
+ backup->has_format, backup->format,
+ backup->sync,
+ backup->has_mode, backup->mode,
+ backup->has_speed, backup->speed,
+ backup->has_bitmap, backup->bitmap,
+ backup->has_on_source_error, backup->on_source_error,
+ backup->has_on_target_error, backup->on_target_error,
+ common->block_job_txn, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
state->job = state->bs->job;
}
-static void drive_backup_abort(BlkTransactionState *common)
+static void drive_backup_abort(BlkActionState *common)
{
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
BlockDriverState *bs = state->bs;
@@ -1495,30 +1907,252 @@
}
}
-static void abort_prepare(BlkTransactionState *common, Error **errp)
+/*
+ * .clean() for drive-backup: ends the drained section on the source node and
+ * releases the AioContext taken by drive_backup_prepare(). Does nothing if
+ * no AioContext was recorded.
+ */
+static void drive_backup_clean(BlkActionState *common)
+{
+    DriveBackupState *backup_state =
+        DO_UPCAST(DriveBackupState, common, common);
+
+    if (!backup_state->aio_context) {
+        return;
+    }
+
+    bdrv_drained_end(backup_state->bs);
+    aio_context_release(backup_state->aio_context);
+}
+
+/* blockdev-backup private data */
+typedef struct BlockdevBackupState {
+ BlkActionState common; /* base state, must stay the first member */
+ BlockDriverState *bs; /* source node, drained from prepare() to clean() */
+ BlockJob *job; /* bs->job as seen right after the backup was started */
+ AioContext *aio_context; /* acquired in prepare(), released in clean() */
+} BlockdevBackupState;
+
+static void do_blockdev_backup(const char *job_id, const char *device,
+ const char *target, enum MirrorSyncMode sync,
+ bool has_speed, int64_t speed,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ BlockJobTxn *txn, Error **errp);
+
+/*
+ * .prepare() for the BLOCKDEV_BACKUP transaction action.
+ *
+ * Checks the source device and target node, acquires the source's
+ * AioContext, opens a drained section on the source and starts the backup
+ * through do_blockdev_backup(). On failure @errp is set and the function
+ * returns early; anything already acquired is given back by
+ * blockdev_backup_clean().
+ */
+static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
+{
+ BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
+ BlockdevBackup *backup;
+ BlockBackend *blk;
+ BlockDriverState *target;
+ Error *local_err = NULL;
+
+ assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
+ backup = common->action->u.blockdev_backup.data;
+
+ blk = blk_by_name(backup->device);
+ if (!blk) {
+ error_setg(errp, "Device '%s' not found", backup->device);
+ return;
+ }
+
+ if (!blk_is_available(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, backup->device);
+ return;
+ }
+
+ target = bdrv_lookup_bs(backup->target, backup->target, errp);
+ if (!target) {
+ return;
+ }
+
+ /* AioContext is released in .clean() */
+ state->aio_context = blk_get_aio_context(blk);
+ if (state->aio_context != bdrv_get_aio_context(target)) {
+ /* Nothing was acquired yet: clear the field so .clean() does not
+ * release a context this action never took */
+ state->aio_context = NULL;
+ error_setg(errp, "Backup between two IO threads is not implemented");
+ return;
+ }
+ aio_context_acquire(state->aio_context);
+ state->bs = blk_bs(blk);
+ bdrv_drained_begin(state->bs);
+
+ do_blockdev_backup(backup->has_job_id ? backup->job_id : NULL,
+ backup->device, backup->target, backup->sync,
+ backup->has_speed, backup->speed,
+ backup->has_on_source_error, backup->on_source_error,
+ backup->has_on_target_error, backup->on_target_error,
+ common->block_job_txn, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+
+ /* Remembered so that .abort() only cancels the job started here */
+ state->job = state->bs->job;
+}
+
+/*
+ * .abort() for blockdev-backup: synchronously cancels the backup job, but
+ * only when the job currently attached to the source node is the one
+ * recorded by blockdev_backup_prepare().
+ */
+static void blockdev_backup_abort(BlkActionState *common)
+{
+    BlockdevBackupState *backup_state =
+        DO_UPCAST(BlockdevBackupState, common, common);
+    BlockDriverState *src = backup_state->bs;
+
+    if (!src || !src->job) {
+        return;
+    }
+
+    /* Only cancel if it's the job we started */
+    if (src->job == backup_state->job) {
+        block_job_cancel_sync(src->job);
+    }
+}
+
+/*
+ * .clean() for blockdev-backup: ends the drained section on the source node
+ * and releases the AioContext taken by blockdev_backup_prepare(). Does
+ * nothing if no AioContext was recorded.
+ */
+static void blockdev_backup_clean(BlkActionState *common)
+{
+    BlockdevBackupState *backup_state =
+        DO_UPCAST(BlockdevBackupState, common, common);
+
+    if (!backup_state->aio_context) {
+        return;
+    }
+
+    bdrv_drained_end(backup_state->bs);
+    aio_context_release(backup_state->aio_context);
+}
+
+/* dirty bitmap add/clear private data */
+typedef struct BlockDirtyBitmapState {
+ BlkActionState common; /* base state, must stay the first member */
+ BdrvDirtyBitmap *bitmap; /* clear: bitmap found by the lookup in prepare() */
+ BlockDriverState *bs; /* clear: node owning the bitmap, set by the lookup */
+ AioContext *aio_context; /* clear: held from prepare() until clean() */
+ HBitmap *backup; /* clear: filled by bdrv_clear_dirty_bitmap(), used to undo */
+ bool prepared; /* add: set once the bitmap was successfully added */
+} BlockDirtyBitmapState;
+
+/*
+ * .prepare() for the BLOCK_DIRTY_BITMAP_ADD transaction action.
+ *
+ * Creates the bitmap via qmp_block_dirty_bitmap_add() and records success in
+ * state->prepared so that .abort() knows whether there is something to
+ * remove. Errors are propagated to @errp.
+ */
+static void block_dirty_bitmap_add_prepare(BlkActionState *common,
+                                           Error **errp)
+{
+    BlockDirtyBitmapState *state =
+        DO_UPCAST(BlockDirtyBitmapState, common, common);
+    BlockDirtyBitmapAdd *add;
+    Error *err = NULL;
+
+    if (action_check_completion_mode(common, errp) < 0) {
+        return;
+    }
+
+    add = common->action->u.block_dirty_bitmap_add.data;
+    /* AIO context taken and released within qmp_block_dirty_bitmap_add */
+    qmp_block_dirty_bitmap_add(add->node, add->name,
+                               add->has_granularity, add->granularity,
+                               &err);
+    if (err) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    state->prepared = true;
+}
+
+/*
+ * .abort() for BLOCK_DIRTY_BITMAP_ADD: removes the bitmap again, but only if
+ * .prepare() actually added it.
+ */
+static void block_dirty_bitmap_add_abort(BlkActionState *common)
+{
+    BlockDirtyBitmapState *state =
+        DO_UPCAST(BlockDirtyBitmapState, common, common);
+    BlockDirtyBitmapAdd *add;
+
+    if (!state->prepared) {
+        return;
+    }
+
+    add = common->action->u.block_dirty_bitmap_add.data;
+    /* Should not be able to fail: IF the bitmap was added via .prepare(),
+     * then the node reference and bitmap name must have been valid.
+     */
+    qmp_block_dirty_bitmap_remove(add->node, add->name, &error_abort);
+}
+
+/*
+ * .prepare() for the BLOCK_DIRTY_BITMAP_CLEAR transaction action.
+ *
+ * Looks up the bitmap (which leaves its node's AioContext acquired in
+ * state->aio_context), refuses frozen or disabled bitmaps, then clears the
+ * bitmap while having bdrv_clear_dirty_bitmap() fill state->backup.
+ * On failure @errp is set and state->backup is left untouched.
+ */
+static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
+ Error **errp)
+{
+ BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+ common, common);
+ BlockDirtyBitmap *action;
+
+ if (action_check_completion_mode(common, errp) < 0) {
+ return;
+ }
+
+ action = common->action->u.block_dirty_bitmap_clear.data;
+ /* On success the lookup stores the node and its still-acquired
+ * AioContext in state; on failure it writes neither */
+ state->bitmap = block_dirty_bitmap_lookup(action->node,
+ action->name,
+ &state->bs,
+ &state->aio_context,
+ errp);
+ if (!state->bitmap) {
+ return;
+ }
+
+ if (bdrv_dirty_bitmap_frozen(state->bitmap)) {
+ error_setg(errp, "Cannot modify a frozen bitmap");
+ return;
+ } else if (!bdrv_dirty_bitmap_enabled(state->bitmap)) {
+ error_setg(errp, "Cannot clear a disabled bitmap");
+ return;
+ }
+
+ /* state->backup is what .abort() hands to the undo and .commit() frees */
+ bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
+ /* AioContext is released in .clean() */
+}
+
+/*
+ * .abort() for BLOCK_DIRTY_BITMAP_CLEAR: restores the bitmap contents saved
+ * in state->backup by .prepare().
+ *
+ * Like the other .abort() handlers in this file, this must tolerate running
+ * when .prepare() returned early (bitmap not found, frozen or disabled,
+ * unsupported completion mode). In those cases the clear never happened and
+ * state->backup (and possibly state->bitmap) was never set, so there is
+ * nothing to undo and the undo helper must not be called.
+ */
+static void block_dirty_bitmap_clear_abort(BlkActionState *common)
+{
+    BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
+                                             common, common);
+
+    if (state->backup) {
+        bdrv_undo_clear_dirty_bitmap(state->bitmap, state->backup);
+    }
+}
+
+/*
+ * .commit() for BLOCK_DIRTY_BITMAP_CLEAR: the clear is final, so the backup
+ * kept for a possible undo is freed.
+ */
+static void block_dirty_bitmap_clear_commit(BlkActionState *common)
+{
+    BlockDirtyBitmapState *clear_state =
+        DO_UPCAST(BlockDirtyBitmapState, common, common);
+    HBitmap *saved = clear_state->backup;
+
+    hbitmap_free(saved);
+}
+
+/*
+ * .clean() for BLOCK_DIRTY_BITMAP_CLEAR: releases the AioContext that the
+ * bitmap lookup in .prepare() left acquired. Does nothing if no AioContext
+ * was recorded.
+ */
+static void block_dirty_bitmap_clear_clean(BlkActionState *common)
+{
+    BlockDirtyBitmapState *clear_state =
+        DO_UPCAST(BlockDirtyBitmapState, common, common);
+
+    if (!clear_state->aio_context) {
+        return;
+    }
+
+    aio_context_release(clear_state->aio_context);
+}
+
+static void abort_prepare(BlkActionState *common, Error **errp)
{
error_setg(errp, "Transaction aborted using Abort action");
}
-static void abort_commit(BlkTransactionState *common)
+static void abort_commit(BlkActionState *common)
{
g_assert_not_reached(); /* this action never succeeds */
}
-static const BdrvActionOps actions[] = {
+static const BlkActionOps actions[] = {
+ [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT] = {
+ .instance_size = sizeof(ExternalSnapshotState),
+ .prepare = external_snapshot_prepare,
+ .commit = external_snapshot_commit,
+ .abort = external_snapshot_abort,
+ .clean = external_snapshot_clean,
+ },
[TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = {
.instance_size = sizeof(ExternalSnapshotState),
.prepare = external_snapshot_prepare,
.commit = external_snapshot_commit,
.abort = external_snapshot_abort,
+ .clean = external_snapshot_clean,
},
[TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = {
.instance_size = sizeof(DriveBackupState),
.prepare = drive_backup_prepare,
.abort = drive_backup_abort,
+ .clean = drive_backup_clean,
+ },
+ [TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP] = {
+ .instance_size = sizeof(BlockdevBackupState),
+ .prepare = blockdev_backup_prepare,
+ .abort = blockdev_backup_abort,
+ .clean = blockdev_backup_clean,
},
[TRANSACTION_ACTION_KIND_ABORT] = {
- .instance_size = sizeof(BlkTransactionState),
+ .instance_size = sizeof(BlkActionState),
.prepare = abort_prepare,
.commit = abort_commit,
},
@@ -1526,42 +2160,87 @@
.instance_size = sizeof(InternalSnapshotState),
.prepare = internal_snapshot_prepare,
.abort = internal_snapshot_abort,
+ .clean = internal_snapshot_clean,
},
+ [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_ADD] = {
+ .instance_size = sizeof(BlockDirtyBitmapState),
+ .prepare = block_dirty_bitmap_add_prepare,
+ .abort = block_dirty_bitmap_add_abort,
+ },
+ [TRANSACTION_ACTION_KIND_BLOCK_DIRTY_BITMAP_CLEAR] = {
+ .instance_size = sizeof(BlockDirtyBitmapState),
+ .prepare = block_dirty_bitmap_clear_prepare,
+ .commit = block_dirty_bitmap_clear_commit,
+ .abort = block_dirty_bitmap_clear_abort,
+ .clean = block_dirty_bitmap_clear_clean,
+ }
};
-/*
- * 'Atomic' group snapshots. The snapshots are taken as a set, and if any fail
- * then we do not pivot any of the devices in the group, and abandon the
- * snapshots
+/**
+ * Allocate a TransactionProperties structure if necessary, and fill
+ * that structure with desired defaults if they are unset.
*/
-void qmp_transaction(TransactionActionList *dev_list, Error **errp)
+static TransactionProperties *get_transaction_properties(
+ TransactionProperties *props)
+{
+ /* No properties supplied: allocate a zero-filled structure. The caller
+ * then owns it (qmp_transaction() frees it when !has_props). */
+ if (!props) {
+ props = g_new0(TransactionProperties, 1);
+ }
+
+ /* Fill in the default completion mode only when none was specified. */
+ if (!props->has_completion_mode) {
+ props->has_completion_mode = true;
+ props->completion_mode = ACTION_COMPLETION_MODE_INDIVIDUAL;
+ }
+
+ /* Either the pointer passed in or the freshly allocated structure. */
+ return props;
+}
+
+/*
+ * 'Atomic' group operations. The operations are performed as a set, and if
+ * any fail then we roll back all operations in the group.
+ */
+void qmp_transaction(TransactionActionList *dev_list,
+ bool has_props,
+ struct TransactionProperties *props,
+ Error **errp)
{
TransactionActionList *dev_entry = dev_list;
- BlkTransactionState *state, *next;
+ BlockJobTxn *block_job_txn = NULL;
+ BlkActionState *state, *next;
Error *local_err = NULL;
- QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionState) snap_bdrv_states;
+ QSIMPLEQ_HEAD(snap_bdrv_states, BlkActionState) snap_bdrv_states;
QSIMPLEQ_INIT(&snap_bdrv_states);
- /* drain all i/o before any snapshots */
+ /* Does this transaction get canceled as a group on failure?
+ * If not, we don't really need to make a BlockJobTxn.
+ */
+ props = get_transaction_properties(props);
+ if (props->completion_mode != ACTION_COMPLETION_MODE_INDIVIDUAL) {
+ block_job_txn = block_job_txn_new();
+ }
+
+ /* drain all i/o before any operations */
bdrv_drain_all();
- /* We don't do anything in this loop that commits us to the snapshot */
+ /* We don't do anything in this loop that commits us to the operations */
while (NULL != dev_entry) {
TransactionAction *dev_info = NULL;
- const BdrvActionOps *ops;
+ const BlkActionOps *ops;
dev_info = dev_entry->value;
dev_entry = dev_entry->next;
- assert(dev_info->kind < ARRAY_SIZE(actions));
+ assert(dev_info->type < ARRAY_SIZE(actions));
- ops = &actions[dev_info->kind];
+ ops = &actions[dev_info->type];
assert(ops->instance_size > 0);
state = g_malloc0(ops->instance_size);
state->ops = ops;
state->action = dev_info;
+ state->block_job_txn = block_job_txn;
+ state->txn_props = props;
QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry);
state->ops->prepare(state, &local_err);
@@ -1581,10 +2260,7 @@
goto exit;
delete_and_fail:
- /*
- * failure, and it is all-or-none; abandon each new bs, and keep using
- * the original bs for all images
- */
+ /* failure, and it is all-or-none; roll back all operations */
QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) {
if (state->ops->abort) {
state->ops->abort(state);
@@ -1597,45 +2273,29 @@
}
g_free(state);
}
-}
-
-
-static void eject_device(BlockBackend *blk, int force, Error **errp)
-{
- BlockDriverState *bs = blk_bs(blk);
-
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
- return;
+ if (!has_props) {
+ qapi_free_TransactionProperties(props);
}
- if (!blk_dev_has_removable_media(blk)) {
- error_setg(errp, "Device '%s' is not removable",
- bdrv_get_device_name(bs));
- return;
- }
-
- if (blk_dev_is_medium_locked(blk) && !blk_dev_is_tray_open(blk)) {
- blk_dev_eject_request(blk, force);
- if (!force) {
- error_setg(errp, "Device '%s' is locked",
- bdrv_get_device_name(bs));
- return;
- }
- }
-
- bdrv_close(bs);
+ block_job_txn_unref(block_job_txn);
}
void qmp_eject(const char *device, bool has_force, bool force, Error **errp)
{
- BlockBackend *blk;
+ Error *local_err = NULL;
+ int rc;
- blk = blk_by_name(device);
- if (!blk) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
- return;
+ if (!has_force) {
+ force = false;
}
- eject_device(blk, force, errp);
+ rc = do_open_tray(device, force, &local_err);
+ if (rc && rc != -ENOSYS) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ error_free(local_err);
+
+ qmp_x_blockdev_remove_medium(device, errp);
}
void qmp_block_passwd(bool has_device, const char *device,
@@ -1644,7 +2304,7 @@
{
Error *local_err = NULL;
BlockDriverState *bs;
- int err;
+ AioContext *aio_context;
bs = bdrv_lookup_bs(has_device ? device : NULL,
has_node_name ? node_name : NULL,
@@ -1654,163 +2314,521 @@
return;
}
- err = bdrv_set_key(bs, password);
- if (err == -EINVAL) {
- error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
- return;
- } else if (err < 0) {
- error_set(errp, QERR_INVALID_PASSWORD);
- return;
- }
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_add_key(bs, password, errp);
+
+ aio_context_release(aio_context);
}
-static void qmp_bdrv_open_encrypted(BlockDriverState *bs, const char *filename,
- int bdrv_flags, BlockDriver *drv,
- const char *password, Error **errp)
-{
- Error *local_err = NULL;
- int ret;
-
- ret = bdrv_open(&bs, filename, NULL, NULL, bdrv_flags, drv, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
- return;
- }
-
- if (bdrv_key_required(bs)) {
- if (password) {
- if (bdrv_set_key(bs, password) < 0) {
- error_set(errp, QERR_INVALID_PASSWORD);
- }
- } else {
- error_set(errp, QERR_DEVICE_ENCRYPTED, bdrv_get_device_name(bs),
- bdrv_get_encrypted_filename(bs));
- }
- } else if (password) {
- error_set(errp, QERR_DEVICE_NOT_ENCRYPTED, bdrv_get_device_name(bs));
- }
-}
-
-void qmp_change_blockdev(const char *device, const char *filename,
- const char *format, Error **errp)
+/*
+ * Attempt to open the tray of @device.
+ * If @force, ignore its tray lock.
+ * Else, if the tray is locked, don't open it, but ask the guest to open it.
+ * On error, store an error through @errp and return -errno.
+ * If @device does not exist, return -ENODEV.
+ * If it has no removable media, return -ENOTSUP.
+ * If it has no tray, return -ENOSYS.
+ * If the guest was asked to open the tray, return -EINPROGRESS.
+ * Else, return 0.
+ */
+static int do_open_tray(const char *device, bool force, Error **errp)
{
BlockBackend *blk;
- BlockDriverState *bs;
- BlockDriver *drv = NULL;
- int bdrv_flags;
- Error *err = NULL;
+ bool locked;
blk = blk_by_name(device);
if (!blk) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
- return;
- }
- bs = blk_bs(blk);
-
- if (format) {
- drv = bdrv_find_whitelisted_format(format, bs->read_only);
- if (!drv) {
- error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
- return;
- }
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return -ENODEV;
}
- eject_device(blk, 0, &err);
- if (err) {
- error_propagate(errp, err);
- return;
+ if (!blk_dev_has_removable_media(blk)) {
+ error_setg(errp, "Device '%s' is not removable", device);
+ return -ENOTSUP;
}
- bdrv_flags = bdrv_is_read_only(bs) ? 0 : BDRV_O_RDWR;
- bdrv_flags |= bdrv_is_snapshot(bs) ? BDRV_O_SNAPSHOT : 0;
+ if (!blk_dev_has_tray(blk)) {
+ error_setg(errp, "Device '%s' does not have a tray", device);
+ return -ENOSYS;
+ }
- qmp_bdrv_open_encrypted(bs, filename, bdrv_flags, drv, NULL, errp);
+ if (blk_dev_is_tray_open(blk)) {
+ return 0;
+ }
+
+ locked = blk_dev_is_medium_locked(blk);
+ if (locked) {
+ blk_dev_eject_request(blk, force);
+ }
+
+ if (!locked || force) {
+ blk_dev_change_media_cb(blk, false);
+ }
+
+ if (locked && !force) {
+ error_setg(errp, "Device '%s' is locked and force was not specified, "
+ "wait for tray to open and try again", device);
+ return -EINPROGRESS;
+ }
+
+ return 0;
}
-/* throttling disk I/O limits */
-void qmp_block_set_io_throttle(const char *device, int64_t bps, int64_t bps_rd,
- int64_t bps_wr,
- int64_t iops,
- int64_t iops_rd,
- int64_t iops_wr,
- bool has_bps_max,
- int64_t bps_max,
- bool has_bps_rd_max,
- int64_t bps_rd_max,
- bool has_bps_wr_max,
- int64_t bps_wr_max,
- bool has_iops_max,
- int64_t iops_max,
- bool has_iops_rd_max,
- int64_t iops_rd_max,
- bool has_iops_wr_max,
- int64_t iops_wr_max,
- bool has_iops_size,
- int64_t iops_size, Error **errp)
+void qmp_blockdev_open_tray(const char *device, bool has_force, bool force,
+ Error **errp)
{
- ThrottleConfig cfg;
- BlockDriverState *bs;
- AioContext *aio_context;
+ Error *local_err = NULL;
+ int rc;
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ if (!has_force) {
+ force = false;
+ }
+ rc = do_open_tray(device, force, &local_err);
+ if (rc && rc != -ENOSYS && rc != -EINPROGRESS) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ error_free(local_err);
+}
+
+void qmp_blockdev_close_tray(const char *device, Error **errp)
+{
+ BlockBackend *blk;
+
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
- memset(&cfg, 0, sizeof(cfg));
- cfg.buckets[THROTTLE_BPS_TOTAL].avg = bps;
- cfg.buckets[THROTTLE_BPS_READ].avg = bps_rd;
- cfg.buckets[THROTTLE_BPS_WRITE].avg = bps_wr;
-
- cfg.buckets[THROTTLE_OPS_TOTAL].avg = iops;
- cfg.buckets[THROTTLE_OPS_READ].avg = iops_rd;
- cfg.buckets[THROTTLE_OPS_WRITE].avg = iops_wr;
-
- if (has_bps_max) {
- cfg.buckets[THROTTLE_BPS_TOTAL].max = bps_max;
- }
- if (has_bps_rd_max) {
- cfg.buckets[THROTTLE_BPS_READ].max = bps_rd_max;
- }
- if (has_bps_wr_max) {
- cfg.buckets[THROTTLE_BPS_WRITE].max = bps_wr_max;
- }
- if (has_iops_max) {
- cfg.buckets[THROTTLE_OPS_TOTAL].max = iops_max;
- }
- if (has_iops_rd_max) {
- cfg.buckets[THROTTLE_OPS_READ].max = iops_rd_max;
- }
- if (has_iops_wr_max) {
- cfg.buckets[THROTTLE_OPS_WRITE].max = iops_wr_max;
+ if (!blk_dev_has_removable_media(blk)) {
+ error_setg(errp, "Device '%s' is not removable", device);
+ return;
}
- if (has_iops_size) {
- cfg.op_size = iops_size;
+ if (!blk_dev_has_tray(blk)) {
+ /* Ignore this command on tray-less devices */
+ return;
}
- if (!check_throttle_config(&cfg, errp)) {
+ if (!blk_dev_is_tray_open(blk)) {
+ return;
+ }
+
+ blk_dev_change_media_cb(blk, true);
+}
+
+void qmp_x_blockdev_remove_medium(const char *device, Error **errp)
+{
+ BlockBackend *blk;
+ BlockDriverState *bs;
+ AioContext *aio_context;
+ bool has_device;
+
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return;
+ }
+
+ /* For BBs without a device, we can exchange the BDS tree at will */
+ has_device = blk_get_attached_dev(blk);
+
+ if (has_device && !blk_dev_has_removable_media(blk)) {
+ error_setg(errp, "Device '%s' is not removable", device);
+ return;
+ }
+
+ if (has_device && blk_dev_has_tray(blk) && !blk_dev_is_tray_open(blk)) {
+ error_setg(errp, "Tray of device '%s' is not open", device);
+ return;
+ }
+
+ bs = blk_bs(blk);
+ if (!bs) {
return;
}
aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- if (!bs->io_limits_enabled && throttle_enabled(&cfg)) {
- bdrv_io_limits_enable(bs);
- } else if (bs->io_limits_enabled && !throttle_enabled(&cfg)) {
- bdrv_io_limits_disable(bs);
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_EJECT, errp)) {
+ goto out;
}
- if (bs->io_limits_enabled) {
- bdrv_set_io_limits(bs, &cfg);
+ blk_remove_bs(blk);
+
+ if (!blk_dev_has_tray(blk)) {
+ /* For tray-less devices, blockdev-open-tray is a no-op (or may not be
+ * called at all); therefore, the medium needs to be ejected here.
+ * Do it after blk_remove_bs() so blk_is_inserted(blk) returns the @load
+ * value passed here (i.e. false). */
+ blk_dev_change_media_cb(blk, false);
}
+out:
aio_context_release(aio_context);
}
-int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data)
+/*
+ * Insert the BlockDriverState @bs as the medium of the BlockBackend named
+ * @device.
+ *
+ * Errors are reported through @errp when:
+ * - no BlockBackend named @device exists (ERROR_CLASS_DEVICE_NOT_FOUND);
+ * - a guest device is attached but does not have removable media;
+ * - a guest device with a tray is attached and the tray is not open;
+ * - the BlockBackend already has a BlockDriverState.
+ *
+ * On success @bs is attached with blk_insert_bs(); for devices without a
+ * tray the media-change callback is invoked with load=true as well.
+ */
+static void qmp_blockdev_insert_anon_medium(const char *device,
+ BlockDriverState *bs, Error **errp)
+{
+ BlockBackend *blk;
+ bool has_device;
+
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ return;
+ }
+
+ /* For BBs without a device, we can exchange the BDS tree at will */
+ has_device = blk_get_attached_dev(blk);
+
+ if (has_device && !blk_dev_has_removable_media(blk)) {
+ error_setg(errp, "Device '%s' is not removable", device);
+ return;
+ }
+
+ if (has_device && blk_dev_has_tray(blk) && !blk_dev_is_tray_open(blk)) {
+ error_setg(errp, "Tray of device '%s' is not open", device);
+ return;
+ }
+
+ /* Refuse to replace an existing medium; it has to be removed first. */
+ if (blk_bs(blk)) {
+ error_setg(errp, "There already is a medium in device '%s'", device);
+ return;
+ }
+
+ blk_insert_bs(blk, bs);
+
+ if (!blk_dev_has_tray(blk)) {
+ /* For tray-less devices, blockdev-close-tray is a no-op (or may not be
+ * called at all); therefore, the medium needs to be pushed into the
+ * slot here.
+ * Do it after blk_insert_bs() so blk_is_inserted(blk) returns the @load
+ * value passed here (i.e. true). */
+ blk_dev_change_media_cb(blk, true);
+ }
+}
+
+/*
+ * Insert the block node named @node_name as the medium of @device.
+ *
+ * Fails through @errp if no node with that name is found or if
+ * bdrv_has_blk() reports that the node already has a BlockBackend.
+ * All device-side checks and the actual insertion are delegated to
+ * qmp_blockdev_insert_anon_medium().
+ */
+void qmp_x_blockdev_insert_medium(const char *device, const char *node_name,
+ Error **errp)
+{
+ BlockDriverState *bs;
+
+ bs = bdrv_find_node(node_name);
+ if (!bs) {
+ error_setg(errp, "Node '%s' not found", node_name);
+ return;
+ }
+
+ /* A node that already has a BlockBackend must not be inserted again. */
+ if (bdrv_has_blk(bs)) {
+ error_setg(errp, "Node '%s' is already in use by '%s'", node_name,
+ bdrv_get_parent_name(bs));
+ return;
+ }
+
+ qmp_blockdev_insert_anon_medium(device, bs, errp);
+}
+
+/*
+ * Replace the medium of @device with a newly opened image.
+ *
+ * @device: name of the BlockBackend whose medium is exchanged
+ * @filename: image to open as the new medium
+ * @has_format/@format: optional driver name, passed to bdrv_open() as the
+ * "driver" option
+ * @has_read_only/@read_only: how to derive BDRV_O_RDWR for the new image;
+ * defaults to BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN
+ * @errp: receives the first error encountered
+ *
+ * Sequence: open the new image, open the tray (not forced), remove the old
+ * medium, insert the new one, re-apply the saved root state, close the tray.
+ * Every failure jumps to "fail", which drops this function's reference to
+ * the new image; the success path falls through to the same label.
+ */
+void qmp_blockdev_change_medium(const char *device, const char *filename,
+ bool has_format, const char *format,
+ bool has_read_only,
+ BlockdevChangeReadOnlyMode read_only,
+ Error **errp)
+{
+ BlockBackend *blk;
+ BlockDriverState *medium_bs = NULL;
+ int bdrv_flags;
+ int rc;
+ QDict *options = NULL;
+ Error *err = NULL;
+
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
+ goto fail;
+ }
+
+ /* NOTE(review): presumably records the current medium's settings so that
+ * blk_get_open_flags_from_root_state() and blk_apply_root_state() below
+ * can reuse them -- confirm against the BlockBackend code. */
+ if (blk_bs(blk)) {
+ blk_update_root_state(blk);
+ }
+
+ /* Start from the flags of the root state, minus the flags listed here. */
+ bdrv_flags = blk_get_open_flags_from_root_state(blk);
+ bdrv_flags &= ~(BDRV_O_TEMPORARY | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING |
+ BDRV_O_PROTOCOL);
+
+ if (!has_read_only) {
+ read_only = BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN;
+ }
+
+ switch (read_only) {
+ case BLOCKDEV_CHANGE_READ_ONLY_MODE_RETAIN:
+ /* Keep BDRV_O_RDWR exactly as derived from the root state. */
+ break;
+
+ case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_ONLY:
+ bdrv_flags &= ~BDRV_O_RDWR;
+ break;
+
+ case BLOCKDEV_CHANGE_READ_ONLY_MODE_READ_WRITE:
+ bdrv_flags |= BDRV_O_RDWR;
+ break;
+
+ default:
+ abort();
+ }
+
+ /* Only build an options dict when the caller named a format. */
+ if (has_format) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str(format));
+ }
+
+ medium_bs = bdrv_open(filename, NULL, options, bdrv_flags, errp);
+ if (!medium_bs) {
+ goto fail;
+ }
+
+ /* No key is supplied (NULL); any error reported here aborts the change. */
+ bdrv_add_key(medium_bs, NULL, &err);
+ if (err) {
+ error_propagate(errp, err);
+ goto fail;
+ }
+
+ /* Non-forced tray open. Only -ENOSYS (device has no tray) is tolerated;
+ * every other non-zero result, including -EINPROGRESS for a locked tray,
+ * is reported to the caller. */
+ rc = do_open_tray(device, false, &err);
+ if (rc && rc != -ENOSYS) {
+ error_propagate(errp, err);
+ goto fail;
+ }
+ /* Discard the error of the tolerated case so that err can be reused. */
+ error_free(err);
+ err = NULL;
+
+ qmp_x_blockdev_remove_medium(device, &err);
+ if (err) {
+ error_propagate(errp, err);
+ goto fail;
+ }
+
+ qmp_blockdev_insert_anon_medium(device, medium_bs, &err);
+ if (err) {
+ error_propagate(errp, err);
+ goto fail;
+ }
+
+ blk_apply_root_state(blk, medium_bs);
+
+ /* Last step: its error, if any, goes straight to the caller's errp. */
+ qmp_blockdev_close_tray(device, errp);
+
+fail:
+ /* If the medium has been inserted, the device has its own reference, so
+ * ours must be relinquished; and if it has not been inserted successfully,
+ * the reference must be relinquished anyway */
+ bdrv_unref(medium_bs);
+}
+
+/*
+ * throttling disk I/O limits
+ *
+ * Builds a ThrottleConfig from @arg and applies it to the BlockBackend named
+ * arg->device.
+ *
+ * - The average limits (bps, bps_rd, bps_wr, iops, iops_rd, iops_wr) are
+ * always copied; the *_max, *_max_length and iops_size fields only when
+ * their has_* flag is set.
+ * - Errors are reported through @errp when the device is not found, has no
+ * medium, or throttle_is_valid() rejects the configuration.
+ * - If throttle_enabled() reports the config as enabled, I/O limits are
+ * enabled (or the throttle group is updated) and the config is set;
+ * otherwise existing limits are disabled.
+ *
+ * The BlockBackend's AioContext is held from the medium check until return.
+ */
+void qmp_block_set_io_throttle(BlockIOThrottle *arg, Error **errp)
+{
+ ThrottleConfig cfg;
+ BlockDriverState *bs;
+ BlockBackend *blk;
+ AioContext *aio_context;
+
+ blk = blk_by_name(arg->device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", arg->device);
+ return;
+ }
+
+ aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(aio_context);
+
+ /* bs is only used to verify that a medium is present. */
+ bs = blk_bs(blk);
+ if (!bs) {
+ error_setg(errp, "Device '%s' has no medium", arg->device);
+ goto out;
+ }
+
+ throttle_config_init(&cfg);
+ /* Average rates: mandatory fields, copied unconditionally. */
+ cfg.buckets[THROTTLE_BPS_TOTAL].avg = arg->bps;
+ cfg.buckets[THROTTLE_BPS_READ].avg = arg->bps_rd;
+ cfg.buckets[THROTTLE_BPS_WRITE].avg = arg->bps_wr;
+
+ cfg.buckets[THROTTLE_OPS_TOTAL].avg = arg->iops;
+ cfg.buckets[THROTTLE_OPS_READ].avg = arg->iops_rd;
+ cfg.buckets[THROTTLE_OPS_WRITE].avg = arg->iops_wr;
+
+ /* Optional .max values; left as set by throttle_config_init() if absent. */
+ if (arg->has_bps_max) {
+ cfg.buckets[THROTTLE_BPS_TOTAL].max = arg->bps_max;
+ }
+ if (arg->has_bps_rd_max) {
+ cfg.buckets[THROTTLE_BPS_READ].max = arg->bps_rd_max;
+ }
+ if (arg->has_bps_wr_max) {
+ cfg.buckets[THROTTLE_BPS_WRITE].max = arg->bps_wr_max;
+ }
+ if (arg->has_iops_max) {
+ cfg.buckets[THROTTLE_OPS_TOTAL].max = arg->iops_max;
+ }
+ if (arg->has_iops_rd_max) {
+ cfg.buckets[THROTTLE_OPS_READ].max = arg->iops_rd_max;
+ }
+ if (arg->has_iops_wr_max) {
+ cfg.buckets[THROTTLE_OPS_WRITE].max = arg->iops_wr_max;
+ }
+
+ /* Optional .burst_length values, same pattern as the .max values. */
+ if (arg->has_bps_max_length) {
+ cfg.buckets[THROTTLE_BPS_TOTAL].burst_length = arg->bps_max_length;
+ }
+ if (arg->has_bps_rd_max_length) {
+ cfg.buckets[THROTTLE_BPS_READ].burst_length = arg->bps_rd_max_length;
+ }
+ if (arg->has_bps_wr_max_length) {
+ cfg.buckets[THROTTLE_BPS_WRITE].burst_length = arg->bps_wr_max_length;
+ }
+ if (arg->has_iops_max_length) {
+ cfg.buckets[THROTTLE_OPS_TOTAL].burst_length = arg->iops_max_length;
+ }
+ if (arg->has_iops_rd_max_length) {
+ cfg.buckets[THROTTLE_OPS_READ].burst_length = arg->iops_rd_max_length;
+ }
+ if (arg->has_iops_wr_max_length) {
+ cfg.buckets[THROTTLE_OPS_WRITE].burst_length = arg->iops_wr_max_length;
+ }
+
+ if (arg->has_iops_size) {
+ cfg.op_size = arg->iops_size;
+ }
+
+ /* Reject an invalid configuration before touching the device state. */
+ if (!throttle_is_valid(&cfg, errp)) {
+ goto out;
+ }
+
+ if (throttle_enabled(&cfg)) {
+ /* Enable I/O limits if they're not enabled yet, otherwise
+ * just update the throttling group. */
+ if (!blk_get_public(blk)->throttle_state) {
+ /* The group name falls back to the device name. */
+ blk_io_limits_enable(blk,
+ arg->has_group ? arg->group : arg->device);
+ } else if (arg->has_group) {
+ blk_io_limits_update_group(blk, arg->group);
+ }
+ /* Set the new throttling configuration */
+ blk_set_io_limits(blk, &cfg);
+ } else if (blk_get_public(blk)->throttle_state) {
+ /* If all throttling settings are set to 0, disable I/O limits */
+ blk_io_limits_disable(blk);
+ }
+
+out:
+ aio_context_release(aio_context);
+}
+
+/*
+ * Create a dirty bitmap called @name on the block node identified by @node.
+ *
+ * @name must be a non-empty string. If @has_granularity is set,
+ * @granularity must be a power of 2 and at least 512; otherwise the value
+ * from bdrv_get_default_bitmap_granularity() is used. Errors are reported
+ * through @errp.
+ */
+void qmp_block_dirty_bitmap_add(const char *node, const char *name,
+ bool has_granularity, uint32_t granularity,
+ Error **errp)
+{
+ AioContext *aio_context;
+ BlockDriverState *bs;
+
+ if (!name || name[0] == '\0') {
+ error_setg(errp, "Bitmap name cannot be empty");
+ return;
+ }
+
+ /* @node is passed as both lookup arguments.
+ * NOTE(review): presumably so that it may be either a device name or a
+ * node name -- confirm against bdrv_lookup_bs(). */
+ bs = bdrv_lookup_bs(node, node, errp);
+ if (!bs) {
+ return;
+ }
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ if (has_granularity) {
+ if (granularity < 512 || !is_power_of_2(granularity)) {
+ error_setg(errp, "Granularity must be power of 2 "
+ "and at least 512");
+ goto out;
+ }
+ } else {
+ /* Default to cluster size, if available: */
+ granularity = bdrv_get_default_bitmap_granularity(bs);
+ }
+
+ bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+
+ out:
+ aio_context_release(aio_context);
+}
+
+/*
+ * Remove the dirty bitmap @name from the block node identified by @node.
+ *
+ * A bitmap for which bdrv_dirty_bitmap_frozen() returns true is not removed;
+ * an error is set through @errp instead. Otherwise the bitmap is made
+ * anonymous and then released.
+ */
+void qmp_block_dirty_bitmap_remove(const char *node, const char *name,
+ Error **errp)
+{
+ AioContext *aio_context;
+ BlockDriverState *bs;
+ BdrvDirtyBitmap *bitmap;
+
+ /* NOTE(review): the lookup appears to return with *aio_context acquired
+ * on success, since every later exit releases it -- confirm against
+ * block_dirty_bitmap_lookup(). */
+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+ if (!bitmap || !bs) {
+ return;
+ }
+
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ error_setg(errp,
+ "Bitmap '%s' is currently frozen and cannot be removed",
+ name);
+ goto out;
+ }
+ bdrv_dirty_bitmap_make_anon(bitmap);
+ bdrv_release_dirty_bitmap(bs, bitmap);
+
+ out:
+ aio_context_release(aio_context);
+}
+
+/**
+ * Completely clear a bitmap, for the purposes of synchronizing a bitmap
+ * immediately after a full backup operation.
+ *
+ * The bitmap @name is looked up on the node identified by @node. The clear
+ * is refused, with an error set through @errp, when the bitmap is frozen or
+ * when it is not enabled.
+ */
+void qmp_block_dirty_bitmap_clear(const char *node, const char *name,
+ Error **errp)
+{
+ AioContext *aio_context;
+ BdrvDirtyBitmap *bitmap;
+ BlockDriverState *bs;
+
+ /* NOTE(review): the lookup appears to return with *aio_context acquired
+ * on success, since every later exit releases it -- confirm against
+ * block_dirty_bitmap_lookup(). */
+ bitmap = block_dirty_bitmap_lookup(node, name, &bs, &aio_context, errp);
+ if (!bitmap || !bs) {
+ return;
+ }
+
+ if (bdrv_dirty_bitmap_frozen(bitmap)) {
+ error_setg(errp,
+ "Bitmap '%s' is currently frozen and cannot be modified",
+ name);
+ goto out;
+ } else if (!bdrv_dirty_bitmap_enabled(bitmap)) {
+ error_setg(errp,
+ "Bitmap '%s' is currently disabled and cannot be cleared",
+ name);
+ goto out;
+ }
+
+ /* NOTE(review): NULL presumably means that no backup copy is requested
+ * for a later undo -- confirm against bdrv_clear_dirty_bitmap(). */
+ bdrv_clear_dirty_bitmap(bitmap, NULL);
+
+ out:
+ aio_context_release(aio_context);
+}
+
+void hmp_drive_del(Monitor *mon, const QDict *qdict)
{
const char *id = qdict_get_str(qdict, "id");
BlockBackend *blk;
@@ -1818,50 +2836,56 @@
AioContext *aio_context;
Error *local_err = NULL;
+ bs = bdrv_find_node(id);
+ if (bs) {
+ qmp_x_blockdev_del(false, NULL, true, id, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+ return;
+ }
+
blk = blk_by_name(id);
if (!blk) {
error_report("Device '%s' not found", id);
- return -1;
+ return;
}
- bs = blk_bs(blk);
if (!blk_legacy_dinfo(blk)) {
error_report("Deleting device added with blockdev-add"
" is not supported");
- return -1;
+ return;
}
- aio_context = bdrv_get_aio_context(bs);
+ aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
- error_report("%s", error_get_pretty(local_err));
- error_free(local_err);
- aio_context_release(aio_context);
- return -1;
+ bs = blk_bs(blk);
+ if (bs) {
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, &local_err)) {
+ error_report_err(local_err);
+ aio_context_release(aio_context);
+ return;
+ }
+
+ blk_remove_bs(blk);
}
- /* quiesce block driver; prevent further io */
- bdrv_drain_all();
- bdrv_flush(bs);
- bdrv_close(bs);
+ /* Make the BlockBackend and the attached BlockDriverState anonymous */
+ monitor_remove_blk(blk);
- /* if we have a device attached to this BlockDriverState
- * then we need to make the drive anonymous until the device
- * can be removed. If this is a drive with no device backing
- * then we can just get rid of the block driver state right here.
+ /* If this BlockBackend has a device attached to it, its refcount will be
+ * decremented when the device is removed; otherwise we have to do so here.
*/
if (blk_get_attached_dev(blk)) {
- blk_hide_on_behalf_of_do_drive_del(blk);
/* Further I/O must not pause the guest */
- bdrv_set_on_error(bs, BLOCKDEV_ON_ERROR_REPORT,
- BLOCKDEV_ON_ERROR_REPORT);
+ blk_set_on_error(blk, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT);
} else {
blk_unref(blk);
}
aio_context_release(aio_context);
- return 0;
}
void qmp_block_resize(bool has_device, const char *device,
@@ -1885,17 +2909,17 @@
aio_context_acquire(aio_context);
if (!bdrv_is_first_non_filter(bs)) {
- error_set(errp, QERR_FEATURE_DISABLED, "resize");
+ error_setg(errp, QERR_FEATURE_DISABLED, "resize");
goto out;
}
if (size < 0) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "size", "a >0 size");
goto out;
}
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_RESIZE, NULL)) {
- error_set(errp, QERR_DEVICE_IN_USE, device);
+ error_setg(errp, QERR_DEVICE_IN_USE, device);
goto out;
}
@@ -1907,16 +2931,16 @@
case 0:
break;
case -ENOMEDIUM:
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
break;
case -ENOTSUP:
- error_set(errp, QERR_UNSUPPORTED);
+ error_setg(errp, QERR_UNSUPPORTED);
break;
case -EACCES:
- error_set(errp, QERR_DEVICE_IS_READ_ONLY, device);
+ error_setg(errp, "Device '%s' is read only", device);
break;
case -EBUSY:
- error_set(errp, QERR_DEVICE_IN_USE, device);
+ error_setg(errp, QERR_DEVICE_IN_USE, device);
break;
default:
error_setg_errno(errp, -ret, "Could not resize");
@@ -1950,17 +2974,16 @@
} else {
block_job_event_completed(bs->job, msg);
}
-
- bdrv_put_ref_bh_schedule(bs);
}
-void qmp_block_stream(const char *device,
+void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
bool has_base, const char *base,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
bool has_on_error, BlockdevOnError on_error,
Error **errp)
{
+ BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *base_bs = NULL;
AioContext *aio_context;
@@ -1971,15 +2994,22 @@
on_error = BLOCKDEV_ON_ERROR_REPORT;
}
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
- aio_context = bdrv_get_aio_context(bs);
+ aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ goto out;
+ }
+ bs = blk_bs(blk);
+
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_STREAM, errp)) {
goto out;
}
@@ -1987,7 +3017,7 @@
if (has_base) {
base_bs = bdrv_find_backing_image(bs, base);
if (base_bs == NULL) {
- error_set(errp, QERR_BASE_NOT_FOUND, base);
+ error_setg(errp, QERR_BASE_NOT_FOUND, base);
goto out;
}
assert(bdrv_get_aio_context(base_bs) == aio_context);
@@ -2005,8 +3035,8 @@
/* backing_file string overrides base bs filename */
base_name = has_backing_file ? backing_file : base_name;
- stream_start(bs, base_bs, base_name, has_speed ? speed : 0,
- on_error, block_job_cb, bs, &local_err);
+ stream_start(has_job_id ? job_id : NULL, bs, base_bs, base_name,
+ has_speed ? speed : 0, on_error, block_job_cb, bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto out;
@@ -2018,13 +3048,14 @@
aio_context_release(aio_context);
}
-void qmp_block_commit(const char *device,
+void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
bool has_base, const char *base,
bool has_top, const char *top,
bool has_backing_file, const char *backing_file,
bool has_speed, int64_t speed,
Error **errp)
{
+ BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *base_bs, *top_bs;
AioContext *aio_context;
@@ -2043,19 +3074,23 @@
* live commit feature versions; for this to work, we must make sure to
* perform the device lookup before any generic errors that may occur in a
* scenario in which all optional arguments are omitted. */
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
- aio_context = bdrv_get_aio_context(bs);
+ aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
- /* drain all i/o before commits */
- bdrv_drain_all();
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ goto out;
+ }
+ bs = blk_bs(blk);
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT, errp)) {
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, errp)) {
goto out;
}
@@ -2082,12 +3117,16 @@
}
if (base_bs == NULL) {
- error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+ error_setg(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
goto out;
}
assert(bdrv_get_aio_context(base_bs) == aio_context);
+ if (bdrv_op_is_blocked(base_bs, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
+ goto out;
+ }
+
/* Do not allow attempts to commit an image into itself */
if (top_bs == base_bs) {
error_setg(errp, "cannot commit an image into itself");
@@ -2100,10 +3139,11 @@
" but 'top' is the active layer");
goto out;
}
- commit_active_start(bs, base_bs, speed, on_error, block_job_cb,
- bs, &local_err);
+ commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, speed,
+ on_error, block_job_cb, bs, &local_err);
} else {
- commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+ commit_start(has_job_id ? job_id : NULL, bs, base_bs, top_bs, speed,
+ on_error, block_job_cb, bs,
has_backing_file ? backing_file : NULL, &local_err);
}
if (local_err != NULL) {
@@ -2115,24 +3155,28 @@
aio_context_release(aio_context);
}
-void qmp_drive_backup(const char *device, const char *target,
- bool has_format, const char *format,
- enum MirrorSyncMode sync,
- bool has_mode, enum NewImageMode mode,
- bool has_speed, int64_t speed,
- bool has_on_source_error, BlockdevOnError on_source_error,
- bool has_on_target_error, BlockdevOnError on_target_error,
- Error **errp)
+static void do_drive_backup(const char *job_id, const char *device,
+ const char *target, bool has_format,
+ const char *format, enum MirrorSyncMode sync,
+ bool has_mode, enum NewImageMode mode,
+ bool has_speed, int64_t speed,
+ bool has_bitmap, const char *bitmap,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ BlockJobTxn *txn, Error **errp)
{
+ BlockBackend *blk;
BlockDriverState *bs;
BlockDriverState *target_bs;
BlockDriverState *source = NULL;
+ BdrvDirtyBitmap *bmap = NULL;
AioContext *aio_context;
- BlockDriver *drv = NULL;
+ QDict *options = NULL;
Error *local_err = NULL;
int flags;
int64_t size;
- int ret;
if (!has_speed) {
speed = 0;
@@ -2147,31 +3191,29 @@
mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
- aio_context = bdrv_get_aio_context(bs);
+ aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
- if (!bdrv_is_inserted(bs)) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ /* Although backup_run has this check too, we need to use bs->drv below, so
+ * do an early check redundantly. */
+ if (!blk_is_available(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
goto out;
}
+ bs = blk_bs(blk);
if (!has_format) {
format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
}
- if (format) {
- drv = bdrv_find_format(format);
- if (!drv) {
- error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
- goto out;
- }
- }
+ /* Early check to avoid creating target */
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
goto out;
}
@@ -2181,7 +3223,7 @@
/* See if we have a backing HD we can use to create our new image
* on top of. */
if (sync == MIRROR_SYNC_MODE_TOP) {
- source = bs->backing_hd;
+ source = backing_bs(bs);
if (!source) {
sync = MIRROR_SYNC_MODE_FULL;
}
@@ -2197,7 +3239,7 @@
}
if (mode != NEW_IMAGE_MODE_EXISTING) {
- assert(format && drv);
+ assert(format);
if (source) {
bdrv_img_create(target, format, source->filename,
source->drv->format_name, NULL,
@@ -2213,19 +3255,32 @@
goto out;
}
- target_bs = NULL;
- ret = bdrv_open(&target_bs, target, NULL, NULL, flags, drv, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
+ if (format) {
+ options = qdict_new();
+ qdict_put(options, "driver", qstring_from_str(format));
+ }
+
+ target_bs = bdrv_open(target, NULL, options, flags, errp);
+ if (!target_bs) {
goto out;
}
bdrv_set_aio_context(target_bs, aio_context);
- backup_start(bs, target_bs, speed, sync, on_source_error, on_target_error,
- block_job_cb, bs, &local_err);
+ if (has_bitmap) {
+ bmap = bdrv_find_dirty_bitmap(bs, bitmap);
+ if (!bmap) {
+ error_setg(errp, "Bitmap '%s' could not be found", bitmap);
+ bdrv_unref(target_bs);
+ goto out;
+ }
+ }
+
+ backup_start(job_id, bs, target_bs, speed, sync, bmap,
+ on_source_error, on_target_error,
+ block_job_cb, bs, txn, &local_err);
+ bdrv_unref(target_bs);
if (local_err != NULL) {
- bdrv_unref(target_bs);
error_propagate(errp, local_err);
goto out;
}
@@ -2234,35 +3289,45 @@
aio_context_release(aio_context);
}
-BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
-{
- return bdrv_named_nodes_list();
-}
-
-#define DEFAULT_MIRROR_BUF_SIZE (10 << 20)
-
-void qmp_drive_mirror(const char *device, const char *target,
+/* Hand a drive-backup request to do_drive_backup().
+ *
+ * Every argument is forwarded unchanged, except that @job_id is replaced by
+ * NULL when @has_job_id is false, and NULL is passed for the argument that
+ * precedes @errp (presumably the BlockJobTxn, as for do_blockdev_backup() --
+ * confirm against do_drive_backup()'s prototype). Errors are reported
+ * through @errp by the callee.
+ *
+ * This function returns void, so the call is a plain statement: ISO C does
+ * not allow "return <expression>;" in a function whose return type is void.
+ */
+void qmp_drive_backup(bool has_job_id, const char *job_id,
+ const char *device, const char *target,
bool has_format, const char *format,
- bool has_node_name, const char *node_name,
- bool has_replaces, const char *replaces,
enum MirrorSyncMode sync,
bool has_mode, enum NewImageMode mode,
bool has_speed, int64_t speed,
- bool has_granularity, uint32_t granularity,
- bool has_buf_size, int64_t buf_size,
+ bool has_bitmap, const char *bitmap,
bool has_on_source_error, BlockdevOnError on_source_error,
bool has_on_target_error, BlockdevOnError on_target_error,
Error **errp)
{
+ do_drive_backup(has_job_id ? job_id : NULL, device, target,
+ has_format, format, sync,
+ has_mode, mode, has_speed, speed,
+ has_bitmap, bitmap,
+ has_on_source_error, on_source_error,
+ has_on_target_error, on_target_error,
+ NULL, errp);
+}
+
+/* Return the list produced by bdrv_named_nodes_list(), forwarding @errp to
+ * it unchanged.
+ */
+BlockDeviceInfoList *qmp_query_named_block_nodes(Error **errp)
+{
+ return bdrv_named_nodes_list(errp);
+}
+
+void do_blockdev_backup(const char *job_id, const char *device,
+ const char *target, enum MirrorSyncMode sync,
+ bool has_speed, int64_t speed,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ BlockJobTxn *txn, Error **errp)
+{
+ BlockBackend *blk;
BlockDriverState *bs;
- BlockDriverState *source, *target_bs;
- AioContext *aio_context;
- BlockDriver *drv = NULL;
+ BlockDriverState *target_bs;
Error *local_err = NULL;
- QDict *options = NULL;
- int flags;
- int64_t size;
- int ret;
+ AioContext *aio_context;
if (!has_speed) {
speed = 0;
@@ -2273,61 +3338,177 @@
if (!has_on_target_error) {
on_target_error = BLOCKDEV_ON_ERROR_REPORT;
}
- if (!has_mode) {
- mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
+
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_setg(errp, "Device '%s' not found", device);
+ return;
+ }
+
+ aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(aio_context);
+
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ goto out;
+ }
+ bs = blk_bs(blk);
+
+ target_bs = bdrv_lookup_bs(target, target, errp);
+ if (!target_bs) {
+ goto out;
+ }
+
+ if (bdrv_get_aio_context(target_bs) != aio_context) {
+ if (!bdrv_has_blk(target_bs)) {
+ /* The target BDS is not attached, we can safely move it to another
+ * AioContext. */
+ bdrv_set_aio_context(target_bs, aio_context);
+ } else {
+ error_setg(errp, "Target is attached to a different thread from "
+ "source.");
+ goto out;
+ }
+ }
+ backup_start(job_id, bs, target_bs, speed, sync, NULL, on_source_error,
+ on_target_error, block_job_cb, bs, txn, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ }
+out:
+ aio_context_release(aio_context);
+}
+
+/* Hand a blockdev-backup request to do_blockdev_backup().
+ *
+ * Every argument is forwarded unchanged, except that @job_id is replaced by
+ * NULL when @has_job_id is false and the BlockJobTxn argument is always
+ * NULL. Errors are reported through @errp by the callee.
+ */
+void qmp_blockdev_backup(bool has_job_id, const char *job_id,
+ const char *device, const char *target,
+ enum MirrorSyncMode sync,
+ bool has_speed, int64_t speed,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ Error **errp)
+{
+ do_blockdev_backup(has_job_id ? job_id : NULL, device, target,
+ sync, has_speed, speed,
+ has_on_source_error, on_source_error,
+ has_on_target_error, on_target_error,
+ NULL, errp);
+}
+
+/* Parameter check and block job starting for drive mirroring.
+ * Caller should hold @bs and @target's aio context (must be the same).
+ *
+ * Optional arguments whose has_* flag is false get these defaults:
+ * speed 0, on_source_error and on_target_error BLOCKDEV_ON_ERROR_REPORT,
+ * granularity 0, buf_size 0, unmap true.
+ *
+ * Sets @errp and returns without starting a job when @granularity is
+ * non-zero but outside [512, 64 MiB] or not a power of two, or when the
+ * mirror-source / mirror-target operation is blocked on @bs / @target.
+ * A MIRROR_SYNC_MODE_TOP request on a @bs without a backing file is
+ * turned into MIRROR_SYNC_MODE_FULL.
+ **/
+static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target,
+ bool has_replaces, const char *replaces,
+ enum MirrorSyncMode sync,
+ BlockMirrorBackingMode backing_mode,
+ bool has_speed, int64_t speed,
+ bool has_granularity, uint32_t granularity,
+ bool has_buf_size, int64_t buf_size,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ bool has_unmap, bool unmap,
+ Error **errp)
+{
+
+ if (!has_speed) {
+ speed = 0;
+ }
+ if (!has_on_source_error) {
+ on_source_error = BLOCKDEV_ON_ERROR_REPORT;
+ }
+ if (!has_on_target_error) {
+ on_target_error = BLOCKDEV_ON_ERROR_REPORT;
}
if (!has_granularity) {
granularity = 0;
}
if (!has_buf_size) {
- buf_size = DEFAULT_MIRROR_BUF_SIZE;
+ buf_size = 0;
+ }
+ if (!has_unmap) {
+ unmap = true;
}
if (granularity != 0 && (granularity < 512 || granularity > 1048576 * 64)) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
- "a value in range [512B, 64MB]");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+ "a value in range [512B, 64MB]");
return;
}
if (granularity & (granularity - 1)) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "granularity", "power of 2");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "granularity",
+ "power of 2");
return;
}
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR_SOURCE, errp)) {
+ return;
+ }
+ if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_MIRROR_TARGET, errp)) {
return;
}
- aio_context = bdrv_get_aio_context(bs);
+ if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) {
+ sync = MIRROR_SYNC_MODE_FULL;
+ }
+
+ /* Pass the name of the node to replace to mirror_start(): this loose
+ * coupling allows checking whether the node still exists when the mirror
+ * completes.
+ */
+ mirror_start(job_id, bs, target,
+ has_replaces ? replaces : NULL,
+ speed, granularity, buf_size, sync, backing_mode,
+ on_source_error, on_target_error, unmap,
+ block_job_cb, bs, errp);
+}
+
+void qmp_drive_mirror(DriveMirror *arg, Error **errp)
+{
+ BlockDriverState *bs;
+ BlockBackend *blk;
+ BlockDriverState *source, *target_bs;
+ AioContext *aio_context;
+ BlockMirrorBackingMode backing_mode;
+ Error *local_err = NULL;
+ QDict *options = NULL;
+ int flags;
+ int64_t size;
+ const char *format = arg->format;
+
+ blk = blk_by_name(arg->device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", arg->device);
+ return;
+ }
+
+ aio_context = blk_get_aio_context(blk);
aio_context_acquire(aio_context);
- if (!bdrv_is_inserted(bs)) {
- error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
+ if (!blk_is_available(blk)) {
+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, arg->device);
goto out;
}
-
- if (!has_format) {
- format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name;
- }
- if (format) {
- drv = bdrv_find_format(format);
- if (!drv) {
- error_set(errp, QERR_INVALID_BLOCK_FORMAT, format);
- goto out;
- }
+ bs = blk_bs(blk);
+ if (!arg->has_mode) {
+ arg->mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS;
}
- if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_MIRROR, errp)) {
- goto out;
+ if (!arg->has_format) {
+ format = (arg->mode == NEW_IMAGE_MODE_EXISTING
+ ? NULL : bs->drv->format_name);
}
flags = bs->open_flags | BDRV_O_RDWR;
- source = bs->backing_hd;
- if (!source && sync == MIRROR_SYNC_MODE_TOP) {
- sync = MIRROR_SYNC_MODE_FULL;
+ source = backing_bs(bs);
+ if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) {
+ arg->sync = MIRROR_SYNC_MODE_FULL;
}
- if (sync == MIRROR_SYNC_MODE_NONE) {
+ if (arg->sync == MIRROR_SYNC_MODE_NONE) {
source = bs;
}
@@ -2337,18 +3518,18 @@
goto out;
}
- if (has_replaces) {
+ if (arg->has_replaces) {
BlockDriverState *to_replace_bs;
AioContext *replace_aio_context;
int64_t replace_size;
- if (!has_node_name) {
+ if (!arg->has_node_name) {
error_setg(errp, "a node-name must be provided when replacing a"
" named node of the graph");
goto out;
}
- to_replace_bs = check_to_replace_node(replaces, &local_err);
+ to_replace_bs = check_to_replace_node(bs, arg->replaces, &local_err);
if (!to_replace_bs) {
error_propagate(errp, local_err);
@@ -2367,20 +3548,26 @@
}
}
- if ((sync == MIRROR_SYNC_MODE_FULL || !source)
- && mode != NEW_IMAGE_MODE_EXISTING)
+ if (arg->mode == NEW_IMAGE_MODE_ABSOLUTE_PATHS) {
+ backing_mode = MIRROR_SOURCE_BACKING_CHAIN;
+ } else {
+ backing_mode = MIRROR_OPEN_BACKING_CHAIN;
+ }
+
+ if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source)
+ && arg->mode != NEW_IMAGE_MODE_EXISTING)
{
/* create new image w/o backing file */
- assert(format && drv);
- bdrv_img_create(target, format,
+ assert(format);
+ bdrv_img_create(arg->target, format,
NULL, NULL, NULL, size, flags, &local_err, false);
} else {
- switch (mode) {
+ switch (arg->mode) {
case NEW_IMAGE_MODE_EXISTING:
break;
case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
/* create new image with backing file */
- bdrv_img_create(target, format,
+ bdrv_img_create(arg->target, format,
source->filename,
source->drv->format_name,
NULL, size, flags, &local_err, false);
@@ -2395,74 +3582,126 @@
goto out;
}
- if (has_node_name) {
- options = qdict_new();
- qdict_put(options, "node-name", qstring_from_str(node_name));
+ options = qdict_new();
+ if (arg->has_node_name) {
+ qdict_put(options, "node-name", qstring_from_str(arg->node_name));
+ }
+ if (format) {
+ qdict_put(options, "driver", qstring_from_str(format));
}
/* Mirroring takes care of copy-on-write using the source's backing
* file.
*/
- target_bs = NULL;
- ret = bdrv_open(&target_bs, target, NULL, options,
- flags | BDRV_O_NO_BACKING, drv, &local_err);
- if (ret < 0) {
- error_propagate(errp, local_err);
+ target_bs = bdrv_open(arg->target, NULL, options,
+ flags | BDRV_O_NO_BACKING, errp);
+ if (!target_bs) {
goto out;
}
bdrv_set_aio_context(target_bs, aio_context);
- /* pass the node name to replace to mirror start since it's loose coupling
- * and will allow to check whether the node still exist at mirror completion
- */
- mirror_start(bs, target_bs,
- has_replaces ? replaces : NULL,
- speed, granularity, buf_size, sync,
- on_source_error, on_target_error,
- block_job_cb, bs, &local_err);
- if (local_err != NULL) {
- bdrv_unref(target_bs);
- error_propagate(errp, local_err);
- goto out;
- }
-
+ blockdev_mirror_common(arg->has_job_id ? arg->job_id : NULL, bs, target_bs,
+ arg->has_replaces, arg->replaces, arg->sync,
+ backing_mode, arg->has_speed, arg->speed,
+ arg->has_granularity, arg->granularity,
+ arg->has_buf_size, arg->buf_size,
+ arg->has_on_source_error, arg->on_source_error,
+ arg->has_on_target_error, arg->on_target_error,
+ arg->has_unmap, arg->unmap,
+ &local_err);
+ bdrv_unref(target_bs);
+ error_propagate(errp, local_err);
out:
aio_context_release(aio_context);
}
-/* Get the block job for a given device name and acquire its AioContext */
-static BlockJob *find_block_job(const char *device, AioContext **aio_context)
+/* Mirror the device named @device onto the already existing node @target.
+ *
+ * @device is looked up with blk_by_name() and @target with
+ * bdrv_lookup_bs(); if either lookup fails, or the device has no
+ * BlockDriverState, @errp is set and the function returns before any
+ * AioContext is taken. Otherwise the source's AioContext is acquired,
+ * @target is moved into it with bdrv_set_aio_context(), and the job is
+ * started through blockdev_mirror_common() with
+ * MIRROR_LEAVE_BACKING_CHAIN and with unmap forced on (has_unmap = true,
+ * unmap = true). The AioContext is released before returning.
+ */
+void qmp_blockdev_mirror(bool has_job_id, const char *job_id,
+ const char *device, const char *target,
+ bool has_replaces, const char *replaces,
+ MirrorSyncMode sync,
+ bool has_speed, int64_t speed,
+ bool has_granularity, uint32_t granularity,
+ bool has_buf_size, int64_t buf_size,
+ bool has_on_source_error,
+ BlockdevOnError on_source_error,
+ bool has_on_target_error,
+ BlockdevOnError on_target_error,
+ Error **errp)
{
BlockDriverState *bs;
+ BlockBackend *blk;
+ BlockDriverState *target_bs;
+ AioContext *aio_context;
+ BlockMirrorBackingMode backing_mode = MIRROR_LEAVE_BACKING_CHAIN;
+ Error *local_err = NULL;
- bs = bdrv_find(device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_setg(errp, "Device '%s' not found", device);
+ return;
+ }
+ bs = blk_bs(blk);
+
if (!bs) {
- goto notfound;
+ error_setg(errp, "Device '%s' has no media", device);
+ return;
}
- *aio_context = bdrv_get_aio_context(bs);
+ target_bs = bdrv_lookup_bs(target, target, errp);
+ if (!target_bs) {
+ return;
+ }
+
+ aio_context = bdrv_get_aio_context(bs);
+ aio_context_acquire(aio_context);
+
+ bdrv_set_aio_context(target_bs, aio_context);
+
+ blockdev_mirror_common(has_job_id ? job_id : NULL, bs, target_bs,
+ has_replaces, replaces, sync, backing_mode,
+ has_speed, speed,
+ has_granularity, granularity,
+ has_buf_size, buf_size,
+ has_on_source_error, on_source_error,
+ has_on_target_error, on_target_error,
+ true, true,
+ &local_err);
+ error_propagate(errp, local_err);
+
+ aio_context_release(aio_context);
+}
+
+/* Get a block job using its ID and acquire its AioContext.
+ *
+ * @id must not be NULL (asserted).
+ *
+ * On success the job is returned, *@aio_context is set to the AioContext of
+ * the job's BlockBackend and that context has been acquired: the caller is
+ * responsible for calling aio_context_release() on it.
+ *
+ * When no job has this ID, @errp is set (class
+ * ERROR_CLASS_DEVICE_NOT_ACTIVE), *@aio_context is NULL, nothing has been
+ * acquired and NULL is returned.
+ */
+static BlockJob *find_block_job(const char *id, AioContext **aio_context,
+ Error **errp)
+{
+ BlockJob *job;
+
+ assert(id != NULL);
+
+ *aio_context = NULL;
+
+ job = block_job_get(id);
+
+ if (!job) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_ACTIVE,
+ "Block job '%s' not found", id);
+ return NULL;
+ }
+
+ *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(*aio_context);
- if (!bs->job) {
- aio_context_release(*aio_context);
- goto notfound;
- }
-
- return bs->job;
-
-notfound:
- *aio_context = NULL;
- return NULL;
+ return job;
}
void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2474,10 +3713,9 @@
bool has_force, bool force, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2485,7 +3723,7 @@
force = false;
}
- if (job->paused && !force) {
+ if (job->user_paused && !force) {
error_setg(errp, "The block job for device '%s' is currently paused",
device);
goto out;
@@ -2500,13 +3738,13 @@
+/* Pause the block job identified by @device on behalf of the user.
+ *
+ * When no such job exists, find_block_job() has already set @errp and has
+ * not acquired any AioContext, so the function simply returns. When the job
+ * is already user-paused the call is a no-op. In every case where the job
+ * was found, the AioContext acquired by find_block_job() is released before
+ * returning.
+ */
void qmp_block_job_pause(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
+ /* The job was found, so its AioContext is held: leave through "out"
+ * rather than returning directly, otherwise the context stays acquired.
+ */
+ if (job->user_paused) {
+ goto out;
+ }
+ job->user_paused = true;
trace_qmp_block_job_pause(job);
block_job_pause(job);
+out:
aio_context_release(aio_context);
@@ -2515,14 +3753,15 @@
+/* Resume a block job previously paused by the user.
+ *
+ * When no job is identified by @device, find_block_job() has already set
+ * @errp and has not acquired any AioContext, so the function simply
+ * returns. When the job is not user-paused the call is a no-op. Otherwise
+ * the user_paused flag is cleared, the job's iostatus is reset and the job
+ * is resumed. In every case where the job was found, the AioContext
+ * acquired by find_block_job() is released before returning.
+ */
void qmp_block_job_resume(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
+ /* The job was found, so its AioContext is held: leave through "out"
+ * rather than returning directly, otherwise the context stays acquired.
+ */
+ if (!job->user_paused) {
+ goto out;
+ }
+ job->user_paused = false;
trace_qmp_block_job_resume(job);
+ block_job_iostatus_reset(job);
block_job_resume(job);
+out:
aio_context_release(aio_context);
}
@@ -2530,10 +3769,9 @@
void qmp_block_job_complete(const char *device, Error **errp)
{
AioContext *aio_context;
- BlockJob *job = find_block_job(device, &aio_context);
+ BlockJob *job = find_block_job(device, &aio_context, errp);
if (!job) {
- error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
return;
}
@@ -2547,48 +3785,59 @@
const char *backing_file,
Error **errp)
{
+ BlockBackend *blk;
BlockDriverState *bs = NULL;
+ AioContext *aio_context;
BlockDriverState *image_bs = NULL;
Error *local_err = NULL;
bool ro;
int open_flags;
int ret;
- /* find the top layer BDS of the chain */
- bs = bdrv_find(device);
- if (!bs) {
- error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+ blk = blk_by_name(device);
+ if (!blk) {
+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+ "Device '%s' not found", device);
return;
}
+ aio_context = blk_get_aio_context(blk);
+ aio_context_acquire(aio_context);
+
+ if (!blk_is_available(blk)) {
+ error_setg(errp, "Device '%s' has no medium", device);
+ goto out;
+ }
+ bs = blk_bs(blk);
+
image_bs = bdrv_lookup_bs(NULL, image_node_name, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
if (!image_bs) {
error_setg(errp, "image file not found");
- return;
+ goto out;
}
if (bdrv_find_base(image_bs) == image_bs) {
error_setg(errp, "not allowing backing file change on an image "
"without a backing file");
- return;
+ goto out;
}
/* even though we are not necessarily operating on bs, we need it to
* determine if block ops are currently prohibited on the chain */
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_CHANGE, errp)) {
- return;
+ goto out;
}
/* final sanity check */
if (!bdrv_chain_contains(bs, image_bs)) {
error_setg(errp, "'%s' and image file are not in the same chain",
device);
- return;
+ goto out;
}
/* if not r/w, reopen to make r/w */
@@ -2599,7 +3848,7 @@
bdrv_reopen(image_bs, open_flags | BDRV_O_RDWR, &local_err);
if (local_err) {
error_propagate(errp, local_err);
- return;
+ goto out;
}
}
@@ -2615,26 +3864,53 @@
if (ro) {
bdrv_reopen(image_bs, open_flags, &local_err);
- if (local_err) {
- error_propagate(errp, local_err); /* will preserve prior errp */
- }
+ error_propagate(errp, local_err);
}
+
+out:
+ aio_context_release(aio_context);
+}
+
+/* Create a block node from the option string @optstr and record it in
+ * monitor_bdrv_states.
+ *
+ * @optstr is parsed against qemu_drive_opts; a parse failure returns
+ * immediately (qemu_opts_parse_noisily() is expected to have reported it).
+ * The options must contain "node-name". Failures are reported with
+ * error_report()/error_report_err() rather than returned. @mon is not used
+ * by this function.
+ */
+void hmp_drive_add_node(Monitor *mon, const char *optstr)
+{
+ QemuOpts *opts;
+ QDict *qdict;
+ Error *local_err = NULL;
+
+ opts = qemu_opts_parse_noisily(&qemu_drive_opts, optstr, false);
+ if (!opts) {
+ return;
+ }
+
+ qdict = qemu_opts_to_qdict(opts, NULL);
+
+ if (!qdict_get_try_str(qdict, "node-name")) {
+ QDECREF(qdict);
+ error_report("'node-name' needs to be specified");
+ goto out;
+ }
+
+ /* NOTE(review): qdict is not released after this call on either path;
+ * presumably bds_tree_init() takes ownership of it -- confirm. */
+ BlockDriverState *bs = bds_tree_init(qdict, &local_err);
+ if (!bs) {
+ error_report_err(local_err);
+ goto out;
+ }
+
+ QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
+
+out:
+ qemu_opts_del(opts);
}
void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
{
- QmpOutputVisitor *ov = qmp_output_visitor_new();
- BlockBackend *blk;
+ BlockDriverState *bs;
+ BlockBackend *blk = NULL;
QObject *obj;
+ Visitor *v = qmp_output_visitor_new(&obj);
QDict *qdict;
Error *local_err = NULL;
- /* Require an ID in the top level */
- if (!options->has_id) {
- error_setg(errp, "Block device needs an ID");
- goto fail;
- }
-
/* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
* cache.direct=false instead of silently switching to aio=threads, except
* when called from drive_new().
@@ -2650,52 +3926,206 @@
}
}
- visit_type_BlockdevOptions(qmp_output_get_visitor(ov),
- &options, NULL, &local_err);
+ visit_type_BlockdevOptions(v, NULL, &options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
goto fail;
}
- obj = qmp_output_get_qobject(ov);
+ visit_complete(v, &obj);
qdict = qobject_to_qdict(obj);
qdict_flatten(qdict);
- blk = blockdev_init(NULL, qdict, &local_err);
- if (local_err) {
- error_propagate(errp, local_err);
- goto fail;
+ if (options->has_id) {
+ blk = blockdev_init(NULL, qdict, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto fail;
+ }
+
+ bs = blk_bs(blk);
+ } else {
+ if (!qdict_get_try_str(qdict, "node-name")) {
+ error_setg(errp, "'id' and/or 'node-name' need to be specified for "
+ "the root node");
+ goto fail;
+ }
+
+ bs = bds_tree_init(qdict, errp);
+ if (!bs) {
+ goto fail;
+ }
+
+ QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
}
- if (bdrv_key_required(blk_bs(blk))) {
- blk_unref(blk);
+ if (bs && bdrv_key_required(bs)) {
+ if (blk) {
+ monitor_remove_blk(blk);
+ blk_unref(blk);
+ } else {
+ QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
+ bdrv_unref(bs);
+ }
error_setg(errp, "blockdev-add doesn't support encrypted devices");
goto fail;
}
fail:
- qmp_output_visitor_cleanup(ov);
+ visit_free(v);
+}
+
+/* Delete either the block backend named @id or the node named @node_name.
+ *
+ * Exactly one of @id and @node_name must be given. The request is refused
+ * (with @errp set) when:
+ * - the backend or node cannot be found;
+ * - the backend has legacy drive info (blk_legacy_dinfo()) or more than
+ * one reference;
+ * - the node is attached to a BlockBackend, is not linked into
+ * monitor_bdrv_states, or has more than one reference;
+ * - BLOCK_OP_TYPE_DRIVE_DEL is blocked on the BlockDriverState.
+ *
+ * The AioContext of the backend/node is held while the op-blocker and
+ * reference checks run and while the object is unreferenced.
+ */
+void qmp_x_blockdev_del(bool has_id, const char *id,
+ bool has_node_name, const char *node_name, Error **errp)
+{
+ AioContext *aio_context;
+ BlockBackend *blk;
+ BlockDriverState *bs;
+
+ if (has_id && has_node_name) {
+ error_setg(errp, "Only one of id and node-name must be specified");
+ return;
+ } else if (!has_id && !has_node_name) {
+ error_setg(errp, "No block device specified");
+ return;
+ }
+
+ if (has_id) {
+ /* blk_by_name() never returns a BB that is not owned by the monitor */
+ blk = blk_by_name(id);
+ if (!blk) {
+ error_setg(errp, "Cannot find block backend %s", id);
+ return;
+ }
+ if (blk_legacy_dinfo(blk)) {
+ error_setg(errp, "Deleting block backend added with drive-add"
+ " is not supported");
+ return;
+ }
+ if (blk_get_refcnt(blk) > 1) {
+ error_setg(errp, "Block backend %s is in use", id);
+ return;
+ }
+ bs = blk_bs(blk);
+ aio_context = blk_get_aio_context(blk);
+ } else {
+ blk = NULL;
+ bs = bdrv_find_node(node_name);
+ if (!bs) {
+ error_setg(errp, "Cannot find node %s", node_name);
+ return;
+ }
+ if (bdrv_has_blk(bs)) {
+ error_setg(errp, "Node %s is in use by %s",
+ node_name, bdrv_get_parent_name(bs));
+ return;
+ }
+ aio_context = bdrv_get_aio_context(bs);
+ }
+
+ aio_context_acquire(aio_context);
+
+ /* bs is only tested for NULL on the backend path: the node path has
+ * already returned when bdrv_find_node() found nothing. */
+ if (bs) {
+ if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_DRIVE_DEL, errp)) {
+ goto out;
+ }
+
+ /* A node with no previous-link pointer is not on
+ * monitor_bdrv_states. */
+ if (!blk && !bs->monitor_list.tqe_prev) {
+ error_setg(errp, "Node %s is not owned by the monitor",
+ bs->node_name);
+ goto out;
+ }
+
+ if (bs->refcnt > 1) {
+ error_setg(errp, "Block device %s is in use",
+ bdrv_get_device_or_node_name(bs));
+ goto out;
+ }
+ }
+
+ if (blk) {
+ monitor_remove_blk(blk);
+ blk_unref(blk);
+ } else {
+ QTAILQ_REMOVE(&monitor_bdrv_states, bs, monitor_list);
+ bdrv_unref(bs);
+ }
+
+out:
+ aio_context_release(aio_context);
+}
+
+/* Return the first entry of @parent_bs->children whose name is equal to
+ * @child_name, or NULL when no child has that name.
+ */
+static BdrvChild *bdrv_find_child(BlockDriverState *parent_bs,
+ const char *child_name)
+{
+ BdrvChild *child;
+
+ QLIST_FOREACH(child, &parent_bs->children, next) {
+ if (strcmp(child->name, child_name) == 0) {
+ return child;
+ }
+ }
+
+ return NULL;
+}
+
+/* Remove a child from, or add a node as a child to, the node @parent.
+ *
+ * @parent is resolved with bdrv_lookup_bs(). Exactly one of @child and
+ * @node must be supplied:
+ * - @child: the child of that name is removed with bdrv_del_child();
+ * - @node: the node of that name is added with bdrv_add_child().
+ *
+ * @errp is set when @parent cannot be resolved, when both or neither of
+ * @child/@node are given, when the named child or node does not exist, or
+ * by bdrv_del_child()/bdrv_add_child() themselves.
+ */
+void qmp_x_blockdev_change(const char *parent, bool has_child,
+ const char *child, bool has_node,
+ const char *node, Error **errp)
+{
+ BlockDriverState *parent_bs, *new_bs = NULL;
+ BdrvChild *p_child;
+
+ parent_bs = bdrv_lookup_bs(parent, parent, errp);
+ if (!parent_bs) {
+ return;
+ }
+
+ /* Equal flags mean both or neither were given; either way is an error,
+ * so the two branches below are mutually exclusive. */
+ if (has_child == has_node) {
+ if (has_child) {
+ error_setg(errp, "The parameters child and node are in conflict");
+ } else {
+ error_setg(errp, "Either child or node must be specified");
+ }
+ return;
+ }
+
+ if (has_child) {
+ p_child = bdrv_find_child(parent_bs, child);
+ if (!p_child) {
+ error_setg(errp, "Node '%s' does not have child '%s'",
+ parent, child);
+ return;
+ }
+ bdrv_del_child(parent_bs, p_child, errp);
+ }
+
+ if (has_node) {
+ new_bs = bdrv_find_node(node);
+ if (!new_bs) {
+ error_setg(errp, "Node '%s' not found", node);
+ return;
+ }
+ bdrv_add_child(parent_bs, new_bs, errp);
+ }
}
BlockJobInfoList *qmp_query_block_jobs(Error **errp)
{
BlockJobInfoList *head = NULL, **p_next = &head;
- BlockDriverState *bs;
+ BlockJob *job;
- for (bs = bdrv_next(NULL); bs; bs = bdrv_next(bs)) {
- AioContext *aio_context = bdrv_get_aio_context(bs);
+ for (job = block_job_next(NULL); job; job = block_job_next(job)) {
+ BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+ AioContext *aio_context = blk_get_aio_context(job->blk);
aio_context_acquire(aio_context);
-
- if (bs->job) {
- BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
- elem->value = block_job_query(bs->job);
- *p_next = elem;
- p_next = &elem->next;
- }
-
+ elem->value = block_job_query(job);
aio_context_release(aio_context);
+
+ *p_next = elem;
+ p_next = &elem->next;
}
return head;
@@ -2714,22 +4144,14 @@
.type = QEMU_OPT_STRING,
.help = "discard operation (ignore/off, unmap/on)",
},{
- .name = "cache.writeback",
- .type = QEMU_OPT_BOOL,
- .help = "enables writeback mode for any caches",
- },{
- .name = "cache.direct",
- .type = QEMU_OPT_BOOL,
- .help = "enables use of O_DIRECT (bypass the host page cache)",
- },{
- .name = "cache.no-flush",
- .type = QEMU_OPT_BOOL,
- .help = "ignore any flush requests for the device",
- },{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
},{
+ .name = BDRV_OPT_CACHE_WB,
+ .type = QEMU_OPT_BOOL,
+ .help = "Enable writeback mode",
+ },{
.name = "format",
.type = QEMU_OPT_STRING,
.help = "disk format (raw, qcow2, ...)",
@@ -2794,10 +4216,77 @@
.type = QEMU_OPT_NUMBER,
.help = "total bytes write burst",
},{
+ .name = "throttling.iops-total-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-total-max burst period, in seconds",
+ },{
+ .name = "throttling.iops-read-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-read-max burst period, in seconds",
+ },{
+ .name = "throttling.iops-write-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the iops-write-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-total-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-total-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-read-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-read-max burst period, in seconds",
+ },{
+ .name = "throttling.bps-write-max-length",
+ .type = QEMU_OPT_NUMBER,
+ .help = "length of the bps-write-max burst period, in seconds",
+ },{
.name = "throttling.iops-size",
.type = QEMU_OPT_NUMBER,
.help = "when limiting by iops max size of an I/O in bytes",
},{
+ .name = "throttling.group",
+ .type = QEMU_OPT_STRING,
+ .help = "name of the block throttling group",
+ },{
+ .name = "copy-on-read",
+ .type = QEMU_OPT_BOOL,
+ .help = "copy read data from backing file into image file",
+ },{
+ .name = "detect-zeroes",
+ .type = QEMU_OPT_STRING,
+ .help = "try to optimize zero writes (off, on, unmap)",
+ },{
+ .name = "stats-account-invalid",
+ .type = QEMU_OPT_BOOL,
+ .help = "whether to account for invalid I/O operations "
+ "in the statistics",
+ },{
+ .name = "stats-account-failed",
+ .type = QEMU_OPT_BOOL,
+ .help = "whether to account for failed I/O operations "
+ "in the statistics",
+ },
+ { /* end of list */ }
+ },
+};
+
+static QemuOptsList qemu_root_bds_opts = {
+ .name = "root-bds",
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
+ .desc = {
+ {
+ .name = "discard",
+ .type = QEMU_OPT_STRING,
+ .help = "discard operation (ignore/off, unmap/on)",
+ },{
+ .name = "aio",
+ .type = QEMU_OPT_STRING,
+ .help = "host AIO implementation (threads, native)",
+ },{
+ .name = "read-only",
+ .type = QEMU_OPT_BOOL,
+ .help = "open drive file as read-only",
+ },{
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
diff --git a/blockjob.c b/blockjob.c
index ba2255d..a5ba3be 100644
--- a/blockjob.c
+++ b/blockjob.c
@@ -23,29 +23,130 @@
* THE SOFTWARE.
*/
-#include "config-host.h"
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "trace.h"
#include "block/block.h"
#include "block/blockjob.h"
#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qapi/qmp/qerror.h"
#include "qapi/qmp/qjson.h"
-#include "block/coroutine.h"
+#include "qemu/coroutine.h"
+#include "qemu/id.h"
#include "qmp-commands.h"
#include "qemu/timer.h"
#include "qapi-event.h"
-void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs,
- int64_t speed, BlockCompletionFunc *cb,
- void *opaque, Error **errp)
+/* Transactional group of block jobs */
+struct BlockJobTxn {
+
+ /* Is this txn being cancelled? Set by the first member job that takes
+ * the abort path in block_job_completed_txn_abort(); later callers see
+ * it and return early. */
+ bool aborting;
+
+ /* List of jobs, linked through BlockJob.txn_list */
+ QLIST_HEAD(, BlockJob) jobs;
+
+ /* Reference count */
+ int refcnt;
+};
+
+/* All existing block jobs, linked through BlockJob.job_list. Jobs are added
+ * in block_job_create() and removed in block_job_unref(). */
+static QLIST_HEAD(, BlockJob) block_jobs = QLIST_HEAD_INITIALIZER(block_jobs);
+
+/* Iterate over the block_jobs list: returns the first job when @job is
+ * NULL, otherwise the job that follows @job in the list.
+ */
+BlockJob *block_job_next(BlockJob *job)
+{
+ if (!job) {
+ return QLIST_FIRST(&block_jobs);
+ }
+ return QLIST_NEXT(job, job_list);
+}
+
+/* Return the job in block_jobs whose id string equals @id, or NULL when
+ * there is none. @id is passed straight to strcmp(), so it must not be
+ * NULL.
+ */
+BlockJob *block_job_get(const char *id)
{
BlockJob *job;
+ QLIST_FOREACH(job, &block_jobs, job_list) {
+ if (!strcmp(id, job->id)) {
+ return job;
+ }
+ }
+
+ return NULL;
+}
+
+/* Normally the job runs in its BlockBackend's AioContext. The exception is
+ * block_job_defer_to_main_loop() where it runs in the QEMU main loop. Code
+ * that supports both cases uses this helper function.
+ *
+ * Returns qemu_get_aio_context() when job->deferred_to_main_loop is set,
+ * otherwise the AioContext of job->blk.
+ */
+static AioContext *block_job_get_aio_context(BlockJob *job)
+{
+ return job->deferred_to_main_loop ?
+ qemu_get_aio_context() :
+ blk_get_aio_context(job->blk);
+}
+
+/* AioContext notifier registered by block_job_create(): called with the
+ * job as @opaque when a new AioContext is attached. Forwards @new_context
+ * to the driver's attached_aio_context hook if it has one, then resumes the
+ * job (block_job_detach_aio_context() requested a pause).
+ */
+static void block_job_attached_aio_context(AioContext *new_context,
+ void *opaque)
+{
+ BlockJob *job = opaque;
+
+ if (job->driver->attached_aio_context) {
+ job->driver->attached_aio_context(job, new_context);
+ }
+
+ block_job_resume(job);
+}
+
+/* AioContext notifier registered by block_job_create(): called with the
+ * job as @opaque when the AioContext is being detached. Requests a pause
+ * and then polls the job's AioContext until the job has either paused or
+ * completed.
+ */
+static void block_job_detach_aio_context(void *opaque)
+{
+ BlockJob *job = opaque;
+
+ /* In case the job terminates during aio_poll()... */
+ block_job_ref(job);
+
+ block_job_pause(job);
+
+ if (!job->paused) {
+ /* If job is !job->busy this kicks it into the next pause point. */
+ block_job_enter(job);
+ }
+ while (!job->paused && !job->completed) {
+ aio_poll(block_job_get_aio_context(job), true);
+ }
+
+ /* Drop the reference taken above; this frees the job if it was the
+ * last one. */
+ block_job_unref(job);
+}
+
+void *block_job_create(const char *job_id, const BlockJobDriver *driver,
+ BlockDriverState *bs, int64_t speed,
+ BlockCompletionFunc *cb, void *opaque, Error **errp)
+{
+ BlockBackend *blk;
+ BlockJob *job;
+
+ assert(cb);
if (bs->job) {
- error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+ error_setg(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
return NULL;
}
- bdrv_ref(bs);
+
+ if (job_id == NULL) {
+ job_id = bdrv_get_device_name(bs);
+ }
+
+ if (!id_wellformed(job_id)) {
+ error_setg(errp, "Invalid job ID '%s'", job_id);
+ return NULL;
+ }
+
+ if (block_job_get(job_id)) {
+ error_setg(errp, "Job ID '%s' already in use", job_id);
+ return NULL;
+ }
+
+ blk = blk_new();
+ blk_insert_bs(blk, bs);
+
job = g_malloc0(driver->instance_size);
error_setg(&job->blocker, "block device is in use by block job: %s",
BlockJobType_lookup[driver->job_type]);
@@ -53,22 +154,26 @@
bdrv_op_unblock(bs, BLOCK_OP_TYPE_DATAPLANE, job->blocker);
job->driver = driver;
- job->bs = bs;
+ job->id = g_strdup(job_id);
+ job->blk = blk;
job->cb = cb;
job->opaque = opaque;
job->busy = true;
+ job->refcnt = 1;
bs->job = job;
+ QLIST_INSERT_HEAD(&block_jobs, job, job_list);
+
+ blk_add_aio_context_notifier(blk, block_job_attached_aio_context,
+ block_job_detach_aio_context, job);
+
/* Only set speed when necessary to avoid NotSupported error */
if (speed != 0) {
Error *local_err = NULL;
block_job_set_speed(job, speed, &local_err);
if (local_err) {
- bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
- error_free(job->blocker);
- g_free(job);
+ block_job_unref(job);
error_propagate(errp, local_err);
return NULL;
}
@@ -76,16 +181,121 @@
return job;
}
+/* Take an additional reference on @job; pair with block_job_unref(). */
+void block_job_ref(BlockJob *job)
+{
+ ++job->refcnt;
+}
+
+/* Drop one reference on @job. When the count reaches zero the job is torn
+ * down: it is detached from its BlockDriverState (bs->job cleared, the
+ * job's op blocker removed), its AioContext notifiers are unregistered, its
+ * BlockBackend reference is dropped, it is unlinked from block_jobs and its
+ * memory is freed. @job must not be used afterwards in that case.
+ */
+void block_job_unref(BlockJob *job)
+{
+ if (--job->refcnt == 0) {
+ BlockDriverState *bs = blk_bs(job->blk);
+ bs->job = NULL;
+ bdrv_op_unblock_all(bs, job->blocker);
+ blk_remove_aio_context_notifier(job->blk,
+ block_job_attached_aio_context,
+ block_job_detach_aio_context, job);
+ blk_unref(job->blk);
+ error_free(job->blocker);
+ g_free(job->id);
+ QLIST_REMOVE(job, job_list);
+ g_free(job);
+ }
+}
+
+/* Finalize one completed job: run the driver's commit hook when job->ret is
+ * zero or its abort hook otherwise (each only if provided), invoke the
+ * completion callback with job->ret, drop the job's transaction reference
+ * if it belongs to one, and finally drop a job reference, which may free
+ * @job.
+ */
+static void block_job_completed_single(BlockJob *job)
+{
+ if (!job->ret) {
+ if (job->driver->commit) {
+ job->driver->commit(job);
+ }
+ } else {
+ if (job->driver->abort) {
+ job->driver->abort(job);
+ }
+ }
+ job->cb(job->opaque, job->ret);
+ if (job->txn) {
+ block_job_txn_unref(job->txn);
+ }
+ block_job_unref(job);
+}
+
+/* Abort the transaction @job belongs to.
+ *
+ * Only the first caller for a given transaction does the work; it marks the
+ * transaction as aborting, so that calls made for the other jobs while they
+ * are being cancelled return immediately. The work is done in three passes
+ * over the transaction's jobs: acquire every job's AioContext, cancel
+ * synchronously every job that has not completed yet, then finalize each
+ * job and release its AioContext.
+ */
+static void block_job_completed_txn_abort(BlockJob *job)
+{
+ AioContext *ctx;
+ BlockJobTxn *txn = job->txn;
+ BlockJob *other_job, *next;
+
+ if (txn->aborting) {
+ /*
+ * We are cancelled by another job, which will handle everything.
+ */
+ return;
+ }
+ txn->aborting = true;
+ /* We are the first failed job. Cancel other jobs. */
+ QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+ ctx = blk_get_aio_context(other_job->blk);
+ aio_context_acquire(ctx);
+ }
+ QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+ if (other_job == job || other_job->completed) {
+ /* Other jobs are "effectively" cancelled by us, set the status for
+ * them; this job, however, may or may not be cancelled, depending
+ * on the caller, so leave it. */
+ if (other_job != job) {
+ other_job->cancelled = true;
+ }
+ continue;
+ }
+ block_job_cancel_sync(other_job);
+ assert(other_job->completed);
+ }
+ /* ctx is fetched before finalizing because
+ * block_job_completed_single() drops a job reference and may free
+ * other_job; the _SAFE iterator is used for the same reason. */
+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ ctx = blk_get_aio_context(other_job->blk);
+ block_job_completed_single(other_job);
+ aio_context_release(ctx);
+ }
+}
+
+/* Called when @job, a member of a transaction, completes successfully.
+ * Nothing happens until every job of the transaction has completed; the
+ * call made for the last one finalizes all of them, each under its own
+ * AioContext.
+ */
+static void block_job_completed_txn_success(BlockJob *job)
+{
+ AioContext *ctx;
+ BlockJobTxn *txn = job->txn;
+ BlockJob *other_job, *next;
+ /*
+ * Successful completion, see if there are other running jobs in this
+ * txn.
+ */
+ QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
+ if (!other_job->completed) {
+ return;
+ }
+ }
+ /* We are the last completed job, commit the transaction. */
+ QLIST_FOREACH_SAFE(other_job, &txn->jobs, txn_list, next) {
+ /* ctx is saved first: block_job_completed_single() may free
+ * other_job. */
+ ctx = blk_get_aio_context(other_job->blk);
+ aio_context_acquire(ctx);
+ assert(other_job->ret == 0);
+ block_job_completed_single(other_job);
+ aio_context_release(ctx);
+ }
+}
+
void block_job_completed(BlockJob *job, int ret)
{
- BlockDriverState *bs = job->bs;
-
- assert(bs->job == job);
- job->cb(job->opaque, ret);
- bs->job = NULL;
- bdrv_op_unblock_all(bs, job->blocker);
- error_free(job->blocker);
- g_free(job);
+ assert(blk_bs(job->blk)->job == job);
+ assert(!job->completed);
+ job->completed = true;
+ job->ret = ret;
+ if (!job->txn) {
+ block_job_completed_single(job);
+ } else if (ret < 0 || block_job_is_cancelled(job)) {
+ block_job_completed_txn_abort(job);
+ } else {
+ block_job_completed_txn_success(job);
+ }
}
void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
@@ -93,7 +303,7 @@
Error *local_err = NULL;
if (!job->driver->set_speed) {
- error_set(errp, QERR_UNSUPPORTED);
+ error_setg(errp, QERR_UNSUPPORTED);
return;
}
job->driver->set_speed(job, speed, &local_err);
@@ -107,9 +317,9 @@
void block_job_complete(BlockJob *job, Error **errp)
{
- if (job->paused || job->cancelled || !job->driver->complete) {
- error_set(errp, QERR_BLOCK_JOB_NOT_READY,
- bdrv_get_device_name(job->bs));
+ if (job->pause_count || job->cancelled || !job->driver->complete) {
+ error_setg(errp, "The active block job '%s' cannot be completed",
+ job->id);
return;
}
@@ -118,27 +328,62 @@
void block_job_pause(BlockJob *job)
{
- job->paused = true;
+ job->pause_count++;
}
-bool block_job_is_paused(BlockJob *job)
+static bool block_job_should_pause(BlockJob *job)
{
- return job->paused;
+ return job->pause_count > 0;
+}
+
+/*
+ * Pause the job here if a pause has been requested.
+ *
+ * Returns immediately when no pause is pending (pause_count is zero) or the
+ * job has been cancelled.  Otherwise the driver's optional pause hook runs,
+ * the coroutine yields while the pause is still in effect, and finally the
+ * driver's optional resume hook runs.
+ */
+void coroutine_fn block_job_pause_point(BlockJob *job)
+{
+ if (!block_job_should_pause(job)) {
+ return;
+ }
+ if (block_job_is_cancelled(job)) {
+ return;
+ }
+
+ if (job->driver->pause) {
+ job->driver->pause(job);
+ }
+
+ /*
+ * Re-check both conditions after the pause hook -- presumably because the
+ * hook may let the pause/cancel state change; TODO confirm.
+ */
+ if (block_job_should_pause(job) && !block_job_is_cancelled(job)) {
+ job->paused = true;
+ job->busy = false;
+ qemu_coroutine_yield(); /* wait for block_job_resume() */
+ job->busy = true;
+ job->paused = false;
+ }
+
+ /* The resume hook runs even when the yield above was skipped. */
+ if (job->driver->resume) {
+ job->driver->resume(job);
+ }
}
void block_job_resume(BlockJob *job)
{
- job->paused = false;
- block_job_iostatus_reset(job);
+ assert(job->pause_count > 0);
+ job->pause_count--;
+ if (job->pause_count) {
+ return;
+ }
+ block_job_enter(job);
+}
+
+void block_job_enter(BlockJob *job)
+{
if (job->co && !job->busy) {
- qemu_coroutine_enter(job->co, NULL);
+ qemu_coroutine_enter(job->co);
}
}
void block_job_cancel(BlockJob *job)
{
job->cancelled = true;
- block_job_resume(job);
+ block_job_iostatus_reset(job);
+ block_job_enter(job);
}
bool block_job_is_cancelled(BlockJob *job)
@@ -154,51 +399,28 @@
}
}
-struct BlockFinishData {
- BlockJob *job;
- BlockCompletionFunc *cb;
- void *opaque;
- bool cancelled;
- int ret;
-};
-
-static void block_job_finish_cb(void *opaque, int ret)
-{
- struct BlockFinishData *data = opaque;
-
- data->cancelled = block_job_is_cancelled(data->job);
- data->ret = ret;
- data->cb(data->opaque, ret);
-}
-
static int block_job_finish_sync(BlockJob *job,
void (*finish)(BlockJob *, Error **errp),
Error **errp)
{
- struct BlockFinishData data;
- BlockDriverState *bs = job->bs;
Error *local_err = NULL;
+ int ret;
- assert(bs->job == job);
+ assert(blk_bs(job->blk)->job == job);
- /* Set up our own callback to store the result and chain to
- * the original callback.
- */
- data.job = job;
- data.cb = job->cb;
- data.opaque = job->opaque;
- data.ret = -EINPROGRESS;
- job->cb = block_job_finish_cb;
- job->opaque = &data;
+ block_job_ref(job);
finish(job, &local_err);
if (local_err) {
error_propagate(errp, local_err);
+ block_job_unref(job);
return -EBUSY;
}
- while (data.ret == -EINPROGRESS) {
- aio_poll(bdrv_get_aio_context(bs), true);
+ while (!job->completed) {
+ aio_poll(block_job_get_aio_context(job), true);
}
- return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+ ret = (job->cancelled && job->ret == 0) ? -ECANCELED : job->ret;
+ block_job_unref(job);
+ return ret;
}
/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
@@ -214,6 +436,19 @@
return block_job_finish_sync(job, &block_job_cancel_err, NULL);
}
+/*
+ * Synchronously cancel every job on the block_jobs list, holding each job's
+ * AioContext around the cancellation.  The loop always picks the current
+ * list head, so it relies on a cancelled job leaving the list.
+ */
+void block_job_cancel_sync_all(void)
+{
+    for (;;) {
+        BlockJob *head = QLIST_FIRST(&block_jobs);
+        AioContext *head_ctx;
+
+        if (!head) {
+            break;
+        }
+
+        head_ctx = blk_get_aio_context(head->blk);
+        aio_context_acquire(head_ctx);
+        block_job_cancel_sync(head);
+        aio_context_release(head_ctx);
+    }
+}
+
int block_job_complete_sync(BlockJob *job, Error **errp)
{
return block_job_finish_sync(job, &block_job_complete, errp);
@@ -229,12 +464,12 @@
}
job->busy = false;
- if (block_job_is_paused(job)) {
- qemu_coroutine_yield();
- } else {
- co_aio_sleep_ns(bdrv_get_aio_context(job->bs), type, ns);
+ if (!block_job_should_pause(job)) {
+ co_aio_sleep_ns(blk_get_aio_context(job->blk), type, ns);
}
job->busy = true;
+
+ block_job_pause_point(job);
}
void block_job_yield(BlockJob *job)
@@ -247,18 +482,22 @@
}
job->busy = false;
- qemu_coroutine_yield();
+ if (!block_job_should_pause(job)) {
+ qemu_coroutine_yield();
+ }
job->busy = true;
+
+ block_job_pause_point(job);
}
BlockJobInfo *block_job_query(BlockJob *job)
{
BlockJobInfo *info = g_new0(BlockJobInfo, 1);
info->type = g_strdup(BlockJobType_lookup[job->driver->job_type]);
- info->device = g_strdup(bdrv_get_device_name(job->bs));
+ info->device = g_strdup(job->id);
info->len = job->len;
info->busy = job->busy;
- info->paused = job->paused;
+ info->paused = job->pause_count > 0;
info->offset = job->offset;
info->speed = job->speed;
info->io_status = job->iostatus;
@@ -277,7 +516,7 @@
void block_job_event_cancelled(BlockJob *job)
{
qapi_event_send_block_job_cancelled(job->driver->job_type,
- bdrv_get_device_name(job->bs),
+ job->id,
job->len,
job->offset,
job->speed,
@@ -287,7 +526,7 @@
void block_job_event_completed(BlockJob *job, const char *msg)
{
qapi_event_send_block_job_completed(job->driver->job_type,
- bdrv_get_device_name(job->bs),
+ job->id,
job->len,
job->offset,
job->speed,
@@ -301,20 +540,20 @@
job->ready = true;
qapi_event_send_block_job_ready(job->driver->job_type,
- bdrv_get_device_name(job->bs),
+ job->id,
job->len,
job->offset,
job->speed, &error_abort);
}
-BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
- BlockdevOnError on_err,
+BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
int is_read, int error)
{
BlockErrorAction action;
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
+ case BLOCKDEV_ON_ERROR_AUTO:
action = (error == ENOSPC) ?
BLOCK_ERROR_ACTION_STOP : BLOCK_ERROR_ACTION_REPORT;
break;
@@ -330,16 +569,15 @@
default:
abort();
}
- qapi_event_send_block_job_error(bdrv_get_device_name(job->bs),
+ qapi_event_send_block_job_error(job->id,
is_read ? IO_OPERATION_TYPE_READ :
IO_OPERATION_TYPE_WRITE,
action, &error_abort);
if (action == BLOCK_ERROR_ACTION_STOP) {
+ /* make the pause user visible, which will be resumed from QMP. */
+ job->user_paused = true;
block_job_pause(job);
block_job_iostatus_set_err(job, error);
- if (bs != job->bs) {
- bdrv_iostatus_set_err(bs, error);
- }
}
return action;
}
@@ -363,9 +601,10 @@
aio_context_acquire(data->aio_context);
/* Fetch BDS AioContext again, in case it has changed */
- aio_context = bdrv_get_aio_context(data->job->bs);
+ aio_context = blk_get_aio_context(data->job->blk);
aio_context_acquire(aio_context);
+ data->job->deferred_to_main_loop = false;
data->fn(data->job, data->opaque);
aio_context_release(aio_context);
@@ -382,9 +621,43 @@
BlockJobDeferToMainLoopData *data = g_malloc(sizeof(*data));
data->job = job;
data->bh = qemu_bh_new(block_job_defer_to_main_loop_bh, data);
- data->aio_context = bdrv_get_aio_context(job->bs);
+ data->aio_context = blk_get_aio_context(job->blk);
data->fn = fn;
data->opaque = opaque;
+ job->deferred_to_main_loop = true;
qemu_bh_schedule(data->bh);
}
+
+/*
+ * Allocate a zero-initialized transaction with an empty job list.  The
+ * caller receives the single initial reference.
+ */
+BlockJobTxn *block_job_txn_new(void)
+{
+    BlockJobTxn *new_txn = g_new0(BlockJobTxn, 1);
+
+    new_txn->refcnt = 1;
+    QLIST_INIT(&new_txn->jobs);
+
+    return new_txn;
+}
+
+/* Take one additional reference on @txn. */
+static void block_job_txn_ref(BlockJobTxn *txn)
+{
+    txn->refcnt += 1;
+}
+
+/*
+ * Drop one reference on @txn and free it when the count reaches zero.
+ * A NULL @txn is accepted and ignored.
+ */
+void block_job_txn_unref(BlockJobTxn *txn)
+{
+    if (!txn) {
+        return;
+    }
+
+    txn->refcnt--;
+    if (txn->refcnt == 0) {
+        g_free(txn);
+    }
+}
+
+/*
+ * Attach @job to @txn: record the transaction in the job, put the job at the
+ * head of the transaction's job list and take a transaction reference on the
+ * job's behalf.  Does nothing when @txn is NULL; the job must not already
+ * belong to a transaction.
+ */
+void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job)
+{
+    if (txn) {
+        assert(!job->txn);
+        job->txn = txn;
+
+        QLIST_INSERT_HEAD(&txn->jobs, job, txn_list);
+        block_job_txn_ref(txn);
+    }
+}
diff --git a/bootdevice.c b/bootdevice.c
index b29970c..33e3029 100644
--- a/bootdevice.c
+++ b/bootdevice.c
@@ -1,7 +1,7 @@
/*
* QEMU Boot Device Implement
*
- * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO.,LTD.
+ * Copyright (c) 2014 HUAWEI TECHNOLOGIES CO., LTD.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,9 +22,13 @@
* THE SOFTWARE.
*/
+#include "qemu/osdep.h"
+#include "qapi/error.h"
#include "sysemu/sysemu.h"
#include "qapi/visitor.h"
#include "qemu/error-report.h"
+#include "hw/hw.h"
+#include "hw/qdev-core.h"
typedef struct FWBootEntry FWBootEntry;
@@ -37,6 +41,80 @@
static QTAILQ_HEAD(, FWBootEntry) fw_boot_order =
QTAILQ_HEAD_INITIALIZER(fw_boot_order);
+static QEMUBootSetHandler *boot_set_handler;
+static void *boot_set_opaque;
+
+/*
+ * Remember @func and its @opaque argument as the handler that
+ * qemu_boot_set() invokes.  A later registration replaces the earlier one.
+ */
+void qemu_register_boot_set(QEMUBootSetHandler *func, void *opaque)
+{
+    boot_set_opaque = opaque;
+    boot_set_handler = func;
+}
+
+/*
+ * Validate @boot_order and hand it to the registered boot-set handler.
+ * An error is stored in @errp when no handler is registered or when
+ * validation fails; in both cases the handler is not called.
+ */
+void qemu_boot_set(const char *boot_order, Error **errp)
+{
+    Error *err = NULL;
+
+    if (boot_set_handler == NULL) {
+        error_setg(errp, "no function defined to set boot device list for"
+                         " this architecture");
+        return;
+    }
+
+    validate_bootdevices(boot_order, &err);
+    if (err != NULL) {
+        error_propagate(errp, err);
+        return;
+    }
+
+    boot_set_handler(boot_set_opaque, boot_order, errp);
+}
+
+/*
+ * Generic consistency check of a boot device string: every character must
+ * lie in 'a'..'p' and may appear only once.  The first violation is
+ * reported through @errp and stops the scan.
+ */
+void validate_bootdevices(const char *devices, Error **errp)
+{
+    /* One bit per letter already encountered ('a' is bit 0). */
+    int seen = 0;
+    const char *cur = devices;
+
+    while (*cur != '\0') {
+        int bit;
+
+        /* Allowed boot devices are:
+         * a-b: floppy disk drives
+         * c-f: IDE disk drives
+         * g-m: machine implementation dependent drives
+         * n-p: network devices
+         * It's up to each machine implementation to check if the given boot
+         * devices match the actual hardware implementation and firmware
+         * features.
+         */
+        if (*cur < 'a' || *cur > 'p') {
+            error_setg(errp, "Invalid boot device '%c'", *cur);
+            return;
+        }
+
+        bit = 1 << (*cur - 'a');
+        if (seen & bit) {
+            error_setg(errp, "Boot device '%c' was given twice", *cur);
+            return;
+        }
+        seen |= bit;
+        cur++;
+    }
+}
+
+/*
+ * Reset callback that puts the normal boot order back in place.  @opaque is
+ * the saved boot order string.  The first invocation only clears a one-shot
+ * flag; the next one applies the saved order (when a boot-set handler is
+ * registered), unregisters this callback and frees the string.
+ */
+void restore_boot_order(void *opaque)
+{
+    static int first = 1;
+    char *saved_order = opaque;
+
+    /* Restore boot order and remove ourselves after the first boot */
+    if (first) {
+        first = 0;
+        return;
+    }
+
+    if (boot_set_handler != NULL) {
+        qemu_boot_set(saved_order, &error_abort);
+    }
+
+    qemu_unregister_reset(restore_boot_order, saved_order);
+    g_free(saved_order);
+}
void check_boot_index(int32_t bootindex, Error **errp)
{
@@ -137,7 +215,9 @@
char *list = NULL;
QTAILQ_FOREACH(i, &fw_boot_order, link) {
- char *devpath = NULL, *bootpath;
+ char *devpath = NULL, *suffix = NULL;
+ char *bootpath;
+ char *d;
size_t len;
if (i->dev) {
@@ -145,21 +225,27 @@
assert(devpath);
}
- if (i->suffix && !ignore_suffixes && devpath) {
- size_t bootpathlen = strlen(devpath) + strlen(i->suffix) + 1;
-
- bootpath = g_malloc(bootpathlen);
- snprintf(bootpath, bootpathlen, "%s%s", devpath, i->suffix);
- g_free(devpath);
- } else if (devpath) {
- bootpath = devpath;
- } else if (!ignore_suffixes) {
- assert(i->suffix);
- bootpath = g_strdup(i->suffix);
- } else {
- bootpath = g_strdup("");
+ if (!ignore_suffixes) {
+ if (i->dev) {
+ d = qdev_get_own_fw_dev_path_from_handler(i->dev->parent_bus,
+ i->dev);
+ if (d) {
+ assert(!i->suffix);
+ suffix = d;
+ } else {
+ suffix = g_strdup(i->suffix);
+ }
+ } else {
+ suffix = g_strdup(i->suffix);
+ }
}
+ bootpath = g_strdup_printf("%s%s",
+ devpath ? devpath : "",
+ suffix ? suffix : "");
+ g_free(devpath);
+ g_free(suffix);
+
if (total) {
list[total-1] = '\n';
}
@@ -187,21 +273,21 @@
DeviceState *dev;
} BootIndexProperty;
-static void device_get_bootindex(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+static void device_get_bootindex(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
- visit_type_int32(v, prop->bootindex, name, errp);
+ visit_type_int32(v, name, prop->bootindex, errp);
}
-static void device_set_bootindex(Object *obj, Visitor *v, void *opaque,
- const char *name, Error **errp)
+static void device_set_bootindex(Object *obj, Visitor *v, const char *name,
+ void *opaque, Error **errp)
{
BootIndexProperty *prop = opaque;
int32_t boot_index;
Error *local_err = NULL;
- visit_type_int32(v, &boot_index, name, &local_err);
+ visit_type_int32(v, name, &boot_index, &local_err);
if (local_err) {
goto out;
}
@@ -216,9 +302,7 @@
add_boot_device_path(*prop->bootindex, prop->dev, prop->suffix);
out:
- if (local_err) {
- error_propagate(errp, local_err);
- }
+ error_propagate(errp, local_err);
}
static void property_release_bootindex(Object *obj, const char *name,
diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 2abc713..94eec36 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -1,12 +1,6 @@
/* Code for loading BSD executables. Mostly linux kernel code. */
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
+#include "qemu/osdep.h"
#include "qemu.h"
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 93fd9e4..41a1309 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -1,16 +1,10 @@
/* This is the Linux kernel elf-loading code, ported into user space */
-#include <stdio.h>
-#include <sys/types.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/mman.h>
-#include <stdlib.h>
-#include <string.h>
+#include "qemu/osdep.h"
#include "qemu.h"
#include "disas/disas.h"
+#include "qemu/path.h"
#ifdef _ARCH_PPC64
#undef ARCH_DLINFO
@@ -351,8 +345,10 @@
_regs->gpr[1] = infop->start_stack;
#if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
- entry = ldq_raw(infop->entry) + infop->load_addr;
- toc = ldq_raw(infop->entry + 8) + infop->load_addr;
+ get_user_u64(entry, infop->entry);
+ entry += infop->load_addr;
+ get_user_u64(toc, infop->entry + 8);
+ toc += infop->load_addr;
_regs->gpr[2] = toc;
infop->entry = entry;
#endif
@@ -365,8 +361,9 @@
get_user_ual(_regs->gpr[3], pos);
pos += sizeof(abi_ulong);
_regs->gpr[4] = pos;
- for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong))
- tmp = ldl(pos);
+ for (tmp = 1; tmp != 0; pos += sizeof(abi_ulong)) {
+ get_user_ual(tmp, pos);
+ }
_regs->gpr[5] = pos;
}
@@ -737,8 +734,7 @@
size must be known */
if (qemu_real_host_page_size < qemu_host_page_size) {
abi_ulong end_addr, end_addr1;
- end_addr1 = (elf_bss + qemu_real_host_page_size - 1) &
- ~(qemu_real_host_page_size - 1);
+ end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss);
end_addr = HOST_PAGE_ALIGN(elf_bss);
if (end_addr1 < end_addr) {
mmap((void *)g2h(end_addr1), end_addr - end_addr1,
@@ -1352,9 +1348,7 @@
}
}
if (!bprm->p) {
- if (elf_interpreter) {
- free(elf_interpreter);
- }
+ free(elf_interpreter);
free (elf_phdata);
close(bprm->fd);
return -E2BIG;
@@ -1368,7 +1362,6 @@
info->mmap = 0;
elf_entry = (abi_ulong) elf_ex.e_entry;
-#if defined(CONFIG_USE_GUEST_BASE)
/*
* In case where user has not explicitly set the guest_base, we
* probe here that should we set it automatically.
@@ -1389,7 +1382,6 @@
}
}
}
-#endif /* CONFIG_USE_GUEST_BASE */
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
diff --git a/bsd-user/i386/syscall.h b/bsd-user/i386/syscall.h
deleted file mode 100644
index 9b34c61..0000000
--- a/bsd-user/i386/syscall.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/* default linux values for the selectors */
-#define __USER_CS (0x23)
-#define __USER_DS (0x2B)
-
-struct target_pt_regs {
- long ebx;
- long ecx;
- long edx;
- long esi;
- long edi;
- long ebp;
- long eax;
- int xds;
- int xes;
- long orig_eax;
- long eip;
- int xcs;
- long eflags;
- long esp;
- int xss;
-};
-
-/* ioctls */
-
-#define TARGET_LDT_ENTRIES 8192
-#define TARGET_LDT_ENTRY_SIZE 8
-
-#define TARGET_GDT_ENTRIES 9
-#define TARGET_GDT_ENTRY_TLS_ENTRIES 3
-#define TARGET_GDT_ENTRY_TLS_MIN 6
-#define TARGET_GDT_ENTRY_TLS_MAX (TARGET_GDT_ENTRY_TLS_MIN + TARGET_GDT_ENTRY_TLS_ENTRIES - 1)
-
-struct target_modify_ldt_ldt_s {
- unsigned int entry_number;
- abi_ulong base_addr;
- unsigned int limit;
- unsigned int flags;
-};
-
-/* vm86 defines */
-
-#define TARGET_BIOSSEG 0x0f000
-
-#define TARGET_CPU_086 0
-#define TARGET_CPU_186 1
-#define TARGET_CPU_286 2
-#define TARGET_CPU_386 3
-#define TARGET_CPU_486 4
-#define TARGET_CPU_586 5
-
-#define TARGET_VM86_SIGNAL 0 /* return due to signal */
-#define TARGET_VM86_UNKNOWN 1 /* unhandled GP fault - IO-instruction or similar */
-#define TARGET_VM86_INTx 2 /* int3/int x instruction (ARG = x) */
-#define TARGET_VM86_STI 3 /* sti/popf/iret instruction enabled virtual interrupts */
-
-/*
- * Additional return values when invoking new vm86()
- */
-#define TARGET_VM86_PICRETURN 4 /* return due to pending PIC request */
-#define TARGET_VM86_TRAP 6 /* return due to DOS-debugger request */
-
-/*
- * function codes when invoking new vm86()
- */
-#define TARGET_VM86_PLUS_INSTALL_CHECK 0
-#define TARGET_VM86_ENTER 1
-#define TARGET_VM86_ENTER_NO_BYPASS 2
-#define TARGET_VM86_REQUEST_IRQ 3
-#define TARGET_VM86_FREE_IRQ 4
-#define TARGET_VM86_GET_IRQ_BITS 5
-#define TARGET_VM86_GET_AND_RESET_IRQ 6
-
-/*
- * This is the stack-layout seen by the user space program when we have
- * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
- * is 'kernel_vm86_regs' (see below).
- */
-
-struct target_vm86_regs {
-/*
- * normal regs, with special meaning for the segment descriptors..
- */
- abi_long ebx;
- abi_long ecx;
- abi_long edx;
- abi_long esi;
- abi_long edi;
- abi_long ebp;
- abi_long eax;
- abi_long __null_ds;
- abi_long __null_es;
- abi_long __null_fs;
- abi_long __null_gs;
- abi_long orig_eax;
- abi_long eip;
- unsigned short cs, __csh;
- abi_long eflags;
- abi_long esp;
- unsigned short ss, __ssh;
-/*
- * these are specific to v86 mode:
- */
- unsigned short es, __esh;
- unsigned short ds, __dsh;
- unsigned short fs, __fsh;
- unsigned short gs, __gsh;
-};
-
-struct target_revectored_struct {
- abi_ulong __map[8]; /* 256 bits */
-};
-
-struct target_vm86_struct {
- struct target_vm86_regs regs;
- abi_ulong flags;
- abi_ulong screen_bitmap;
- abi_ulong cpu_type;
- struct target_revectored_struct int_revectored;
- struct target_revectored_struct int21_revectored;
-};
-
-/*
- * flags masks
- */
-#define TARGET_VM86_SCREEN_BITMAP 0x0001
-
-struct target_vm86plus_info_struct {
- abi_ulong flags;
-#define TARGET_force_return_for_pic (1 << 0)
-#define TARGET_vm86dbg_active (1 << 1) /* for debugger */
-#define TARGET_vm86dbg_TFpendig (1 << 2) /* for debugger */
-#define TARGET_is_vm86pus (1 << 31) /* for vm86 internal use */
- unsigned char vm86dbg_intxxtab[32]; /* for debugger */
-};
-
-struct target_vm86plus_struct {
- struct target_vm86_regs regs;
- abi_ulong flags;
- abi_ulong screen_bitmap;
- abi_ulong cpu_type;
- struct target_revectored_struct int_revectored;
- struct target_revectored_struct int21_revectored;
- struct target_vm86plus_info_struct vm86plus;
-};
-
-/* FreeBSD sysarch(2) */
-#define TARGET_FREEBSD_I386_GET_LDT 0
-#define TARGET_FREEBSD_I386_SET_LDT 1
- /* I386_IOPL */
-#define TARGET_FREEBSD_I386_GET_IOPERM 3
-#define TARGET_FREEBSD_I386_SET_IOPERM 4
- /* xxxxx */
-#define TARGET_FREEBSD_I386_VM86 6
-#define TARGET_FREEBSD_I386_GET_FSBASE 7
-#define TARGET_FREEBSD_I386_SET_FSBASE 8
-#define TARGET_FREEBSD_I386_GET_GSBASE 9
-#define TARGET_FREEBSD_I386_SET_GSBASE 10
-
-
-#define UNAME_MACHINE "i386"
-
diff --git a/bsd-user/i386/target_syscall.h b/bsd-user/i386/target_syscall.h
new file mode 100644
index 0000000..8f20138
--- /dev/null
+++ b/bsd-user/i386/target_syscall.h
@@ -0,0 +1,165 @@
+#ifndef TARGET_SYSCALL_H
+#define TARGET_SYSCALL_H
+
+/* default linux values for the selectors */
+#define __USER_CS (0x23)
+#define __USER_DS (0x2B)
+
+/*
+ * Register set for the i386 target; members are named after the x86
+ * registers they hold.
+ * NOTE(review): member order presumably mirrors the Linux i386 pt_regs
+ * layout -- confirm before changing it.
+ */
+struct target_pt_regs {
+ long ebx;
+ long ecx;
+ long edx;
+ long esi;
+ long edi;
+ long ebp;
+ long eax;
+ int xds;
+ int xes;
+ long orig_eax;
+ long eip;
+ int xcs;
+ long eflags;
+ long esp;
+ int xss;
+};
+
+/* ioctls */
+
+#define TARGET_LDT_ENTRIES 8192
+#define TARGET_LDT_ENTRY_SIZE 8
+
+#define TARGET_GDT_ENTRIES 9
+#define TARGET_GDT_ENTRY_TLS_ENTRIES 3
+#define TARGET_GDT_ENTRY_TLS_MIN 6
+#define TARGET_GDT_ENTRY_TLS_MAX (TARGET_GDT_ENTRY_TLS_MIN + TARGET_GDT_ENTRY_TLS_ENTRIES - 1)
+
+/*
+ * Description of one descriptor-table entry in target ABI types.
+ * NOTE(review): presumably the argument block of modify_ldt() -- confirm.
+ */
+struct target_modify_ldt_ldt_s {
+ unsigned int entry_number;
+ abi_ulong base_addr;
+ unsigned int limit;
+ unsigned int flags;
+};
+
+/* vm86 defines */
+
+#define TARGET_BIOSSEG 0x0f000
+
+#define TARGET_CPU_086 0
+#define TARGET_CPU_186 1
+#define TARGET_CPU_286 2
+#define TARGET_CPU_386 3
+#define TARGET_CPU_486 4
+#define TARGET_CPU_586 5
+
+#define TARGET_VM86_SIGNAL 0 /* return due to signal */
+#define TARGET_VM86_UNKNOWN 1 /* unhandled GP fault - IO-instruction or similar */
+#define TARGET_VM86_INTx 2 /* int3/int x instruction (ARG = x) */
+#define TARGET_VM86_STI 3 /* sti/popf/iret instruction enabled virtual interrupts */
+
+/*
+ * Additional return values when invoking new vm86()
+ */
+#define TARGET_VM86_PICRETURN 4 /* return due to pending PIC request */
+#define TARGET_VM86_TRAP 6 /* return due to DOS-debugger request */
+
+/*
+ * function codes when invoking new vm86()
+ */
+#define TARGET_VM86_PLUS_INSTALL_CHECK 0
+#define TARGET_VM86_ENTER 1
+#define TARGET_VM86_ENTER_NO_BYPASS 2
+#define TARGET_VM86_REQUEST_IRQ 3
+#define TARGET_VM86_FREE_IRQ 4
+#define TARGET_VM86_GET_IRQ_BITS 5
+#define TARGET_VM86_GET_AND_RESET_IRQ 6
+
+/*
+ * This is the stack-layout seen by the user space program when we have
+ * done a translation of "SAVE_ALL" from vm86 mode. The real kernel layout
+ * is 'kernel_vm86_regs' (see below).
+ */
+
+/* Register block for vm86 mode, expressed in target ABI types. */
+struct target_vm86_regs {
+/*
+ * normal regs, with special meaning for the segment descriptors..
+ */
+ abi_long ebx;
+ abi_long ecx;
+ abi_long edx;
+ abi_long esi;
+ abi_long edi;
+ abi_long ebp;
+ abi_long eax;
+ abi_long __null_ds;
+ abi_long __null_es;
+ abi_long __null_fs;
+ abi_long __null_gs;
+ abi_long orig_eax;
+ abi_long eip;
+ /* Each 16-bit selector is paired with a second 16-bit member (__xxh),
+ * presumably padding to a 32-bit slot -- TODO confirm. */
+ unsigned short cs, __csh;
+ abi_long eflags;
+ abi_long esp;
+ unsigned short ss, __ssh;
+/*
+ * these are specific to v86 mode:
+ */
+ unsigned short es, __esh;
+ unsigned short ds, __dsh;
+ unsigned short fs, __fsh;
+ unsigned short gs, __gsh;
+};
+
+/*
+ * 256-bit bitmap stored as eight abi_ulong words.
+ * NOTE(review): presumably one bit per interrupt vector -- confirm.
+ */
+struct target_revectored_struct {
+ abi_ulong __map[8]; /* 256 bits */
+};
+
+/*
+ * vm86 state: the register block followed by flags, screen bitmap, CPU type
+ * and two revectoring bitmaps.
+ * NOTE(review): cpu_type presumably takes one of the TARGET_CPU_* values
+ * defined earlier in this header -- confirm.
+ */
+struct target_vm86_struct {
+ struct target_vm86_regs regs;
+ abi_ulong flags;
+ abi_ulong screen_bitmap;
+ abi_ulong cpu_type;
+ struct target_revectored_struct int_revectored;
+ struct target_revectored_struct int21_revectored;
+};
+
+/*
+ * flags masks
+ */
+#define TARGET_VM86_SCREEN_BITMAP 0x0001
+
+/*
+ * Extra state used by the "plus" variant of the vm86 structure.  The
+ * TARGET_* macros defined inside are the bit masks for the flags member.
+ */
+struct target_vm86plus_info_struct {
+    abi_ulong flags;
+#define TARGET_force_return_for_pic (1 << 0)
+#define TARGET_vm86dbg_active       (1 << 1)   /* for debugger */
+#define TARGET_vm86dbg_TFpendig     (1 << 2)   /* for debugger */
+/*
+ * Bit 31 needs an unsigned constant: shifting 1 into the sign bit of a
+ * signed int is undefined behaviour in C.
+ */
+#define TARGET_is_vm86pus           (1U << 31) /* for vm86 internal use */
+    unsigned char vm86dbg_intxxtab[32];        /* for debugger */
+};
+
+/*
+ * Same leading members as struct target_vm86_struct, followed by the
+ * additional vm86plus information block.
+ */
+struct target_vm86plus_struct {
+ struct target_vm86_regs regs;
+ abi_ulong flags;
+ abi_ulong screen_bitmap;
+ abi_ulong cpu_type;
+ struct target_revectored_struct int_revectored;
+ struct target_revectored_struct int21_revectored;
+ struct target_vm86plus_info_struct vm86plus;
+};
+
+/* FreeBSD sysarch(2) */
+#define TARGET_FREEBSD_I386_GET_LDT 0
+#define TARGET_FREEBSD_I386_SET_LDT 1
+ /* I386_IOPL */
+#define TARGET_FREEBSD_I386_GET_IOPERM 3
+#define TARGET_FREEBSD_I386_SET_IOPERM 4
+ /* xxxxx */
+#define TARGET_FREEBSD_I386_VM86 6
+#define TARGET_FREEBSD_I386_GET_FSBASE 7
+#define TARGET_FREEBSD_I386_SET_FSBASE 8
+#define TARGET_FREEBSD_I386_GET_GSBASE 9
+#define TARGET_FREEBSD_I386_SET_GSBASE 10
+
+
+#define UNAME_MACHINE "i386"
+
+#endif /* TARGET_SYSCALL_H */
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 0e8c26c..0fb08e4 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -16,31 +16,30 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
+#include "qemu/osdep.h"
+#include "qemu-version.h"
#include <machine/trap.h>
-#include <sys/types.h>
-#include <sys/mman.h>
+#include "qapi/error.h"
#include "qemu.h"
-#include "qemu-common.h"
+#include "qemu/config-file.h"
+#include "qemu/path.h"
+#include "qemu/help_option.h"
/* For tb_lock */
#include "cpu.h"
+#include "exec/exec-all.h"
#include "tcg.h"
#include "qemu/timer.h"
#include "qemu/envlist.h"
+#include "exec/log.h"
+#include "trace/control.h"
+#include "glib-compat.h"
int singlestep;
-#if defined(CONFIG_USE_GUEST_BASE)
unsigned long mmap_min_addr;
unsigned long guest_base;
int have_guest_base;
unsigned long reserved_va;
-#endif
static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
const char *qemu_uname_release;
@@ -92,7 +91,7 @@
void fork_end(int child)
{
if (child) {
- gdbserver_fork((CPUArchState *)thread_cpu->env_ptr);
+ gdbserver_fork(thread_cpu);
}
}
@@ -108,13 +107,9 @@
/***********************************************************/
/* CPUX86 core interface */
-void cpu_smm_update(CPUX86State *env)
-{
-}
-
uint64_t cpu_get_tsc(CPUX86State *env)
{
- return cpu_get_real_ticks();
+ return cpu_get_host_ticks();
}
static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
@@ -170,12 +165,14 @@
void cpu_loop(CPUX86State *env)
{
+ X86CPU *cpu = x86_env_get_cpu(env);
+ CPUState *cs = CPU(cpu);
int trapnr;
abi_ulong pc;
//target_siginfo_t info;
for(;;) {
- trapnr = cpu_x86_exec(env);
+ trapnr = cpu_exec(cs);
switch(trapnr) {
case 0x80:
/* syscall from int $0x80 */
@@ -516,7 +513,7 @@
//target_siginfo_t info;
while (1) {
- trapnr = cpu_sparc_exec (env);
+ trapnr = cpu_exec(cs);
switch (trapnr) {
#ifndef TARGET_SPARC64
@@ -671,7 +668,8 @@
static void usage(void)
{
- printf("qemu-" TARGET_NAME " version " QEMU_VERSION ", Copyright (c) 2003-2008 Fabrice Bellard\n"
+ printf("qemu-" TARGET_NAME " version " QEMU_VERSION QEMU_PKGVERSION
+ ", " QEMU_COPYRIGHT "\n"
"usage: qemu-" TARGET_NAME " [options] program [arguments...]\n"
"BSD CPU emulator (compiled for %s emulation)\n"
"\n"
@@ -684,9 +682,7 @@
"-drop-ld-preload drop LD_PRELOAD for target process\n"
"-E var=value sets/modifies targets environment variable(s)\n"
"-U var unsets targets environment variable(s)\n"
-#if defined(CONFIG_USE_GUEST_BASE)
"-B address set guest_base address to address\n"
-#endif
"-bsd type select emulated BSD type FreeBSD/NetBSD/OpenBSD (default)\n"
"\n"
"Debug options:\n"
@@ -696,6 +692,8 @@
"-p pagesize set the host page size to 'pagesize'\n"
"-singlestep always run in singlestep mode\n"
"-strace log system calls\n"
+ "-trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
+ " specify tracing options\n"
"\n"
"Environment variables:\n"
"QEMU_STRACE Print system calls and arguments similar to the\n"
@@ -744,6 +742,7 @@
int gdbstub_port = 0;
char **target_environ, **wrk;
envlist_t *envlist = NULL;
+ char *trace_file = NULL;
bsd_type = target_openbsd;
if (argc <= 1)
@@ -762,12 +761,11 @@
}
cpu_model = NULL;
-#if defined(cpudef_setup)
- cpudef_setup(); /* parse cpu definitions in target config file (TBD) */
-#endif
+
+ qemu_add_opts(&qemu_trace_opts);
optind = 1;
- for(;;) {
+ for (;;) {
if (optind >= argc)
break;
r = argv[optind];
@@ -832,11 +830,9 @@
#endif
exit(1);
}
-#if defined(CONFIG_USE_GUEST_BASE)
} else if (!strcmp(r, "B")) {
guest_base = strtol(argv[optind++], NULL, 0);
have_guest_base = 1;
-#endif
} else if (!strcmp(r, "drop-ld-preload")) {
(void) envlist_unsetenv(envlist, "LD_PRELOAD");
} else if (!strcmp(r, "bsd")) {
@@ -854,14 +850,17 @@
singlestep = 1;
} else if (!strcmp(r, "strace")) {
do_strace = 1;
- } else
- {
+ } else if (!strcmp(r, "trace")) {
+ g_free(trace_file);
+ trace_file = trace_opt_parse(optarg);
+ } else {
usage();
}
}
/* init debug */
- qemu_set_log_filename(log_file);
+ qemu_log_needs_buffers();
+ qemu_set_log_filename(log_file, &error_fatal);
if (log_mask) {
int mask;
@@ -878,6 +877,11 @@
}
filename = argv[optind];
+ if (!trace_init_backends()) {
+ exit(1);
+ }
+ trace_init_file(trace_file);
+
/* Zero out regs */
memset(regs, 0, sizeof(struct target_pt_regs));
@@ -905,15 +909,14 @@
#endif
}
tcg_exec_init(0);
- cpu_exec_init_all();
/* NOTE: we need to init the CPU at this stage to get
qemu_host_page_size */
- env = cpu_init(cpu_model);
- if (!env) {
+ cpu = cpu_init(cpu_model);
+ if (!cpu) {
fprintf(stderr, "Unable to find CPU definition\n");
exit(1);
}
- cpu = ENV_GET_CPU(env);
+ env = cpu->env_ptr;
#if defined(TARGET_SPARC) || defined(TARGET_PPC)
cpu_reset(cpu);
#endif
@@ -926,7 +929,6 @@
target_environ = envlist_to_environ(envlist, NULL);
envlist_free(envlist);
-#if defined(CONFIG_USE_GUEST_BASE)
/*
* Now that page sizes are configured in cpu_init() we can do
* proper page alignment for guest_base.
@@ -948,12 +950,11 @@
unsigned long tmp;
if (fscanf(fp, "%lu", &tmp) == 1) {
mmap_min_addr = tmp;
- qemu_log("host mmap_min_addr=0x%lx\n", mmap_min_addr);
+ qemu_log_mask(CPU_LOG_PAGE, "host mmap_min_addr=0x%lx\n", mmap_min_addr);
}
fclose(fp);
}
}
-#endif /* CONFIG_USE_GUEST_BASE */
if (loader_exec(filename, argv+optind, target_environ, regs, info) != 0) {
printf("Error loading %s\n", filename);
@@ -966,10 +967,8 @@
free(target_environ);
- if (qemu_log_enabled()) {
-#if defined(CONFIG_USE_GUEST_BASE)
+ if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
qemu_log("guest_base 0x%lx\n", guest_base);
-#endif
log_page_dump();
qemu_log("start_brk 0x" TARGET_ABI_FMT_lx "\n", info->start_brk);
@@ -989,12 +988,10 @@
syscall_init();
signal_init();
-#if defined(CONFIG_USE_GUEST_BASE)
/* Now that we've loaded the binary, GUEST_BASE is fixed. Delay
generating the prologue until now so that the prologue can take
the real value of GUEST_BASE into account. */
tcg_prologue_init(&tcg_ctx);
-#endif
/* build Task State */
memset(ts, 0, sizeof(TaskState));
@@ -1136,6 +1133,7 @@
gdbserver_start (gdbstub_port);
gdb_handlesig(cpu, 0);
}
+ trace_init_vcpu_events();
cpu_loop(env);
/* never exits */
return 0;
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 092bf7f..610f91b 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -16,13 +16,7 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/mman.h>
+#include "qemu/osdep.h"
#include "qemu.h"
#include "qemu-common.h"
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 5362297..2b2b918 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -17,15 +17,13 @@
#ifndef QEMU_H
#define QEMU_H
-#include <signal.h>
-#include <string.h>
#include "cpu.h"
+#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#undef DEBUG_REMAP
#ifdef DEBUG_REMAP
-#include <stdlib.h>
#endif /* DEBUG_REMAP */
#include "exec/user/abitypes.h"
@@ -38,7 +36,7 @@
extern enum BSDType bsd_type;
#include "syscall_defs.h"
-#include "syscall.h"
+#include "target_syscall.h"
#include "target_signal.h"
#include "exec/gdbstub.h"
@@ -101,9 +99,7 @@
void init_task_state(TaskState *ts);
extern const char *qemu_uname_release;
-#if defined(CONFIG_USE_GUEST_BASE)
extern unsigned long mmap_min_addr;
-#endif
/* ??? See if we can avoid exposing so much of the loader internals. */
/*
@@ -213,10 +209,6 @@
abi_ulong new_addr);
int target_msync(abi_ulong start, abi_ulong len, int flags);
extern unsigned long last_brk;
-void mmap_lock(void);
-void mmap_unlock(void);
-void cpu_list_lock(void);
-void cpu_list_unlock(void);
#if defined(CONFIG_USE_NPTL)
void mmap_fork_start(void);
void mmap_fork_end(int child);
@@ -364,7 +356,7 @@
#ifdef DEBUG_REMAP
{
void *addr;
- addr = malloc(len);
+ addr = g_malloc(len);
if (copy)
memcpy(addr, g2h(guest_addr), len);
else
@@ -390,7 +382,7 @@
return;
if (len > 0)
memcpy(g2h(guest_addr), host_ptr, len);
- free(host_ptr);
+ g_free(host_ptr);
#endif
}
diff --git a/bsd-user/signal.c b/bsd-user/signal.c
index 445f69e..f6f7aa2 100644
--- a/bsd-user/signal.c
+++ b/bsd-user/signal.c
@@ -16,19 +16,11 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdarg.h>
-#include <unistd.h>
-#include <signal.h>
-#include <errno.h>
+#include "qemu/osdep.h"
#include "qemu.h"
#include "target_signal.h"
-//#define DEBUG_SIGNAL
-
void signal_init(void)
{
}
diff --git a/bsd-user/sparc/syscall.h b/bsd-user/sparc/syscall.h
deleted file mode 100644
index 5a9bb7e..0000000
--- a/bsd-user/sparc/syscall.h
+++ /dev/null
@@ -1,9 +0,0 @@
-struct target_pt_regs {
- abi_ulong psr;
- abi_ulong pc;
- abi_ulong npc;
- abi_ulong y;
- abi_ulong u_regs[16];
-};
-
-#define UNAME_MACHINE "sun4"
diff --git a/bsd-user/sparc/target_syscall.h b/bsd-user/sparc/target_syscall.h
new file mode 100644
index 0000000..dfdf9f8
--- /dev/null
+++ b/bsd-user/sparc/target_syscall.h
@@ -0,0 +1,14 @@
+#ifndef TARGET_SYSCALL_H
+#define TARGET_SYSCALL_H
+
+struct target_pt_regs {
+ abi_ulong psr;
+ abi_ulong pc;
+ abi_ulong npc;
+ abi_ulong y;
+ abi_ulong u_regs[16];
+};
+
+#define UNAME_MACHINE "sun4"
+
+#endif /* TARGET_SYSCALL_H */
diff --git a/bsd-user/sparc64/syscall.h b/bsd-user/sparc64/syscall.h
deleted file mode 100644
index 81a816d..0000000
--- a/bsd-user/sparc64/syscall.h
+++ /dev/null
@@ -1,10 +0,0 @@
-struct target_pt_regs {
- abi_ulong u_regs[16];
- abi_ulong tstate;
- abi_ulong pc;
- abi_ulong npc;
- abi_ulong y;
- abi_ulong fprs;
-};
-
-#define UNAME_MACHINE "sun4u"
diff --git a/bsd-user/sparc64/target_syscall.h b/bsd-user/sparc64/target_syscall.h
new file mode 100644
index 0000000..3a9f4c2
--- /dev/null
+++ b/bsd-user/sparc64/target_syscall.h
@@ -0,0 +1,15 @@
+#ifndef TARGET_SYSCALL_H
+#define TARGET_SYSCALL_H
+
+struct target_pt_regs {
+ abi_ulong u_regs[16];
+ abi_ulong tstate;
+ abi_ulong pc;
+ abi_ulong npc;
+ abi_ulong y;
+ abi_ulong fprs;
+};
+
+#define UNAME_MACHINE "sun4u"
+
+#endif /* TARGET_SYSCALL_H */
diff --git a/bsd-user/strace.c b/bsd-user/strace.c
index e33dd4d..fa66fe1 100644
--- a/bsd-user/strace.c
+++ b/bsd-user/strace.c
@@ -16,14 +16,10 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <stdio.h>
-#include <errno.h>
+#include "qemu/osdep.h"
#include <sys/select.h>
-#include <sys/types.h>
-#include <unistd.h>
#include <sys/syscall.h>
#include <sys/ioccom.h>
-#include <ctype.h>
#include "qemu.h"
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index a4d1583..66492aa 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -16,18 +16,9 @@
* You should have received a copy of the GNU General Public License
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdarg.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <time.h>
-#include <limits.h>
-#include <sys/types.h>
-#include <sys/mman.h>
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qemu/path.h"
#include <sys/syscall.h>
#include <sys/param.h>
#include <sys/sysctl.h>
@@ -324,12 +315,14 @@
abi_long arg5, abi_long arg6, abi_long arg7,
abi_long arg8)
{
+ CPUState *cpu = ENV_GET_CPU(cpu_env);
abi_long ret;
void *p;
#ifdef DEBUG
gemu_log("freebsd syscall %d\n", num);
#endif
+ trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8);
if(do_strace)
print_freebsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
@@ -409,6 +402,7 @@
#endif
if (do_strace)
print_freebsd_syscall_ret(num, ret);
+ trace_guest_user_syscall_ret(cpu, num, ret);
return ret;
efault:
ret = -TARGET_EFAULT;
@@ -419,12 +413,14 @@
abi_long arg2, abi_long arg3, abi_long arg4,
abi_long arg5, abi_long arg6)
{
+ CPUState *cpu = ENV_GET_CPU(cpu_env);
abi_long ret;
void *p;
#ifdef DEBUG
gemu_log("netbsd syscall %d\n", num);
#endif
+ trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
if(do_strace)
print_netbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
@@ -481,6 +477,7 @@
#endif
if (do_strace)
print_netbsd_syscall_ret(num, ret);
+ trace_guest_user_syscall_ret(cpu, num, ret);
return ret;
efault:
ret = -TARGET_EFAULT;
@@ -491,12 +488,14 @@
abi_long arg2, abi_long arg3, abi_long arg4,
abi_long arg5, abi_long arg6)
{
+ CPUState *cpu = ENV_GET_CPU(cpu_env);
abi_long ret;
void *p;
#ifdef DEBUG
gemu_log("openbsd syscall %d\n", num);
#endif
+ trace_guest_user_syscall(cpu, num, arg1, arg2, arg3, arg4, arg5, arg6, 0, 0);
if(do_strace)
print_openbsd_syscall(num, arg1, arg2, arg3, arg4, arg5, arg6);
@@ -553,6 +552,7 @@
#endif
if (do_strace)
print_openbsd_syscall_ret(num, ret);
+ trace_guest_user_syscall_ret(cpu, num, ret);
return ret;
efault:
ret = -TARGET_EFAULT;
diff --git a/bsd-user/uaccess.c b/bsd-user/uaccess.c
index 677f19c..91e2067 100644
--- a/bsd-user/uaccess.c
+++ b/bsd-user/uaccess.c
@@ -1,6 +1,6 @@
/* User memory access */
-#include <stdio.h>
-#include <string.h>
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
#include "qemu.h"
@@ -51,7 +51,7 @@
ptr = lock_user(VERIFY_READ, guest_addr, max_len, 1);
if (!ptr)
return -TARGET_EFAULT;
- len = qemu_strnlen((char *)ptr, max_len);
+ len = qemu_strnlen((const char *)ptr, max_len);
unlock_user(ptr, guest_addr, 0);
guest_addr += len;
/* we don't allow wrapping or integer overflow */
diff --git a/bsd-user/x86_64/syscall.h b/bsd-user/x86_64/syscall.h
deleted file mode 100644
index 630514a..0000000
--- a/bsd-user/x86_64/syscall.h
+++ /dev/null
@@ -1,116 +0,0 @@
-#define __USER_CS (0x33)
-#define __USER_DS (0x2B)
-
-struct target_pt_regs {
- abi_ulong r15;
- abi_ulong r14;
- abi_ulong r13;
- abi_ulong r12;
- abi_ulong rbp;
- abi_ulong rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here */
- abi_ulong r11;
- abi_ulong r10;
- abi_ulong r9;
- abi_ulong r8;
- abi_ulong rax;
- abi_ulong rcx;
- abi_ulong rdx;
- abi_ulong rsi;
- abi_ulong rdi;
- abi_ulong orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
- abi_ulong rip;
- abi_ulong cs;
- abi_ulong eflags;
- abi_ulong rsp;
- abi_ulong ss;
-/* top of stack page */
-};
-
-/* Maximum number of LDT entries supported. */
-#define TARGET_LDT_ENTRIES 8192
-/* The size of each LDT entry. */
-#define TARGET_LDT_ENTRY_SIZE 8
-
-#define TARGET_GDT_ENTRIES 16
-#define TARGET_GDT_ENTRY_TLS_ENTRIES 3
-#define TARGET_GDT_ENTRY_TLS_MIN 12
-#define TARGET_GDT_ENTRY_TLS_MAX 14
-
-#if 0 // Redefine this
-struct target_modify_ldt_ldt_s {
- unsigned int entry_number;
- abi_ulong base_addr;
- unsigned int limit;
- unsigned int seg_32bit:1;
- unsigned int contents:2;
- unsigned int read_exec_only:1;
- unsigned int limit_in_pages:1;
- unsigned int seg_not_present:1;
- unsigned int useable:1;
- unsigned int lm:1;
-};
-#else
-struct target_modify_ldt_ldt_s {
- unsigned int entry_number;
- abi_ulong base_addr;
- unsigned int limit;
- unsigned int flags;
-};
-#endif
-
-struct target_ipc64_perm
-{
- int key;
- uint32_t uid;
- uint32_t gid;
- uint32_t cuid;
- uint32_t cgid;
- unsigned short mode;
- unsigned short __pad1;
- unsigned short seq;
- unsigned short __pad2;
- abi_ulong __unused1;
- abi_ulong __unused2;
-};
-
-struct target_msqid64_ds {
- struct target_ipc64_perm msg_perm;
- unsigned int msg_stime; /* last msgsnd time */
- unsigned int msg_rtime; /* last msgrcv time */
- unsigned int msg_ctime; /* last change time */
- abi_ulong msg_cbytes; /* current number of bytes on queue */
- abi_ulong msg_qnum; /* number of messages in queue */
- abi_ulong msg_qbytes; /* max number of bytes on queue */
- unsigned int msg_lspid; /* pid of last msgsnd */
- unsigned int msg_lrpid; /* last receive pid */
- abi_ulong __unused4;
- abi_ulong __unused5;
-};
-
-/* FreeBSD sysarch(2) */
-#define TARGET_FREEBSD_I386_GET_LDT 0
-#define TARGET_FREEBSD_I386_SET_LDT 1
- /* I386_IOPL */
-#define TARGET_FREEBSD_I386_GET_IOPERM 3
-#define TARGET_FREEBSD_I386_SET_IOPERM 4
- /* xxxxx */
-#define TARGET_FREEBSD_I386_GET_FSBASE 7
-#define TARGET_FREEBSD_I386_SET_FSBASE 8
-#define TARGET_FREEBSD_I386_GET_GSBASE 9
-#define TARGET_FREEBSD_I386_SET_GSBASE 10
-
-#define TARGET_FREEBSD_AMD64_GET_FSBASE 128
-#define TARGET_FREEBSD_AMD64_SET_FSBASE 129
-#define TARGET_FREEBSD_AMD64_GET_GSBASE 130
-#define TARGET_FREEBSD_AMD64_SET_GSBASE 131
-
-
-#define UNAME_MACHINE "x86_64"
-
-#define TARGET_ARCH_SET_GS 0x1001
-#define TARGET_ARCH_SET_FS 0x1002
-#define TARGET_ARCH_GET_FS 0x1003
-#define TARGET_ARCH_GET_GS 0x1004
diff --git a/bsd-user/x86_64/target_syscall.h b/bsd-user/x86_64/target_syscall.h
new file mode 100644
index 0000000..211ce29
--- /dev/null
+++ b/bsd-user/x86_64/target_syscall.h
@@ -0,0 +1,121 @@
+#ifndef TARGET_SYSCALL_H
+#define TARGET_SYSCALL_H
+
+#define __USER_CS (0x33)
+#define __USER_DS (0x2B)
+
+struct target_pt_regs {
+ abi_ulong r15;
+ abi_ulong r14;
+ abi_ulong r13;
+ abi_ulong r12;
+ abi_ulong rbp;
+ abi_ulong rbx;
+/* arguments: non interrupts/non tracing syscalls only save up to here */
+ abi_ulong r11;
+ abi_ulong r10;
+ abi_ulong r9;
+ abi_ulong r8;
+ abi_ulong rax;
+ abi_ulong rcx;
+ abi_ulong rdx;
+ abi_ulong rsi;
+ abi_ulong rdi;
+ abi_ulong orig_rax;
+/* end of arguments */
+/* cpu exception frame or undefined */
+ abi_ulong rip;
+ abi_ulong cs;
+ abi_ulong eflags;
+ abi_ulong rsp;
+ abi_ulong ss;
+/* top of stack page */
+};
+
+/* Maximum number of LDT entries supported. */
+#define TARGET_LDT_ENTRIES 8192
+/* The size of each LDT entry. */
+#define TARGET_LDT_ENTRY_SIZE 8
+
+#define TARGET_GDT_ENTRIES 16
+#define TARGET_GDT_ENTRY_TLS_ENTRIES 3
+#define TARGET_GDT_ENTRY_TLS_MIN 12
+#define TARGET_GDT_ENTRY_TLS_MAX 14
+
+#if 0 // Redefine this
+struct target_modify_ldt_ldt_s {
+ unsigned int entry_number;
+ abi_ulong base_addr;
+ unsigned int limit;
+ unsigned int seg_32bit:1;
+ unsigned int contents:2;
+ unsigned int read_exec_only:1;
+ unsigned int limit_in_pages:1;
+ unsigned int seg_not_present:1;
+ unsigned int useable:1;
+ unsigned int lm:1;
+};
+#else
+struct target_modify_ldt_ldt_s {
+ unsigned int entry_number;
+ abi_ulong base_addr;
+ unsigned int limit;
+ unsigned int flags;
+};
+#endif
+
+struct target_ipc64_perm
+{
+ int key;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t cuid;
+ uint32_t cgid;
+ unsigned short mode;
+ unsigned short __pad1;
+ unsigned short seq;
+ unsigned short __pad2;
+ abi_ulong __unused1;
+ abi_ulong __unused2;
+};
+
+struct target_msqid64_ds {
+ struct target_ipc64_perm msg_perm;
+ unsigned int msg_stime; /* last msgsnd time */
+ unsigned int msg_rtime; /* last msgrcv time */
+ unsigned int msg_ctime; /* last change time */
+ abi_ulong msg_cbytes; /* current number of bytes on queue */
+ abi_ulong msg_qnum; /* number of messages in queue */
+ abi_ulong msg_qbytes; /* max number of bytes on queue */
+ unsigned int msg_lspid; /* pid of last msgsnd */
+ unsigned int msg_lrpid; /* last receive pid */
+ abi_ulong __unused4;
+ abi_ulong __unused5;
+};
+
+/* FreeBSD sysarch(2) */
+#define TARGET_FREEBSD_I386_GET_LDT 0
+#define TARGET_FREEBSD_I386_SET_LDT 1
+ /* I386_IOPL */
+#define TARGET_FREEBSD_I386_GET_IOPERM 3
+#define TARGET_FREEBSD_I386_SET_IOPERM 4
+ /* xxxxx */
+#define TARGET_FREEBSD_I386_GET_FSBASE 7
+#define TARGET_FREEBSD_I386_SET_FSBASE 8
+#define TARGET_FREEBSD_I386_GET_GSBASE 9
+#define TARGET_FREEBSD_I386_SET_GSBASE 10
+
+#define TARGET_FREEBSD_AMD64_GET_FSBASE 128
+#define TARGET_FREEBSD_AMD64_SET_FSBASE 129
+#define TARGET_FREEBSD_AMD64_GET_GSBASE 130
+#define TARGET_FREEBSD_AMD64_SET_GSBASE 131
+
+
+#define UNAME_MACHINE "x86_64"
+
+#define TARGET_ARCH_SET_GS 0x1001
+#define TARGET_ARCH_SET_FS 0x1002
+#define TARGET_ARCH_GET_FS 0x1003
+#define TARGET_ARCH_GET_GS 0x1004
+
+#endif /* TARGET_SYSCALL_H */
diff --git a/bt-host.c b/bt-host.c
index 49205bf..2f8f631 100644
--- a/bt-host.c
+++ b/bt-host.c
@@ -17,12 +17,12 @@
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/bt.h"
#include "qemu/main-loop.h"
#ifndef _WIN32
-# include <errno.h>
# include <sys/ioctl.h>
# include <sys/uio.h>
# ifdef CONFIG_BLUEZ
diff --git a/bt-vhci.c b/bt-vhci.c
index e267c8a..9d277c3 100644
--- a/bt-vhci.c
+++ b/bt-vhci.c
@@ -17,6 +17,7 @@
* with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include "qemu/osdep.h"
#include "qemu-common.h"
#include "sysemu/bt.h"
#include "hw/bt.h"
diff --git a/configure b/configure
index 137dd59..890c14c 100755
--- a/configure
+++ b/configure
@@ -8,6 +8,9 @@
CLICOLOR_FORCE= GREP_OPTIONS=
unset CLICOLOR_FORCE GREP_OPTIONS
+# Don't allow CCACHE, if present, to use cached results of compile tests!
+export CCACHE_RECACHE=yes
+
# Temporary directory used for files created while
# configure runs. Since it is in the build directory
# we can safely blow away any previous version of it
@@ -28,6 +31,7 @@
TMPL="${TMPDIR1}/${TMPB}.lo"
TMPA="${TMPDIR1}/lib${TMPB}.la"
TMPE="${TMPDIR1}/${TMPB}.exe"
+TMPMO="${TMPDIR1}/${TMPB}.mo"
rm -f config.log
@@ -103,7 +107,8 @@
}
compile_object() {
- do_cc $QEMU_CFLAGS -c -o $TMPO $TMPC
+ local_cflags="$1"
+ do_cc $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC
}
compile_prog() {
@@ -112,38 +117,6 @@
do_cc $QEMU_CFLAGS $local_cflags -o $TMPE $TMPC $LDFLAGS $local_ldflags
}
-do_libtool() {
- local mode=$1
- shift
- # Run the compiler, capturing its output to the log.
- echo $libtool $mode --tag=CC $cc "$@" >> config.log
- $libtool $mode --tag=CC $cc "$@" >> config.log 2>&1 || return $?
- # Test passed. If this is an --enable-werror build, rerun
- # the test with -Werror and bail out if it fails. This
- # makes warning-generating-errors in configure test code
- # obvious to developers.
- if test "$werror" != "yes"; then
- return 0
- fi
- # Don't bother rerunning the compile if we were already using -Werror
- case "$*" in
- *-Werror*)
- return 0
- ;;
- esac
- echo $libtool $mode --tag=CC $cc -Werror "$@" >> config.log
- $libtool $mode --tag=CC $cc -Werror "$@" >> config.log 2>&1 && return $?
- error_exit "configure test passed without -Werror but failed with -Werror." \
- "This is probably a bug in the configure script. The failing command" \
- "will be at the bottom of config.log." \
- "You can run configure with --disable-werror to bypass this check."
-}
-
-libtool_prog() {
- do_libtool --mode=compile $QEMU_CFLAGS -c -fPIE -DPIE -o $TMPO $TMPC || return $?
- do_libtool --mode=link $LDFLAGS -o $TMPA $TMPL -rpath /usr/local/lib
-}
-
# symbolically link $1 to $2. Portable version of "ln -sf".
symlink() {
rm -rf "$2"
@@ -192,7 +165,7 @@
}
# default parameters
-source_path=`dirname "$0"`
+source_path=$(dirname "$0")
cpu=""
iasl="iasl"
interp_prefix="/usr/gnemul/qemu-%M"
@@ -235,19 +208,18 @@
netmap="no"
pixman=""
sdl=""
-sdlabi="1.2"
+sdlabi=""
virtfs=""
vnc="yes"
sparse="no"
uuid=""
vde=""
-vnc_tls=""
vnc_sasl=""
vnc_jpeg=""
vnc_png=""
-vnc_ws=""
xen=""
xen_ctrl_version=""
+xen_pv_domain_build="no"
xen_pci_passthrough=""
linux_aio=""
cap_ng=""
@@ -263,6 +235,7 @@
gprof="no"
debug_tcg="no"
debug="no"
+fortify_source=""
strip_opt="yes"
tcg_interpreter="no"
bigendian="no"
@@ -285,8 +258,6 @@
local_statedir="\${prefix}/var"
confsuffix="/qemu"
slirp="yes"
-fmod_lib=""
-fmod_inc=""
oss_lib=""
bsd="no"
linux="no"
@@ -296,26 +267,29 @@
softmmu="yes"
linux_user="no"
bsd_user="no"
-guest_base="yes"
aix="no"
blobs="yes"
pkgversion=""
pie=""
-zero_malloc=""
qom_cast_debug="yes"
-trace_backends="nop"
+trace_backends="log"
trace_file="trace"
spice=""
rbd=""
-smartcard_nss=""
+smartcard=""
libusb=""
usb_redir=""
-glx=""
+opengl=""
+opengl_dmabuf="no"
+avx2_opt="no"
zlib="yes"
lzo=""
snappy=""
+bzip2=""
guest_agent=""
guest_agent_with_vss="no"
+guest_agent_ntddscsi="no"
+guest_agent_msi=""
vss_win32_sdk=""
win_sdk="no"
want_tools="yes"
@@ -325,22 +299,32 @@
coroutine_pool=""
seccomp=""
glusterfs=""
+glusterfs_xlator_opt="no"
glusterfs_discard="no"
glusterfs_zerofill="no"
-archipelago=""
+archipelago="no"
gtk=""
gtkabi=""
+gtk_gl="no"
+tls_priority="NORMAL"
+gnutls=""
+gnutls_rnd=""
+nettle=""
+nettle_kdf="no"
+gcrypt=""
+gcrypt_kdf="no"
vte=""
+virglrenderer=""
tpm="yes"
libssh2=""
vhdx=""
-quorum=""
numa=""
-android="no"
+tcmalloc="no"
+jemalloc="no"
# parse CC options first
for opt do
- optarg=`expr "x$opt" : 'x[^=]*=\(.*\)'`
+ optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)')
case "$opt" in
--cross-prefix=*) cross_prefix="$optarg"
;;
@@ -352,10 +336,10 @@
;;
--cpu=*) cpu="$optarg"
;;
- --extra-cflags=*) QEMU_CFLAGS="$optarg $QEMU_CFLAGS"
+ --extra-cflags=*) QEMU_CFLAGS="$QEMU_CFLAGS $optarg"
EXTRA_CFLAGS="$optarg"
;;
- --extra-ldflags=*) LDFLAGS="$optarg $LDFLAGS"
+ --extra-ldflags=*) LDFLAGS="$LDFLAGS $optarg"
EXTRA_LDFLAGS="$optarg"
;;
--enable-debug-info) debug_info="yes"
@@ -386,10 +370,10 @@
ar="${AR-${cross_prefix}ar}"
as="${AS-${cross_prefix}as}"
+ccas="${CCAS-$cc}"
cpp="${CPP-$cc -E}"
objcopy="${OBJCOPY-${cross_prefix}objcopy}"
ld="${LD-${cross_prefix}ld}"
-libtool="${LIBTOOL-${cross_prefix}libtool}"
nm="${NM-${cross_prefix}nm}"
strip="${STRIP-${cross_prefix}strip}"
windres="${WINDRES-${cross_prefix}windres}"
@@ -416,7 +400,7 @@
fi
# make source path absolute
-source_path=`cd "$source_path"; pwd`
+source_path=$(cd "$source_path"; pwd)
# running configure in the source tree?
# we know that's the case if configure is there.
@@ -436,6 +420,20 @@
compile_object
}
+check_include() {
+cat > $TMPC <<EOF
+#include <$1>
+int main(void) { return 0; }
+EOF
+ compile_object
+}
+
+write_c_skeleton() {
+ cat > $TMPC <<EOF
+int main(void) { return 0; }
+EOF
+}
+
if check_define __linux__ ; then
targetos="Linux"
elif check_define _WIN32 ; then
@@ -447,7 +445,7 @@
elif check_define __HAIKU__ ; then
targetos='Haiku'
else
- targetos=`uname -s`
+ targetos=$(uname -s)
fi
# Some host OSes need non-standard checks for which CPU to use.
@@ -465,7 +463,7 @@
fi
;;
SunOS)
- # `uname -m` returns i86pc even on an x86_64 box, so default based on isainfo
+ # $(uname -m) returns i86pc even on an x86_64 box, so default based on isainfo
if test -z "$cpu" && test "$(isainfo -k)" = "amd64"; then
cpu="x86_64"
fi
@@ -511,7 +509,7 @@
elif check_define __hppa__ ; then
cpu="hppa"
else
- cpu=`uname -m`
+ cpu=$(uname -m)
fi
ARCH=
@@ -557,25 +555,28 @@
CYGWIN*)
mingw32="yes"
QEMU_CFLAGS="-mno-cygwin $QEMU_CFLAGS"
- audio_possible_drivers="winwave sdl"
- audio_drv_list="winwave"
+ audio_possible_drivers="sdl"
+ audio_drv_list="sdl"
;;
MINGW32*)
mingw32="yes"
- hax="yes"
- audio_possible_drivers="winwave dsound sdl fmod winaudio"
- audio_drv_list="winwave"
+ audio_possible_drivers="dsound sdl winaudio"
+ if check_include dsound.h; then
+ audio_drv_list="dsound"
+ else
+ audio_drv_list=""
+ fi
;;
GNU/kFreeBSD)
bsd="yes"
audio_drv_list="oss"
- audio_possible_drivers="oss sdl esd pa"
+ audio_possible_drivers="oss sdl pa"
;;
FreeBSD)
bsd="yes"
make="${MAKE-gmake}"
audio_drv_list="oss"
- audio_possible_drivers="oss sdl esd pa"
+ audio_possible_drivers="oss sdl pa"
# needed for kinfo_getvmmap(3) in libutil.h
LIBS="-lutil $LIBS"
netmap="" # enable netmap autodetect
@@ -585,14 +586,14 @@
bsd="yes"
make="${MAKE-gmake}"
audio_drv_list="oss"
- audio_possible_drivers="oss sdl esd pa"
+ audio_possible_drivers="oss sdl pa"
HOST_VARIANT_DIR="dragonfly"
;;
NetBSD)
bsd="yes"
make="${MAKE-gmake}"
audio_drv_list="oss"
- audio_possible_drivers="oss sdl esd"
+ audio_possible_drivers="oss sdl"
oss_lib="-lossaudio"
HOST_VARIANT_DIR="netbsd"
;;
@@ -600,7 +601,7 @@
bsd="yes"
make="${MAKE-gmake}"
audio_drv_list="sdl"
- audio_possible_drivers="sdl esd"
+ audio_possible_drivers="sdl"
HOST_VARIANT_DIR="openbsd"
;;
Darwin)
@@ -614,7 +615,7 @@
fi
cocoa="yes"
audio_drv_list="coreaudio"
- audio_possible_drivers="coreaudio sdl fmod"
+ audio_possible_drivers="coreaudio sdl"
LDFLAGS="-framework CoreFoundation -framework IOKit $LDFLAGS"
libs_softmmu="-F/System/Library/Frameworks -framework Cocoa -framework IOKit $libs_softmmu"
# Disable attempts to use ObjectiveC features in os/object.h since they
@@ -629,7 +630,7 @@
ld="gld"
smbd="${SMBD-/usr/sfw/sbin/smbd}"
needs_libsunmath="no"
- solarisrev=`uname -r | cut -f2 -d.`
+ solarisrev=$(uname -r | cut -f2 -d.)
if [ "$cpu" = "i386" -o "$cpu" = "x86_64" ] ; then
if test "$solarisrev" -le 9 ; then
if test -f /opt/SUNWspro/prod/lib/libsunmath.so.1; then
@@ -669,15 +670,12 @@
;;
*)
audio_drv_list="oss"
- audio_possible_drivers="oss alsa sdl esd pa"
+ audio_possible_drivers="oss alsa sdl pa"
linux="yes"
linux_user="yes"
kvm="yes"
vhost_net="yes"
vhost_scsi="yes"
- if [ "$cpu" = "i386" -o "$cpu" = "x86_64" -o "$cpu" = "x32" ] ; then
- audio_possible_drivers="$audio_possible_drivers fmod"
- fi
QEMU_INCLUDES="-I\$(SRC_PATH)/linux-headers -I$(pwd)/linux-headers $QEMU_INCLUDES"
;;
esac
@@ -706,10 +704,10 @@
QEMU_CFLAGS="-DWIN32_LEAN_AND_MEAN -DWINVER=0x501 $QEMU_CFLAGS"
# enable C99/POSIX format strings (needs mingw32-runtime 3.15 or later)
QEMU_CFLAGS="-D__USE_MINGW_ANSI_STDIO=1 $QEMU_CFLAGS"
+ # MinGW needs -mthreads for TLS and macro _MT.
+ QEMU_CFLAGS="-mthreads $QEMU_CFLAGS"
LIBS="-lwinmm -lws2_32 -liphlpapi $LIBS"
-cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
+ write_c_skeleton;
if compile_prog "" "-liberty" ; then
LIBS="-liberty $LIBS"
fi
@@ -721,13 +719,13 @@
sysconfdir="\${prefix}"
local_statedir=
confsuffix=""
- libs_qga="-lws2_32 -lwinmm -lpowrprof $libs_qga"
+ libs_qga="-lws2_32 -lwinmm -lpowrprof -liphlpapi -lnetapi32 $libs_qga"
fi
werror=""
for opt do
- optarg=`expr "x$opt" : 'x[^=]*=\(.*\)'`
+ optarg=$(expr "x$opt" : 'x[^=]*=\(.*\)')
case "$opt" in
--help|-h) show_help=yes
;;
@@ -772,6 +770,9 @@
--enable-modules)
modules="yes"
;;
+ --disable-modules)
+ modules="no"
+ ;;
--cpu=*)
;;
--target-list=*) target_list="$optarg"
@@ -844,17 +845,13 @@
;;
--enable-vnc) vnc="yes"
;;
- --fmod-lib=*) fmod_lib="$optarg"
- ;;
- --fmod-inc=*) fmod_inc="$optarg"
- ;;
--oss-lib=*) oss_lib="$optarg"
;;
--audio-drv-list=*) audio_drv_list="$optarg"
;;
- --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=`echo "$optarg" | sed -e 's/,/ /g'`
+ --block-drv-rw-whitelist=*|--block-drv-whitelist=*) block_drv_rw_whitelist=$(echo "$optarg" | sed -e 's/,/ /g')
;;
- --block-drv-ro-whitelist=*) block_drv_ro_whitelist=`echo "$optarg" | sed -e 's/,/ /g'`
+ --block-drv-ro-whitelist=*) block_drv_ro_whitelist=$(echo "$optarg" | sed -e 's/,/ /g')
;;
--enable-debug-tcg) debug_tcg="yes"
;;
@@ -865,6 +862,7 @@
debug_tcg="yes"
debug="yes"
strip_opt="no"
+ fortify_source="no"
;;
--enable-sparse) sparse="yes"
;;
@@ -872,10 +870,6 @@
;;
--disable-strip) strip_opt="no"
;;
- --disable-vnc-tls) vnc_tls="no"
- ;;
- --enable-vnc-tls) vnc_tls="yes"
- ;;
--disable-vnc-sasl) vnc_sasl="no"
;;
--enable-vnc-sasl) vnc_sasl="yes"
@@ -888,10 +882,6 @@
;;
--enable-vnc-png) vnc_png="yes"
;;
- --disable-vnc-ws) vnc_ws="no"
- ;;
- --enable-vnc-ws) vnc_ws="yes"
- ;;
--disable-slirp) slirp="no"
;;
--disable-uuid) uuid="no"
@@ -914,6 +904,10 @@
;;
--enable-xen-pci-passthrough) xen_pci_passthrough="yes"
;;
+ --disable-xen-pv-domain-build) xen_pv_domain_build="no"
+ ;;
+ --enable-xen-pv-domain-build) xen_pv_domain_build="yes"
+ ;;
--disable-brlapi) brlapi="no"
;;
--enable-brlapi) brlapi="yes"
@@ -956,8 +950,7 @@
;;
--enable-cocoa)
cocoa="yes" ;
- sdl="no" ;
- audio_drv_list="coreaudio `echo $audio_drv_list | sed s,coreaudio,,g`"
+ audio_drv_list="coreaudio $(echo $audio_drv_list | sed s,coreaudio,,g)"
;;
--disable-system) softmmu="no"
;;
@@ -976,10 +969,6 @@
;;
--enable-bsd-user) bsd_user="yes"
;;
- --enable-guest-base) guest_base="yes"
- ;;
- --disable-guest-base) guest_base="no"
- ;;
--enable-pie) pie="yes"
;;
--disable-pie) pie="no"
@@ -1034,9 +1023,9 @@
;;
--enable-vhost-scsi) vhost_scsi="yes"
;;
- --disable-glx) glx="no"
+ --disable-opengl) opengl="no"
;;
- --enable-glx) glx="yes"
+ --enable-opengl) opengl="yes"
;;
--disable-rbd) rbd="no"
;;
@@ -1046,9 +1035,9 @@
;;
--enable-xfsctl) xfs="yes"
;;
- --disable-smartcard-nss) smartcard_nss="no"
+ --disable-smartcard) smartcard="no"
;;
- --enable-smartcard-nss) smartcard_nss="yes"
+ --enable-smartcard) smartcard="yes"
;;
--disable-libusb) libusb="no"
;;
@@ -1068,10 +1057,18 @@
;;
--enable-snappy) snappy="yes"
;;
+ --disable-bzip2) bzip2="no"
+ ;;
+ --enable-bzip2) bzip2="yes"
+ ;;
--enable-guest-agent) guest_agent="yes"
;;
--disable-guest-agent) guest_agent="no"
;;
+ --enable-guest-agent-msi) guest_agent_msi="yes"
+ ;;
+ --disable-guest-agent-msi) guest_agent_msi="no"
+ ;;
--with-vss-sdk) vss_win32_sdk=""
;;
--with-vss-sdk=*) vss_win32_sdk="$optarg"
@@ -1107,6 +1104,20 @@
;;
--enable-gtk) gtk="yes"
;;
+ --tls-priority=*) tls_priority="$optarg"
+ ;;
+ --disable-gnutls) gnutls="no"
+ ;;
+ --enable-gnutls) gnutls="yes"
+ ;;
+ --disable-nettle) nettle="no"
+ ;;
+ --enable-nettle) nettle="yes"
+ ;;
+ --disable-gcrypt) gcrypt="no"
+ ;;
+ --enable-gcrypt) gcrypt="yes"
+ ;;
--enable-rdma) rdma="yes"
;;
--disable-rdma) rdma="no"
@@ -1117,6 +1128,10 @@
;;
--enable-vte) vte="yes"
;;
+ --disable-virglrenderer) virglrenderer="no"
+ ;;
+ --enable-virglrenderer) virglrenderer="yes"
+ ;;
--disable-tpm) tpm="no"
;;
--enable-tpm) tpm="yes"
@@ -1129,15 +1144,17 @@
;;
--disable-vhdx) vhdx="no"
;;
- --disable-quorum) quorum="no"
- ;;
- --enable-quorum) quorum="yes"
- ;;
--disable-numa) numa="no"
;;
--enable-numa) numa="yes"
;;
- --enable-android) android="yes"
+ --disable-tcmalloc) tcmalloc="no"
+ ;;
+ --enable-tcmalloc) tcmalloc="yes"
+ ;;
+ --disable-jemalloc) jemalloc="no"
+ ;;
+ --enable-jemalloc) jemalloc="yes"
;;
*)
echo "ERROR: unknown option $opt"
@@ -1153,18 +1170,14 @@
# Note that if the Python conditional here evaluates True we will exit
# with status 1 which is a shell 'false' value.
-if ! $python -c 'import sys; sys.exit(sys.version_info < (2,4) or sys.version_info >= (3,))'; then
- error_exit "Cannot use '$python', Python 2.4 or later is required." \
+if ! $python -c 'import sys; sys.exit(sys.version_info < (2,6) or sys.version_info >= (3,))'; then
+ error_exit "Cannot use '$python', Python 2.6 or later is required." \
"Note that Python 3 or later is not yet supported." \
"Use --python=/path/to/python to specify a supported Python."
fi
-# The -B switch was added in Python 2.6.
-# If it is supplied, compiled files are not written.
-# Use it for Python versions which support it.
-if $python -B -c 'import sys; sys.exit(0)' 2>/dev/null; then
- python="$python -B"
-fi
+# Suppress writing compiled files
+python="$python -B"
case "$cpu" in
ppc)
@@ -1212,6 +1225,13 @@
QEMU_CFLAGS="$CPU_CFLAGS $QEMU_CFLAGS"
EXTRA_CFLAGS="$CPU_CFLAGS $EXTRA_CFLAGS"
+# For user-mode emulation the host arch has to be one we explicitly
+# support, even if we're using TCI.
+if [ "$ARCH" = "unknown" ]; then
+ bsd_user="no"
+ linux_user="no"
+fi
+
default_target_list=""
mak_wilds=""
@@ -1269,29 +1289,10 @@
--sysconfdir=PATH install config in PATH$confsuffix
--localstatedir=PATH install local state in PATH (set at runtime on win32)
--with-confsuffix=SUFFIX suffix for QEMU data inside datadir/libdir/sysconfdir [$confsuffix]
- --enable-modules enable modules support
- --enable-debug-tcg enable TCG debugging
- --disable-debug-tcg disable TCG debugging (default)
- --enable-debug-info enable debugging information (default)
- --disable-debug-info disable debugging information
--enable-debug enable common debug build options
- --enable-sparse enable sparse checker
- --disable-sparse disable sparse checker (default)
--disable-strip disable stripping binaries
--disable-werror disable compilation abort on warning
--disable-stack-protector disable compiler-provided stack protection
- --disable-sdl disable SDL
- --enable-sdl enable SDL
- --with-sdlabi select preferred SDL ABI 1.2 or 2.0
- --disable-gtk disable gtk UI
- --enable-gtk enable gtk UI
- --with-gtkabi select preferred GTK ABI 2.0 or 3.0
- --disable-virtfs disable VirtFS
- --enable-virtfs enable VirtFS
- --disable-vnc disable VNC
- --enable-vnc enable VNC
- --disable-cocoa disable Cocoa (Mac OS X only)
- --enable-cocoa enable Cocoa (default on Mac OS X)
--audio-drv-list=LIST set audio drivers list:
Available drivers: $audio_possible_drivers
--block-drv-whitelist=L Same as --block-drv-rw-whitelist=L
@@ -1301,118 +1302,91 @@
--block-drv-ro-whitelist=L
set block driver read-only whitelist
(affects only QEMU, not qemu-img)
- --disable-xen disable xen backend driver support
- --enable-xen enable xen backend driver support
- --disable-xen-pci-passthrough
- --enable-xen-pci-passthrough
- --disable-brlapi disable BrlAPI
- --enable-brlapi enable BrlAPI
- --disable-vnc-tls disable TLS encryption for VNC server
- --enable-vnc-tls enable TLS encryption for VNC server
- --disable-vnc-sasl disable SASL encryption for VNC server
- --enable-vnc-sasl enable SASL encryption for VNC server
- --disable-vnc-jpeg disable JPEG lossy compression for VNC server
- --enable-vnc-jpeg enable JPEG lossy compression for VNC server
- --disable-vnc-png disable PNG compression for VNC server (default)
- --enable-vnc-png enable PNG compression for VNC server
- --disable-vnc-ws disable Websockets support for VNC server
- --enable-vnc-ws enable Websockets support for VNC server
- --disable-curses disable curses output
- --enable-curses enable curses output
- --disable-curl disable curl connectivity
- --enable-curl enable curl connectivity
- --disable-fdt disable fdt device tree
- --enable-fdt enable fdt device tree
- --disable-bluez disable bluez stack connectivity
- --enable-bluez enable bluez stack connectivity
- --disable-slirp disable SLIRP userspace network connectivity
- --disable-kvm disable KVM acceleration support
- --enable-kvm enable KVM acceleration support
- --disable-hax disable HAX acceleration support
- --enable-hax enable HAX acceleration support
- --disable-rdma disable RDMA-based migration support
- --enable-rdma enable RDMA-based migration support
- --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
- --enable-system enable all system emulation targets
- --disable-system disable all system emulation targets
- --enable-user enable supported user emulation targets
- --disable-user disable all user emulation targets
- --enable-linux-user enable all linux usermode emulation targets
- --disable-linux-user disable all linux usermode emulation targets
- --enable-bsd-user enable all BSD usermode emulation targets
- --disable-bsd-user disable all BSD usermode emulation targets
- --enable-guest-base enable GUEST_BASE support for usermode
- emulation targets
- --disable-guest-base disable GUEST_BASE support
- --enable-pie build Position Independent Executables
- --disable-pie do not build Position Independent Executables
- --fmod-lib path to FMOD library
- --fmod-inc path to FMOD includes
- --oss-lib path to OSS library
- --cpu=CPU Build for host CPU [$cpu]
- --disable-uuid disable uuid support
- --enable-uuid enable uuid support
- --disable-vde disable support for vde network
- --enable-vde enable support for vde network
- --disable-netmap disable support for netmap network
- --enable-netmap enable support for netmap network
- --disable-linux-aio disable Linux AIO support
- --enable-linux-aio enable Linux AIO support
- --disable-cap-ng disable libcap-ng support
- --enable-cap-ng enable libcap-ng support
- --disable-attr disable attr and xattr support
- --enable-attr enable attr and xattr support
- --disable-blobs disable installing provided firmware blobs
- --enable-docs enable documentation build
- --disable-docs disable documentation build
- --disable-vhost-net disable vhost-net acceleration support
- --enable-vhost-net enable vhost-net acceleration support
--enable-trace-backends=B Set trace backend
Available backends: $($python $source_path/scripts/tracetool.py --list-backends)
--with-trace-file=NAME Full PATH,NAME of file to store traces
Default:trace-<pid>
- --disable-spice disable spice
- --enable-spice enable spice
- --enable-rbd enable building the rados block device (rbd)
- --disable-libiscsi disable iscsi support
- --enable-libiscsi enable iscsi support
- --disable-libnfs disable nfs support
- --enable-libnfs enable nfs support
- --disable-smartcard-nss disable smartcard nss support
- --enable-smartcard-nss enable smartcard nss support
- --disable-libusb disable libusb (for usb passthrough)
- --enable-libusb enable libusb (for usb passthrough)
- --disable-usb-redir disable usb network redirection support
- --enable-usb-redir enable usb network redirection support
- --enable-lzo enable the support of lzo compression library
- --enable-snappy enable the support of snappy compression library
- --disable-guest-agent disable building of the QEMU Guest Agent
- --enable-guest-agent enable building of the QEMU Guest Agent
- --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent
- --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb)
- --disable-seccomp disable seccomp support
- --enable-seccomp enable seccomp support
+ --disable-slirp disable SLIRP userspace network connectivity
+ --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
+ --oss-lib path to OSS library
+ --cpu=CPU Build for host CPU [$cpu]
--with-coroutine=BACKEND coroutine backend. Supported options:
gthread, ucontext, sigaltstack, windows
- --disable-coroutine-pool disable coroutine freelist (worse performance)
- --enable-coroutine-pool enable coroutine freelist (better performance)
- --enable-glusterfs enable GlusterFS backend
- --disable-glusterfs disable GlusterFS backend
- --enable-archipelago enable Archipelago backend
- --disable-archipelago disable Archipelago backend
--enable-gcov enable test coverage analysis with gcov
--gcov=GCOV use specified gcov [$gcov_tool]
- --disable-tpm disable TPM support
- --enable-tpm enable TPM support
- --disable-libssh2 disable ssh block device support
- --enable-libssh2 enable ssh block device support
- --disable-vhdx disable support for the Microsoft VHDX image format
- --enable-vhdx enable support for the Microsoft VHDX image format
- --disable-quorum disable quorum block filter support
- --enable-quorum enable quorum block filter support
- --disable-numa disable libnuma support
- --enable-numa enable libnuma support
- --enable-android enable Android support
+ --disable-blobs disable installing provided firmware blobs
+ --with-vss-sdk=SDK-path enable Windows VSS support in QEMU Guest Agent
+ --with-win-sdk=SDK-path path to Windows Platform SDK (to build VSS .tlb)
+ --tls-priority default TLS protocol/cipher priority string
+
+Optional features, enabled with --enable-FEATURE and
+disabled with --disable-FEATURE, default is enabled if available:
+
+ system all system emulation targets
+ user supported user emulation targets
+ linux-user all linux usermode emulation targets
+ bsd-user all BSD usermode emulation targets
+ docs build documentation
+ guest-agent build the QEMU Guest Agent
+ guest-agent-msi build guest agent Windows MSI installation package
+ pie Position Independent Executables
+ modules modules support
+ debug-tcg TCG debugging (default is disabled)
+ debug-info debugging information
+ sparse sparse checker
+
+ gnutls GNUTLS cryptography support
+ nettle nettle cryptography support
+ gcrypt libgcrypt cryptography support
+ sdl SDL UI
+ --with-sdlabi select preferred SDL ABI 1.2 or 2.0
+ gtk gtk UI
+ --with-gtkabi select preferred GTK ABI 2.0 or 3.0
+ vte vte support for the gtk UI
+ curses curses UI
+ vnc VNC UI support
+ vnc-sasl SASL encryption for VNC server
+ vnc-jpeg JPEG lossy compression for VNC server
+ vnc-png PNG compression for VNC server
+ cocoa Cocoa UI (Mac OS X only)
+ virtfs VirtFS
+ xen xen backend driver support
+ xen-pci-passthrough
+ brlapi BrlAPI (Braille)
+ curl curl connectivity
+ fdt fdt device tree
+ bluez bluez stack connectivity
+ kvm KVM acceleration support
+ hax HAX acceleration support
+ rdma RDMA-based migration support
+ uuid uuid support
+ vde support for vde network
+ netmap support for netmap network
+ linux-aio Linux AIO support
+ cap-ng libcap-ng support
+ attr attr and xattr support
+ vhost-net vhost-net acceleration support
+ spice spice
+ rbd rados block device (rbd)
+ libiscsi iscsi support
+ libnfs nfs support
+ smartcard smartcard support (libcacard)
+ libusb libusb (for usb passthrough)
+ usb-redir usb network redirection support
+ lzo support of lzo compression library
+ snappy support of snappy compression library
+ bzip2 support of bzip2 compression library
+ (for reading bzip2-compressed dmg images)
+ seccomp seccomp support
+ coroutine-pool coroutine freelist (better performance)
+ glusterfs GlusterFS backend
+ archipelago Archipelago backend
+ tpm TPM support
+ libssh2 ssh block device support
+ vhdx support for the Microsoft VHDX image format
+ numa libnuma support
+ tcmalloc tcmalloc support
+ jemalloc jemalloc support
NOTE: The object files are built at the place where configure is launched
EOF
@@ -1424,7 +1398,6 @@
if test "$ARCH" = "unknown"; then
if test "$tcg_interpreter" = "yes" ; then
echo "Unsupported CPU = $cpu, will use TCG with TCI (experimental)"
- ARCH=tci
else
error_exit "Unsupported CPU = $cpu, try --enable-tcg-interpreter"
fi
@@ -1432,11 +1405,9 @@
# Consult white-list to determine whether to enable werror
# by default. Only enable by default for git builds
-z_version=`cut -f3 -d. $source_path/VERSION`
-
if test -z "$werror" ; then
if test -d "$source_path/.git" -a \
- "$linux" = "yes" ; then
+ \( "$linux" = "yes" -o "$mingw32" = "yes" \) ; then
werror="yes"
else
werror="no"
@@ -1444,15 +1415,15 @@
fi
# check that the C compiler works.
-cat > $TMPC <<EOF
-int main(void) { return 0; }
-EOF
-
+write_c_skeleton;
if compile_object ; then
: C compiler works ok
else
error_exit "\"$cc\" either does not exist or does not work"
fi
+if ! compile_prog ; then
+ error_exit "\"$cc\" cannot build an executable (is your linker broken?)"
+fi
# Check that the C++ compiler exists and works with the C compiler
if has $cxx; then
@@ -1488,27 +1459,41 @@
gcc_flags="-Wold-style-declaration -Wold-style-definition -Wtype-limits"
gcc_flags="-Wformat-security -Wformat-y2k -Winit-self -Wignored-qualifiers $gcc_flags"
gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
-gcc_flags="-Wendif-labels $gcc_flags"
+gcc_flags="-Wendif-labels -Wno-shift-negative-value $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
# Note that we do not add -Werror to gcc_flags here, because that would
# enable it for all configure tests. If a configure test failed due
# to -Werror this would just silently disable some features,
# so it's too error prone.
-cat > $TMPC << EOF
-int main(void) { return 0; }
-EOF
-for flag in $gcc_flags; do
+
+cc_has_warning_flag() {
+ write_c_skeleton;
+
# Use the positive sense of the flag when testing for -Wno-wombat
# support (gcc will happily accept the -Wno- form of unknown
# warning options).
- optflag="$(echo $flag | sed -e 's/^-Wno-/-W/')"
- if compile_prog "-Werror $optflag" "" ; then
- QEMU_CFLAGS="$QEMU_CFLAGS $flag"
+ optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')"
+ compile_prog "-Werror $optflag" ""
+}
+
+for flag in $gcc_flags; do
+ if cc_has_warning_flag $flag ; then
+ QEMU_CFLAGS="$QEMU_CFLAGS $flag"
fi
done
if test "$stack_protector" != "no"; then
+ cat > $TMPC << EOF
+int main(int argc, char *argv[])
+{
+ char arr[64], *p = arr, *c = argv[0];
+ while (*c) {
+ *p++ = *c++;
+ }
+ return 0;
+}
+EOF
gcc_flags="-fstack-protector-strong -fstack-protector-all"
sp_on=0
for flag in $gcc_flags; do
@@ -1517,7 +1502,6 @@
if do_cc $QEMU_CFLAGS -Werror $flag -c -o $TMPO $TMPC &&
compile_prog "-Werror $flag" ""; then
QEMU_CFLAGS="$QEMU_CFLAGS $flag"
- LIBTOOLFLAGS="$LIBTOOLFLAGS -Wc,$flag"
sp_on=1
break
fi
@@ -1555,6 +1539,17 @@
fi
fi
+# Unconditional check for compiler __thread support
+ cat > $TMPC << EOF
+static __thread int tls_var;
+int main(void) { return tls_var; }
+EOF
+
+if ! compile_prog "-Werror" "" ; then
+ error_exit "Your compiler does not support the __thread specifier for " \
+ "Thread-Local Storage (TLS). Please upgrade to a version that does."
+fi
+
if test "$pie" = ""; then
case "$cpu-$targetos" in
i386-Linux|x86_64-Linux|x32-Linux|i386-OpenBSD|x86_64-OpenBSD)
@@ -1595,38 +1590,12 @@
fi
fi
- if compile_prog "-fno-pie" "-nopie"; then
+ if compile_prog "-Werror -fno-pie" "-nopie"; then
CFLAGS_NOPIE="-fno-pie"
LDFLAGS_NOPIE="-nopie"
fi
fi
-# check for broken gcc and libtool in RHEL5
-if test -n "$libtool" -a "$pie" != "no" ; then
- cat > $TMPC <<EOF
-
-void *f(unsigned char *buf, int len);
-void *g(unsigned char *buf, int len);
-
-void *
-f(unsigned char *buf, int len)
-{
- return (void*)0L;
-}
-
-void *
-g(unsigned char *buf, int len)
-{
- return f(buf, len);
-}
-
-EOF
- if ! libtool_prog; then
- echo "Disabling libtool due to broken toolchain support"
- libtool=
- fi
-fi
-
##########################################
# __sync_fetch_and_and requires at least -march=i486. Many toolchains
# use i686 as default anyway, but for those that don't, an explicit
@@ -1663,7 +1632,7 @@
"install fileutils from www.blastwave.org using pkg-get -i fileutils" \
"to get ginstall which is used by default (which lives in /opt/csw/bin)"
fi
- if test "`path_of $install`" = "/usr/sbin/install" ; then
+ if test "$(path_of $install)" = "/usr/sbin/install" ; then
error_exit "Solaris /usr/sbin/install is not an appropriate install program." \
"try ginstall from the GNU fileutils available from www.blastwave.org" \
"using pkg-get -i fileutils, or use --install=/usr/ucb/install"
@@ -1682,7 +1651,7 @@
if test -z "${target_list+xxx}" ; then
target_list="$default_target_list"
else
- target_list=`echo "$target_list" | sed -e 's/,/ /g'`
+ target_list=$(echo "$target_list" | sed -e 's/,/ /g')
fi
# Check that we recognised the target name; this allows a more
@@ -1738,6 +1707,21 @@
fi
##########################################
+# cocoa implies not SDL or GTK
+# (the cocoa UI code currently assumes it is always the active UI
+# and doesn't interact well with other UI frontend code)
+if test "$cocoa" = "yes"; then
+ if test "$sdl" = "yes"; then
+ error_exit "Cocoa and SDL UIs cannot both be enabled at once"
+ fi
+ if test "$gtk" = "yes"; then
+ error_exit "Cocoa and GTK UIs cannot both be enabled at once"
+ fi
+ gtk=no
+ sdl=no
+fi
+
+##########################################
# L2TPV3 probe
cat > $TMPC <<EOF
@@ -1752,6 +1736,37 @@
fi
##########################################
+# MinGW / Mingw-w64 localtime_r/gmtime_r check
+
+if test "$mingw32" = "yes"; then
+ # Some versions of MinGW / Mingw-w64 lack localtime_r
+ # and gmtime_r entirely.
+ #
+ # Some versions of Mingw-w64 define a macro for
+ # localtime_r/gmtime_r.
+ #
+ # Some versions of Mingw-w64 will define functions
+ # for localtime_r/gmtime_r, but only if you have
+ # _POSIX_THREAD_SAFE_FUNCTIONS defined. For fun
+ # though, unistd.h and pthread.h both define
+ # that for you.
+ #
+ # So this #undef localtime_r and #include <unistd.h>
+ # are not in fact redundant.
+cat > $TMPC << EOF
+#include <unistd.h>
+#include <time.h>
+#undef localtime_r
+int main(void) { localtime_r(NULL, NULL); return 0; }
+EOF
+ if compile_prog "" "" ; then
+ localtime_r="yes"
+ else
+ localtime_r="no"
+ fi
+fi
+
+##########################################
# pkg-config probe
if ! has "$pkg_config_exe"; then
@@ -1778,6 +1793,33 @@
fi
##########################################
+# avx2 optimization requirement check
+
+
+if test "$static" = "no" ; then
+ cat > $TMPC << EOF
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#include <cpuid.h>
+#include <immintrin.h>
+
+static int bar(void *a) {
+ return _mm256_movemask_epi8(_mm256_cmpeq_epi8(*(__m256i *)a, (__m256i){0}));
+}
+static void *bar_ifunc(void) {return (void*) bar;}
+int foo(void *a) __attribute__((ifunc("bar_ifunc")));
+int main(int argc, char *argv[]) { return foo(argv[0]);}
+EOF
+ if compile_object "" ; then
+ if has readelf; then
+ if readelf --syms $TMPO 2>/dev/null |grep -q "IFUNC.*foo"; then
+ avx2_opt="yes"
+ fi
+ fi
+ fi
+fi
+
+#########################################
# zlib check
if test "$zlib" != "no" ; then
@@ -1833,19 +1875,61 @@
fi
##########################################
+# bzip2 check
+
+if test "$bzip2" != "no" ; then
+ cat > $TMPC << EOF
+#include <bzlib.h>
+int main(void) { BZ2_bzlibVersion(); return 0; }
+EOF
+ if compile_prog "" "-lbz2" ; then
+ bzip2="yes"
+ else
+ if test "$bzip2" = "yes"; then
+ feature_not_found "libbzip2" "Install libbzip2 devel"
+ fi
+ bzip2="no"
+ fi
+fi
+
+##########################################
# libseccomp check
if test "$seccomp" != "no" ; then
- if test "$cpu" = "i386" || test "$cpu" = "x86_64" &&
- $pkg_config --atleast-version=2.1.1 libseccomp; then
- libs_softmmu="$libs_softmmu `$pkg_config --libs libseccomp`"
- QEMU_CFLAGS="$QEMU_CFLAGS `$pkg_config --cflags libseccomp`"
- seccomp="yes"
+ case "$cpu" in
+ i386|x86_64)
+ libseccomp_minver="2.1.0"
+ ;;
+ mips)
+ libseccomp_minver="2.2.0"
+ ;;
+ arm|aarch64)
+ libseccomp_minver="2.2.3"
+ ;;
+ ppc|ppc64)
+ libseccomp_minver="2.3.0"
+ ;;
+ *)
+ libseccomp_minver=""
+ ;;
+ esac
+
+ if test "$libseccomp_minver" != "" &&
+ $pkg_config --atleast-version=$libseccomp_minver libseccomp ; then
+ libs_softmmu="$libs_softmmu $($pkg_config --libs libseccomp)"
+ QEMU_CFLAGS="$QEMU_CFLAGS $($pkg_config --cflags libseccomp)"
+ seccomp="yes"
else
- if test "$seccomp" = "yes"; then
- feature_not_found "libseccomp" "Install libseccomp devel >= 2.1.0"
- fi
- seccomp="no"
+ if test "$seccomp" = "yes" ; then
+ if test "$libseccomp_minver" != "" ; then
+ feature_not_found "libseccomp" \
+ "Install libseccomp devel >= $libseccomp_minver"
+ else
+ feature_not_found "libseccomp" \
+ "libseccomp is not supported for host cpu $cpu"
+ fi
+ fi
+ seccomp="no"
fi
fi
##########################################
@@ -1853,6 +1937,7 @@
if test "$xen" != "no" ; then
xen_libs="-lxenstore -lxenctrl -lxenguest"
+ xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
# First we test whether Xen headers and libraries are available.
# If no, we are done and there is no Xen support.
@@ -1875,6 +1960,128 @@
# Xen unstable
elif
cat > $TMPC <<EOF &&
+/*
+ * If we have stable libs then we don't want the libxc compat
+ * layers, regardless of what CFLAGS we may have been given.
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <xenevtchn.h>
+#include <xengnttab.h>
+#include <xenforeignmemory.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
+int main(void) {
+ xc_interface *xc = NULL;
+ xenforeignmemory_handle *xfmem;
+ xenevtchn_handle *xe;
+ xengnttab_handle *xg;
+ xen_domain_handle_t handle;
+
+ xs_daemon_open();
+
+ xc = xc_interface_open(0, 0, 0);
+ xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+ xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+ xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+ xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
+ xc_domain_create(xc, 0, handle, 0, NULL, NULL);
+
+ xfmem = xenforeignmemory_open(0, 0);
+ xenforeignmemory_map(xfmem, 0, 0, 0, 0, 0);
+
+ xe = xenevtchn_open(0, 0);
+ xenevtchn_fd(xe);
+
+ xg = xengnttab_open(0, 0);
+ xengnttab_map_grant_ref(xg, 0, 0, 0);
+
+ return 0;
+}
+EOF
+ compile_prog "" "$xen_libs $xen_stable_libs"
+ then
+ xen_ctrl_version=471
+ xen=yes
+ elif
+ cat > $TMPC <<EOF &&
+#include <xenctrl.h>
+#include <stdint.h>
+int main(void) {
+ xc_interface *xc = NULL;
+ xen_domain_handle_t handle;
+ xc_domain_create(xc, 0, handle, 0, NULL, NULL);
+ return 0;
+}
+EOF
+ compile_prog "" "$xen_libs"
+ then
+ xen_ctrl_version=470
+ xen=yes
+
+ # Xen 4.6
+ elif
+ cat > $TMPC <<EOF &&
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
+int main(void) {
+ xc_interface *xc;
+ xs_daemon_open();
+ xc = xc_interface_open(0, 0, 0);
+ xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+ xc_gnttab_open(NULL, 0);
+ xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+ xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+ xc_hvm_create_ioreq_server(xc, 0, HVM_IOREQSRV_BUFIOREQ_ATOMIC, NULL);
+ xc_reserved_device_memory_map(xc, 0, 0, 0, 0, NULL, 0);
+ return 0;
+}
+EOF
+ compile_prog "" "$xen_libs"
+ then
+ xen_ctrl_version=460
+ xen=yes
+
+ # Xen 4.5
+ elif
+ cat > $TMPC <<EOF &&
+#include <xenctrl.h>
+#include <xenstore.h>
+#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
+#if !defined(HVM_MAX_VCPUS)
+# error HVM_MAX_VCPUS not defined
+#endif
+int main(void) {
+ xc_interface *xc;
+ xs_daemon_open();
+ xc = xc_interface_open(0, 0, 0);
+ xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
+ xc_gnttab_open(NULL, 0);
+ xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
+ xc_hvm_inject_msi(xc, 0, 0xf0000000, 0x00000000);
+ xc_hvm_create_ioreq_server(xc, 0, 0, NULL);
+ return 0;
+}
+EOF
+ compile_prog "" "$xen_libs"
+ then
+ xen_ctrl_version=450
+ xen=yes
+
+ elif
+ cat > $TMPC <<EOF &&
#include <xenctrl.h>
#include <xenstore.h>
#include <stdint.h>
@@ -1898,119 +2105,27 @@
xen_ctrl_version=420
xen=yes
- elif
- cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xs.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
- xs_daemon_open();
- xc_interface_open(0, 0, 0);
- xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
- xc_gnttab_open(NULL, 0);
- xc_domain_add_to_physmap(0, 0, XENMAPSPACE_gmfn, 0, 0);
- return 0;
-}
-EOF
- compile_prog "" "$xen_libs"
- then
- xen_ctrl_version=410
- xen=yes
-
- # Xen 4.0.0
- elif
- cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xs.h>
-#include <stdint.h>
-#include <xen/hvm/hvm_info_table.h>
-#if !defined(HVM_MAX_VCPUS)
-# error HVM_MAX_VCPUS not defined
-#endif
-int main(void) {
- struct xen_add_to_physmap xatp = {
- .domid = 0, .space = XENMAPSPACE_gmfn, .idx = 0, .gpfn = 0,
- };
- xs_daemon_open();
- xc_interface_open();
- xc_gnttab_open();
- xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
- xc_memory_op(0, XENMEM_add_to_physmap, &xatp);
- return 0;
-}
-EOF
- compile_prog "" "$xen_libs"
- then
- xen_ctrl_version=400
- xen=yes
-
- # Xen 3.4.0
- elif
- cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xs.h>
-int main(void) {
- struct xen_add_to_physmap xatp = {
- .domid = 0, .space = XENMAPSPACE_gmfn, .idx = 0, .gpfn = 0,
- };
- xs_daemon_open();
- xc_interface_open();
- xc_gnttab_open();
- xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
- xc_memory_op(0, XENMEM_add_to_physmap, &xatp);
- return 0;
-}
-EOF
- compile_prog "" "$xen_libs"
- then
- xen_ctrl_version=340
- xen=yes
-
- # Xen 3.3.0
- elif
- cat > $TMPC <<EOF &&
-#include <xenctrl.h>
-#include <xs.h>
-int main(void) {
- xs_daemon_open();
- xc_interface_open();
- xc_gnttab_open();
- xc_hvm_set_mem_type(0, 0, HVMMEM_ram_ro, 0, 0);
- return 0;
-}
-EOF
- compile_prog "" "$xen_libs"
- then
- xen_ctrl_version=330
- xen=yes
-
- # Xen version unsupported
else
if test "$xen" = "yes" ; then
- feature_not_found "xen (unsupported version)" "Install supported xen (e.g. 4.0, 3.4, 3.3)"
+ feature_not_found "xen (unsupported version)" \
+ "Install a supported xen (xen 4.2 or newer)"
fi
xen=no
fi
if test "$xen" = yes; then
+ if test $xen_ctrl_version -ge 471 ; then
+ libs_softmmu="$xen_stable_libs $libs_softmmu"
+ fi
libs_softmmu="$xen_libs $libs_softmmu"
fi
fi
if test "$xen_pci_passthrough" != "no"; then
- if test "$xen" = "yes" && test "$linux" = "yes" &&
- test "$xen_ctrl_version" -ge 340; then
+ if test "$xen" = "yes" && test "$linux" = "yes"; then
xen_pci_passthrough=yes
else
if test "$xen_pci_passthrough" = "yes"; then
- if test "$xen_ctrl_version" -lt 340; then
- error_exit "User requested feature Xen PCI Passthrough" \
- "This feature does not work with Xen 3.3"
- fi
error_exit "User requested feature Xen PCI Passthrough" \
" but this feature requires /sys from Linux"
fi
@@ -2018,19 +2133,10 @@
fi
fi
-##########################################
-# libtool probe
-
-if ! has $libtool; then
- libtool=
-fi
-
-# MacOSX ships with a libtool which isn't the GNU one; weed this
-# out by checking whether libtool supports the --version switch
-if test -n "$libtool"; then
- if ! "$libtool" --version >/dev/null 2>&1; then
- libtool=
- fi
+if test "$xen_pv_domain_build" = "yes" &&
+ test "$xen" != "yes"; then
+ error_exit "User requested Xen PV domain builder support" \
+ "which requires Xen support."
fi
##########################################
@@ -2047,6 +2153,15 @@
fi
##########################################
+# X11 probe
+x11_cflags=
+x11_libs=-lX11
+if $pkg_config --exists "x11"; then
+ x11_cflags=$($pkg_config --cflags x11)
+ x11_libs=$($pkg_config --libs x11)
+fi
+
+##########################################
# GTK probe
if test "$gtkabi" = ""; then
@@ -2070,10 +2185,12 @@
gtkversion="2.18.0"
fi
if $pkg_config --exists "$gtkpackage >= $gtkversion"; then
- gtk_cflags=`$pkg_config --cflags $gtkpackage`
- gtk_libs=`$pkg_config --libs $gtkpackage`
+ gtk_cflags=$($pkg_config --cflags $gtkpackage)
+ gtk_libs=$($pkg_config --libs $gtkpackage)
+ gtk_version=$($pkg_config --modversion $gtkpackage)
if $pkg_config --exists "$gtkx11package >= $gtkversion"; then
- gtk_libs="$gtk_libs -lX11"
+ gtk_cflags="$gtk_cflags $x11_cflags"
+ gtk_libs="$gtk_libs $x11_libs"
fi
libs_softmmu="$gtk_libs $libs_softmmu"
gtk="yes"
@@ -2084,25 +2201,239 @@
fi
fi
+
+##########################################
+# GNUTLS probe
+
+gnutls_works() {
+ # Unfortunately some distros have bad pkg-config information for gnutls
+ # such that it claims to exist but you get a compiler error if you try
+ # to use the options returned by --libs. Specifically, Ubuntu for --static
+ # builds doesn't work:
+ # https://bugs.launchpad.net/ubuntu/+source/gnutls26/+bug/1478035
+ #
+ # So sanity check the cflags/libs before assuming gnutls can be used.
+ if ! $pkg_config --exists "gnutls"; then
+ return 1
+ fi
+
+ write_c_skeleton
+ compile_prog "$($pkg_config --cflags gnutls)" "$($pkg_config --libs gnutls)"
+}
+
+gnutls_gcrypt=no
+gnutls_nettle=no
+if test "$gnutls" != "no"; then
+ if gnutls_works; then
+ gnutls_cflags=$($pkg_config --cflags gnutls)
+ gnutls_libs=$($pkg_config --libs gnutls)
+ libs_softmmu="$gnutls_libs $libs_softmmu"
+ libs_tools="$gnutls_libs $libs_tools"
+ QEMU_CFLAGS="$QEMU_CFLAGS $gnutls_cflags"
+ gnutls="yes"
+
+ # gnutls_rnd requires >= 2.11.0
+ if $pkg_config --exists "gnutls >= 2.11.0"; then
+ gnutls_rnd="yes"
+ else
+ gnutls_rnd="no"
+ fi
+
+ if $pkg_config --exists 'gnutls >= 3.0'; then
+ gnutls_gcrypt=no
+ gnutls_nettle=yes
+ elif $pkg_config --exists 'gnutls >= 2.12'; then
+ case $($pkg_config --libs --static gnutls) in
+ *gcrypt*)
+ gnutls_gcrypt=yes
+ gnutls_nettle=no
+ ;;
+ *nettle*)
+ gnutls_gcrypt=no
+ gnutls_nettle=yes
+ ;;
+ *)
+ gnutls_gcrypt=yes
+ gnutls_nettle=no
+ ;;
+ esac
+ else
+ gnutls_gcrypt=yes
+ gnutls_nettle=no
+ fi
+ elif test "$gnutls" = "yes"; then
+ feature_not_found "gnutls" "Install gnutls devel"
+ else
+ gnutls="no"
+ gnutls_rnd="no"
+ fi
+else
+ gnutls_rnd="no"
+fi
+
+
+# If user didn't give a --disable/enable-gcrypt flag,
+# then mark as disabled if user requested nettle
+# explicitly, or if gnutls links to nettle
+if test -z "$gcrypt"
+then
+ if test "$nettle" = "yes" || test "$gnutls_nettle" = "yes"
+ then
+ gcrypt="no"
+ fi
+fi
+
+# If user didn't give a --disable/enable-nettle flag,
+# then mark as disabled if user requested gcrypt
+# explicitly, or if gnutls links to gcrypt
+if test -z "$nettle"
+then
+ if test "$gcrypt" = "yes" || test "$gnutls_gcrypt" = "yes"
+ then
+ nettle="no"
+ fi
+fi
+
+has_libgcrypt_config() {
+ if ! has "libgcrypt-config"
+ then
+ return 1
+ fi
+
+ if test -n "$cross_prefix"
+ then
+ host=$(libgcrypt-config --host)
+ if test "$host-" != $cross_prefix
+ then
+ return 1
+ fi
+ fi
+
+ return 0
+}
+
+if test "$gcrypt" != "no"; then
+ if has_libgcrypt_config; then
+ gcrypt_cflags=$(libgcrypt-config --cflags)
+ gcrypt_libs=$(libgcrypt-config --libs)
+ # Debian has removed -lgpg-error from libgcrypt-config
+ # as it "spreads unnecessary dependencies" which in
+ # turn breaks static builds...
+ if test "$static" = "yes"
+ then
+ gcrypt_libs="$gcrypt_libs -lgpg-error"
+ fi
+ libs_softmmu="$gcrypt_libs $libs_softmmu"
+ libs_tools="$gcrypt_libs $libs_tools"
+ QEMU_CFLAGS="$QEMU_CFLAGS $gcrypt_cflags"
+ gcrypt="yes"
+ if test -z "$nettle"; then
+ nettle="no"
+ fi
+
+ cat > $TMPC << EOF
+#include <gcrypt.h>
+int main(void) {
+ gcry_kdf_derive(NULL, 0, GCRY_KDF_PBKDF2,
+ GCRY_MD_SHA256,
+ NULL, 0, 0, 0, NULL);
+ return 0;
+}
+EOF
+ if compile_prog "$gcrypt_cflags" "$gcrypt_libs" ; then
+ gcrypt_kdf=yes
+ fi
+ else
+ if test "$gcrypt" = "yes"; then
+ feature_not_found "gcrypt" "Install gcrypt devel"
+ else
+ gcrypt="no"
+ fi
+ fi
+fi
+
+
+if test "$nettle" != "no"; then
+ if $pkg_config --exists "nettle"; then
+ nettle_cflags=$($pkg_config --cflags nettle)
+ nettle_libs=$($pkg_config --libs nettle)
+ nettle_version=$($pkg_config --modversion nettle)
+ libs_softmmu="$nettle_libs $libs_softmmu"
+ libs_tools="$nettle_libs $libs_tools"
+ QEMU_CFLAGS="$QEMU_CFLAGS $nettle_cflags"
+ nettle="yes"
+
+ cat > $TMPC << EOF
+#include <stddef.h>
+#include <nettle/pbkdf2.h>
+int main(void) {
+ pbkdf2_hmac_sha256(8, NULL, 1000, 8, NULL, 8, NULL);
+ return 0;
+}
+EOF
+ if compile_prog "$nettle_cflags" "$nettle_libs" ; then
+ nettle_kdf=yes
+ fi
+ else
+ if test "$nettle" = "yes"; then
+ feature_not_found "nettle" "Install nettle devel"
+ else
+ nettle="no"
+ fi
+ fi
+fi
+
+if test "$gcrypt" = "yes" && test "$nettle" = "yes"
+then
+ error_exit "Only one of gcrypt & nettle can be enabled"
+fi
+
+##########################################
+# libtasn1 - only for the TLS creds/session test suite
+
+tasn1=yes
+tasn1_cflags=""
+tasn1_libs=""
+if $pkg_config --exists "libtasn1"; then
+ tasn1_cflags=$($pkg_config --cflags libtasn1)
+ tasn1_libs=$($pkg_config --libs libtasn1)
+else
+ tasn1=no
+fi
+
+
+##########################################
+# getifaddrs (for tests/test-io-channel-socket)
+
+have_ifaddrs_h=yes
+if ! check_include "ifaddrs.h" ; then
+ have_ifaddrs_h=no
+fi
+
##########################################
# VTE probe
if test "$vte" != "no"; then
if test "$gtkabi" = "3.0"; then
- vtepackage="vte-2.90"
- vteversion="0.32.0"
+ vteminversion="0.32.0"
+ if $pkg_config --exists "vte-2.91"; then
+ vtepackage="vte-2.91"
+ else
+ vtepackage="vte-2.90"
+ fi
else
vtepackage="vte"
- vteversion="0.24.0"
+ vteminversion="0.24.0"
fi
- if $pkg_config --exists "$vtepackage >= $vteversion"; then
- vte_cflags=`$pkg_config --cflags $vtepackage`
- vte_libs=`$pkg_config --libs $vtepackage`
+ if $pkg_config --exists "$vtepackage >= $vteminversion"; then
+ vte_cflags=$($pkg_config --cflags $vtepackage)
+ vte_libs=$($pkg_config --libs $vtepackage)
+ vteversion=$($pkg_config --modversion $vtepackage)
libs_softmmu="$vte_libs $libs_softmmu"
vte="yes"
elif test "$vte" = "yes"; then
if test "$gtkabi" = "3.0"; then
- feature_not_found "vte" "Install libvte-2.90 devel"
+ feature_not_found "vte" "Install libvte-2.90/2.91 devel"
else
feature_not_found "vte" "Install libvte devel"
fi
@@ -2117,25 +2448,37 @@
# Look for sdl configuration program (pkg-config or sdl-config). Try
# sdl-config even without cross prefix, and favour pkg-config over sdl-config.
+if test "$sdlabi" = ""; then
+ if $pkg_config --exists "sdl"; then
+ sdlabi=1.2
+ elif $pkg_config --exists "sdl2"; then
+ sdlabi=2.0
+ else
+ sdlabi=1.2
+ fi
+fi
+
if test $sdlabi = "2.0"; then
sdl_config=$sdl2_config
sdlname=sdl2
sdlconfigname=sdl2_config
-else
+elif test $sdlabi = "1.2"; then
sdlname=sdl
sdlconfigname=sdl_config
+else
+ error_exit "Unknown sdlabi $sdlabi, must be 1.2 or 2.0"
fi
-if test "`basename $sdl_config`" != $sdlconfigname && ! has ${sdl_config}; then
+if test "$(basename $sdl_config)" != $sdlconfigname && ! has ${sdl_config}; then
sdl_config=$sdlconfigname
fi
if $pkg_config $sdlname --exists; then
sdlconfig="$pkg_config $sdlname"
- _sdlversion=`$sdlconfig --modversion 2>/dev/null | sed 's/[^0-9]//g'`
+ sdlversion=$($sdlconfig --modversion 2>/dev/null)
elif has ${sdl_config}; then
sdlconfig="$sdl_config"
- _sdlversion=`$sdlconfig --version | sed 's/[^0-9]//g'`
+ sdlversion=$($sdlconfig --version)
else
if test "$sdl" = "yes" ; then
feature_not_found "sdl" "Install SDL devel"
@@ -2153,26 +2496,24 @@
#undef main /* We don't want SDL to override our main() */
int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
EOF
- sdl_cflags=`$sdlconfig --cflags 2> /dev/null`
+ sdl_cflags=$($sdlconfig --cflags 2>/dev/null)
if test "$static" = "yes" ; then
- sdl_libs=`$sdlconfig --static-libs 2>/dev/null`
+ sdl_libs=$($sdlconfig --static-libs 2>/dev/null)
else
- sdl_libs=`$sdlconfig --libs 2> /dev/null`
+ sdl_libs=$($sdlconfig --libs 2>/dev/null)
fi
if compile_prog "$sdl_cflags" "$sdl_libs" ; then
- if test "$_sdlversion" -lt 121 ; then
+ if test $(echo $sdlversion | sed 's/[^0-9]//g') -lt 121 ; then
sdl_too_old=yes
else
- if test "$cocoa" = "no" ; then
- sdl=yes
- fi
+ sdl=yes
fi
# static link with sdl ? (note: sdl.pc's --static --libs is broken)
if test "$sdl" = "yes" -a "$static" = "yes" ; then
if test $? = 0 && echo $sdl_libs | grep -- -laa > /dev/null; then
- sdl_libs="$sdl_libs `aalib-config --static-libs 2>/dev/null`"
- sdl_cflags="$sdl_cflags `aalib-config --cflags 2>/dev/null`"
+ sdl_libs="$sdl_libs $(aalib-config --static-libs 2>/dev/null)"
+ sdl_cflags="$sdl_cflags $(aalib-config --cflags 2>/dev/null)"
fi
if compile_prog "$sdl_cflags" "$sdl_libs" ; then
:
@@ -2198,8 +2539,9 @@
#endif
int main(void) { return 0; }
EOF
- if compile_prog "$sdl_cflags" "$sdl_libs" ; then
- sdl_libs="$sdl_libs -lX11"
+ if compile_prog "$sdl_cflags $x11_cflags" "$sdl_libs $x11_libs" ; then
+ sdl_cflags="$sdl_cflags $x11_cflags"
+ sdl_libs="$sdl_libs $x11_libs"
fi
libs_softmmu="$sdl_libs $libs_softmmu"
fi
@@ -2228,62 +2570,6 @@
fi
fi
-##########################################
-# VNC TLS/WS detection
-if test "$vnc" = "yes" -a \( "$vnc_tls" != "no" -o "$vnc_ws" != "no" \) ; then
- cat > $TMPC <<EOF
-#include <gnutls/gnutls.h>
-int main(void) { gnutls_session_t s; gnutls_init(&s, GNUTLS_SERVER); return 0; }
-EOF
- vnc_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
- vnc_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
- if compile_prog "$vnc_tls_cflags" "$vnc_tls_libs" ; then
- if test "$vnc_tls" != "no" ; then
- vnc_tls=yes
- fi
- if test "$vnc_ws" != "no" ; then
- vnc_ws=yes
- fi
- libs_softmmu="$vnc_tls_libs $libs_softmmu"
- QEMU_CFLAGS="$QEMU_CFLAGS $vnc_tls_cflags"
- else
- if test "$vnc_tls" = "yes" ; then
- feature_not_found "vnc-tls" "Install gnutls devel"
- fi
- if test "$vnc_ws" = "yes" ; then
- feature_not_found "vnc-ws" "Install gnutls devel"
- fi
- vnc_tls=no
- vnc_ws=no
- fi
-fi
-
-##########################################
-# Quorum probe (check for gnutls)
-if test "$quorum" != "no" ; then
-cat > $TMPC <<EOF
-#include <gnutls/gnutls.h>
-#include <gnutls/crypto.h>
-int main(void) {char data[4096], digest[32];
-gnutls_hash_fast(GNUTLS_DIG_SHA256, data, 4096, digest);
-return 0;
-}
-EOF
-quorum_tls_cflags=`$pkg_config --cflags gnutls 2> /dev/null`
-quorum_tls_libs=`$pkg_config --libs gnutls 2> /dev/null`
-if compile_prog "$quorum_tls_cflags" "$quorum_tls_libs" ; then
- qcow_tls=yes
- libs_softmmu="$quorum_tls_libs $libs_softmmu"
- libs_tools="$quorum_tls_libs $libs_softmmu"
- QEMU_CFLAGS="$QEMU_CFLAGS $quorum_tls_cflags"
- quorum="yes"
-else
- if test "$quorum" = "yes"; then
- feature_not_found "gnutls" "gnutls > 2.10.0 required to compile Quorum"
- fi
- quorum="no"
-fi
-fi
##########################################
# VNC SASL detection
@@ -2344,8 +2630,8 @@
}
EOF
if $pkg_config libpng --exists; then
- vnc_png_cflags=`$pkg_config libpng --cflags`
- vnc_png_libs=`$pkg_config libpng --libs`
+ vnc_png_cflags=$($pkg_config libpng --cflags)
+ vnc_png_libs=$($pkg_config libpng --libs)
else
vnc_png_cflags=""
vnc_png_libs="-lpng"
@@ -2539,7 +2825,7 @@
fi
}
-audio_drv_list=`echo "$audio_drv_list" | sed -e 's/,/ /g'`
+audio_drv_list=$(echo "$audio_drv_list" | sed -e 's/,/ /g')
for drv in $audio_drv_list; do
case $drv in
alsa)
@@ -2548,21 +2834,6 @@
libs_softmmu="-lasound $libs_softmmu"
;;
- fmod)
- if test -z $fmod_lib || test -z $fmod_inc; then
- error_exit "You must specify path to FMOD library and headers" \
- "Example: --fmod-inc=/path/include/fmod --fmod-lib=/path/lib/libfmod-3.74.so"
- fi
- audio_drv_probe $drv fmod.h $fmod_lib "return FSOUND_GetVersion();" "-I $fmod_inc"
- libs_softmmu="$fmod_lib $libs_softmmu"
- ;;
-
- esd)
- audio_drv_probe $drv esd.h -lesd 'return esd_play_stream(0, 0, "", 0);'
- libs_softmmu="-lesd $libs_softmmu"
- audio_pt_int="yes"
- ;;
-
pa)
audio_drv_probe $drv pulse/mainloop.h "-lpulse" \
"pa_mainloop *m = 0; pa_mainloop_free (m); return 0;"
@@ -2579,6 +2850,10 @@
audio_win_int="yes"
;;
+ winaudio)
+ libs_softmmu="-lwinmm $libs_softmmu"
+ ;;
+
oss)
libs_softmmu="$oss_lib $libs_softmmu"
;;
@@ -2587,15 +2862,6 @@
# XXX: Probes for CoreAudio, DirectSound, SDL(?)
;;
- winwave)
- libs_softmmu="-lwinmm $libs_softmmu"
- audio_win_int="yes"
- ;;
-
- winaudio)
- libs_softmmu="-lwinmm $libs_softmmu"
- ;;
-
*)
echo "$audio_possible_drivers" | grep -q "\<$drv\>" || {
error_exit "Unknown driver '$drv' selected" \
@@ -2630,7 +2896,7 @@
# curses probe
if test "$curses" != "no" ; then
if test "$mingw32" = "yes" ; then
- curses_list="-lpdcurses"
+ curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lpdcurses"
else
curses_list="$($pkg_config --libs ncurses 2>/dev/null):-lncurses:-lcurses"
fi
@@ -2675,8 +2941,8 @@
#include <curl/curl.h>
int main(void) { curl_easy_init(); curl_multi_setopt(0, 0, 0); return 0; }
EOF
- curl_cflags=`$curlconfig --cflags 2>/dev/null`
- curl_libs=`$curlconfig --libs 2>/dev/null`
+ curl_cflags=$($curlconfig --cflags 2>/dev/null)
+ curl_libs=$($curlconfig --libs 2>/dev/null)
if compile_prog "$curl_cflags" "$curl_libs" ; then
curl=yes
else
@@ -2694,8 +2960,8 @@
#include <bluetooth/bluetooth.h>
int main(void) { return bt_error(0); }
EOF
- bluez_cflags=`$pkg_config --cflags bluez 2> /dev/null`
- bluez_libs=`$pkg_config --libs bluez 2> /dev/null`
+ bluez_cflags=$($pkg_config --cflags bluez 2>/dev/null)
+ bluez_libs=$($pkg_config --libs bluez 2>/dev/null)
if compile_prog "$bluez_cflags" "$bluez_libs" ; then
bluez=yes
libs_softmmu="$bluez_libs $libs_softmmu"
@@ -2710,12 +2976,7 @@
##########################################
# glib support probe
-if test "$mingw32" = yes; then
- # g_poll is required in order to integrate with the glib main loop.
- glib_req_ver=2.20
-else
- glib_req_ver=2.12
-fi
+glib_req_ver=2.22
glib_modules=gthread-2.0
if test "$modules" = yes; then
glib_modules="$glib_modules gmodule-2.0"
@@ -2723,8 +2984,8 @@
for i in $glib_modules; do
if $pkg_config --atleast-version=$glib_req_ver $i; then
- glib_cflags=`$pkg_config --cflags $i`
- glib_libs=`$pkg_config --libs $i`
+ glib_cflags=$($pkg_config --cflags $i)
+ glib_libs=$($pkg_config --libs $i)
CFLAGS="$glib_cflags $CFLAGS"
LIBS="$glib_libs $LIBS"
libs_qga="$glib_libs $libs_qga"
@@ -2733,18 +2994,54 @@
fi
done
+# Sanity check that the current size_t matches the
+# size that glib thinks it should be. This catches
+# problems on multi-arch where people try to build
+# 32-bit QEMU while pointing at 64-bit glib headers
+cat > $TMPC <<EOF
+#include <glib.h>
+#include <unistd.h>
+
+#define QEMU_BUILD_BUG_ON(x) \
+ typedef char qemu_build_bug_on[(x)?-1:1] __attribute__((unused));
+
+int main(void) {
+ QEMU_BUILD_BUG_ON(sizeof(size_t) != GLIB_SIZEOF_SIZE_T);
+ return 0;
+}
+EOF
+
+if ! compile_prog "$CFLAGS" "$LIBS" ; then
+ error_exit "sizeof(size_t) doesn't match GLIB_SIZEOF_SIZE_T."\
+ "You probably need to set PKG_CONFIG_LIBDIR"\
+ "to point to the right pkg-config files for your"\
+ "build target"
+fi
+
# g_test_trap_subprocess added in 2.38. Used by some tests.
glib_subprocess=yes
if ! $pkg_config --atleast-version=2.38 glib-2.0; then
glib_subprocess=no
fi
+# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage
+cat > $TMPC << EOF
+#include <glib.h>
+int main(void) { return 0; }
+EOF
+if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then
+ if cc_has_warning_flag "-Wno-unknown-attributes"; then
+ glib_cflags="-Wno-unknown-attributes $glib_cflags"
+ CFLAGS="-Wno-unknown-attributes $CFLAGS"
+ fi
+fi
+
##########################################
# SHA command probe for modules
if test "$modules" = yes; then
shacmd_probe="sha1sum sha1 shasum"
for c in $shacmd_probe; do
- if which $c >/dev/null 2>&1; then
+ if has $c; then
shacmd="$c"
break
fi
@@ -2777,8 +3074,8 @@
pixman_libs=
elif test "$pixman" = "system"; then
# pixman version has been checked above
- pixman_cflags=`$pkg_config --cflags pixman-1`
- pixman_libs=`$pkg_config --libs pixman-1`
+ pixman_cflags=$($pkg_config --cflags pixman-1)
+ pixman_libs=$($pkg_config --libs pixman-1)
else
if test ! -d ${source_path}/pixman/pixman; then
error_exit "pixman >= 0.21.8 not present. Your options:" \
@@ -2838,6 +3135,7 @@
if test "$found" = "no"; then
LIBS="$pthread_lib $LIBS"
fi
+ PTHREAD_LIB="$pthread_lib"
break
fi
done
@@ -2894,8 +3192,8 @@
min_libssh2_version=1.2.8
if test "$libssh2" != "no" ; then
if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
- libssh2_cflags=`$pkg_config libssh2 --cflags`
- libssh2_libs=`$pkg_config libssh2 --libs`
+ libssh2_cflags=$($pkg_config libssh2 --cflags)
+ libssh2_libs=$($pkg_config libssh2 --libs)
libssh2=yes
else
if test "$libssh2" = "yes" ; then
@@ -3039,9 +3337,11 @@
if test "$fdt" != "no" ; then
fdt_libs="-lfdt"
# explicitly check for libfdt_env.h as it is missing in some stable installs
+ # and test for required functions to make sure we are on a version >= 1.4.0
cat > $TMPC << EOF
+#include <libfdt.h>
#include <libfdt_env.h>
-int main(void) { return 0; }
+int main(void) { fdt_get_property_by_offset(0, 0, 0); return 0; }
EOF
if compile_prog "" "$fdt_libs" ; then
# system DTC is good - use it
@@ -3059,7 +3359,7 @@
fdt_libs="-L\$(BUILD_DIR)/dtc/libfdt $fdt_libs"
elif test "$fdt" = "yes" ; then
# have neither and want - prompt for system/submodule install
- error_exit "DTC (libfdt) not present. Your options:" \
+ error_exit "DTC (libfdt) version >= 1.4.0 not present. Your options:" \
" (1) Preferred: Install the DTC (libfdt) devel package" \
" (2) Fetch the DTC submodule, using:" \
" git submodule update --init dtc"
@@ -3073,26 +3373,39 @@
libs_softmmu="$libs_softmmu $fdt_libs"
##########################################
-# GLX probe, used by milkymist-tmu2
-if test "$glx" != "no" ; then
- glx_libs="-lGL -lX11"
- cat > $TMPC << EOF
-#include <X11/Xlib.h>
-#include <GL/gl.h>
-#include <GL/glx.h>
-int main(void) { glBegin(0); glXQueryVersion(0,0,0); return 0; }
-EOF
- if compile_prog "" "-lGL -lX11" ; then
- glx=yes
- else
- if test "$glx" = "yes" ; then
- feature_not_found "glx" "Install GL devel (e.g. MESA)"
+# opengl probe (for sdl2, gtk, milkymist-tmu2)
+
+if test "$opengl" != "no" ; then
+ opengl_pkgs="epoxy libdrm gbm"
+ if $pkg_config $opengl_pkgs x11; then
+ opengl_cflags="$($pkg_config --cflags $opengl_pkgs) $x11_cflags"
+ opengl_libs="$($pkg_config --libs $opengl_pkgs) $x11_libs"
+ opengl=yes
+ if test "$gtk" = "yes" && $pkg_config --exists "$gtkpackage >= 3.16"; then
+ gtk_gl="yes"
fi
- glx_libs=
- glx=no
+ else
+ if test "$opengl" = "yes" ; then
+ feature_not_found "opengl" "Please install opengl (mesa) devel pkgs: $opengl_pkgs"
+ fi
+ opengl_cflags=""
+ opengl_libs=""
+ opengl=no
fi
fi
+if test "$opengl" = "yes"; then
+ cat > $TMPC << EOF
+#include <epoxy/egl.h>
+#ifndef EGL_MESA_image_dma_buf_export
+# error mesa/epoxy lacks support for dmabufs (mesa 10.6+)
+#endif
+int main(void) { return 0; }
+EOF
+ if compile_prog "" "" ; then
+ opengl_dmabuf=yes
+ fi
+fi
##########################################
# archipelago probe
@@ -3111,6 +3424,12 @@
archipelago="yes"
libs_tools="$archipelago_libs $libs_tools"
libs_softmmu="$archipelago_libs $libs_softmmu"
+
+ echo "WARNING: Please check the licenses of QEMU and libxseg carefully."
+ echo "GPLv3 versions of libxseg may not be compatible with QEMU's"
+ echo "license and therefore prevent redistribution."
+ echo
+ echo "To disable Archipelago, use --disable-archipelago"
else
if test "$archipelago" = "yes" ; then
feature_not_found "Archipelago backend support" "Install libxseg devel"
@@ -3125,8 +3444,11 @@
if test "$glusterfs" != "no" ; then
if $pkg_config --atleast-version=3 glusterfs-api; then
glusterfs="yes"
- glusterfs_cflags=`$pkg_config --cflags glusterfs-api`
- glusterfs_libs=`$pkg_config --libs glusterfs-api`
+ glusterfs_cflags=$($pkg_config --cflags glusterfs-api)
+ glusterfs_libs=$($pkg_config --libs glusterfs-api)
+ if $pkg_config --atleast-version=4 glusterfs-api; then
+ glusterfs_xlator_opt="yes"
+ fi
if $pkg_config --atleast-version=5 glusterfs-api; then
glusterfs_discard="yes"
fi
@@ -3267,6 +3589,43 @@
fi
fi
+if test "$tcmalloc" = "yes" && test "$jemalloc" = "yes" ; then
+ echo "ERROR: tcmalloc && jemalloc can't be used at the same time"
+ exit 1
+fi
+
+##########################################
+# tcmalloc probe
+
+if test "$tcmalloc" = "yes" ; then
+ cat > $TMPC << EOF
+#include <stdlib.h>
+int main(void) { malloc(1); return 0; }
+EOF
+
+ if compile_prog "" "-ltcmalloc" ; then
+ LIBS="-ltcmalloc $LIBS"
+ else
+ feature_not_found "tcmalloc" "install gperftools devel"
+ fi
+fi
+
+##########################################
+# jemalloc probe
+
+if test "$jemalloc" = "yes" ; then
+ cat > $TMPC << EOF
+#include <stdlib.h>
+int main(void) { malloc(1); return 0; }
+EOF
+
+ if compile_prog "" "-ljemalloc" ; then
+ LIBS="-ljemalloc $LIBS"
+ else
+ feature_not_found "jemalloc" "install jemalloc devel"
+ fi
+fi
+
##########################################
# signalfd probe
signalfd="no"
@@ -3295,6 +3654,22 @@
eventfd=yes
fi
+# check if memfd is supported
+memfd=no
+cat > $TMPC << EOF
+#include <sys/memfd.h>
+
+int main(void)
+{
+ return memfd_create("foo", MFD_ALLOW_SEALING);
+}
+EOF
+if compile_prog "" "" ; then
+ memfd=yes
+fi
+
+
+
# check for fallocate
fallocate=no
cat > $TMPC << EOF
@@ -3326,6 +3701,22 @@
fallocate_punch_hole=yes
fi
+# check that fallocate supports range zeroing inside the file
+fallocate_zero_range=no
+cat > $TMPC << EOF
+#include <fcntl.h>
+#include <linux/falloc.h>
+
+int main(void)
+{
+ fallocate(0, FALLOC_FL_ZERO_RANGE, 0, 0);
+ return 0;
+}
+EOF
+if compile_prog "" "" ; then
+ fallocate_zero_range=yes
+fi
+
# check for posix_fallocate
posix_fallocate=no
cat > $TMPC << EOF
@@ -3434,8 +3825,8 @@
epoll=yes
fi
-# epoll_create1 and epoll_pwait are later additions
-# so we must check separately for their presence
+# epoll_create1 is a later addition
+# so we must check separately for its presence
epoll_create1=no
cat > $TMPC << EOF
#include <sys/epoll.h>
@@ -3457,20 +3848,6 @@
epoll_create1=yes
fi
-epoll_pwait=no
-cat > $TMPC << EOF
-#include <sys/epoll.h>
-
-int main(void)
-{
- epoll_pwait(0, 0, 0, 0, 0);
- return 0;
-}
-EOF
-if compile_prog "" "" ; then
- epoll_pwait=yes
-fi
-
# check for sendfile support
sendfile=no
cat > $TMPC << EOF
@@ -3634,34 +4011,20 @@
fi
fi
-# check for libcacard for smartcard support
+# check for smartcard support
smartcard_cflags=""
-# TODO - what's the minimal nss version we support?
-if test "$smartcard_nss" != "no"; then
- cat > $TMPC << EOF
-#include <pk11pub.h>
-int main(void) { PK11_FreeSlot(0); return 0; }
-EOF
- # FIXME: do not include $glib_* in here
- nss_libs="$($pkg_config --libs nss 2>/dev/null) $glib_libs"
- nss_cflags="$($pkg_config --cflags nss 2>/dev/null) $glib_cflags"
- test_cflags="$nss_cflags"
- # The header files in nss < 3.13.3 have a bug which causes them to
- # emit a warning. If we're going to compile QEMU with -Werror, then
- # test that the headers don't have this bug. Otherwise we would pass
- # the configure test but fail to compile QEMU later.
- if test "$werror" = "yes"; then
- test_cflags="-Werror $test_cflags"
- fi
- if test -n "$libtool" &&
- $pkg_config --atleast-version=3.12.8 nss && \
- compile_prog "$test_cflags" "$nss_libs"; then
- smartcard_nss="yes"
+if test "$smartcard" != "no"; then
+ if $pkg_config libcacard; then
+ libcacard_cflags=$($pkg_config --cflags libcacard)
+ libcacard_libs=$($pkg_config --libs libcacard)
+ QEMU_CFLAGS="$QEMU_CFLAGS $libcacard_cflags"
+ libs_softmmu="$libs_softmmu $libcacard_libs"
+ smartcard="yes"
else
- if test "$smartcard_nss" = "yes"; then
- feature_not_found "nss" "Install nss devel >= 3.12.8"
+ if test "$smartcard" = "yes"; then
+ feature_not_found "smartcard" "Install libcacard devel"
fi
- smartcard_nss="no"
+ smartcard="no"
fi
fi
@@ -3702,13 +4065,13 @@
if test "$mingw32" = "yes" -a "$guest_agent" != "no" -a "$vss_win32_sdk" != "no" ; then
case "$vss_win32_sdk" in
- "") vss_win32_include="-I$source_path" ;;
+ "") vss_win32_include="-isystem $source_path" ;;
*\ *) # The SDK is installed in "Program Files" by default, but we cannot
# handle path with spaces. So we symlink the headers into ".sdk/vss".
- vss_win32_include="-I$source_path/.sdk/vss"
+ vss_win32_include="-isystem $source_path/.sdk/vss"
symlink "$vss_win32_sdk/inc" "$source_path/.sdk/vss/inc"
;;
- *) vss_win32_include="-I$vss_win32_sdk"
+ *) vss_win32_include="-isystem $vss_win32_sdk"
esac
cat > $TMPC << EOF
#define __MIDL_user_allocate_free_DEFINED__
@@ -3719,6 +4082,7 @@
guest_agent_with_vss="yes"
QEMU_CFLAGS="$QEMU_CFLAGS $vss_win32_include"
libs_qga="-lole32 -loleaut32 -lshlwapi -luuid -lstdc++ -Wl,--enable-stdcall-fixup $libs_qga"
+ qga_vss_provider="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb"
else
if test "$vss_win32_sdk" != "" ; then
echo "ERROR: Please download and install Microsoft VSS SDK:"
@@ -3753,6 +4117,45 @@
fi
##########################################
+# check if mingw environment provides a recent ntddscsi.h
+if test "$mingw32" = "yes" -a "$guest_agent" != "no"; then
+ cat > $TMPC << EOF
+#include <windows.h>
+#include <ntddscsi.h>
+int main(void) {
+#if !defined(IOCTL_SCSI_GET_ADDRESS)
+#error Missing required ioctl definitions
+#endif
+ SCSI_ADDRESS addr = { .Lun = 0, .TargetId = 0, .PathId = 0 };
+ return addr.Lun;
+}
+EOF
+ if compile_prog "" "" ; then
+ guest_agent_ntddscsi=yes
+ libs_qga="-lsetupapi $libs_qga"
+ fi
+fi
+
+##########################################
+# virgl renderer probe
+
+if test "$virglrenderer" != "no" ; then
+ cat > $TMPC << EOF
+#include <virglrenderer.h>
+int main(void) { virgl_renderer_poll(); return 0; }
+EOF
+ virgl_cflags=$($pkg_config --cflags virglrenderer 2>/dev/null)
+ virgl_libs=$($pkg_config --libs virglrenderer 2>/dev/null)
+ if $pkg_config virglrenderer >/dev/null 2>&1 && \
+ compile_prog "$virgl_cflags" "$virgl_libs" ; then
+ virglrenderer="yes"
+ else
+ if test "$virglrenderer" = "yes" ; then
+ feature_not_found "virglrenderer"
+ fi
+ virglrenderer="no"
+ fi
+fi
##########################################
# check if we have fdatasync
@@ -3800,24 +4203,6 @@
fi
##########################################
-# check if we have usable SIGEV_THREAD_ID
-
-sigev_thread_id=no
-cat > $TMPC << EOF
-#include <signal.h>
-int main(void) {
- struct sigevent ev;
- ev.sigev_notify = SIGEV_THREAD_ID;
- ev._sigev_un._tid = 0;
- asm volatile("" : : "g"(&ev));
- return 0;
-}
-EOF
-if compile_prog "" "" ; then
- sigev_thread_id=yes
-fi
-
-##########################################
# check if trace backend exists
$python "$source_path/scripts/tracetool.py" "--backends=$trace_backends" --check-backends > /dev/null 2> /dev/null
@@ -3835,12 +4220,12 @@
EOF
if compile_prog "" "" ; then
if $pkg_config lttng-ust --exists; then
- lttng_ust_libs=`$pkg_config --libs lttng-ust`
+ lttng_ust_libs=$($pkg_config --libs lttng-ust)
else
lttng_ust_libs="-llttng-ust"
fi
if $pkg_config liburcu-bp --exists; then
- urcu_bp_libs=`$pkg_config --libs liburcu-bp`
+ urcu_bp_libs=$($pkg_config --libs liburcu-bp)
else
urcu_bp_libs="-lurcu-bp"
fi
@@ -4079,6 +4464,95 @@
getauxval=yes
fi
+########################################
+# check if ccache is interfering with
+# semantic analysis of macros
+
+unset CCACHE_CPP2
+ccache_cpp2=no
+cat > $TMPC << EOF
+static const int Z = 1;
+#define fn() ({ Z; })
+#define TAUT(X) ((X) == Z)
+#define PAREN(X, Y) (X == Y)
+#define ID(X) (X)
+int main(int argc, char *argv[])
+{
+ int x = 0, y = 0;
+ x = ID(x);
+ x = fn();
+ fn();
+ if (PAREN(x, y)) return 0;
+ if (TAUT(Z)) return 0;
+ return 0;
+}
+EOF
+
+if ! compile_object "-Werror"; then
+ ccache_cpp2=yes
+fi
+
+#################################################
+# clang does not support glibc + FORTIFY_SOURCE.
+
+if test "$fortify_source" != "no"; then
+ if echo | $cc -dM -E - | grep __clang__ > /dev/null 2>&1 ; then
+ fortify_source="no";
+ elif test -n "$cxx" &&
+ echo | $cxx -dM -E - | grep __clang__ >/dev/null 2>&1 ; then
+ fortify_source="no";
+ else
+ fortify_source="yes"
+ fi
+fi
+
+##########################################
+# check if struct fsxattr is available via linux/fs.h
+
+have_fsxattr=no
+cat > $TMPC << EOF
+#include <linux/fs.h>
+struct fsxattr foo;
+int main(void) {
+ return 0;
+}
+EOF
+if compile_prog "" "" ; then
+ have_fsxattr=yes
+fi
+
+##########################################
+# check if rtnetlink.h exists and is useful
+have_rtnetlink=no
+cat > $TMPC << EOF
+#include <linux/rtnetlink.h>
+int main(void) {
+ return IFLA_PROTO_DOWN;
+}
+EOF
+if compile_prog "" "" ; then
+ have_rtnetlink=yes
+fi
+
+#################################################
+# Sparc implicitly links with --relax, which is
+# incompatible with -r, so --no-relax should be
+# given. It does no harm to give it on other
+# platforms too.
+
+# Note: the prototype is needed since QEMU_CFLAGS
+# contains -Wmissing-prototypes
+cat > $TMPC << EOF
+extern int foo(void);
+int foo(void) { return 0; }
+EOF
+if ! compile_object ""; then
+ error_exit "Failed to compile object file for LD_REL_FLAGS test"
+fi
+if do_cc -nostdlib -Wl,-r -Wl,--no-relax -o $TMPMO $TMPO; then
+ LD_REL_FLAGS="-Wl,--no-relax"
+fi
+
##########################################
# End of CC checks
# After here, no more $cc or $ld runs
@@ -4086,11 +4560,10 @@
if test "$gcov" = "yes" ; then
CFLAGS="-fprofile-arcs -ftest-coverage -g $CFLAGS"
LDFLAGS="-fprofile-arcs -ftest-coverage $LDFLAGS"
-elif test "$debug" = "no" ; then
- if test "${EXTRA_CFLAGS#*-O}" = "$EXTRA_CFLAGS"; then
- CFLAGS="-O2 $CFLAGS"
- fi
- CFLAGS="-U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS"
+elif test "$fortify_source" = "yes" ; then
+ CFLAGS="-O2 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 $CFLAGS"
+elif test "$debug" = "no"; then
+ CFLAGS="-O2 $CFLAGS"
fi
##########################################
@@ -4108,16 +4581,6 @@
fi
fi
-# Disable zero malloc errors for official releases unless explicitly told to
-# enable/disable
-if test -z "$zero_malloc" ; then
- if test "$z_version" = "50" ; then
- zero_malloc="no"
- else
- zero_malloc="yes"
- fi
-fi
-
# Now we've finished running tests it's OK to add -Werror to the compiler flags
if test "$werror" = "yes"; then
QEMU_CFLAGS="-Werror $QEMU_CFLAGS"
@@ -4155,6 +4618,7 @@
tools="qemu-img\$(EXESUF) qemu-io\$(EXESUF) $tools"
if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" ] ; then
tools="qemu-nbd\$(EXESUF) $tools"
+ tools="ivshmem-client\$(EXESUF) ivshmem-server\$(EXESUF) $tools"
fi
fi
if test "$softmmu" = yes ; then
@@ -4164,18 +4628,18 @@
tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
else
if test "$virtfs" = yes; then
- error_exit "VirtFS is supported only on Linux and requires libcap-devel and libattr-devel"
+ error_exit "VirtFS is supported only on Linux and requires libcap devel and libattr devel"
fi
virtfs=no
fi
fi
fi
+
+# Probe for guest agent support/options
+
if [ "$guest_agent" != "no" ]; then
if [ "$linux" = "yes" -o "$bsd" = "yes" -o "$solaris" = "yes" -o "$mingw32" = "yes" ] ; then
- tools="qemu-ga\$(EXESUF) $tools"
- if [ "$mingw32" = "yes" -a "$guest_agent_with_vss" = "yes" ]; then
- tools="qga/vss-win32/qga-vss.dll qga/vss-win32/qga-vss.tlb $tools"
- fi
+ tools="qemu-ga $tools"
guest_agent=yes
elif [ "$guest_agent" != yes ]; then
guest_agent=no
@@ -4184,12 +4648,78 @@
fi
fi
+# Guest agent Windows MSI package
+
+if test "$guest_agent" != yes; then
+ if test "$guest_agent_msi" = yes; then
+ error_exit "MSI guest agent package requires guest agent enabled"
+ fi
+ guest_agent_msi=no
+elif test "$mingw32" != "yes"; then
+ if test "$guest_agent_msi" = "yes"; then
+ error_exit "MSI guest agent package is available only for MinGW Windows cross-compilation"
+ fi
+ guest_agent_msi=no
+elif ! has wixl; then
+ if test "$guest_agent_msi" = "yes"; then
+ error_exit "MSI guest agent package requires wixl tool installed ( usually from msitools package )"
+ fi
+ guest_agent_msi=no
+else
+ # we support qemu-ga, mingw32, and wixl: default to MSI enabled if it wasn't
+ # disabled explicitly
+ if test "$guest_agent_msi" != "no"; then
+ guest_agent_msi=yes
+ fi
+fi
+
+if test "$guest_agent_msi" = "yes"; then
+ if test "$guest_agent_with_vss" = "yes"; then
+ QEMU_GA_MSI_WITH_VSS="-D InstallVss"
+ fi
+
+ if test "$QEMU_GA_MANUFACTURER" = ""; then
+ QEMU_GA_MANUFACTURER=QEMU
+ fi
+
+ if test "$QEMU_GA_DISTRO" = ""; then
+ QEMU_GA_DISTRO=Linux
+ fi
+
+ if test "$QEMU_GA_VERSION" = ""; then
+ QEMU_GA_VERSION=$(cat $source_path/VERSION)
+ fi
+
+ QEMU_GA_MSI_MINGW_DLL_PATH="-D Mingw_dlls=$($pkg_config --variable=prefix glib-2.0)/bin"
+
+ case "$cpu" in
+ x86_64)
+ QEMU_GA_MSI_ARCH="-a x64 -D Arch=64"
+ ;;
+ i386)
+ QEMU_GA_MSI_ARCH="-D Arch=32"
+ ;;
+ *)
+ error_exit "CPU $cpu not supported for building installation package"
+ ;;
+ esac
+fi
+
# Mac OS X ships with a broken assembler
roms=
if test \( "$cpu" = "i386" -o "$cpu" = "x86_64" \) -a \
"$targetos" != "Darwin" -a "$targetos" != "SunOS" -a \
"$softmmu" = yes ; then
- roms="optionrom"
+ # Different host OS linkers have different ideas about the name of the ELF
+ # emulation. Linux and OpenBSD use 'elf_i386'; FreeBSD uses the _fbsd
+ # variant; and Windows uses i386pe.
+ for emu in elf_i386 elf_i386_fbsd i386pe; do
+ if "$ld" -verbose 2>&1 | grep -q "^[[:space:]]*$emu[[:space:]]*$"; then
+ ld_i386_emulation="$emu"
+ roms="optionrom"
+ break
+ fi
+ done
fi
if test "$cpu" = "ppc64" -a "$targetos" != "Darwin" ; then
roms="$roms spapr-rtas"
@@ -4200,7 +4730,7 @@
fi
# Probe for the need for relocating the user-only binary.
-if test "$pie" = "no" ; then
+if ( [ "$linux_user" = yes ] || [ "$bsd_user" = yes ] ) && [ "$pie" = no ]; then
textseg_addr=
case "$cpu" in
arm | i386 | ppc* | s390* | sparc* | x86_64 | x32)
@@ -4222,6 +4752,16 @@
# In case ld does not support -Ttext-segment, edit the default linker
# script via sed to set the .text start addr. This is needed on FreeBSD
# at least.
+ if ! $ld --verbose >/dev/null 2>&1; then
+ error_exit \
+ "We need to link the QEMU user mode binaries at a" \
+ "specific text address. Unfortunately your linker" \
+ "doesn't support either the -Ttext-segment option or" \
+ "printing the default linker script with --verbose." \
+ "If you don't want the user mode binaries, pass the" \
+ "--disable-user option to configure."
+ fi
+
$ld --verbose | sed \
-e '1,/==================================================/d' \
-e '/==================================================/,$d' \
@@ -4232,21 +4772,27 @@
fi
fi
+echo_version() { # print "($2)" when $1 is "yes"; print nothing otherwise
+ if test "$1" = "yes" ; then
+ echo "($2)"
+ fi
+}
+
# prepend pixman and ftd flags after all config tests are done
QEMU_CFLAGS="$pixman_cflags $fdt_cflags $QEMU_CFLAGS"
libs_softmmu="$pixman_libs $libs_softmmu"
echo "Install prefix $prefix"
-echo "BIOS directory `eval echo $qemu_datadir`"
-echo "binary directory `eval echo $bindir`"
-echo "library directory `eval echo $libdir`"
-echo "module directory `eval echo $qemu_moddir`"
-echo "libexec directory `eval echo $libexecdir`"
-echo "include directory `eval echo $includedir`"
-echo "config directory `eval echo $sysconfdir`"
+echo "BIOS directory $(eval echo $qemu_datadir)"
+echo "binary directory $(eval echo $bindir)"
+echo "library directory $(eval echo $libdir)"
+echo "module directory $(eval echo $qemu_moddir)"
+echo "libexec directory $(eval echo $libexecdir)"
+echo "include directory $(eval echo $includedir)"
+echo "config directory $(eval echo $sysconfdir)"
if test "$mingw32" = "no" ; then
-echo "local state directory `eval echo $local_statedir`"
-echo "Manual directory `eval echo $mandir`"
+echo "local state directory $(eval echo $local_statedir)"
+echo "Manual directory $(eval echo $mandir)"
echo "ELF interp prefix $interp_prefix"
else
echo "local state directory queried at runtime"
@@ -4281,10 +4827,20 @@
echo "Cocoa support $cocoa"
fi
echo "pixman $pixman"
-echo "SDL support $sdl"
-echo "GTK support $gtk"
-echo "VTE support $vte"
+echo "SDL support $sdl $(echo_version $sdl $sdlversion)"
+echo "GTK support $gtk $(echo_version $gtk $gtk_version)"
+echo "GTK GL support $gtk_gl"
+echo "VTE support $vte $(echo_version $vte $vteversion)"
+echo "TLS priority $tls_priority"
+echo "GNUTLS support $gnutls"
+echo "GNUTLS rnd $gnutls_rnd"
+echo "libgcrypt $gcrypt"
+echo "libgcrypt kdf $gcrypt_kdf"
+echo "nettle $nettle $(echo_version $nettle $nettle_version)"
+echo "nettle kdf $nettle_kdf"
+echo "libtasn1 $tasn1"
echo "curses support $curses"
+echo "virgl support $virglrenderer"
echo "curl support $curl"
echo "mingw32 support $mingw32"
echo "Audio drivers $audio_drv_list"
@@ -4293,20 +4849,21 @@
echo "VirtFS support $virtfs"
echo "VNC support $vnc"
if test "$vnc" = "yes" ; then
- echo "VNC TLS support $vnc_tls"
echo "VNC SASL support $vnc_sasl"
echo "VNC JPEG support $vnc_jpeg"
echo "VNC PNG support $vnc_png"
- echo "VNC WS support $vnc_ws"
fi
if test -n "$sparc_cpu"; then
echo "Target Sparc Arch $sparc_cpu"
fi
echo "xen support $xen"
+if test "$xen" = "yes" ; then
+ echo "xen ctrl version $xen_ctrl_version"
+ echo "pv dom build $xen_pv_domain_build"
+fi
echo "brlapi support $brlapi"
echo "bluez support $bluez"
echo "Documentation $docs"
-echo "GUEST_BASE $guest_base"
echo "PIE $pie"
echo "vde support $vde"
echo "netmap support $netmap"
@@ -4322,30 +4879,28 @@
echo "fdatasync $fdatasync"
echo "madvise $madvise"
echo "posix_madvise $posix_madvise"
-echo "sigev_thread_id $sigev_thread_id"
echo "uuid support $uuid"
echo "libcap-ng support $cap_ng"
echo "vhost-net support $vhost_net"
echo "vhost-scsi support $vhost_scsi"
echo "Trace backends $trace_backends"
-if test "$trace_backend" = "simple"; then
+if have_backend "simple"; then
echo "Trace output file $trace_file-<pid>"
fi
-if test "$spice" = "yes"; then
-echo "spice support $spice ($spice_protocol_version/$spice_server_version)"
-else
-echo "spice support $spice"
-fi
+echo "spice support $spice $(echo_version $spice $spice_protocol_version/$spice_server_version)"
echo "rbd support $rbd"
echo "xfsctl support $xfs"
-echo "nss used $smartcard_nss"
+echo "smartcard support $smartcard"
echo "libusb $libusb"
echo "usb net redir $usb_redir"
-echo "GLX support $glx"
+echo "OpenGL support $opengl"
+echo "OpenGL dmabufs $opengl_dmabuf"
echo "libiscsi support $libiscsi"
echo "libnfs support $libnfs"
echo "build guest agent $guest_agent"
echo "QGA VSS support $guest_agent_with_vss"
+echo "QGA w32 disk info $guest_agent_ntddscsi"
+echo "QGA MSI support $guest_agent_msi"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
@@ -4358,11 +4913,13 @@
echo "TPM passthrough $tpm_passthrough"
echo "QOM debugging $qom_cast_debug"
echo "vhdx $vhdx"
-echo "Quorum $quorum"
echo "lzo support $lzo"
echo "snappy support $snappy"
+echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
-echo "Android support $android"
+echo "tcmalloc support $tcmalloc"
+echo "jemalloc support $jemalloc"
+echo "avx2 optimization $avx2_opt"
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -4409,7 +4966,7 @@
fi
if test "$mingw32" = "yes" ; then
echo "CONFIG_WIN32=y" >> $config_host_mak
- rc_version=`cat $source_path/VERSION`
+ rc_version=$(cat $source_path/VERSION)
version_major=${rc_version%%.*}
rc_version=${rc_version#*.}
version_minor=${rc_version%%.*}
@@ -4420,8 +4977,21 @@
echo "CONFIG_PRODUCTVERSION=$version_major,$version_minor,$version_subminor,$version_micro" >> $config_host_mak
if test "$guest_agent_with_vss" = "yes" ; then
echo "CONFIG_QGA_VSS=y" >> $config_host_mak
+ echo "QGA_VSS_PROVIDER=$qga_vss_provider" >> $config_host_mak
echo "WIN_SDK=\"$win_sdk\"" >> $config_host_mak
fi
+ if test "$guest_agent_ntddscsi" = "yes" ; then
+ echo "CONFIG_QGA_NTDDDISK=y" >> $config_host_mak
+ fi
+ if test "$guest_agent_msi" = "yes"; then
+ echo "QEMU_GA_MSI_ENABLED=yes" >> $config_host_mak
+ echo "QEMU_GA_MSI_MINGW_DLL_PATH=${QEMU_GA_MSI_MINGW_DLL_PATH}" >> $config_host_mak
+ echo "QEMU_GA_MSI_WITH_VSS=${QEMU_GA_MSI_WITH_VSS}" >> $config_host_mak
+ echo "QEMU_GA_MSI_ARCH=${QEMU_GA_MSI_ARCH}" >> $config_host_mak
+ echo "QEMU_GA_MANUFACTURER=${QEMU_GA_MANUFACTURER}" >> $config_host_mak
+ echo "QEMU_GA_DISTRO=${QEMU_GA_DISTRO}" >> $config_host_mak
+ echo "QEMU_GA_VERSION=${QEMU_GA_VERSION}" >> $config_host_mak
+ fi
else
echo "CONFIG_POSIX=y" >> $config_host_mak
fi
@@ -4472,11 +5042,8 @@
fi
echo "CONFIG_AUDIO_DRIVERS=$audio_drv_list" >> $config_host_mak
for drv in $audio_drv_list; do
- def=CONFIG_`echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]'`
+ def=CONFIG_$(echo $drv | LC_ALL=C tr '[a-z]' '[A-Z]')
echo "$def=y" >> $config_host_mak
- if test "$drv" = "fmod"; then
- echo "FMOD_CFLAGS=-I$fmod_inc" >> $config_host_mak
- fi
done
if test "$audio_pt_int" = "yes" ; then
echo "CONFIG_AUDIO_PT_INT=y" >> $config_host_mak
@@ -4489,9 +5056,6 @@
if test "$vnc" = "yes" ; then
echo "CONFIG_VNC=y" >> $config_host_mak
fi
-if test "$vnc_tls" = "yes" ; then
- echo "CONFIG_VNC_TLS=y" >> $config_host_mak
-fi
if test "$vnc_sasl" = "yes" ; then
echo "CONFIG_VNC_SASL=y" >> $config_host_mak
fi
@@ -4501,10 +5065,6 @@
if test "$vnc_png" = "yes" ; then
echo "CONFIG_VNC_PNG=y" >> $config_host_mak
fi
-if test "$vnc_ws" = "yes" ; then
- echo "CONFIG_VNC_WS=y" >> $config_host_mak
- echo "VNC_WS_CFLAGS=$vnc_ws_cflags" >> $config_host_mak
-fi
if test "$fnmatch" = "yes" ; then
echo "CONFIG_FNMATCH=y" >> $config_host_mak
fi
@@ -4514,7 +5074,7 @@
if test "$xfs" = "yes" ; then
echo "CONFIG_XFS=y" >> $config_host_mak
fi
-qemu_version=`head $source_path/VERSION`
+qemu_version=$(head $source_path/VERSION)
echo "VERSION=$qemu_version" >>$config_host_mak
echo "PKGVERSION=$pkgversion" >>$config_host_mak
echo "SRC_PATH=$source_path" >> $config_host_mak
@@ -4525,7 +5085,7 @@
if test "$modules" = "yes"; then
# $shacmd can generate a hash started with digit, which the compiler doesn't
# like as an symbol. So prefix it with an underscore
- echo "CONFIG_STAMP=_`(echo $qemu_version; echo $pkgversion; cat $0) | $shacmd - | cut -f1 -d\ `" >> $config_host_mak
+ echo "CONFIG_STAMP=_$( (echo $qemu_version; echo $pkgversion; cat $0) | $shacmd - | cut -f1 -d\ )" >> $config_host_mak
echo "CONFIG_MODULES=y" >> $config_host_mak
fi
if test "$sdl" = "yes" ; then
@@ -4554,12 +5114,18 @@
if test "$eventfd" = "yes" ; then
echo "CONFIG_EVENTFD=y" >> $config_host_mak
fi
+if test "$memfd" = "yes" ; then
+ echo "CONFIG_MEMFD=y" >> $config_host_mak
+fi
if test "$fallocate" = "yes" ; then
echo "CONFIG_FALLOCATE=y" >> $config_host_mak
fi
if test "$fallocate_punch_hole" = "yes" ; then
echo "CONFIG_FALLOCATE_PUNCH_HOLE=y" >> $config_host_mak
fi
+if test "$fallocate_zero_range" = "yes" ; then
+ echo "CONFIG_FALLOCATE_ZERO_RANGE=y" >> $config_host_mak
+fi
if test "$posix_fallocate" = "yes" ; then
echo "CONFIG_POSIX_FALLOCATE=y" >> $config_host_mak
fi
@@ -4584,9 +5150,6 @@
if test "$epoll_create1" = "yes" ; then
echo "CONFIG_EPOLL_CREATE1=y" >> $config_host_mak
fi
-if test "$epoll_pwait" = "yes" ; then
- echo "CONFIG_EPOLL_PWAIT=y" >> $config_host_mak
-fi
if test "$sendfile" = "yes" ; then
echo "CONFIG_SENDFILE=y" >> $config_host_mak
fi
@@ -4620,7 +5183,7 @@
echo "CONFIG_BLUEZ=y" >> $config_host_mak
echo "BLUEZ_CFLAGS=$bluez_cflags" >> $config_host_mak
fi
-if test "glib_subprocess" = "yes" ; then
+if test "$glib_subprocess" = "yes" ; then
echo "CONFIG_HAS_GLIB_SUBPROCESS_TESTS=y" >> $config_host_mak
fi
echo "GLIB_CFLAGS=$glib_cflags" >> $config_host_mak
@@ -4628,14 +5191,60 @@
echo "CONFIG_GTK=y" >> $config_host_mak
echo "CONFIG_GTKABI=$gtkabi" >> $config_host_mak
echo "GTK_CFLAGS=$gtk_cflags" >> $config_host_mak
+ echo "GTK_LIBS=$gtk_libs" >> $config_host_mak
+ if test "$gtk_gl" = "yes" ; then
+ echo "CONFIG_GTK_GL=y" >> $config_host_mak
+ fi
+fi
+echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak
+if test "$gnutls" = "yes" ; then
+ echo "CONFIG_GNUTLS=y" >> $config_host_mak
+fi
+if test "$gnutls_rnd" = "yes" ; then
+ echo "CONFIG_GNUTLS_RND=y" >> $config_host_mak
+fi
+if test "$gcrypt" = "yes" ; then
+ echo "CONFIG_GCRYPT=y" >> $config_host_mak
+ if test "$gcrypt_kdf" = "yes" ; then
+ echo "CONFIG_GCRYPT_KDF=y" >> $config_host_mak
+ fi
+fi
+if test "$nettle" = "yes" ; then
+ echo "CONFIG_NETTLE=y" >> $config_host_mak
+ echo "CONFIG_NETTLE_VERSION_MAJOR=${nettle_version%%.*}" >> $config_host_mak
+ if test "$nettle_kdf" = "yes" ; then
+ echo "CONFIG_NETTLE_KDF=y" >> $config_host_mak
+ fi
+fi
+if test "$tasn1" = "yes" ; then
+ echo "CONFIG_TASN1=y" >> $config_host_mak
+fi
+if test "$have_ifaddrs_h" = "yes" ; then
+ echo "HAVE_IFADDRS_H=y" >> $config_host_mak
+fi
+
+# Work around a system header bug with some kernel/XFS header
+# versions where they both try to define 'struct fsxattr':
+# xfs headers will not try to redefine structs from linux headers
+# if this macro is set.
+if test "$have_fsxattr" = "yes" ; then
+ echo "HAVE_FSXATTR=y" >> $config_host_mak
fi
if test "$vte" = "yes" ; then
echo "CONFIG_VTE=y" >> $config_host_mak
echo "VTE_CFLAGS=$vte_cflags" >> $config_host_mak
fi
+if test "$virglrenderer" = "yes" ; then
+ echo "CONFIG_VIRGL=y" >> $config_host_mak
+ echo "VIRGL_CFLAGS=$virgl_cflags" >> $config_host_mak
+ echo "VIRGL_LIBS=$virgl_libs" >> $config_host_mak
+fi
if test "$xen" = "yes" ; then
echo "CONFIG_XEN_BACKEND=y" >> $config_host_mak
echo "CONFIG_XEN_CTRL_INTERFACE_VERSION=$xen_ctrl_version" >> $config_host_mak
+ if test "$xen_pv_domain_build" = "yes" ; then
+ echo "CONFIG_XEN_PV_DOMAIN_BUILD=y" >> $config_host_mak
+ fi
fi
if test "$linux_aio" = "yes" ; then
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
@@ -4682,18 +5291,13 @@
if test "$posix_madvise" = "yes" ; then
echo "CONFIG_POSIX_MADVISE=y" >> $config_host_mak
fi
-if test "$sigev_thread_id" = "yes" ; then
- echo "CONFIG_SIGEV_THREAD_ID=y" >> $config_host_mak
-fi
if test "$spice" = "yes" ; then
echo "CONFIG_SPICE=y" >> $config_host_mak
fi
-if test "$smartcard_nss" = "yes" ; then
- echo "CONFIG_SMARTCARD_NSS=y" >> $config_host_mak
- echo "NSS_LIBS=$nss_libs" >> $config_host_mak
- echo "NSS_CFLAGS=$nss_cflags" >> $config_host_mak
+if test "$smartcard" = "yes" ; then
+ echo "CONFIG_SMARTCARD=y" >> $config_host_mak
fi
if test "$libusb" = "yes" ; then
@@ -4704,9 +5308,17 @@
echo "CONFIG_USB_REDIR=y" >> $config_host_mak
fi
-if test "$glx" = "yes" ; then
- echo "CONFIG_GLX=y" >> $config_host_mak
- echo "GLX_LIBS=$glx_libs" >> $config_host_mak
+if test "$opengl" = "yes" ; then
+ echo "CONFIG_OPENGL=y" >> $config_host_mak
+ echo "OPENGL_CFLAGS=$opengl_cflags" >> $config_host_mak
+ echo "OPENGL_LIBS=$opengl_libs" >> $config_host_mak
+ if test "$opengl_dmabuf" = "yes" ; then
+ echo "CONFIG_OPENGL_DMABUF=y" >> $config_host_mak
+ fi
+fi
+
+if test "$avx2_opt" = "yes" ; then
+ echo "CONFIG_AVX2_OPT=y" >> $config_host_mak
fi
if test "$lzo" = "yes" ; then
@@ -4717,6 +5329,11 @@
echo "CONFIG_SNAPPY=y" >> $config_host_mak
fi
+if test "$bzip2" = "yes" ; then
+ echo "CONFIG_BZIP2=y" >> $config_host_mak
+ echo "BZIP2_LIBS=-lbz2" >> $config_host_mak
+fi
+
if test "$libiscsi" = "yes" ; then
echo "CONFIG_LIBISCSI=m" >> $config_host_mak
echo "LIBISCSI_CFLAGS=$libiscsi_cflags" >> $config_host_mak
@@ -4736,8 +5353,8 @@
echo "CONFIG_BSD=y" >> $config_host_mak
fi
-if test "$zero_malloc" = "yes" ; then
- echo "CONFIG_ZERO_MALLOC=y" >> $config_host_mak
+if test "$localtime_r" = "yes" ; then
+ echo "CONFIG_LOCALTIME_R=y" >> $config_host_mak
fi
if test "$qom_cast_debug" = "yes" ; then
echo "CONFIG_QOM_CAST_DEBUG=y" >> $config_host_mak
@@ -4793,6 +5410,10 @@
echo "GLUSTERFS_LIBS=$glusterfs_libs" >> $config_host_mak
fi
+if test "$glusterfs_xlator_opt" = "yes" ; then
+ echo "CONFIG_GLUSTERFS_XLATOR_OPT=y" >> $config_host_mak
+fi
+
if test "$glusterfs_discard" = "yes" ; then
echo "CONFIG_GLUSTERFS_DISCARD=y" >> $config_host_mak
fi
@@ -4812,10 +5433,6 @@
echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
fi
-if test "$quorum" = "yes" ; then
- echo "CONFIG_QUORUM=y" >> $config_host_mak
-fi
-
if test "$vhdx" = "yes" ; then
echo "CONFIG_VHDX=y" >> $config_host_mak
fi
@@ -4844,8 +5461,8 @@
# Set the appropriate trace file.
trace_file="\"$trace_file-\" FMT_pid"
fi
-if have_backend "stderr"; then
- echo "CONFIG_TRACE_STDERR=y" >> $config_host_mak
+if have_backend "log"; then
+ echo "CONFIG_TRACE_LOG=y" >> $config_host_mak
fi
if have_backend "ust"; then
echo "CONFIG_TRACE_UST=y" >> $config_host_mak
@@ -4869,6 +5486,10 @@
echo "CONFIG_RDMA=y" >> $config_host_mak
fi
+if test "$have_rtnetlink" = "yes" ; then
+ echo "CONFIG_RTNETLINK=y" >> $config_host_mak
+fi
+
# Hold two types of flag:
# CONFIG_THREAD_SETNAME_BYTHREAD - we've got a way of setting the name on
# a thread we have a handle to
@@ -4900,13 +5521,8 @@
echo "INSTALL=$install" >> $config_host_mak
echo "INSTALL_DIR=$install -d -m 0755" >> $config_host_mak
echo "INSTALL_DATA=$install -c -m 0644" >> $config_host_mak
-if test -n "$libtool"; then
- echo "INSTALL_PROG=\$(LIBTOOL) --mode=install $install -c -m 0755" >> $config_host_mak
- echo "INSTALL_LIB=\$(LIBTOOL) --mode=install $install -c -m 0644" >> $config_host_mak
-else
- echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak
- echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak
-fi
+echo "INSTALL_PROG=$install -c -m 0755" >> $config_host_mak
+echo "INSTALL_LIB=$install -c -m 0644" >> $config_host_mak
echo "PYTHON=$python" >> $config_host_mak
echo "CC=$cc" >> $config_host_mak
if $iasl -h > /dev/null 2>&1; then
@@ -4919,18 +5535,19 @@
echo "AR=$ar" >> $config_host_mak
echo "ARFLAGS=$ARFLAGS" >> $config_host_mak
echo "AS=$as" >> $config_host_mak
+echo "CCAS=$ccas" >> $config_host_mak
echo "CPP=$cpp" >> $config_host_mak
echo "OBJCOPY=$objcopy" >> $config_host_mak
echo "LD=$ld" >> $config_host_mak
echo "NM=$nm" >> $config_host_mak
echo "WINDRES=$windres" >> $config_host_mak
-echo "LIBTOOL=$libtool" >> $config_host_mak
echo "CFLAGS=$CFLAGS" >> $config_host_mak
echo "CFLAGS_NOPIE=$CFLAGS_NOPIE" >> $config_host_mak
echo "QEMU_CFLAGS=$QEMU_CFLAGS" >> $config_host_mak
echo "QEMU_INCLUDES=$QEMU_INCLUDES" >> $config_host_mak
if test "$sparse" = "yes" ; then
echo "CC := REAL_CC=\"\$(CC)\" cgcc" >> $config_host_mak
+ echo "CPP := REAL_CC=\"\$(CPP)\" cgcc" >> $config_host_mak
echo "CXX := REAL_CC=\"\$(CXX)\" cgcc" >> $config_host_mak
echo "HOST_CC := REAL_CC=\"\$(HOST_CC)\" cgcc" >> $config_host_mak
echo "QEMU_CFLAGS += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
@@ -4942,13 +5559,17 @@
fi
echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
echo "LDFLAGS_NOPIE=$LDFLAGS_NOPIE" >> $config_host_mak
-echo "LIBTOOLFLAGS=$LIBTOOLFLAGS" >> $config_host_mak
+echo "LD_REL_FLAGS=$LD_REL_FLAGS" >> $config_host_mak
+echo "LD_I386_EMULATION=$ld_i386_emulation" >> $config_host_mak
echo "LIBS+=$LIBS" >> $config_host_mak
echo "LIBS_TOOLS+=$libs_tools" >> $config_host_mak
+echo "PTHREAD_LIB=$PTHREAD_LIB" >> $config_host_mak
echo "EXESUF=$EXESUF" >> $config_host_mak
echo "DSOSUF=$DSOSUF" >> $config_host_mak
echo "LDFLAGS_SHARED=$LDFLAGS_SHARED" >> $config_host_mak
echo "LIBS_QGA+=$libs_qga" >> $config_host_mak
+echo "TASN1_LIBS=$tasn1_libs" >> $config_host_mak
+echo "TASN1_CFLAGS=$tasn1_cflags" >> $config_host_mak
echo "POD2MAN=$POD2MAN" >> $config_host_mak
echo "TRANSLATE_OPT_CFLAGS=$TRANSLATE_OPT_CFLAGS" >> $config_host_mak
if test "$gcov" = "yes" ; then
@@ -4989,7 +5610,7 @@
for target in $target_list; do
target_dir="$target"
config_target_mak=$target_dir/config-target.mak
-target_name=`echo $target | cut -d '-' -f 1`
+target_name=$(echo $target | cut -d '-' -f 1)
target_bigendian="no"
case "$target_name" in
@@ -5029,7 +5650,7 @@
echo "# Automatically generated by configure - do not modify" > $config_target_mak
bflt="no"
-interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_name/g"`
+interp_prefix1=$(echo "$interp_prefix" | sed "s/%M/$target_name/g")
gdb_xml_files=""
TARGET_ARCH="$target_name"
@@ -5081,8 +5702,6 @@
TARGET_BASE_ARCH=mips
echo "TARGET_ABI_MIPSN64=y" >> $config_target_mak
;;
- tricore)
- ;;
moxie)
;;
or32)
@@ -5100,20 +5719,20 @@
ppc64)
TARGET_BASE_ARCH=ppc
TARGET_ABI_DIR=ppc
- gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
+ gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
;;
ppc64le)
TARGET_ARCH=ppc64
TARGET_BASE_ARCH=ppc
TARGET_ABI_DIR=ppc
- gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
+ gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
;;
ppc64abi32)
TARGET_ARCH=ppc64
TARGET_BASE_ARCH=ppc
TARGET_ABI_DIR=ppc
echo "TARGET_ABI32=y" >> $config_target_mak
- gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
+ gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml power-vsx.xml"
;;
sh4|sh4eb)
TARGET_ARCH=sh4
@@ -5131,7 +5750,11 @@
echo "TARGET_ABI32=y" >> $config_target_mak
;;
s390x)
- gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml"
+ gdb_xml_files="s390x-core64.xml s390-acr.xml s390-fpr.xml s390-vx.xml s390-cr.xml s390-virt.xml"
+ ;;
+ tilegx)
+ ;;
+ tricore)
;;
unicore32)
;;
@@ -5153,7 +5776,7 @@
echo "$@"| LC_ALL=C tr '[a-z]' '[A-Z]'
}
-target_arch_name="`upper $TARGET_ARCH`"
+target_arch_name="$(upper $TARGET_ARCH)"
echo "TARGET_$target_arch_name=y" >> $config_target_mak
echo "TARGET_NAME=$target_name" >> $config_target_mak
echo "TARGET_BASE_ARCH=$TARGET_BASE_ARCH" >> $config_target_mak
@@ -5186,10 +5809,13 @@
\( "$target_name" = "ppcemb" -a "$cpu" = "ppc64" \) -o \
\( "$target_name" = "mipsel" -a "$cpu" = "mips" \) -o \
\( "$target_name" = "x86_64" -a "$cpu" = "i386" \) -o \
- \( "$target_name" = "i386" -a "$cpu" = "x86_64" \) \) ; then
+ \( "$target_name" = "i386" -a "$cpu" = "x86_64" \) -o \
+ \( "$target_name" = "x86_64" -a "$cpu" = "x32" \) -o \
+ \( "$target_name" = "i386" -a "$cpu" = "x32" \) \) ; then
echo "CONFIG_KVM=y" >> $config_target_mak
if test "$vhost_net" = "yes" ; then
echo "CONFIG_VHOST_NET=y" >> $config_target_mak
+ echo "CONFIG_VHOST_NET_TEST_$target_name=y" >> $config_host_mak
fi
fi
esac
@@ -5226,9 +5852,6 @@
if test "$target_user_only" = "yes" -a "$bflt" = "yes"; then
echo "TARGET_HAS_BFLT=y" >> $config_target_mak
fi
-if test "$target_user_only" = "yes" -a "$guest_base" = "yes"; then
- echo "CONFIG_USE_GUEST_BASE=y" >> $config_target_mak
-fi
if test "$target_bsd_user" = "yes" ; then
echo "CONFIG_BSD_USER=y" >> $config_target_mak
fi
@@ -5238,91 +5861,76 @@
cflags=""
ldflags=""
+disas_config() {
+  # Enable the disassembler named by $1: the same CONFIG_<name>_DIS=y line
+  # is appended to the current target's makefile fragment and to
+  # config-all-disas.mak.
+  printf '%s\n' "CONFIG_${1}_DIS=y" >> $config_target_mak
+  printf '%s\n' "CONFIG_${1}_DIS=y" >> config-all-disas.mak
+}
+
for i in $ARCH $TARGET_BASE_ARCH ; do
case "$i" in
alpha)
- echo "CONFIG_ALPHA_DIS=y" >> $config_target_mak
- echo "CONFIG_ALPHA_DIS=y" >> config-all-disas.mak
+ disas_config "ALPHA"
;;
aarch64)
if test -n "${cxx}"; then
- echo "CONFIG_ARM_A64_DIS=y" >> $config_target_mak
- echo "CONFIG_ARM_A64_DIS=y" >> config-all-disas.mak
+ disas_config "ARM_A64"
fi
;;
arm)
- echo "CONFIG_ARM_DIS=y" >> $config_target_mak
- echo "CONFIG_ARM_DIS=y" >> config-all-disas.mak
+ disas_config "ARM"
if test -n "${cxx}"; then
- echo "CONFIG_ARM_A64_DIS=y" >> $config_target_mak
- echo "CONFIG_ARM_A64_DIS=y" >> config-all-disas.mak
+ disas_config "ARM_A64"
fi
;;
cris)
- echo "CONFIG_CRIS_DIS=y" >> $config_target_mak
- echo "CONFIG_CRIS_DIS=y" >> config-all-disas.mak
+ disas_config "CRIS"
;;
hppa)
- echo "CONFIG_HPPA_DIS=y" >> $config_target_mak
- echo "CONFIG_HPPA_DIS=y" >> config-all-disas.mak
+ disas_config "HPPA"
;;
i386|x86_64|x32)
- echo "CONFIG_I386_DIS=y" >> $config_target_mak
- echo "CONFIG_I386_DIS=y" >> config-all-disas.mak
+ disas_config "I386"
;;
ia64*)
- echo "CONFIG_IA64_DIS=y" >> $config_target_mak
- echo "CONFIG_IA64_DIS=y" >> config-all-disas.mak
+ disas_config "IA64"
;;
lm32)
- echo "CONFIG_LM32_DIS=y" >> $config_target_mak
- echo "CONFIG_LM32_DIS=y" >> config-all-disas.mak
+ disas_config "LM32"
;;
m68k)
- echo "CONFIG_M68K_DIS=y" >> $config_target_mak
- echo "CONFIG_M68K_DIS=y" >> config-all-disas.mak
+ disas_config "M68K"
;;
microblaze*)
- echo "CONFIG_MICROBLAZE_DIS=y" >> $config_target_mak
- echo "CONFIG_MICROBLAZE_DIS=y" >> config-all-disas.mak
+ disas_config "MICROBLAZE"
;;
mips*)
- echo "CONFIG_MIPS_DIS=y" >> $config_target_mak
- echo "CONFIG_MIPS_DIS=y" >> config-all-disas.mak
+ disas_config "MIPS"
;;
moxie*)
- echo "CONFIG_MOXIE_DIS=y" >> $config_target_mak
- echo "CONFIG_MOXIE_DIS=y" >> config-all-disas.mak
+ disas_config "MOXIE"
;;
or32)
- echo "CONFIG_OPENRISC_DIS=y" >> $config_target_mak
- echo "CONFIG_OPENRISC_DIS=y" >> config-all-disas.mak
+ disas_config "OPENRISC"
;;
ppc*)
- echo "CONFIG_PPC_DIS=y" >> $config_target_mak
- echo "CONFIG_PPC_DIS=y" >> config-all-disas.mak
+ disas_config "PPC"
;;
s390*)
- echo "CONFIG_S390_DIS=y" >> $config_target_mak
- echo "CONFIG_S390_DIS=y" >> config-all-disas.mak
+ disas_config "S390"
;;
sh4)
- echo "CONFIG_SH4_DIS=y" >> $config_target_mak
- echo "CONFIG_SH4_DIS=y" >> config-all-disas.mak
+ disas_config "SH4"
;;
sparc*)
- echo "CONFIG_SPARC_DIS=y" >> $config_target_mak
- echo "CONFIG_SPARC_DIS=y" >> config-all-disas.mak
+ disas_config "SPARC"
;;
xtensa*)
- echo "CONFIG_XTENSA_DIS=y" >> $config_target_mak
- echo "CONFIG_XTENSA_DIS=y" >> config-all-disas.mak
+ disas_config "XTENSA"
;;
esac
done
if test "$tcg_interpreter" = "yes" ; then
- echo "CONFIG_TCI_DIS=y" >> $config_target_mak
- echo "CONFIG_TCI_DIS=y" >> config-all-disas.mak
+ disas_config "TCI"
fi
case "$ARCH" in
@@ -5357,10 +5965,6 @@
echo "config-host.h: subdir-pixman" >> $config_host_mak
fi
-if test "$rdma" = "yes" ; then
-echo "CONFIG_RDMA=y" >> $config_host_mak
-fi
-
if [ "$dtc_internal" = "yes" ]; then
echo "config-host.h: subdir-dtc" >> $config_host_mak
fi
@@ -5369,8 +5973,8 @@
echo "CONFIG_NUMA=y" >> $config_host_mak
fi
-if test "$android" = "yes"; then
- echo "CONFIG_ANDROID=y" >> $config_host_mak
+if test "$ccache_cpp2" = "yes"; then
+ echo "export CCACHE_CPP2=y" >> $config_host_mak
fi
# build tree in object directory in case the source is not in the current directory
@@ -5397,11 +6001,11 @@
$source_path/pc-bios/u-boot.* \
$source_path/pc-bios/palcode-*
do
- FILES="$FILES pc-bios/`basename $bios_file`"
+ FILES="$FILES pc-bios/$(basename $bios_file)"
done
-for test_file in `find $source_path/tests/acpi-test-data -type f`
+for test_file in $(find $source_path/tests/acpi-test-data -type f)
do
- FILES="$FILES tests/acpi-test-data`echo $test_file | sed -e 's/.*acpi-test-data//'`"
+ FILES="$FILES tests/acpi-test-data$(echo $test_file | sed -e 's/.*acpi-test-data//')"
done
mkdir -p $DIRS
for f in $FILES ; do
@@ -5416,6 +6020,7 @@
echo "# Automatically generated by configure - do not modify" > $config_mak
echo "SRC_PATH=$source_path/roms/$rom" >> $config_mak
echo "AS=$as" >> $config_mak
+ echo "CCAS=$ccas" >> $config_mak
echo "CC=$cc" >> $config_mak
echo "BCC=bcc" >> $config_mak
echo "CPP=$cpp" >> $config_mak
@@ -5424,6 +6029,11 @@
echo "LD=$ld" >> $config_mak
done
+# set up tests data directory
+if [ ! -e tests/data ]; then
+ symlink "$source_path/tests/data" tests/data
+fi
+
# set up qemu-iotests in this build directory
iotests_common_env="tests/qemu-iotests/common.env"
iotests_check="tests/qemu-iotests/check"
@@ -5446,7 +6056,7 @@
EOD
printf "exec" >>config.status
printf " '%s'" "$0" "$@" >>config.status
-echo >>config.status
+echo ' "$@"' >>config.status
chmod +x config.status
rm -r "$TMPDIR1"
diff --git a/contrib/ivshmem-client/Makefile.objs b/contrib/ivshmem-client/Makefile.objs
new file mode 100644
index 0000000..bfab2d2
--- /dev/null
+++ b/contrib/ivshmem-client/Makefile.objs
@@ -0,0 +1 @@
+ivshmem-client-obj-y = ivshmem-client.o main.o
diff --git a/contrib/ivshmem-client/ivshmem-client.c b/contrib/ivshmem-client/ivshmem-client.c
new file mode 100644
index 0000000..44ae364
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "qemu-common.h"
+#include "qemu/queue.h"
+
+#include "ivshmem-client.h"
+
+/*
+ * Print a printf-style message on stdout, but only when the 'verbose' field
+ * of @client is non-zero.  The body is wrapped in do { } while (0) so the
+ * macro behaves like a single statement, and ', ## __VA_ARGS__' (a GNU
+ * extension) lets the variadic part be empty.
+ */
+#define IVSHMEM_CLIENT_DEBUG(client, fmt, ...) do { \
+ if ((client)->verbose) { \
+ printf(fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
+
+/*
+ * Read one message from the server over the unix socket.
+ *
+ * The payload stored in *index is a 64-bit integer converted from
+ * little-endian; a file descriptor may ride along as SCM_RIGHTS ancillary
+ * data.
+ *
+ * @client: client whose sock_fd is read
+ * @index:  receives the integer carried by the message
+ * @fd:     receives the passed descriptor, or -1 when none was attached
+ *          (also set to -1 on failure)
+ *
+ * Returns 0 on success, -1 on receive error, closed connection or short read.
+ */
+static int
+ivshmem_client_read_one_msg(IvshmemClient *client, int64_t *index, int *fd)
+{
+    ssize_t ret;
+    struct msghdr msg;
+    struct iovec iov[1];
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } msg_control;
+    struct cmsghdr *cmsg;
+
+    /* never leave the caller with an indeterminate descriptor */
+    *fd = -1;
+
+    iov[0].iov_base = index;
+    iov[0].iov_len = sizeof(*index);
+
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = 1;
+    msg.msg_control = &msg_control;
+    msg.msg_controllen = sizeof(msg_control);
+
+    ret = recvmsg(client->sock_fd, &msg, 0);
+    /*
+     * Test the sign on its own first: comparing a negative value against
+     * sizeof() would convert it to a huge unsigned number and hide the error.
+     */
+    if (ret < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+    if (ret == 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "lost connection to server\n");
+        return -1;
+    }
+    if ((size_t)ret < sizeof(*index)) {
+        /* errno is not meaningful here, so do not report it */
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read message: short read\n");
+        return -1;
+    }
+
+    *index = GINT64_FROM_LE(*index);
+
+    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+
+        /* only a single-int SCM_RIGHTS record is of interest */
+        if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)) ||
+            cmsg->cmsg_level != SOL_SOCKET ||
+            cmsg->cmsg_type != SCM_RIGHTS) {
+            continue;
+        }
+
+        memcpy(fd, CMSG_DATA(cmsg), sizeof(*fd));
+    }
+
+    return 0;
+}
+
+/*
+ * Dispose of a peer: unlink it from the client's peer list, close each of
+ * its vector file descriptors and release the structure.  Called when the
+ * server advertises a disconnection and when the client itself is closed.
+ */
+static void
+ivshmem_client_free_peer(IvshmemClient *client, IvshmemClientPeer *peer)
+{
+    unsigned i = 0;
+
+    QTAILQ_REMOVE(&client->peer_list, peer, next);
+
+    while (i < peer->vectors_count) {
+        close(peer->vectors[i]);
+        i++;
+    }
+
+    g_free(peer);
+}
+
+/*
+ * Handle one message coming from the server.
+ *
+ * The message carries a peer id and, optionally, a file descriptor:
+ *  - no descriptor (fd == -1): the peer with that id has left and is freed;
+ *    it is an error if the id is unknown or designates the local client;
+ *  - a descriptor: it is appended as the next vector of the peer with that
+ *    id (which may be the local client); an unknown id creates a new peer.
+ *
+ * Returns 0 on success, -1 on read failure, invalid delete request or when
+ * the peer already holds the maximum number of vectors.
+ */
+static int
+ivshmem_client_handle_server_msg(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    int64_t peer_id;
+    int ret, fd;
+
+    ret = ivshmem_client_read_one_msg(client, &peer_id, &fd);
+    if (ret < 0) {
+        return -1;
+    }
+
+    /* can return a peer or the local client */
+    peer = ivshmem_client_search_peer(client, peer_id);
+
+    /* delete peer */
+    if (fd == -1) {
+
+        if (peer == NULL || peer == &client->local) {
+            IVSHMEM_CLIENT_DEBUG(client, "receive delete for invalid "
+                                 "peer %" PRId64 "\n", peer_id);
+            return -1;
+        }
+
+        IVSHMEM_CLIENT_DEBUG(client, "delete peer id = %" PRId64 "\n", peer_id);
+        ivshmem_client_free_peer(client, peer);
+        return 0;
+    }
+
+    /* new peer */
+    if (peer == NULL) {
+        peer = g_malloc0(sizeof(*peer));
+        peer->id = peer_id;
+        peer->vectors_count = 0;
+        QTAILQ_INSERT_TAIL(&client->peer_list, peer, next);
+        IVSHMEM_CLIENT_DEBUG(client, "new peer id = %" PRId64 "\n", peer_id);
+    }
+
+    /* refuse the vector before logging it as accepted */
+    if (peer->vectors_count >= G_N_ELEMENTS(peer->vectors)) {
+        IVSHMEM_CLIENT_DEBUG(client, "Too many vectors received, failing\n");
+        /* nobody will own this descriptor: close it instead of leaking it */
+        close(fd);
+        return -1;
+    }
+
+    /* new vector */
+    IVSHMEM_CLIENT_DEBUG(client, "  new vector %u (fd=%d) for peer id %"
+                         PRId64 "\n", peer->vectors_count, fd, peer->id);
+
+    peer->vectors[peer->vectors_count] = fd;
+    peer->vectors_count++;
+
+    return 0;
+}
+
+/*
+ * Initialise a new ivshmem client structure (no connection is made).
+ *
+ * @client:         structure to initialise; it is zeroed first
+ * @unix_sock_path: path of the server's unix socket, copied into the client
+ * @notif_cb:       callback stored for notifications, may be NULL
+ * @notif_arg:      opaque argument stored alongside @notif_cb
+ * @verbose:        true to enable debug messages
+ *
+ * Returns 0 on success, -1 if @unix_sock_path does not fit in the client's
+ * buffer.  Even on failure every descriptor field is left at -1, so the
+ * structure never appears to own descriptor 0.
+ */
+int
+ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+                    IvshmemClientNotifCb notif_cb, void *notif_arg,
+                    bool verbose)
+{
+    int ret;
+    unsigned i;
+
+    memset(client, 0, sizeof(*client));
+
+    /*
+     * Do all the infallible setup first: the debug macro consults 'verbose',
+     * and the descriptors must read as invalid if we bail out below.
+     */
+    client->verbose = verbose;
+    client->notif_cb = notif_cb;
+    client->notif_arg = notif_arg;
+    client->shm_fd = -1;
+    client->sock_fd = -1;
+
+    QTAILQ_INIT(&client->peer_list);
+    client->local.id = -1;
+    for (i = 0; i < IVSHMEM_CLIENT_MAX_VECTORS; i++) {
+        client->local.vectors[i] = -1;
+    }
+
+    ret = snprintf(client->unix_sock_path, sizeof(client->unix_sock_path),
+                   "%s", unix_sock_path);
+
+    /* a return >= the buffer size means the path was truncated */
+    if (ret < 0 || (size_t)ret >= sizeof(client->unix_sock_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Create the unix socket, connect it to client->unix_sock_path and run the
+ * initial handshake.  Three messages are expected, in this order:
+ *   1. IVSHMEM_PROTOCOL_VERSION, without a file descriptor
+ *   2. our own (non-negative) id, without a file descriptor
+ *   3. index -1 together with the shared memory file descriptor
+ *
+ * On success the id is stored in client->local.id, the descriptor in
+ * client->shm_fd, and 0 is returned.  On any failure the socket and any
+ * descriptor received so far are closed, client->sock_fd is reset to -1 and
+ * -1 is returned.
+ */
+int
+ivshmem_client_connect(IvshmemClient *client)
+{
+    struct sockaddr_un sun;
+    int fd = -1;    /* stays -1 until a message really delivers a descriptor */
+    int ret;
+    int64_t tmp;
+
+    IVSHMEM_CLIENT_DEBUG(client, "connect to client %s\n",
+                         client->unix_sock_path);
+
+    client->sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (client->sock_fd < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot create socket: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* the whole structure is handed to connect(): leave no stale bytes */
+    memset(&sun, 0, sizeof(sun));
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   client->unix_sock_path);
+    if (ret < 0 || (size_t)ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_CLIENT_DEBUG(client, "could not copy unix socket path\n");
+        goto err_close;
+    }
+
+    if (connect(client->sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot connect to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close;
+    }
+
+    /* first, we expect a protocol version */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        (tmp != IVSHMEM_PROTOCOL_VERSION) || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server\n");
+        goto err_close;
+    }
+
+    /* then, we expect our index + a fd == -1 */
+    if (ivshmem_client_read_one_msg(client, &client->local.id, &fd) < 0 ||
+        client->local.id < 0 || fd != -1) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (2)\n");
+        goto err_close;
+    }
+    IVSHMEM_CLIENT_DEBUG(client, "our_id=%" PRId64 "\n", client->local.id);
+
+    /* now, we expect shared mem fd + a -1 index, note that shm fd
+     * is not used */
+    if (ivshmem_client_read_one_msg(client, &tmp, &fd) < 0 ||
+        tmp != -1 || fd < 0) {
+        IVSHMEM_CLIENT_DEBUG(client, "cannot read from server (3)\n");
+        goto err_close;
+    }
+    client->shm_fd = fd;
+    IVSHMEM_CLIENT_DEBUG(client, "shm_fd=%d\n", fd);
+
+    return 0;
+
+err_close:
+    /* drop a descriptor that arrived with a message we ended up rejecting */
+    if (fd >= 0) {
+        close(fd);
+    }
+    close(client->sock_fd);
+    client->sock_fd = -1;
+    return -1;
+}
+
+/*
+ * Shut the client down: free every known peer, close the shared memory and
+ * socket descriptors as well as all local vector descriptors, and put the
+ * corresponding fields back to their "unset" values (-1, count 0).
+ */
+void
+ivshmem_client_close(IvshmemClient *client)
+{
+    IvshmemClientPeer *head;
+    unsigned slot;
+
+    IVSHMEM_CLIENT_DEBUG(client, "close client\n");
+
+    /* freeing a peer unlinks it, so keep taking the head until none is left */
+    for (head = QTAILQ_FIRST(&client->peer_list); head != NULL;
+         head = QTAILQ_FIRST(&client->peer_list)) {
+        ivshmem_client_free_peer(client, head);
+    }
+
+    close(client->shm_fd);
+    close(client->sock_fd);
+    client->shm_fd = -1;
+    client->sock_fd = -1;
+    client->local.id = -1;
+
+    /* every slot is visited, not only the first vectors_count ones */
+    slot = 0;
+    while (slot < IVSHMEM_CLIENT_MAX_VECTORS) {
+        close(client->local.vectors[slot]);
+        client->local.vectors[slot] = -1;
+        slot++;
+    }
+    client->local.vectors_count = 0;
+}
+
+/*
+ * Add the descriptors this client wants to watch to @fds: the unix socket
+ * and each of the local vectors.  *maxfd is raised when needed so that it
+ * ends up strictly greater than every descriptor added here.
+ */
+void
+ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds, int *maxfd)
+{
+    const IvshmemClientPeer *local = &client->local;
+    unsigned i;
+
+    FD_SET(client->sock_fd, fds);
+    if (*maxfd <= client->sock_fd) {
+        *maxfd = client->sock_fd + 1;
+    }
+
+    for (i = 0; i < local->vectors_count; i++) {
+        int vec_fd = local->vectors[i];
+
+        FD_SET(vec_fd, fds);
+        if (*maxfd <= vec_fd) {
+            *maxfd = vec_fd + 1;
+        }
+    }
+}
+
+/*
+ * Consume pending notifications on the local vectors.
+ *
+ * For each local vector whose descriptor is below @maxfd and set in @cur,
+ * the 64-bit counter is read, a debug line is printed and the client's
+ * notification callback, if any, is invoked.
+ *
+ * Returns 0 on success, the negative result of read() if it fails, or -1
+ * with errno set to EINVAL when fewer bytes than expected were read.
+ */
+static int
+ivshmem_client_handle_event(IvshmemClient *client, const fd_set *cur, int maxfd)
+{
+    IvshmemClientPeer *local = &client->local;
+    unsigned vec;
+
+    for (vec = 0; vec < local->vectors_count; vec++) {
+        uint64_t kick;
+        int nread;
+
+        if (local->vectors[vec] < maxfd && FD_ISSET(local->vectors[vec], cur)) {
+            nread = read(local->vectors[vec], &kick, sizeof(kick));
+            if (nread < 0) {
+                return nread;
+            }
+            if (nread != sizeof(kick)) {
+                IVSHMEM_CLIENT_DEBUG(client, "invalid read size = %d\n", nread);
+                errno = EINVAL;
+                return -1;
+            }
+
+            IVSHMEM_CLIENT_DEBUG(client, "received event on fd %d vector %d: %"
+                                 PRIu64 "\n", local->vectors[vec], vec, kick);
+            if (client->notif_cb != NULL) {
+                client->notif_cb(client, local, vec, client->notif_arg);
+            }
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * Process whatever is ready in @fds: first one server message when the
+ * socket is below @maxfd and flagged in @fds, then the notifications pending
+ * on the local vectors.
+ *
+ * Returns -1 as soon as one of the two steps fails with errno different
+ * from EINTR (the second step is then skipped if the first one failed),
+ * 0 otherwise.
+ */
+int
+ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd)
+{
+    if (client->sock_fd < maxfd && FD_ISSET(client->sock_fd, fds)) {
+        if (ivshmem_client_handle_server_msg(client) < 0 && errno != EINTR) {
+            IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_server_msg() "
+                                 "failed\n");
+            return -1;
+        }
+    }
+
+    if (ivshmem_client_handle_event(client, fds, maxfd) < 0 &&
+        errno != EINTR) {
+        IVSHMEM_CLIENT_DEBUG(client, "ivshmem_client_handle_event() failed\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Notify one vector of a peer by writing the 64-bit value 1 to the
+ * descriptor stored for that vector.
+ *
+ * Returns 0 on success, -1 when @vector is out of range for @peer or when
+ * the write does not transfer the full value (reported on stderr).
+ */
+int
+ivshmem_client_notify(const IvshmemClient *client,
+                      const IvshmemClientPeer *peer, unsigned vector)
+{
+    uint64_t kick = 1;
+    int fd;
+
+    if (vector >= peer->vectors_count) {
+        IVSHMEM_CLIENT_DEBUG(client, "invalid vector %u on peer %" PRId64 "\n",
+                             vector, peer->id);
+        return -1;
+    }
+
+    fd = peer->vectors[vector];
+    IVSHMEM_CLIENT_DEBUG(client, "notify peer %" PRId64
+                         " on vector %d, fd %d\n", peer->id, vector, fd);
+
+    if (write(fd, &kick, sizeof(kick)) == sizeof(kick)) {
+        return 0;
+    }
+
+    fprintf(stderr, "could not write to %d: %s\n", fd,
+            strerror(errno));
+    return -1;
+}
+
+/*
+ * Notify every vector of @peer in turn.  A failing vector does not stop the
+ * loop.  Returns 0 when all notifications succeeded, -1 if at least one
+ * failed.
+ */
+int
+ivshmem_client_notify_all_vects(const IvshmemClient *client,
+                                const IvshmemClientPeer *peer)
+{
+    int status = 0;
+    unsigned v = 0;
+
+    while (v < peer->vectors_count) {
+        if (ivshmem_client_notify(client, peer, v) < 0) {
+            status = -1;
+        }
+        v++;
+    }
+
+    return status;
+}
+
+/*
+ * Notify all vectors of every peer in the client's peer list (the local
+ * client is not part of that list).  Returns 0 when everything succeeded,
+ * -1 if notifying at least one peer failed; remaining peers are still tried.
+ */
+int
+ivshmem_client_notify_broadcast(const IvshmemClient *client)
+{
+    IvshmemClientPeer *cur;
+    int status = 0;
+
+    for (cur = QTAILQ_FIRST(&client->peer_list); cur != NULL;
+         cur = QTAILQ_NEXT(cur, next)) {
+        if (ivshmem_client_notify_all_vects(client, cur) < 0) {
+            status = -1;
+        }
+    }
+
+    return status;
+}
+
+/*
+ * Find the peer whose id is @peer_id.  The local client is checked first,
+ * then the peer list is walked.  Returns the matching structure (possibly
+ * &client->local) or NULL when no peer has that id.
+ */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id)
+{
+    IvshmemClientPeer *it;
+
+    if (client->local.id == peer_id) {
+        return &client->local;
+    }
+
+    /* stop on the first match; 'it' is NULL if the list is exhausted */
+    it = QTAILQ_FIRST(&client->peer_list);
+    while (it != NULL && it->id != peer_id) {
+        it = QTAILQ_NEXT(it, next);
+    }
+
+    return it;
+}
+
+/* print one "vector N is enabled" line on stdout per vector of a peer */
+static void
+ivshmem_client_dump_vectors(const IvshmemClientPeer *peer)
+{
+    unsigned vector;
+
+    for (vector = 0; vector < peer->vectors_count; vector++) {
+        printf("  vector %d is enabled (fd=%d)\n", vector,
+               peer->vectors[vector]);
+    }
+}
+
+/*
+ * Dump the client state on stdout: our own id and vectors first, then the
+ * id and vectors of every peer in the peer list.
+ */
+void
+ivshmem_client_dump(const IvshmemClient *client)
+{
+    const IvshmemClientPeer *peer;
+
+    /* local client */
+    printf("our_id = %" PRId64 "\n", client->local.id);
+    ivshmem_client_dump_vectors(&client->local);
+
+    /* remote peers */
+    QTAILQ_FOREACH(peer, &client->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        ivshmem_client_dump_vectors(peer);
+    }
+}
diff --git a/contrib/ivshmem-client/ivshmem-client.h b/contrib/ivshmem-client/ivshmem-client.h
new file mode 100644
index 0000000..5ee9422
--- /dev/null
+++ b/contrib/ivshmem-client/ivshmem-client.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef IVSHMEM_CLIENT_H
+#define IVSHMEM_CLIENT_H
+
+/**
+ * This file provides helper to implement an ivshmem client. It is used
+ * on the host to ask QEMU to send an interrupt to an ivshmem PCI device in a
+ * guest. QEMU also implements an ivshmem client similar to this one, they both
+ * connect to an ivshmem server.
+ *
+ * A standalone ivshmem client based on this file is provided for debug/test
+ * purposes.
+ */
+
+#include <sys/select.h>
+
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the client
+ */
+#define IVSHMEM_CLIENT_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, it is advertised to
+ * all connected clients through the unix socket. When our ivshmem
+ * client receives such a notification, it creates an IvshmemClientPeer
+ * structure to store the information about this peer.
+ *
+ * This structure is also used to store the information of our own
+ * client in (IvshmemClient)->local.
+ */
+typedef struct IvshmemClientPeer {
+ QTAILQ_ENTRY(IvshmemClientPeer) next; /**< next in list */
+ int64_t id; /**< the id of the peer */
+ int vectors[IVSHMEM_CLIENT_MAX_VECTORS]; /**< one fd per vector */
+ unsigned vectors_count; /**< number of entries used in vectors[] */
+} IvshmemClientPeer;
+/* list head type used for (IvshmemClient)->peer_list */
+QTAILQ_HEAD(IvshmemClientPeerList, IvshmemClientPeer);
+
+typedef struct IvshmemClientPeerList IvshmemClientPeerList;
+typedef struct IvshmemClient IvshmemClient;
+
+/**
+ * Typedef of callback function used when our IvshmemClient receives a
+ * notification from a peer.
+ *
+ * @client: The ivshmem client the callback was registered on
+ * @peer: The peer the notification is attributed to
+ * @vect: The number of the vector that was notified
+ * @arg: The opaque notif_arg pointer given to ivshmem_client_init()
+ */
+typedef void (*IvshmemClientNotifCb)(
+ const IvshmemClient *client,
+ const IvshmemClientPeer *peer,
+ unsigned vect, void *arg);
+
+/**
+ * Structure describing an ivshmem client
+ *
+ * This structure stores all information related to our client: the name
+ * of the server unix socket, the list of peers advertised by the
+ * server, our own client information, and a pointer to the notification
+ * callback function used when we receive a notification from a peer.
+ */
+struct IvshmemClient {
+ char unix_sock_path[PATH_MAX]; /**< path to unix sock */
+ int sock_fd; /**< unix sock filedesc */
+ int shm_fd; /**< shm file descriptor */
+
+ IvshmemClientPeerList peer_list; /**< list of peers */
+ IvshmemClientPeer local; /**< our own infos */
+
+ IvshmemClientNotifCb notif_cb; /**< notification callback, may be NULL */
+ void *notif_arg; /**< opaque argument passed to notif_cb */
+
+ bool verbose; /**< true to enable debug */
+};
+
+/**
+ * Initialize an ivshmem client
+ *
+ * @client: A pointer to an uninitialized IvshmemClient structure
+ * @unix_sock_path: The pointer to the unix socket file name
+ * @notif_cb: If not NULL, the pointer to the function to be called when
+ * our IvshmemClient receives a notification from a peer
+ * @notif_arg: Opaque pointer given as-is to the notification callback
+ * function
+ * @verbose: True to enable debug
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_init(IvshmemClient *client, const char *unix_sock_path,
+ IvshmemClientNotifCb notif_cb, void *notif_arg,
+ bool verbose);
+
+/**
+ * Connect to the server
+ *
+ * Connect to the server unix socket, and read the first initial
+ * messages sent by the server, giving the ID of the client and the file
+ * descriptor of the shared memory.
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_connect(IvshmemClient *client);
+
+/**
+ * Close connection to the server and free all peer structures
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_close(IvshmemClient *client);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors
+ * that must be polled (unix server socket and peers eventfd). The
+ * function will not initialize the fd_set, it is up to the caller
+ * to do this.
+ *
+ * @client: The ivshmem client
+ * @fds: The fd_set to be updated
+ * @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
+ * updated if this function adds a greater fd in fd_set.
+ */
+void ivshmem_client_get_fds(const IvshmemClient *client, fd_set *fds,
+ int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set filled by select(), handle incoming messages from
+ * server or peers.
+ *
+ * @client: The ivshmem client
+ * @fds: The fd_set containing the file descriptors to be checked. Note
+ * that file descriptors that are not related to our client are
+ * ignored.
+ * @maxfd: The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_handle_fds(IvshmemClient *client, fd_set *fds, int maxfd);
+
+/**
+ * Send a notification to a vector of a peer
+ *
+ * @client: The ivshmem client
+ * @peer: The peer to be notified
+ * @vector: The number of the vector
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_client_notify(const IvshmemClient *client,
+ const IvshmemClientPeer *peer, unsigned vector);
+
+/**
+ * Send a notification to all vectors of a peer
+ *
+ * @client: The ivshmem client
+ * @peer: The peer to be notified
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ * notification failed)
+ */
+int ivshmem_client_notify_all_vects(const IvshmemClient *client,
+ const IvshmemClientPeer *peer);
+
+/**
+ * Broadcast a notification to all vectors of all peers
+ *
+ * @client: The ivshmem client
+ *
+ * Returns: 0 on success, or a negative value on error (at least one
+ * notification failed)
+ */
+int ivshmem_client_notify_broadcast(const IvshmemClient *client);
+
+/**
+ * Search a peer from its identifier
+ *
+ * Return the peer structure from its peer_id. If the given peer_id is
+ * the local id, the function returns the local peer structure.
+ *
+ * @client: The ivshmem client
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns: The peer structure, or NULL if not found
+ */
+IvshmemClientPeer *
+ivshmem_client_search_peer(IvshmemClient *client, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem client on stdout
+ *
+ * Dump the id and the vectors of the given ivshmem client and the list
+ * of its peers and their vectors on stdout.
+ *
+ * @client: The ivshmem client
+ */
+void ivshmem_client_dump(const IvshmemClient *client);
+
+#endif /* IVSHMEM_CLIENT_H */
diff --git a/contrib/ivshmem-client/main.c b/contrib/ivshmem-client/main.c
new file mode 100644
index 0000000..33ae1da
--- /dev/null
+++ b/contrib/ivshmem-client/main.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "ivshmem-client.h"
+
+#define IVSHMEM_CLIENT_DEFAULT_VERBOSE 0
+#define IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+
+/* command-line settings of the standalone client, filled in by
+ * ivshmem_client_parse_args() on top of the defaults set in main() */
+typedef struct IvshmemClientArgs {
+ bool verbose; /* set by -v */
+ const char *unix_sock_path; /* set by -S, points into argv */
+} IvshmemClientArgs;
+
+/* print the option summary of the program called @name on stderr, then
+ * terminate the process with exit status @code */
+static void
+ivshmem_client_usage(const char *name, int code)
+{
+    fprintf(stderr,
+            "%s [opts]\n"
+            " -h: show this help\n"
+            " -v: verbose mode\n"
+            " -S <unix_sock_path>: path to the unix socket\n"
+            " to connect to.\n"
+            " default=%s\n",
+            name, IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH);
+    exit(code);
+}
+
+/* walk the command line with getopt() and store the -v / -S settings in
+ * @args; -h and any unknown option end the process through
+ * ivshmem_client_usage() */
+static void
+ivshmem_client_parse_args(IvshmemClientArgs *args, int argc, char *argv[])
+{
+    int opt;
+
+    for (;;) {
+        /* h: help, v: verbose, S <arg>: unix_sock_path */
+        opt = getopt(argc, argv, "hvS:");
+        if (opt == -1) {
+            return;
+        }
+
+        if (opt == 'v') {
+            args->verbose = 1;
+        } else if (opt == 'S') {
+            args->unix_sock_path = optarg;
+        } else {
+            /* asking for help is a success, anything else is an error */
+            ivshmem_client_usage(argv[0], opt == 'h' ? 0 : 1);
+        }
+    }
+}
+
+/* list the interactive commands understood on stdin */
+static void
+ivshmem_client_cmdline_help(void)
+{
+    static const char help_text[] =
+        "dump: dump peers (including us)\n"
+        "int <peer> <vector>: notify one vector on a peer\n"
+        "int <peer> all: notify all vectors of a peer\n"
+        "int all: notify all vectors of all peers (excepting us)\n";
+
+    fputs(help_text, stdout);
+}
+
+/* read stdin and handle commands
+ *
+ * Reads at most sizeof(buf) - 1 bytes from fd 0 in a single read(), splits
+ * them on '\n', '\r' and ';' and runs every non-empty token as a command:
+ * "?", "help", "dump", "int all", "int <peer> <vector>", "int <peer> all".
+ * Any other token prints a hint. A new prompt is printed afterwards.
+ *
+ * Returns 0 once the input has been processed, -1 if read() failed.
+ */
+static int
+ivshmem_client_handle_stdin_command(IvshmemClient *client)
+{
+    IvshmemClientPeer *peer;
+    char buf[128];
+    char *s, *token;
+    int ret;
+    int peer_id, vector;
+
+    /* zero-fill so that the bytes read are always NUL-terminated */
+    memset(buf, 0, sizeof(buf));
+    ret = read(0, buf, sizeof(buf) - 1);
+    if (ret < 0) {
+        return -1;
+    }
+
+    s = buf;
+    while ((token = strsep(&s, "\n\r;")) != NULL) {
+        if (!strcmp(token, "")) {
+            continue;
+        }
+        /* "?" is an alias of "help": it has to be tested in the same
+         * else-if chain, otherwise it also reaches the final
+         * "invalid command" branch after printing the help */
+        if (!strcmp(token, "?") || !strcmp(token, "help")) {
+            ivshmem_client_cmdline_help();
+        } else if (!strcmp(token, "dump")) {
+            ivshmem_client_dump(client);
+        } else if (!strcmp(token, "int all")) {
+            ivshmem_client_notify_broadcast(client);
+        } else if (sscanf(token, "int %d %d", &peer_id, &vector) == 2) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify(client, peer, vector);
+        } else if (sscanf(token, "int %d all", &peer_id) == 1) {
+            peer = ivshmem_client_search_peer(client, peer_id);
+            if (peer == NULL) {
+                printf("cannot find peer_id = %d\n", peer_id);
+                continue;
+            }
+            ivshmem_client_notify_all_vects(client, peer);
+        } else {
+            printf("invalid command, type help\n");
+        }
+    }
+
+    printf("cmd> ");
+    fflush(stdout);
+    return 0;
+}
+
+/* event loop of the client: waits with select() on stdin (command line), on
+ * the unix socket (notifications of new and dead peers) and on the eventfds
+ * (IRQ requests), and dispatches whatever became readable.
+ *
+ * The loop only ends on a select() error other than EINTR or when one of
+ * the handlers fails; the last value returned by select() is handed back. */
+static int
+ivshmem_client_poll_events(IvshmemClient *client)
+{
+    fd_set readable;
+    int nready, nfds;
+
+    for (;;) {
+        /* rebuild the set on every turn, stdin first */
+        FD_ZERO(&readable);
+        FD_SET(0, &readable);
+        nfds = 1;
+        ivshmem_client_get_fds(client, &readable, &nfds);
+
+        nready = select(nfds, &readable, NULL, NULL, NULL);
+        if (nready < 0 && errno == EINTR) {
+            continue;
+        }
+        if (nready < 0) {
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            break;
+        }
+        if (nready == 0) {
+            continue;
+        }
+
+        if (FD_ISSET(0, &readable)) {
+            if (ivshmem_client_handle_stdin_command(client) < 0 &&
+                errno != EINTR) {
+                fprintf(stderr,
+                        "ivshmem_client_handle_stdin_command() failed\n");
+                break;
+            }
+        }
+
+        if (ivshmem_client_handle_fds(client, &readable, nfds) < 0) {
+            fprintf(stderr, "ivshmem_client_handle_fds() failed\n");
+            break;
+        }
+    }
+
+    return nready;
+}
+
+/* notification callback of the standalone client: prints the id of the peer
+ * and the vector number on stdout; @client and @arg are not used */
+static void
+ivshmem_client_notification_cb(const IvshmemClient *client,
+                               const IvshmemClientPeer *peer,
+                               unsigned vect, void *arg)
+{
+    const int64_t sender_id = peer->id;
+
+    (void)arg;
+    (void)client;
+    printf("receive notification from peer_id=%" PRId64 " vector=%u\n",
+           sender_id, vect);
+}
+
+/* entry point of the standalone client: parse the options, ignore SIGPIPE,
+ * print the help and a first prompt, then connect to the server (retrying
+ * every second) and serve events until the connection is lost */
+int
+main(int argc, char *argv[])
+{
+    IvshmemClientArgs args = {
+        .verbose = IVSHMEM_CLIENT_DEFAULT_VERBOSE,
+        .unix_sock_path = IVSHMEM_CLIENT_DEFAULT_UNIX_SOCK_PATH,
+    };
+    IvshmemClient client;
+    struct sigaction ignore_pipe;
+
+    /* parse arguments, will exit on error */
+    ivshmem_client_parse_args(&args, argc, argv);
+
+    /* Ignore SIGPIPE, see this link for more info:
+     * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+    ignore_pipe.sa_handler = SIG_IGN;
+    ignore_pipe.sa_flags = 0;
+    if (sigemptyset(&ignore_pipe.sa_mask) == -1 ||
+        sigaction(SIGPIPE, &ignore_pipe, 0) == -1) {
+        perror("failed to ignore SIGPIPE; sigaction");
+        return 1;
+    }
+
+    ivshmem_client_cmdline_help();
+    printf("cmd> ");
+    fflush(stdout);
+
+    if (ivshmem_client_init(&client, args.unix_sock_path,
+                            ivshmem_client_notification_cb, NULL,
+                            args.verbose) < 0) {
+        fprintf(stderr, "cannot init client\n");
+        return 1;
+    }
+
+    for (;;) {
+        if (ivshmem_client_connect(&client) < 0) {
+            fprintf(stderr, "cannot connect to server, retry in 1 second\n");
+            sleep(1);
+            continue;
+        }
+
+        fprintf(stdout, "listen on server socket %d\n", client.sock_fd);
+
+        if (ivshmem_client_poll_events(&client) != 0) {
+            /* disconnected from server, reset all peers */
+            fprintf(stdout, "disconnected from server\n");
+
+            ivshmem_client_close(&client);
+        }
+    }
+
+    return 0;
+}
diff --git a/contrib/ivshmem-server/Makefile.objs b/contrib/ivshmem-server/Makefile.objs
new file mode 100644
index 0000000..c060dd3
--- /dev/null
+++ b/contrib/ivshmem-server/Makefile.objs
@@ -0,0 +1 @@
+ivshmem-server-obj-y = ivshmem-server.o main.o
diff --git a/contrib/ivshmem-server/ivshmem-server.c b/contrib/ivshmem-server/ivshmem-server.c
new file mode 100644
index 0000000..e2f295b
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.c
@@ -0,0 +1,457 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include "qemu/sockets.h"
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "ivshmem-server.h"
+
+/* log a printf-style message on stdout, only when (server)->verbose is set */
+#define IVSHMEM_SERVER_DEBUG(server, fmt, ...) do { \
+ if ((server)->verbose) { \
+ printf(fmt, ## __VA_ARGS__); \
+ } \
+ } while (0)
+
+/** maximum size of a huge page (1 GiB), upper bound of the sizes tried by
+ * ivshmem_server_ftruncate() */
+#define IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE (1024 * 1024 * 1024)
+
+/** backlog passed to listen() (number of sockets not accepted) */
+#define IVSHMEM_SERVER_LISTEN_BACKLOG 10
+
+/* send one message to a client unix socket: the payload is @peer_id as a
+ * little-endian 64-bit value; when @fd is >= 0 it travels along as
+ * SCM_RIGHTS ancillary data. Returns 0 if sendmsg() sent something, -1
+ * otherwise. */
+static int
+ivshmem_server_send_one_msg(int sock_fd, int64_t peer_id, int fd)
+{
+    union {
+        struct cmsghdr cmsg;
+        char control[CMSG_SPACE(sizeof(int))];
+    } ancillary;
+    struct cmsghdr *hdr;
+    struct iovec payload[1];
+    struct msghdr message;
+
+    peer_id = GINT64_TO_LE(peer_id);
+    payload[0].iov_base = &peer_id;
+    payload[0].iov_len = sizeof(peer_id);
+
+    memset(&message, 0, sizeof(message));
+    message.msg_iov = payload;
+    message.msg_iovlen = 1;
+
+    /* attach the file descriptor, if any, as a control message */
+    if (fd >= 0) {
+        memset(&ancillary, 0, sizeof(ancillary));
+        message.msg_control = &ancillary;
+        message.msg_controllen = sizeof(ancillary);
+        hdr = CMSG_FIRSTHDR(&message);
+        hdr->cmsg_level = SOL_SOCKET;
+        hdr->cmsg_type = SCM_RIGHTS;
+        hdr->cmsg_len = CMSG_LEN(sizeof(int));
+        memcpy(CMSG_DATA(hdr), &fd, sizeof(fd));
+    }
+
+    return sendmsg(sock_fd, &message, 0) <= 0 ? -1 : 0;
+}
+
+/* release a peer: close its socket, unlink it from the peer list, tell the
+ * remaining peers that its id is gone (message without fd), clean up its
+ * event notifiers and free the structure */
+static void
+ivshmem_server_free_peer(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    IvshmemServerPeer *remaining;
+    unsigned v;
+
+    IVSHMEM_SERVER_DEBUG(server, "free peer %" PRId64 "\n", peer->id);
+    close(peer->sock_fd);
+    QTAILQ_REMOVE(&server->peer_list, peer, next);
+
+    /* advertise the deletion to other peers */
+    QTAILQ_FOREACH(remaining, &server->peer_list, next) {
+        ivshmem_server_send_one_msg(remaining->sock_fd, peer->id, -1);
+    }
+
+    v = 0;
+    while (v < peer->vectors_count) {
+        event_notifier_cleanup(&peer->vectors[v]);
+        v++;
+    }
+
+    g_free(peer);
+}
+
+/* greet a freshly connected client with three messages, in this order: the
+ * protocol version, the id assigned to the peer, and the shm fd (sent with
+ * -1 as payload). Returns 0 if all three were sent, -1 at the first
+ * failure. */
+static int
+ivshmem_server_send_initial_info(IvshmemServer *server, IvshmemServerPeer *peer)
+{
+    const int sock = peer->sock_fd;
+
+    /* send our protocol version first */
+    if (ivshmem_server_send_one_msg(sock, IVSHMEM_PROTOCOL_VERSION, -1) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send version: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the peer id to the client */
+    if (ivshmem_server_send_one_msg(sock, peer->id, -1) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send peer id: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    /* send the shm_fd */
+    if (ivshmem_server_send_one_msg(sock, -1, server->shm_fd) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send shm fd: %s\n",
+                             strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* handle message on listening unix socket (new client connection)
+ *
+ * Accepts the pending connection, allocates a peer with an unused id and
+ * one event notifier per vector, sends it the initial info, and then
+ * exchanges advertisements between the new peer and the peers that are
+ * already connected.
+ *
+ * Returns 0 once the peer has been appended to server->peer_list, -1
+ * otherwise (the accepted socket and the notifiers created so far are
+ * released).
+ */
+static int
+ivshmem_server_handle_new_conn(IvshmemServer *server)
+{
+    IvshmemServerPeer *peer, *other_peer;
+    struct sockaddr_un unaddr;
+    socklen_t unaddr_len;
+    int newfd;
+    unsigned i;
+
+    /* accept the incoming connection */
+    unaddr_len = sizeof(unaddr);
+    newfd = qemu_accept(server->sock_fd,
+                        (struct sockaddr *)&unaddr, &unaddr_len);
+
+    if (newfd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot accept() %s\n", strerror(errno));
+        return -1;
+    }
+
+    qemu_set_nonblock(newfd);
+    IVSHMEM_SERVER_DEBUG(server, "accept()=%d\n", newfd);
+
+    /* allocate new structure for this peer */
+    peer = g_malloc0(sizeof(*peer));
+    peer->sock_fd = newfd;
+
+    /* get an unused peer id */
+    /* XXX: this could use id allocation such as Linux IDA, or simply
+     * a free-list */
+    for (i = 0; i < G_MAXUINT16; i++) {
+        if (ivshmem_server_search_peer(server, server->cur_id) == NULL) {
+            break;
+        }
+        server->cur_id++;
+    }
+    if (i == G_MAXUINT16) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot allocate new client id\n");
+        close(newfd);
+        g_free(peer);
+        return -1;
+    }
+    peer->id = server->cur_id++;
+
+    /* create eventfd, one per vector */
+    peer->vectors_count = server->n_vectors;
+    for (i = 0; i < peer->vectors_count; i++) {
+        if (event_notifier_init(&peer->vectors[i], FALSE) < 0) {
+            IVSHMEM_SERVER_DEBUG(server, "cannot create eventfd\n");
+            goto fail;
+        }
+    }
+
+    /* send peer id and shm fd */
+    if (ivshmem_server_send_initial_info(server, peer) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot send initial info\n");
+        goto fail;
+    }
+
+    /* advertise the new peer to others */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(other_peer->sock_fd, peer->id,
+                                        peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the other peers to the new one; the walk over
+     * other_peer->vectors[] is bounded by that peer's own vector count,
+     * not by the count of the new peer */
+    QTAILQ_FOREACH(other_peer, &server->peer_list, next) {
+        for (i = 0; i < other_peer->vectors_count; i++) {
+            ivshmem_server_send_one_msg(peer->sock_fd, other_peer->id,
+                                        other_peer->vectors[i].wfd);
+        }
+    }
+
+    /* advertise the new peer to itself */
+    for (i = 0; i < peer->vectors_count; i++) {
+        ivshmem_server_send_one_msg(peer->sock_fd, peer->id,
+                                    event_notifier_get_fd(&peer->vectors[i]));
+    }
+
+    QTAILQ_INSERT_TAIL(&server->peer_list, peer, next);
+    IVSHMEM_SERVER_DEBUG(server, "new peer id = %" PRId64 "\n",
+                         peer->id);
+    return 0;
+
+fail:
+    /* i is the number of notifiers that were successfully initialized */
+    while (i--) {
+        event_notifier_cleanup(&peer->vectors[i]);
+    }
+    close(newfd);
+    g_free(peer);
+    return -1;
+}
+
+/* Resize the file behind @fd to shmsize rounded up to a power of 2.
+ * Nothing is done if fstat() reports that the file already has that size.
+ * If ftruncate() fails, the size is doubled and tried again until the
+ * maximum huge page size is exceeded. This is useful if the shm file is in
+ * a hugetlbfs that cannot be truncated to the shm_size value.
+ * Returns 0 on success, -1 if no size was accepted. */
+static int
+ivshmem_server_ftruncate(int fd, unsigned shmsize)
+{
+    struct stat st;
+
+    /* align shmsize to next power of 2 */
+    shmsize = pow2ceil(shmsize);
+
+    if (fstat(fd, &st) != -1 && st.st_size == shmsize) {
+        return 0;
+    }
+
+    for (; shmsize <= IVSHMEM_SERVER_MAX_HUGEPAGE_SIZE; shmsize *= 2) {
+        if (ftruncate(fd, shmsize) == 0) {
+            return 0;
+        }
+    }
+
+    return -1;
+}
+
+/* Init a new ivshmem server
+ *
+ * Clears @server and records the settings; nothing is opened here, see
+ * ivshmem_server_start().
+ *
+ * Returns 0 on success, -1 if @n_vectors exceeds
+ * IVSHMEM_SERVER_MAX_VECTORS or if one of the paths does not fit in its
+ * buffer.
+ */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+                    const char *shm_path, bool use_shm_open,
+                    size_t shm_size, unsigned n_vectors,
+                    bool verbose)
+{
+    int ret;
+
+    memset(server, 0, sizeof(*server));
+    server->verbose = verbose;
+
+    /* each peer keeps its notifiers in an array of
+     * IVSHMEM_SERVER_MAX_VECTORS entries that is filled up to n_vectors */
+    if (n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
+        IVSHMEM_SERVER_DEBUG(server, "too many vectors: %u (max %d)\n",
+                             n_vectors, IVSHMEM_SERVER_MAX_VECTORS);
+        return -1;
+    }
+
+    ret = snprintf(server->unix_sock_path, sizeof(server->unix_sock_path),
+                   "%s", unix_sock_path);
+    if (ret < 0 || ret >= sizeof(server->unix_sock_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        return -1;
+    }
+    ret = snprintf(server->shm_path, sizeof(server->shm_path),
+                   "%s", shm_path);
+    if (ret < 0 || ret >= sizeof(server->shm_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy shm path\n");
+        return -1;
+    }
+
+    server->use_shm_open = use_shm_open;
+    server->shm_size = shm_size;
+    server->n_vectors = n_vectors;
+
+    QTAILQ_INIT(&server->peer_list);
+
+    return 0;
+}
+
+/* open shm, create and bind to the unix socket
+ *
+ * Depending on server->use_shm_open, the shared memory is either the POSIX
+ * object shm_open(shm_path) or a temporary file created in the directory
+ * shm_path and unlinked right away. It is sized with
+ * ivshmem_server_ftruncate() before the listening socket is set up.
+ *
+ * Returns 0 on success (server->sock_fd and server->shm_fd are then set),
+ * -1 on error (the fds opened by this call are closed again).
+ */
+int
+ivshmem_server_start(IvshmemServer *server)
+{
+    struct sockaddr_un sun;
+    int shm_fd, sock_fd, ret;
+
+    /* open shm file */
+    if (server->use_shm_open) {
+        IVSHMEM_SERVER_DEBUG(server, "Using POSIX shared memory: %s\n",
+                             server->shm_path);
+        shm_fd = shm_open(server->shm_path, O_CREAT | O_RDWR, S_IRWXU);
+    } else {
+        gchar *filename = g_strdup_printf("%s/ivshmem.XXXXXX", server->shm_path);
+        IVSHMEM_SERVER_DEBUG(server, "Using file-backed shared memory: %s\n",
+                             server->shm_path);
+        shm_fd = mkstemp(filename);
+        if (shm_fd >= 0) {
+            /* only drop the name of a file that was really created: after
+             * a failed mkstemp() an unlink() would overwrite the errno
+             * that is reported below */
+            unlink(filename);
+        }
+        g_free(filename);
+    }
+
+    if (shm_fd < 0) {
+        fprintf(stderr, "cannot open shm file %s: %s\n", server->shm_path,
+                strerror(errno));
+        return -1;
+    }
+    if (ivshmem_server_ftruncate(shm_fd, server->shm_size) < 0) {
+        fprintf(stderr, "ftruncate(%s) failed: %s\n", server->shm_path,
+                strerror(errno));
+        goto err_close_shm;
+    }
+
+    IVSHMEM_SERVER_DEBUG(server, "create & bind socket %s\n",
+                         server->unix_sock_path);
+
+    /* create the unix listening socket */
+    sock_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+    if (sock_fd < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "cannot create socket: %s\n",
+                             strerror(errno));
+        goto err_close_shm;
+    }
+
+    sun.sun_family = AF_UNIX;
+    ret = snprintf(sun.sun_path, sizeof(sun.sun_path), "%s",
+                   server->unix_sock_path);
+    if (ret < 0 || ret >= sizeof(sun.sun_path)) {
+        IVSHMEM_SERVER_DEBUG(server, "could not copy unix socket path\n");
+        goto err_close_sock;
+    }
+    if (bind(sock_fd, (struct sockaddr *)&sun, sizeof(sun)) < 0) {
+        /* this is the bind() step, the message names it accordingly */
+        IVSHMEM_SERVER_DEBUG(server, "cannot bind to %s: %s\n", sun.sun_path,
+                             strerror(errno));
+        goto err_close_sock;
+    }
+
+    if (listen(sock_fd, IVSHMEM_SERVER_LISTEN_BACKLOG) < 0) {
+        IVSHMEM_SERVER_DEBUG(server, "listen() failed: %s\n", strerror(errno));
+        goto err_close_sock;
+    }
+
+    server->sock_fd = sock_fd;
+    server->shm_fd = shm_fd;
+
+    return 0;
+
+err_close_sock:
+    close(sock_fd);
+err_close_shm:
+    close(shm_fd);
+    return -1;
+}
+
+/* shut the server down: free every connected peer, remove the unix socket
+ * path, close the listening socket and the shm fd, and mark both fds as
+ * unused (-1) */
+void
+ivshmem_server_close(IvshmemServer *server)
+{
+    IvshmemServerPeer *cur, *following;
+
+    IVSHMEM_SERVER_DEBUG(server, "close server\n");
+
+    /* the _SAFE variant is needed: each peer is freed while iterating */
+    QTAILQ_FOREACH_SAFE(cur, &server->peer_list, next, following) {
+        ivshmem_server_free_peer(server, cur);
+    }
+
+    unlink(server->unix_sock_path);
+
+    close(server->sock_fd);
+    close(server->shm_fd);
+
+    server->shm_fd = -1;
+    server->sock_fd = -1;
+}
+
+/* add the listening socket and the socket of every peer to @fds, raising
+ * *maxfd to "highest fd + 1" when needed; nothing is added while the
+ * server has no listening socket (sock_fd == -1) */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd)
+{
+    const int listen_fd = server->sock_fd;
+    IvshmemServerPeer *p;
+
+    if (listen_fd == -1) {
+        return;
+    }
+
+    FD_SET(listen_fd, fds);
+    if (*maxfd <= listen_fd) {
+        *maxfd = listen_fd + 1;
+    }
+
+    QTAILQ_FOREACH(p, &server->peer_list, next) {
+        FD_SET(p->sock_fd, fds);
+        if (*maxfd <= p->sock_fd) {
+            *maxfd = p->sock_fd + 1;
+        }
+    }
+}
+
+/* react to the sockets flagged in @fds: a readable listening socket means a
+ * new connection, a readable peer socket means that this peer is dropped.
+ * Returns -1 only if accepting a connection failed with an errno other
+ * than EINTR, 0 otherwise. */
+int
+ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd)
+{
+    IvshmemServerPeer *cur, *following;
+
+    if (server->sock_fd < maxfd && FD_ISSET(server->sock_fd, fds)) {
+        if (ivshmem_server_handle_new_conn(server) < 0 && errno != EINTR) {
+            IVSHMEM_SERVER_DEBUG(server, "ivshmem_server_handle_new_conn() "
+                                 "failed\n");
+            return -1;
+        }
+    }
+
+    QTAILQ_FOREACH_SAFE(cur, &server->peer_list, next, following) {
+        /* any message from a peer socket result in a close() */
+        IVSHMEM_SERVER_DEBUG(server, "peer->sock_fd=%d\n", cur->sock_fd);
+        if (cur->sock_fd >= maxfd || !FD_ISSET(cur->sock_fd, fds)) {
+            continue;
+        }
+        ivshmem_server_free_peer(server, cur);
+    }
+
+    return 0;
+}
+
+/* find the connected peer that carries @peer_id; NULL if there is none */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id)
+{
+    IvshmemServerPeer *it;
+    IvshmemServerPeer *found = NULL;
+
+    QTAILQ_FOREACH(it, &server->peer_list, next) {
+        if (it->id == peer_id) {
+            found = it;
+            break;
+        }
+    }
+
+    return found;
+}
+
+/* dump the list of peers and their vectors on stdout: one "peer_id" line
+ * per peer, followed by one line per vector with the fd of its notifier */
+void
+ivshmem_server_dump(const IvshmemServer *server)
+{
+    const IvshmemServerPeer *peer;
+    unsigned vector;
+
+    /* dump peers */
+    QTAILQ_FOREACH(peer, &server->peer_list, next) {
+        printf("peer_id = %" PRId64 "\n", peer->id);
+
+        for (vector = 0; vector < peer->vectors_count; vector++) {
+            /* vector is unsigned, so it is printed with %u rather than %d */
+            printf(" vector %u is enabled (fd=%d)\n", vector,
+                   event_notifier_get_fd(&peer->vectors[vector]));
+        }
+    }
+}
diff --git a/contrib/ivshmem-server/ivshmem-server.h b/contrib/ivshmem-server/ivshmem-server.h
new file mode 100644
index 0000000..4af08e1
--- /dev/null
+++ b/contrib/ivshmem-server/ivshmem-server.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#ifndef IVSHMEM_SERVER_H
+#define IVSHMEM_SERVER_H
+
+/**
+ * The ivshmem server is a daemon that creates a unix socket in listen
+ * mode. The ivshmem clients (qemu or ivshmem-client) connect to this
+ * unix socket. For each client, the server will create some eventfd
+ * (see EVENTFD(2)), one per vector. These fd are transmitted to all
+ * clients using the SCM_RIGHTS cmsg message. Therefore, each client is
+ * able to send a notification to another client without being
+ * proxied by the server.
+ *
+ * We use this mechanism to send interruptions between guests.
+ * qemu is able to transform an event on a eventfd into a PCI MSI-x
+ * interruption in the guest.
+ *
+ * The ivshmem server is also able to share the file descriptor
+ * associated to the ivshmem shared memory.
+ */
+
+#include <sys/select.h>
+
+#include "qemu/event_notifier.h"
+#include "qemu/queue.h"
+#include "hw/misc/ivshmem.h"
+
+/**
+ * Maximum number of notification vectors supported by the server
+ */
+#define IVSHMEM_SERVER_MAX_VECTORS 64
+
+/**
+ * Structure storing a peer
+ *
+ * Each time a client connects to an ivshmem server, a new
+ * IvshmemServerPeer structure is created. This peer and all its
+ * vectors are advertised to all connected clients through the connected
+ * unix sockets.
+ */
+typedef struct IvshmemServerPeer {
+ QTAILQ_ENTRY(IvshmemServerPeer) next; /**< next in list */
+ int sock_fd; /**< connected unix sock */
+ int64_t id; /**< the id of the peer */
+ EventNotifier vectors[IVSHMEM_SERVER_MAX_VECTORS]; /**< one per vector */
+ unsigned vectors_count; /**< number of entries used in vectors[] */
+} IvshmemServerPeer;
+/* list head type used for (IvshmemServer)->peer_list */
+QTAILQ_HEAD(IvshmemServerPeerList, IvshmemServerPeer);
+
+typedef struct IvshmemServerPeerList IvshmemServerPeerList;
+
+/**
+ * Structure describing an ivshmem server
+ *
+ * This structure stores all information related to our server: the name
+ * of the server unix socket, the shared memory settings and the list of
+ * connected peers.
+ */
+typedef struct IvshmemServer {
+ char unix_sock_path[PATH_MAX]; /**< path to unix socket */
+ int sock_fd; /**< unix sock file descriptor */
+ char shm_path[PATH_MAX]; /**< path to shm */
+ bool use_shm_open; /**< true: shm_path is given to shm_open();
+ false: shm_path is a directory for a temp file */
+ size_t shm_size; /**< size of shm */
+ int shm_fd; /**< shm file descriptor */
+ unsigned n_vectors; /**< number of vectors */
+ uint16_t cur_id; /**< id to be given to next client */
+ bool verbose; /**< true in verbose mode */
+ IvshmemServerPeerList peer_list; /**< list of peers */
+} IvshmemServer;
+
+/**
+ * Initialize an ivshmem server
+ *
+ * @server: A pointer to an uninitialized IvshmemServer structure
+ * @unix_sock_path: The pointer to the unix socket file name
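+ * @shm_path: Path to the shared memory. The path corresponds to a POSIX
+ *            shm name or a hugetlbfs mount point.
+ * @use_shm_open: True if @shm_path is a POSIX shm name to be opened with
+ *                shm_open(), false if it is a directory in which the
+ *                backing file is created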
+ * @shm_size: Size of shared memory
+ * @n_vectors: Number of interrupt vectors per client
+ * @verbose: True to enable verbose mode
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int
+ivshmem_server_init(IvshmemServer *server, const char *unix_sock_path,
+ const char *shm_path, bool use_shm_open,
+ size_t shm_size, unsigned n_vectors,
+ bool verbose);
+
+/**
+ * Open the shm, then create and bind to the unix socket
+ *
+ * @server: The pointer to the initialized IvshmemServer structure
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_start(IvshmemServer *server);
+
+/**
+ * Close the server
+ *
+ * Close connections to all clients, close the unix socket and the
+ * shared memory file descriptor. The structure remains initialized, so
+ * it is possible to call ivshmem_server_start() again after a call to
+ * ivshmem_server_close().
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_close(IvshmemServer *server);
+
+/**
+ * Fill a fd_set with file descriptors to be monitored
+ *
+ * This function will fill a fd_set with all file descriptors that must
+ * be polled (unix server socket and peers unix socket). The function
+ * will not initialize the fd_set, it is up to the caller to do it.
+ *
+ * @server: The ivshmem server
+ * @fds: The fd_set to be updated
+ * @maxfd: Must be set to the max file descriptor + 1 in fd_set. This value is
+ * updated if this function adds a greater fd in fd_set.
+ */
+void
+ivshmem_server_get_fds(const IvshmemServer *server, fd_set *fds, int *maxfd);
+
+/**
+ * Read and handle new messages
+ *
+ * Given a fd_set (for instance filled by a call to select()), handle
+ * incoming messages from peers.
+ *
+ * @server: The ivshmem server
+ * @fds: The fd_set containing the file descriptors to be checked. Note that
+ * file descriptors that are not related to our server are ignored.
+ * @maxfd: The maximum fd in fd_set, plus one.
+ *
+ * Returns: 0 on success, or a negative value on error
+ */
+int ivshmem_server_handle_fds(IvshmemServer *server, fd_set *fds, int maxfd);
+
+/**
+ * Search a peer from its identifier
+ *
+ * @server: The ivshmem server
+ * @peer_id: The identifier of the peer structure
+ *
+ * Returns: The peer structure, or NULL if not found
+ */
+IvshmemServerPeer *
+ivshmem_server_search_peer(IvshmemServer *server, int64_t peer_id);
+
+/**
+ * Dump information of this ivshmem server and its peers on stdout
+ *
+ * @server: The ivshmem server
+ */
+void ivshmem_server_dump(const IvshmemServer *server);
+
+#endif /* IVSHMEM_SERVER_H */
diff --git a/contrib/ivshmem-server/main.c b/contrib/ivshmem-server/main.c
new file mode 100644
index 0000000..45776d8
--- /dev/null
+++ b/contrib/ivshmem-server/main.c
@@ -0,0 +1,273 @@
+/*
+ * Copyright 6WIND S.A., 2014
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * (at your option) any later version. See the COPYING file in the
+ * top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/cutils.h"
+
+#include "ivshmem-server.h"
+
+#define IVSHMEM_SERVER_DEFAULT_VERBOSE 0
+#define IVSHMEM_SERVER_DEFAULT_FOREGROUND 0
+#define IVSHMEM_SERVER_DEFAULT_PID_FILE "/var/run/ivshmem-server.pid"
+#define IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "/tmp/ivshmem_socket"
+#define IVSHMEM_SERVER_DEFAULT_SHM_PATH "ivshmem"
+#define IVSHMEM_SERVER_DEFAULT_SHM_SIZE (4*1024*1024)
+#define IVSHMEM_SERVER_DEFAULT_N_VECTORS 1
+
+/* set from the SIGTERM handler to make the event loop quit (async-safe type) */
+static volatile sig_atomic_t ivshmem_server_quit;
+
+/* arguments given by the user on the command line (defaults set in main()) */
+typedef struct IvshmemServerArgs {
+ bool verbose; /* -v */
+ bool foreground; /* -F: do not daemonize */
+ const char *pid_file; /* -p: only written in daemon mode */
+ const char *unix_socket_path; /* -S */
+ const char *shm_path; /* -M (shm object name) or -m (directory name) */
+ bool use_shm_open; /* true when the path came from -M, false for -m */
+ uint64_t shm_size; /* -l: size in bytes */
+ unsigned n_vectors; /* -n */
+} IvshmemServerArgs;
+
+/* print the option summary to stdout; defaults are cast to match their %u */
+static void ivshmem_server_usage(const char *progname)
+{
+    printf("Usage: %s [OPTION]...\n"
+           " -h: show this help\n"
+           " -v: verbose mode\n"
+           " -F: foreground mode (default is to daemonize)\n"
+           " -p <pid-file>: path to the PID file (used in daemon mode only)\n"
+           " default " IVSHMEM_SERVER_DEFAULT_PID_FILE "\n"
+           " -S <unix-socket-path>: path to the unix socket to listen to\n"
+           " default " IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH "\n"
+           " -M <shm-name>: POSIX shared memory object to use\n"
+           " default " IVSHMEM_SERVER_DEFAULT_SHM_PATH "\n"
+           " -m <dir-name>: where to create shared memory\n"
+           " -l <size>: size of shared memory in bytes\n"
+           " suffixes K, M and G can be used, e.g. 1K means 1024\n"
+           " default %u\n"
+           " -n <nvectors>: number of vectors\n"
+           " default %u\n",
+           progname, (unsigned)IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+           (unsigned)IVSHMEM_SERVER_DEFAULT_N_VECTORS);
+}
+
+/* after a command-line error, point the user at -h (message goes to stderr) */
+static void ivshmem_server_help(const char *progname)
+{
+    fprintf(stderr, "Try '%s -h' for more information.\n", progname);
+}
+
+/* parse the program arguments into @args; prints a hint and exits on error */
+static void
+ivshmem_server_parse_args(IvshmemServerArgs *args, int argc, char *argv[])
+{
+    int c;
+    unsigned long long v;
+    Error *err = NULL;
+
+    while ((c = getopt(argc, argv, "hvFp:S:m:M:l:n:")) != -1) {
+
+        switch (c) {
+        case 'h': /* help */
+            ivshmem_server_usage(argv[0]);
+            exit(0);
+            break;
+
+        case 'v': /* verbose */
+            args->verbose = true;
+            break;
+
+        case 'F': /* foreground */
+            args->foreground = true;
+            break;
+
+        case 'p': /* pid file */
+            args->pid_file = optarg;
+            break;
+
+        case 'S': /* unix socket path */
+            args->unix_socket_path = optarg;
+            break;
+
+        case 'M': /* shm name */
+        case 'm': /* dir name */
+            args->shm_path = optarg;
+            args->use_shm_open = c == 'M';
+            break;
+
+        case 'l': /* shm size */
+            parse_option_size("shm_size", optarg, &args->shm_size, &err);
+            if (err) {
+                error_report_err(err);
+                ivshmem_server_help(argv[0]);
+                exit(1);
+            }
+            break;
+
+        case 'n': /* number of vectors */
+            if (parse_uint_full(optarg, &v, 0) < 0 || v > UINT_MAX) {
+                fprintf(stderr, "cannot parse n_vectors\n");
+                ivshmem_server_help(argv[0]);
+                exit(1);
+            }
+            args->n_vectors = v; /* cannot truncate: v <= UINT_MAX (above) */
+            break;
+
+        default:
+            ivshmem_server_usage(argv[0]);
+            exit(1);
+            break;
+        }
+    }
+
+    if (args->n_vectors > IVSHMEM_SERVER_MAX_VECTORS) {
+        fprintf(stderr, "too many requested vectors (max is %d)\n",
+                IVSHMEM_SERVER_MAX_VECTORS);
+        ivshmem_server_help(argv[0]);
+        exit(1);
+    }
+
+    if (args->verbose && !args->foreground) {
+        fprintf(stderr, "cannot use verbose in daemon mode\n");
+        ivshmem_server_help(argv[0]);
+        exit(1);
+    }
+}
+
+/* wait for events on listening server unix socket and connected client
+ * sockets; returns 0 once asked to quit, -1 on select()/handler failure */
+static int
+ivshmem_server_poll_events(IvshmemServer *server)
+{
+    fd_set fds;
+    int ret, maxfd;
+
+    while (!ivshmem_server_quit) {
+
+        FD_ZERO(&fds);
+        maxfd = 0;
+        ivshmem_server_get_fds(server, &fds, &maxfd);
+
+        ret = select(maxfd, &fds, NULL, NULL, NULL);
+
+        if (ret < 0) {
+            if (errno == EINTR) {
+                continue; /* interrupted by a signal: re-check the quit flag */
+            }
+
+            fprintf(stderr, "select error: %s\n", strerror(errno));
+            return -1;
+        }
+        if (ret == 0) {
+            continue;
+        }
+
+        if (ivshmem_server_handle_fds(server, &fds, maxfd) < 0) {
+            fprintf(stderr, "ivshmem_server_handle_fds() failed\n");
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+/* signal handler: only raises the quit flag that the event loop polls */
+static void ivshmem_server_quit_cb(int signum)
+{
+    ivshmem_server_quit = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ IvshmemServer server;
+ struct sigaction sa, sa_quit;
+ IvshmemServerArgs args = {
+ .verbose = IVSHMEM_SERVER_DEFAULT_VERBOSE,
+ .foreground = IVSHMEM_SERVER_DEFAULT_FOREGROUND,
+ .pid_file = IVSHMEM_SERVER_DEFAULT_PID_FILE,
+ .unix_socket_path = IVSHMEM_SERVER_DEFAULT_UNIX_SOCK_PATH,
+ .shm_path = IVSHMEM_SERVER_DEFAULT_SHM_PATH,
+ .use_shm_open = true,
+ .shm_size = IVSHMEM_SERVER_DEFAULT_SHM_SIZE,
+ .n_vectors = IVSHMEM_SERVER_DEFAULT_N_VECTORS,
+ };
+ int ret = 1; /* exit status: stays 1 unless the event loop was reached */
+
+ /*
+ * Do not remove this notice without adding proper error handling!
+ * Start with handling ivshmem_server_send_one_msg() failure.
+ */
+ printf("*** Example code, do not use in production ***\n");
+
+ /* parse arguments, will exit on error */
+ ivshmem_server_parse_args(&args, argc, argv);
+
+ /* Ignore SIGPIPE, see this link for more info:
+ * http://www.mail-archive.com/libevent-users@monkey.org/msg01606.html */
+ sa.sa_handler = SIG_IGN;
+ sa.sa_flags = 0;
+ if (sigemptyset(&sa.sa_mask) == -1 ||
+ sigaction(SIGPIPE, &sa, 0) == -1) {
+ perror("failed to ignore SIGPIPE; sigaction");
+ goto err;
+ }
+
+ sa_quit.sa_handler = ivshmem_server_quit_cb; /* SIGTERM ends the event loop */
+ sa_quit.sa_flags = 0;
+ if (sigemptyset(&sa_quit.sa_mask) == -1 ||
+ sigaction(SIGTERM, &sa_quit, 0) == -1) {
+ perror("failed to add SIGTERM handler; sigaction");
+ goto err;
+ }
+
+ /* init the ivshmem server structure */
+ if (ivshmem_server_init(&server, args.unix_socket_path,
+ args.shm_path, args.use_shm_open,
+ args.shm_size, args.n_vectors, args.verbose) < 0) {
+ fprintf(stderr, "cannot init server\n");
+ goto err;
+ }
+
+ /* start the ivshmem server (open shm & unix socket) */
+ if (ivshmem_server_start(&server) < 0) {
+ fprintf(stderr, "cannot bind\n");
+ goto err;
+ }
+
+ /* daemonize unless foreground mode (-F) was requested */
+ if (!args.foreground) {
+ FILE *fp;
+
+ if (qemu_daemon(1, 1) < 0) {
+ fprintf(stderr, "cannot daemonize: %s\n", strerror(errno));
+ goto err_close;
+ }
+
+ /* write pid file (daemon mode only) */
+ fp = fopen(args.pid_file, "w");
+ if (fp == NULL) {
+ fprintf(stderr, "cannot write pid file: %s\n", strerror(errno));
+ goto err_close;
+ }
+
+ fprintf(fp, "%d\n", (int) getpid());
+ fclose(fp);
+ }
+
+ ivshmem_server_poll_events(&server); /* NOTE: return value is ignored */
+ fprintf(stdout, "server disconnected\n");
+ ret = 0;
+
+err_close: /* also reached by falling through on the normal exit path */
+ ivshmem_server_close(&server);
+err:
+ return ret;
+}
diff --git a/coroutine-gthread.c b/coroutine-gthread.c
deleted file mode 100644
index 6bd6d6b..0000000
--- a/coroutine-gthread.c
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * GThread coroutine initialization code
- *
- * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.0 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <glib.h>
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-typedef struct {
- Coroutine base;
- GThread *thread;
- bool runnable;
- bool free_on_thread_exit;
- CoroutineAction action;
-} CoroutineGThread;
-
-static CompatGMutex coroutine_lock;
-static CompatGCond coroutine_cond;
-
-/* GLib 2.31 and beyond deprecated various parts of the thread API,
- * but the new interfaces are not available in older GLib versions
- * so we have to cope with both.
- */
-#if GLIB_CHECK_VERSION(2, 31, 0)
-/* Awkwardly, the GPrivate API doesn't provide a way to update the
- * GDestroyNotify handler for the coroutine key dynamically. So instead
- * we track whether or not the CoroutineGThread should be freed on
- * thread exit / coroutine key update using the free_on_thread_exit
- * field.
- */
-static void coroutine_destroy_notify(gpointer data)
-{
- CoroutineGThread *co = data;
- if (co && co->free_on_thread_exit) {
- g_free(co);
- }
-}
-
-static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
-
-static inline CoroutineGThread *get_coroutine_key(void)
-{
- return g_private_get(&coroutine_key);
-}
-
-static inline void set_coroutine_key(CoroutineGThread *co,
- bool free_on_thread_exit)
-{
- /* Unlike g_static_private_set() this does not call the GDestroyNotify
- * if the previous value of the key was NULL. Fortunately we only need
- * the GDestroyNotify in the non-NULL key case.
- */
- co->free_on_thread_exit = free_on_thread_exit;
- g_private_replace(&coroutine_key, co);
-}
-
-static inline GThread *create_thread(GThreadFunc func, gpointer data)
-{
- return g_thread_new("coroutine", func, data);
-}
-
-#else
-
-/* Handle older GLib versions */
-
-static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
-
-static inline CoroutineGThread *get_coroutine_key(void)
-{
- return g_static_private_get(&coroutine_key);
-}
-
-static inline void set_coroutine_key(CoroutineGThread *co,
- bool free_on_thread_exit)
-{
- g_static_private_set(&coroutine_key, co,
- free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
-}
-
-static inline GThread *create_thread(GThreadFunc func, gpointer data)
-{
- return g_thread_create_full(func, data, 0, TRUE, TRUE,
- G_THREAD_PRIORITY_NORMAL, NULL);
-}
-
-#endif
-
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
-#if !GLIB_CHECK_VERSION(2, 31, 0)
- if (!g_thread_supported()) {
- g_thread_init(NULL);
- }
-#endif
-}
-
-static void coroutine_wait_runnable_locked(CoroutineGThread *co)
-{
- while (!co->runnable) {
- g_cond_wait(&coroutine_cond, &coroutine_lock);
- }
-}
-
-static void coroutine_wait_runnable(CoroutineGThread *co)
-{
- g_mutex_lock(&coroutine_lock);
- coroutine_wait_runnable_locked(co);
- g_mutex_unlock(&coroutine_lock);
-}
-
-static gpointer coroutine_thread(gpointer opaque)
-{
- CoroutineGThread *co = opaque;
-
- set_coroutine_key(co, false);
- coroutine_wait_runnable(co);
- co->base.entry(co->base.entry_arg);
- qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
- return NULL;
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- CoroutineGThread *co;
-
- co = g_malloc0(sizeof(*co));
- co->thread = create_thread(coroutine_thread, co);
- if (!co->thread) {
- g_free(co);
- return NULL;
- }
- return &co->base;
-}
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
-
- g_thread_join(co->thread);
- g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_,
- Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
- CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
-
- g_mutex_lock(&coroutine_lock);
- from->runnable = false;
- from->action = action;
- to->runnable = true;
- to->action = action;
- g_cond_broadcast(&coroutine_cond);
-
- if (action != COROUTINE_TERMINATE) {
- coroutine_wait_runnable_locked(from);
- }
- g_mutex_unlock(&coroutine_lock);
- return from->action;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- CoroutineGThread *co = get_coroutine_key();
- if (!co) {
- co = g_malloc0(sizeof(*co));
- co->runnable = true;
- set_coroutine_key(co, true);
- }
-
- return &co->base;
-}
-
-bool qemu_in_coroutine(void)
-{
- CoroutineGThread *co = get_coroutine_key();
-
- return co && co->base.caller;
-}
diff --git a/coroutine-sigaltstack.c b/coroutine-sigaltstack.c
deleted file mode 100644
index 63519ff..0000000
--- a/coroutine-sigaltstack.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * sigaltstack coroutine initialization code
- *
- * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
- * Copyright (C) 2012 Alex Barcelo <abarcelo@ac.upc.edu>
-** This file is partly based on pth_mctx.c, from the GNU Portable Threads
-** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
-#undef _FORTIFY_SOURCE
-#endif
-#include <stdlib.h>
-#include <setjmp.h>
-#include <stdint.h>
-#include <pthread.h>
-#include <signal.h>
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-typedef struct {
- Coroutine base;
- void *stack;
- sigjmp_buf env;
-} CoroutineUContext;
-
-/**
- * Per-thread coroutine bookkeeping
- */
-typedef struct {
- /** Currently executing coroutine */
- Coroutine *current;
-
- /** The default coroutine */
- CoroutineUContext leader;
-
- /** Information for the signal handler (trampoline) */
- sigjmp_buf tr_reenter;
- volatile sig_atomic_t tr_called;
- void *tr_handler;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
-
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- if (!s) {
- s = g_malloc0(sizeof(*s));
- s->current = &s->leader.base;
- pthread_setspecific(thread_state_key, s);
- }
- return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
- CoroutineThreadState *s = opaque;
-
- g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
- int ret;
-
- ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
- if (ret != 0) {
- fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
- abort();
- }
-}
-
-/* "boot" function
- * This is what starts the coroutine, is called from the trampoline
- * (from the signal handler when it is not signal handling, read ahead
- * for more information).
- */
-static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
-{
- /* Initialize longjmp environment and switch back the caller */
- if (!sigsetjmp(self->env, 0)) {
- siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
- }
-
- while (true) {
- co->entry(co->entry_arg);
- qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
- }
-}
-
-/*
- * This is used as the signal handler. This is called with the brand new stack
- * (thanks to sigaltstack). We have to return, given that this is a signal
- * handler and the sigmask and some other things are changed.
- */
-static void coroutine_trampoline(int signal)
-{
- CoroutineUContext *self;
- Coroutine *co;
- CoroutineThreadState *coTS;
-
- /* Get the thread specific information */
- coTS = coroutine_get_thread_state();
- self = coTS->tr_handler;
- coTS->tr_called = 1;
- co = &self->base;
-
- /*
- * Here we have to do a bit of a ping pong between the caller, given that
- * this is a signal handler and we have to do a return "soon". Then the
- * caller can reestablish everything and do a siglongjmp here again.
- */
- if (!sigsetjmp(coTS->tr_reenter, 0)) {
- return;
- }
-
- /*
- * Ok, the caller has siglongjmp'ed back to us, so now prepare
- * us for the real machine state switching. We have to jump
- * into another function here to get a new stack context for
- * the auto variables (which have to be auto-variables
- * because the start of the thread happens later). Else with
- * PIC (i.e. Position Independent Code which is used when PTH
- * is built as a shared library) most platforms would
- * horrible core dump as experience showed.
- */
- coroutine_bootstrap(self, co);
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- const size_t stack_size = 1 << 20;
- CoroutineUContext *co;
- CoroutineThreadState *coTS;
- struct sigaction sa;
- struct sigaction osa;
- stack_t ss;
- stack_t oss;
- sigset_t sigs;
- sigset_t osigs;
- sigjmp_buf old_env;
-
- /* The way to manipulate stack is with the sigaltstack function. We
- * prepare a stack, with it delivering a signal to ourselves and then
- * put sigsetjmp/siglongjmp where needed.
- * This has been done keeping coroutine-ucontext as a model and with the
- * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
- * of the coroutines and see pth_mctx.c (from the pth project) for the
- * sigaltstack way of manipulating stacks.
- */
-
- co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
- co->base.entry_arg = &old_env; /* stash away our jmp_buf */
-
- coTS = coroutine_get_thread_state();
- coTS->tr_handler = co;
-
- /*
- * Preserve the SIGUSR2 signal state, block SIGUSR2,
- * and establish our signal handler. The signal will
- * later transfer control onto the signal stack.
- */
- sigemptyset(&sigs);
- sigaddset(&sigs, SIGUSR2);
- pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
- sa.sa_handler = coroutine_trampoline;
- sigfillset(&sa.sa_mask);
- sa.sa_flags = SA_ONSTACK;
- if (sigaction(SIGUSR2, &sa, &osa) != 0) {
- abort();
- }
-
- /*
- * Set the new stack.
- */
- ss.ss_sp = co->stack;
- ss.ss_size = stack_size;
- ss.ss_flags = 0;
- if (sigaltstack(&ss, &oss) < 0) {
- abort();
- }
-
- /*
- * Now transfer control onto the signal stack and set it up.
- * It will return immediately via "return" after the sigsetjmp()
- * was performed. Be careful here with race conditions. The
- * signal can be delivered the first time sigsuspend() is
- * called.
- */
- coTS->tr_called = 0;
- pthread_kill(pthread_self(), SIGUSR2);
- sigfillset(&sigs);
- sigdelset(&sigs, SIGUSR2);
- while (!coTS->tr_called) {
- sigsuspend(&sigs);
- }
-
- /*
- * Inform the system that we are back off the signal stack by
- * removing the alternative signal stack. Be careful here: It
- * first has to be disabled, before it can be removed.
- */
- sigaltstack(NULL, &ss);
- ss.ss_flags = SS_DISABLE;
- if (sigaltstack(&ss, NULL) < 0) {
- abort();
- }
- sigaltstack(NULL, &ss);
- if (!(oss.ss_flags & SS_DISABLE)) {
- sigaltstack(&oss, NULL);
- }
-
- /*
- * Restore the old SIGUSR2 signal handler and mask
- */
- sigaction(SIGUSR2, &osa, NULL);
- pthread_sigmask(SIG_SETMASK, &osigs, NULL);
-
- /*
- * Now enter the trampoline again, but this time not as a signal
- * handler. Instead we jump into it directly. The functionally
- * redundant ping-pong pointer arithmetic is necessary to avoid
- * type-conversion warnings related to the `volatile' qualifier and
- * the fact that `jmp_buf' usually is an array type.
- */
- if (!sigsetjmp(old_env, 0)) {
- siglongjmp(coTS->tr_reenter, 1);
- }
-
- /*
- * Ok, we returned again, so now we're finished
- */
-
- return &co->base;
-}
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
-
- g_free(co->stack);
- g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
- CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
- CoroutineThreadState *s = coroutine_get_thread_state();
- int ret;
-
- s->current = to_;
-
- ret = sigsetjmp(from->env, 0);
- if (ret == 0) {
- siglongjmp(to->env, action);
- }
- return ret;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- CoroutineThreadState *s = coroutine_get_thread_state();
-
- return s->current;
-}
-
-bool qemu_in_coroutine(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- return s && s->current->caller;
-}
-
diff --git a/coroutine-ucontext.c b/coroutine-ucontext.c
deleted file mode 100644
index 4bf2cde..0000000
--- a/coroutine-ucontext.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * ucontext coroutine initialization code
- *
- * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
- * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.0 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
-#ifdef _FORTIFY_SOURCE
-#undef _FORTIFY_SOURCE
-#endif
-#include <stdlib.h>
-#include <setjmp.h>
-#include <stdint.h>
-#include <pthread.h>
-#include <ucontext.h>
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-#ifdef CONFIG_VALGRIND_H
-#include <valgrind/valgrind.h>
-#endif
-
-typedef struct {
- Coroutine base;
- void *stack;
- sigjmp_buf env;
-
-#ifdef CONFIG_VALGRIND_H
- unsigned int valgrind_stack_id;
-#endif
-
-} CoroutineUContext;
-
-/**
- * Per-thread coroutine bookkeeping
- */
-typedef struct {
- /** Currently executing coroutine */
- Coroutine *current;
-
- /** The default coroutine */
- CoroutineUContext leader;
-} CoroutineThreadState;
-
-static pthread_key_t thread_state_key;
-
-/*
- * va_args to makecontext() must be type 'int', so passing
- * the pointer we need may require several int args. This
- * union is a quick hack to let us do that
- */
-union cc_arg {
- void *p;
- int i[2];
-};
-
-static CoroutineThreadState *coroutine_get_thread_state(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- if (!s) {
- s = g_malloc0(sizeof(*s));
- s->current = &s->leader.base;
- pthread_setspecific(thread_state_key, s);
- }
- return s;
-}
-
-static void qemu_coroutine_thread_cleanup(void *opaque)
-{
- CoroutineThreadState *s = opaque;
-
- g_free(s);
-}
-
-static void __attribute__((constructor)) coroutine_init(void)
-{
- int ret;
-
- ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
- if (ret != 0) {
- fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
- abort();
- }
-}
-
-static void coroutine_trampoline(int i0, int i1)
-{
- union cc_arg arg;
- CoroutineUContext *self;
- Coroutine *co;
-
- arg.i[0] = i0;
- arg.i[1] = i1;
- self = arg.p;
- co = &self->base;
-
- /* Initialize longjmp environment and switch back the caller */
- if (!sigsetjmp(self->env, 0)) {
- siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
- }
-
- while (true) {
- co->entry(co->entry_arg);
- qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
- }
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- const size_t stack_size = 1 << 20;
- CoroutineUContext *co;
- ucontext_t old_uc, uc;
- sigjmp_buf old_env;
- union cc_arg arg = {0};
-
- /* The ucontext functions preserve signal masks which incurs a
- * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
- * preserve signal masks but only works on the current stack.
- * Since we need a way to create and switch to a new stack, use
- * the ucontext functions for that but sigsetjmp()/siglongjmp() for
- * everything else.
- */
-
- if (getcontext(&uc) == -1) {
- abort();
- }
-
- co = g_malloc0(sizeof(*co));
- co->stack = g_malloc(stack_size);
- co->base.entry_arg = &old_env; /* stash away our jmp_buf */
-
- uc.uc_link = &old_uc;
- uc.uc_stack.ss_sp = co->stack;
- uc.uc_stack.ss_size = stack_size;
- uc.uc_stack.ss_flags = 0;
-
-#ifdef CONFIG_VALGRIND_H
- co->valgrind_stack_id =
- VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
-#endif
-
- arg.p = co;
-
- makecontext(&uc, (void (*)(void))coroutine_trampoline,
- 2, arg.i[0], arg.i[1]);
-
- /* swapcontext() in, siglongjmp() back out */
- if (!sigsetjmp(old_env, 0)) {
- swapcontext(&old_uc, &uc);
- }
- return &co->base;
-}
-
-#ifdef CONFIG_VALGRIND_H
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-/* Work around an unused variable in the valgrind.h macro... */
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-static inline void valgrind_stack_deregister(CoroutineUContext *co)
-{
- VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
-}
-#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
-#pragma GCC diagnostic pop
-#endif
-#endif
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
-
-#ifdef CONFIG_VALGRIND_H
- valgrind_stack_deregister(co);
-#endif
-
- g_free(co->stack);
- g_free(co);
-}
-
-CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
- CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
- CoroutineThreadState *s = coroutine_get_thread_state();
- int ret;
-
- s->current = to_;
-
- ret = sigsetjmp(from->env, 0);
- if (ret == 0) {
- siglongjmp(to->env, action);
- }
- return ret;
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- CoroutineThreadState *s = coroutine_get_thread_state();
-
- return s->current;
-}
-
-bool qemu_in_coroutine(void)
-{
- CoroutineThreadState *s = pthread_getspecific(thread_state_key);
-
- return s && s->current->caller;
-}
diff --git a/coroutine-win32.c b/coroutine-win32.c
deleted file mode 100644
index 17ace37..0000000
--- a/coroutine-win32.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Win32 coroutine initialization code
- *
- * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-#include "qemu-common.h"
-#include "block/coroutine_int.h"
-
-typedef struct
-{
- Coroutine base;
-
- LPVOID fiber;
- CoroutineAction action;
-} CoroutineWin32;
-
-static __thread CoroutineWin32 leader;
-static __thread Coroutine *current;
-
-/* This function is marked noinline to prevent GCC from inlining it
- * into coroutine_trampoline(). If we allow it to do that then it
- * hoists the code to get the address of the TLS variable "current"
- * out of the while() loop. This is an invalid transformation because
- * the SwitchToFiber() call may be called when running thread A but
- * return in thread B, and so we might be in a different thread
- * context each time round the loop.
- */
-CoroutineAction __attribute__((noinline))
-qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
- CoroutineAction action)
-{
- CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
- CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
-
- current = to_;
-
- to->action = action;
- SwitchToFiber(to->fiber);
- return from->action;
-}
-
-static void CALLBACK coroutine_trampoline(void *co_)
-{
- Coroutine *co = co_;
-
- while (true) {
- co->entry(co->entry_arg);
- qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
- }
-}
-
-Coroutine *qemu_coroutine_new(void)
-{
- const size_t stack_size = 1 << 20;
- CoroutineWin32 *co;
-
- co = g_malloc0(sizeof(*co));
- co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
- return &co->base;
-}
-
-void qemu_coroutine_delete(Coroutine *co_)
-{
- CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
-
- DeleteFiber(co->fiber);
- g_free(co);
-}
-
-Coroutine *qemu_coroutine_self(void)
-{
- if (!current) {
- current = &leader.base;
- leader.fiber = ConvertThreadToFiber(NULL);
- }
- return current;
-}
-
-bool qemu_in_coroutine(void)
-{
- return current && current->caller;
-}
diff --git a/cpu-exec-common.c b/cpu-exec-common.c
new file mode 100644
index 0000000..0cb4ae6
--- /dev/null
+++ b/cpu-exec-common.c
@@ -0,0 +1,79 @@
+/*
+ * emulator main execution loop
+ *
+ * Copyright (c) 2003-2005 Fabrice Bellard
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "sysemu/cpus.h"
+#include "exec/exec-all.h"
+#include "exec/memory-internal.h"
+
+bool exit_request;
+CPUState *tcg_current_cpu;
+
+/* exit the current TB, but without causing any exception to be raised */
+void cpu_loop_exit_noexc(CPUState *cpu)
+{
+ /* XXX: restore cpu registers saved in host registers */
+
+ cpu->exception_index = -1; /* presumably -1 == "no exception" - confirm */
+ siglongjmp(cpu->jmp_env, 1); /* non-local jump; does not return */
+}
+
+#if defined(CONFIG_SOFTMMU)
+void cpu_reloading_memory_map(void)
+{
+ if (qemu_in_vcpu_thread()) {
+ /* The guest can in theory prolong the RCU critical section as long
+ * as it feels like. The major problem with this is that because it
+ * can do multiple reconfigurations of the memory map within the
+ * critical section, we could potentially accumulate an unbounded
+ * collection of memory data structures awaiting reclamation.
+ *
+ * Because the only thing we're currently protecting with RCU is the
+ * memory data structures, it's sufficient to break the critical section
+ * in this callback, which we know will get called every time the
+ * memory map is rearranged.
+ *
+ * (If we add anything else in the system that uses RCU to protect
+ * its data structures, we will need to implement some other mechanism
+ * to force TCG CPUs to exit the critical section, at which point this
+ * part of this callback might become unnecessary.)
+ *
+ * This pair matches cpu_exec's rcu_read_lock()/rcu_read_unlock(), which
+ * only protects cpu->as->dispatch. Since we know our caller is about
+ * to reload it, it's safe to split the critical section.
+ */
+ rcu_read_unlock(); /* end the critical section held so far ... */
+ rcu_read_lock(); /* ... and immediately start a fresh one */
+ }
+}
+#endif /* CONFIG_SOFTMMU */
+
+void cpu_loop_exit(CPUState *cpu)
+{
+ siglongjmp(cpu->jmp_env, 1); /* non-local jump via jmp_env; no return */
+}
+
+void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc)
+{
+ if (pc) { /* a zero pc skips the state restore */
+ cpu_restore_state(cpu, pc);
+ }
+ siglongjmp(cpu->jmp_env, 1); /* non-local jump via jmp_env; no return */
+}
diff --git a/cpu-exec.c b/cpu-exec.c
index 3f3529e..02e4c97 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -16,15 +16,26 @@
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "config.h"
+#include "qemu/osdep.h"
#include "cpu.h"
#include "trace.h"
#include "disas/disas.h"
+#include "exec/exec-all.h"
#include "tcg.h"
#include "qemu/atomic.h"
+#ifdef CONFIG_HAX
+#include "sysemu/hax.h"
+#endif /* CONFIG_HAX */
#include "sysemu/qtest.h"
#include "qemu/timer.h"
-#include "sysemu/hax.h"
+#include "exec/address-spaces.h"
+#include "qemu/rcu.h"
+#include "exec/tb-hash.h"
+#include "exec/log.h"
+#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
+#include "hw/i386/apic.h"
+#endif
+#include "sysemu/replay.h"
/* -icount align implementation. */
@@ -62,8 +73,7 @@
sleep_delay.tv_sec = sc->diff_clk / 1000000000LL;
sleep_delay.tv_nsec = sc->diff_clk % 1000000000LL;
if (nanosleep(&sleep_delay, &rem_delay) < 0) {
- sc->diff_clk -= (sleep_delay.tv_sec - rem_delay.tv_sec) * 1000000000LL;
- sc->diff_clk -= sleep_delay.tv_nsec - rem_delay.tv_nsec;
+ sc->diff_clk = rem_delay.tv_sec * 1000000000LL + rem_delay.tv_nsec;
} else {
sc->diff_clk = 0;
}
@@ -102,10 +112,8 @@
if (!icount_align_option) {
return;
}
- sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
- sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) -
- sc->realtime_clock +
- cpu_get_clock_offset();
+ sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
+ sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
if (sc->diff_clk < max_delay) {
max_delay = sc->diff_clk;
@@ -128,30 +136,18 @@
}
#endif /* CONFIG USER ONLY */
-void cpu_loop_exit(CPUState *cpu)
-{
- cpu->current_tb = NULL;
- siglongjmp(cpu->jmp_env, 1);
-}
-
-/* exit the current TB from a signal handler. The host registers are
- restored in a state compatible with the CPU emulator
- */
-#if defined(CONFIG_SOFTMMU)
-void cpu_resume_from_signal(CPUState *cpu, void *puc)
-{
- /* XXX: restore cpu registers saved in host registers */
-
- cpu->exception_index = -1;
- siglongjmp(cpu->jmp_env, 1);
-}
-#endif
-
/* Execute a TB, and fix up the CPU state afterwards if necessary */
-static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
+static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
{
CPUArchState *env = cpu->env_ptr;
- uintptr_t next_tb;
+ uintptr_t ret;
+ TranslationBlock *last_tb;
+ int tb_exit;
+ uint8_t *tb_ptr = itb->tc_ptr;
+
+ qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
+ "Trace %p [" TARGET_FMT_lx "] %s\n",
+ itb->tc_ptr, itb->pc, lookup_symbol(itb->pc));
#if defined(DEBUG_DISAS)
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)) {
@@ -169,136 +165,230 @@
}
#endif /* DEBUG_DISAS */
- next_tb = tcg_qemu_tb_exec(env, tb_ptr);
- trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK);
+ cpu->can_do_io = !use_icount;
+ ret = tcg_qemu_tb_exec(env, tb_ptr);
+ cpu->can_do_io = 1;
+ last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
+ tb_exit = ret & TB_EXIT_MASK;
+ trace_exec_tb_exit(last_tb, tb_exit);
- if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
+ if (tb_exit > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
* of the start of the TB.
*/
CPUClass *cc = CPU_GET_CLASS(cpu);
- TranslationBlock *tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
+ qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
+ "Stopped execution of TB chain before %p ["
+ TARGET_FMT_lx "] %s\n",
+ last_tb->tc_ptr, last_tb->pc,
+ lookup_symbol(last_tb->pc));
if (cc->synchronize_from_tb) {
- cc->synchronize_from_tb(cpu, tb);
+ cc->synchronize_from_tb(cpu, last_tb);
} else {
assert(cc->set_pc);
- cc->set_pc(cpu, tb->pc);
+ cc->set_pc(cpu, last_tb->pc);
}
}
- if ((next_tb & TB_EXIT_MASK) == TB_EXIT_REQUESTED) {
+ if (tb_exit == TB_EXIT_REQUESTED) {
/* We were asked to stop executing TBs (probably a pending
* interrupt. We've now stopped, so clear the flag.
*/
cpu->tcg_exit_req = 0;
}
- return next_tb;
+ return ret;
}
+#ifndef CONFIG_USER_ONLY
/* Execute the code without caching the generated code. An interpreter
could be used if available. */
-static void cpu_exec_nocache(CPUArchState *env, int max_cycles,
- TranslationBlock *orig_tb)
+static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
+ TranslationBlock *orig_tb, bool ignore_icount)
{
- CPUState *cpu = ENV_GET_CPU(env);
TranslationBlock *tb;
+ bool old_tb_flushed;
/* Should never happen.
We only end up here when an existing TB is too long. */
if (max_cycles > CF_COUNT_MASK)
max_cycles = CF_COUNT_MASK;
+ old_tb_flushed = cpu->tb_flushed;
+ cpu->tb_flushed = false;
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
- max_cycles);
- cpu->current_tb = tb;
+ max_cycles | CF_NOCACHE
+ | (ignore_icount ? CF_IGNORE_ICOUNT : 0));
+ tb->orig_tb = cpu->tb_flushed ? NULL : orig_tb;
+ cpu->tb_flushed |= old_tb_flushed;
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
- cpu_tb_exec(cpu, tb->tc_ptr);
- cpu->current_tb = NULL;
+ cpu_tb_exec(cpu, tb);
tb_phys_invalidate(tb, -1);
tb_free(tb);
}
+#endif
-static TranslationBlock *tb_find_slow(CPUArchState *env,
- target_ulong pc,
- target_ulong cs_base,
- uint64_t flags)
+struct tb_desc {
+ target_ulong pc;
+ target_ulong cs_base;
+ CPUArchState *env;
+ tb_page_addr_t phys_page1;
+ uint32_t flags;
+};
+
+static bool tb_cmp(const void *p, const void *d)
{
- CPUState *cpu = ENV_GET_CPU(env);
- TranslationBlock *tb, **ptb1;
- unsigned int h;
- tb_page_addr_t phys_pc, phys_page1;
- target_ulong virt_page2;
+ const TranslationBlock *tb = p;
+ const struct tb_desc *desc = d;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+ if (tb->pc == desc->pc &&
+ tb->page_addr[0] == desc->phys_page1 &&
+ tb->cs_base == desc->cs_base &&
+ tb->flags == desc->flags) {
+ /* check next page if needed */
+ if (tb->page_addr[1] == -1) {
+ return true;
+ } else {
+ tb_page_addr_t phys_page2;
+ target_ulong virt_page2;
- /* find translated block using physical mappings */
- phys_pc = get_page_addr_code(env, pc);
- phys_page1 = phys_pc & TARGET_PAGE_MASK;
- h = tb_phys_hash_func(phys_pc);
- ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
- for(;;) {
- tb = *ptb1;
- if (!tb)
- goto not_found;
- if (tb->pc == pc &&
- tb->page_addr[0] == phys_page1 &&
- tb->cs_base == cs_base &&
- tb->flags == flags) {
- /* check next page if needed */
- if (tb->page_addr[1] != -1) {
- tb_page_addr_t phys_page2;
-
- virt_page2 = (pc & TARGET_PAGE_MASK) +
- TARGET_PAGE_SIZE;
- phys_page2 = get_page_addr_code(env, virt_page2);
- if (tb->page_addr[1] == phys_page2)
- goto found;
- } else {
- goto found;
+ virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
+ phys_page2 = get_page_addr_code(desc->env, virt_page2);
+ if (tb->page_addr[1] == phys_page2) {
+ return true;
}
}
- ptb1 = &tb->phys_hash_next;
}
- not_found:
- /* if no translated code available, then translate it now */
+ return false;
+}
+
+static TranslationBlock *tb_find_physical(CPUState *cpu,
+ target_ulong pc,
+ target_ulong cs_base,
+ uint32_t flags)
+{
+ tb_page_addr_t phys_pc;
+ struct tb_desc desc;
+ uint32_t h;
+
+ desc.env = (CPUArchState *)cpu->env_ptr;
+ desc.cs_base = cs_base;
+ desc.flags = flags;
+ desc.pc = pc;
+ phys_pc = get_page_addr_code(desc.env, pc);
+ desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
+ h = tb_hash_func(phys_pc, pc, flags);
+ return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &desc, h);
+}
+
+static TranslationBlock *tb_find_slow(CPUState *cpu,
+ target_ulong pc,
+ target_ulong cs_base,
+ uint32_t flags)
+{
+ TranslationBlock *tb;
+
+ tb = tb_find_physical(cpu, pc, cs_base, flags);
+ if (tb) {
+ goto found;
+ }
+
+#ifdef CONFIG_USER_ONLY
+ /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
+ * taken outside tb_lock. Since we're momentarily dropping
+ * tb_lock, there's a chance that our desired tb has been
+ * translated.
+ */
+ tb_unlock();
+ mmap_lock();
+ tb_lock();
+ tb = tb_find_physical(cpu, pc, cs_base, flags);
+ if (tb) {
+ mmap_unlock();
+ goto found;
+ }
+#endif
+
+ /* if no translated code available, then translate it now */
tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
- found:
- /* Move the last found TB to the head of the list */
- if (likely(*ptb1)) {
- *ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
- tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
- }
+#ifdef CONFIG_USER_ONLY
+ mmap_unlock();
+#endif
+
+found:
/* we add the TB in the virtual pc hash table */
cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
return tb;
}
-static inline TranslationBlock *tb_find_fast(CPUArchState *env)
+static inline TranslationBlock *tb_find_fast(CPUState *cpu,
+ TranslationBlock **last_tb,
+ int tb_exit)
{
- CPUState *cpu = ENV_GET_CPU(env);
+ CPUArchState *env = (CPUArchState *)cpu->env_ptr;
TranslationBlock *tb;
target_ulong cs_base, pc;
- int flags;
+ uint32_t flags;
/* we record a subset of the CPU state. It will
always be the same before a given translated block
is executed. */
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+ tb_lock();
tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
tb->flags != flags)) {
- tb = tb_find_slow(env, pc, cs_base, flags);
+ tb = tb_find_slow(cpu, pc, cs_base, flags);
}
+ if (cpu->tb_flushed) {
+ /* Ensure that no TB jump will be modified as the
+ * translation buffer has been flushed.
+ */
+ *last_tb = NULL;
+ cpu->tb_flushed = false;
+ }
+#ifndef CONFIG_USER_ONLY
+ /* We don't take care of direct jumps when address mapping changes in
+ * system emulation. So it's not safe to make a direct jump to a TB
+ * spanning two pages because the mapping for the second page can change.
+ */
+ if (tb->page_addr[1] != -1) {
+ *last_tb = NULL;
+ }
+#endif
+ /* See if we can patch the calling TB. */
+ if (*last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
+ tb_add_jump(*last_tb, tb_exit, tb);
+ }
+ tb_unlock();
return tb;
}
-static void cpu_handle_debug_exception(CPUArchState *env)
+static inline bool cpu_handle_halt(CPUState *cpu)
{
- CPUState *cpu = ENV_GET_CPU(env);
+ if (cpu->halted) {
+#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
+ if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
+ && replay_interrupt()) {
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ apic_poll_irq(x86_cpu->apic_state);
+ cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
+ }
+#endif
+ if (!cpu_has_work(cpu)) {
+ current_cpu = NULL;
+ return true;
+ }
+
+ cpu->halted = 0;
+ }
+
+ return false;
+}
+
+static inline void cpu_handle_debug_exception(CPUState *cpu)
+{
CPUClass *cc = CPU_GET_CLASS(cpu);
CPUWatchpoint *wp;
@@ -311,67 +401,223 @@
cc->debug_excp_handler(cpu);
}
-/* main execution loop */
-
-volatile sig_atomic_t exit_request;
-
-/*
- * QEMU emulate can happens because of MMIO or emulation mode, i.e. non-PG mode,
- * when it's because of MMIO, the MMIO, the interrupt should not be emulated,
- * because MMIO is emulated for only one instruction now and then back to
- * HAX kernel
- */
-static int need_handle_intr_request(CPUState *cpu)
+static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
-#ifdef CONFIG_HAX
- if (!hax_enabled() || hax_vcpu_emulation_mode(cpu))
- return cpu->interrupt_request;
- return 0;
+ if (cpu->exception_index >= 0) {
+ if (cpu->exception_index >= EXCP_INTERRUPT) {
+ /* exit request from the cpu execution loop */
+ *ret = cpu->exception_index;
+ if (*ret == EXCP_DEBUG) {
+ cpu_handle_debug_exception(cpu);
+ }
+ cpu->exception_index = -1;
+ return true;
+ } else {
+#if defined(CONFIG_USER_ONLY)
+ /* if user mode only, we simulate a fake exception
+ which will be handled outside the cpu execution
+ loop */
+#if defined(TARGET_I386)
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ cc->do_interrupt(cpu);
+#endif
+ *ret = cpu->exception_index;
+ cpu->exception_index = -1;
+ return true;
#else
- return cpu->interrupt_request;
+ if (replay_exception()) {
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ cc->do_interrupt(cpu);
+ cpu->exception_index = -1;
+ } else if (!replay_has_interrupt()) {
+ /* give a chance to iothread in replay mode */
+ *ret = EXCP_INTERRUPT;
+ return true;
+ }
#endif
-}
-
-int cpu_exec(CPUArchState *env)
-{
- CPUState *cpu = ENV_GET_CPU(env);
- CPUClass *cc = CPU_GET_CLASS(cpu);
-#ifdef TARGET_I386
- X86CPU *x86_cpu = X86_CPU(cpu);
-#endif
- int ret, interrupt_request;
- TranslationBlock *tb;
- uint8_t *tc_ptr;
- uintptr_t next_tb;
- SyncClocks sc;
-
- /* This must be volatile so it is not trashed by longjmp() */
- volatile bool have_tb_lock = false;
-
- if (cpu->halted) {
- if (!cpu_has_work(cpu)) {
- return EXCP_HALTED;
}
-
- cpu->halted = 0;
+#ifndef CONFIG_USER_ONLY
+ } else if (replay_has_exception()
+ && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
+ /* try to cause an exception pending in the log */
+ TranslationBlock *last_tb = NULL; /* Avoid chaining TBs */
+ cpu_exec_nocache(cpu, 1, tb_find_fast(cpu, &last_tb, 0), true);
+ *ret = -1;
+ return true;
+#endif
}
+ return false;
+}
+
+static inline int cpu_get_interrupt_request(CPUState *cpu)
+{
+#ifdef CONFIG_HAX
+ /* When HAX is enabled, there are two cases where TCG emulation might happen:
+ * MMIO instructions, or non-paged mode. When this is due to an MMIO, the interrupt
+ * should not be emulated because only one instruction will be translated and run
+ * through TCG before returning to the HAX kernel.
+ */
+ if (hax_enabled() && !hax_vcpu_emulation_mode(cpu)) {
+ /* Mask interrupt during MMIO emulation. */
+ return 0;
+ }
+#endif
+ return cpu->interrupt_request;
+}
+
+static inline void cpu_handle_interrupt(CPUState *cpu,
+ TranslationBlock **last_tb)
+{
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ int interrupt_request = cpu_get_interrupt_request(cpu);
+
+ if (unlikely(interrupt_request)) {
+ if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
+ /* Mask out external interrupts for this step. */
+ interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
+ }
+ if (interrupt_request & CPU_INTERRUPT_DEBUG) {
+ cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
+ cpu->exception_index = EXCP_DEBUG;
+ cpu_loop_exit(cpu);
+ }
+ if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
+ /* Do nothing */
+ } else if (interrupt_request & CPU_INTERRUPT_HALT) {
+ replay_interrupt();
+ cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
+ cpu->halted = 1;
+ cpu->exception_index = EXCP_HLT;
+ cpu_loop_exit(cpu);
+ }
+#if defined(TARGET_I386)
+ else if (interrupt_request & CPU_INTERRUPT_INIT) {
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUArchState *env = &x86_cpu->env;
+ replay_interrupt();
+ cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
+ do_cpu_init(x86_cpu);
+ cpu->exception_index = EXCP_HALTED;
+ cpu_loop_exit(cpu);
+ }
+#else
+ else if (interrupt_request & CPU_INTERRUPT_RESET) {
+ replay_interrupt();
+ cpu_reset(cpu);
+ cpu_loop_exit(cpu);
+ }
+#endif
+ /* The target hook has 3 exit conditions:
+ it returns false when the interrupt isn't processed,
+ it returns true when it is, and we should restart on a new TB,
+ or it does not return at all and exits via longjmp in cpu_loop_exit. */
+ else {
+ replay_interrupt();
+ if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
+ *last_tb = NULL;
+ }
+ /* The target hook may have updated the 'cpu->interrupt_request';
+ * reload the 'interrupt_request' value */
+ interrupt_request = cpu->interrupt_request;
+ }
+ if (interrupt_request & CPU_INTERRUPT_EXITTB) {
+ cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
+ /* ensure that no TB jump will be modified as
+ the program flow was changed */
+ *last_tb = NULL;
+ }
+ }
+ if (unlikely(cpu->exit_request || replay_has_interrupt())) {
+ cpu->exit_request = 0;
+ cpu->exception_index = EXCP_INTERRUPT;
+ cpu_loop_exit(cpu);
+ }
+}
+
+static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
+ TranslationBlock **last_tb, int *tb_exit,
+ SyncClocks *sc)
+{
+ uintptr_t ret;
+
+ if (unlikely(cpu->exit_request)) {
+ return;
+ }
+
+ trace_exec_tb(tb, tb->pc);
+ ret = cpu_tb_exec(cpu, tb);
+ *last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
+ *tb_exit = ret & TB_EXIT_MASK;
+ switch (*tb_exit) {
+ case TB_EXIT_REQUESTED:
+ /* Something asked us to stop executing
+ * chained TBs; just continue round the main
+ * loop. Whatever requested the exit will also
+ * have set something else (eg exit_request or
+ * interrupt_request) which we will handle
+ * next time around the loop. But we need to
+ * ensure the tcg_exit_req read in generated code
+ * comes before the next read of cpu->exit_request
+ * or cpu->interrupt_request.
+ */
+ smp_rmb();
+ *last_tb = NULL;
+ break;
+ case TB_EXIT_ICOUNT_EXPIRED:
+ {
+ /* Instruction counter expired. */
+#ifdef CONFIG_USER_ONLY
+ abort();
+#else
+ int insns_left = cpu->icount_decr.u32;
+ if (cpu->icount_extra && insns_left >= 0) {
+ /* Refill decrementer and continue execution. */
+ cpu->icount_extra += insns_left;
+ insns_left = MIN(0xffff, cpu->icount_extra);
+ cpu->icount_extra -= insns_left;
+ cpu->icount_decr.u16.low = insns_left;
+ } else {
+ if (insns_left > 0) {
+ /* Execute remaining instructions. */
+ cpu_exec_nocache(cpu, insns_left, *last_tb, false);
+ align_clocks(sc, cpu);
+ }
+ cpu->exception_index = EXCP_INTERRUPT;
+ *last_tb = NULL;
+ cpu_loop_exit(cpu);
+ }
+ break;
+#endif
+ }
+ default:
+ break;
+ }
+}
+
+/* main execution loop */
+
+int cpu_exec(CPUState *cpu)
+{
+ CPUClass *cc = CPU_GET_CLASS(cpu);
+ int ret;
+ SyncClocks sc = {};
+
+ /* replay_interrupt may need current_cpu */
current_cpu = cpu;
- /* As long as current_cpu is null, up to the assignment just above,
- * requests by other threads to exit the execution loop are expected to
- * be issued using the exit_request global. We must make sure that our
- * evaluation of the global value is performed past the current_cpu
- * value transition point, which requires a memory barrier as well as
- * an instruction scheduling constraint on modern architectures. */
- smp_mb();
+ if (cpu_handle_halt(cpu)) {
+ return EXCP_HALTED;
+ }
- if (unlikely(exit_request)) {
+ atomic_mb_set(&tcg_current_cpu, cpu);
+ rcu_read_lock();
+
+ if (unlikely(atomic_mb_read(&exit_request))) {
cpu->exit_request = 1;
}
cc->cpu_exec_enter(cpu);
- cpu->exception_index = -1;
/* Calculate difference between guest clock and host clock.
* This delay includes the delay of the last cycle, so
@@ -380,204 +626,75 @@
*/
init_delay_params(&sc, cpu);
- /* prepare setjmp context for exception handling */
for(;;) {
+ /* prepare setjmp context for exception handling */
if (sigsetjmp(cpu->jmp_env, 0) == 0) {
+ TranslationBlock *tb, *last_tb = NULL;
+ int tb_exit = 0;
+
/* if an exception is pending, we execute it here */
- if (cpu->exception_index >= 0) {
- if (cpu->exception_index >= EXCP_INTERRUPT) {
- /* exit request from the cpu execution loop */
- ret = cpu->exception_index;
- if (ret == EXCP_DEBUG) {
- cpu_handle_debug_exception(env);
- }
- break;
- } else {
-#if defined(CONFIG_USER_ONLY)
- /* if user mode only, we simulate a fake exception
- which will be handled outside the cpu execution
- loop */
-#if defined(TARGET_I386)
- cc->do_interrupt(cpu);
-#endif
- ret = cpu->exception_index;
- break;
-#else
- cc->do_interrupt(cpu);
- cpu->exception_index = -1;
-#endif
- }
+ if (cpu_handle_exception(cpu, &ret)) {
+ break;
}
#ifdef CONFIG_HAX
- if (hax_enabled() && !hax_vcpu_exec(cpu))
- longjmp(cpu->jmp_env, 1);
-#endif
+ /* When HAX is enabled but VMX "unrestricted guest" mode is not
+ * supported, call hax_vcpu_exec() to run the current instructions.
+ * The function returns 0 when execution should stop immediately
+ * (e.g. if the vCPU is halted, or received an interrupt). However,
+ * it returns non-zero to indicate that the next instructions need
+ * to be handled through TCG. This happens when the virtual CPU runs
+ * in "real mode", or to handle MMIO operations only. */
+ if (hax_enabled() && !hax_vcpu_exec(cpu)) {
+ break;
+ }
+#endif /* CONFIG_HAX */
- next_tb = 0; /* force lookup of first TB */
+ cpu->tb_flushed = false; /* reset before first TB lookup */
for(;;) {
- interrupt_request = need_handle_intr_request(cpu);
- if (unlikely(interrupt_request)) {
- if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
- /* Mask out external interrupts for this step. */
- interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
- }
- if (interrupt_request & CPU_INTERRUPT_DEBUG) {
- cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
- cpu->exception_index = EXCP_DEBUG;
- cpu_loop_exit(cpu);
- }
- if (interrupt_request & CPU_INTERRUPT_HALT) {
- cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
- cpu->halted = 1;
- cpu->exception_index = EXCP_HLT;
- cpu_loop_exit(cpu);
- }
-#if defined(TARGET_I386)
- if (interrupt_request & CPU_INTERRUPT_INIT) {
- cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
- do_cpu_init(x86_cpu);
- cpu->exception_index = EXCP_HALTED;
- cpu_loop_exit(cpu);
- }
-#else
- if (interrupt_request & CPU_INTERRUPT_RESET) {
- cpu_reset(cpu);
- }
-#endif
- /* The target hook has 3 exit conditions:
- False when the interrupt isn't processed,
- True when it is, and we should restart on a new TB,
- and via longjmp via cpu_loop_exit. */
- if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
- next_tb = 0;
- }
- /* Don't use the cached interrupt_request value,
- do_interrupt may have updated the EXITTB flag. */
- if (cpu->interrupt_request & CPU_INTERRUPT_EXITTB) {
- cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
- /* ensure that no TB jump will be modified as
- the program flow was changed */
- next_tb = 0;
- }
- }
- if (unlikely(cpu->exit_request)) {
- cpu->exit_request = 0;
- cpu->exception_index = EXCP_INTERRUPT;
- cpu_loop_exit(cpu);
- }
- spin_lock(&tcg_ctx.tb_ctx.tb_lock);
- have_tb_lock = true;
- tb = tb_find_fast(env);
- /* Note: we do it here to avoid a gcc bug on Mac OS X when
- doing it in tb_find_slow */
- if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
- /* as some TB could have been invalidated because
- of memory exceptions while generating the code, we
- must recompute the hash index here */
- next_tb = 0;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
- }
- if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
- qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
- tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
- }
- /* see if we can patch the calling TB. When the TB
- spans two pages, we cannot safely do a direct
- jump. */
- if (next_tb != 0 && tb->page_addr[1] == -1) {
- tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK, tb);
- }
- have_tb_lock = false;
- spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
-
- /* cpu_interrupt might be called while translating the
- TB, but before it is linked into a potentially
- infinite loop and becomes env->current_tb. Avoid
- starting execution if there is a pending interrupt. */
- cpu->current_tb = tb;
- barrier();
- if (likely(!cpu->exit_request)) {
- trace_exec_tb(tb, tb->pc);
- tc_ptr = tb->tc_ptr;
- /* execute the generated code */
- next_tb = cpu_tb_exec(cpu, tc_ptr);
- switch (next_tb & TB_EXIT_MASK) {
- case TB_EXIT_REQUESTED:
- /* Something asked us to stop executing
- * chained TBs; just continue round the main
- * loop. Whatever requested the exit will also
- * have set something else (eg exit_request or
- * interrupt_request) which we will handle
- * next time around the loop.
- */
- tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
- next_tb = 0;
- break;
- case TB_EXIT_ICOUNT_EXPIRED:
- {
- /* Instruction counter expired. */
- int insns_left;
- tb = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK);
- insns_left = cpu->icount_decr.u32;
- if (cpu->icount_extra && insns_left >= 0) {
- /* Refill decrementer and continue execution. */
- cpu->icount_extra += insns_left;
- if (cpu->icount_extra > 0xffff) {
- insns_left = 0xffff;
- } else {
- insns_left = cpu->icount_extra;
- }
- cpu->icount_extra -= insns_left;
- cpu->icount_decr.u16.low = insns_left;
- } else {
- if (insns_left > 0) {
- /* Execute remaining instructions. */
- cpu_exec_nocache(env, insns_left, tb);
- align_clocks(&sc, cpu);
- }
- cpu->exception_index = EXCP_INTERRUPT;
- next_tb = 0;
- cpu_loop_exit(cpu);
- }
- break;
- }
- default:
- break;
- }
- }
- cpu->current_tb = NULL;
+ cpu_handle_interrupt(cpu, &last_tb);
+ tb = tb_find_fast(cpu, &last_tb, tb_exit);
+ cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit, &sc);
#ifdef CONFIG_HAX
- if (hax_enabled() && hax_stop_emulation(cpu))
+ if (hax_enabled() && hax_stop_emulation(cpu)) {
+ /* This will end TCG emulation of instructions if the vCPU
+ * just switched to paged-mode (which can be handled by
+ * hax_vcpu_exec() in the next call to this function), or
+ * if the single-instruction MMIO operation has completed.
+ * (see target-i386/translate.c).
+ */
cpu_loop_exit(cpu);
-#endif
-
+ }
+#endif /* CONFIG_HAX */
/* Try to align the host and virtual clocks
if the guest is in advance */
align_clocks(&sc, cpu);
- /* reset soft MMU for next block (it can currently
- only be set by a memory fault) */
} /* for(;;) */
} else {
- /* Reload env after longjmp - the compiler may have smashed all
- * local variables as longjmp is marked 'noreturn'. */
+#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6)
+ /* Some compilers wrongly smash all local variables after
+ * siglongjmp. There were bug reports for gcc 4.5.0 and clang.
+ * Reload essential local variables here for those compilers.
+ * Newer versions of gcc would complain about this code (-Wclobbered). */
cpu = current_cpu;
- env = cpu->env_ptr;
cc = CPU_GET_CLASS(cpu);
-#ifdef TARGET_I386
- x86_cpu = X86_CPU(cpu);
-#endif
- if (have_tb_lock) {
- spin_unlock(&tcg_ctx.tb_ctx.tb_lock);
- have_tb_lock = false;
- }
+#else /* buggy compiler */
+ /* Assert that the compiler does not smash local variables. */
+ g_assert(cpu == current_cpu);
+ g_assert(cc == CPU_GET_CLASS(cpu));
+#endif /* buggy compiler */
+ cpu->can_do_io = 1;
+ tb_lock_reset();
}
} /* for(;;) */
cc->cpu_exec_exit(cpu);
+ rcu_read_unlock();
/* fail safe : never use current_cpu outside cpu_exec() */
current_cpu = NULL;
+
+ /* Does not need atomic_mb_set because a spurious wakeup is okay. */
+ atomic_set(&tcg_current_cpu, NULL);
return ret;
}
diff --git a/cpus.c b/cpus.c
index a1df372..a2be482 100644
--- a/cpus.c
+++ b/cpus.c
@@ -23,16 +23,20 @@
*/
/* Needed early for CONFIG_BSD etc. */
-#include "config-host.h"
-
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "cpu.h"
#include "monitor/monitor.h"
#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
#include "sysemu/sysemu.h"
+#include "sysemu/block-backend.h"
#include "exec/gdbstub.h"
#include "sysemu/dma.h"
#include "sysemu/kvm.h"
#include "sysemu/hax.h"
#include "qmp-commands.h"
+#include "exec/exec-all.h"
#include "qemu/thread.h"
#include "sysemu/cpus.h"
@@ -42,6 +46,7 @@
#include "qemu/seqlock.h"
#include "qapi-event.h"
#include "hw/nmi.h"
+#include "sysemu/replay.h"
#ifndef _WIN32
#include "qemu/compatfd.h"
@@ -69,6 +74,14 @@
int64_t max_delay;
int64_t max_advance;
+/* vcpu throttling controls */
+static QEMUTimer *throttle_timer;
+static unsigned int throttle_percentage;
+
+#define CPU_THROTTLE_PCT_MIN 1
+#define CPU_THROTTLE_PCT_MAX 99
+#define CPU_THROTTLE_TIMESLICE_NS 10000000
+
bool cpu_is_stopped(CPUState *cpu)
{
return cpu->stopped || !runstate_is_running();
@@ -106,6 +119,7 @@
/* Protected by TimersState seqlock */
+static bool icount_sleep = true;
static int64_t vm_clock_warp_start = -1;
/* Conversion factor from emulated instructions to virtual clock ticks. */
static int icount_time_shift;
@@ -137,19 +151,26 @@
static TimersState timers_state;
-/* Return the virtual CPU time, based on the instruction counter. */
-static int64_t cpu_get_icount_locked(void)
+int64_t cpu_get_icount_raw(void)
{
int64_t icount;
CPUState *cpu = current_cpu;
icount = timers_state.qemu_icount;
if (cpu) {
- if (!cpu_can_do_io(cpu)) {
- fprintf(stderr, "Bad clock read\n");
+ if (!cpu->can_do_io) {
+ fprintf(stderr, "Bad icount read\n");
+ exit(1);
}
icount -= (cpu->icount_decr.u16.low + cpu->icount_extra);
}
+ return icount;
+}
+
+/* Return the virtual CPU time, based on the instruction counter. */
+static int64_t cpu_get_icount_locked(void)
+{
+ int64_t icount = cpu_get_icount_raw();
return timers_state.qemu_icount_bias + cpu_icount_to_ns(icount);
}
@@ -183,7 +204,7 @@
ticks = timers_state.cpu_ticks_offset;
if (timers_state.cpu_ticks_enabled) {
- ticks += cpu_get_real_ticks();
+ ticks += cpu_get_host_ticks();
}
if (timers_state.cpu_ticks_prev > ticks) {
@@ -223,36 +244,19 @@
return ti;
}
-/* return the offset between the host clock and virtual CPU clock */
-int64_t cpu_get_clock_offset(void)
-{
- int64_t ti;
- unsigned start;
-
- do {
- start = seqlock_read_begin(&timers_state.vm_clock_seqlock);
- ti = timers_state.cpu_clock_offset;
- if (!timers_state.cpu_ticks_enabled) {
- ti -= get_clock();
- }
- } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, start));
-
- return -ti;
-}
-
/* enable cpu_get_ticks()
* Caller must hold BQL which server as mutex for vm_clock_seqlock.
*/
void cpu_enable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (!timers_state.cpu_ticks_enabled) {
- timers_state.cpu_ticks_offset -= cpu_get_real_ticks();
+ timers_state.cpu_ticks_offset -= cpu_get_host_ticks();
timers_state.cpu_clock_offset -= get_clock();
timers_state.cpu_ticks_enabled = 1;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* disable cpu_get_ticks() : the clock is stopped. You must not call
@@ -262,20 +266,20 @@
void cpu_disable_ticks(void)
{
/* Here, the really thing protected by seqlock is cpu_clock_offset. */
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (timers_state.cpu_ticks_enabled) {
- timers_state.cpu_ticks_offset += cpu_get_real_ticks();
+ timers_state.cpu_ticks_offset += cpu_get_host_ticks();
timers_state.cpu_clock_offset = cpu_get_clock_locked();
timers_state.cpu_ticks_enabled = 0;
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
/* Correlation between real and virtual time is always going to be
fairly approximate, so ignore small variation.
When the guest is idle real and virtual time will be aligned in
the IO wait loop. */
-#define ICOUNT_WOBBLE (get_ticks_per_sec() / 10)
+#define ICOUNT_WOBBLE (NANOSECONDS_PER_SECOND / 10)
static void icount_adjust(void)
{
@@ -291,7 +295,7 @@
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
cur_time = cpu_get_clock_locked();
cur_icount = cpu_get_icount_locked();
@@ -312,13 +316,13 @@
last_delta = delta;
timers_state.qemu_icount_bias = cur_icount
- (timers_state.qemu_icount << icount_time_shift);
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
}
static void icount_adjust_rt(void *opaque)
{
timer_mod(icount_rt_timer,
- qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
icount_adjust();
}
@@ -326,7 +330,7 @@
{
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() / 10);
+ NANOSECONDS_PER_SECOND / 10);
icount_adjust();
}
@@ -335,18 +339,27 @@
return (count + (1 << icount_time_shift) - 1) >> icount_time_shift;
}
-static void icount_warp_rt(void *opaque)
+static void icount_warp_rt(void)
{
+ unsigned seq;
+ int64_t warp_start;
+
/* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
* changes from -1 to another value, so the race here is okay.
*/
- if (atomic_read(&vm_clock_warp_start) == -1) {
+ do {
+ seq = seqlock_read_begin(&timers_state.vm_clock_seqlock);
+ warp_start = vm_clock_warp_start;
+ } while (seqlock_read_retry(&timers_state.vm_clock_seqlock, seq));
+
+ if (warp_start == -1) {
return;
}
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
if (runstate_is_running()) {
- int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ int64_t clock = REPLAY_CLOCK(REPLAY_CLOCK_VIRTUAL_RT,
+ cpu_get_clock_locked());
int64_t warp_delta;
warp_delta = clock - vm_clock_warp_start;
@@ -355,74 +368,88 @@
* In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
* far ahead of real time.
*/
- int64_t cur_time = cpu_get_clock_locked();
int64_t cur_icount = cpu_get_icount_locked();
- int64_t delta = cur_time - cur_icount;
+ int64_t delta = clock - cur_icount;
warp_delta = MIN(warp_delta, delta);
}
timers_state.qemu_icount_bias += warp_delta;
}
vm_clock_warp_start = -1;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
+static void icount_timer_cb(void *opaque)
+{
+ /* No need for a checkpoint because the timer already synchronizes
+ * with CHECKPOINT_CLOCK_VIRTUAL_RT.
+ */
+ icount_warp_rt();
+}
+
void qtest_clock_warp(int64_t dest)
{
int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+ AioContext *aio_context;
assert(qtest_enabled());
+ aio_context = qemu_get_aio_context();
while (clock < dest) {
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
int64_t warp = qemu_soonest_timeout(dest - clock, deadline);
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
+
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
timers_state.qemu_icount_bias += warp;
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
+ timerlist_run_timers(aio_context->tlg.tl[QEMU_CLOCK_VIRTUAL]);
clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
}
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
-void qemu_clock_warp(QEMUClockType type)
+void qemu_start_warp_timer(void)
{
int64_t clock;
int64_t deadline;
- /*
- * There are too many global variables to make the "warp" behavior
- * applicable to other clocks. But a clock argument removes the
- * need for if statements all over the place.
- */
- if (type != QEMU_CLOCK_VIRTUAL || !use_icount) {
+ if (!use_icount) {
return;
}
- /*
- * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now.
- * This ensures that the deadline for the timer is computed correctly below.
- * This also makes sure that the insn counter is synchronized before the
- * CPU starts running, in case the CPU is woken by an event other than
- * the earliest QEMU_CLOCK_VIRTUAL timer.
+ /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
+ * do not fire, so computing the deadline does not make sense.
*/
- icount_warp_rt(NULL);
- timer_del(icount_warp_timer);
+ if (!runstate_is_running()) {
+ return;
+ }
+
+ /* warp clock deterministically in record/replay mode */
+ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_START)) {
+ return;
+ }
+
if (!all_cpu_threads_idle()) {
return;
}
if (qtest_enabled()) {
/* When testing, qtest commands advance icount. */
- return;
+ return;
}
/* We want to use the earliest deadline from ALL vm_clocks */
- clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+ clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
if (deadline < 0) {
+ static bool notified;
+ if (!icount_sleep && !notified) {
+ error_report("WARNING: icount sleep disabled and no active timers");
+ notified = true;
+ }
return;
}
@@ -433,28 +460,62 @@
* interrupt to wake it up, but the interrupt never comes because
* the vCPU isn't running any insns and thus doesn't advance the
* QEMU_CLOCK_VIRTUAL.
- *
- * An extreme solution for this problem would be to never let VCPUs
- * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL
- * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL
- * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL
- * after some e"real" time, (related to the time left until the next
- * event) has passed. The QEMU_CLOCK_REALTIME timer will do this.
- * This avoids that the warps are visible externally; for example,
- * you will not be sending network packets continuously instead of
- * every 100ms.
*/
- seqlock_write_lock(&timers_state.vm_clock_seqlock);
- if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
- vm_clock_warp_start = clock;
+ if (!icount_sleep) {
+ /*
+ * We never let VCPUs sleep in no sleep icount mode.
+ * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance
+ * to the next QEMU_CLOCK_VIRTUAL event and notify it.
+ * It is useful when we want a deterministic execution time,
+ * isolated from host latencies.
+ */
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ timers_state.qemu_icount_bias += deadline;
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
+ qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+ } else {
+ /*
+ * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some
+ * "real" time (related to the time left until the next event) has
+ * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this.
+ * This keeps the warps from being visible externally; for example,
+ * you will not be sending network packets continuously instead of
+ * every 100ms.
+ */
+ seqlock_write_begin(&timers_state.vm_clock_seqlock);
+ if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) {
+ vm_clock_warp_start = clock;
+ }
+ seqlock_write_end(&timers_state.vm_clock_seqlock);
+ timer_mod_anticipate(icount_warp_timer, clock + deadline);
}
- seqlock_write_unlock(&timers_state.vm_clock_seqlock);
- timer_mod_anticipate(icount_warp_timer, clock + deadline);
} else if (deadline == 0) {
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
+static void qemu_account_warp_timer(void)
+{
+ if (!use_icount || !icount_sleep) {
+ return;
+ }
+
+ /* Nothing to do if the VM is stopped: QEMU_CLOCK_VIRTUAL timers
+ * do not fire, so computing the deadline does not make sense.
+ */
+ if (!runstate_is_running()) {
+ return;
+ }
+
+ /* warp clock deterministically in record/replay mode */
+ if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP_ACCOUNT)) {
+ return;
+ }
+
+ timer_del(icount_warp_timer);
+ icount_warp_rt();
+}
+
static bool icount_state_needed(void *opaque)
{
return use_icount;
@@ -467,6 +528,7 @@
.name = "timer/icount",
.version_id = 1,
.minimum_version_id = 1,
+ .needed = icount_state_needed,
.fields = (VMStateField[]) {
VMSTATE_INT64(qemu_icount_bias, TimersState),
VMSTATE_INT64(qemu_icount, TimersState),
@@ -484,20 +546,86 @@
VMSTATE_INT64_V(cpu_clock_offset, TimersState, 2),
VMSTATE_END_OF_LIST()
},
- .subsections = (VMStateSubsection[]) {
- {
- .vmsd = &icount_vmstate_timers,
- .needed = icount_state_needed,
- }, {
- /* empty */
- }
+ .subsections = (const VMStateDescription*[]) {
+ &icount_vmstate_timers,
+ NULL
}
};
+static void cpu_throttle_thread(void *opaque)
+{
+ CPUState *cpu = opaque;
+ double pct;
+ double throttle_ratio;
+ long sleeptime_ns;
+
+ if (!cpu_throttle_get_percentage()) {
+ return;
+ }
+
+ pct = (double)cpu_throttle_get_percentage()/100;
+ throttle_ratio = pct / (1 - pct);
+ sleeptime_ns = (long)(throttle_ratio * CPU_THROTTLE_TIMESLICE_NS);
+
+ qemu_mutex_unlock_iothread();
+ atomic_set(&cpu->throttle_thread_scheduled, 0);
+ g_usleep(sleeptime_ns / 1000); /* Convert ns to us for usleep call */
+ qemu_mutex_lock_iothread();
+}
+
+static void cpu_throttle_timer_tick(void *opaque)
+{
+ CPUState *cpu;
+ double pct;
+
+ /* Stop the timer if needed */
+ if (!cpu_throttle_get_percentage()) {
+ return;
+ }
+ CPU_FOREACH(cpu) {
+ if (!atomic_xchg(&cpu->throttle_thread_scheduled, 1)) {
+ async_run_on_cpu(cpu, cpu_throttle_thread, cpu);
+ }
+ }
+
+ pct = (double)cpu_throttle_get_percentage()/100;
+ timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
+ CPU_THROTTLE_TIMESLICE_NS / (1-pct));
+}
+
+void cpu_throttle_set(int new_throttle_pct)
+{
+ /* Ensure throttle percentage is within valid range */
+ new_throttle_pct = MIN(new_throttle_pct, CPU_THROTTLE_PCT_MAX);
+ new_throttle_pct = MAX(new_throttle_pct, CPU_THROTTLE_PCT_MIN);
+
+ atomic_set(&throttle_percentage, new_throttle_pct);
+
+ timer_mod(throttle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT) +
+ CPU_THROTTLE_TIMESLICE_NS);
+}
+
+void cpu_throttle_stop(void)
+{
+ atomic_set(&throttle_percentage, 0);
+}
+
+bool cpu_throttle_active(void)
+{
+ return (cpu_throttle_get_percentage() != 0);
+}
+
+int cpu_throttle_get_percentage(void)
+{
+ return atomic_read(&throttle_percentage);
+}
+
void cpu_ticks_init(void)
{
- seqlock_init(&timers_state.vm_clock_seqlock, NULL);
+ seqlock_init(&timers_state.vm_clock_seqlock);
vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
+ throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+ cpu_throttle_timer_tick, NULL);
}
void configure_icount(QemuOpts *opts, Error **errp)
@@ -512,9 +640,18 @@
}
return;
}
+
+ icount_sleep = qemu_opt_get_bool(opts, "sleep", true);
+ if (icount_sleep) {
+ icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
+ icount_timer_cb, NULL);
+ }
+
icount_align_option = qemu_opt_get_bool(opts, "align", false);
- icount_warp_timer = timer_new_ns(QEMU_CLOCK_REALTIME,
- icount_warp_rt, NULL);
+
+ if (icount_align_option && !icount_sleep) {
+ error_setg(errp, "align=on and sleep=off are incompatible");
+ }
if (strcmp(option, "auto") != 0) {
errno = 0;
icount_time_shift = strtol(option, &rem_str, 0);
@@ -525,6 +662,8 @@
return;
} else if (icount_align_option) {
error_setg(errp, "shift=auto and align=on are incompatible");
+ } else if (!icount_sleep) {
+ error_setg(errp, "shift=auto and sleep=off are incompatible");
}
use_icount = 2;
@@ -538,15 +677,15 @@
the virtual time trigger catches emulated time passing too fast.
Realtime triggers occur even when idle, so use them less frequently
than VM triggers. */
- icount_rt_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
- icount_adjust_rt, NULL);
+ icount_rt_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL_RT,
+ icount_adjust_rt, NULL);
timer_mod(icount_rt_timer,
- qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 1000);
+ qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL_RT) + 1000);
icount_vm_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
icount_adjust_vm, NULL);
timer_mod(icount_vm_timer,
qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
- get_ticks_per_sec() / 10);
+ NANOSECONDS_PER_SECOND / 10);
}
/***********************************************************/
@@ -607,15 +746,6 @@
}
}
-void cpu_clean_all_dirty(void)
-{
- CPUState *cpu;
-
- CPU_FOREACH(cpu) {
- cpu_clean_state(cpu);
- }
-}
-
static int do_vm_stop(RunState state)
{
int ret = 0;
@@ -629,7 +759,7 @@
}
bdrv_drain_all();
- ret = bdrv_flush_all();
+ ret = blk_flush_all();
return ret;
}
@@ -652,14 +782,6 @@
cpu->stopped = true;
}
-static void cpu_signal(int sig)
-{
- if (current_cpu) {
- cpu_exit(current_cpu);
- }
- exit_request = 1;
-}
-
#ifdef CONFIG_LINUX
static void sigbus_reraise(void)
{
@@ -672,7 +794,7 @@
raise(SIGBUS);
sigemptyset(&set);
sigaddset(&set, SIGBUS);
- sigprocmask(SIG_UNBLOCK, &set, NULL);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
}
perror("Failed to re-raise SIGBUS!\n");
abort();
@@ -772,51 +894,18 @@
}
}
-static void qemu_tcg_init_cpu_signals(void)
-{
- sigset_t set;
- struct sigaction sigact;
-
- memset(&sigact, 0, sizeof(sigact));
- sigact.sa_handler = cpu_signal;
- sigaction(SIG_IPI, &sigact, NULL);
-
- sigemptyset(&set);
- sigaddset(&set, SIG_IPI);
- pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-}
-
#else /* _WIN32 */
static void qemu_kvm_init_cpu_signals(CPUState *cpu)
{
abort();
}
-
-static void qemu_tcg_init_cpu_signals(void)
-{
-}
#endif /* _WIN32 */
static QemuMutex qemu_global_mutex;
static QemuCond qemu_io_proceeded_cond;
-static bool iothread_requesting_mutex;
+static unsigned iothread_requesting_mutex;
-#ifdef CONFIG_ANDROID
-static __thread bool qemu_global_mutex_held;
-
-static void qemu_global_cond_wait(QemuCond* cond) {
- qemu_global_mutex_held = false;
- qemu_cond_wait(cond, &qemu_global_mutex);
- qemu_global_mutex_held = true;
-}
-#else
-static void qemu_global_cond_wait(QemuCond* cond) {
- qemu_cond_wait(cond, &qemu_global_mutex);
-}
-#endif
-
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
+static QemuThread io_thread;
/* cpu creation */
static QemuCond qemu_cpu_cond;
@@ -832,6 +921,8 @@
qemu_cond_init(&qemu_work_cond);
qemu_cond_init(&qemu_io_proceeded_cond);
qemu_mutex_init(&qemu_global_mutex);
+
+ qemu_thread_get_self(&io_thread);
}
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
@@ -846,6 +937,8 @@
wi.func = func;
wi.data = data;
wi.free = false;
+
+ qemu_mutex_lock(&cpu->work_mutex);
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = &wi;
} else {
@@ -854,12 +947,13 @@
cpu->queued_work_last = &wi;
wi.next = NULL;
wi.done = false;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cpu_kick(cpu);
- while (!wi.done) {
+ while (!atomic_mb_read(&wi.done)) {
CPUState *self_cpu = current_cpu;
- qemu_global_cond_wait(&qemu_work_cond);
+ qemu_cond_wait(&qemu_work_cond, &qemu_global_mutex);
current_cpu = self_cpu;
}
}
@@ -877,6 +971,8 @@
wi->func = func;
wi->data = data;
wi->free = true;
+
+ qemu_mutex_lock(&cpu->work_mutex);
if (cpu->queued_work_first == NULL) {
cpu->queued_work_first = wi;
} else {
@@ -885,10 +981,23 @@
cpu->queued_work_last = wi;
wi->next = NULL;
wi->done = false;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cpu_kick(cpu);
}
+static void qemu_kvm_destroy_vcpu(CPUState *cpu)
+{
+ if (kvm_destroy_vcpu(cpu) < 0) {
+ error_report("kvm_destroy_vcpu failed");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void qemu_tcg_destroy_vcpu(CPUState *cpu)
+{
+}
+
static void flush_queued_work(CPUState *cpu)
{
struct qemu_work_item *wi;
@@ -897,15 +1006,23 @@
return;
}
- while ((wi = cpu->queued_work_first)) {
+ qemu_mutex_lock(&cpu->work_mutex);
+ while (cpu->queued_work_first != NULL) {
+ wi = cpu->queued_work_first;
cpu->queued_work_first = wi->next;
+ if (!cpu->queued_work_first) {
+ cpu->queued_work_last = NULL;
+ }
+ qemu_mutex_unlock(&cpu->work_mutex);
wi->func(wi->data);
- wi->done = true;
+ qemu_mutex_lock(&cpu->work_mutex);
if (wi->free) {
g_free(wi);
+ } else {
+ atomic_mb_set(&wi->done, true);
}
}
- cpu->queued_work_last = NULL;
+ qemu_mutex_unlock(&cpu->work_mutex);
qemu_cond_broadcast(&qemu_work_cond);
}
@@ -914,25 +1031,20 @@
if (cpu->stop) {
cpu->stop = false;
cpu->stopped = true;
- qemu_cond_signal(&qemu_pause_cond);
+ qemu_cond_broadcast(&qemu_pause_cond);
}
flush_queued_work(cpu);
cpu->thread_kicked = false;
}
-static void qemu_tcg_wait_io_event(void)
+static void qemu_tcg_wait_io_event(CPUState *cpu)
{
- CPUState *cpu;
-
while (all_cpu_threads_idle()) {
- /* Start accounting real time to the virtual clock if the CPUs
- are idle. */
- qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
- qemu_global_cond_wait(tcg_halt_cond);
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
while (iothread_requesting_mutex) {
- qemu_global_cond_wait(&qemu_io_proceeded_cond);
+ qemu_cond_wait(&qemu_io_proceeded_cond, &qemu_global_mutex);
}
CPU_FOREACH(cpu) {
@@ -944,17 +1056,16 @@
static void qemu_hax_wait_io_event(CPUState *cpu)
{
while (cpu_thread_is_idle(cpu)) {
- qemu_global_cond_wait(cpu->halt_cond);
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
-
qemu_wait_io_event_common(cpu);
}
-#endif
+#endif /* CONFIG_HAX */
static void qemu_kvm_wait_io_event(CPUState *cpu)
{
while (cpu_thread_is_idle(cpu)) {
- qemu_global_cond_wait(cpu->halt_cond);
+ qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
}
qemu_kvm_eat_signals(cpu);
@@ -966,12 +1077,12 @@
CPUState *cpu = arg;
int r;
- qemu_mutex_lock(&qemu_global_mutex);
-#ifdef CONFIG_ANDROID
- qemu_global_mutex_held = true;
-#endif
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
+ cpu->can_do_io = 1;
current_cpu = cpu;
r = kvm_init_vcpu(cpu);
@@ -986,7 +1097,7 @@
cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
- while (1) {
+ do {
if (cpu_can_run(cpu)) {
r = kvm_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
@@ -994,8 +1105,12 @@
}
}
qemu_kvm_wait_io_event(cpu);
- }
+ } while (!cpu->unplug || cpu_can_run(cpu));
+ qemu_kvm_destroy_vcpu(cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ qemu_mutex_unlock_iothread();
return NULL;
}
@@ -1010,9 +1125,12 @@
sigset_t waitset;
int r;
+ rcu_register_thread();
+
qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
cpu->thread_id = qemu_get_thread_id();
+ cpu->can_do_io = 1;
sigemptyset(&waitset);
sigaddset(&waitset, SIG_IPI);
@@ -1047,23 +1165,23 @@
static void *qemu_tcg_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
+ CPUState *remove_cpu = NULL;
- qemu_tcg_init_cpu_signals();
+ rcu_register_thread();
+
+ qemu_mutex_lock_iothread();
qemu_thread_get_self(cpu->thread);
- qemu_mutex_lock(&qemu_global_mutex);
-#ifdef CONFIG_ANDROID
- qemu_global_mutex_held = true;
-#endif
CPU_FOREACH(cpu) {
cpu->thread_id = qemu_get_thread_id();
cpu->created = true;
+ cpu->can_do_io = 1;
}
qemu_cond_signal(&qemu_cpu_cond);
/* wait for initial kick-off after machine start */
- while (QTAILQ_FIRST(&cpus)->stopped) {
- qemu_global_cond_wait(tcg_halt_cond);
+ while (first_cpu->stopped) {
+ qemu_cond_wait(first_cpu->halt_cond, &qemu_global_mutex);
/* process any pending work */
CPU_FOREACH(cpu) {
@@ -1071,6 +1189,9 @@
}
}
+ /* process any pending work */
+ atomic_mb_set(&exit_request, 1);
+
while (1) {
tcg_exec_all();
@@ -1081,31 +1202,52 @@
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
}
}
- qemu_tcg_wait_io_event();
+ qemu_tcg_wait_io_event(QTAILQ_FIRST(&cpus));
+ CPU_FOREACH(cpu) {
+ if (cpu->unplug && !cpu_can_run(cpu)) {
+ remove_cpu = cpu;
+ break;
+ }
+ }
+ if (remove_cpu) {
+ qemu_tcg_destroy_vcpu(remove_cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ remove_cpu = NULL;
+ }
}
return NULL;
}
#ifdef CONFIG_HAX
+/* The HAX-specific vCPU thread function. This one should only run when the host
+ * CPU supports the VMX "unrestricted guest" feature. */
static void *qemu_hax_cpu_thread_fn(void *arg)
{
CPUState *cpu = arg;
int r;
- qemu_thread_get_self(cpu->thread);
+
+ assert(hax_enabled() && hax_ug_platform());
+
+ rcu_register_thread();
+
qemu_mutex_lock(&qemu_global_mutex);
-#ifdef CONFIG_ANDROID
- qemu_global_mutex_held = true;
-#endif
+ qemu_thread_get_self(cpu->thread);
+
cpu->thread_id = qemu_get_thread_id();
- cpu->created = true;
- cpu->halted = 0;
+ cpu->can_do_io = 1;
+// cpu->created = true;
+// cpu->halted = 0;
current_cpu = cpu;
hax_init_vcpu(cpu);
+
+ /* signal CPU creation */
+ cpu->created = true;
qemu_cond_signal(&qemu_cpu_cond);
- while (1) {
+ do {
if (cpu_can_run(cpu)) {
r = hax_smp_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
@@ -1113,33 +1255,47 @@
}
}
qemu_hax_wait_io_event(cpu);
- }
+ } while (!cpu->unplug || cpu_can_run(cpu));
+
+ hax_vcpu_destroy(cpu);
+ cpu->created = false;
+ qemu_cond_signal(&qemu_cpu_cond);
+ qemu_mutex_unlock_iothread();
return NULL;
}
-#endif
+#endif /* CONFIG_HAX */
static void qemu_cpu_kick_thread(CPUState *cpu)
{
#ifndef _WIN32
int err;
+ if (cpu->thread_kicked) {
+ return;
+ }
+ cpu->thread_kicked = true;
err = pthread_kill(cpu->thread->thread, SIG_IPI);
if (err) {
fprintf(stderr, "qemu:%s: %s", __func__, strerror(err));
exit(1);
}
-
-#ifdef CONFIG_DARWIN
- /* The cpu thread cannot catch it reliably when shutdown the guest on Mac.
- * We can double check it and resend it
- */
- if (!exit_request)
- cpu_signal(0);
-
- if (hax_enabled() && hax_ug_platform())
- cpu->exit_request = 1;
-#endif
+#ifdef __APPLE__
+ // On OS X, the signal isn't caught reliably during shutdown.
+ if (!atomic_mb_read(&exit_request)) {
+ cpu_exit(cpu);
+ atomic_mb_set(&exit_request, 1);
+ }
+#endif /* __APPLE__ */
+#ifdef CONFIG_HAX
+ if (hax_enabled() && hax_ug_platform()) {
+ cpu_exit(cpu);
+ }
+#endif /* CONFIG_HAX */
#else /* _WIN32 */
+ if (cpu->thread_kicked) {
+ return;
+ }
+ cpu->thread_kicked = true;
if (!qemu_cpu_is_self(cpu)) {
CONTEXT tcgContext;
@@ -1150,18 +1306,14 @@
}
/* On multi-core systems, we are not sure that the thread is actually
- * suspended until we can get the context.
- */
+ * suspended until we can get the context. */
tcgContext.ContextFlags = CONTEXT_CONTROL;
while (GetThreadContext(cpu->hThread, &tcgContext) != 0) {
continue;
}
- cpu_signal(0);
-#ifdef CONFIG_HAX
- if (hax_enabled() && hax_ug_platform())
- cpu->exit_request = 1;
-#endif
+ cpu_exit(cpu);
+ atomic_mb_set(&exit_request, 1);
if (ResumeThread(cpu->hThread) == (DWORD)-1) {
fprintf(stderr, "qemu:%s: GetLastError:%lu\n", __func__,
@@ -1169,34 +1321,50 @@
exit(1);
}
}
-#endif
+#endif /* _WIN32 */
+}
+
+static void qemu_cpu_kick_no_halt(void)
+{
+ CPUState *cpu;
+ /* Ensure whatever caused the exit has reached the CPU threads before
+ * writing exit_request.
+ */
+ atomic_mb_set(&exit_request, 1);
+ cpu = atomic_mb_read(&tcg_current_cpu);
+ if (cpu) {
+ cpu_exit(cpu);
+ }
}
void qemu_cpu_kick(CPUState *cpu)
{
qemu_cond_broadcast(cpu->halt_cond);
+ /* There are three cases to consider here:
+ *
+ * - TCG is being used without HAX, then qemu_cpu_kick_no_halt() can be
+ * called directly.
+ *
+ * - TCG is being used with HAX, then kicking the thread with a signal (on Posix)
+ * or with a thread suspend/resume (on Win32) is still needed.
+ *
+ * - TCG is not being used, kick the thread with a signal or suspend/resume.
+ */
#ifdef CONFIG_HAX
- if (((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) && !cpu->thread_kicked) {
+ if (tcg_enabled() && !(hax_enabled() && hax_ug_platform())) {
#else
- if (!tcg_enabled() && !cpu->thread_kicked) {
+ if (tcg_enabled()) {
#endif
+ qemu_cpu_kick_no_halt();
+ } else {
qemu_cpu_kick_thread(cpu);
- cpu->thread_kicked = true;
}
}
void qemu_cpu_kick_self(void)
{
-#ifndef _WIN32
assert(current_cpu);
-
- if (!current_cpu->thread_kicked) {
- qemu_cpu_kick_thread(current_cpu);
- current_cpu->thread_kicked = true;
- }
-#else
- abort();
-#endif
+ qemu_cpu_kick_thread(current_cpu);
}
bool qemu_cpu_is_self(CPUState *cpu)
@@ -1204,48 +1372,86 @@
return qemu_thread_is_self(cpu->thread);
}
-static bool qemu_in_vcpu_thread(void)
+bool qemu_in_vcpu_thread(void)
{
return current_cpu && qemu_cpu_is_self(current_cpu);
}
+static __thread bool iothread_locked = false;
+
+bool qemu_mutex_iothread_locked(void)
+{
+ return iothread_locked;
+}
+
void qemu_mutex_lock_iothread(void)
{
+ /* Technical note on what's going on here, because it's really subtle :-)
+ *
+ * The single TCG vCPU thread always holds the global mutex when executing
+ * instructions, and only releases it very briefly in qemu_tcg_wait_io_event(),
+ * which gets called periodically to process interrupts.
+ *
+ * Under heavy guest CPU load, it will be hard for other threads to acquire
+ * the lock due to this. To counter that, several things are implemented here:
+ *
+ * - First, |iothread_requesting_mutex| is used as a global atomic counter that
+ * will be > 0 whenever other threads are trying to acquire the lock. It is
+ * actually read by qemu_tcg_wait_io_event() to force the vCPU thread to
+ * release the lock until its value reaches 0 again. The |qemu_io_proceeded_cond|
+ * condition variable is used to do that.
+ *
+ * - Second, if TCG is enabled, a trylock() is first tried to acquire the lock.
+ * If this fails, the TCG vCPU thread is kicked, which forces generated code
+ * to exit to qemu_tcg_wait_io_event() as soon as possible.
+ *
+ * NOTE: It looks like the use of |iothread_requesting_mutex| isn't needed at all
+ * when KVM or HAX execution modes are being used, because the corresponding
+ * vCPU threads actually _release_ the lock just before entering guest mode
+ * (and re-acquire it just after exiting from it).
+ */
+ atomic_inc(&iothread_requesting_mutex);
+
+ /* A simple lock is sufficient in the following cases:
+ *
+ * - TCG is not enabled (KVM execution mode).
+ * [This is the !tcg_enabled() check]
+ *
+ * - TCG is enabled, but this is called from the TCG vCPU thread directly.
+ * [This is the qemu_in_vcpu_thread() check]
+ *
+ * - TCG is enabled, but so is HAX in "unrestricted guest" mode, which allows it
+ * to execute all guest code directly (i.e. there is no TCG vCPU thread).
+ * [This is the (hax_enabled() && hax_ug_platform()) check].
+ *
+ * - TCG is enabled, but its thread has not started yet (e.g. when this
+ * function is called during virtual device realization).
+ * [This is (!first_cpu || !first_cpu->created)].
+ */
+ if (!tcg_enabled() || qemu_in_vcpu_thread() ||
#ifdef CONFIG_HAX
- if ((hax_enabled() && hax_ug_platform()) || !tcg_enabled()) {
-#else
- if (!tcg_enabled()) {
+ (hax_enabled() && hax_ug_platform()) ||
#endif
+ !first_cpu || !first_cpu->created) {
qemu_mutex_lock(&qemu_global_mutex);
+ atomic_dec(&iothread_requesting_mutex);
} else {
- iothread_requesting_mutex = true;
if (qemu_mutex_trylock(&qemu_global_mutex)) {
- qemu_cpu_kick_thread(first_cpu);
+ qemu_cpu_kick_no_halt();
qemu_mutex_lock(&qemu_global_mutex);
}
- iothread_requesting_mutex = false;
+ atomic_dec(&iothread_requesting_mutex);
qemu_cond_broadcast(&qemu_io_proceeded_cond);
}
-#ifdef CONFIG_ANDROID
- qemu_global_mutex_held = true;
-#endif
+ iothread_locked = true;
}
void qemu_mutex_unlock_iothread(void)
{
-#ifdef CONFIG_ANDROID
- qemu_global_mutex_held = false;
-#endif
+ iothread_locked = false;
qemu_mutex_unlock(&qemu_global_mutex);
}
-#ifdef CONFIG_ANDROID
-bool qemu_mutex_check_iothread(void)
-{
- return qemu_global_mutex_held;
-}
-#endif
-
static int all_vcpus_paused(void)
{
CPUState *cpu;
@@ -1281,7 +1487,7 @@
}
while (!all_vcpus_paused()) {
- qemu_global_cond_wait(&qemu_pause_cond);
+ qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex);
CPU_FOREACH(cpu) {
qemu_cpu_kick(cpu);
}
@@ -1305,19 +1511,40 @@
}
}
+void cpu_remove(CPUState *cpu)
+{
+ cpu->stop = true;
+ cpu->unplug = true;
+ qemu_cpu_kick(cpu);
+}
+
+void cpu_remove_sync(CPUState *cpu)
+{
+ cpu_remove(cpu);
+ while (cpu->created) {
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
+ }
+}
+
/* For temporary buffers for forming a name */
#define VCPU_THREAD_NAME_SIZE 16
static void qemu_tcg_init_vcpu(CPUState *cpu)
{
char thread_name[VCPU_THREAD_NAME_SIZE];
+ static QemuCond *tcg_halt_cond;
+ static QemuThread *tcg_cpu_thread;
#ifdef CONFIG_HAX
- if (hax_enabled())
+ if (hax_enabled()) {
+ /* This code path should only be taken when HAX is enabled but the
+ * CPU doesn't support "unrestricted guest" mode. */
+ assert(!hax_ug_platform());
+ /* Initialize HAX-related state for the TCG thread. This is required for
+ * cpu_exec() to work correctly when HAX is enabled. */
hax_init_vcpu(cpu);
-#endif
-
- tcg_cpu_address_space_init(cpu, cpu->as);
+ }
+#endif /* CONFIG_HAX */
/* share a single thread for all cpus with TCG */
if (!tcg_cpu_thread) {
@@ -1333,7 +1560,7 @@
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
- qemu_global_cond_wait(&qemu_cpu_cond);
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
tcg_cpu_thread = cpu->thread;
} else {
@@ -1347,6 +1574,13 @@
{
char thread_name[VCPU_THREAD_NAME_SIZE];
+ /* This function shall only be called when HAX is enabled, and the host CPU
+ * supports "unrestricted guest" mode. This allows emulation of "real mode"
+ * and completely avoids the use of TCG. It's the only way to get
+ * multi-core accelerated emulation with HAX. */
+ assert(hax_enabled());
+ assert(hax_ug_platform());
+
cpu->thread = g_malloc0(sizeof(QemuThread));
cpu->halt_cond = g_malloc0(sizeof(QemuCond));
qemu_cond_init(cpu->halt_cond);
@@ -1359,10 +1593,10 @@
cpu->hThread = qemu_thread_get_handle(cpu->thread);
#endif
while (!cpu->created) {
- qemu_global_cond_wait(&qemu_cpu_cond);
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
-#endif
+#endif /* CONFIG_HAX */
static void qemu_kvm_start_vcpu(CPUState *cpu)
{
@@ -1376,7 +1610,7 @@
qemu_thread_create(cpu->thread, thread_name, qemu_kvm_cpu_thread_fn,
cpu, QEMU_THREAD_JOINABLE);
while (!cpu->created) {
- qemu_global_cond_wait(&qemu_cpu_cond);
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
@@ -1392,7 +1626,7 @@
qemu_thread_create(cpu->thread, thread_name, qemu_dummy_cpu_thread_fn, cpu,
QEMU_THREAD_JOINABLE);
while (!cpu->created) {
- qemu_global_cond_wait(&qemu_cpu_cond);
+ qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex);
}
}
@@ -1401,12 +1635,23 @@
cpu->nr_cores = smp_cores;
cpu->nr_threads = smp_threads;
cpu->stopped = true;
+
+ if (!cpu->as) {
+ /* If the target cpu hasn't set up any address spaces itself,
+ * give it the default one.
+ */
+ AddressSpace *as = address_space_init_shareable(cpu->memory,
+ "cpu-memory");
+ cpu->num_ases = 1;
+ cpu_address_space_init(cpu, as, 0);
+ }
+
if (kvm_enabled()) {
qemu_kvm_start_vcpu(cpu);
#ifdef CONFIG_HAX
} else if (hax_enabled() && hax_ug_platform()) {
qemu_hax_start_vcpu(cpu);
-#endif
+#endif
} else if (tcg_enabled()) {
qemu_tcg_init_vcpu(cpu);
} else {
@@ -1420,7 +1665,7 @@
current_cpu->stop = false;
current_cpu->stopped = true;
cpu_exit(current_cpu);
- qemu_cond_signal(&qemu_pause_cond);
+ qemu_cond_broadcast(&qemu_pause_cond);
}
}
@@ -1448,31 +1693,19 @@
return vm_stop(state);
} else {
runstate_set(state);
+
+ bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
- return bdrv_flush_all();
+ return blk_flush_all();
}
}
-static int tcg_cpu_exec(CPUArchState *env)
+static int64_t tcg_get_icount_limit(void)
{
- CPUState *cpu = ENV_GET_CPU(env);
- int ret;
-#ifdef CONFIG_PROFILER
- int64_t ti;
-#endif
+ int64_t deadline;
-#ifdef CONFIG_PROFILER
- ti = profile_getclock();
-#endif
- if (use_icount) {
- int64_t count;
- int64_t deadline;
- int decr;
- timers_state.qemu_icount -= (cpu->icount_decr.u16.low
- + cpu->icount_extra);
- cpu->icount_decr.u16.low = 0;
- cpu->icount_extra = 0;
+ if (replay_mode != REPLAY_MODE_PLAY) {
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
/* Maintain prior (possibly buggy) behaviour where if no deadline
@@ -1484,16 +1717,39 @@
deadline = INT32_MAX;
}
- count = qemu_icount_round(deadline);
+ return qemu_icount_round(deadline);
+ } else {
+ return replay_get_instructions();
+ }
+}
+
+static int tcg_cpu_exec(CPUState *cpu)
+{
+ int ret;
+#ifdef CONFIG_PROFILER
+ int64_t ti;
+#endif
+
+#ifdef CONFIG_PROFILER
+ ti = profile_getclock();
+#endif
+ if (use_icount) {
+ int64_t count;
+ int decr;
+ timers_state.qemu_icount -= (cpu->icount_decr.u16.low
+ + cpu->icount_extra);
+ cpu->icount_decr.u16.low = 0;
+ cpu->icount_extra = 0;
+ count = tcg_get_icount_limit();
timers_state.qemu_icount += count;
decr = (count > 0xffff) ? 0xffff : count;
count -= decr;
cpu->icount_decr.u16.low = decr;
cpu->icount_extra = count;
}
- ret = cpu_exec(env);
+ ret = cpu_exec(cpu);
#ifdef CONFIG_PROFILER
- qemu_time += profile_getclock() - ti;
+ tcg_time += profile_getclock() - ti;
#endif
if (use_icount) {
/* Fold pending instructions back into the
@@ -1502,6 +1758,7 @@
+ cpu->icount_extra);
cpu->icount_decr.u32 = 0;
cpu->icount_extra = 0;
+ replay_account_executed_instructions();
}
return ret;
}
@@ -1511,29 +1768,33 @@
int r;
/* Account partial waits to QEMU_CLOCK_VIRTUAL. */
- qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
+ qemu_account_warp_timer();
if (next_cpu == NULL) {
next_cpu = first_cpu;
}
for (; next_cpu != NULL && !exit_request; next_cpu = CPU_NEXT(next_cpu)) {
CPUState *cpu = next_cpu;
- CPUArchState *env = cpu->env_ptr;
qemu_clock_enable(QEMU_CLOCK_VIRTUAL,
(cpu->singlestep_enabled & SSTEP_NOTIMER) == 0);
if (cpu_can_run(cpu)) {
- r = tcg_cpu_exec(env);
+ r = tcg_cpu_exec(cpu);
if (r == EXCP_DEBUG) {
cpu_handle_guest_debug(cpu);
break;
}
} else if (cpu->stop || cpu->stopped) {
+ if (cpu->unplug) {
+ next_cpu = CPU_NEXT(cpu);
+ }
break;
}
}
- exit_request = 0;
+
+ /* Pairs with atomic_mb_set in qemu_cpu_kick_no_halt. */
+ atomic_mb_set(&exit_request, 0);
}
void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
@@ -1575,24 +1836,26 @@
info->value->CPU = cpu->cpu_index;
info->value->current = (cpu == first_cpu);
info->value->halted = cpu->halted;
+ info->value->qom_path = object_get_canonical_path(OBJECT(cpu));
info->value->thread_id = cpu->thread_id;
#if defined(TARGET_I386)
- info->value->has_pc = true;
- info->value->pc = env->eip + env->segs[R_CS].base;
+ info->value->arch = CPU_INFO_ARCH_X86;
+ info->value->u.x86.pc = env->eip + env->segs[R_CS].base;
#elif defined(TARGET_PPC)
- info->value->has_nip = true;
- info->value->nip = env->nip;
+ info->value->arch = CPU_INFO_ARCH_PPC;
+ info->value->u.ppc.nip = env->nip;
#elif defined(TARGET_SPARC)
- info->value->has_pc = true;
- info->value->pc = env->pc;
- info->value->has_npc = true;
- info->value->npc = env->npc;
+ info->value->arch = CPU_INFO_ARCH_SPARC;
+ info->value->u.q_sparc.pc = env->pc;
+ info->value->u.q_sparc.npc = env->npc;
#elif defined(TARGET_MIPS)
- info->value->has_PC = true;
- info->value->PC = env->active_tc.PC;
+ info->value->arch = CPU_INFO_ARCH_MIPS;
+ info->value->u.q_mips.PC = env->active_tc.PC;
#elif defined(TARGET_TRICORE)
- info->value->has_PC = true;
- info->value->PC = env->PC;
+ info->value->arch = CPU_INFO_ARCH_TRICORE;
+ info->value->u.tricore.PC = env->PC;
+#else
+ info->value->arch = CPU_INFO_ARCH_OTHER;
#endif
/* XXX: waiting for the qapi to support GSList */
@@ -1614,6 +1877,7 @@
uint32_t l;
CPUState *cpu;
uint8_t buf[1024];
+ int64_t orig_addr = addr, orig_size = size;
if (!has_cpu) {
cpu_index = 0;
@@ -1621,8 +1885,8 @@
cpu = qemu_get_cpu(cpu_index);
if (cpu == NULL) {
- error_set(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
- "a CPU number");
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cpu-index",
+ "a CPU number");
return;
}
@@ -1637,11 +1901,12 @@
if (l > size)
l = size;
if (cpu_memory_rw_debug(cpu, addr, buf, l, 0) != 0) {
- error_setg(errp, "Invalid addr 0x%016" PRIx64 "specified", addr);
+ error_setg(errp, "Invalid addr 0x%016" PRIx64 "/size %" PRId64
+ " specified", orig_addr, orig_size);
goto exit;
}
if (fwrite(buf, 1, l, f) != l) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
@@ -1671,7 +1936,7 @@
l = size;
cpu_physical_memory_read(addr, buf, l);
if (fwrite(buf, 1, l, f) != l) {
- error_set(errp, QERR_IO_ERROR);
+ error_setg(errp, QERR_IO_ERROR);
goto exit;
}
addr += l;
@@ -1684,21 +1949,7 @@
void qmp_inject_nmi(Error **errp)
{
-#if defined(TARGET_I386)
- CPUState *cs;
-
- CPU_FOREACH(cs) {
- X86CPU *cpu = X86_CPU(cs);
-
- if (!cpu->apic_state) {
- cpu_interrupt(cs, CPU_INTERRUPT_NMI);
- } else {
- apic_deliver_nmi(cpu->apic_state);
- }
- }
-#else
nmi_monitor_handle(monitor_get_cpu_index(), errp);
-#endif
}
void dump_drift_info(FILE *f, fprintf_function cpu_fprintf)
diff --git a/cputlb.c b/cputlb.c
index a55518a..d068ee5 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -17,7 +17,7 @@
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
-#include "config.h"
+#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
@@ -28,10 +28,35 @@
#include "exec/memory-internal.h"
#include "exec/ram_addr.h"
+#include "exec/exec-all.h"
#include "tcg/tcg.h"
+#include "qemu/error-report.h"
+#include "exec/log.h"
-//#define DEBUG_TLB
-//#define DEBUG_TLB_CHECK
+/* DEBUG_TLB sends tlb_debug() to stderr; add DEBUG_TLB_LOG for CPU_LOG_MMU */
+/* #define DEBUG_TLB */
+/* #define DEBUG_TLB_LOG */
+
+#ifdef DEBUG_TLB
+# define DEBUG_TLB_GATE 1
+# ifdef DEBUG_TLB_LOG
+# define DEBUG_TLB_LOG_GATE 1
+# else
+# define DEBUG_TLB_LOG_GATE 0
+# endif
+#else
+# define DEBUG_TLB_GATE 0
+# define DEBUG_TLB_LOG_GATE 0
+#endif
+
+#define tlb_debug(fmt, ...) do { \
+ if (DEBUG_TLB_LOG_GATE) { \
+ qemu_log_mask(CPU_LOG_MMU, "%s: " fmt, __func__, \
+ ## __VA_ARGS__); \
+ } else if (DEBUG_TLB_GATE) { \
+ fprintf(stderr, "%s: " fmt, __func__, ## __VA_ARGS__); \
+ } \
+} while (0)
/* statistics */
int tlb_flush_count;
@@ -52,12 +77,7 @@
{
CPUArchState *env = cpu->env_ptr;
-#if defined(DEBUG_TLB)
- printf("tlb_flush:\n");
-#endif
- /* must reset current TB so that interrupts cannot modify the
- links while we are modifying them */
- cpu->current_tb = NULL;
+ tlb_debug("(%d)\n", flush_global);
memset(env->tlb_table, -1, sizeof(env->tlb_table));
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
@@ -69,6 +89,36 @@
tlb_flush_count++;
}
+static inline void v_tlb_flush_by_mmuidx(CPUState *cpu, va_list argp)
+{
+ CPUArchState *env = cpu->env_ptr;
+
+ tlb_debug("start\n");
+
+ for (;;) { /* argp: int mmu indexes, terminated by a negative value */
+ int mmu_idx = va_arg(argp, int);
+
+ if (mmu_idx < 0) {
+ break;
+ }
+
+ tlb_debug("%d\n", mmu_idx);
+
+ memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[0]));
+ memset(env->tlb_v_table[mmu_idx], -1, sizeof(env->tlb_v_table[0]));
+ }
+
+ memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+}
+
+void tlb_flush_by_mmuidx(CPUState *cpu, ...)
+{
+ va_list argp;
+ va_start(argp, cpu);
+ v_tlb_flush_by_mmuidx(cpu, argp);
+ va_end(argp);
+}
+
static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
{
if (addr == (tlb_entry->addr_read &
@@ -87,22 +137,17 @@
int i;
int mmu_idx;
-#if defined(DEBUG_TLB)
- printf("tlb_flush_page: " TARGET_FMT_lx "\n", addr);
-#endif
+ tlb_debug("page :" TARGET_FMT_lx "\n", addr);
+
/* Check if we need to flush due to large pages. */
if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
-#if defined(DEBUG_TLB)
- printf("tlb_flush_page: forced full flush ("
- TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
- env->tlb_flush_addr, env->tlb_flush_mask);
-#endif
+ tlb_debug("forcing full flush ("
+ TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+ env->tlb_flush_addr, env->tlb_flush_mask);
+
tlb_flush(cpu, 1);
return;
}
- /* must reset current TB so that interrupts cannot modify the
- links while we are modifying them */
- cpu->current_tb = NULL;
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
@@ -121,18 +166,62 @@
tb_flush_jmp_cache(cpu, addr);
}
+void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
+{
+ CPUArchState *env = cpu->env_ptr;
+ int i, k;
+ va_list argp;
+
+ va_start(argp, addr);
+
+ tlb_debug("addr "TARGET_FMT_lx"\n", addr);
+
+ /* Check if we need to flush due to large pages (full flush per mmu_idx). */
+ if ((addr & env->tlb_flush_mask) == env->tlb_flush_addr) {
+ tlb_debug("forced full flush ("
+ TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
+ env->tlb_flush_addr, env->tlb_flush_mask);
+
+ v_tlb_flush_by_mmuidx(cpu, argp);
+ va_end(argp);
+ return;
+ }
+
+ addr &= TARGET_PAGE_MASK;
+ i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+
+ for (;;) { /* varargs: int mmu indexes, terminated by a negative value */
+ int mmu_idx = va_arg(argp, int);
+
+ if (mmu_idx < 0) {
+ break;
+ }
+
+ tlb_debug("idx %d\n", mmu_idx);
+
+ tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+
+ /* check whether there are vtlb entries that need to be flushed */
+ for (k = 0; k < CPU_VTLB_SIZE; k++) {
+ tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
+ }
+ }
+ va_end(argp);
+
+ tb_flush_jmp_cache(cpu, addr);
+}
+
/* update the TLBs so that writes to code in the virtual page 'addr'
can be detected */
void tlb_protect_code(ram_addr_t ram_addr)
{
- cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE,
- DIRTY_MEMORY_CODE);
+ cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE,
+ DIRTY_MEMORY_CODE);
}
/* update the TLB so that writes in physical page 'phys_addr' are no longer
tested for self modifying code */
-void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr,
- target_ulong vaddr)
+void tlb_unprotect_code(ram_addr_t ram_addr)
{
cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE);
}
@@ -159,34 +248,32 @@
{
ram_addr_t ram_addr;
- if (qemu_ram_addr_from_host(ptr, &ram_addr) == NULL) {
+ ram_addr = qemu_ram_addr_from_host(ptr);
+ if (ram_addr == RAM_ADDR_INVALID) {
fprintf(stderr, "Bad ram pointer %p\n", ptr);
abort();
}
return ram_addr;
}
-void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length)
+void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
{
- CPUState *cpu;
CPUArchState *env;
- CPU_FOREACH(cpu) {
- int mmu_idx;
+ int mmu_idx;
- env = cpu->env_ptr;
- for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
- unsigned int i;
+ env = cpu->env_ptr;
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ unsigned int i;
- for (i = 0; i < CPU_TLB_SIZE; i++) {
- tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
- start1, length);
- }
+ for (i = 0; i < CPU_TLB_SIZE; i++) {
+ tlb_reset_dirty_range(&env->tlb_table[mmu_idx][i],
+ start1, length);
+ }
- for (i = 0; i < CPU_VTLB_SIZE; i++) {
- tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
- start1, length);
- }
+ for (i = 0; i < CPU_VTLB_SIZE; i++) {
+ tlb_reset_dirty_range(&env->tlb_v_table[mmu_idx][i],
+ start1, length);
}
}
}
@@ -200,8 +287,9 @@
/* update the TLB corresponding to virtual page vaddr
so that it is no longer dirty */
-void tlb_set_dirty(CPUArchState *env, target_ulong vaddr)
+void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
{
+ CPUArchState *env = cpu->env_ptr;
int i;
int mmu_idx;
@@ -243,11 +331,15 @@
}
/* Add a new TLB entry. At most one entry for a given virtual address
- is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
- supplied size is only used by tlb_flush_page. */
-void tlb_set_page(CPUState *cpu, target_ulong vaddr,
- hwaddr paddr, int prot,
- int mmu_idx, target_ulong size)
+ * is permitted. Only a single TARGET_PAGE_SIZE region is mapped, the
+ * supplied size is only used by tlb_flush_page.
+ *
+ * Called from TCG-generated code, which is under an RCU read-side
+ * critical section.
+ */
+void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
+ hwaddr paddr, MemTxAttrs attrs, int prot,
+ int mmu_idx, target_ulong size)
{
CPUArchState *env = cpu->env_ptr;
MemoryRegionSection *section;
@@ -258,6 +350,7 @@
CPUTLBEntry *te;
hwaddr iotlb, xlat, sz;
unsigned vidx = env->vtlb_index++ % CPU_VTLB_SIZE;
+ int asidx = cpu_asidx_from_attrs(cpu, attrs);
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
@@ -265,15 +358,12 @@
}
sz = size;
- section = address_space_translate_for_iotlb(cpu->as, paddr,
- &xlat, &sz);
+ section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
assert(sz >= TARGET_PAGE_SIZE);
-#if defined(DEBUG_TLB)
- printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
- " prot=%x idx=%d\n",
- vaddr, paddr, prot, mmu_idx);
-#endif
+ tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
+ " prot=%x idx=%d\n",
+ vaddr, paddr, prot, mmu_idx);
address = vaddr;
if (!memory_region_is_ram(section->mr) && !memory_region_is_romd(section->mr)) {
@@ -297,7 +387,8 @@
env->iotlb_v[mmu_idx][vidx] = env->iotlb[mmu_idx][index];
/* refill the tlb */
- env->iotlb[mmu_idx][index] = iotlb - vaddr;
+ env->iotlb[mmu_idx][index].addr = iotlb - vaddr;
+ env->iotlb[mmu_idx][index].attrs = attrs;
te->addend = addend - vaddr;
if (prot & PAGE_READ) {
te->addr_read = address;
@@ -316,8 +407,8 @@
/* Write access calls the I/O callback. */
te->addr_write = address | TLB_MMIO;
} else if (memory_region_is_ram(section->mr)
- && cpu_physical_memory_is_clean(section->mr->ram_addr
- + xlat)) {
+ && cpu_physical_memory_is_clean(
+ memory_region_get_ram_addr(section->mr) + xlat)) {
te->addr_write = address | TLB_NOTDIRTY;
} else {
te->addr_write = address;
@@ -327,6 +418,50 @@
}
}
+/* Add a new TLB entry without specifying memory transaction attributes:
+ * forwards to tlb_set_page_with_attrs() with MEMTXATTRS_UNSPECIFIED.
+ */
+void tlb_set_page(CPUState *cpu, target_ulong vaddr,
+ hwaddr paddr, int prot,
+ int mmu_idx, target_ulong size)
+{
+ tlb_set_page_with_attrs(cpu, vaddr, paddr, MEMTXATTRS_UNSPECIFIED,
+ prot, mmu_idx, size);
+}
+
+static void report_bad_exec(CPUState *cpu, target_ulong addr)
+{
+ /* Accidentally executing outside RAM or ROM is quite common for
+ * several user-error situations, so report it in a way that
+ * makes it clear that this isn't a QEMU bug and provide suggestions
+ * about what a user could do to fix things.
+ */
+ error_report("Trying to execute code outside RAM or ROM at 0x"
+ TARGET_FMT_lx, addr);
+ error_printf("This usually means one of the following happened:\n\n"
+ "(1) You told QEMU to execute a kernel for the wrong machine "
+ "type, and it crashed on startup (eg trying to run a "
+ "raspberry pi kernel on a versatilepb QEMU machine)\n"
+ "(2) You didn't give QEMU a kernel or BIOS filename at all, "
+ "and QEMU executed a ROM full of no-op instructions until "
+ "it fell off the end\n"
+ "(3) Your guest kernel has a bug and crashed by jumping "
+ "off into nowhere\n\n"
+ "This is almost always one of the first two, so check your "
+ "command line and that you are using the right type of kernel "
+ "for this machine.\n"
+ "If you think option (3) is likely then you can try debugging "
+ "your guest with the -d debug options; in particular "
+ "-d guest_errors will cause the log to include a dump of the "
+ "guest register state at this point.\n\n"
+ "Execution cannot continue; stopping here.\n\n");
+
+ /* Report also to the logs, with more detail including register dump */
+ qemu_log_mask(LOG_GUEST_ERROR, "qemu: fatal: Trying to execute code "
+ "outside RAM or ROM at 0x" TARGET_FMT_lx "\n", addr);
+ log_cpu_state_mask(LOG_GUEST_ERROR, cpu, CPU_DUMP_FPU | CPU_DUMP_CCOP);
+}
+
/* NOTE: this function can trigger an exception */
/* NOTE2: the returned address is not exactly the physical address: it
* is actually a ram_addr_t (in system mode; the user mode emulation
@@ -338,29 +473,60 @@
void *p;
MemoryRegion *mr;
CPUState *cpu = ENV_GET_CPU(env1);
+ CPUIOTLBEntry *iotlbentry;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- mmu_idx = cpu_mmu_index(env1);
+ mmu_idx = cpu_mmu_index(env1, true);
if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
(addr & TARGET_PAGE_MASK))) {
cpu_ldub_code(env1, addr);
}
- pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
- mr = iotlb_to_region(cpu->as, pd);
+ iotlbentry = &env1->iotlb[mmu_idx][page_index];
+ pd = iotlbentry->addr & ~TARGET_PAGE_MASK;
+ mr = iotlb_to_region(cpu, pd, iotlbentry->attrs);
if (memory_region_is_unassigned(mr)) {
CPUClass *cc = CPU_GET_CLASS(cpu);
if (cc->do_unassigned_access) {
cc->do_unassigned_access(cpu, addr, false, true, 0, 4);
} else {
- cpu_abort(cpu, "Trying to execute code outside RAM or ROM at 0x"
- TARGET_FMT_lx "\n", addr);
+ report_bad_exec(cpu, addr);
+ exit(1);
}
}
p = (void *)((uintptr_t)addr + env1->tlb_table[mmu_idx][page_index].addend);
return qemu_ram_addr_from_host_nofail(p);
}
+/* Return true if PAGE matches the ELT_OFS field of a victim tlb entry; on a
+ hit the entry (and its iotlb entry) is swapped with main tlb slot INDEX. */
+static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
+ size_t elt_ofs, target_ulong page)
+{
+ size_t vidx;
+ for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
+ CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
+ target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
+
+ if (cmp == page) {
+ /* Found entry in victim tlb, swap tlb and iotlb. */
+ CPUTLBEntry tmptlb, *tlb = &env->tlb_table[mmu_idx][index];
+ CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
+ CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
+
+ tmptlb = *tlb; *tlb = *vtlb; *vtlb = tmptlb;
+ tmpio = *io; *io = *vio; *vio = tmpio;
+ return true;
+ }
+ }
+ return false;
+}
+
+/* Macro to call the above, with local variables from the use context. */
+#define VICTIM_TLB_HIT(TY, ADDR) \
+ victim_tlb_hit(env, mmu_idx, index, offsetof(CPUTLBEntry, TY), \
+ (ADDR) & TARGET_PAGE_MASK)
+
#define MMUSUFFIX _mmu
#define SHIFT 0
diff --git a/crypto/Makefile.objs b/crypto/Makefile.objs
new file mode 100644
index 0000000..a36d2d9
--- /dev/null
+++ b/crypto/Makefile.objs
@@ -0,0 +1,33 @@
+crypto-obj-y = init.o
+crypto-obj-y += hash.o
+crypto-obj-$(CONFIG_NETTLE) += hash-nettle.o
+crypto-obj-$(if $(CONFIG_NETTLE),n,$(CONFIG_GCRYPT)) += hash-gcrypt.o
+crypto-obj-$(if $(CONFIG_NETTLE),n,$(if $(CONFIG_GCRYPT),n,y)) += hash-glib.o
+crypto-obj-y += aes.o
+crypto-obj-y += desrfb.o
+crypto-obj-y += cipher.o
+crypto-obj-y += tlscreds.o
+crypto-obj-y += tlscredsanon.o
+crypto-obj-y += tlscredsx509.o
+crypto-obj-y += tlssession.o
+crypto-obj-y += secret.o
+crypto-obj-$(CONFIG_GCRYPT) += random-gcrypt.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(CONFIG_GNUTLS_RND)) += random-gnutls.o
+crypto-obj-$(if $(CONFIG_GCRYPT),n,$(if $(CONFIG_GNUTLS_RND),n,y)) += random-platform.o
+crypto-obj-y += pbkdf.o
+crypto-obj-$(CONFIG_NETTLE_KDF) += pbkdf-nettle.o
+crypto-obj-$(if $(CONFIG_NETTLE_KDF),n,$(CONFIG_GCRYPT_KDF)) += pbkdf-gcrypt.o
+crypto-obj-y += ivgen.o
+crypto-obj-y += ivgen-essiv.o
+crypto-obj-y += ivgen-plain.o
+crypto-obj-y += ivgen-plain64.o
+crypto-obj-y += afsplit.o
+crypto-obj-y += xts.o
+crypto-obj-y += block.o
+crypto-obj-y += block-qcow.o
+crypto-obj-y += block-luks.o
+
+# Let the userspace emulators avoid linking gnutls/etc
+crypto-aes-obj-y = aes.o
+
+stub-obj-y += pbkdf-stub.o
diff --git a/crypto/aes.c b/crypto/aes.c
new file mode 100644
index 0000000..3456eac
--- /dev/null
+++ b/crypto/aes.c
@@ -0,0 +1,1653 @@
+/**
+ *
+ * aes.c - integrated in QEMU by Fabrice Bellard from the OpenSSL project.
+ */
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+#include "crypto/aes.h"
+
+typedef uint32_t u32;
+typedef uint8_t u8;
+
+/* This controls loop-unrolling in aes_core.c */
+#undef FULL_UNROLL
+# define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
+
+const uint8_t AES_sbox[256] = {
+ 0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+ 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+ 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+ 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+ 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+ 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+ 0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+ 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+ 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+ 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+ 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+ 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+ 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+ 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+ 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+ 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+ 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+ 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+ 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+ 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+ 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+ 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+ 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+ 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+ 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+ 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+ 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+ 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+ 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+ 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+ 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+ 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
+};
+
+const uint8_t AES_isbox[256] = {
+ 0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+ 0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+ 0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+ 0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+ 0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+ 0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+ 0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+ 0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+ 0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+ 0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+ 0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+ 0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+ 0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+ 0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+ 0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+ 0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+ 0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+ 0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+ 0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+ 0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+ 0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+ 0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+ 0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+ 0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+ 0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+ 0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+ 0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+ 0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+ 0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+ 0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+ 0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+ 0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
+};
+
+const uint8_t AES_shifts[16] = {
+ 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11
+};
+
+const uint8_t AES_ishifts[16] = {
+ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3
+};
+
+/* AES_imc[x][0] = [x].[0e, 09, 0d, 0b]; */
+/* AES_imc[x][1] = [x].[0b, 0e, 09, 0d]; */
+/* AES_imc[x][2] = [x].[0d, 0b, 0e, 09]; */
+/* AES_imc[x][3] = [x].[09, 0d, 0b, 0e]; */
+const uint32_t AES_imc[256][4] = {
+ { 0x00000000, 0x00000000, 0x00000000, 0x00000000, }, /* x=00 */
+ { 0x0E090D0B, 0x0B0E090D, 0x0D0B0E09, 0x090D0B0E, }, /* x=01 */
+ { 0x1C121A16, 0x161C121A, 0x1A161C12, 0x121A161C, }, /* x=02 */
+ { 0x121B171D, 0x1D121B17, 0x171D121B, 0x1B171D12, }, /* x=03 */
+ { 0x3824342C, 0x2C382434, 0x342C3824, 0x24342C38, }, /* x=04 */
+ { 0x362D3927, 0x27362D39, 0x3927362D, 0x2D392736, }, /* x=05 */
+ { 0x24362E3A, 0x3A24362E, 0x2E3A2436, 0x362E3A24, }, /* x=06 */
+ { 0x2A3F2331, 0x312A3F23, 0x23312A3F, 0x3F23312A, }, /* x=07 */
+ { 0x70486858, 0x58704868, 0x68587048, 0x48685870, }, /* x=08 */
+ { 0x7E416553, 0x537E4165, 0x65537E41, 0x4165537E, }, /* x=09 */
+ { 0x6C5A724E, 0x4E6C5A72, 0x724E6C5A, 0x5A724E6C, }, /* x=0A */
+ { 0x62537F45, 0x4562537F, 0x7F456253, 0x537F4562, }, /* x=0B */
+ { 0x486C5C74, 0x74486C5C, 0x5C74486C, 0x6C5C7448, }, /* x=0C */
+ { 0x4665517F, 0x7F466551, 0x517F4665, 0x65517F46, }, /* x=0D */
+ { 0x547E4662, 0x62547E46, 0x4662547E, 0x7E466254, }, /* x=0E */
+ { 0x5A774B69, 0x695A774B, 0x4B695A77, 0x774B695A, }, /* x=0F */
+ { 0xE090D0B0, 0xB0E090D0, 0xD0B0E090, 0x90D0B0E0, }, /* x=10 */
+ { 0xEE99DDBB, 0xBBEE99DD, 0xDDBBEE99, 0x99DDBBEE, }, /* x=11 */
+ { 0xFC82CAA6, 0xA6FC82CA, 0xCAA6FC82, 0x82CAA6FC, }, /* x=12 */
+ { 0xF28BC7AD, 0xADF28BC7, 0xC7ADF28B, 0x8BC7ADF2, }, /* x=13 */
+ { 0xD8B4E49C, 0x9CD8B4E4, 0xE49CD8B4, 0xB4E49CD8, }, /* x=14 */
+ { 0xD6BDE997, 0x97D6BDE9, 0xE997D6BD, 0xBDE997D6, }, /* x=15 */
+ { 0xC4A6FE8A, 0x8AC4A6FE, 0xFE8AC4A6, 0xA6FE8AC4, }, /* x=16 */
+ { 0xCAAFF381, 0x81CAAFF3, 0xF381CAAF, 0xAFF381CA, }, /* x=17 */
+ { 0x90D8B8E8, 0xE890D8B8, 0xB8E890D8, 0xD8B8E890, }, /* x=18 */
+ { 0x9ED1B5E3, 0xE39ED1B5, 0xB5E39ED1, 0xD1B5E39E, }, /* x=19 */
+ { 0x8CCAA2FE, 0xFE8CCAA2, 0xA2FE8CCA, 0xCAA2FE8C, }, /* x=1A */
+ { 0x82C3AFF5, 0xF582C3AF, 0xAFF582C3, 0xC3AFF582, }, /* x=1B */
+ { 0xA8FC8CC4, 0xC4A8FC8C, 0x8CC4A8FC, 0xFC8CC4A8, }, /* x=1C */
+ { 0xA6F581CF, 0xCFA6F581, 0x81CFA6F5, 0xF581CFA6, }, /* x=1D */
+ { 0xB4EE96D2, 0xD2B4EE96, 0x96D2B4EE, 0xEE96D2B4, }, /* x=1E */
+ { 0xBAE79BD9, 0xD9BAE79B, 0x9BD9BAE7, 0xE79BD9BA, }, /* x=1F */
+ { 0xDB3BBB7B, 0x7BDB3BBB, 0xBB7BDB3B, 0x3BBB7BDB, }, /* x=20 */
+ { 0xD532B670, 0x70D532B6, 0xB670D532, 0x32B670D5, }, /* x=21 */
+ { 0xC729A16D, 0x6DC729A1, 0xA16DC729, 0x29A16DC7, }, /* x=22 */
+ { 0xC920AC66, 0x66C920AC, 0xAC66C920, 0x20AC66C9, }, /* x=23 */
+ { 0xE31F8F57, 0x57E31F8F, 0x8F57E31F, 0x1F8F57E3, }, /* x=24 */
+ { 0xED16825C, 0x5CED1682, 0x825CED16, 0x16825CED, }, /* x=25 */
+ { 0xFF0D9541, 0x41FF0D95, 0x9541FF0D, 0x0D9541FF, }, /* x=26 */
+ { 0xF104984A, 0x4AF10498, 0x984AF104, 0x04984AF1, }, /* x=27 */
+ { 0xAB73D323, 0x23AB73D3, 0xD323AB73, 0x73D323AB, }, /* x=28 */
+ { 0xA57ADE28, 0x28A57ADE, 0xDE28A57A, 0x7ADE28A5, }, /* x=29 */
+ { 0xB761C935, 0x35B761C9, 0xC935B761, 0x61C935B7, }, /* x=2A */
+ { 0xB968C43E, 0x3EB968C4, 0xC43EB968, 0x68C43EB9, }, /* x=2B */
+ { 0x9357E70F, 0x0F9357E7, 0xE70F9357, 0x57E70F93, }, /* x=2C */
+ { 0x9D5EEA04, 0x049D5EEA, 0xEA049D5E, 0x5EEA049D, }, /* x=2D */
+ { 0x8F45FD19, 0x198F45FD, 0xFD198F45, 0x45FD198F, }, /* x=2E */
+ { 0x814CF012, 0x12814CF0, 0xF012814C, 0x4CF01281, }, /* x=2F */
+ { 0x3BAB6BCB, 0xCB3BAB6B, 0x6BCB3BAB, 0xAB6BCB3B, }, /* x=30 */
+ { 0x35A266C0, 0xC035A266, 0x66C035A2, 0xA266C035, }, /* x=31 */
+ { 0x27B971DD, 0xDD27B971, 0x71DD27B9, 0xB971DD27, }, /* x=32 */
+ { 0x29B07CD6, 0xD629B07C, 0x7CD629B0, 0xB07CD629, }, /* x=33 */
+ { 0x038F5FE7, 0xE7038F5F, 0x5FE7038F, 0x8F5FE703, }, /* x=34 */
+ { 0x0D8652EC, 0xEC0D8652, 0x52EC0D86, 0x8652EC0D, }, /* x=35 */
+ { 0x1F9D45F1, 0xF11F9D45, 0x45F11F9D, 0x9D45F11F, }, /* x=36 */
+ { 0x119448FA, 0xFA119448, 0x48FA1194, 0x9448FA11, }, /* x=37 */
+ { 0x4BE30393, 0x934BE303, 0x03934BE3, 0xE303934B, }, /* x=38 */
+ { 0x45EA0E98, 0x9845EA0E, 0x0E9845EA, 0xEA0E9845, }, /* x=39 */
+ { 0x57F11985, 0x8557F119, 0x198557F1, 0xF1198557, }, /* x=3A */
+ { 0x59F8148E, 0x8E59F814, 0x148E59F8, 0xF8148E59, }, /* x=3B */
+ { 0x73C737BF, 0xBF73C737, 0x37BF73C7, 0xC737BF73, }, /* x=3C */
+ { 0x7DCE3AB4, 0xB47DCE3A, 0x3AB47DCE, 0xCE3AB47D, }, /* x=3D */
+ { 0x6FD52DA9, 0xA96FD52D, 0x2DA96FD5, 0xD52DA96F, }, /* x=3E */
+ { 0x61DC20A2, 0xA261DC20, 0x20A261DC, 0xDC20A261, }, /* x=3F */
+ { 0xAD766DF6, 0xF6AD766D, 0x6DF6AD76, 0x766DF6AD, }, /* x=40 */
+ { 0xA37F60FD, 0xFDA37F60, 0x60FDA37F, 0x7F60FDA3, }, /* x=41 */
+ { 0xB16477E0, 0xE0B16477, 0x77E0B164, 0x6477E0B1, }, /* x=42 */
+ { 0xBF6D7AEB, 0xEBBF6D7A, 0x7AEBBF6D, 0x6D7AEBBF, }, /* x=43 */
+ { 0x955259DA, 0xDA955259, 0x59DA9552, 0x5259DA95, }, /* x=44 */
+ { 0x9B5B54D1, 0xD19B5B54, 0x54D19B5B, 0x5B54D19B, }, /* x=45 */
+ { 0x894043CC, 0xCC894043, 0x43CC8940, 0x4043CC89, }, /* x=46 */
+ { 0x87494EC7, 0xC787494E, 0x4EC78749, 0x494EC787, }, /* x=47 */
+ { 0xDD3E05AE, 0xAEDD3E05, 0x05AEDD3E, 0x3E05AEDD, }, /* x=48 */
+ { 0xD33708A5, 0xA5D33708, 0x08A5D337, 0x3708A5D3, }, /* x=49 */
+ { 0xC12C1FB8, 0xB8C12C1F, 0x1FB8C12C, 0x2C1FB8C1, }, /* x=4A */
+ { 0xCF2512B3, 0xB3CF2512, 0x12B3CF25, 0x2512B3CF, }, /* x=4B */
+ { 0xE51A3182, 0x82E51A31, 0x3182E51A, 0x1A3182E5, }, /* x=4C */
+ { 0xEB133C89, 0x89EB133C, 0x3C89EB13, 0x133C89EB, }, /* x=4D */
+ { 0xF9082B94, 0x94F9082B, 0x2B94F908, 0x082B94F9, }, /* x=4E */
+ { 0xF701269F, 0x9FF70126, 0x269FF701, 0x01269FF7, }, /* x=4F */
+ { 0x4DE6BD46, 0x464DE6BD, 0xBD464DE6, 0xE6BD464D, }, /* x=50 */
+ { 0x43EFB04D, 0x4D43EFB0, 0xB04D43EF, 0xEFB04D43, }, /* x=51 */
+ { 0x51F4A750, 0x5051F4A7, 0xA75051F4, 0xF4A75051, }, /* x=52 */
+ { 0x5FFDAA5B, 0x5B5FFDAA, 0xAA5B5FFD, 0xFDAA5B5F, }, /* x=53 */
+ { 0x75C2896A, 0x6A75C289, 0x896A75C2, 0xC2896A75, }, /* x=54 */
+ { 0x7BCB8461, 0x617BCB84, 0x84617BCB, 0xCB84617B, }, /* x=55 */
+ { 0x69D0937C, 0x7C69D093, 0x937C69D0, 0xD0937C69, }, /* x=56 */
+ { 0x67D99E77, 0x7767D99E, 0x9E7767D9, 0xD99E7767, }, /* x=57 */
+ { 0x3DAED51E, 0x1E3DAED5, 0xD51E3DAE, 0xAED51E3D, }, /* x=58 */
+ { 0x33A7D815, 0x1533A7D8, 0xD81533A7, 0xA7D81533, }, /* x=59 */
+ { 0x21BCCF08, 0x0821BCCF, 0xCF0821BC, 0xBCCF0821, }, /* x=5A */
+ { 0x2FB5C203, 0x032FB5C2, 0xC2032FB5, 0xB5C2032F, }, /* x=5B */
+ { 0x058AE132, 0x32058AE1, 0xE132058A, 0x8AE13205, }, /* x=5C */
+ { 0x0B83EC39, 0x390B83EC, 0xEC390B83, 0x83EC390B, }, /* x=5D */
+ { 0x1998FB24, 0x241998FB, 0xFB241998, 0x98FB2419, }, /* x=5E */
+ { 0x1791F62F, 0x2F1791F6, 0xF62F1791, 0x91F62F17, }, /* x=5F */
+ { 0x764DD68D, 0x8D764DD6, 0xD68D764D, 0x4DD68D76, }, /* x=60 */
+ { 0x7844DB86, 0x867844DB, 0xDB867844, 0x44DB8678, }, /* x=61 */
+ { 0x6A5FCC9B, 0x9B6A5FCC, 0xCC9B6A5F, 0x5FCC9B6A, }, /* x=62 */
+ { 0x6456C190, 0x906456C1, 0xC1906456, 0x56C19064, }, /* x=63 */
+ { 0x4E69E2A1, 0xA14E69E2, 0xE2A14E69, 0x69E2A14E, }, /* x=64 */
+ { 0x4060EFAA, 0xAA4060EF, 0xEFAA4060, 0x60EFAA40, }, /* x=65 */
+ { 0x527BF8B7, 0xB7527BF8, 0xF8B7527B, 0x7BF8B752, }, /* x=66 */
+ { 0x5C72F5BC, 0xBC5C72F5, 0xF5BC5C72, 0x72F5BC5C, }, /* x=67 */
+ { 0x0605BED5, 0xD50605BE, 0xBED50605, 0x05BED506, }, /* x=68 */
+ { 0x080CB3DE, 0xDE080CB3, 0xB3DE080C, 0x0CB3DE08, }, /* x=69 */
+ { 0x1A17A4C3, 0xC31A17A4, 0xA4C31A17, 0x17A4C31A, }, /* x=6A */
+ { 0x141EA9C8, 0xC8141EA9, 0xA9C8141E, 0x1EA9C814, }, /* x=6B */
+ { 0x3E218AF9, 0xF93E218A, 0x8AF93E21, 0x218AF93E, }, /* x=6C */
+ { 0x302887F2, 0xF2302887, 0x87F23028, 0x2887F230, }, /* x=6D */
+ { 0x223390EF, 0xEF223390, 0x90EF2233, 0x3390EF22, }, /* x=6E */
+ { 0x2C3A9DE4, 0xE42C3A9D, 0x9DE42C3A, 0x3A9DE42C, }, /* x=6F */
+ { 0x96DD063D, 0x3D96DD06, 0x063D96DD, 0xDD063D96, }, /* x=70 */
+ { 0x98D40B36, 0x3698D40B, 0x0B3698D4, 0xD40B3698, }, /* x=71 */
+ { 0x8ACF1C2B, 0x2B8ACF1C, 0x1C2B8ACF, 0xCF1C2B8A, }, /* x=72 */
+ { 0x84C61120, 0x2084C611, 0x112084C6, 0xC6112084, }, /* x=73 */
+ { 0xAEF93211, 0x11AEF932, 0x3211AEF9, 0xF93211AE, }, /* x=74 */
+ { 0xA0F03F1A, 0x1AA0F03F, 0x3F1AA0F0, 0xF03F1AA0, }, /* x=75 */
+ { 0xB2EB2807, 0x07B2EB28, 0x2807B2EB, 0xEB2807B2, }, /* x=76 */
+ { 0xBCE2250C, 0x0CBCE225, 0x250CBCE2, 0xE2250CBC, }, /* x=77 */
+ { 0xE6956E65, 0x65E6956E, 0x6E65E695, 0x956E65E6, }, /* x=78 */
+ { 0xE89C636E, 0x6EE89C63, 0x636EE89C, 0x9C636EE8, }, /* x=79 */
+ { 0xFA877473, 0x73FA8774, 0x7473FA87, 0x877473FA, }, /* x=7A */
+ { 0xF48E7978, 0x78F48E79, 0x7978F48E, 0x8E7978F4, }, /* x=7B */
+ { 0xDEB15A49, 0x49DEB15A, 0x5A49DEB1, 0xB15A49DE, }, /* x=7C */
+ { 0xD0B85742, 0x42D0B857, 0x5742D0B8, 0xB85742D0, }, /* x=7D */
+ { 0xC2A3405F, 0x5FC2A340, 0x405FC2A3, 0xA3405FC2, }, /* x=7E */
+ { 0xCCAA4D54, 0x54CCAA4D, 0x4D54CCAA, 0xAA4D54CC, }, /* x=7F */
+ { 0x41ECDAF7, 0xF741ECDA, 0xDAF741EC, 0xECDAF741, }, /* x=80 */
+ { 0x4FE5D7FC, 0xFC4FE5D7, 0xD7FC4FE5, 0xE5D7FC4F, }, /* x=81 */
+ { 0x5DFEC0E1, 0xE15DFEC0, 0xC0E15DFE, 0xFEC0E15D, }, /* x=82 */
+ { 0x53F7CDEA, 0xEA53F7CD, 0xCDEA53F7, 0xF7CDEA53, }, /* x=83 */
+ { 0x79C8EEDB, 0xDB79C8EE, 0xEEDB79C8, 0xC8EEDB79, }, /* x=84 */
+ { 0x77C1E3D0, 0xD077C1E3, 0xE3D077C1, 0xC1E3D077, }, /* x=85 */
+ { 0x65DAF4CD, 0xCD65DAF4, 0xF4CD65DA, 0xDAF4CD65, }, /* x=86 */
+ { 0x6BD3F9C6, 0xC66BD3F9, 0xF9C66BD3, 0xD3F9C66B, }, /* x=87 */
+ { 0x31A4B2AF, 0xAF31A4B2, 0xB2AF31A4, 0xA4B2AF31, }, /* x=88 */
+ { 0x3FADBFA4, 0xA43FADBF, 0xBFA43FAD, 0xADBFA43F, }, /* x=89 */
+ { 0x2DB6A8B9, 0xB92DB6A8, 0xA8B92DB6, 0xB6A8B92D, }, /* x=8A */
+ { 0x23BFA5B2, 0xB223BFA5, 0xA5B223BF, 0xBFA5B223, }, /* x=8B */
+ { 0x09808683, 0x83098086, 0x86830980, 0x80868309, }, /* x=8C */
+ { 0x07898B88, 0x8807898B, 0x8B880789, 0x898B8807, }, /* x=8D */
+ { 0x15929C95, 0x9515929C, 0x9C951592, 0x929C9515, }, /* x=8E */
+ { 0x1B9B919E, 0x9E1B9B91, 0x919E1B9B, 0x9B919E1B, }, /* x=8F */
+ { 0xA17C0A47, 0x47A17C0A, 0x0A47A17C, 0x7C0A47A1, }, /* x=90 */
+ { 0xAF75074C, 0x4CAF7507, 0x074CAF75, 0x75074CAF, }, /* x=91 */
+ { 0xBD6E1051, 0x51BD6E10, 0x1051BD6E, 0x6E1051BD, }, /* x=92 */
+ { 0xB3671D5A, 0x5AB3671D, 0x1D5AB367, 0x671D5AB3, }, /* x=93 */
+ { 0x99583E6B, 0x6B99583E, 0x3E6B9958, 0x583E6B99, }, /* x=94 */
+ { 0x97513360, 0x60975133, 0x33609751, 0x51336097, }, /* x=95 */
+ { 0x854A247D, 0x7D854A24, 0x247D854A, 0x4A247D85, }, /* x=96 */
+ { 0x8B432976, 0x768B4329, 0x29768B43, 0x4329768B, }, /* x=97 */
+ { 0xD134621F, 0x1FD13462, 0x621FD134, 0x34621FD1, }, /* x=98 */
+ { 0xDF3D6F14, 0x14DF3D6F, 0x6F14DF3D, 0x3D6F14DF, }, /* x=99 */
+ { 0xCD267809, 0x09CD2678, 0x7809CD26, 0x267809CD, }, /* x=9A */
+ { 0xC32F7502, 0x02C32F75, 0x7502C32F, 0x2F7502C3, }, /* x=9B */
+ { 0xE9105633, 0x33E91056, 0x5633E910, 0x105633E9, }, /* x=9C */
+ { 0xE7195B38, 0x38E7195B, 0x5B38E719, 0x195B38E7, }, /* x=9D */
+ { 0xF5024C25, 0x25F5024C, 0x4C25F502, 0x024C25F5, }, /* x=9E */
+ { 0xFB0B412E, 0x2EFB0B41, 0x412EFB0B, 0x0B412EFB, }, /* x=9F */
+ { 0x9AD7618C, 0x8C9AD761, 0x618C9AD7, 0xD7618C9A, }, /* x=A0 */
+ { 0x94DE6C87, 0x8794DE6C, 0x6C8794DE, 0xDE6C8794, }, /* x=A1 */
+ { 0x86C57B9A, 0x9A86C57B, 0x7B9A86C5, 0xC57B9A86, }, /* x=A2 */
+ { 0x88CC7691, 0x9188CC76, 0x769188CC, 0xCC769188, }, /* x=A3 */
+ { 0xA2F355A0, 0xA0A2F355, 0x55A0A2F3, 0xF355A0A2, }, /* x=A4 */
+ { 0xACFA58AB, 0xABACFA58, 0x58ABACFA, 0xFA58ABAC, }, /* x=A5 */
+ { 0xBEE14FB6, 0xB6BEE14F, 0x4FB6BEE1, 0xE14FB6BE, }, /* x=A6 */
+ { 0xB0E842BD, 0xBDB0E842, 0x42BDB0E8, 0xE842BDB0, }, /* x=A7 */
+ { 0xEA9F09D4, 0xD4EA9F09, 0x09D4EA9F, 0x9F09D4EA, }, /* x=A8 */
+ { 0xE49604DF, 0xDFE49604, 0x04DFE496, 0x9604DFE4, }, /* x=A9 */
+ { 0xF68D13C2, 0xC2F68D13, 0x13C2F68D, 0x8D13C2F6, }, /* x=AA */
+ { 0xF8841EC9, 0xC9F8841E, 0x1EC9F884, 0x841EC9F8, }, /* x=AB */
+ { 0xD2BB3DF8, 0xF8D2BB3D, 0x3DF8D2BB, 0xBB3DF8D2, }, /* x=AC */
+ { 0xDCB230F3, 0xF3DCB230, 0x30F3DCB2, 0xB230F3DC, }, /* x=AD */
+ { 0xCEA927EE, 0xEECEA927, 0x27EECEA9, 0xA927EECE, }, /* x=AE */
+ { 0xC0A02AE5, 0xE5C0A02A, 0x2AE5C0A0, 0xA02AE5C0, }, /* x=AF */
+ { 0x7A47B13C, 0x3C7A47B1, 0xB13C7A47, 0x47B13C7A, }, /* x=B0 */
+ { 0x744EBC37, 0x37744EBC, 0xBC37744E, 0x4EBC3774, }, /* x=B1 */
+ { 0x6655AB2A, 0x2A6655AB, 0xAB2A6655, 0x55AB2A66, }, /* x=B2 */
+ { 0x685CA621, 0x21685CA6, 0xA621685C, 0x5CA62168, }, /* x=B3 */
+ { 0x42638510, 0x10426385, 0x85104263, 0x63851042, }, /* x=B4 */
+ { 0x4C6A881B, 0x1B4C6A88, 0x881B4C6A, 0x6A881B4C, }, /* x=B5 */
+ { 0x5E719F06, 0x065E719F, 0x9F065E71, 0x719F065E, }, /* x=B6 */
+ { 0x5078920D, 0x0D507892, 0x920D5078, 0x78920D50, }, /* x=B7 */
+ { 0x0A0FD964, 0x640A0FD9, 0xD9640A0F, 0x0FD9640A, }, /* x=B8 */
+ { 0x0406D46F, 0x6F0406D4, 0xD46F0406, 0x06D46F04, }, /* x=B9 */
+ { 0x161DC372, 0x72161DC3, 0xC372161D, 0x1DC37216, }, /* x=BA */
+ { 0x1814CE79, 0x791814CE, 0xCE791814, 0x14CE7918, }, /* x=BB */
+ { 0x322BED48, 0x48322BED, 0xED48322B, 0x2BED4832, }, /* x=BC */
+ { 0x3C22E043, 0x433C22E0, 0xE0433C22, 0x22E0433C, }, /* x=BD */
+ { 0x2E39F75E, 0x5E2E39F7, 0xF75E2E39, 0x39F75E2E, }, /* x=BE */
+ { 0x2030FA55, 0x552030FA, 0xFA552030, 0x30FA5520, }, /* x=BF */
+ { 0xEC9AB701, 0x01EC9AB7, 0xB701EC9A, 0x9AB701EC, }, /* x=C0 */
+ { 0xE293BA0A, 0x0AE293BA, 0xBA0AE293, 0x93BA0AE2, }, /* x=C1 */
+ { 0xF088AD17, 0x17F088AD, 0xAD17F088, 0x88AD17F0, }, /* x=C2 */
+ { 0xFE81A01C, 0x1CFE81A0, 0xA01CFE81, 0x81A01CFE, }, /* x=C3 */
+ { 0xD4BE832D, 0x2DD4BE83, 0x832DD4BE, 0xBE832DD4, }, /* x=C4 */
+ { 0xDAB78E26, 0x26DAB78E, 0x8E26DAB7, 0xB78E26DA, }, /* x=C5 */
+ { 0xC8AC993B, 0x3BC8AC99, 0x993BC8AC, 0xAC993BC8, }, /* x=C6 */
+ { 0xC6A59430, 0x30C6A594, 0x9430C6A5, 0xA59430C6, }, /* x=C7 */
+ { 0x9CD2DF59, 0x599CD2DF, 0xDF599CD2, 0xD2DF599C, }, /* x=C8 */
+ { 0x92DBD252, 0x5292DBD2, 0xD25292DB, 0xDBD25292, }, /* x=C9 */
+ { 0x80C0C54F, 0x4F80C0C5, 0xC54F80C0, 0xC0C54F80, }, /* x=CA */
+ { 0x8EC9C844, 0x448EC9C8, 0xC8448EC9, 0xC9C8448E, }, /* x=CB */
+ { 0xA4F6EB75, 0x75A4F6EB, 0xEB75A4F6, 0xF6EB75A4, }, /* x=CC */
+ { 0xAAFFE67E, 0x7EAAFFE6, 0xE67EAAFF, 0xFFE67EAA, }, /* x=CD */
+ { 0xB8E4F163, 0x63B8E4F1, 0xF163B8E4, 0xE4F163B8, }, /* x=CE */
+ { 0xB6EDFC68, 0x68B6EDFC, 0xFC68B6ED, 0xEDFC68B6, }, /* x=CF */
+ { 0x0C0A67B1, 0xB10C0A67, 0x67B10C0A, 0x0A67B10C, }, /* x=D0 */
+ { 0x02036ABA, 0xBA02036A, 0x6ABA0203, 0x036ABA02, }, /* x=D1 */
+ { 0x10187DA7, 0xA710187D, 0x7DA71018, 0x187DA710, }, /* x=D2 */
+ { 0x1E1170AC, 0xAC1E1170, 0x70AC1E11, 0x1170AC1E, }, /* x=D3 */
+ { 0x342E539D, 0x9D342E53, 0x539D342E, 0x2E539D34, }, /* x=D4 */
+ { 0x3A275E96, 0x963A275E, 0x5E963A27, 0x275E963A, }, /* x=D5 */
+ { 0x283C498B, 0x8B283C49, 0x498B283C, 0x3C498B28, }, /* x=D6 */
+ { 0x26354480, 0x80263544, 0x44802635, 0x35448026, }, /* x=D7 */
+ { 0x7C420FE9, 0xE97C420F, 0x0FE97C42, 0x420FE97C, }, /* x=D8 */
+ { 0x724B02E2, 0xE2724B02, 0x02E2724B, 0x4B02E272, }, /* x=D9 */
+ { 0x605015FF, 0xFF605015, 0x15FF6050, 0x5015FF60, }, /* x=DA */
+ { 0x6E5918F4, 0xF46E5918, 0x18F46E59, 0x5918F46E, }, /* x=DB */
+ { 0x44663BC5, 0xC544663B, 0x3BC54466, 0x663BC544, }, /* x=DC */
+ { 0x4A6F36CE, 0xCE4A6F36, 0x36CE4A6F, 0x6F36CE4A, }, /* x=DD */
+ { 0x587421D3, 0xD3587421, 0x21D35874, 0x7421D358, }, /* x=DE */
+ { 0x567D2CD8, 0xD8567D2C, 0x2CD8567D, 0x7D2CD856, }, /* x=DF */
+ { 0x37A10C7A, 0x7A37A10C, 0x0C7A37A1, 0xA10C7A37, }, /* x=E0 */
+ { 0x39A80171, 0x7139A801, 0x017139A8, 0xA8017139, }, /* x=E1 */
+ { 0x2BB3166C, 0x6C2BB316, 0x166C2BB3, 0xB3166C2B, }, /* x=E2 */
+ { 0x25BA1B67, 0x6725BA1B, 0x1B6725BA, 0xBA1B6725, }, /* x=E3 */
+ { 0x0F853856, 0x560F8538, 0x38560F85, 0x8538560F, }, /* x=E4 */
+ { 0x018C355D, 0x5D018C35, 0x355D018C, 0x8C355D01, }, /* x=E5 */
+ { 0x13972240, 0x40139722, 0x22401397, 0x97224013, }, /* x=E6 */
+ { 0x1D9E2F4B, 0x4B1D9E2F, 0x2F4B1D9E, 0x9E2F4B1D, }, /* x=E7 */
+ { 0x47E96422, 0x2247E964, 0x642247E9, 0xE9642247, }, /* x=E8 */
+ { 0x49E06929, 0x2949E069, 0x692949E0, 0xE0692949, }, /* x=E9 */
+ { 0x5BFB7E34, 0x345BFB7E, 0x7E345BFB, 0xFB7E345B, }, /* x=EA */
+ { 0x55F2733F, 0x3F55F273, 0x733F55F2, 0xF2733F55, }, /* x=EB */
+ { 0x7FCD500E, 0x0E7FCD50, 0x500E7FCD, 0xCD500E7F, }, /* x=EC */
+ { 0x71C45D05, 0x0571C45D, 0x5D0571C4, 0xC45D0571, }, /* x=ED */
+ { 0x63DF4A18, 0x1863DF4A, 0x4A1863DF, 0xDF4A1863, }, /* x=EE */
+ { 0x6DD64713, 0x136DD647, 0x47136DD6, 0xD647136D, }, /* x=EF */
+ { 0xD731DCCA, 0xCAD731DC, 0xDCCAD731, 0x31DCCAD7, }, /* x=F0 */
+ { 0xD938D1C1, 0xC1D938D1, 0xD1C1D938, 0x38D1C1D9, }, /* x=F1 */
+ { 0xCB23C6DC, 0xDCCB23C6, 0xC6DCCB23, 0x23C6DCCB, }, /* x=F2 */
+ { 0xC52ACBD7, 0xD7C52ACB, 0xCBD7C52A, 0x2ACBD7C5, }, /* x=F3 */
+ { 0xEF15E8E6, 0xE6EF15E8, 0xE8E6EF15, 0x15E8E6EF, }, /* x=F4 */
+ { 0xE11CE5ED, 0xEDE11CE5, 0xE5EDE11C, 0x1CE5EDE1, }, /* x=F5 */
+ { 0xF307F2F0, 0xF0F307F2, 0xF2F0F307, 0x07F2F0F3, }, /* x=F6 */
+ { 0xFD0EFFFB, 0xFBFD0EFF, 0xFFFBFD0E, 0x0EFFFBFD, }, /* x=F7 */
+ { 0xA779B492, 0x92A779B4, 0xB492A779, 0x79B492A7, }, /* x=F8 */
+ { 0xA970B999, 0x99A970B9, 0xB999A970, 0x70B999A9, }, /* x=F9 */
+ { 0xBB6BAE84, 0x84BB6BAE, 0xAE84BB6B, 0x6BAE84BB, }, /* x=FA */
+ { 0xB562A38F, 0x8FB562A3, 0xA38FB562, 0x62A38FB5, }, /* x=FB */
+ { 0x9F5D80BE, 0xBE9F5D80, 0x80BE9F5D, 0x5D80BE9F, }, /* x=FC */
+ { 0x91548DB5, 0xB591548D, 0x8DB59154, 0x548DB591, }, /* x=FD */
+ { 0x834F9AA8, 0xA8834F9A, 0x9AA8834F, 0x4F9AA883, }, /* x=FE */
+ { 0x8D4697A3, 0xA38D4697, 0x97A38D46, 0x4697A38D, }, /* x=FF */
+};
+
+
+
+/*
+AES_Te0[x] = S [x].[02, 01, 01, 03];
+AES_Te1[x] = S [x].[03, 02, 01, 01];
+AES_Te2[x] = S [x].[01, 03, 02, 01];
+AES_Te3[x] = S [x].[01, 01, 03, 02];
+AES_Te4[x] = S [x].[01, 01, 01, 01];
+
+AES_Td0[x] = Si[x].[0e, 09, 0d, 0b];
+AES_Td1[x] = Si[x].[0b, 0e, 09, 0d];
+AES_Td2[x] = Si[x].[0d, 0b, 0e, 09];
+AES_Td3[x] = Si[x].[09, 0d, 0b, 0e];
+AES_Td4[x] = Si[x].[01, 01, 01, 01];
+*/
+
+const uint32_t AES_Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+const uint32_t AES_Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+const uint32_t AES_Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+const uint32_t AES_Te3[256] = {
+
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+const uint32_t AES_Te4[256] = {
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+};
+const uint32_t AES_Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+const uint32_t AES_Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+const uint32_t AES_Td2[256] = { /* Decryption lookup table; AES_decrypt and AES_set_decrypt_key index it with (word >> 8) & 0xff. Entry n equals AES_Td3[n] rotated left by 8 bits (e.g. 0xf4a75051 -> 0xa75051f4). */
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ /* entries 80..255 follow; the break above is purely visual and has no effect on the initializer */
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+const uint32_t AES_Td3[256] = { /* Decryption lookup table; AES_decrypt and AES_set_decrypt_key index it with the low byte of a word (word & 0xff). Entry n equals AES_Td2[n] rotated right by 8 bits. */
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+const uint32_t AES_Td4[256] = { /* Final-round decryption table: every entry repeats one byte in all four lanes, so AES_decrypt can mask out whichever lane it needs. NOTE(review): the byte is presumably the AES inverse S-box value for the index - confirm against FIPS-197. */
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+};
+static const u32 rcon[] = { /* Round constants, each held in the top byte of the word; AES_set_encrypt_key XORs rcon[i] into the first word of every newly derived key group. */
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule: loads bits/32 words from userKey via GETU32 and derives the rest, filling 4 * (rounds + 1) words of key->rd_key and setting key->rounds to 10, 12 or 14.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+ /* Returns 0 on success, -1 if userKey or key is NULL, -2 if bits is not 128, 192 or 256. */
+ u32 *rk;
+ int i = 0; /* index of the next rcon[] entry to consume */
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits==128)
+ key->rounds = 10;
+ else if (bits==192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey ); /* the first four key words are common to every key size */
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) { /* each pass derives the next 4 words from the previous 4, then slides rk forward */
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^ /* each byte of temp goes through AES_Te4 and lands one lane higher ... */
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^ /* ... with the top byte wrapping round to the bottom lane */
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) { /* ten groups appended: rd_key[0..43] is complete */
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) { /* each pass derives up to 6 new words from the previous 6 */
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) { /* stop after the first 4 words of the eighth group: rd_key[0..51] is complete */
+ return 0;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) { /* each pass derives up to 8 new words from the previous 8 */
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) { /* stop after the first 4 words of the seventh group: rd_key[0..59] is complete */
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[ 4] ^ /* middle word of the group: AES_Te4 lookup per byte, but no lane rotation and no rcon */
+ (AES_Te4[(temp >> 24) ] & 0xff000000) ^
+ (AES_Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(temp ) & 0xff] & 0x000000ff);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ abort(); /* not reached: bits was validated above and every key-size branch returns from inside its loop */
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule: builds the encryption schedule, reverses the order of its round keys, then transforms the inner ones. Returns 0, or the negative status from AES_set_encrypt_key.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+
+ rk = key->rd_key;
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) { /* swap 4-word round keys from both ends, working inwards */
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) { /* NOTE(review): presumably AES_Te4[x] & 0xff is the forward S-box, cancelling the inverse S-box folded into AES_Td0..3 so only the inverse MixColumn remains - confirm against the table definitions */
+ rk += 4;
+ rk[0] =
+ AES_Td0[AES_Te4[(rk[0] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[0] ) & 0xff] & 0xff];
+ rk[1] =
+ AES_Td0[AES_Te4[(rk[1] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[1] ) & 0xff] & 0xff];
+ rk[2] =
+ AES_Td0[AES_Te4[(rk[2] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[2] ) & 0xff] & 0xff];
+ rk[3] =
+ AES_Td0[AES_Te4[(rk[3] >> 24) ] & 0xff] ^
+ AES_Td1[AES_Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ AES_Td2[AES_Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ AES_Td3[AES_Te4[(rk[3] ) & 0xff] & 0xff];
+ }
+ return 0;
+}
+
+#ifndef AES_ASM
+/*
+ * Encrypt a single block: loads four words from in (offsets 0, 4, 8, 12), runs key->rounds rounds with the schedule in key->rd_key, and stores four words to out.
+ * in and out can overlap (every input word is loaded into s0..s3 before the first PUTU32 store)
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3; /* the state alternates between s0..s3 and t0..t3 from one round to the next */
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key); /* NULL arguments are only caught when assertions are compiled in */
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[10];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[12];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[13];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[14];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[16];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[17];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[18];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[20];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[21];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[22];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[24];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[25];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[26];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[28];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[29];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[30];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[32];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[33];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[34];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[36];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[37];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[38];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[40];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[41];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[42];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[44];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[45];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[46];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Te0[t0 >> 24] ^ AES_Te1[(t1 >> 16) & 0xff] ^ AES_Te2[(t2 >> 8) & 0xff] ^ AES_Te3[t3 & 0xff] ^ rk[48];
+ s1 = AES_Te0[t1 >> 24] ^ AES_Te1[(t2 >> 16) & 0xff] ^ AES_Te2[(t3 >> 8) & 0xff] ^ AES_Te3[t0 & 0xff] ^ rk[49];
+ s2 = AES_Te0[t2 >> 24] ^ AES_Te1[(t3 >> 16) & 0xff] ^ AES_Te2[(t0 >> 8) & 0xff] ^ AES_Te3[t1 & 0xff] ^ rk[50];
+ s3 = AES_Te0[t3 >> 24] ^ AES_Te1[(t0 >> 16) & 0xff] ^ AES_Te2[(t1 >> 8) & 0xff] ^ AES_Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Te0[s0 >> 24] ^ AES_Te1[(s1 >> 16) & 0xff] ^ AES_Te2[(s2 >> 8) & 0xff] ^ AES_Te3[s3 & 0xff] ^ rk[52];
+ t1 = AES_Te0[s1 >> 24] ^ AES_Te1[(s2 >> 16) & 0xff] ^ AES_Te2[(s3 >> 8) & 0xff] ^ AES_Te3[s0 & 0xff] ^ rk[53];
+ t2 = AES_Te0[s2 >> 24] ^ AES_Te1[(s3 >> 16) & 0xff] ^ AES_Te2[(s0 >> 8) & 0xff] ^ AES_Te3[s1 & 0xff] ^ rk[54];
+ t3 = AES_Te0[s3 >> 24] ^ AES_Te1[(s0 >> 16) & 0xff] ^ AES_Te2[(s1 >> 8) & 0xff] ^ AES_Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2; /* 4 words per round: rk now points at the final round key */
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1; /* each loop pass performs two rounds (s -> t, then t -> s); assumes rounds is 10, 12 or 14 as set by AES_set_encrypt_key */
+ for (;;) {
+ t0 =
+ AES_Te0[(s0 >> 24) ] ^
+ AES_Te1[(s1 >> 16) & 0xff] ^
+ AES_Te2[(s2 >> 8) & 0xff] ^
+ AES_Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Te0[(s1 >> 24) ] ^
+ AES_Te1[(s2 >> 16) & 0xff] ^
+ AES_Te2[(s3 >> 8) & 0xff] ^
+ AES_Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Te0[(s2 >> 24) ] ^
+ AES_Te1[(s3 >> 16) & 0xff] ^
+ AES_Te2[(s0 >> 8) & 0xff] ^
+ AES_Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Te0[(s3 >> 24) ] ^
+ AES_Te1[(s0 >> 16) & 0xff] ^
+ AES_Te2[(s1 >> 8) & 0xff] ^
+ AES_Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) { /* leave with the state in t0..t3 and rk[0..3] being the final round key */
+ break;
+ }
+
+ s0 =
+ AES_Te0[(t0 >> 24) ] ^
+ AES_Te1[(t1 >> 16) & 0xff] ^
+ AES_Te2[(t2 >> 8) & 0xff] ^
+ AES_Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Te0[(t1 >> 24) ] ^
+ AES_Te1[(t2 >> 16) & 0xff] ^
+ AES_Te2[(t3 >> 8) & 0xff] ^
+ AES_Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Te0[(t2 >> 24) ] ^
+ AES_Te1[(t3 >> 16) & 0xff] ^
+ AES_Te2[(t0 >> 8) & 0xff] ^
+ AES_Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Te0[(t3 >> 24) ] ^
+ AES_Te1[(t0 >> 16) & 0xff] ^
+ AES_Te2[(t1 >> 8) & 0xff] ^
+ AES_Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (AES_Te4[(t0 >> 24) ] & 0xff000000) ^ /* the last round uses only AES_Te4, keeping one byte lane per lookup */
+ (AES_Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Te4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Te4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Te4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+/*
+ * Decrypt a single block: loads four words from in (offsets 0, 4, 8, 12), runs key->rounds rounds with the schedule in key->rd_key (expected to come from AES_set_decrypt_key), and stores four words to out.
+ * in and out can overlap (every input word is loaded into s0..s3 before the first PUTU32 store)
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t0, t1, t2, t3; /* the state alternates between s0..s3 and t0..t3 from one round to the next */
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ assert(in && out && key); /* NULL arguments are only caught when assertions are compiled in */
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[10];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[12];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[13];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[14];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[16];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[17];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[18];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[20];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[21];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[22];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[24];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[25];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[26];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[28];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[29];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[30];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[32];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[33];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[34];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[36];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[37];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[38];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[39];
+ if (key->rounds > 10) {
+ /* round 10: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[40];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[41];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[42];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[44];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[45];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[46];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[47];
+ if (key->rounds > 12) {
+ /* round 12: */
+ s0 = AES_Td0[t0 >> 24] ^ AES_Td1[(t3 >> 16) & 0xff] ^ AES_Td2[(t2 >> 8) & 0xff] ^ AES_Td3[t1 & 0xff] ^ rk[48];
+ s1 = AES_Td0[t1 >> 24] ^ AES_Td1[(t0 >> 16) & 0xff] ^ AES_Td2[(t3 >> 8) & 0xff] ^ AES_Td3[t2 & 0xff] ^ rk[49];
+ s2 = AES_Td0[t2 >> 24] ^ AES_Td1[(t1 >> 16) & 0xff] ^ AES_Td2[(t0 >> 8) & 0xff] ^ AES_Td3[t3 & 0xff] ^ rk[50];
+ s3 = AES_Td0[t3 >> 24] ^ AES_Td1[(t2 >> 16) & 0xff] ^ AES_Td2[(t1 >> 8) & 0xff] ^ AES_Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = AES_Td0[s0 >> 24] ^ AES_Td1[(s3 >> 16) & 0xff] ^ AES_Td2[(s2 >> 8) & 0xff] ^ AES_Td3[s1 & 0xff] ^ rk[52];
+ t1 = AES_Td0[s1 >> 24] ^ AES_Td1[(s0 >> 16) & 0xff] ^ AES_Td2[(s3 >> 8) & 0xff] ^ AES_Td3[s2 & 0xff] ^ rk[53];
+ t2 = AES_Td0[s2 >> 24] ^ AES_Td1[(s1 >> 16) & 0xff] ^ AES_Td2[(s0 >> 8) & 0xff] ^ AES_Td3[s3 & 0xff] ^ rk[54];
+ t3 = AES_Td0[s3 >> 24] ^ AES_Td1[(s2 >> 16) & 0xff] ^ AES_Td2[(s1 >> 8) & 0xff] ^ AES_Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += key->rounds << 2; /* 4 words per round: rk now points at the final round key */
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = key->rounds >> 1; /* each loop pass performs two rounds (s -> t, then t -> s); assumes rounds is 10, 12 or 14 as set by the key-setup routines */
+ for (;;) {
+ t0 =
+ AES_Td0[(s0 >> 24) ] ^
+ AES_Td1[(s3 >> 16) & 0xff] ^
+ AES_Td2[(s2 >> 8) & 0xff] ^
+ AES_Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ AES_Td0[(s1 >> 24) ] ^
+ AES_Td1[(s0 >> 16) & 0xff] ^
+ AES_Td2[(s3 >> 8) & 0xff] ^
+ AES_Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ AES_Td0[(s2 >> 24) ] ^
+ AES_Td1[(s1 >> 16) & 0xff] ^
+ AES_Td2[(s0 >> 8) & 0xff] ^
+ AES_Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ AES_Td0[(s3 >> 24) ] ^
+ AES_Td1[(s2 >> 16) & 0xff] ^
+ AES_Td2[(s1 >> 8) & 0xff] ^
+ AES_Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) { /* leave with the state in t0..t3 and rk[0..3] being the final round key */
+ break;
+ }
+
+ s0 =
+ AES_Td0[(t0 >> 24) ] ^
+ AES_Td1[(t3 >> 16) & 0xff] ^
+ AES_Td2[(t2 >> 8) & 0xff] ^
+ AES_Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ AES_Td0[(t1 >> 24) ] ^
+ AES_Td1[(t0 >> 16) & 0xff] ^
+ AES_Td2[(t3 >> 8) & 0xff] ^
+ AES_Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ AES_Td0[(t2 >> 24) ] ^
+ AES_Td1[(t1 >> 16) & 0xff] ^
+ AES_Td2[(t0 >> 8) & 0xff] ^
+ AES_Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ AES_Td0[(t3 >> 24) ] ^
+ AES_Td1[(t2 >> 16) & 0xff] ^
+ AES_Td2[(t1 >> 8) & 0xff] ^
+ AES_Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (AES_Td4[(t0 >> 24) ] & 0xff000000) ^ /* the last round uses only AES_Td4, keeping one byte lane per lookup */
+ (AES_Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(out , s0);
+ s1 =
+ (AES_Td4[(t1 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(out + 4, s1);
+ s2 =
+ (AES_Td4[(t2 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(out + 8, s2);
+ s3 =
+ (AES_Td4[(t3 >> 24) ] & 0xff000000) ^
+ (AES_Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (AES_Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (AES_Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(out + 12, s3);
+}
+
+#endif /* AES_ASM */
+
+void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ const unsigned long length, const AES_KEY *key,
+ unsigned char *ivec, const int enc)
+{
+ /* CBC mode over `length` bytes from in to out, one AES_BLOCK_SIZE block at a time: encrypts when enc is non-zero, decrypts otherwise. ivec is read as the chaining value and overwritten so a later call can continue the chain. */
+ unsigned long n;
+ unsigned long len = length; /* bytes still to process */
+ unsigned char tmp[AES_BLOCK_SIZE];
+
+ assert(in && out && key && ivec);
+
+ if (enc) {
+ while (len >= AES_BLOCK_SIZE) {
+ for(n=0; n < AES_BLOCK_SIZE; ++n)
+ tmp[n] = in[n] ^ ivec[n];
+ AES_encrypt(tmp, out, key);
+ memcpy(ivec, out, AES_BLOCK_SIZE); /* the ciphertext just produced chains into the next block */
+ len -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ if (len) { /* trailing partial block: the missing plaintext bytes are treated as zero */
+ for(n=0; n < len; ++n)
+ tmp[n] = in[n] ^ ivec[n];
+ for(n=len; n < AES_BLOCK_SIZE; ++n)
+ tmp[n] = ivec[n];
+ AES_encrypt(tmp, tmp, key);
+ memcpy(out, tmp, AES_BLOCK_SIZE); /* NOTE(review): writes a full block although only len bytes remain - confirm callers size out accordingly */
+ memcpy(ivec, tmp, AES_BLOCK_SIZE);
+ }
+ } else {
+ while (len >= AES_BLOCK_SIZE) {
+ memcpy(tmp, in, AES_BLOCK_SIZE); /* keep the ciphertext: it becomes the next IV, and in may alias out */
+ AES_decrypt(in, out, key);
+ for(n=0; n < AES_BLOCK_SIZE; ++n)
+ out[n] ^= ivec[n];
+ memcpy(ivec, tmp, AES_BLOCK_SIZE);
+ len -= AES_BLOCK_SIZE;
+ in += AES_BLOCK_SIZE;
+ out += AES_BLOCK_SIZE;
+ }
+ if (len) { /* trailing partial block: only len bytes are written to out */
+ memcpy(tmp, in, AES_BLOCK_SIZE); /* NOTE(review): reads a full block from in even though len < AES_BLOCK_SIZE */
+ AES_decrypt(tmp, tmp, key);
+ for(n=0; n < len; ++n)
+ out[n] = tmp[n] ^ ivec[n];
+ memcpy(ivec, tmp, AES_BLOCK_SIZE); /* NOTE(review): tmp holds the decrypted block here, not the ciphertext as in the full-block loop above - confirm this is intended */
+ }
+ }
+}
diff --git a/crypto/afsplit.c b/crypto/afsplit.c
new file mode 100644
index 0000000..825e2cf
--- /dev/null
+++ b/crypto/afsplit.c
@@ -0,0 +1,159 @@
+/*
+ * QEMU Crypto anti forensic information splitter
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * Derived from cryptsetup package lib/luks1/af.c
+ *
+ * Copyright (C) 2004, Clemens Fruhwirth <clemens@endorphin.org>
+ * Copyright (C) 2009-2012, Red Hat, Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/bswap.h"
+#include "crypto/afsplit.h"
+#include "crypto/random.h"
+
+
+/*
+ * XOR two equally sized byte buffers.
+ *
+ * Stores in1[i] ^ in2[i] in out[i] for every i below @blocklen. Each
+ * pair of input bytes is read before the matching output byte is
+ * written, so @out may be the same buffer as either input.
+ */
+static void qcrypto_afsplit_xor(size_t blocklen,
+                                const uint8_t *in1,
+                                const uint8_t *in2,
+                                uint8_t *out)
+{
+    size_t remaining = blocklen;
+
+    while (remaining-- > 0) {
+        *out++ = *in1++ ^ *in2++;
+    }
+}
+
+
+/*
+ * Diffuse @block in place, one digest sized chunk at a time.
+ *
+ * Chunk number i is replaced by the digest of the 32-bit big-endian
+ * value i followed by the chunk's current contents. When @blocklen is
+ * not a multiple of the digest length, the short final chunk is hashed
+ * and overwritten with a correspondingly truncated digest.
+ *
+ * Returns 0 on success, or -1 when qcrypto_hash_bytesv() fails
+ * (@errp is handed to that call).
+ */
+static int qcrypto_afsplit_hash(QCryptoHashAlgorithm hash,
+                                size_t blocklen,
+                                uint8_t *block,
+                                Error **errp)
+{
+    const size_t digestlen = qcrypto_hash_digest_len(hash);
+    size_t nchunks = blocklen / digestlen;
+    size_t lastlen = blocklen % digestlen;
+    uint32_t idx;
+
+    if (lastlen == 0) {
+        /* The block divides evenly, so the last chunk is a full one */
+        lastlen = digestlen;
+    } else {
+        nchunks++;
+    }
+
+    for (idx = 0; idx < nchunks; idx++) {
+        uint8_t *chunk = block + (idx * digestlen);
+        size_t chunklen = (idx == (nchunks - 1)) ? lastlen : digestlen;
+        uint32_t counter = cpu_to_be32(idx);
+        uint8_t *digest = NULL;
+        size_t digestout = 0;
+        struct iovec in[] = {
+            { .iov_base = &counter, .iov_len = sizeof(counter) },
+            { .iov_base = chunk, .iov_len = chunklen },
+        };
+
+        if (qcrypto_hash_bytesv(hash, in, G_N_ELEMENTS(in),
+                                &digest, &digestout, errp) < 0) {
+            return -1;
+        }
+
+        assert(digestout == digestlen);
+        memcpy(chunk, digest, chunklen);
+        g_free(digest);
+    }
+
+    return 0;
+}
+
+
+/*
+ * Split one block of @blocklen bytes from @in into @stripes blocks that
+ * are stored back to back in @out.
+ *
+ * The first (@stripes - 1) stripes are filled with random bytes. Each of
+ * them is XORed into a zero-initialised accumulator which is then run
+ * through qcrypto_afsplit_hash(). The final stripe is @in XORed with
+ * the accumulator.
+ *
+ * @stripes must be at least 1: the loop bound (@stripes - 1) is
+ * computed in uint32_t and wraps around for 0.
+ *
+ * Returns 0 on success, -1 if random byte generation or hashing fails.
+ */
+int qcrypto_afsplit_encode(QCryptoHashAlgorithm hash,
+                           size_t blocklen,
+                           uint32_t stripes,
+                           const uint8_t *in,
+                           uint8_t *out,
+                           Error **errp)
+{
+    uint8_t *accum = g_new0(uint8_t, blocklen);
+    uint8_t *stripe = out;
+    uint32_t pending = stripes - 1;
+    int ret = -1;
+
+    while (pending-- > 0) {
+        if (qcrypto_random_bytes(stripe, blocklen, errp) < 0) {
+            goto cleanup;
+        }
+
+        qcrypto_afsplit_xor(blocklen, stripe, accum, accum);
+        if (qcrypto_afsplit_hash(hash, blocklen, accum, errp) < 0) {
+            goto cleanup;
+        }
+        stripe += blocklen;
+    }
+
+    /* stripe now points at the last stripe slot in out */
+    qcrypto_afsplit_xor(blocklen, in, accum, stripe);
+    ret = 0;
+
+ cleanup:
+    g_free(accum);
+    return ret;
+}
+
+
+/*
+ * Merge @stripes blocks of @blocklen bytes, stored back to back in @in,
+ * into a single block written to @out.
+ *
+ * Each of the first (@stripes - 1) stripes is XORed into a
+ * zero-initialised accumulator which is then run through
+ * qcrypto_afsplit_hash(). The result is the final stripe XORed with
+ * the accumulator.
+ *
+ * @stripes must be at least 1: the loop bound (@stripes - 1) is
+ * computed in uint32_t and wraps around for 0.
+ *
+ * Returns 0 on success, -1 if hashing fails.
+ */
+int qcrypto_afsplit_decode(QCryptoHashAlgorithm hash,
+                           size_t blocklen,
+                           uint32_t stripes,
+                           const uint8_t *in,
+                           uint8_t *out,
+                           Error **errp)
+{
+    uint8_t *accum = g_new0(uint8_t, blocklen);
+    const uint8_t *stripe = in;
+    uint32_t pending = stripes - 1;
+    int ret = -1;
+
+    while (pending-- > 0) {
+        qcrypto_afsplit_xor(blocklen, stripe, accum, accum);
+        if (qcrypto_afsplit_hash(hash, blocklen, accum, errp) < 0) {
+            goto cleanup;
+        }
+        stripe += blocklen;
+    }
+
+    /* stripe now points at the last stripe in the input */
+    qcrypto_afsplit_xor(blocklen, stripe, accum, out);
+    ret = 0;
+
+ cleanup:
+    g_free(accum);
+    return ret;
+}
diff --git a/crypto/block-luks.c b/crypto/block-luks.c
new file mode 100644
index 0000000..aba4455
--- /dev/null
+++ b/crypto/block-luks.c
@@ -0,0 +1,1416 @@
+/*
+ * QEMU Crypto block device encryption LUKS format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "qemu/bswap.h"
+
+#include "crypto/block-luks.h"
+
+#include "crypto/hash.h"
+#include "crypto/afsplit.h"
+#include "crypto/pbkdf.h"
+#include "crypto/secret.h"
+#include "crypto/random.h"
+
+#ifdef CONFIG_UUID
+#include <uuid/uuid.h>
+#endif
+
+#include "qemu/coroutine.h"
+
+/*
+ * Reference for the LUKS format implemented here is
+ *
+ * docs/on-disk-format.pdf
+ *
+ * in 'cryptsetup' package source code
+ *
+ * This file implements the 1.2.1 specification, dated
+ * Oct 16, 2011.
+ */
+
+typedef struct QCryptoBlockLUKS QCryptoBlockLUKS;
+typedef struct QCryptoBlockLUKSHeader QCryptoBlockLUKSHeader;
+typedef struct QCryptoBlockLUKSKeySlot QCryptoBlockLUKSKeySlot;
+
+
+/* The following constants are all defined by the LUKS spec */
+#define QCRYPTO_BLOCK_LUKS_VERSION 1
+
+/* Sizes of the fixed-width fields of the header / key slot structs below */
+#define QCRYPTO_BLOCK_LUKS_MAGIC_LEN 6
+#define QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN 32
+#define QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN 32
+#define QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN 32
+#define QCRYPTO_BLOCK_LUKS_DIGEST_LEN 20
+#define QCRYPTO_BLOCK_LUKS_SALT_LEN 32
+#define QCRYPTO_BLOCK_LUKS_UUID_LEN 40
+#define QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS 8
+/* Stripe count stored in every key slot when a volume is created */
+#define QCRYPTO_BLOCK_LUKS_STRIPES 4000
+#define QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS 1000
+/* Lower bound applied to the computed master key iteration count */
+#define QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS 1000
+/* In bytes; divided by the sector size when placing key slot material */
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET 4096
+
+/* The two values used for QCryptoBlockLUKSKeySlot.active */
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED 0x0000DEAD
+#define QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED 0x00AC71F3
+
+/* Sector counts from the header are multiplied by this to get byte
+ * offsets; the LL suffix makes those products long long */
+#define QCRYPTO_BLOCK_LUKS_SECTOR_SIZE 512LL
+
+/* Compared with memcmp() against the magic field at the start of the header */
+static const char qcrypto_block_luks_magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN] = {
+ 'L', 'U', 'K', 'S', 0xBA, 0xBE
+};
+
+/* Generic name -> id pair.
+ * NOTE(review): not referenced in the visible part of this file - confirm
+ * whether it is still needed. */
+typedef struct QCryptoBlockLUKSNameMap QCryptoBlockLUKSNameMap;
+struct QCryptoBlockLUKSNameMap {
+ const char *name;
+ int id;
+};
+
+/* One supported key size of a cipher family: the key size in bytes and
+ * the matching QCRYPTO_CIPHER_ALG_* value */
+typedef struct QCryptoBlockLUKSCipherSizeMap QCryptoBlockLUKSCipherSizeMap;
+struct QCryptoBlockLUKSCipherSizeMap {
+ uint32_t key_bytes;
+ int id;
+};
+/* A LUKS cipher name together with the table of key sizes it supports */
+typedef struct QCryptoBlockLUKSCipherNameMap QCryptoBlockLUKSCipherNameMap;
+struct QCryptoBlockLUKSCipherNameMap {
+ const char *name;
+ const QCryptoBlockLUKSCipherSizeMap *sizes;
+};
+
+
+/*
+ * Per-family key size tables. Every table ends with a { 0, 0 } entry;
+ * the lookup functions below stop scanning when they reach an entry
+ * whose key_bytes is 0.
+ */
+static const QCryptoBlockLUKSCipherSizeMap
+qcrypto_block_luks_cipher_size_map_aes[] = {
+ { 16, QCRYPTO_CIPHER_ALG_AES_128 },
+ { 24, QCRYPTO_CIPHER_ALG_AES_192 },
+ { 32, QCRYPTO_CIPHER_ALG_AES_256 },
+ { 0, 0 },
+};
+
+static const QCryptoBlockLUKSCipherSizeMap
+qcrypto_block_luks_cipher_size_map_cast5[] = {
+ { 16, QCRYPTO_CIPHER_ALG_CAST5_128 },
+ { 0, 0 },
+};
+
+static const QCryptoBlockLUKSCipherSizeMap
+qcrypto_block_luks_cipher_size_map_serpent[] = {
+ { 16, QCRYPTO_CIPHER_ALG_SERPENT_128 },
+ { 24, QCRYPTO_CIPHER_ALG_SERPENT_192 },
+ { 32, QCRYPTO_CIPHER_ALG_SERPENT_256 },
+ { 0, 0 },
+};
+
+static const QCryptoBlockLUKSCipherSizeMap
+qcrypto_block_luks_cipher_size_map_twofish[] = {
+ { 16, QCRYPTO_CIPHER_ALG_TWOFISH_128 },
+ { 24, QCRYPTO_CIPHER_ALG_TWOFISH_192 },
+ { 32, QCRYPTO_CIPHER_ALG_TWOFISH_256 },
+ { 0, 0 },
+};
+
+/* Cipher names accepted in the LUKS header, each with its key size table.
+ * This array has no terminator; it is walked using G_N_ELEMENTS(). */
+static const QCryptoBlockLUKSCipherNameMap
+qcrypto_block_luks_cipher_name_map[] = {
+ { "aes", qcrypto_block_luks_cipher_size_map_aes },
+ { "cast5", qcrypto_block_luks_cipher_size_map_cast5 },
+ { "serpent", qcrypto_block_luks_cipher_size_map_serpent },
+ { "twofish", qcrypto_block_luks_cipher_size_map_twofish },
+};
+
+
+/*
+ * One key slot of the LUKS header.
+ *
+ * This struct is written to disk in big-endian format,
+ * but operated upon in native-endian format.
+ */
+struct QCryptoBlockLUKSKeySlot {
+ /* state of keyslot: QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED or
+ * QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED */
+ uint32_t active;
+ /* iterations for PBKDF2 */
+ uint32_t iterations;
+ /* salt for PBKDF2 */
+ uint8_t salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
+ /* start sector of key material (in 512 byte sectors) */
+ uint32_t key_offset;
+ /* number of anti-forensic stripes */
+ uint32_t stripes;
+} QEMU_PACKED;
+
+/* Build-time check that the packed layout is exactly 48 bytes */
+QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSKeySlot) != 48);
+
+
+/*
+ * The LUKS volume header, as read from / written to offset 0.
+ *
+ * This struct is written to disk in big-endian format,
+ * but operated upon in native-endian format.
+ */
+struct QCryptoBlockLUKSHeader {
+ /* 'L', 'U', 'K', 'S', 0xBA, 0xBE */
+ char magic[QCRYPTO_BLOCK_LUKS_MAGIC_LEN];
+
+ /* LUKS version, currently 1 */
+ uint16_t version;
+
+ /* cipher name specification (aes, etc) */
+ char cipher_name[QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN];
+
+ /* cipher mode specification (cbc-plain, xts-essiv:sha256, etc) */
+ char cipher_mode[QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN];
+
+ /* hash specification (sha256, etc) */
+ char hash_spec[QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN];
+
+ /* start offset of the volume data (in 512 byte sectors) */
+ uint32_t payload_offset;
+
+ /* Number of key bytes */
+ uint32_t key_bytes;
+
+ /* master key checksum after PBKDF2 */
+ uint8_t master_key_digest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN];
+
+ /* salt for master key PBKDF2 */
+ uint8_t master_key_salt[QCRYPTO_BLOCK_LUKS_SALT_LEN];
+
+ /* iterations for master key PBKDF2 */
+ uint32_t master_key_iterations;
+
+ /* UUID of the partition in standard ASCII representation */
+ uint8_t uuid[QCRYPTO_BLOCK_LUKS_UUID_LEN];
+
+ /* key slots */
+ QCryptoBlockLUKSKeySlot key_slots[QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS];
+} QEMU_PACKED;
+
+/* Build-time check that the packed layout is exactly 592 bytes */
+QEMU_BUILD_BUG_ON(sizeof(struct QCryptoBlockLUKSHeader) != 592);
+
+
+/* Driver state for one LUKS volume; stored in QCryptoBlock.opaque by the
+ * open and create functions */
+struct QCryptoBlockLUKS {
+ /* Header contents, kept in native endianness while in memory */
+ QCryptoBlockLUKSHeader header;
+
+ /* Cache parsed versions of what's in header fields,
+ * as we can't rely on QCryptoBlock.cipher being
+ * non-NULL */
+ QCryptoCipherAlgorithm cipher_alg;
+ QCryptoCipherMode cipher_mode;
+ QCryptoIVGenAlgorithm ivgen_alg;
+ QCryptoHashAlgorithm ivgen_hash_alg;
+ QCryptoHashAlgorithm hash_alg;
+};
+
+
+/*
+ * Translate a LUKS header cipher name and key size into a cipher
+ * algorithm identifier.
+ *
+ * @name:      cipher name as stored in the header (eg "aes")
+ * @mode:      cipher mode; for XTS the key size is halved before matching
+ * @key_bytes: key size in bytes as stored in the header
+ * @errp:      set when no table entry matches
+ *
+ * Returns the matching algorithm id. When nothing matches, @errp is set
+ * and 0 is returned; callers in this file test @errp rather than the
+ * return value. The size reported in the error message is the value
+ * after any XTS halving.
+ */
+static int qcrypto_block_luks_cipher_name_lookup(const char *name,
+                                                 QCryptoCipherMode mode,
+                                                 uint32_t key_bytes,
+                                                 Error **errp)
+{
+    const QCryptoBlockLUKSCipherNameMap *map =
+        qcrypto_block_luks_cipher_name_map;
+    size_t maplen = G_N_ELEMENTS(qcrypto_block_luks_cipher_name_map);
+    const QCryptoBlockLUKSCipherSizeMap *size;
+    size_t i;
+
+    if (mode == QCRYPTO_CIPHER_MODE_XTS) {
+        key_bytes /= 2;
+    }
+
+    for (i = 0; i < maplen; i++) {
+        if (!g_str_equal(map[i].name, name)) {
+            continue;
+        }
+        /* Walk the size table up to its { 0, 0 } terminator. The loop
+         * must test the terminator itself, not compare an index with
+         * a key size. */
+        for (size = map[i].sizes; size->key_bytes != 0; size++) {
+            if (size->key_bytes == key_bytes) {
+                return size->id;
+            }
+        }
+    }
+
+    error_setg(errp, "Algorithm %s with key size %d bytes not supported",
+               name, key_bytes);
+    return 0;
+}
+
+/*
+ * Reverse of qcrypto_block_luks_cipher_name_lookup(): find the LUKS
+ * header cipher name for a cipher algorithm identifier.
+ *
+ * Returns a pointer to the name held in the static lookup table, or
+ * NULL with @errp set when @alg is not listed in any size table.
+ */
+static const char *
+qcrypto_block_luks_cipher_alg_lookup(QCryptoCipherAlgorithm alg,
+                                     Error **errp)
+{
+    const QCryptoBlockLUKSCipherNameMap *map =
+        qcrypto_block_luks_cipher_name_map;
+    size_t maplen = G_N_ELEMENTS(qcrypto_block_luks_cipher_name_map);
+    const QCryptoBlockLUKSCipherSizeMap *size;
+    size_t i;
+
+    for (i = 0; i < maplen; i++) {
+        /* Walk the size table up to its { 0, 0 } terminator. The loop
+         * must test the terminator itself, not compare an index with
+         * a key size. */
+        for (size = map[i].sizes; size->key_bytes != 0; size++) {
+            if (size->id == alg) {
+                return map[i].name;
+            }
+        }
+    }
+
+    error_setg(errp, "Algorithm '%s' not supported",
+               QCryptoCipherAlgorithm_lookup[alg]);
+    return NULL;
+}
+
+/*
+ * Look @name up in @map, a table of @maplen strings, and return the
+ * index of the first exact match.
+ *
+ * When there is no match, @errp is set to "<type> <name> not supported"
+ * and 0 is returned. 0 is also a valid index, so callers have to test
+ * @errp to detect failure.
+ *
+ * XXX replace with qapi_enum_parse() in future, when we can
+ * make that function emit a more friendly error message
+ */
+static int qcrypto_block_luks_name_lookup(const char *name,
+                                          const char *const *map,
+                                          size_t maplen,
+                                          const char *type,
+                                          Error **errp)
+{
+    size_t idx = 0;
+
+    while (idx < maplen) {
+        if (g_str_equal(map[idx], name)) {
+            return idx;
+        }
+        idx++;
+    }
+
+    error_setg(errp, "%s %s not supported", type, name);
+    return 0;
+}
+
+/*
+ * Convenience wrappers around qcrypto_block_luks_name_lookup(), one per
+ * lookup table. Each passes the table, its __MAX element count and the
+ * label used in the error message. As with the underlying function, a
+ * result of 0 is ambiguous and errp must be checked.
+ */
+#define qcrypto_block_luks_cipher_mode_lookup(name, errp) \
+ qcrypto_block_luks_name_lookup(name, \
+ QCryptoCipherMode_lookup, \
+ QCRYPTO_CIPHER_MODE__MAX, \
+ "Cipher mode", \
+ errp)
+
+#define qcrypto_block_luks_hash_name_lookup(name, errp) \
+ qcrypto_block_luks_name_lookup(name, \
+ QCryptoHashAlgorithm_lookup, \
+ QCRYPTO_HASH_ALG__MAX, \
+ "Hash algorithm", \
+ errp)
+
+#define qcrypto_block_luks_ivgen_name_lookup(name, errp) \
+ qcrypto_block_luks_name_lookup(name, \
+ QCryptoIVGenAlgorithm_lookup, \
+ QCRYPTO_IVGEN_ALG__MAX, \
+ "IV generator", \
+ errp)
+
+
+/*
+ * Probe whether @buf looks like the start of a LUKS volume.
+ *
+ * Returns true only when @buf_size covers the magic and version fields,
+ * the magic bytes match, and the big-endian version field equals
+ * QCRYPTO_BLOCK_LUKS_VERSION.
+ */
+static bool
+qcrypto_block_luks_has_format(const uint8_t *buf,
+                              size_t buf_size)
+{
+    const QCryptoBlockLUKSHeader *hdr = (const void *)buf;
+
+    /* Too short to hold everything that precedes cipher_name */
+    if (buf_size < offsetof(QCryptoBlockLUKSHeader, cipher_name)) {
+        return false;
+    }
+
+    if (memcmp(hdr->magic, qcrypto_block_luks_magic,
+               QCRYPTO_BLOCK_LUKS_MAGIC_LEN) != 0) {
+        return false;
+    }
+
+    return be16_to_cpu(hdr->version) == QCRYPTO_BLOCK_LUKS_VERSION;
+}
+
+
+/**
+ * Deal with a quirk of dm-crypt usage of ESSIV.
+ *
+ * When calculating ESSIV IVs, the cipher length used by ESSIV
+ * may be different from the cipher length used for the block
+ * encryption, because dm-crypt uses the hash digest length
+ * as the key size. ie, if you have AES 128 as the block cipher
+ * and SHA 256 as ESSIV hash, then ESSIV will use AES 256 as
+ * the cipher since that gets a key length matching the digest
+ * size, not AES 128 with truncated digest as might be imagined
+ *
+ * Returns @cipher itself when its key length already equals the digest
+ * length of @hash, otherwise the member of the same family (AES,
+ * Serpent or Twofish) whose key length does. If there is no such
+ * member, or @cipher belongs to another family, @errp is set and 0 is
+ * returned.
+ */
+static QCryptoCipherAlgorithm
+qcrypto_block_luks_essiv_cipher(QCryptoCipherAlgorithm cipher,
+                                QCryptoHashAlgorithm hash,
+                                Error **errp)
+{
+    static const QCryptoCipherAlgorithm aes_family[] = {
+        QCRYPTO_CIPHER_ALG_AES_128,
+        QCRYPTO_CIPHER_ALG_AES_192,
+        QCRYPTO_CIPHER_ALG_AES_256,
+    };
+    static const QCryptoCipherAlgorithm serpent_family[] = {
+        QCRYPTO_CIPHER_ALG_SERPENT_128,
+        QCRYPTO_CIPHER_ALG_SERPENT_192,
+        QCRYPTO_CIPHER_ALG_SERPENT_256,
+    };
+    static const QCryptoCipherAlgorithm twofish_family[] = {
+        QCRYPTO_CIPHER_ALG_TWOFISH_128,
+        QCRYPTO_CIPHER_ALG_TWOFISH_192,
+        QCRYPTO_CIPHER_ALG_TWOFISH_256,
+    };
+    const QCryptoCipherAlgorithm *family;
+    const size_t wanted = qcrypto_hash_digest_len(hash);
+    size_t n;
+
+    if (wanted == qcrypto_cipher_get_key_len(cipher)) {
+        return cipher;
+    }
+
+    /* Pick the family that the block cipher belongs to */
+    switch (cipher) {
+    case QCRYPTO_CIPHER_ALG_AES_128:
+    case QCRYPTO_CIPHER_ALG_AES_192:
+    case QCRYPTO_CIPHER_ALG_AES_256:
+        family = aes_family;
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        family = serpent_family;
+        break;
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_192:
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        family = twofish_family;
+        break;
+    default:
+        error_setg(errp, "Cipher %s not supported with essiv",
+                   QCryptoCipherAlgorithm_lookup[cipher]);
+        return 0;
+    }
+
+    /* Each family lists its 128, 192 and 256 bit variants in that order */
+    for (n = 0; n < 3; n++) {
+        if (wanted == qcrypto_cipher_get_key_len(family[n])) {
+            return family[n];
+        }
+    }
+
+    if (family == aes_family) {
+        error_setg(errp, "No AES cipher with key size %zu available",
+                   wanted);
+    } else if (family == serpent_family) {
+        error_setg(errp, "No Serpent cipher with key size %zu available",
+                   wanted);
+    } else {
+        error_setg(errp, "No Twofish cipher with key size %zu available",
+                   wanted);
+    }
+    return 0;
+}
+
+/*
+ * Given a key slot, and user password, this will attempt to unlock
+ * the master encryption key from the key slot.
+ *
+ * The password is stretched with PBKDF2 using the slot's salt and
+ * iteration count. The resulting key decrypts the slot's key material,
+ * which is read through @readfunc. The anti-forensic stripes are then
+ * merged into @masterkey, and a PBKDF2 digest of the candidate is
+ * compared with the digest stored in the header.
+ *
+ * @masterkey must provide @masterkeylen bytes; its contents are only
+ * meaningful when 1 is returned.
+ *
+ * Returns:
+ *    0 if the key slot is disabled, or key could not be decrypted
+ *      with the provided password
+ *    1 if the key slot is enabled, and key decrypted successfully
+ *      with the provided password
+ *   -1 if a fatal error occurred loading the key, including an
+ *      enabled key slot whose stripe count is zero
+ */
+static int
+qcrypto_block_luks_load_key(QCryptoBlock *block,
+                            QCryptoBlockLUKSKeySlot *slot,
+                            const char *password,
+                            QCryptoCipherAlgorithm cipheralg,
+                            QCryptoCipherMode ciphermode,
+                            QCryptoHashAlgorithm hash,
+                            QCryptoIVGenAlgorithm ivalg,
+                            QCryptoCipherAlgorithm ivcipheralg,
+                            QCryptoHashAlgorithm ivhash,
+                            uint8_t *masterkey,
+                            size_t masterkeylen,
+                            QCryptoBlockReadFunc readfunc,
+                            void *opaque,
+                            Error **errp)
+{
+    QCryptoBlockLUKS *luks = block->opaque;
+    uint8_t *splitkey;
+    size_t splitkeylen;
+    uint8_t *possiblekey;
+    int ret = -1;
+    ssize_t rv;
+    QCryptoCipher *cipher = NULL;
+    uint8_t keydigest[QCRYPTO_BLOCK_LUKS_DIGEST_LEN];
+    QCryptoIVGen *ivgen = NULL;
+    size_t niv;
+
+    if (slot->active != QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED) {
+        return 0;
+    }
+
+    /*
+     * The stripe count comes straight from the on-disk header.
+     * qcrypto_afsplit_decode() loops (stripes - 1) times using
+     * unsigned arithmetic, so a zero here would wrap around and walk
+     * far beyond the (empty) split key buffer. Reject it up front.
+     */
+    if (slot->stripes == 0) {
+        error_setg(errp, "LUKS key slot is corrupted (stripes == 0)");
+        return -1;
+    }
+
+    splitkeylen = masterkeylen * slot->stripes;
+    splitkey = g_new0(uint8_t, splitkeylen);
+    possiblekey = g_new0(uint8_t, masterkeylen);
+
+    /*
+     * The user password is used to generate a (possible)
+     * decryption key. This may or may not successfully
+     * decrypt the master key - we just blindly assume
+     * the key is correct and validate the results of
+     * decryption later.
+     */
+    if (qcrypto_pbkdf2(hash,
+                       (const uint8_t *)password, strlen(password),
+                       slot->salt, QCRYPTO_BLOCK_LUKS_SALT_LEN,
+                       slot->iterations,
+                       possiblekey, masterkeylen,
+                       errp) < 0) {
+        goto cleanup;
+    }
+
+    /*
+     * We need to read the master key material from the
+     * LUKS key material header. What we're reading is
+     * not the raw master key, but rather the data after
+     * it has been passed through AFSplit and the result
+     * then encrypted.
+     */
+    rv = readfunc(block,
+                  slot->key_offset * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+                  splitkey, splitkeylen,
+                  errp,
+                  opaque);
+    if (rv < 0) {
+        goto cleanup;
+    }
+
+    /* Setup the cipher/ivgen that we'll use to try to decrypt
+     * the split master key material */
+    cipher = qcrypto_cipher_new(cipheralg, ciphermode,
+                                possiblekey, masterkeylen,
+                                errp);
+    if (!cipher) {
+        goto cleanup;
+    }
+
+    niv = qcrypto_cipher_get_iv_len(cipheralg,
+                                    ciphermode);
+    ivgen = qcrypto_ivgen_new(ivalg,
+                              ivcipheralg,
+                              ivhash,
+                              possiblekey, masterkeylen,
+                              errp);
+    if (!ivgen) {
+        goto cleanup;
+    }
+
+    /*
+     * The master key needs to be decrypted in the same
+     * way that the block device payload will be decrypted
+     * later. In particular we'll be using the IV generator
+     * to reset the encryption cipher every time the master
+     * key crosses a sector boundary.
+     */
+    if (qcrypto_block_decrypt_helper(cipher,
+                                     niv,
+                                     ivgen,
+                                     QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+                                     0,
+                                     splitkey,
+                                     splitkeylen,
+                                     errp) < 0) {
+        goto cleanup;
+    }
+
+    /*
+     * Now we've decrypted the split master key, join
+     * it back together to get the actual master key.
+     */
+    if (qcrypto_afsplit_decode(hash,
+                               masterkeylen,
+                               slot->stripes,
+                               splitkey,
+                               masterkey,
+                               errp) < 0) {
+        goto cleanup;
+    }
+
+    /*
+     * We still don't know that the masterkey we got is valid,
+     * because we just blindly assumed the user's password
+     * was correct. This is where we now verify it. We are
+     * creating a hash of the master key using PBKDF and
+     * then comparing that to the hash stored in the key slot
+     * header
+     */
+    if (qcrypto_pbkdf2(hash,
+                       masterkey, masterkeylen,
+                       luks->header.master_key_salt,
+                       QCRYPTO_BLOCK_LUKS_SALT_LEN,
+                       luks->header.master_key_iterations,
+                       keydigest, G_N_ELEMENTS(keydigest),
+                       errp) < 0) {
+        goto cleanup;
+    }
+
+    if (memcmp(keydigest, luks->header.master_key_digest,
+               QCRYPTO_BLOCK_LUKS_DIGEST_LEN) == 0) {
+        /* Success, we got the right master key */
+        ret = 1;
+        goto cleanup;
+    }
+
+    /* Fail, user's password was not valid for this key slot,
+     * tell caller to try another slot */
+    ret = 0;
+
+ cleanup:
+    qcrypto_ivgen_free(ivgen);
+    qcrypto_cipher_free(cipher);
+    g_free(splitkey);
+    g_free(possiblekey);
+    return ret;
+}
+
+
+/*
+ * Given a user password, this will iterate over all key
+ * slots and try to unlock each active key slot using the
+ * password until it successfully obtains a master key.
+ *
+ * On success *@masterkey is a newly allocated buffer of *@masterkeylen
+ * bytes (header.key_bytes) which the caller must g_free(). On failure
+ * the buffer is released, *@masterkey is NULL and *@masterkeylen is 0.
+ *
+ * Returns 0 if a key was loaded, -1 if no keys could be loaded
+ */
+static int
+qcrypto_block_luks_find_key(QCryptoBlock *block,
+                            const char *password,
+                            QCryptoCipherAlgorithm cipheralg,
+                            QCryptoCipherMode ciphermode,
+                            QCryptoHashAlgorithm hash,
+                            QCryptoIVGenAlgorithm ivalg,
+                            QCryptoCipherAlgorithm ivcipheralg,
+                            QCryptoHashAlgorithm ivhash,
+                            uint8_t **masterkey,
+                            size_t *masterkeylen,
+                            QCryptoBlockReadFunc readfunc,
+                            void *opaque,
+                            Error **errp)
+{
+    QCryptoBlockLUKS *luks = block->opaque;
+    size_t slotnum;
+    int status = 0;
+
+    *masterkeylen = luks->header.key_bytes;
+    *masterkey = g_new0(uint8_t, luks->header.key_bytes);
+
+    for (slotnum = 0; slotnum < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; slotnum++) {
+        status = qcrypto_block_luks_load_key(block,
+                                             &luks->header.key_slots[slotnum],
+                                             password,
+                                             cipheralg, ciphermode,
+                                             hash,
+                                             ivalg, ivcipheralg, ivhash,
+                                             *masterkey, *masterkeylen,
+                                             readfunc, opaque,
+                                             errp);
+        /* Stop on a fatal error or on the first slot that unlocks */
+        if (status < 0 || status == 1) {
+            break;
+        }
+    }
+
+    if (status == 1) {
+        return 0;
+    }
+
+    if (status >= 0) {
+        /* Every slot was tried; a fatal error has already set errp */
+        error_setg(errp, "Invalid password, cannot unlock any keyslot");
+    }
+
+    g_free(*masterkey);
+    *masterkey = NULL;
+    *masterkeylen = 0;
+    return -1;
+}
+
+
+/*
+ * Open an existing LUKS volume.
+ *
+ * Reads the LUKS header through @readfunc, converts it to native
+ * endianness, validates it and parses the cipher / mode / IV generator /
+ * hash names. Unless QCRYPTO_BLOCK_OPEN_NO_IO is set in @flags, the
+ * secret named by options->u.luks.key_secret is then used to unlock the
+ * master key and to set up block->cipher and block->ivgen.
+ *
+ * On success returns 0 with block->opaque pointing at the parsed
+ * QCryptoBlockLUKS state and block->payload_offset set in bytes.
+ * On failure returns a negative value (-1, a negative errno, or the
+ * value returned by @readfunc). Once block->opaque has been assigned,
+ * every failure path resets block->opaque, block->cipher and
+ * block->ivgen to NULL so that no pointer to freed memory is left.
+ */
+static int
+qcrypto_block_luks_open(QCryptoBlock *block,
+                        QCryptoBlockOpenOptions *options,
+                        QCryptoBlockReadFunc readfunc,
+                        void *opaque,
+                        unsigned int flags,
+                        Error **errp)
+{
+    QCryptoBlockLUKS *luks;
+    Error *local_err = NULL;
+    int ret = 0;
+    size_t i;
+    ssize_t rv;
+    uint8_t *masterkey = NULL;
+    size_t masterkeylen;
+    char *ivgen_name, *ivhash_name;
+    QCryptoCipherMode ciphermode;
+    QCryptoCipherAlgorithm cipheralg;
+    QCryptoIVGenAlgorithm ivalg;
+    QCryptoCipherAlgorithm ivcipheralg;
+    QCryptoHashAlgorithm hash;
+    QCryptoHashAlgorithm ivhash;
+    char *password = NULL;
+
+    /* A password is only needed when the payload will be accessed */
+    if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) {
+        if (!options->u.luks.key_secret) {
+            error_setg(errp, "Parameter 'key-secret' is required for cipher");
+            return -1;
+        }
+        password = qcrypto_secret_lookup_as_utf8(
+            options->u.luks.key_secret, errp);
+        if (!password) {
+            return -1;
+        }
+    }
+
+    luks = g_new0(QCryptoBlockLUKS, 1);
+    block->opaque = luks;
+
+    /* Read the entire LUKS header, minus the key material from
+     * the underlying device */
+    rv = readfunc(block, 0,
+                  (uint8_t *)&luks->header,
+                  sizeof(luks->header),
+                  errp,
+                  opaque);
+    if (rv < 0) {
+        ret = rv;
+        goto fail;
+    }
+
+    /* The header is always stored in big-endian format, so
+     * convert everything to native */
+    be16_to_cpus(&luks->header.version);
+    be32_to_cpus(&luks->header.payload_offset);
+    be32_to_cpus(&luks->header.key_bytes);
+    be32_to_cpus(&luks->header.master_key_iterations);
+
+    for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+        be32_to_cpus(&luks->header.key_slots[i].active);
+        be32_to_cpus(&luks->header.key_slots[i].iterations);
+        be32_to_cpus(&luks->header.key_slots[i].key_offset);
+        be32_to_cpus(&luks->header.key_slots[i].stripes);
+    }
+
+    if (memcmp(luks->header.magic, qcrypto_block_luks_magic,
+               QCRYPTO_BLOCK_LUKS_MAGIC_LEN) != 0) {
+        error_setg(errp, "Volume is not in LUKS format");
+        ret = -EINVAL;
+        goto fail;
+    }
+    if (luks->header.version != QCRYPTO_BLOCK_LUKS_VERSION) {
+        error_setg(errp, "LUKS version %" PRIu32 " is not supported",
+                   luks->header.version);
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    /*
+     * The textual header fields were read verbatim from disk and are
+     * about to be handed to strchr() / g_str_equal(). Make sure each
+     * one contains a NUL terminator inside its fixed-size array, so a
+     * corrupt or malicious header cannot make those calls read past
+     * the field.
+     */
+    if (!memchr(luks->header.cipher_name, '\0',
+                sizeof(luks->header.cipher_name)) ||
+        !memchr(luks->header.cipher_mode, '\0',
+                sizeof(luks->header.cipher_mode)) ||
+        !memchr(luks->header.hash_spec, '\0',
+                sizeof(luks->header.hash_spec))) {
+        error_setg(errp, "LUKS header string field is not NUL terminated");
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /*
+     * The cipher_mode header contains a string that we have
+     * to further parse, of the format
+     *
+     *    <cipher-mode>-<iv-generator>[:<iv-hash>]
+     *
+     * eg  cbc-essiv:sha256, cbc-plain64
+     *
+     * The separators are overwritten with NUL bytes in place, so after
+     * this point header.cipher_mode holds only the <cipher-mode> part.
+     */
+    ivgen_name = strchr(luks->header.cipher_mode, '-');
+    if (!ivgen_name) {
+        ret = -EINVAL;
+        error_setg(errp, "Unexpected cipher mode string format %s",
+                   luks->header.cipher_mode);
+        goto fail;
+    }
+    *ivgen_name = '\0';
+    ivgen_name++;
+
+    ivhash_name = strchr(ivgen_name, ':');
+    if (!ivhash_name) {
+        ivhash = 0;
+    } else {
+        *ivhash_name = '\0';
+        ivhash_name++;
+
+        ivhash = qcrypto_block_luks_hash_name_lookup(ivhash_name,
+                                                     &local_err);
+        if (local_err) {
+            ret = -ENOTSUP;
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+    }
+
+    /* The lookups below return 0 on failure, which is also a valid
+     * enum value, so success is judged by local_err alone */
+    ciphermode = qcrypto_block_luks_cipher_mode_lookup(luks->header.cipher_mode,
+                                                       &local_err);
+    if (local_err) {
+        ret = -ENOTSUP;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    cipheralg = qcrypto_block_luks_cipher_name_lookup(luks->header.cipher_name,
+                                                      ciphermode,
+                                                      luks->header.key_bytes,
+                                                      &local_err);
+    if (local_err) {
+        ret = -ENOTSUP;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    hash = qcrypto_block_luks_hash_name_lookup(luks->header.hash_spec,
+                                               &local_err);
+    if (local_err) {
+        ret = -ENOTSUP;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    ivalg = qcrypto_block_luks_ivgen_name_lookup(ivgen_name,
+                                                 &local_err);
+    if (local_err) {
+        ret = -ENOTSUP;
+        error_propagate(errp, local_err);
+        goto fail;
+    }
+
+    if (ivalg == QCRYPTO_IVGEN_ALG_ESSIV) {
+        if (!ivhash_name) {
+            ret = -EINVAL;
+            error_setg(errp, "Missing IV generator hash specification");
+            goto fail;
+        }
+        ivcipheralg = qcrypto_block_luks_essiv_cipher(cipheralg,
+                                                      ivhash,
+                                                      &local_err);
+        if (local_err) {
+            ret = -ENOTSUP;
+            error_propagate(errp, local_err);
+            goto fail;
+        }
+    } else {
+        /* Note we parsed the ivhash_name earlier in the cipher_mode
+         * spec string even with plain/plain64 ivgens, but we
+         * will ignore it, since it is irrelevant for these ivgens.
+         * This is for compat with dm-crypt which will silently
+         * ignore hash names with these ivgens rather than report
+         * an error about the invalid usage
+         */
+        ivcipheralg = cipheralg;
+    }
+
+    if (!(flags & QCRYPTO_BLOCK_OPEN_NO_IO)) {
+        /* Try to find which key slot our password is valid for
+         * and unlock the master key from that slot.
+         */
+        if (qcrypto_block_luks_find_key(block,
+                                        password,
+                                        cipheralg, ciphermode,
+                                        hash,
+                                        ivalg,
+                                        ivcipheralg,
+                                        ivhash,
+                                        &masterkey, &masterkeylen,
+                                        readfunc, opaque,
+                                        errp) < 0) {
+            ret = -EACCES;
+            goto fail;
+        }
+
+        /* We have a valid master key now, so can setup the
+         * block device payload decryption objects
+         */
+        block->kdfhash = hash;
+        block->niv = qcrypto_cipher_get_iv_len(cipheralg,
+                                               ciphermode);
+        block->ivgen = qcrypto_ivgen_new(ivalg,
+                                         ivcipheralg,
+                                         ivhash,
+                                         masterkey, masterkeylen,
+                                         errp);
+        if (!block->ivgen) {
+            ret = -ENOTSUP;
+            goto fail;
+        }
+
+        block->cipher = qcrypto_cipher_new(cipheralg,
+                                           ciphermode,
+                                           masterkey, masterkeylen,
+                                           errp);
+        if (!block->cipher) {
+            ret = -ENOTSUP;
+            goto fail;
+        }
+    }
+
+    /* The header stores the payload offset in sectors; expose bytes */
+    block->payload_offset = luks->header.payload_offset *
+        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+
+    luks->cipher_alg = cipheralg;
+    luks->cipher_mode = ciphermode;
+    luks->ivgen_alg = ivalg;
+    luks->ivgen_hash_alg = ivhash;
+    luks->hash_alg = hash;
+
+    g_free(masterkey);
+    g_free(password);
+
+    return 0;
+
+ fail:
+    g_free(masterkey);
+    /* Release everything attached to block and clear the pointers so
+     * that the caller is not left holding references to freed memory */
+    qcrypto_cipher_free(block->cipher);
+    block->cipher = NULL;
+    qcrypto_ivgen_free(block->ivgen);
+    block->ivgen = NULL;
+    g_free(luks);
+    block->opaque = NULL;
+    g_free(password);
+    return ret;
+}
+
+
+/*
+ * Generate a fresh UUID and store its textual form in @uuidstr.
+ *
+ * The string is produced by uuid_unparse(), so @uuidstr must have room
+ * for whatever that function writes (the caller in this file passes the
+ * QCRYPTO_BLOCK_LUKS_UUID_LEN byte header field).
+ *
+ * Returns 0 on success. When built without CONFIG_UUID, sets @errp and
+ * returns -1.
+ */
+static int
+qcrypto_block_luks_uuid_gen(uint8_t *uuidstr, Error **errp)
+{
+#ifndef CONFIG_UUID
+    error_setg(errp, "Unable to generate uuids on this platform");
+    return -1;
+#else
+    uuid_t raw;
+
+    uuid_generate(raw);
+    uuid_unparse(raw, (char *)uuidstr);
+    return 0;
+#endif
+}
+
+static int
+qcrypto_block_luks_create(QCryptoBlock *block,
+ QCryptoBlockCreateOptions *options,
+ QCryptoBlockInitFunc initfunc,
+ QCryptoBlockWriteFunc writefunc,
+ void *opaque,
+ Error **errp)
+{
+ QCryptoBlockLUKS *luks;
+ QCryptoBlockCreateOptionsLUKS luks_opts;
+ Error *local_err = NULL;
+ uint8_t *masterkey = NULL;
+ uint8_t *slotkey = NULL;
+ uint8_t *splitkey = NULL;
+ size_t splitkeylen = 0;
+ size_t i;
+ QCryptoCipher *cipher = NULL;
+ QCryptoIVGen *ivgen = NULL;
+ char *password;
+ const char *cipher_alg;
+ const char *cipher_mode;
+ const char *ivgen_alg;
+ const char *ivgen_hash_alg = NULL;
+ const char *hash_alg;
+ char *cipher_mode_spec = NULL;
+ QCryptoCipherAlgorithm ivcipheralg = 0;
+
+ memcpy(&luks_opts, &options->u.luks, sizeof(luks_opts));
+ if (!luks_opts.has_cipher_alg) {
+ luks_opts.cipher_alg = QCRYPTO_CIPHER_ALG_AES_256;
+ }
+ if (!luks_opts.has_cipher_mode) {
+ luks_opts.cipher_mode = QCRYPTO_CIPHER_MODE_XTS;
+ }
+ if (!luks_opts.has_ivgen_alg) {
+ luks_opts.ivgen_alg = QCRYPTO_IVGEN_ALG_PLAIN64;
+ }
+ if (!luks_opts.has_hash_alg) {
+ luks_opts.hash_alg = QCRYPTO_HASH_ALG_SHA256;
+ }
+ if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
+ if (!luks_opts.has_ivgen_hash_alg) {
+ luks_opts.ivgen_hash_alg = QCRYPTO_HASH_ALG_SHA256;
+ luks_opts.has_ivgen_hash_alg = true;
+ }
+ }
+ /* Note we're allowing ivgen_hash_alg to be set even for
+ * non-essiv iv generators that don't need a hash. It will
+ * be silently ignored, for compatibility with dm-crypt */
+
+ if (!options->u.luks.key_secret) {
+ error_setg(errp, "Parameter 'key-secret' is required for cipher");
+ return -1;
+ }
+ password = qcrypto_secret_lookup_as_utf8(luks_opts.key_secret, errp);
+ if (!password) {
+ return -1;
+ }
+
+ luks = g_new0(QCryptoBlockLUKS, 1);
+ block->opaque = luks;
+
+ memcpy(luks->header.magic, qcrypto_block_luks_magic,
+ QCRYPTO_BLOCK_LUKS_MAGIC_LEN);
+
+ /* We populate the header in native endianness initially and
+ * then convert everything to big endian just before writing
+ * it out to disk
+ */
+ luks->header.version = QCRYPTO_BLOCK_LUKS_VERSION;
+ if (qcrypto_block_luks_uuid_gen(luks->header.uuid,
+ errp) < 0) {
+ goto error;
+ }
+
+ cipher_alg = qcrypto_block_luks_cipher_alg_lookup(luks_opts.cipher_alg,
+ errp);
+ if (!cipher_alg) {
+ goto error;
+ }
+
+ cipher_mode = QCryptoCipherMode_lookup[luks_opts.cipher_mode];
+ ivgen_alg = QCryptoIVGenAlgorithm_lookup[luks_opts.ivgen_alg];
+ if (luks_opts.has_ivgen_hash_alg) {
+ ivgen_hash_alg = QCryptoHashAlgorithm_lookup[luks_opts.ivgen_hash_alg];
+ cipher_mode_spec = g_strdup_printf("%s-%s:%s", cipher_mode, ivgen_alg,
+ ivgen_hash_alg);
+ } else {
+ cipher_mode_spec = g_strdup_printf("%s-%s", cipher_mode, ivgen_alg);
+ }
+ hash_alg = QCryptoHashAlgorithm_lookup[luks_opts.hash_alg];
+
+
+ if (strlen(cipher_alg) >= QCRYPTO_BLOCK_LUKS_CIPHER_NAME_LEN) {
+ error_setg(errp, "Cipher name '%s' is too long for LUKS header",
+ cipher_alg);
+ goto error;
+ }
+ if (strlen(cipher_mode_spec) >= QCRYPTO_BLOCK_LUKS_CIPHER_MODE_LEN) {
+ error_setg(errp, "Cipher mode '%s' is too long for LUKS header",
+ cipher_mode_spec);
+ goto error;
+ }
+ if (strlen(hash_alg) >= QCRYPTO_BLOCK_LUKS_HASH_SPEC_LEN) {
+ error_setg(errp, "Hash name '%s' is too long for LUKS header",
+ hash_alg);
+ goto error;
+ }
+
+ if (luks_opts.ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
+ ivcipheralg = qcrypto_block_luks_essiv_cipher(luks_opts.cipher_alg,
+ luks_opts.ivgen_hash_alg,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+ } else {
+ ivcipheralg = luks_opts.cipher_alg;
+ }
+
+ strcpy(luks->header.cipher_name, cipher_alg);
+ strcpy(luks->header.cipher_mode, cipher_mode_spec);
+ strcpy(luks->header.hash_spec, hash_alg);
+
+ luks->header.key_bytes = qcrypto_cipher_get_key_len(luks_opts.cipher_alg);
+ if (luks_opts.cipher_mode == QCRYPTO_CIPHER_MODE_XTS) {
+ luks->header.key_bytes *= 2;
+ }
+
+ /* Generate the salt used for hashing the master key
+ * with PBKDF later
+ */
+ if (qcrypto_random_bytes(luks->header.master_key_salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ errp) < 0) {
+ goto error;
+ }
+
+ /* Generate random master key */
+ masterkey = g_new0(uint8_t, luks->header.key_bytes);
+ if (qcrypto_random_bytes(masterkey,
+ luks->header.key_bytes, errp) < 0) {
+ goto error;
+ }
+
+
+ /* Setup the block device payload encryption objects */
+ block->cipher = qcrypto_cipher_new(luks_opts.cipher_alg,
+ luks_opts.cipher_mode,
+ masterkey, luks->header.key_bytes,
+ errp);
+ if (!block->cipher) {
+ goto error;
+ }
+
+ block->kdfhash = luks_opts.hash_alg;
+ block->niv = qcrypto_cipher_get_iv_len(luks_opts.cipher_alg,
+ luks_opts.cipher_mode);
+ block->ivgen = qcrypto_ivgen_new(luks_opts.ivgen_alg,
+ ivcipheralg,
+ luks_opts.ivgen_hash_alg,
+ masterkey, luks->header.key_bytes,
+ errp);
+
+ if (!block->ivgen) {
+ goto error;
+ }
+
+
+ /* Determine how many iterations we need to hash the master
+ * key, in order to have 1 second of compute time used
+ */
+ luks->header.master_key_iterations =
+ qcrypto_pbkdf2_count_iters(luks_opts.hash_alg,
+ masterkey, luks->header.key_bytes,
+ luks->header.master_key_salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+
+ /* Why /= 8 ? That matches cryptsetup, but there's no
+ * explanation why they chose /= 8... Probably so that
+ * if all 8 keyslots are active we only spend 1 second
+ * in total time to check all keys */
+ luks->header.master_key_iterations /= 8;
+ luks->header.master_key_iterations = MAX(
+ luks->header.master_key_iterations,
+ QCRYPTO_BLOCK_LUKS_MIN_MASTER_KEY_ITERS);
+
+
+ /* Hash the master key, saving the result in the LUKS
+ * header. This hash is used when opening the encrypted
+ * device to verify that the user password unlocked a
+ * valid master key
+ */
+ if (qcrypto_pbkdf2(luks_opts.hash_alg,
+ masterkey, luks->header.key_bytes,
+ luks->header.master_key_salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ luks->header.master_key_iterations,
+ luks->header.master_key_digest,
+ QCRYPTO_BLOCK_LUKS_DIGEST_LEN,
+ errp) < 0) {
+ goto error;
+ }
+
+
+ /* Although LUKS has multiple key slots, we're just going
+ * to use the first key slot */
+ splitkeylen = luks->header.key_bytes * QCRYPTO_BLOCK_LUKS_STRIPES;
+ for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+ luks->header.key_slots[i].active = i == 0 ?
+ QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED :
+ QCRYPTO_BLOCK_LUKS_KEY_SLOT_DISABLED;
+ luks->header.key_slots[i].stripes = QCRYPTO_BLOCK_LUKS_STRIPES;
+
+ /* This calculation doesn't match that shown in the spec,
+ * but instead follows the cryptsetup implementation.
+ */
+ luks->header.key_slots[i].key_offset =
+ (QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET /
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE) +
+ (ROUND_UP(DIV_ROUND_UP(splitkeylen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE),
+ (QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET /
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) * i);
+ }
+
+ if (qcrypto_random_bytes(luks->header.key_slots[0].salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ errp) < 0) {
+ goto error;
+ }
+
+ /* Again we determine how many iterations are required to
+ * hash the user password while consuming 1 second of compute
+ * time */
+ luks->header.key_slots[0].iterations =
+ qcrypto_pbkdf2_count_iters(luks_opts.hash_alg,
+ (uint8_t *)password, strlen(password),
+ luks->header.key_slots[0].salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+ /* Why /= 2 ? That matches cryptsetup, but there's no
+ * explanation why they chose /= 2... */
+ luks->header.key_slots[0].iterations /= 2;
+ luks->header.key_slots[0].iterations = MAX(
+ luks->header.key_slots[0].iterations,
+ QCRYPTO_BLOCK_LUKS_MIN_SLOT_KEY_ITERS);
+
+
+ /* Generate a key that we'll use to encrypt the master
+ * key, from the user's password
+ */
+ slotkey = g_new0(uint8_t, luks->header.key_bytes);
+ if (qcrypto_pbkdf2(luks_opts.hash_alg,
+ (uint8_t *)password, strlen(password),
+ luks->header.key_slots[0].salt,
+ QCRYPTO_BLOCK_LUKS_SALT_LEN,
+ luks->header.key_slots[0].iterations,
+ slotkey, luks->header.key_bytes,
+ errp) < 0) {
+ goto error;
+ }
+
+
+ /* Setup the encryption objects needed to encrypt the
+ * master key material
+ */
+ cipher = qcrypto_cipher_new(luks_opts.cipher_alg,
+ luks_opts.cipher_mode,
+ slotkey, luks->header.key_bytes,
+ errp);
+ if (!cipher) {
+ goto error;
+ }
+
+ ivgen = qcrypto_ivgen_new(luks_opts.ivgen_alg,
+ ivcipheralg,
+ luks_opts.ivgen_hash_alg,
+ slotkey, luks->header.key_bytes,
+ errp);
+ if (!ivgen) {
+ goto error;
+ }
+
+ /* Before storing the master key, we need to vastly
+ * increase its size, as protection against forensic
+ * disk data recovery */
+ splitkey = g_new0(uint8_t, splitkeylen);
+
+ if (qcrypto_afsplit_encode(luks_opts.hash_alg,
+ luks->header.key_bytes,
+ luks->header.key_slots[0].stripes,
+ masterkey,
+ splitkey,
+ errp) < 0) {
+ goto error;
+ }
+
+ /* Now we encrypt the split master key with the key generated
+ * from the user's password, before storing it */
+ if (qcrypto_block_encrypt_helper(cipher, block->niv, ivgen,
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+ 0,
+ splitkey,
+ splitkeylen,
+ errp) < 0) {
+ goto error;
+ }
+
+
+ /* The total size of the LUKS headers is the partition header + key
+ * slot headers, rounded up to the nearest sector, combined with
+ * the size of each master key material region, also rounded up
+ * to the nearest sector */
+ luks->header.payload_offset =
+ (QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET /
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE) +
+ (ROUND_UP(DIV_ROUND_UP(splitkeylen, QCRYPTO_BLOCK_LUKS_SECTOR_SIZE),
+ (QCRYPTO_BLOCK_LUKS_KEY_SLOT_OFFSET /
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE)) *
+ QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS);
+
+ block->payload_offset = luks->header.payload_offset *
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+
+ /* Reserve header space to match payload offset */
+ initfunc(block, block->payload_offset, &local_err, opaque);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+
+ /* Everything on disk uses Big Endian, so flip header fields
+ * before writing them */
+ cpu_to_be16s(&luks->header.version);
+ cpu_to_be32s(&luks->header.payload_offset);
+ cpu_to_be32s(&luks->header.key_bytes);
+ cpu_to_be32s(&luks->header.master_key_iterations);
+
+ for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+ cpu_to_be32s(&luks->header.key_slots[i].active);
+ cpu_to_be32s(&luks->header.key_slots[i].iterations);
+ cpu_to_be32s(&luks->header.key_slots[i].key_offset);
+ cpu_to_be32s(&luks->header.key_slots[i].stripes);
+ }
+
+
+ /* Write out the partition header and key slot headers */
+ writefunc(block, 0,
+ (const uint8_t *)&luks->header,
+ sizeof(luks->header),
+ &local_err,
+ opaque);
+
+ /* Delay checking local_err until we've byte-swapped */
+
+ /* Byte swap the header back to native, in case we need
+ * to read it again later */
+ be16_to_cpus(&luks->header.version);
+ be32_to_cpus(&luks->header.payload_offset);
+ be32_to_cpus(&luks->header.key_bytes);
+ be32_to_cpus(&luks->header.master_key_iterations);
+
+ for (i = 0; i < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; i++) {
+ be32_to_cpus(&luks->header.key_slots[i].active);
+ be32_to_cpus(&luks->header.key_slots[i].iterations);
+ be32_to_cpus(&luks->header.key_slots[i].key_offset);
+ be32_to_cpus(&luks->header.key_slots[i].stripes);
+ }
+
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto error;
+ }
+
+ /* Write out the master key material, starting at the
+ * sector immediately following the partition header. */
+ if (writefunc(block,
+ luks->header.key_slots[0].key_offset *
+ QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+ splitkey, splitkeylen,
+ errp,
+ opaque) != splitkeylen) {
+ goto error;
+ }
+
+ luks->cipher_alg = luks_opts.cipher_alg;
+ luks->cipher_mode = luks_opts.cipher_mode;
+ luks->ivgen_alg = luks_opts.ivgen_alg;
+ luks->ivgen_hash_alg = luks_opts.ivgen_hash_alg;
+ luks->hash_alg = luks_opts.hash_alg;
+
+ memset(masterkey, 0, luks->header.key_bytes);
+ g_free(masterkey);
+ memset(slotkey, 0, luks->header.key_bytes);
+ g_free(slotkey);
+ g_free(splitkey);
+ g_free(password);
+ g_free(cipher_mode_spec);
+
+ qcrypto_ivgen_free(ivgen);
+ qcrypto_cipher_free(cipher);
+
+ return 0;
+
+ error:
+ if (masterkey) {
+ memset(masterkey, 0, luks->header.key_bytes);
+ }
+ g_free(masterkey);
+ if (slotkey) {
+ memset(slotkey, 0, luks->header.key_bytes);
+ }
+ g_free(slotkey);
+ g_free(splitkey);
+ g_free(password);
+ g_free(cipher_mode_spec);
+
+ qcrypto_ivgen_free(ivgen);
+ qcrypto_cipher_free(cipher);
+
+ g_free(luks);
+ return -1;
+}
+
+
+/*
+ * Fill @info with the LUKS-specific details of @block.
+ *
+ * The cipher, IV generator and hash settings, the payload offset, the
+ * master key iteration count and a newly allocated copy of the header
+ * UUID are reported, followed by one list entry per key slot (in slot
+ * order). Iteration and stripe counts are only reported for slots
+ * marked as enabled.
+ *
+ * @errp is never set; this function always returns 0.
+ */
+static int qcrypto_block_luks_get_info(QCryptoBlock *block,
+                                       QCryptoBlockInfo *info,
+                                       Error **errp)
+{
+    QCryptoBlockLUKS *luks = block->opaque;
+    QCryptoBlockInfoLUKSSlotList **tail = &info->u.luks.slots;
+    size_t idx;
+
+    info->u.luks.cipher_alg = luks->cipher_alg;
+    info->u.luks.cipher_mode = luks->cipher_mode;
+    info->u.luks.ivgen_alg = luks->ivgen_alg;
+    /* The IV generator hash is only reported for ESSIV */
+    if (luks->ivgen_alg == QCRYPTO_IVGEN_ALG_ESSIV) {
+        info->u.luks.has_ivgen_hash_alg = true;
+        info->u.luks.ivgen_hash_alg = luks->ivgen_hash_alg;
+    }
+    info->u.luks.hash_alg = luks->hash_alg;
+    info->u.luks.payload_offset = block->payload_offset;
+    info->u.luks.master_key_iters = luks->header.master_key_iterations;
+    info->u.luks.uuid = g_strndup((const char *)luks->header.uuid,
+                                  sizeof(luks->header.uuid));
+
+    /* Append one list node per key slot at the tail of the list */
+    for (idx = 0; idx < QCRYPTO_BLOCK_LUKS_NUM_KEY_SLOTS; idx++) {
+        QCryptoBlockInfoLUKSSlotList *node =
+            g_new0(QCryptoBlockInfoLUKSSlotList, 1);
+        QCryptoBlockInfoLUKSSlot *entry =
+            g_new0(QCryptoBlockInfoLUKSSlot, 1);
+
+        entry->active = luks->header.key_slots[idx].active ==
+            QCRYPTO_BLOCK_LUKS_KEY_SLOT_ENABLED;
+        /* Header stores the offset in sectors; report it in bytes */
+        entry->key_offset = luks->header.key_slots[idx].key_offset
+            * QCRYPTO_BLOCK_LUKS_SECTOR_SIZE;
+        if (entry->active) {
+            entry->has_iters = true;
+            entry->iters = luks->header.key_slots[idx].iterations;
+            entry->has_stripes = true;
+            entry->stripes = luks->header.key_slots[idx].stripes;
+        }
+
+        node->value = entry;
+        *tail = node;
+        tail = &node->next;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Driver cleanup hook: release the LUKS private state hanging off
+ * block->opaque. The cipher and IV generator are not touched here.
+ */
+static void qcrypto_block_luks_cleanup(QCryptoBlock *block)
+{
+    QCryptoBlockLUKS *luks = block->opaque;
+
+    g_free(luks);
+}
+
+
+/*
+ * Decrypt @len bytes of @buf in place, treating the data as a run of
+ * LUKS sectors beginning at sector number @startsector.
+ *
+ * Returns whatever qcrypto_block_decrypt_helper() returns.
+ */
+static int
+qcrypto_block_luks_decrypt(QCryptoBlock *block, uint64_t startsector,
+                           uint8_t *buf, size_t len, Error **errp)
+{
+    QCryptoCipher *cipher = block->cipher;
+    QCryptoIVGen *ivgen = block->ivgen;
+
+    return qcrypto_block_decrypt_helper(cipher, block->niv, ivgen,
+                                        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+/*
+ * Encrypt @len bytes of @buf in place, treating the data as a run of
+ * LUKS sectors beginning at sector number @startsector.
+ *
+ * Returns whatever qcrypto_block_encrypt_helper() returns.
+ */
+static int
+qcrypto_block_luks_encrypt(QCryptoBlock *block, uint64_t startsector,
+                           uint8_t *buf, size_t len, Error **errp)
+{
+    QCryptoCipher *cipher = block->cipher;
+    QCryptoIVGen *ivgen = block->ivgen;
+
+    return qcrypto_block_encrypt_helper(cipher, block->niv, ivgen,
+                                        QCRYPTO_BLOCK_LUKS_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+/*
+ * Driver table for the LUKS format. Every callback of
+ * QCryptoBlockDriver is provided, including the optional get_info.
+ */
+const QCryptoBlockDriver qcrypto_block_driver_luks = {
+ .open = qcrypto_block_luks_open,
+ .create = qcrypto_block_luks_create,
+ .get_info = qcrypto_block_luks_get_info,
+ .cleanup = qcrypto_block_luks_cleanup,
+ .decrypt = qcrypto_block_luks_decrypt,
+ .encrypt = qcrypto_block_luks_encrypt,
+ .has_format = qcrypto_block_luks_has_format,
+};
diff --git a/crypto/block-luks.h b/crypto/block-luks.h
new file mode 100644
index 0000000..b2d8a35
--- /dev/null
+++ b/crypto/block-luks.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU Crypto block device encryption LUKS format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCK_LUKS_H
+#define QCRYPTO_BLOCK_LUKS_H
+
+#include "crypto/blockpriv.h"
+
+/* Callback table implementing the LUKS block encryption format */
+extern const QCryptoBlockDriver qcrypto_block_driver_luks;
+
+#endif /* QCRYPTO_BLOCK_LUKS_H */
diff --git a/crypto/block-qcow.c b/crypto/block-qcow.c
new file mode 100644
index 0000000..be88c6f
--- /dev/null
+++ b/crypto/block-qcow.c
@@ -0,0 +1,174 @@
+/*
+ * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+/*
+ * Note that the block encryption implemented in this file is broken
+ * by design. This exists only to allow data to be liberated from
+ * existing qcow[2] images and should not be used in any new areas.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "crypto/block-qcow.h"
+#include "crypto/secret.h"
+
+#define QCRYPTO_BLOCK_QCOW_SECTOR_SIZE 512
+
+
+/*
+ * Format probe for the qcow driver. Both arguments are ignored and
+ * the answer is always "no".
+ */
+static bool
+qcrypto_block_qcow_has_format(const uint8_t *buf G_GNUC_UNUSED,
+                              size_t buf_size G_GNUC_UNUSED)
+{
+    const bool detected = false;
+
+    return detected;
+}
+
+
+/*
+ * Set up the AES-128-CBC cipher and PLAIN64 IV generator of @block
+ * from the secret named @keysecret.
+ *
+ * The key is the first 16 bytes of the secret's UTF-8 text; shorter
+ * secrets are padded with zero bytes, longer ones are truncated.
+ *
+ * Returns 0 on success. Returns -1 if the secret cannot be looked up
+ * and -ENOTSUP if the IV generator or cipher cannot be created; in
+ * both cases @errp is set by the failing call. On failure no cipher
+ * or IV generator is left attached to @block.
+ */
+static int
+qcrypto_block_qcow_init(QCryptoBlock *block,
+                        const char *keysecret,
+                        Error **errp)
+{
+    char *password;
+    int ret;
+    uint8_t keybuf[16];
+    size_t len;
+
+    memset(keybuf, 0, sizeof(keybuf));
+
+    password = qcrypto_secret_lookup_as_utf8(keysecret, errp);
+    if (!password) {
+        return -1;
+    }
+
+    /* size_t keeps the MIN() comparison against sizeof() unsigned */
+    len = strlen(password);
+    memcpy(keybuf, password, MIN(len, sizeof(keybuf)));
+    g_free(password);
+
+    block->niv = qcrypto_cipher_get_iv_len(QCRYPTO_CIPHER_ALG_AES_128,
+                                           QCRYPTO_CIPHER_MODE_CBC);
+    block->ivgen = qcrypto_ivgen_new(QCRYPTO_IVGEN_ALG_PLAIN64,
+                                     0, 0, NULL, 0, errp);
+    if (!block->ivgen) {
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    block->cipher = qcrypto_cipher_new(QCRYPTO_CIPHER_ALG_AES_128,
+                                       QCRYPTO_CIPHER_MODE_CBC,
+                                       keybuf, G_N_ELEMENTS(keybuf),
+                                       errp);
+    if (!block->cipher) {
+        ret = -ENOTSUP;
+        goto fail;
+    }
+
+    block->payload_offset = 0;
+
+    return 0;
+
+ fail:
+    /* Clear the pointers so that @block never refers to freed objects */
+    qcrypto_cipher_free(block->cipher);
+    block->cipher = NULL;
+    qcrypto_ivgen_free(block->ivgen);
+    block->ivgen = NULL;
+    return ret;
+}
+
+
+/*
+ * Open hook of the qcow driver.
+ *
+ * With QCRYPTO_BLOCK_OPEN_NO_IO set in @flags nothing is initialised
+ * and 0 is returned. Otherwise the 'key-secret' option is mandatory
+ * and the cipher state is built by qcrypto_block_qcow_init().
+ * @readfunc and @opaque are unused.
+ */
+static int
+qcrypto_block_qcow_open(QCryptoBlock *block,
+                        QCryptoBlockOpenOptions *options,
+                        QCryptoBlockReadFunc readfunc G_GNUC_UNUSED,
+                        void *opaque G_GNUC_UNUSED,
+                        unsigned int flags,
+                        Error **errp)
+{
+    const char *secret;
+
+    if (flags & QCRYPTO_BLOCK_OPEN_NO_IO) {
+        return 0;
+    }
+
+    secret = options->u.qcow.key_secret;
+    if (!secret) {
+        error_setg(errp,
+                   "Parameter 'key-secret' is required for cipher");
+        return -1;
+    }
+
+    return qcrypto_block_qcow_init(block, secret, errp);
+}
+
+
+/*
+ * Create hook of the qcow driver.
+ *
+ * Nothing is written out (QCow2 has no special header, since
+ * everything is hardwired), so @initfunc, @writefunc and @opaque are
+ * unused. The 'key-secret' option is mandatory; the cipher state is
+ * built by qcrypto_block_qcow_init().
+ */
+static int
+qcrypto_block_qcow_create(QCryptoBlock *block,
+                          QCryptoBlockCreateOptions *options,
+                          QCryptoBlockInitFunc initfunc G_GNUC_UNUSED,
+                          QCryptoBlockWriteFunc writefunc G_GNUC_UNUSED,
+                          void *opaque G_GNUC_UNUSED,
+                          Error **errp)
+{
+    const char *secret = options->u.qcow.key_secret;
+
+    if (secret) {
+        return qcrypto_block_qcow_init(block, secret, errp);
+    }
+
+    error_setg(errp, "Parameter 'key-secret' is required for cipher");
+    return -1;
+}
+
+
+/*
+ * Cleanup hook of the qcow driver. Intentionally empty: this driver
+ * never stores anything in block->opaque.
+ */
+static void
+qcrypto_block_qcow_cleanup(QCryptoBlock *block)
+{
+    /* nothing to release */
+}
+
+
+/*
+ * Decrypt @len bytes of @buf in place as a run of 512-byte qcow
+ * sectors beginning at sector number @startsector.
+ *
+ * Returns whatever qcrypto_block_decrypt_helper() returns.
+ */
+static int
+qcrypto_block_qcow_decrypt(QCryptoBlock *block, uint64_t startsector,
+                           uint8_t *buf, size_t len, Error **errp)
+{
+    QCryptoCipher *cipher = block->cipher;
+    QCryptoIVGen *ivgen = block->ivgen;
+
+    return qcrypto_block_decrypt_helper(cipher, block->niv, ivgen,
+                                        QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+/*
+ * Encrypt @len bytes of @buf in place as a run of 512-byte qcow
+ * sectors beginning at sector number @startsector.
+ *
+ * Returns whatever qcrypto_block_encrypt_helper() returns.
+ */
+static int
+qcrypto_block_qcow_encrypt(QCryptoBlock *block, uint64_t startsector,
+                           uint8_t *buf, size_t len, Error **errp)
+{
+    QCryptoCipher *cipher = block->cipher;
+    QCryptoIVGen *ivgen = block->ivgen;
+
+    return qcrypto_block_encrypt_helper(cipher, block->niv, ivgen,
+                                        QCRYPTO_BLOCK_QCOW_SECTOR_SIZE,
+                                        startsector, buf, len, errp);
+}
+
+
+/*
+ * Driver table for the legacy qcow/qcow2 AES-CBC format. The optional
+ * get_info callback is deliberately left unset.
+ */
+const QCryptoBlockDriver qcrypto_block_driver_qcow = {
+ .open = qcrypto_block_qcow_open,
+ .create = qcrypto_block_qcow_create,
+ .cleanup = qcrypto_block_qcow_cleanup,
+ .decrypt = qcrypto_block_qcow_decrypt,
+ .encrypt = qcrypto_block_qcow_encrypt,
+ .has_format = qcrypto_block_qcow_has_format,
+};
diff --git a/crypto/block-qcow.h b/crypto/block-qcow.h
new file mode 100644
index 0000000..3e2c0a8
--- /dev/null
+++ b/crypto/block-qcow.h
@@ -0,0 +1,28 @@
+/*
+ * QEMU Crypto block device encryption QCow/QCow2 AES-CBC format
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCK_QCOW_H
+#define QCRYPTO_BLOCK_QCOW_H
+
+#include "crypto/blockpriv.h"
+
+/* Callback table implementing the legacy qcow/qcow2 AES-CBC format */
+extern const QCryptoBlockDriver qcrypto_block_driver_qcow;
+
+#endif /* QCRYPTO_BLOCK_QCOW_H */
diff --git a/crypto/block.c b/crypto/block.c
new file mode 100644
index 0000000..be823ee
--- /dev/null
+++ b/crypto/block.c
@@ -0,0 +1,278 @@
+/*
+ * QEMU Crypto block device encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "crypto/blockpriv.h"
+#include "crypto/block-qcow.h"
+#include "crypto/block-luks.h"
+
+/*
+ * Registered format drivers, indexed by QCryptoBlockFormat. Lookups
+ * bounds-check the index and treat a NULL entry as "unsupported".
+ */
+static const QCryptoBlockDriver *qcrypto_block_drivers[] = {
+ [Q_CRYPTO_BLOCK_FORMAT_QCOW] = &qcrypto_block_driver_qcow,
+ [Q_CRYPTO_BLOCK_FORMAT_LUKS] = &qcrypto_block_driver_luks,
+};
+
+
+/*
+ * Ask the driver registered for @format whether the @len bytes at
+ * @buf look like that format.
+ *
+ * Returns false if @format has no registered driver, otherwise the
+ * driver's has_format() verdict.
+ */
+bool qcrypto_block_has_format(QCryptoBlockFormat format,
+                              const uint8_t *buf,
+                              size_t len)
+{
+    const QCryptoBlockDriver *driver;
+
+    if (format >= G_N_ELEMENTS(qcrypto_block_drivers)) {
+        return false;
+    }
+
+    driver = qcrypto_block_drivers[format];
+    if (!driver) {
+        return false;
+    }
+
+    return driver->has_format(buf, len);
+}
+
+
+/*
+ * Allocate a QCryptoBlock and open it with the driver selected by
+ * options->format.
+ *
+ * @readfunc, @opaque and @flags are handed to the driver's open()
+ * callback unchanged.
+ *
+ * Returns the new block, or NULL with @errp set if the format has no
+ * registered driver or the driver's open() callback fails.
+ */
+QCryptoBlock *qcrypto_block_open(QCryptoBlockOpenOptions *options,
+                                 QCryptoBlockReadFunc readfunc,
+                                 void *opaque,
+                                 unsigned int flags,
+                                 Error **errp)
+{
+    const QCryptoBlockDriver *driver = NULL;
+    QCryptoBlock *block;
+
+    if (options->format < G_N_ELEMENTS(qcrypto_block_drivers)) {
+        driver = qcrypto_block_drivers[options->format];
+    }
+    if (!driver) {
+        error_setg(errp, "Unsupported block driver %d", options->format);
+        return NULL;
+    }
+
+    block = g_new0(QCryptoBlock, 1);
+    block->format = options->format;
+    block->driver = driver;
+
+    if (driver->open(block, options, readfunc, opaque, flags, errp) < 0) {
+        g_free(block);
+        return NULL;
+    }
+
+    return block;
+}
+
+
+/*
+ * Allocate a QCryptoBlock and initialise a new encrypted volume with
+ * the driver selected by options->format.
+ *
+ * @initfunc, @writefunc and @opaque are handed to the driver's
+ * create() callback unchanged.
+ *
+ * Returns the new block, or NULL with @errp set if the format has no
+ * registered driver or the driver's create() callback fails.
+ */
+QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
+                                   QCryptoBlockInitFunc initfunc,
+                                   QCryptoBlockWriteFunc writefunc,
+                                   void *opaque,
+                                   Error **errp)
+{
+    const QCryptoBlockDriver *driver = NULL;
+    QCryptoBlock *block;
+
+    if (options->format < G_N_ELEMENTS(qcrypto_block_drivers)) {
+        driver = qcrypto_block_drivers[options->format];
+    }
+    if (!driver) {
+        error_setg(errp, "Unsupported block driver %d", options->format);
+        return NULL;
+    }
+
+    block = g_new0(QCryptoBlock, 1);
+    block->format = options->format;
+    block->driver = driver;
+
+    if (driver->create(block, options, initfunc,
+                       writefunc, opaque, errp) < 0) {
+        g_free(block);
+        return NULL;
+    }
+
+    return block;
+}
+
+
+/*
+ * Build a newly allocated QCryptoBlockInfo describing @block.
+ *
+ * The format field is always filled in. If the driver provides a
+ * get_info() callback it adds its own details; if that callback
+ * fails the info object is freed and NULL is returned.
+ */
+QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
+                                         Error **errp)
+{
+    QCryptoBlockInfo *info = g_new0(QCryptoBlockInfo, 1);
+
+    info->format = block->format;
+
+    /* get_info is optional; drivers without it report format only */
+    if (!block->driver->get_info) {
+        return info;
+    }
+
+    if (block->driver->get_info(block, info, errp) < 0) {
+        g_free(info);
+        return NULL;
+    }
+
+    return info;
+}
+
+
+/*
+ * Decrypt @len bytes of @buf by delegating to the decrypt() callback
+ * of the driver attached to @block. Returns the callback's result.
+ */
+int qcrypto_block_decrypt(QCryptoBlock *block,
+                          uint64_t startsector,
+                          uint8_t *buf,
+                          size_t len,
+                          Error **errp)
+{
+    const QCryptoBlockDriver *driver = block->driver;
+
+    return driver->decrypt(block, startsector, buf, len, errp);
+}
+
+
+/*
+ * Encrypt @len bytes of @buf by delegating to the encrypt() callback
+ * of the driver attached to @block. Returns the callback's result.
+ */
+int qcrypto_block_encrypt(QCryptoBlock *block,
+                          uint64_t startsector,
+                          uint8_t *buf,
+                          size_t len,
+                          Error **errp)
+{
+    const QCryptoBlockDriver *driver = block->driver;
+
+    return driver->encrypt(block, startsector, buf, len, errp);
+}
+
+
+/* Return the cipher object attached to @block (still owned by @block) */
+QCryptoCipher *qcrypto_block_get_cipher(QCryptoBlock *block)
+{
+    QCryptoCipher *cipher = block->cipher;
+
+    return cipher;
+}
+
+
+/* Return the IV generator attached to @block (still owned by @block) */
+QCryptoIVGen *qcrypto_block_get_ivgen(QCryptoBlock *block)
+{
+    QCryptoIVGen *ivgen = block->ivgen;
+
+    return ivgen;
+}
+
+
+/* Return the hash algorithm recorded in @block's kdfhash field */
+QCryptoHashAlgorithm qcrypto_block_get_kdf_hash(QCryptoBlock *block)
+{
+    QCryptoHashAlgorithm hash = block->kdfhash;
+
+    return hash;
+}
+
+
+/* Return the payload offset of @block, expressed in bytes */
+uint64_t qcrypto_block_get_payload_offset(QCryptoBlock *block)
+{
+    uint64_t offset = block->payload_offset;
+
+    return offset;
+}
+
+
+/*
+ * Release @block and everything it owns. A NULL @block is a no-op.
+ *
+ * The driver's cleanup() callback runs first, then the cipher and IV
+ * generator are freed, and finally the block itself.
+ */
+void qcrypto_block_free(QCryptoBlock *block)
+{
+    if (block) {
+        block->driver->cleanup(block);
+
+        qcrypto_cipher_free(block->cipher);
+        qcrypto_ivgen_free(block->ivgen);
+        g_free(block);
+    }
+}
+
+
+/* Signature shared by qcrypto_cipher_encrypt() and qcrypto_cipher_decrypt() */
+typedef int (*QCryptoBlockCipherFunc)(QCryptoCipher *cipher,
+                                      const void *in,
+                                      void *out,
+                                      size_t len,
+                                      Error **errp);
+
+
+/*
+ * Common sector loop behind the encrypt and decrypt helpers.
+ *
+ * @buf is processed in place in chunks of at most @sectorsize bytes.
+ * When @niv is non-zero, a fresh IV is calculated from @ivgen for the
+ * current sector number and installed in @cipher before each chunk;
+ * the sector number starts at @startsector and grows by one per chunk.
+ * @func performs the actual cipher operation on each chunk.
+ *
+ * Returns 0 on success, -1 (with @errp set by the failing call) as
+ * soon as any step fails; earlier chunks remain transformed.
+ */
+static int qcrypto_block_cipher_helper(QCryptoCipher *cipher,
+                                       size_t niv,
+                                       QCryptoIVGen *ivgen,
+                                       int sectorsize,
+                                       uint64_t startsector,
+                                       uint8_t *buf,
+                                       size_t len,
+                                       QCryptoBlockCipherFunc func,
+                                       Error **errp)
+{
+    uint8_t *iv;
+    int ret = -1;
+
+    iv = niv ? g_new0(uint8_t, niv) : NULL;
+
+    while (len > 0) {
+        size_t nbytes;
+        if (niv) {
+            if (qcrypto_ivgen_calculate(ivgen,
+                                        startsector,
+                                        iv, niv,
+                                        errp) < 0) {
+                goto cleanup;
+            }
+
+            if (qcrypto_cipher_setiv(cipher,
+                                     iv, niv,
+                                     errp) < 0) {
+                goto cleanup;
+            }
+        }
+
+        /* The final chunk may be shorter than a full sector */
+        nbytes = len > sectorsize ? sectorsize : len;
+        if (func(cipher, buf, buf, nbytes, errp) < 0) {
+            goto cleanup;
+        }
+
+        startsector++;
+        buf += nbytes;
+        len -= nbytes;
+    }
+
+    ret = 0;
+ cleanup:
+    g_free(iv);
+    return ret;
+}
+
+
+/*
+ * Decrypt @len bytes of @buf in place, one @sectorsize chunk at a
+ * time, starting at sector number @startsector. See
+ * qcrypto_block_cipher_helper() for the per-sector IV handling.
+ *
+ * Returns 0 on success, -1 with @errp set on failure.
+ */
+int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp)
+{
+    return qcrypto_block_cipher_helper(cipher, niv, ivgen, sectorsize,
+                                       startsector, buf, len,
+                                       qcrypto_cipher_decrypt, errp);
+}
+
+
+/*
+ * Encrypt @len bytes of @buf in place, one @sectorsize chunk at a
+ * time, starting at sector number @startsector. See
+ * qcrypto_block_cipher_helper() for the per-sector IV handling.
+ *
+ * Returns 0 on success, -1 with @errp set on failure.
+ */
+int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
+                                 size_t niv,
+                                 QCryptoIVGen *ivgen,
+                                 int sectorsize,
+                                 uint64_t startsector,
+                                 uint8_t *buf,
+                                 size_t len,
+                                 Error **errp)
+{
+    return qcrypto_block_cipher_helper(cipher, niv, ivgen, sectorsize,
+                                       startsector, buf, len,
+                                       qcrypto_cipher_encrypt, errp);
+}
diff --git a/crypto/blockpriv.h b/crypto/blockpriv.h
new file mode 100644
index 0000000..68f0f06
--- /dev/null
+++ b/crypto/blockpriv.h
@@ -0,0 +1,96 @@
+/*
+ * QEMU Crypto block device encryption
+ *
+ * Copyright (c) 2015-2016 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifndef QCRYPTO_BLOCKPRIV_H
+#define QCRYPTO_BLOCKPRIV_H
+
+#include "crypto/block.h"
+
+/* Forward declaration; the callback table is defined further down */
+typedef struct QCryptoBlockDriver QCryptoBlockDriver;
+
+/*
+ * State of one encrypted block device. Allocated by
+ * qcrypto_block_open() / qcrypto_block_create() and released by
+ * qcrypto_block_free().
+ */
+struct QCryptoBlock {
+ /* Copied from the open/create options */
+ QCryptoBlockFormat format;
+
+ /* Callback table selected by format */
+ const QCryptoBlockDriver *driver;
+ /* Driver-private state; the driver's cleanup() releases it */
+ void *opaque;
+
+ /* Payload cipher and IV generator, set up by the driver and
+ * freed by qcrypto_block_free() */
+ QCryptoCipher *cipher;
+ QCryptoIVGen *ivgen;
+ /* Returned by qcrypto_block_get_kdf_hash() */
+ QCryptoHashAlgorithm kdfhash;
+ /* IV length handed to the encrypt/decrypt helpers */
+ size_t niv;
+ uint64_t payload_offset; /* In bytes */
+};
+
+/*
+ * Callbacks a block encryption format must provide. All of them
+ * except get_info are invoked without a NULL check.
+ */
+struct QCryptoBlockDriver {
+ /* Called by qcrypto_block_open(); a negative return makes the
+ * caller free the block and return NULL */
+ int (*open)(QCryptoBlock *block,
+ QCryptoBlockOpenOptions *options,
+ QCryptoBlockReadFunc readfunc,
+ void *opaque,
+ unsigned int flags,
+ Error **errp);
+
+ /* Called by qcrypto_block_create(); a negative return makes the
+ * caller free the block and return NULL */
+ int (*create)(QCryptoBlock *block,
+ QCryptoBlockCreateOptions *options,
+ QCryptoBlockInitFunc initfunc,
+ QCryptoBlockWriteFunc writefunc,
+ void *opaque,
+ Error **errp);
+
+ /* Optional: may be NULL, in which case only the format is
+ * reported by qcrypto_block_get_info() */
+ int (*get_info)(QCryptoBlock *block,
+ QCryptoBlockInfo *info,
+ Error **errp);
+
+ /* Called by qcrypto_block_free() before the cipher and IV
+ * generator are released */
+ void (*cleanup)(QCryptoBlock *block);
+
+ /* Backends of qcrypto_block_encrypt() / qcrypto_block_decrypt() */
+ int (*encrypt)(QCryptoBlock *block,
+ uint64_t startsector,
+ uint8_t *buf,
+ size_t len,
+ Error **errp);
+ int (*decrypt)(QCryptoBlock *block,
+ uint64_t startsector,
+ uint8_t *buf,
+ size_t len,
+ Error **errp);
+
+ /* Backend of qcrypto_block_has_format() */
+ bool (*has_format)(const uint8_t *buf,
+ size_t buflen);
+};
+
+
+/*
+ * Decrypt @len bytes of @buf in place in chunks of at most
+ * @sectorsize bytes. When @niv is non-zero an IV is calculated with
+ * @ivgen for each sector, counting up from @startsector.
+ * Returns 0 on success, -1 on failure.
+ */
+int qcrypto_block_decrypt_helper(QCryptoCipher *cipher,
+ size_t niv,
+ QCryptoIVGen *ivgen,
+ int sectorsize,
+ uint64_t startsector,
+ uint8_t *buf,
+ size_t len,
+ Error **errp);
+
+/*
+ * Encrypt @len bytes of @buf in place in chunks of at most
+ * @sectorsize bytes. When @niv is non-zero an IV is calculated with
+ * @ivgen for each sector, counting up from @startsector.
+ * Returns 0 on success, -1 on failure.
+ */
+int qcrypto_block_encrypt_helper(QCryptoCipher *cipher,
+ size_t niv,
+ QCryptoIVGen *ivgen,
+ int sectorsize,
+ uint64_t startsector,
+ uint8_t *buf,
+ size_t len,
+ Error **errp);
+
+#endif /* QCRYPTO_BLOCKPRIV_H */
diff --git a/crypto/cipher-builtin.c b/crypto/cipher-builtin.c
new file mode 100644
index 0000000..88963f6
--- /dev/null
+++ b/crypto/cipher-builtin.c
@@ -0,0 +1,513 @@
+/*
+ * QEMU Crypto cipher built-in algorithms
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/aes.h"
+#include "crypto/desrfb.h"
+#include "crypto/xts.h"
+
+/*
+ * Expanded AES key schedules for one key: enc is filled in by
+ * AES_set_encrypt_key(), dec by AES_set_decrypt_key().
+ */
+typedef struct QCryptoCipherBuiltinAESContext QCryptoCipherBuiltinAESContext;
+struct QCryptoCipherBuiltinAESContext {
+ AES_KEY enc;
+ AES_KEY dec;
+};
+/*
+ * AES cipher state. key_tweak is only initialised in XTS mode; iv
+ * holds the value last installed by the setiv callback and is used
+ * by the CBC and XTS code paths.
+ */
+typedef struct QCryptoCipherBuiltinAES QCryptoCipherBuiltinAES;
+struct QCryptoCipherBuiltinAES {
+ QCryptoCipherBuiltinAESContext key;
+ QCryptoCipherBuiltinAESContext key_tweak;
+ uint8_t iv[AES_BLOCK_SIZE];
+};
+/*
+ * DES-RFB cipher state. key is a heap copy of the caller's key, given
+ * to deskey() before every encrypt/decrypt and released by the free
+ * callback. nkey is presumably the key length in bytes; its
+ * assignment is not visible in this excerpt - TODO confirm.
+ */
+typedef struct QCryptoCipherBuiltinDESRFB QCryptoCipherBuiltinDESRFB;
+struct QCryptoCipherBuiltinDESRFB {
+ uint8_t *key;
+ size_t nkey;
+};
+
+/*
+ * Private context stored in cipher->opaque by the built-in backend:
+ * the per-algorithm state plus the callbacks chosen by the
+ * algorithm's init function.
+ */
+typedef struct QCryptoCipherBuiltin QCryptoCipherBuiltin;
+struct QCryptoCipherBuiltin {
+ /* Only the member matching the initialised algorithm is valid */
+ union {
+ QCryptoCipherBuiltinAES aes;
+ QCryptoCipherBuiltinDESRFB desrfb;
+ } state;
+ /* Set to AES_BLOCK_SIZE by the AES init function */
+ size_t blocksize;
+ /* Releases this context and clears cipher->opaque */
+ void (*free)(QCryptoCipher *cipher);
+ int (*setiv)(QCryptoCipher *cipher,
+ const uint8_t *iv, size_t niv,
+ Error **errp);
+ int (*encrypt)(QCryptoCipher *cipher,
+ const void *in,
+ void *out,
+ size_t len,
+ Error **errp);
+ int (*decrypt)(QCryptoCipher *cipher,
+ const void *in,
+ void *out,
+ size_t len,
+ Error **errp);
+};
+
+
+/*
+ * Free callback for AES: release the built-in context and clear the
+ * pointer kept in @cipher.
+ */
+static void qcrypto_cipher_free_aes(QCryptoCipher *cipher)
+{
+    g_free(cipher->opaque);
+    cipher->opaque = NULL;
+}
+
+
+/*
+ * AES-ECB encrypt @len bytes from @in to @out using schedule @key.
+ *
+ * Blocks are encrypted directly while more than AES_BLOCK_SIZE bytes
+ * remain. The final 1..AES_BLOCK_SIZE bytes go through a zero-padded
+ * bounce buffer and only that many output bytes are copied back.
+ */
+static void qcrypto_cipher_aes_ecb_encrypt(AES_KEY *key,
+                                           const void *in,
+                                           void *out,
+                                           size_t len)
+{
+    const uint8_t *src = in;
+    uint8_t *dst = out;
+
+    for (; len > AES_BLOCK_SIZE; len -= AES_BLOCK_SIZE) {
+        AES_encrypt(src, dst, key);
+        src += AES_BLOCK_SIZE;
+        dst += AES_BLOCK_SIZE;
+    }
+
+    if (len) {
+        uint8_t padded[AES_BLOCK_SIZE], result[AES_BLOCK_SIZE];
+
+        memcpy(padded, src, len);
+        /* Fill with 0 to avoid valgrind uninitialized reads */
+        memset(padded + len, 0, sizeof(padded) - len);
+        AES_encrypt(padded, result, key);
+        memcpy(dst, result, len);
+    }
+}
+
+
+/*
+ * AES-ECB decrypt @len bytes from @in to @out using schedule @key.
+ *
+ * Blocks are decrypted directly while more than AES_BLOCK_SIZE bytes
+ * remain. The final 1..AES_BLOCK_SIZE bytes go through a zero-padded
+ * bounce buffer and only that many output bytes are copied back.
+ */
+static void qcrypto_cipher_aes_ecb_decrypt(AES_KEY *key,
+                                           const void *in,
+                                           void *out,
+                                           size_t len)
+{
+    const uint8_t *src = in;
+    uint8_t *dst = out;
+
+    for (; len > AES_BLOCK_SIZE; len -= AES_BLOCK_SIZE) {
+        AES_decrypt(src, dst, key);
+        src += AES_BLOCK_SIZE;
+        dst += AES_BLOCK_SIZE;
+    }
+
+    if (len) {
+        uint8_t padded[AES_BLOCK_SIZE], result[AES_BLOCK_SIZE];
+
+        memcpy(padded, src, len);
+        /* Fill with 0 to avoid valgrind uninitialized reads */
+        memset(padded + len, 0, sizeof(padded) - len);
+        AES_decrypt(padded, result, key);
+        memcpy(dst, result, len);
+    }
+}
+
+
+/*
+ * Encrypt callback handed to the XTS code. @ctx is a
+ * QCryptoCipherBuiltinAESContext; its encryption schedule is used to
+ * ECB-encrypt @length bytes from @src into @dst. Note the argument
+ * order (length, dst, src) differs from the ECB helper's.
+ */
+static void qcrypto_cipher_aes_xts_encrypt(const void *ctx,
+                                           size_t length,
+                                           uint8_t *dst,
+                                           const uint8_t *src)
+{
+    const QCryptoCipherBuiltinAESContext *keys = ctx;
+    /* The ECB helper takes a non-const schedule pointer */
+    AES_KEY *schedule = (AES_KEY *)&keys->enc;
+
+    qcrypto_cipher_aes_ecb_encrypt(schedule, src, dst, length);
+}
+
+
+/*
+ * Decrypt callback handed to the XTS code. @ctx is a
+ * QCryptoCipherBuiltinAESContext; its decryption schedule is used to
+ * ECB-decrypt @length bytes from @src into @dst. Note the argument
+ * order (length, dst, src) differs from the ECB helper's.
+ */
+static void qcrypto_cipher_aes_xts_decrypt(const void *ctx,
+                                           size_t length,
+                                           uint8_t *dst,
+                                           const uint8_t *src)
+{
+    const QCryptoCipherBuiltinAESContext *keys = ctx;
+    /* The ECB helper takes a non-const schedule pointer */
+    AES_KEY *schedule = (AES_KEY *)&keys->dec;
+
+    qcrypto_cipher_aes_ecb_decrypt(schedule, src, dst, length);
+}
+
+
+/*
+ * Encrypt callback for AES: encrypt @len bytes from @in to @out
+ * using the mode recorded in @cipher (ECB, CBC or XTS). CBC and XTS
+ * use the IV stored in the context. Any other mode trips an
+ * assertion.
+ *
+ * Always returns 0; @errp is never set.
+ */
+static int qcrypto_cipher_encrypt_aes(QCryptoCipher *cipher,
+                                      const void *in,
+                                      void *out,
+                                      size_t len,
+                                      Error **errp)
+{
+    QCryptoCipherBuiltin *ctxt = cipher->opaque;
+    QCryptoCipherBuiltinAES *aes = &ctxt->state.aes;
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_ECB) {
+        qcrypto_cipher_aes_ecb_encrypt(&aes->key.enc, in, out, len);
+    } else if (cipher->mode == QCRYPTO_CIPHER_MODE_CBC) {
+        /* Final argument 1 selects encryption */
+        AES_cbc_encrypt(in, out, len, &aes->key.enc, aes->iv, 1);
+    } else if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_encrypt(&aes->key,
+                    &aes->key_tweak,
+                    qcrypto_cipher_aes_xts_encrypt,
+                    qcrypto_cipher_aes_xts_decrypt,
+                    aes->iv,
+                    len, out, in);
+    } else {
+        g_assert_not_reached();
+    }
+
+    return 0;
+}
+
+
+/*
+ * Decrypt callback for AES: decrypt @len bytes from @in to @out
+ * using the mode recorded in @cipher (ECB, CBC or XTS). CBC and XTS
+ * use the IV stored in the context. Any other mode trips an
+ * assertion.
+ *
+ * Always returns 0; @errp is never set.
+ */
+static int qcrypto_cipher_decrypt_aes(QCryptoCipher *cipher,
+                                      const void *in,
+                                      void *out,
+                                      size_t len,
+                                      Error **errp)
+{
+    QCryptoCipherBuiltin *ctxt = cipher->opaque;
+    QCryptoCipherBuiltinAES *aes = &ctxt->state.aes;
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_ECB) {
+        qcrypto_cipher_aes_ecb_decrypt(&aes->key.dec, in, out, len);
+    } else if (cipher->mode == QCRYPTO_CIPHER_MODE_CBC) {
+        /* Final argument 0 selects decryption */
+        AES_cbc_encrypt(in, out, len, &aes->key.dec, aes->iv, 0);
+    } else if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_decrypt(&aes->key,
+                    &aes->key_tweak,
+                    qcrypto_cipher_aes_xts_encrypt,
+                    qcrypto_cipher_aes_xts_decrypt,
+                    aes->iv,
+                    len, out, in);
+    } else {
+        g_assert_not_reached();
+    }
+
+    return 0;
+}
+
+/*
+ * Set-IV callback for AES: store @iv in the context for later CBC or
+ * XTS operations.
+ *
+ * Returns 0 on success, or -1 with @errp set when @niv is not exactly
+ * AES_BLOCK_SIZE bytes.
+ */
+static int qcrypto_cipher_setiv_aes(QCryptoCipher *cipher,
+                                    const uint8_t *iv, size_t niv,
+                                    Error **errp)
+{
+    QCryptoCipherBuiltin *ctxt = cipher->opaque;
+
+    if (niv == AES_BLOCK_SIZE) {
+        memcpy(ctxt->state.aes.iv, iv, AES_BLOCK_SIZE);
+        return 0;
+    }
+
+    error_setg(errp, "IV must be %d bytes not %zu",
+               AES_BLOCK_SIZE, niv);
+    return -1;
+}
+
+
+
+
+/*
+ * Initialise the built-in AES backend for @cipher with the @nkey
+ * byte @key.
+ *
+ * Only CBC, ECB and XTS modes are accepted. In XTS mode @key is
+ * treated as two halves: the first keys the data cipher, the second
+ * (starting at @key + @nkey / 2) keys the tweak cipher, so each
+ * schedule is set up with @nkey * 4 bits. In the other modes the
+ * whole key is used (@nkey * 8 bits).
+ *
+ * On success the context is stored in cipher->opaque and 0 is
+ * returned. On failure -1 is returned with @errp set and
+ * cipher->opaque is left untouched.
+ */
+static int qcrypto_cipher_init_aes(QCryptoCipher *cipher,
+                                   const uint8_t *key, size_t nkey,
+                                   Error **errp)
+{
+    QCryptoCipherBuiltin *ctxt;
+    bool is_xts = false;
+    size_t keybits;
+
+    switch (cipher->mode) {
+    case QCRYPTO_CIPHER_MODE_XTS:
+        is_xts = true;
+        break;
+    case QCRYPTO_CIPHER_MODE_CBC:
+    case QCRYPTO_CIPHER_MODE_ECB:
+        break;
+    default:
+        error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
+        return -1;
+    }
+
+    /* XTS supplies two keys back to back, so each gets half the bits */
+    keybits = is_xts ? nkey * 4 : nkey * 8;
+
+    ctxt = g_new0(QCryptoCipherBuiltin, 1);
+
+    if (AES_set_encrypt_key(key, keybits, &ctxt->state.aes.key.enc) != 0) {
+        error_setg(errp, "Failed to set encryption key");
+        goto error;
+    }
+
+    if (AES_set_decrypt_key(key, keybits, &ctxt->state.aes.key.dec) != 0) {
+        error_setg(errp, "Failed to set decryption key");
+        goto error;
+    }
+
+    if (is_xts) {
+        const uint8_t *tweak = key + (nkey / 2);
+
+        if (AES_set_encrypt_key(tweak, keybits,
+                                &ctxt->state.aes.key_tweak.enc) != 0) {
+            error_setg(errp, "Failed to set encryption key");
+            goto error;
+        }
+
+        if (AES_set_decrypt_key(tweak, keybits,
+                                &ctxt->state.aes.key_tweak.dec) != 0) {
+            error_setg(errp, "Failed to set decryption key");
+            goto error;
+        }
+    }
+
+    ctxt->blocksize = AES_BLOCK_SIZE;
+    ctxt->free = qcrypto_cipher_free_aes;
+    ctxt->setiv = qcrypto_cipher_setiv_aes;
+    ctxt->encrypt = qcrypto_cipher_encrypt_aes;
+    ctxt->decrypt = qcrypto_cipher_decrypt_aes;
+
+    cipher->opaque = ctxt;
+
+    return 0;
+
+ error:
+    g_free(ctxt);
+    return -1;
+}
+
+
+/* Release the DES-RFB state, including its private copy of the key. */
+static void qcrypto_cipher_free_des_rfb(QCryptoCipher *cipher)
+{
+    QCryptoCipherBuiltin *des_ctx = cipher->opaque;
+    cipher->opaque = NULL;
+    g_free(des_ctx->state.desrfb.key);
+    g_free(des_ctx);
+}
+
+
+/* Encrypt @len bytes (a multiple of 8) as independent 8-byte DES blocks. */
+static int qcrypto_cipher_encrypt_des_rfb(QCryptoCipher *cipher,
+                                          const void *in, void *out,
+                                          size_t len, Error **errp)
+{
+    QCryptoCipherBuiltin *des_ctx = cipher->opaque;
+    size_t off = 0;
+
+    if (len % 8 != 0) {
+        error_setg(errp, "Buffer size must be multiple of 8 not %zu", len);
+        return -1;
+    }
+
+    /* Key the DES engine in EN0 mode before walking the buffer. */
+    deskey(des_ctx->state.desrfb.key, EN0);
+
+    while (off < len) {
+        des((void *)in + off, out + off);
+        off += 8;
+    }
+
+    return 0;
+}
+
+
+/* Decrypt @len bytes (a multiple of 8) as independent 8-byte DES blocks. */
+static int qcrypto_cipher_decrypt_des_rfb(QCryptoCipher *cipher,
+                                          const void *in, void *out,
+                                          size_t len, Error **errp)
+{
+    QCryptoCipherBuiltin *des_ctx = cipher->opaque;
+    size_t off = 0;
+
+    if (len % 8 != 0) {
+        error_setg(errp, "Buffer size must be multiple of 8 not %zu", len);
+        return -1;
+    }
+
+    /* Key the DES engine in DE1 mode before walking the buffer. */
+    deskey(des_ctx->state.desrfb.key, DE1);
+
+    while (off < len) {
+        des((void *)in + off, out + off);
+        off += 8;
+    }
+
+    return 0;
+}
+
+
+static int qcrypto_cipher_setiv_des_rfb(QCryptoCipher *cipher,
+ const uint8_t *iv, size_t niv,
+ Error **errp)
+{
+ error_setg(errp, "Setting IV is not supported");
+ return -1; /* always fails: this cipher is only set up for ECB mode */
+}
+
+
+/* Set up the built-in DES-RFB cipher (ECB only) with a private key copy. */
+static int qcrypto_cipher_init_des_rfb(QCryptoCipher *cipher,
+                                       const uint8_t *key, size_t nkey,
+                                       Error **errp)
+{
+    QCryptoCipherBuiltin *des_ctx;
+    uint8_t *keycopy;
+
+    if (cipher->mode != QCRYPTO_CIPHER_MODE_ECB) {
+        error_setg(errp, "Unsupported cipher mode %d", cipher->mode);
+        return -1;
+    }
+
+    keycopy = g_new0(uint8_t, nkey);
+    memcpy(keycopy, key, nkey);
+
+    des_ctx = g_new0(QCryptoCipherBuiltin, 1);
+    des_ctx->state.desrfb.nkey = nkey;
+    des_ctx->state.desrfb.key = keycopy;
+    des_ctx->decrypt = qcrypto_cipher_decrypt_des_rfb;
+    des_ctx->encrypt = qcrypto_cipher_encrypt_des_rfb;
+    des_ctx->setiv = qcrypto_cipher_setiv_des_rfb;
+    des_ctx->free = qcrypto_cipher_free_des_rfb;
+    des_ctx->blocksize = 8;
+    cipher->opaque = des_ctx;
+    return 0;
+}
+
+
+/*
+ * Report whether the built-in backend implements @alg (DES-RFB and AES).
+ */
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
+{
+    bool supported = alg == QCRYPTO_CIPHER_ALG_DES_RFB ||
+                     alg == QCRYPTO_CIPHER_ALG_AES_128 ||
+                     alg == QCRYPTO_CIPHER_ALG_AES_192 ||
+                     alg == QCRYPTO_CIPHER_ALG_AES_256;
+
+    return supported;
+}
+
+
+/*
+ * Allocate a cipher object for @alg/@mode and key it with @key.
+ * Returns NULL with @errp set if the key length, algorithm or mode is
+ * rejected.
+ */
+QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
+                                  QCryptoCipherMode mode,
+                                  const uint8_t *key, size_t nkey,
+                                  Error **errp)
+{
+    QCryptoCipher *cipher = g_new0(QCryptoCipher, 1);
+    int rc = -1;
+
+    cipher->alg = alg;
+    cipher->mode = mode;
+
+    if (qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
+        switch (cipher->alg) {
+        case QCRYPTO_CIPHER_ALG_DES_RFB:
+            rc = qcrypto_cipher_init_des_rfb(cipher, key, nkey, errp);
+            break;
+        case QCRYPTO_CIPHER_ALG_AES_128:
+        case QCRYPTO_CIPHER_ALG_AES_192:
+        case QCRYPTO_CIPHER_ALG_AES_256:
+            rc = qcrypto_cipher_init_aes(cipher, key, nkey, errp);
+            break;
+        default:
+            error_setg(errp,
+                       "Unsupported cipher algorithm %d", cipher->alg);
+            break;
+        }
+    }
+
+    if (rc < 0) {
+        g_free(cipher);
+        return NULL;
+    }
+
+    return cipher;
+}
+
+/*
+ * Destroy @cipher via its algorithm-specific free hook; NULL is a no-op.
+ */
+void qcrypto_cipher_free(QCryptoCipher *cipher)
+{
+    if (cipher != NULL) {
+        QCryptoCipherBuiltin *state = cipher->opaque;
+
+        state->free(cipher);
+        g_free(cipher);
+    }
+}
+
+
+/*
+ * Encrypt @len bytes (whole cipher blocks only) via the encrypt hook.
+ */
+int qcrypto_cipher_encrypt(QCryptoCipher *cipher, const void *in, void *out,
+                           size_t len, Error **errp)
+{
+    QCryptoCipherBuiltin *state = cipher->opaque;
+
+    if (len % state->blocksize == 0) {
+        return state->encrypt(cipher, in, out, len, errp);
+    }
+
+    error_setg(errp, "Length %zu must be a multiple of block size %zu",
+               len, state->blocksize);
+    return -1;
+}
+
+
+/*
+ * Decrypt @len bytes (whole cipher blocks only) via the decrypt hook.
+ */
+int qcrypto_cipher_decrypt(QCryptoCipher *cipher, const void *in, void *out,
+                           size_t len, Error **errp)
+{
+    QCryptoCipherBuiltin *state = cipher->opaque;
+
+    if (len % state->blocksize == 0) {
+        return state->decrypt(cipher, in, out, len, errp);
+    }
+
+    error_setg(errp, "Length %zu must be a multiple of block size %zu",
+               len, state->blocksize);
+    return -1;
+}
+
+
+/* Hand the IV to whichever setiv hook the active algorithm installed. */
+int qcrypto_cipher_setiv(QCryptoCipher *cipher,
+                         const uint8_t *iv, size_t niv,
+                         Error **errp)
+{
+    QCryptoCipherBuiltin *state = cipher->opaque;
+    return state->setiv(cipher, iv, niv, errp);
+}
diff --git a/crypto/cipher-gcrypt.c b/crypto/cipher-gcrypt.c
new file mode 100644
index 0000000..ede2f70
--- /dev/null
+++ b/crypto/cipher-gcrypt.c
@@ -0,0 +1,344 @@
+/*
+ * QEMU Crypto cipher libgcrypt algorithms
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/xts.h"
+
+#include <gcrypt.h>
+
+
+/*
+ * Report whether the libgcrypt backend implements @alg.
+ * DES-RFB plus the listed AES, CAST5, Serpent and Twofish variants
+ * are accepted; everything else is rejected.
+ */
+bool qcrypto_cipher_supports(QCryptoCipherAlgorithm alg)
+{
+    return alg == QCRYPTO_CIPHER_ALG_DES_RFB ||
+           alg == QCRYPTO_CIPHER_ALG_AES_128 ||
+           alg == QCRYPTO_CIPHER_ALG_AES_192 ||
+           alg == QCRYPTO_CIPHER_ALG_AES_256 ||
+           alg == QCRYPTO_CIPHER_ALG_CAST5_128 ||
+           alg == QCRYPTO_CIPHER_ALG_SERPENT_128 ||
+           alg == QCRYPTO_CIPHER_ALG_SERPENT_192 ||
+           alg == QCRYPTO_CIPHER_ALG_SERPENT_256 ||
+           alg == QCRYPTO_CIPHER_ALG_TWOFISH_128 ||
+           alg == QCRYPTO_CIPHER_ALG_TWOFISH_256;
+}
+
+typedef struct QCryptoCipherGcrypt QCryptoCipherGcrypt;
+struct QCryptoCipherGcrypt {
+ gcry_cipher_hd_t handle; /* cipher handle keyed with the data key */
+ gcry_cipher_hd_t tweakhandle; /* second handle, opened only for XTS mode */
+ size_t blocksize; /* cipher block size in bytes (8 or 16) */
+ uint8_t *iv; /* XTS IV buffer; stays NULL in other modes */
+};
+
+/*
+ * Create a libgcrypt-backed cipher for @alg in @mode, keyed with @key.
+ *
+ * XTS is built on two ECB handles: @key is split in half, the first half
+ * keys the data handle and the second half keys the tweak handle.
+ * Returns NULL with @errp set on failure.
+ */
+QCryptoCipher *qcrypto_cipher_new(QCryptoCipherAlgorithm alg,
+                                  QCryptoCipherMode mode,
+                                  const uint8_t *key, size_t nkey,
+                                  Error **errp)
+{
+    QCryptoCipher *cipher;
+    QCryptoCipherGcrypt *ctx;
+    gcry_error_t err;
+    int gcryalg, gcrymode;
+
+    switch (mode) {
+    case QCRYPTO_CIPHER_MODE_ECB:
+    case QCRYPTO_CIPHER_MODE_XTS:
+        gcrymode = GCRY_CIPHER_MODE_ECB;
+        break;
+    case QCRYPTO_CIPHER_MODE_CBC:
+        gcrymode = GCRY_CIPHER_MODE_CBC;
+        break;
+    default:
+        error_setg(errp, "Unsupported cipher mode %d", mode);
+        return NULL;
+    }
+
+    if (!qcrypto_cipher_validate_key_length(alg, mode, nkey, errp)) {
+        return NULL;
+    }
+
+    switch (alg) {
+    case QCRYPTO_CIPHER_ALG_DES_RFB:
+        gcryalg = GCRY_CIPHER_DES;
+        break;
+    case QCRYPTO_CIPHER_ALG_AES_128:
+        gcryalg = GCRY_CIPHER_AES128;
+        break;
+    case QCRYPTO_CIPHER_ALG_AES_192:
+        gcryalg = GCRY_CIPHER_AES192;
+        break;
+    case QCRYPTO_CIPHER_ALG_AES_256:
+        gcryalg = GCRY_CIPHER_AES256;
+        break;
+    case QCRYPTO_CIPHER_ALG_CAST5_128:
+        gcryalg = GCRY_CIPHER_CAST5;
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_128:
+        gcryalg = GCRY_CIPHER_SERPENT128;
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_192:
+        gcryalg = GCRY_CIPHER_SERPENT192;
+        break;
+    case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        gcryalg = GCRY_CIPHER_SERPENT256;
+        break;
+    case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+        gcryalg = GCRY_CIPHER_TWOFISH128;
+        break;
+    case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+        gcryalg = GCRY_CIPHER_TWOFISH;
+        break;
+    default:
+        error_setg(errp, "Unsupported cipher algorithm %d", alg);
+        return NULL;
+    }
+
+    cipher = g_new0(QCryptoCipher, 1);
+    cipher->alg = alg;
+    cipher->mode = mode;
+
+    ctx = g_new0(QCryptoCipherGcrypt, 1);
+
+    err = gcry_cipher_open(&ctx->handle, gcryalg, gcrymode, 0);
+    if (err != 0) {
+        error_setg(errp, "Cannot initialize cipher: %s", gcry_strerror(err));
+        goto error;
+    }
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        /* XTS needs a second handle that will carry the tweak key. */
+        err = gcry_cipher_open(&ctx->tweakhandle, gcryalg, gcrymode, 0);
+        if (err != 0) {
+            error_setg(errp, "Cannot initialize cipher: %s",
+                       gcry_strerror(err));
+            goto error;
+        }
+    }
+
+    if (cipher->alg == QCRYPTO_CIPHER_ALG_DES_RFB) {
+        /* We're using standard DES cipher from gcrypt, so we need
+         * to munge the key so that the results are the same as the
+         * bizarre RFB variant of DES :-)
+         */
+        uint8_t *rfbkey = qcrypto_cipher_munge_des_rfb_key(key, nkey);
+        err = gcry_cipher_setkey(ctx->handle, rfbkey, nkey);
+        g_free(rfbkey);
+        ctx->blocksize = 8;
+    } else {
+        if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+            nkey /= 2;
+            err = gcry_cipher_setkey(ctx->handle, key, nkey);
+            if (err != 0) {
+                error_setg(errp, "Cannot set key: %s", gcry_strerror(err));
+                goto error;
+            }
+            err = gcry_cipher_setkey(ctx->tweakhandle, key + nkey, nkey);
+        } else {
+            err = gcry_cipher_setkey(ctx->handle, key, nkey);
+        }
+        switch (cipher->alg) {
+        case QCRYPTO_CIPHER_ALG_AES_128:
+        case QCRYPTO_CIPHER_ALG_AES_192:
+        case QCRYPTO_CIPHER_ALG_AES_256:
+        case QCRYPTO_CIPHER_ALG_SERPENT_128:
+        case QCRYPTO_CIPHER_ALG_SERPENT_192:
+        case QCRYPTO_CIPHER_ALG_SERPENT_256:
+        case QCRYPTO_CIPHER_ALG_TWOFISH_128:
+        case QCRYPTO_CIPHER_ALG_TWOFISH_256:
+            ctx->blocksize = 16;
+            break;
+        case QCRYPTO_CIPHER_ALG_CAST5_128:
+            ctx->blocksize = 8;
+            break;
+        default:
+            g_assert_not_reached();
+        }
+    }
+
+    /* Checked after both branches so that a key rejected on the DES-RFB
+     * path is reported too, instead of handing back an unkeyed cipher.
+     */
+    if (err != 0) {
+        error_setg(errp, "Cannot set key: %s", gcry_strerror(err));
+        goto error;
+    }
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        /* The XTS IV is kept here and passed to the xts_* helpers. */
+        ctx->iv = g_new0(uint8_t, ctx->blocksize);
+    }
+
+    cipher->opaque = ctx;
+    return cipher;
+
+ error:
+    gcry_cipher_close(ctx->handle);
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        gcry_cipher_close(ctx->tweakhandle);
+    }
+    g_free(ctx);
+    g_free(cipher);
+    return NULL;
+}
+
+
+/* Close the gcrypt handle(s) and free @cipher; a NULL @cipher is ignored. */
+void qcrypto_cipher_free(QCryptoCipher *cipher)
+{
+    if (cipher != NULL) {
+        QCryptoCipherGcrypt *gctx = cipher->opaque;
+
+        gcry_cipher_close(gctx->handle);
+        if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+            gcry_cipher_close(gctx->tweakhandle);
+        }
+        g_free(gctx->iv);
+        g_free(gctx);
+        g_free(cipher);
+    }
+}
+
+
+/* XTS helper callback: encrypt with the gcrypt handle passed as @ctx. */
+static void qcrypto_gcrypt_xts_encrypt(const void *ctx, size_t length,
+                                       uint8_t *dst, const uint8_t *src)
+{
+    gcry_cipher_hd_t hd = (gcry_cipher_hd_t)ctx;
+    gcry_error_t err = gcry_cipher_encrypt(hd, dst, length, src, length);
+
+    g_assert(err == 0);
+}
+
+/* XTS helper callback: decrypt with the gcrypt handle passed as @ctx. */
+static void qcrypto_gcrypt_xts_decrypt(const void *ctx, size_t length,
+                                       uint8_t *dst, const uint8_t *src)
+{
+    gcry_cipher_hd_t hd = (gcry_cipher_hd_t)ctx;
+    gcry_error_t err = gcry_cipher_decrypt(hd, dst, length, src, length);
+
+    g_assert(err == 0);
+}
+
+/*
+ * Encrypt @len bytes from @in into @out.  @len must be a multiple of the
+ * cipher block size.  XTS is driven through the generic XTS helper using
+ * the data and tweak handles; other modes go straight to libgcrypt.
+ * Returns 0 on success, -1 with @errp set on failure.
+ */
+int qcrypto_cipher_encrypt(QCryptoCipher *cipher, const void *in, void *out,
+                           size_t len, Error **errp)
+{
+    QCryptoCipherGcrypt *gctx = cipher->opaque;
+    gcry_error_t rc;
+
+    if (len % gctx->blocksize != 0) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, gctx->blocksize);
+        return -1;
+    }
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_encrypt(gctx->handle, gctx->tweakhandle,
+                    qcrypto_gcrypt_xts_encrypt, qcrypto_gcrypt_xts_decrypt,
+                    gctx->iv, len, out, in);
+        return 0;
+    }
+
+    rc = gcry_cipher_encrypt(gctx->handle, out, len, in, len);
+    if (rc != 0) {
+        error_setg(errp, "Cannot encrypt data: %s", gcry_strerror(rc));
+        return -1;
+    }
+
+    return 0;
+}
+
+
+/*
+ * Decrypt @len bytes from @in into @out.  @len must be a multiple of the
+ * cipher block size.  XTS is driven through the generic XTS helper using
+ * the data and tweak handles; other modes go straight to libgcrypt.
+ * Returns 0 on success, -1 with @errp set on failure.
+ */
+int qcrypto_cipher_decrypt(QCryptoCipher *cipher, const void *in, void *out,
+                           size_t len, Error **errp)
+{
+    QCryptoCipherGcrypt *gctx = cipher->opaque;
+    gcry_error_t rc;
+
+    if (len % gctx->blocksize != 0) {
+        error_setg(errp, "Length %zu must be a multiple of block size %zu",
+                   len, gctx->blocksize);
+        return -1;
+    }
+
+    if (cipher->mode == QCRYPTO_CIPHER_MODE_XTS) {
+        xts_decrypt(gctx->handle, gctx->tweakhandle,
+                    qcrypto_gcrypt_xts_encrypt, qcrypto_gcrypt_xts_decrypt,
+                    gctx->iv, len, out, in);
+        return 0;
+    }
+
+    rc = gcry_cipher_decrypt(gctx->handle, out, len, in, len);
+    if (rc != 0) {
+        error_setg(errp, "Cannot decrypt data: %s", gcry_strerror(rc));
+        return -1;
+    }
+
+    return 0;
+}
+
+/* Set the IV (one block): local copy for XTS, else pushed to libgcrypt. */
+int qcrypto_cipher_setiv(QCryptoCipher *cipher, const uint8_t *iv,
+                         size_t niv, Error **errp)
+{
+    QCryptoCipherGcrypt *gctx = cipher->opaque;
+    gcry_error_t rc;
+
+    if (niv != gctx->blocksize) {
+        error_setg(errp, "Expected IV size %zu not %zu",
+                   gctx->blocksize, niv);
+        return -1;
+    }
+
+    if (gctx->iv != NULL) {
+        memcpy(gctx->iv, iv, niv);
+        return 0;
+    }
+
+    gcry_cipher_reset(gctx->handle);
+    rc = gcry_cipher_setiv(gctx->handle, iv, niv);
+    if (rc != 0) {
+        error_setg(errp, "Cannot set IV: %s", gcry_strerror(rc));
+        return -1;
+    }
+
+    return 0;
+}
diff --git a/crypto/cipher-nettle.c b/crypto/cipher-nettle.c
new file mode 100644
index 0000000..70909fb
--- /dev/null
+++ b/crypto/cipher-nettle.c
@@ -0,0 +1,489 @@
+/*
+ * QEMU Crypto cipher nettle algorithms
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "crypto/xts.h"
+
+#include <nettle/nettle-types.h>
+#include <nettle/aes.h>
+#include <nettle/des.h>
+#include <nettle/cbc.h>
+#include <nettle/cast128.h>
+#include <nettle/serpent.h>
+#include <nettle/twofish.h>
+
+typedef void (*QCryptoCipherNettleFuncWrapper)(const void *ctx,
+ size_t length,
+ uint8_t *dst,
+ const uint8_t *src); /* same signature as the *_wrapper functions below */
+
+#if CONFIG_NETTLE_VERSION_MAJOR < 3 /* nettle major version below 3 */
+typedef nettle_crypt_func * QCryptoCipherNettleFuncNative;
+typedef void * cipher_ctx_t; /* non-const context pointer in this branch */
+typedef unsigned cipher_length_t; /* lengths are plain 'unsigned' here */
+
+#define cast5_set_key cast128_set_key
+#else
+typedef nettle_cipher_func * QCryptoCipherNettleFuncNative;
+typedef const void * cipher_ctx_t; /* const context pointer in this branch */
+typedef size_t cipher_length_t; /* lengths are size_t here */
+#endif
+
+typedef struct QCryptoNettleAES {
+ struct aes_ctx enc; /* context handed to aes_encrypt() */
+ struct aes_ctx dec; /* context handed to aes_decrypt() */
+} QCryptoNettleAES;
+
+/* Native-signature AES encrypt: picks the 'enc' half of QCryptoNettleAES. */
+static void aes_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
+{
+    aes_encrypt(&((const QCryptoNettleAES *)ctx)->enc, length, dst, src);
+}
+
+/* Native-signature AES decrypt: picks the 'dec' half of QCryptoNettleAES. */
+static void aes_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+                               uint8_t *dst, const uint8_t *src)
+{
+    aes_decrypt(&((const QCryptoNettleAES *)ctx)->dec, length, dst, src);
+}
+
+static void des_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ des_encrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void des_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ des_decrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void cast128_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ cast128_encrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void cast128_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ cast128_decrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void serpent_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ serpent_encrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void serpent_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ serpent_decrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void twofish_encrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ twofish_encrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void twofish_decrypt_native(cipher_ctx_t ctx, cipher_length_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ twofish_decrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+/* Wrapper-signature AES encrypt: uses the 'enc' half of QCryptoNettleAES. */
+static void aes_encrypt_wrapper(const void *ctx, size_t length,
+                                uint8_t *dst, const uint8_t *src)
+{
+    aes_encrypt(&((const QCryptoNettleAES *)ctx)->enc, length, dst, src);
+}
+
+/* Wrapper-signature AES decrypt: uses the 'dec' half of QCryptoNettleAES. */
+static void aes_decrypt_wrapper(const void *ctx, size_t length,
+                                uint8_t *dst, const uint8_t *src)
+{
+    aes_decrypt(&((const QCryptoNettleAES *)ctx)->dec, length, dst, src);
+}
+
+static void des_encrypt_wrapper(const void *ctx, size_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ des_encrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void des_decrypt_wrapper(const void *ctx, size_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ des_decrypt(ctx, length, dst, src); /* arguments forwarded unchanged */
+}
+
+static void cast128_encrypt_wrapper(const void *ctx, size_t length,
+ uint8_t *dst, const uint8_t *src)
+{
+ cast128_encrypt(ctx