virtio-pci: Use ioeventfd for virtqueue notify

Virtqueue notify is currently handled synchronously in userspace virtio.  This
prevents the vcpu from executing guest code while hardware emulation code
handles the notify.

On systems that support KVM, the ioeventfd mechanism can be used to make
virtqueue notify a lightweight exit by deferring hardware emulation to the
iothread and allowing the VM to continue execution.  This model is similar to
how vhost receives virtqueue notifies.

The result of this change is improved performance for userspace virtio devices.
Virtio-blk throughput increases especially for multithreaded scenarios and
virtio-net transmit throughput increases substantially.

Some virtio devices are known to have guest drivers which expect a notify to be
processed synchronously and spin waiting for completion.  For virtio-net, this
also seems to interact with the guest stack in strange ways, so that TCP
throughput for small message sizes (~200 bytes) is harmed.  Therefore, only
enable ioeventfd for virtio-blk for now.

Care must be taken not to interfere with vhost-net, which uses host
notifiers.  If the set_host_notifier() API is used by a device,
virtio-pci will disable virtio-ioeventfd and let the device deal with
host notifiers as it wishes.

Finally, there used to be a limit of 6 KVM io bus devices inside the
kernel.  On such a kernel, don't use ioeventfd for virtqueue host
notification since the limit is reached too easily.  This ensures that
existing vhost-net setups (which always use ioeventfd) have ioeventfds
available so they can continue to work.

After migration and on VM state changes (running/paused), virtio-ioeventfd
will enable or disable itself according to the following rules:

 * VIRTIO_CONFIG_S_DRIVER_OK -> enable virtio-ioeventfd
 * !VIRTIO_CONFIG_S_DRIVER_OK -> disable virtio-ioeventfd
 * virtio_pci_set_host_notifier() -> disable virtio-ioeventfd
 * vm_change_state(running=0) -> disable virtio-ioeventfd
 * vm_change_state(running=1) -> enable virtio-ioeventfd

Signed-off-by: Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
3 files changed
tree: f1a5117bf6ffe11264f9da55b00ac5f803076269
  1. audio/
  2. block/
  3. bsd-user/
  4. darwin-user/
  5. default-configs/
  6. docs/
  7. fpu/
  8. fsdev/
  9. gdb-xml/
  10. hw/
  11. linux-user/
  12. net/
  13. pc-bios/
  14. QMP/
  15. roms/
  16. slirp/
  17. sysconfigs/
  18. target-alpha/
  19. target-arm/
  20. target-cris/
  21. target-i386/
  22. target-m68k/
  23. target-microblaze/
  24. target-mips/
  25. target-ppc/
  26. target-s390x/
  27. target-sh4/
  28. target-sparc/
  29. tcg/
  30. tests/
  31. ui/
  32. .gitignore
  33. .gitmodules
  34. a.out.h
  35. acl.c
  36. acl.h
  37. aes.c
  38. aes.h
  39. aio.c
  40. alpha-dis.c
  41. alpha.ld
  42. arch_init.c
  43. arch_init.h
  44. arm-dis.c
  45. arm-semi.c
  46. arm.ld
  47. async.c
  48. balloon.c
  49. balloon.h
  50. block-migration.c
  51. block-migration.h
  52. block.c
  53. block.h
  54. block_int.h
  55. blockdev.c
  56. blockdev.h
  57. bswap.h
  58. bt-host.c
  59. bt-host.h
  60. bt-vhci.c
  61. buffered_file.c
  62. buffered_file.h
  63. cache-utils.c
  64. cache-utils.h
  65. Changelog
  66. check-qdict.c
  67. check-qfloat.c
  68. check-qint.c
  69. check-qjson.c
  70. check-qlist.c
  71. check-qstring.c
  72. cmd.c
  73. cmd.h
  74. CODING_STYLE
  75. compatfd.c
  76. compatfd.h
  77. config.h
  78. configure
  79. console.c
  80. console.h
  81. COPYING
  82. COPYING.LIB
  83. cpu-all.h
  84. cpu-common.h
  85. cpu-defs.h
  86. cpu-exec.c
  87. cpus.c
  88. cpus.h
  89. create_config
  90. cris-dis.c
  91. cursor.c
  92. cursor_hidden.xpm
  93. cursor_left_ptr.xpm
  94. cutils.c
  95. def-helper.h
  96. device_tree.c
  97. device_tree.h
  98. dis-asm.h
  99. disas.c
  100. disas.h
  101. dma-helpers.c
  102. dma.h
  103. dyngen-exec.h
  104. elf.h
  105. envlist.c
  106. envlist.h
  107. exec-all.h
  108. exec.c
  109. feature_to_c.sh
  110. gdbstub.c
  111. gdbstub.h
  112. gen-icount.h
  113. HACKING
  114. hmp-commands.hx
  115. host-utils.c
  116. host-utils.h
  117. hpet.h
  118. hppa-dis.c
  119. hppa.ld
  120. hxtool
  121. i386-dis.c
  122. i386.ld
  123. ia64-dis.c
  124. ia64.ld
  125. input.c
  126. ioport-user.c
  127. ioport.c
  128. ioport.h
  129. iorange.h
  130. iov.c
  131. iov.h
  132. json-lexer.c
  133. json-lexer.h
  134. json-parser.c
  135. json-parser.h
  136. json-streamer.c
  137. json-streamer.h
  138. kvm-all.c
  139. kvm-stub.c
  140. kvm.h
  141. libfdt_env.h
  142. LICENSE
  143. linux-aio.c
  144. m68k-dis.c
  145. m68k-semi.c
  146. m68k.ld
  147. MAINTAINERS
  148. make_device_config.sh
  149. Makefile
  150. Makefile.dis
  151. Makefile.hw
  152. Makefile.objs
  153. Makefile.target
  154. Makefile.user
  155. microblaze-dis.c
  156. migration-exec.c
  157. migration-fd.c
  158. migration-tcp.c
  159. migration-unix.c
  160. migration.c
  161. migration.h
  162. mips-dis.c
  163. mips.ld
  164. module.c
  165. module.h
  166. monitor.c
  167. monitor.h
  168. nbd.c
  169. nbd.h
  170. net-checksum.c
  171. net.c
  172. net.h
  173. notify.c
  174. notify.h
  175. os-posix.c
  176. os-win32.c
  177. osdep.c
  178. osdep.h
  179. oslib-posix.c
  180. oslib-win32.c
  181. path.c
  182. pci-ids.txt
  183. pflib.c
  184. pflib.h
  185. poison.h
  186. posix-aio-compat.c
  187. ppc-dis.c
  188. ppc.ld
  189. ppc64.ld
  190. qbool.c
  191. qbool.h
  192. qdict-test-data.txt
  193. qdict.c
  194. qdict.h
  195. qemu-aio.h
  196. qemu-barrier.h
  197. qemu-binfmt-conf.sh
  198. qemu-char.c
  199. qemu-char.h
  200. qemu-common.h
  201. qemu-config.c
  202. qemu-config.h
  203. qemu-doc.texi
  204. qemu-error.c
  205. qemu-error.h
  206. qemu-img-cmds.hx
  207. qemu-img.c
  208. qemu-img.texi
  209. qemu-io.c
  210. qemu-lock.h
  211. qemu-log.h
  212. qemu-malloc.c
  213. qemu-nbd.c
  214. qemu-nbd.texi
  215. qemu-objects.h
  216. qemu-option.c
  217. qemu-option.h
  218. qemu-options.h
  219. qemu-options.hx
  220. qemu-os-posix.h
  221. qemu-os-win32.h
  222. qemu-queue.h
  223. qemu-sockets.c
  224. qemu-tech.texi
  225. qemu-thread.c
  226. qemu-thread.h
  227. qemu-timer-common.c
  228. qemu-timer.c
  229. qemu-timer.h
  230. qemu-tool.c
  231. qemu-x509.h
  232. qemu.sasl
  233. qemu_socket.h
  234. qerror.c
  235. qerror.h
  236. qfloat.c
  237. qfloat.h
  238. qint.c
  239. qint.h
  240. qjson.c
  241. qjson.h
  242. qlist.c
  243. qlist.h
  244. qmp-commands.hx
  245. qobject.h
  246. qstring.c
  247. qstring.h
  248. range.h
  249. readline.c
  250. readline.h
  251. README
  252. rules.mak
  253. rwhandler.c
  254. rwhandler.h
  255. s390-dis.c
  256. s390.ld
  257. savevm.c
  258. sh4-dis.c
  259. simpletrace.c
  260. simpletrace.h
  261. simpletrace.py
  262. softmmu-semi.h
  263. softmmu_defs.h
  264. softmmu_exec.h
  265. softmmu_header.h
  266. softmmu_template.h
  267. sparc-dis.c
  268. sparc.ld
  269. sparc64.ld
  270. sysemu.h
  271. targphys.h
  272. tcg-runtime.c
  273. texi2pod.pl
  274. thunk.c
  275. thunk.h
  276. TODO
  277. trace-events
  278. tracetool
  279. translate-all.c
  280. uboot_image.h
  281. usb-bsd.c
  282. usb-linux.c
  283. usb-stub.c
  284. VERSION
  285. version.rc
  286. vgafont.h
  287. vl.c
  288. x86_64.ld