Merge branch 'usb.64' of git://git.kraxel.org/qemu

* 'usb.64' of git://git.kraxel.org/qemu: (54 commits)
  xhci: allow bytewise capability register reads
  xhci: kill xhci_mem_{read,write} dispatcher functions
  xhci: support multiple interrupters
  xhci: pick target interrupter
  xhci: prepare xhci_runtime_{read,write} for multiple interrupters
  xhci: add XHCIInterrupter
  xhci: move register update into xhci_intr_raise
  xhci: add msix support
  xhci: rework interrupt handling
  xhci: fix & cleanup msi.
  usb-storage: usb3 support
  usb3: bos descriptor
  usb3: superspeed endpoint companion
  usb3: superspeed descriptors
  xhci: update port handling
  xhci: update register layout
  xhci: fix runtime write tracepoint
  xhci: add trace_usb_xhci_ep_set_dequeue
  xhci: trace cc codes in cleartext
  xhci: iso xfer support
  ...
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c65474..fba0ed9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -267,6 +267,67 @@
     return res;
 }
 
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+                                       TCGArg y, TCGCond c)
+{
+    switch (op_bits(op)) {
+    case 32:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint32_t)x == (uint32_t)y;
+        case TCG_COND_NE:
+            return (uint32_t)x != (uint32_t)y;
+        case TCG_COND_LT:
+            return (int32_t)x < (int32_t)y;
+        case TCG_COND_GE:
+            return (int32_t)x >= (int32_t)y;
+        case TCG_COND_LE:
+            return (int32_t)x <= (int32_t)y;
+        case TCG_COND_GT:
+            return (int32_t)x > (int32_t)y;
+        case TCG_COND_LTU:
+            return (uint32_t)x < (uint32_t)y;
+        case TCG_COND_GEU:
+            return (uint32_t)x >= (uint32_t)y;
+        case TCG_COND_LEU:
+            return (uint32_t)x <= (uint32_t)y;
+        case TCG_COND_GTU:
+            return (uint32_t)x > (uint32_t)y;
+        }
+        break;
+    case 64:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint64_t)x == (uint64_t)y;
+        case TCG_COND_NE:
+            return (uint64_t)x != (uint64_t)y;
+        case TCG_COND_LT:
+            return (int64_t)x < (int64_t)y;
+        case TCG_COND_GE:
+            return (int64_t)x >= (int64_t)y;
+        case TCG_COND_LE:
+            return (int64_t)x <= (int64_t)y;
+        case TCG_COND_GT:
+            return (int64_t)x > (int64_t)y;
+        case TCG_COND_LTU:
+            return (uint64_t)x < (uint64_t)y;
+        case TCG_COND_GEU:
+            return (uint64_t)x >= (uint64_t)y;
+        case TCG_COND_LEU:
+            return (uint64_t)x <= (uint64_t)y;
+        case TCG_COND_GTU:
+            return (uint64_t)x > (uint64_t)y;
+        }
+        break;
+    }
+
+    fprintf(stderr,
+            "Unrecognized bitness %d or condition %d in "
+            "do_constant_folding_cond.\n", op_bits(op), c);
+    tcg_abort();
+}
+
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -318,11 +379,49 @@
                 args[2] = tmp;
             }
             break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state != TCG_TEMP_CONST) {
+                tmp = args[0];
+                args[0] = args[1];
+                args[1] = tmp;
+                args[2] = tcg_swap_cond(args[2]);
+            }
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[3] = tcg_swap_cond(args[3]);
+            }
+            break;
         default:
             break;
         }
 
-        /* Simplify expression if possible. */
+        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+        switch (op) {
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
@@ -331,6 +430,8 @@
         CASE_OP_32_64(sar):
         CASE_OP_32_64(rotl):
         CASE_OP_32_64(rotr):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 /* Proceed with possible constant folding. */
                 break;
@@ -340,18 +441,24 @@
                 if ((temps[args[0]].state == TCG_TEMP_COPY
                     && temps[args[0]].val == args[1])
                     || args[0] == args[1]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1],
                                     nb_temps, nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
@@ -362,19 +469,24 @@
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, a => mov r, a" cases */
+        switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
                 if (args[1] == args[0]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
                                     nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
@@ -424,17 +536,14 @@
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
-                gen_args += 2;
-                args += 2;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
-                gen_args += 2;
-                args += 2;
-                break;
             }
+            gen_args += 2;
+            args += 2;
+            break;
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -458,17 +567,56 @@
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                 gen_args += 2;
-                args += 3;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
                 gen_args += 3;
-                args += 3;
-                break;
             }
+            args += 3;
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = do_constant_folding_cond(op, temps[args[1]].val,
+                                               temps[args[2]].val, args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state == TCG_TEMP_CONST) {
+                if (do_constant_folding_cond(op, temps[args[0]].val,
+                                             temps[args[1]].val, args[2])) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[3];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -490,7 +638,6 @@
         case INDEX_op_set_label:
         case INDEX_op_jmp:
         case INDEX_op_br:
-        CASE_OP_32_64(brcond):
             memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             for (i = 0; i < def->nb_args; i++) {
                 *gen_args = *args;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8386b70..a4e7f42 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2059,22 +2059,29 @@
     }
 #endif
 
+#ifdef CONFIG_PROFILER
+    s->opt_time -= profile_getclock();
+#endif
+
 #ifdef USE_TCG_OPTIMIZATIONS
     gen_opparam_ptr =
         tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
 #endif
 
 #ifdef CONFIG_PROFILER
+    s->opt_time += profile_getclock();
     s->la_time -= profile_getclock();
 #endif
+
     tcg_liveness_analysis(s);
+
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
 #endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
-        qemu_log("OP after liveness analysis:\n");
+        qemu_log("OP after optimization and liveness analysis:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
     }
@@ -2241,6 +2248,9 @@
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
                 (double)s->code_time / tot * 100.0);
+    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
+                (double)s->opt_time / (s->code_time ? s->code_time : 1)
+                * 100.0);
     cpu_fprintf(f, "liveness/code time  %0.1f%%\n", 
                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index d710694..7a72729 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -382,6 +382,7 @@
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
+    int64_t opt_time;
     int64_t restore_count;
     int64_t restore_time;
 #endif