target-ppc: Clean up mullwo
Simplify the implementation of mullwo. For 64-bit CPUs, the result is
the concatenation of the upper and lower parts of the muls2_i32 operation,
which may be slightly better than deposit. For 32-bit CPUs, the lower part
of the muls2_i32 operation is moved into the target GPR.
Signed-off-by: Tom Musta <tommusta@gmail.com>
Suggested-by: Richard Henderson <rth@twiddle.net>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Alexander Graf <agraf@suse.de>
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ced295f..1062634 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1150,19 +1150,14 @@
{
TCGv_i32 t0 = tcg_temp_new_i32();
TCGv_i32 t1 = tcg_temp_new_i32();
-#if defined(TARGET_PPC64)
- TCGv_i64 t2 = tcg_temp_new_i64();
-#endif
tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]);
tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]);
tcg_gen_muls2_i32(t0, t1, t0, t1);
- tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx->opcode)], t0);
#if defined(TARGET_PPC64)
- tcg_gen_ext_i32_tl(t2, t1);
- tcg_gen_deposit_i64(cpu_gpr[rD(ctx->opcode)],
- cpu_gpr[rD(ctx->opcode)], t2, 32, 32);
- tcg_temp_free(t2);
+ tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx->opcode)], t0, t1);
+#else
+ tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], t0);
#endif
tcg_gen_sari_i32(t0, t0, 31);