target-ppc: fix evmergelo and evmergelohi

For 32-bit PPC targets, we translated:

evmergelo rX, rX, rY

as:

rX-lo = rY-lo
rX-hi = rX-lo

which is wrong, because rX-lo has already been overwritten with rY-lo
by the time it is copied into rX-hi; we should be transferring rX-lo
first.  This problem is fixed by swapping the order in which we write
the parts of rX.

Similarly, we translated:

evmergelohi rX, rX, rY

as:

rX-lo = rY-hi
rX-hi = rX-lo

In this case, we can't simply swap the assignment statements, because
that would just move the same clobbering problem (rX-hi being
overwritten before it is read) to:

evmergelohi rX, rY, rX

Instead, we detect the first case (the destination register is also
the first source register) and save rX-lo in a temporary variable:

tmp = rX-lo
rX-lo = rY-hi
rX-hi = tmp

These problems don't occur on PPC64 targets because we don't split the
SPE registers into hi/lo parts for such targets.

Signed-off-by: Nathan Froyd <froydnj@codesourcery.com>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a14d197..b278638 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -6908,8 +6908,8 @@
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 #else
-    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
     tcg_gen_mov_i32(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
 #endif
 }
 static always_inline void gen_evmergehilo (DisasContext *ctx)
@@ -6946,8 +6946,16 @@
     tcg_temp_free(t0);
     tcg_temp_free(t1);
 #else
-    tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
-    tcg_gen_mov_i32(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+    if (rD(ctx->opcode) == rA(ctx->opcode)) {
+        TCGv_i32 tmp = tcg_temp_new_i32();
+        tcg_gen_mov_i32(tmp, cpu_gpr[rA(ctx->opcode)]);
+        tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
+        tcg_gen_mov_i32(cpu_gprh[rD(ctx->opcode)], tmp);
+        tcg_temp_free_i32(tmp);
+    } else {
+        tcg_gen_mov_i32(cpu_gpr[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
+        tcg_gen_mov_i32(cpu_gprh[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
+    }
 #endif
 }
 static always_inline void gen_evsplati (DisasContext *ctx)