diff --git a/cpu-all.h b/cpu-all.h
index 3cfb220..9a2a548 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -233,6 +233,15 @@
     int val;
     __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
     return val;
+#elif defined(__sparc__)
+#ifndef ASI_PRIMARY_LITTLE
+#define ASI_PRIMARY_LITTLE 0x88
+#endif
+
+    int val;
+    __asm__ __volatile__ ("lduha [%1] %2, %0" : "=r" (val) : "r" (ptr),
+                          "i" (ASI_PRIMARY_LITTLE));
+    return val;
 #else
     uint8_t *p = ptr;
     return p[0] | (p[1] << 8);
@@ -245,6 +254,11 @@
     int val;
     __asm__ __volatile__ ("lhbrx %0,0,%1" : "=r" (val) : "r" (ptr));
     return (int16_t)val;
+#elif defined(__sparc__)
+    int val;
+    __asm__ __volatile__ ("ldsha [%1] %2, %0" : "=r" (val) : "r" (ptr),
+                          "i" (ASI_PRIMARY_LITTLE));
+    return val;
 #else
     uint8_t *p = ptr;
     return (int16_t)(p[0] | (p[1] << 8));
@@ -257,6 +271,11 @@
     int val;
     __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (val) : "r" (ptr));
     return val;
+#elif defined(__sparc__)
+    int val;
+    __asm__ __volatile__ ("lduwa [%1] %2, %0" : "=r" (val) : "r" (ptr),
+                          "i" (ASI_PRIMARY_LITTLE));
+    return val;
 #else
     uint8_t *p = ptr;
     return p[0] | (p[1] << 8) | (p[2] << 16) | (p[3] << 24);
@@ -265,17 +284,27 @@
 
 static inline uint64_t ldq_le_p(void *ptr)
 {
+#if defined(__sparc__) && defined(__arch64__) /* ldxa needs 64-bit regs */
+    uint64_t val; /* dummy "m" input below tells GCC the asm reads *ptr */
+    __asm__ __volatile__ ("ldxa [%1] %2, %0" : "=r" (val) : "r" (ptr),
+                          "i" (ASI_PRIMARY_LITTLE), "m" (*(uint64_t *)ptr));
+    return val;
+#else /* fallback: two ldl_le_p() halves, low word first */
     uint8_t *p = ptr;
     uint32_t v1, v2;
     v1 = ldl_le_p(p);
     v2 = ldl_le_p(p + 4);
     return v1 | ((uint64_t)v2 << 32);
+#endif
 }
 
 static inline void stw_le_p(void *ptr, int v)
 {
 #ifdef __powerpc__
     __asm__ __volatile__ ("sthbrx %1,0,%2" : "=m" (*(uint16_t *)ptr) : "r" (v), "r" (ptr));
+#elif defined(__sparc__)
+    __asm__ __volatile__ ("stha %1, [%2] %3" : "=m" (*(uint16_t *)ptr) : "r" (v),
+                          "r" (ptr), "i" (ASI_PRIMARY_LITTLE));
 #else
     uint8_t *p = ptr;
     p[0] = v;
@@ -287,6 +316,9 @@
 {
 #ifdef __powerpc__
     __asm__ __volatile__ ("stwbrx %1,0,%2" : "=m" (*(uint32_t *)ptr) : "r" (v), "r" (ptr));
+#elif defined(__sparc__)
+    __asm__ __volatile__ ("stwa %1, [%2] %3" : "=m" (*(uint32_t *)ptr) : "r" (v),
+                          "r" (ptr), "i" (ASI_PRIMARY_LITTLE));
 #else
     uint8_t *p = ptr;
     p[0] = v;
@@ -298,9 +330,15 @@
 
 static inline void stq_le_p(void *ptr, uint64_t v)
 {
+#if defined(__sparc__) && defined(__arch64__) /* stxa needs 64-bit regs */
+    __asm__ __volatile__ ("stxa %1, [%2] %3" : "=m" (*(uint64_t *)ptr) : "r" (v),
+                          "r" (ptr), "i" (ASI_PRIMARY_LITTLE));
+#undef ASI_PRIMARY_LITTLE
+#else /* fallback: two stl_le_p() halves, low word first */
     uint8_t *p = ptr;
     stl_le_p(p, (uint32_t)v);
     stl_le_p(p + 4, v >> 32);
+#endif
 }
 
 /* float access */
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 276ec91..492ed3d 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -178,6 +178,7 @@
 #define INSN_RD(x)  ((x) << 25)
 #define INSN_RS1(x) ((x) << 14)
 #define INSN_RS2(x) (x)
+#define INSN_ASI(x) ((x) << 5) /* ASI operand, see tcg_out_ldst_asi() */
 
 #define INSN_IMM13(x) ((1 << 13) | ((x) & 0x1fff))
 #define INSN_OFF22(x) (((x) >> 2) & 0x3fffff)
@@ -242,6 +243,21 @@
 #define STH        (INSN_OP(3) | INSN_OP3(0x06))
 #define STW        (INSN_OP(3) | INSN_OP3(0x04))
 #define STX        (INSN_OP(3) | INSN_OP3(0x0e))
+#define LDUBA      (INSN_OP(3) | INSN_OP3(0x11)) /* ..A ops take INSN_ASI() */
+#define LDSBA      (INSN_OP(3) | INSN_OP3(0x19))
+#define LDUHA      (INSN_OP(3) | INSN_OP3(0x12))
+#define LDSHA      (INSN_OP(3) | INSN_OP3(0x1a))
+#define LDUWA      (INSN_OP(3) | INSN_OP3(0x10))
+#define LDSWA      (INSN_OP(3) | INSN_OP3(0x18))
+#define LDXA       (INSN_OP(3) | INSN_OP3(0x1b))
+#define STBA       (INSN_OP(3) | INSN_OP3(0x15))
+#define STHA       (INSN_OP(3) | INSN_OP3(0x16))
+#define STWA       (INSN_OP(3) | INSN_OP3(0x14))
+#define STXA       (INSN_OP(3) | INSN_OP3(0x1e))
+
+#ifndef ASI_PRIMARY_LITTLE /* keep any definition that is already in scope */
+#define ASI_PRIMARY_LITTLE 0x88
+#endif
 
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
                                  int op)
@@ -332,6 +348,14 @@
     }
 }
 
+static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
+                                    int offset, int op, int asi)
+{   /* Emit ld/st 'op' with an explicit ASI: rd=ret, rs1=I5, rs2=addr. */
+    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset); /* offset goes in I5 */
+    tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
+              INSN_ASI(asi) | INSN_RS2(addr)); /* no imm13 form is used here */
+}
+
 static inline void tcg_out_ld(TCGContext *s, TCGType type, int ret,
                               int arg1, tcg_target_long arg2)
 {
@@ -457,7 +481,7 @@
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, r0, r1, mem_index, s_bits, bswap, ld_op;
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits, ld_op;
 #if defined(CONFIG_SOFTMMU)
     uint8_t *label1_ptr, *label2_ptr;
 #endif
@@ -565,11 +589,6 @@
     r0 = addr_reg;
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 0;
-#else
-    bswap = 1;
-#endif
     switch(opc) {
     case 0:
         /* ldub [r0], data_reg */
@@ -580,39 +599,49 @@
         tcg_out_ldst(s, data_reg, r0, 0, LDSB);
         break;
     case 1:
+#ifdef TARGET_WORDS_BIGENDIAN
         /* lduh [r0], data_reg */
         tcg_out_ldst(s, data_reg, r0, 0, LDUH);
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#else
+        /* lduha [r0] ASI_PRIMARY_LITTLE, data_reg */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, LDUHA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 1 | 4:
+#ifdef TARGET_WORDS_BIGENDIAN
         /* ldsh [r0], data_reg */
         tcg_out_ldst(s, data_reg, r0, 0, LDSH);
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#else
+        /* ldsha [r0] ASI_PRIMARY_LITTLE, data_reg */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, LDSHA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 2:
+#ifdef TARGET_WORDS_BIGENDIAN
         /* lduw [r0], data_reg */
         tcg_out_ldst(s, data_reg, r0, 0, LDUW);
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#else
+        /* lduwa [r0] ASI_PRIMARY_LITTLE, data_reg */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, LDUWA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 2 | 4:
+#ifdef TARGET_WORDS_BIGENDIAN
         /* ldsw [r0], data_reg */
         tcg_out_ldst(s, data_reg, r0, 0, LDSW);
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#else
+        /* ldswa [r0] ASI_PRIMARY_LITTLE, data_reg */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, LDSWA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 3:
+#ifdef TARGET_WORDS_BIGENDIAN
         /* ldx [r0], data_reg */
         tcg_out_ldst(s, data_reg, r0, 0, LDX);
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#else
+        /* ldxa [r0] ASI_PRIMARY_LITTLE, data_reg */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, LDXA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     default:
         tcg_abort();
@@ -629,7 +658,7 @@
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_reg, data_reg, r0, r1, mem_index, s_bits, bswap, ld_op;
+    int addr_reg, data_reg, r0, r1, mem_index, s_bits, ld_op;
 #if defined(CONFIG_SOFTMMU)
     uint8_t *label1_ptr, *label2_ptr;
 #endif
@@ -737,36 +766,37 @@
     r0 = addr_reg;
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-    bswap = 0;
-#else
-    bswap = 1;
-#endif
     switch(opc) {
     case 0:
         /* stb data_reg, [r0] */
         tcg_out_ldst(s, data_reg, r0, 0, STB);
         break;
     case 1:
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#ifdef TARGET_WORDS_BIGENDIAN
         /* sth data_reg, [r0] */
         tcg_out_ldst(s, data_reg, r0, 0, STH);
+#else
+        /* stha data_reg, [r0] ASI_PRIMARY_LITTLE */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, STHA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 2:
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#ifdef TARGET_WORDS_BIGENDIAN
         /* stw data_reg, [r0] */
         tcg_out_ldst(s, data_reg, r0, 0, STW);
+#else
+        /* stwa data_reg, [r0] ASI_PRIMARY_LITTLE */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, STWA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     case 3:
-        if (bswap) {
-            fprintf(stderr, "unimplemented %s with bswap\n", __func__);
-        }
+#ifdef TARGET_WORDS_BIGENDIAN
         /* stx data_reg, [r0] */
         tcg_out_ldst(s, data_reg, r0, 0, STX);
+#else
+        /* stxa data_reg, [r0] ASI_PRIMARY_LITTLE */
+        tcg_out_ldst_asi(s, data_reg, r0, 0, STXA, ASI_PRIMARY_LITTLE);
+#endif
         break;
     default:
         tcg_abort();
