diff mbox

[5/5] target/sh4: fix RTE instruction delay slot

Message ID 20170516224721.13832-6-aurelien@aurel32.net (mailing list archive)
State New, archived
Headers show

Commit Message

Aurelien Jarno May 16, 2017, 10:47 p.m. UTC
The ReTurn from Exception (RTE) instruction loads the system register
(SR) with the saved system register (SSR). It has a delay slot, and
behaves specially according to the SH4 manual:

  The SR value accessed by the instruction in the RTE delay slot is the
  value restored from SSR by the RTE instruction. The SR and MD values
  defined prior to RTE execution are used to fetch the instruction in
  the RTE delay slot.

The instruction in the delay slot is often a NOP, so this doesn't cause
any issue most of the time, except in some rare cases where the NOP is
split into a different TB (for example when the TCG op buffer
is full). In that case the NOP is fetched with the user permissions
and causes an instruction TLB protection violation exception.

This patch fixes that by introducing a new delay slot flag for the
RTE instruction. Given it's a privileged instruction, the RTE delay
slot instruction is always fetched in privileged mode. It is therefore
enough to check for this flag in cpu_mmu_index.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 target/sh4/cpu.h       | 13 ++++++++++---
 target/sh4/translate.c |  8 ++++++--
 2 files changed, 16 insertions(+), 5 deletions(-)

Comments

Richard Henderson May 24, 2017, 11:11 p.m. UTC | #1
On 05/16/2017 03:47 PM, Aurelien Jarno wrote:
> The ReTurn from Exception (RTE) instruction loads the system register
> (SR) with the saved system register (SSR). It has a delay slot, and
> behaves specially according to the SH4 manual:
> 
>    The SR value accessed by the instruction in the RTE delay slot is the
>    value restored from SSR by the RTE instruction. The SR and MD values
>    defined prior to RTE execution are used to fetch the instruction in
>    the RTE delay slot.
> 
> The instruction in the delay slot is often a NOP, so this doesn't cause
> any issue most of the time, except in some rare cases where the NOP is
> split into a different TB (for example when the TCG op buffer
> is full). In that case the NOP is fetched with the user permissions
> and causes an instruction TLB protection violation exception.
> 
> This patch fixes that by introducing a new delay slot flag for the
> RTE instruction. Given it's a privileged instruction, the RTE delay
> slot instruction is always fetched in privileged mode. It is therefore
> enough to check for this flag in cpu_mmu_index.
> 
> Signed-off-by: Aurelien Jarno<aurelien@aurel32.net>
> ---
>   target/sh4/cpu.h       | 13 ++++++++++---
>   target/sh4/translate.c |  8 ++++++--
>   2 files changed, 16 insertions(+), 5 deletions(-)

Reviewed-by: Richard Henderson <rth@twiddle.net>


r~
diff mbox

Patch

diff --git a/target/sh4/cpu.h b/target/sh4/cpu.h
index 7969c9af98..ffb91687b8 100644
--- a/target/sh4/cpu.h
+++ b/target/sh4/cpu.h
@@ -91,9 +91,10 @@ 
 #define FPSCR_RM_NEAREST       (0 << 0)
 #define FPSCR_RM_ZERO          (1 << 0)
 
-#define DELAY_SLOT_MASK        0x3
+#define DELAY_SLOT_MASK        0x7
 #define DELAY_SLOT             (1 << 0)
 #define DELAY_SLOT_CONDITIONAL (1 << 1)
+#define DELAY_SLOT_RTE         (1 << 2)
 
 typedef struct tlb_t {
     uint32_t vpn;		/* virtual page number */
@@ -264,7 +265,13 @@  void cpu_load_tlb(CPUSH4State * env);
 #define MMU_USER_IDX 1
 static inline int cpu_mmu_index (CPUSH4State *env, bool ifetch)
 {
-    return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
+    /* The instruction in a RTE delay slot is fetched in privileged
+       mode, but executed in user mode.  */
+    if (ifetch && (env->flags & DELAY_SLOT_RTE)) {
+        return 0;
+    } else {
+        return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0;
+    }
 }
 
 #include "exec/cpu-all.h"
@@ -381,7 +388,7 @@  static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc,
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = (env->flags & DELAY_SLOT_MASK)                    /* Bits  0- 1 */
+    *flags = (env->flags & DELAY_SLOT_MASK)                    /* Bits  0- 2 */
             | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR))  /* Bits 19-21 */
             | (env->sr & ((1u << SR_MD) | (1u << SR_RB)))      /* Bits 29-30 */
             | (env->sr & (1u << SR_FD))                        /* Bit 15 */
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index aba316f593..8bc132b27b 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -185,6 +185,9 @@  void superh_cpu_dump_state(CPUState *cs, FILE *f,
     } else if (env->flags & DELAY_SLOT_CONDITIONAL) {
 	cpu_fprintf(f, "in conditional delay slot (delayed_pc=0x%08x)\n",
 		    env->delayed_pc);
+    } else if (env->flags & DELAY_SLOT_RTE) {
+        cpu_fprintf(f, "in rte delay slot (delayed_pc=0x%08x)\n",
+                    env->delayed_pc);
     }
 }
 
@@ -427,8 +430,9 @@  static void _decode_opc(DisasContext * ctx)
 	CHECK_NOT_DELAY_SLOT
         gen_write_sr(cpu_ssr);
 	tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc);
-        ctx->envflags |= DELAY_SLOT;
+        ctx->envflags |= DELAY_SLOT_RTE;
 	ctx->delayed_pc = (uint32_t) - 1;
+        ctx->bstate = BS_STOP;
 	return;
     case 0x0058:		/* sets */
         tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S));
@@ -1804,7 +1808,7 @@  static void decode_opc(DisasContext * ctx)
         ctx->bstate = BS_BRANCH;
         if (old_flags & DELAY_SLOT_CONDITIONAL) {
 	    gen_delayed_conditional_jump(ctx);
-        } else if (old_flags & DELAY_SLOT) {
+        } else {
             gen_jump(ctx);
 	}