diff mbox series

[34/39] target/hexagon: Add TLB, k0 {un,}lock

Message ID 20250301052845.1012069-35-brian.cain@oss.qualcomm.com (mailing list archive)
State New
Headers show
Series hexagon system emu, part 2/3 | expand

Commit Message

Brian Cain March 1, 2025, 5:28 a.m. UTC
From: Brian Cain <bcain@quicinc.com>

Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
---
 target/hexagon/sys_macros.h |   8 +--
 target/hexagon/op_helper.c  | 104 ++++++++++++++++++++++++++++++++++++
 2 files changed, 108 insertions(+), 4 deletions(-)

Comments

Brian Cain March 3, 2025, 4:24 p.m. UTC | #1
On 2/28/2025 11:28 PM, Brian Cain wrote:
> From: Brian Cain <bcain@quicinc.com>
>
> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
> ---
>   target/hexagon/sys_macros.h |   8 +--
>   target/hexagon/op_helper.c  | 104 ++++++++++++++++++++++++++++++++++++
>   2 files changed, 108 insertions(+), 4 deletions(-)
>
> diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h
> index 3c4c3c7aa5..e5dc1ce0ab 100644
> --- a/target/hexagon/sys_macros.h
> +++ b/target/hexagon/sys_macros.h
> @@ -143,11 +143,11 @@
>   #define fDCINVIDX(REG)
>   #define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */
>   
> -#define fSET_TLB_LOCK()       g_assert_not_reached()
> -#define fCLEAR_TLB_LOCK()     g_assert_not_reached()
> +#define fSET_TLB_LOCK()       hex_tlb_lock(env);
> +#define fCLEAR_TLB_LOCK()     hex_tlb_unlock(env);
>   
> -#define fSET_K0_LOCK()        g_assert_not_reached()
> -#define fCLEAR_K0_LOCK()      g_assert_not_reached()
> +#define fSET_K0_LOCK()        hex_k0_lock(env);
> +#define fCLEAR_K0_LOCK()      hex_k0_unlock(env);
>   
>   #define fTLB_IDXMASK(INDEX) \
>       ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1))
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 702c3dd3c6..f3b14fbf58 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1184,6 +1184,110 @@ void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
>       BQL_LOCK_GUARD();
>       hexagon_modify_ssr(env, new, old);
>   }
> +
> +static void hex_k0_lock(CPUHexagonState *env)
> +{
> +    BQL_LOCK_GUARD();
> +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> +
> +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
> +    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
> +        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
> +            env->next_PC += 4;
> +            env->k0_lock_count++;
> +            env->k0_lock_state = HEX_LOCK_OWNER;
> +            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> +            return;
> +        }
> +        if (env->k0_lock_state == HEX_LOCK_OWNER) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Double k0lock at PC: 0x%x, thread may hang\n",
> +                          env->next_PC);
> +            env->next_PC += 4;
> +            CPUState *cs = env_cpu(env);
> +            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> +            return;
> +        }
> +        env->k0_lock_state = HEX_LOCK_WAITING;
> +        CPUState *cs = env_cpu(env);
> +        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> +    } else {
> +        env->next_PC += 4;
> +        env->k0_lock_count++;
> +        env->k0_lock_state = HEX_LOCK_OWNER;
> +        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> +    }
> +
> +}

This was discussed previously at 
https://lore.kernel.org/qemu-devel/CH3PR02MB102479550F96F09E0C9D50BA7B87B2@CH3PR02MB10247.namprd02.prod.outlook.com/

We have taken some, but not all, of the suggestions from that discussion.
One of our concerns is an architectural requirement for "fairness" in
selecting which hardware thread the lock is passed to.  If we unleash
the thundering herd, does this just mean that fairness depends on the
host scheduler design / configuration?

Also - I note that we didn't take the suggestions regarding 
cpu_loop_exit / cpu_loop_exit_restore.  That was an oversight, the next 
revision will include that update.

> +
> +static void hex_k0_unlock(CPUHexagonState *env)
> +{
> +    BQL_LOCK_GUARD();
> +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> +
> +    /* Nothing to do if the k0 isn't locked by this thread */
> +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
> +    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
> +        (env->k0_lock_state != HEX_LOCK_OWNER)) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "thread %d attempted to unlock k0 without having the "
> +                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
> +                      env->threadId, env->k0_lock_state,
> +                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
> +        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
> +        return;
> +    }
> +
> +    env->k0_lock_count--;
> +    env->k0_lock_state = HEX_LOCK_UNLOCKED;
> +    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
> +
> +    /* Look for a thread to unlock */
> +    unsigned int this_threadId = env->threadId;
> +    CPUHexagonState *unlock_thread = NULL;
> +    CPUState *cs;
> +    CPU_FOREACH(cs) {
> +        CPUHexagonState *thread = cpu_env(cs);
> +
> +        /*
> +         * The hardware implements round-robin fairness, so we look for threads
> +         * starting at env->threadId + 1 and incrementing modulo the number of
> +         * threads.
> +         *
> +         * To implement this, we check if thread is earlier in the modulo
> +         * sequence than unlock_thread.
> +         *     if unlock thread is higher than this thread
> +         *         thread must be between this thread and unlock_thread
> +         *     else
> +         *         thread higher than this thread is ahead of unlock_thread
> +         *         thread must be lower than unlock_thread
> +         */
> +        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
> +            if (!unlock_thread) {
> +                unlock_thread = thread;
> +            } else if (unlock_thread->threadId > this_threadId) {
> +                if (this_threadId < thread->threadId &&
> +                    thread->threadId < unlock_thread->threadId) {
> +                    unlock_thread = thread;
> +                }
> +            } else {
> +                if (thread->threadId > this_threadId) {
> +                    unlock_thread = thread;
> +                }
> +                if (thread->threadId < unlock_thread->threadId) {
> +                    unlock_thread = thread;
> +                }
> +            }
> +        }
> +    }
> +    if (unlock_thread) {
> +        cs = env_cpu(unlock_thread);
> +        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
> +        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
> +        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
> +    }
> +
> +}
>   #endif
>   
>
diff mbox series

Patch

diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h
index 3c4c3c7aa5..e5dc1ce0ab 100644
--- a/target/hexagon/sys_macros.h
+++ b/target/hexagon/sys_macros.h
@@ -143,11 +143,11 @@ 
 #define fDCINVIDX(REG)
 #define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */
 
-#define fSET_TLB_LOCK()       g_assert_not_reached()
-#define fCLEAR_TLB_LOCK()     g_assert_not_reached()
+#define fSET_TLB_LOCK()       hex_tlb_lock(env);
+#define fCLEAR_TLB_LOCK()     hex_tlb_unlock(env);
 
-#define fSET_K0_LOCK()        g_assert_not_reached()
-#define fCLEAR_K0_LOCK()      g_assert_not_reached()
+#define fSET_K0_LOCK()        hex_k0_lock(env);
+#define fCLEAR_K0_LOCK()      hex_k0_unlock(env);
 
 #define fTLB_IDXMASK(INDEX) \
     ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1))
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 702c3dd3c6..f3b14fbf58 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1184,6 +1184,110 @@  void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
     BQL_LOCK_GUARD();
     hexagon_modify_ssr(env, new, old);
 }
+
+static void hex_k0_lock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
+        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
+            env->next_PC += 4;
+            env->k0_lock_count++;
+            env->k0_lock_state = HEX_LOCK_OWNER;
+            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+            return;
+        }
+        if (env->k0_lock_state == HEX_LOCK_OWNER) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "Double k0lock at PC: 0x%x, thread may hang\n",
+                          env->next_PC);
+            env->next_PC += 4;
+            CPUState *cs = env_cpu(env);
+            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+            return;
+        }
+        env->k0_lock_state = HEX_LOCK_WAITING;
+        CPUState *cs = env_cpu(env);
+        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
+    } else {
+        env->next_PC += 4;
+        env->k0_lock_count++;
+        env->k0_lock_state = HEX_LOCK_OWNER;
+        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
+    }
+
+}
+
+static void hex_k0_unlock(CPUHexagonState *env)
+{
+    BQL_LOCK_GUARD();
+    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
+
+    /* Nothing to do if the k0 isn't locked by this thread */
+    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
+    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
+        (env->k0_lock_state != HEX_LOCK_OWNER)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "thread %d attempted to unlock k0 without having the "
+                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
+                      env->threadId, env->k0_lock_state,
+                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
+        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
+        return;
+    }
+
+    env->k0_lock_count--;
+    env->k0_lock_state = HEX_LOCK_UNLOCKED;
+    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
+
+    /* Look for a thread to unlock */
+    unsigned int this_threadId = env->threadId;
+    CPUHexagonState *unlock_thread = NULL;
+    CPUState *cs;
+    CPU_FOREACH(cs) {
+        CPUHexagonState *thread = cpu_env(cs);
+
+        /*
+         * The hardware implements round-robin fairness, so we look for threads
+         * starting at env->threadId + 1 and incrementing modulo the number of
+         * threads.
+         *
+         * To implement this, we check if thread is earlier in the modulo
+         * sequence than unlock_thread.
+         *     if unlock thread is higher than this thread
+         *         thread must be between this thread and unlock_thread
+         *     else
+         *         thread higher than this thread is ahead of unlock_thread
+         *         thread must be lower than unlock_thread
+         */
+        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
+            if (!unlock_thread) {
+                unlock_thread = thread;
+            } else if (unlock_thread->threadId > this_threadId) {
+                if (this_threadId < thread->threadId &&
+                    thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            } else {
+                if (thread->threadId > this_threadId) {
+                    unlock_thread = thread;
+                }
+                if (thread->threadId < unlock_thread->threadId) {
+                    unlock_thread = thread;
+                }
+            }
+        }
+    }
+    if (unlock_thread) {
+        cs = env_cpu(unlock_thread);
+        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
+        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
+        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
+    }
+
+}
 #endif