| Message ID | 20250301052845.1012069-35-brian.cain@oss.qualcomm.com (mailing list archive) |
|---|---|
| State | New |
| Series | hexagon system emu, part 2/3 |
On 2/28/2025 11:28 PM, Brian Cain wrote:
> From: Brian Cain <bcain@quicinc.com>
>
> Signed-off-by: Brian Cain <brian.cain@oss.qualcomm.com>
> ---
>  target/hexagon/sys_macros.h |   8 +--
>  target/hexagon/op_helper.c  | 104 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 108 insertions(+), 4 deletions(-)
>
> diff --git a/target/hexagon/sys_macros.h b/target/hexagon/sys_macros.h
> index 3c4c3c7aa5..e5dc1ce0ab 100644
> --- a/target/hexagon/sys_macros.h
> +++ b/target/hexagon/sys_macros.h
> @@ -143,11 +143,11 @@
>  #define fDCINVIDX(REG)
>  #define fDCINVA(REG) do { REG = REG; } while (0) /* Nothing to do in qemu */
>
> -#define fSET_TLB_LOCK() g_assert_not_reached()
> -#define fCLEAR_TLB_LOCK() g_assert_not_reached()
> +#define fSET_TLB_LOCK() hex_tlb_lock(env);
> +#define fCLEAR_TLB_LOCK() hex_tlb_unlock(env);
>
> -#define fSET_K0_LOCK() g_assert_not_reached()
> -#define fCLEAR_K0_LOCK() g_assert_not_reached()
> +#define fSET_K0_LOCK() hex_k0_lock(env);
> +#define fCLEAR_K0_LOCK() hex_k0_unlock(env);
>
>  #define fTLB_IDXMASK(INDEX) \
>      ((INDEX) & (fPOW2_ROUNDUP(fCAST4u(env_archcpu(env)->num_tlbs)) - 1))
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 702c3dd3c6..f3b14fbf58 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1184,6 +1184,110 @@ void HELPER(modify_ssr)(CPUHexagonState *env, uint32_t new, uint32_t old)
>      BQL_LOCK_GUARD();
>      hexagon_modify_ssr(env, new, old);
>  }
> +
> +static void hex_k0_lock(CPUHexagonState *env)
> +{
> +    BQL_LOCK_GUARD();
> +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> +
> +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
> +    if (GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg)) {
> +        if (env->k0_lock_state == HEX_LOCK_QUEUED) {
> +            env->next_PC += 4;
> +            env->k0_lock_count++;
> +            env->k0_lock_state = HEX_LOCK_OWNER;
> +            SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> +            return;
> +        }
> +        if (env->k0_lock_state == HEX_LOCK_OWNER) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "Double k0lock at PC: 0x%x, thread may hang\n",
> +                          env->next_PC);
> +            env->next_PC += 4;
> +            CPUState *cs = env_cpu(env);
> +            cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> +            return;
> +        }
> +        env->k0_lock_state = HEX_LOCK_WAITING;
> +        CPUState *cs = env_cpu(env);
> +        cpu_interrupt(cs, CPU_INTERRUPT_HALT);
> +    } else {
> +        env->next_PC += 4;
> +        env->k0_lock_count++;
> +        env->k0_lock_state = HEX_LOCK_OWNER;
> +        SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 1);
> +    }
> +}

This was discussed previously at
https://lore.kernel.org/qemu-devel/CH3PR02MB102479550F96F09E0C9D50BA7B87B2@CH3PR02MB10247.namprd02.prod.outlook.com/
and we have taken some, but not all, of the suggestions from that review.
One of our concerns is an architectural requirement for "fairness" in
picking which hardware thread the lock is passed to. If we unleash the
thundering herd, does that just make fairness dependent on the host
scheduler's design and configuration?

Also, I note that we didn't take the suggestions regarding cpu_loop_exit /
cpu_loop_exit_restore. That was an oversight; the next revision will
include that update.
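Roughly, what we have in mind for the next revision's waiting path is the
halt-and-unwind idiom other targets use in their wfi/wait-style helpers.
An untested sketch only (hex_k0_lock_wait is a hypothetical helper name,
not part of this patch):

/*
 * Untested sketch, not the committed code: halt the waiting thread and
 * unwind out of the current TB immediately, rather than only raising
 * CPU_INTERRUPT_HALT and letting the TB run to completion.
 */
static G_NORETURN void hex_k0_lock_wait(CPUHexagonState *env)
{
    CPUState *cs = env_cpu(env);

    env->k0_lock_state = HEX_LOCK_WAITING;
    cs->halted = 1;
    cs->exception_index = EXCP_HLT;
    /* Does not return: longjmps back to the outer execution loop. */
    cpu_loop_exit(cs);
}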
> +
> +static void hex_k0_unlock(CPUHexagonState *env)
> +{
> +    BQL_LOCK_GUARD();
> +    g_assert((env->k0_lock_count == 0) || (env->k0_lock_count == 1));
> +
> +    /* Nothing to do if the k0 isn't locked by this thread */
> +    uint32_t syscfg = arch_get_system_reg(env, HEX_SREG_SYSCFG);
> +    if ((GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg) == 0) ||
> +        (env->k0_lock_state != HEX_LOCK_OWNER)) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "thread %d attempted to unlock k0 without having the "
> +                      "lock, k0_lock state = %d, syscfg:k0 = %d\n",
> +                      env->threadId, env->k0_lock_state,
> +                      GET_SYSCFG_FIELD(SYSCFG_K0LOCK, syscfg));
> +        g_assert(env->k0_lock_state != HEX_LOCK_WAITING);
> +        return;
> +    }
> +
> +    env->k0_lock_count--;
> +    env->k0_lock_state = HEX_LOCK_UNLOCKED;
> +    SET_SYSCFG_FIELD(env, SYSCFG_K0LOCK, 0);
> +
> +    /* Look for a thread to unlock */
> +    unsigned int this_threadId = env->threadId;
> +    CPUHexagonState *unlock_thread = NULL;
> +    CPUState *cs;
> +    CPU_FOREACH(cs) {
> +        CPUHexagonState *thread = cpu_env(cs);
> +
> +        /*
> +         * The hardware implements round-robin fairness, so we look for
> +         * threads starting at env->threadId + 1 and incrementing modulo
> +         * the number of threads.
> +         *
> +         * To implement this, we check whether thread is earlier in the
> +         * modulo sequence than unlock_thread:
> +         *     if unlock_thread is higher than this thread
> +         *         thread must be between this thread and unlock_thread
> +         *     else
> +         *         a thread higher than this thread is ahead of unlock_thread
> +         *         thread must be lower than unlock_thread
> +         */
> +        if (thread->k0_lock_state == HEX_LOCK_WAITING) {
> +            if (!unlock_thread) {
> +                unlock_thread = thread;
> +            } else if (unlock_thread->threadId > this_threadId) {
> +                if (this_threadId < thread->threadId &&
> +                    thread->threadId < unlock_thread->threadId) {
> +                    unlock_thread = thread;
> +                }
> +            } else {
> +                if (thread->threadId > this_threadId) {
> +                    unlock_thread = thread;
> +                }
> +                if (thread->threadId < unlock_thread->threadId) {
> +                    unlock_thread = thread;
> +                }
> +            }
> +        }
> +    }
> +    if (unlock_thread) {
> +        cs = env_cpu(unlock_thread);
> +        unlock_thread->k0_lock_state = HEX_LOCK_QUEUED;
> +        SET_SYSCFG_FIELD(unlock_thread, SYSCFG_K0LOCK, 1);
> +        cpu_interrupt(cs, CPU_INTERRUPT_K0_UNLOCK);
> +    }
> +}
>  #endif
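For what it's worth, the round-robin pick above amounts to minimizing the
wrapped thread-ID distance from the releasing thread. An untested sketch of
the same policy (hex_pick_next_waiter is a hypothetical name, assuming
threadId is an unsigned 32-bit ID):

/*
 * Untested sketch, equivalent policy to the selection loop in
 * hex_k0_unlock(): among all waiting threads, pick the one whose
 * threadId is the smallest non-zero distance ahead of the releasing
 * thread's.  Unsigned subtraction yields the wrapped (round-robin)
 * distance directly, since thread IDs are distinct.
 */
static CPUHexagonState *hex_pick_next_waiter(CPUHexagonState *env)
{
    CPUHexagonState *best = NULL;
    uint32_t best_dist = 0;
    CPUState *cs;

    CPU_FOREACH(cs) {
        CPUHexagonState *thread = cpu_env(cs);
        uint32_t dist = thread->threadId - env->threadId;

        if (thread->k0_lock_state == HEX_LOCK_WAITING && dist != 0 &&
            (!best || dist < best_dist)) {
            best_dist = dist;
            best = thread;
        }
    }
    return best;
}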