@@ -124,9 +124,16 @@
#define SYS_OSDTRTX_EL1 sys_reg(2, 0, 0, 3, 2)
#define SYS_OSECCR_EL1 sys_reg(2, 0, 0, 6, 2)
#define SYS_DBGBVRn_EL1(n) sys_reg(2, 0, 0, n, 4)
-#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
#define SYS_DBGWVRn_EL1(n) sys_reg(2, 0, 0, n, 6)
+
+#define SYS_DBGBCRn_EL1(n) sys_reg(2, 0, 0, n, 5)
+#define SYS_DBGBCR_EL1_LBN_SHIFT 16
+#define SYS_DBGBCR_EL1_LBN_MASK GENMASK(3, 0)
+
#define SYS_DBGWCRn_EL1(n) sys_reg(2, 0, 0, n, 7)
+#define SYS_DBGWCR_EL1_LBN_SHIFT 16
+#define SYS_DBGWCR_EL1_LBN_MASK GENMASK(3, 0)
+
#define SYS_MDRAR_EL1 sys_reg(2, 0, 1, 0, 0)
#define SYS_OSLAR_EL1 sys_reg(2, 0, 1, 0, 4)
@@ -849,17 +849,230 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu *vcpu,
#define AA64DFR0_CTX_CMPS(v) \
((u8)cpuid_feature_extract_unsigned_field(v, ID_AA64DFR0_CTX_CMPS_SHIFT))
+#define INVALID_BRPN ((u8)-1)
+
+static u8 get_bcr_lbn(u64 val)
+{
+ return ((val >> SYS_DBGBCR_EL1_LBN_SHIFT) & SYS_DBGBCR_EL1_LBN_MASK);
+}
+
+static u64 update_bcr_lbn(u64 val, u8 lbn)
+{
+ u64 new;
+
+ new = val & ~(SYS_DBGBCR_EL1_LBN_MASK << SYS_DBGBCR_EL1_LBN_SHIFT);
+ new |= ((u64)lbn & SYS_DBGBCR_EL1_LBN_MASK) << SYS_DBGBCR_EL1_LBN_SHIFT;
+ return new;
+}
+
+/*
+ * KVM will emulate breakpoints access when the number of non-context
+ * aware (normal) breakpoints is decreased for the guest. For instance,
+ * it will happen when userspace decreases the number of breakpoints
+ * for the guest keeping the same number of context aware breakpoints.
+ * Simply narrowing the number of breakpoints for the guest will lead
+ * to narrowing context aware breakpoints for the guest because as per
+ * Arm ARM, highest numbered breakpoints are context aware breakpoints.
+ * So, in that case, KVM will map context aware breakpoints for the
+ * vCPU to different numbered breakpoints for the pCPU, but will
+ * maintain the offset in context aware breakpoints.
+ * For instance, if 5 breakpoints are supported, and 2 of them are
+ * context aware breakpoints, breakpoint#0, #1 and #2 are normal
+ * breakpoints, and #3 and #4 are context aware breakpoints.
+ * If userspace decreases the number of breakpoints to 4 keeping the
+ * same number of context aware breakpoints (== 2), the guest expects
+ * breakpoint#0 and #1 to be normal breakpoints, and #2 and #3 to be
+ * context aware breakpoints. So, KVM will map the (virtual) context
+ * aware breakpoint #2 and #3 for the vCPU to (physical) context aware
+ * breakpoint #3 and #4 for the pCPU as follows.
+ *
+ * [Example]
+ *
+ *              Normal Breakpoints     Context aware breakpoints
+ *   Virtual      #0   #1                   #2   #3
+ *                |    |                    |    |
+ *   Physical     #0   #1   #2              #3   #4
+ *
+ * So, dbg{b,w}cr.lbn (linked breakpoint number) for vCPU might be
+ * different from the ones for pCPU (e.g. With the above example,
+ * when the guest sets dbgbcr0.lbn to 2 for the vCPU, dbgbcr0.lbn
+ * for the pCPU should be set to 3).
+ * Values in vcpu_debug_state of kvm_vcpu_arch will basically be the ones
+ * that are going to be set to the physical registers (indexed by physical
+ * context breakpoint number). But, they hold the values from the guest
+ * point of view until the first KVM_RUN (physical/virtual breakpoint
+ * numbers mapping is fixed) and they will be converted to the
+ * physical values during the process of first KVM_RUN.
+ *
+ * As there is no functional difference between any watchpoints,
+ * virtual watchpoint# will be always same as physical watchpoint#.
+ */
+
+/*
+ * Convert breakpoint# for the guest to breakpoint# for the real hardware.
+ * Return INVALID_BRPN if the given breakpoint# is invalid.
+ */
+static inline u8 virt_to_phys_bpn(struct kvm_vcpu *vcpu, u8 v_bpn)
+{
+ u8 virt_ctx_base, phys_ctx_base;
+ u64 p_val, v_val;
+
+ v_val = read_id_reg_with_encoding(vcpu, SYS_ID_AA64DFR0_EL1);
+ if (v_bpn > AA64DFR0_BRPS(v_val)) {
+ /*
+ * The virtual bpn is out of valid virtual breakpoint number
+ * range. Return the invalid breakpoint number.
+ */
+ return INVALID_BRPN;
+ }
+
+ if (!test_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &vcpu->kvm->arch.flags))
+ /* physical bpn == virtual bpn when no emulation is needed */
+ return v_bpn;
+
+ /* The lowest virtual context aware bpn */
+ virt_ctx_base = AA64DFR0_BRPS(v_val) - AA64DFR0_CTX_CMPS(v_val);
+ if (v_bpn < virt_ctx_base)
+ /*
+ * physical bpn == virtual bpn when v_bpn is not a
+ * context aware breakpoint.
+ */
+ return v_bpn;
+
+ p_val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+ /* The lowest physical context aware bpn */
+ phys_ctx_base = AA64DFR0_BRPS(p_val) - AA64DFR0_CTX_CMPS(p_val);
+
+ WARN_ON_ONCE(virt_ctx_base >= phys_ctx_base);
+
+ /*
+ * Context aware bpn. Map it to the same offset of physical
+ * context aware registers.
+ */
+ return phys_ctx_base + (v_bpn - virt_ctx_base);
+}
+
/*
- * Set KVM_ARCH_FLAG_EMULATE_DEBUG_REGS in the VM flags when the number of
- * non-context aware breakpoints for the guest is decreased by userspace
- * (meaning that debug register accesses need to be emulated).
+ * Convert breakpoint# for the real hardware to breakpoint# for the guest.
+ * Return INVALID_BRPN if the given breakpoint# is not used for the guest.
+ */
+static inline u8 phys_to_virt_bpn(struct kvm_vcpu *vcpu, u8 p_bpn)
+{
+ u8 virt_ctx_base, phys_ctx_base, v_bpn;
+ u64 p_val, v_val;
+
+ if (!test_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &vcpu->kvm->arch.flags))
+ return p_bpn;
+
+ v_val = read_id_reg_with_encoding(vcpu, SYS_ID_AA64DFR0_EL1);
+
+ /* The lowest virtual context aware bpn */
+ virt_ctx_base = AA64DFR0_BRPS(v_val) - AA64DFR0_CTX_CMPS(v_val);
+ if (p_bpn < virt_ctx_base)
+ /*
+ * physical bpn == virtual bpn when p_bpn is smaller than
+ * the lowest virtual context aware breakpoint number.
+ */
+ return p_bpn;
+
+ p_val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+
+ /* The lowest physical context aware bpn */
+ phys_ctx_base = AA64DFR0_BRPS(p_val) - AA64DFR0_CTX_CMPS(p_val);
+ if (p_bpn < phys_ctx_base)
+ /*
+ * Unused non-context aware breakpoint.
+ * No virtual breakpoint is assigned for this.
+ */
+ return INVALID_BRPN;
+
+ WARN_ON_ONCE(virt_ctx_base >= phys_ctx_base);
+
+ /*
+ * Context aware bpn. Map it to the same offset of virtual
+ * context aware registers.
+ */
+ v_bpn = virt_ctx_base + (p_bpn - phys_ctx_base);
+ if (v_bpn > AA64DFR0_BRPS(v_val)) {
+ /* This physical bpn is not mapped to any virtual bpn */
+ return INVALID_BRPN;
+ }
+
+ return v_bpn;
+}
+
+static u8 get_unused_p_bpn(struct kvm_vcpu *vcpu)
+{
+ u64 p_val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+
+ WARN_ON_ONCE(!test_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &vcpu->kvm->arch.flags));
+
+ /*
+ * The last normal (non-context aware) break point is always unused
+ * (and disabled) when kvm_arm_need_emulate_debug_regs() is true.
+ */
+ return AA64DFR0_BRPS(p_val) - AA64DFR0_CTX_CMPS(p_val) - 1;
+}
+
+/*
+ * virt_to_phys_bcr() converts the virtual bcr value (the bcr value from
+ * the guest point of view) to physical bcr value, which is going to be set
+ * to the real hardware. More specifically, as a lbn field value of the
+ * virtual bcr includes the virtual breakpoint number, the function will
+ * update the bcr with physical breakpoint number, and will return it as
+ * the physical bcr value. phys_to_virt_bcr() does the opposite.
+ *
+ * As per Arm ARM (ARM DDI 0487H.a), if a Linked Address breakpoint links
+ * to a breakpoint that is not implemented or that is not context aware,
+ * then reads of bcr.lbn return an unknown value, and the Linked Address
+ * breakpoint behaves as if it is either disabled or linked to an UNKNOWN
+ * context aware breakpoint. In such cases, KVM will return 0 to reads of
+ * bcr.lbn, and have the breakpoint behave as if it is disabled by
+ * setting the lbn to an unused (disabled) breakpoint.
+ */
+static u64 virt_to_phys_bcr(struct kvm_vcpu *vcpu, u64 v_bcr)
+{
+	u8 v_lbn, p_lbn;
+
+	/*
+	 * Without emulation, physical bpn == virtual bpn, so bcr.lbn needs
+	 * no translation. Bail out early: there is no unused physical
+	 * breakpoint to fall back to in this case, and get_unused_p_bpn()
+	 * would WARN if a guest-provided, out-of-range lbn reached it.
+	 */
+	if (!test_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &vcpu->kvm->arch.flags))
+		return v_bcr;
+
+	v_lbn = get_bcr_lbn(v_bcr);
+	p_lbn = virt_to_phys_bpn(vcpu, v_lbn);
+	if (p_lbn == INVALID_BRPN) {
+		/*
+		 * The guest linked to a breakpoint it does not have. Point
+		 * the link at the unused physical breakpoint so that the
+		 * linked breakpoint behaves as if it is disabled.
+		 */
+		p_lbn = get_unused_p_bpn(vcpu);
+	}
+
+	return update_bcr_lbn(v_bcr, p_lbn);
+}
+
+static u64 phys_to_virt_bcr(struct kvm_vcpu *vcpu, u64 p_bcr)
+{
+ u8 v_lbn, p_lbn;
+
+ p_lbn = get_bcr_lbn(p_bcr);
+ v_lbn = phys_to_virt_bpn(vcpu, p_lbn);
+ if (v_lbn == INVALID_BRPN)
+ v_lbn = 0;
+
+ return update_bcr_lbn(p_bcr, v_lbn);
+}
+
+/*
+ * Check if the number of normal breakpoints for the guest is same as
+ * the one for the host. If so, do nothing.
+ * Otherwise (accesses of debug registers need to be emulated), set
+ * KVM_ARCH_FLAG_EMULATE_DEBUG_REGS in the VM flags, and convert values
+ * in vcpu->arch.vcpu_debug_state that are values from the guest
+ * point of view to values that are going to be set to hardware
+ * registers. See comments for set_bvr() for some more details.
*/
void kvm_vcpu_breakpoint_config(struct kvm_vcpu *vcpu)
{
u64 p_val = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
u64 v_val = read_id_reg_with_encoding(vcpu, SYS_ID_AA64DFR0_EL1);
u8 v_nbpn, p_nbpn;
+ u64 p_bcr;
struct kvm *kvm = vcpu->kvm;
+ int v;
+ u8 p_bpn;
+ struct kvm_guest_debug_arch *dbg = &vcpu->arch.vcpu_debug_state;
/*
* Check the number of normal (non-context aware) breakpoints
@@ -877,11 +1090,39 @@ void kvm_vcpu_breakpoint_config(struct kvm_vcpu *vcpu)
if (!test_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &kvm->arch.flags))
set_bit(KVM_ARCH_FLAG_EMULATE_DEBUG_REGS, &kvm->arch.flags);
+
+ /*
+ * Before the first KVM_RUN, vcpu->arch.vcpu_debug_state holds
+ * values of the registers to be exposed to the guest and their
+ * positions are indexed by virtual breakpoint numbers.
+ * Convert the values to physical values that are going to be set
+ * to hardware registers, and move them to positions indexed
+ * by physical breakpoint numbers.
+ */
+ for (v = KVM_ARM_MAX_DBG_REGS - 1; v >= 0; v--) {
+ /* Get physical breakpoint number */
+ p_bpn = virt_to_phys_bpn(vcpu, v);
+ WARN_ON_ONCE(p_bpn < v);
+
+ if (p_bpn != INVALID_BRPN) {
+ /* Get physical bcr */
+ p_bcr = virt_to_phys_bcr(vcpu, dbg->dbg_bcr[v]);
+ dbg->dbg_bcr[p_bpn] = p_bcr;
+ dbg->dbg_bvr[p_bpn] = dbg->dbg_bvr[v];
+ }
+
+ /* Clear dbg_b{c,v}r, which might not be used */
+ if (p_bpn != v) {
+ dbg->dbg_bcr[v] = 0;
+ dbg->dbg_bvr[v] = 0;
+ }
+ }
}
/*
* We want to avoid world-switching all the DBG registers all the
- * time:
+ * time, unless userspace decreases the number of non-context aware
+ * breakpoints, in which case accesses to debug registers must be emulated.
*
* - If we've touched any debug register, it is likely that we're
* going to touch more of them. It then makes sense to disable the
@@ -963,8 +1204,17 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+ u64 p_bpn;
+ u64 *dbg_reg;
+ /* Convert the virt breakpoint num to phys breakpoint num */
+ p_bpn = virt_to_phys_bpn(vcpu, rd->CRm);
+ if (p_bpn == INVALID_BRPN) {
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[p_bpn];
if (p->is_write)
reg_to_dbg(vcpu, p, rd, dbg_reg);
else
@@ -975,23 +1225,85 @@ static bool trap_bvr(struct kvm_vcpu *vcpu,
return true;
}
+/*
+ * The behaviors of {s,g}et_b{c,v}r change depending on whether they
+ * are called before or after the first KVM_RUN.
+ *
+ * Before the first KVM_RUN (the number of breakpoints is not fixed yet),
+ * the vcpu->arch.vcpu_debug_state holds debug register values from
+ * the guest point of view. The set_b{c,v}r() simply save the value
+ * from userspace in vcpu->arch.vcpu_debug_state, and get_b{c,v}r()
+ * simply return the value in vcpu->arch.vcpu_debug_state to userspace.
+ *
+ * At the first KVM_RUN (where the number of breakpoints is immutable),
+ * b{c,v}r values in vcpu->arch.vcpu_debug_state are converted to
+ * the values that are going to be set to hardware registers.
+ * After that, vcpu->arch.vcpu_debug_state holds debug register values that
+ * are going to be set to hardware registers. The set_b{c,v}r functions convert
+ * the value from userspace to the one that will be set to the hardware
+ * register and save the converted value in vcpu->arch.vcpu_debug_state.
+ * The get_b{c,v}r functions read the value from vcpu->arch.vcpu_debug_state,
+ * convert it to the value as seen by the guest and return the converted
+ * value to the userspace.
+ *
+ * The {s,g}et_b{c,v}r will treat the invalid breakpoint registers,
+ * which are not mapped to physical breakpoints, as RAZ/WI after the first
+ * KVM_RUN (values that userspace attempts to set in those registers will
+ * not be saved anywhere), which shouldn't be a problem because they will
+ * never be exposed to the guest anyway. Until the first KVM_RUN, setting
+ * and getting of those work normally though (The number of breakpoints
+ * could be changed by userspace until the first KVM_RUN).
+ */
static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+ u8 v_bpn, p_bpn;
+ __u64 bvr;
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ if (copy_from_user(&bvr, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
+
+ v_bpn = rd->CRm;
+
+ /*
+ * Until the first KVM_RUN, vcpu_debug_state holds the virtual bvr.
+ * After that, vcpu_debug_state holds the physical bvr.
+ */
+ if (vcpu_has_run_once(vcpu)) {
+ /* Convert the virt breakpoint num to phys breakpoint num */
+ p_bpn = virt_to_phys_bpn(vcpu, v_bpn);
+ if (p_bpn != INVALID_BRPN)
+ vcpu->arch.vcpu_debug_state.dbg_bvr[p_bpn] = bvr;
+ } else {
+ vcpu->arch.vcpu_debug_state.dbg_bvr[v_bpn] = bvr;
+ }
+
return 0;
}
static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->CRm];
+ u8 v_bpn, p_bpn;
+ u64 bvr = 0;
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ v_bpn = rd->CRm;
+ /*
+ * Until the first KVM_RUN, vcpu_debug_state holds the virtual bvr.
+ * After that, vcpu_debug_state holds the physical bvr.
+ */
+ if (vcpu_has_run_once(vcpu)) {
+ /* Convert the virt breakpoint num to phys breakpoint num */
+ p_bpn = virt_to_phys_bpn(vcpu, v_bpn);
+ if (p_bpn != INVALID_BRPN)
+ bvr = vcpu->arch.vcpu_debug_state.dbg_bvr[p_bpn];
+ } else {
+ bvr = vcpu->arch.vcpu_debug_state.dbg_bvr[v_bpn];
+ }
+
+ if (copy_to_user(uaddr, &bvr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
+
return 0;
}
@@ -1005,12 +1317,27 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *rd)
{
- u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+ u8 p_bpn;
+ u64 *dbg_reg;
- if (p->is_write)
+ /* Convert the given virt breakpoint num to phys breakpoint num */
+ p_bpn = virt_to_phys_bpn(vcpu, rd->CRm);
+ if (p_bpn == INVALID_BRPN) {
+ /* Invalid breakpoint number */
+ kvm_inject_undefined(vcpu);
+ return false;
+ }
+
+ dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[p_bpn];
+ if (p->is_write) {
+ /* Convert virtual bcr to physical bcr */
+ p->regval = virt_to_phys_bcr(vcpu, p->regval);
reg_to_dbg(vcpu, p, rd, dbg_reg);
- else
+ } else {
dbg_to_reg(vcpu, p, rd, dbg_reg);
+ /* Convert physical bcr to virtual bcr */
+ p->regval = phys_to_virt_bcr(vcpu, p->regval);
+ }
trace_trap_reg(__func__, rd->CRm, p->is_write, *dbg_reg);
@@ -1020,21 +1347,64 @@ static bool trap_bcr(struct kvm_vcpu *vcpu,
static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+ u8 v_bpn, p_bpn;
+ u64 v_bcr, p_bcr;
- if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
+ if (copy_from_user(&v_bcr, uaddr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
+ v_bpn = rd->CRm;
+
+ /*
+ * Until the first KVM_RUN, vcpu_debug_state holds the virtual bcr.
+ * After that, vcpu_debug_state holds the physical bcr.
+ */
+ if (vcpu_has_run_once(vcpu)) {
+ /* Convert the virt breakpoint num to phys breakpoint num */
+ p_bpn = virt_to_phys_bpn(vcpu, v_bpn);
+ if (p_bpn != INVALID_BRPN) {
+ /* Convert virt bcr to phys bcr, and save it */
+ p_bcr = virt_to_phys_bcr(vcpu, v_bcr);
+ vcpu->arch.vcpu_debug_state.dbg_bcr[p_bpn] = p_bcr;
+ }
+ } else {
+ vcpu->arch.vcpu_debug_state.dbg_bcr[v_bpn] = v_bcr;
+ }
+
return 0;
}
static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
const struct kvm_one_reg *reg, void __user *uaddr)
{
- __u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->CRm];
+ u8 v_bpn, p_bpn;
+ u64 v_bcr = 0;
+ u64 p_bcr;
- if (copy_to_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+ v_bpn = rd->CRm;
+ /*
+ * Until the first KVM_RUN, vcpu_debug_state holds the virtual bcr.
+ * After that, vcpu_debug_state holds the physical bcr.
+ */
+ if (vcpu_has_run_once(vcpu)) {
+ /*
+ * Convert the virtual breakpoint num to phys breakpoint num,
+ * and get the physical bcr value.
+ */
+ p_bpn = virt_to_phys_bpn(vcpu, v_bpn);
+ if (p_bpn != INVALID_BRPN) {
+ p_bcr = vcpu->arch.vcpu_debug_state.dbg_bcr[p_bpn];
+
+ /* Convert physical bcr to virtual bcr */
+ v_bcr = phys_to_virt_bcr(vcpu, p_bcr);
+ }
+ } else {
+ v_bcr = vcpu->arch.vcpu_debug_state.dbg_bcr[v_bpn];
+ }
+
+ if (copy_to_user(uaddr, &v_bcr, KVM_REG_SIZE(reg->id)) != 0)
return -EFAULT;
+
return 0;
}
Highest numbered breakpoints must be context aware breakpoints (as specified by Arm ARM). If the number of non-context aware breakpoints for the guest is decreased by userspace (e.g. by lowering ID_AA64DFR0.BRPs while keeping ID_AA64DFR0.CTX_CMPs the same), simply narrowing the breakpoints will be problematic because it will lead to narrowing context aware breakpoints for the guest. Emulate dbgbcr/dbgbvr accesses in that case and map context aware breakpoints for the vCPU to different numbered breakpoints for the pCPU, while maintaining the offset within the context aware breakpoints. Signed-off-by: Reiji Watanabe <reijiw@google.com> --- arch/arm64/include/asm/sysreg.h | 9 +- arch/arm64/kvm/sys_regs.c | 402 ++++++++++++++++++++++++++++++-- 2 files changed, 394 insertions(+), 17 deletions(-)