Message ID | 20230317050637.766317-7-jingzhangos@google.com (mailing list archive)
---|---
State | New, archived
Series | Support writable CPU ID registers from userspace
On Fri, 17 Mar 2023 05:06:37 +0000, Jing Zhang <jingzhangos@google.com> wrote: > > Save KVM sanitised ID register value in ID descriptor (kvm_sys_val). Why do we need to store a separate value *beside* the sanitised value the kernel already holds? > Add an init callback for every ID register to setup kvm_sys_val. Same question. > All per VCPU sanitizations are still handled on the fly during ID > register read and write from userspace. > An arm64_ftr_bits array is used to indicate writable feature fields. > > Refactor writings for ID_AA64PFR0_EL1.[CSV2|CSV3], > ID_AA64DFR0_EL1.PMUVer and ID_DFR0_ELF.PerfMon based on utilities > introduced by ID register descriptor. > > No functional change intended. > > Co-developed-by: Reiji Watanabe <reijiw@google.com> > Signed-off-by: Reiji Watanabe <reijiw@google.com> > Signed-off-by: Jing Zhang <jingzhangos@google.com> > --- > arch/arm64/include/asm/cpufeature.h | 25 +++ > arch/arm64/include/asm/kvm_host.h | 2 +- > arch/arm64/kernel/cpufeature.c | 26 +-- > arch/arm64/kvm/arm.c | 2 +- > arch/arm64/kvm/id_regs.c | 325 ++++++++++++++++++++-------- > arch/arm64/kvm/sys_regs.c | 3 +- > arch/arm64/kvm/sys_regs.h | 2 +- > 7 files changed, 261 insertions(+), 124 deletions(-) > > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h > index fc2c739f48f1..493ec530eefc 100644 > --- a/arch/arm64/include/asm/cpufeature.h > +++ b/arch/arm64/include/asm/cpufeature.h > @@ -64,6 +64,30 @@ struct arm64_ftr_bits { > s64 safe_val; /* safe value for FTR_EXACT features */ > }; > > +#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > + { \ > + .sign = SIGNED, \ > + .visible = VISIBLE, \ > + .strict = STRICT, \ > + .type = TYPE, \ > + .shift = SHIFT, \ > + .width = WIDTH, \ > + .safe_val = SAFE_VAL, \ > + } > + > +/* Define a feature with unsigned values */ > +#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > + __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > + > +/* Define a feature with a signed value */ > +#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > + __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > + > +#define ARM64_FTR_END \ > + { \ > + .width = 0, \ > + } > + > /* > * Describe the early feature override to the core override code: > * > @@ -911,6 +935,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) > return 8; > } > > +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur); > struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); > > extern struct arm64_ftr_override id_aa64mmfr1_override; > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 102860ba896d..aa83dd79e7ff 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -1013,7 +1013,7 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, > long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, > struct kvm_arm_copy_mte_tags *copy_tags); > > -void kvm_arm_set_default_id_regs(struct kvm *kvm); > +void kvm_arm_init_id_regs(struct kvm *kvm); > > /* Guest/host FPSIMD coordination helpers */ > int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 23bd2a926b74..e18848ee4b98 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -139,30 +139,6 @@ void dump_cpu_features(void) > pr_emerg("0x%*pb\n", ARM64_NCAPS, 
&cpu_hwcaps); > } > > -#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > - { \ > - .sign = SIGNED, \ > - .visible = VISIBLE, \ > - .strict = STRICT, \ > - .type = TYPE, \ > - .shift = SHIFT, \ > - .width = WIDTH, \ > - .safe_val = SAFE_VAL, \ > - } > - > -/* Define a feature with unsigned values */ > -#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > - __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > - > -/* Define a feature with a signed value */ > -#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > - __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > - > -#define ARM64_FTR_END \ > - { \ > - .width = 0, \ > - } > - > static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); > > static bool __system_matches_cap(unsigned int n); > @@ -790,7 +766,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, > return reg; > } > > -static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, > +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, > s64 cur) > { > s64 ret = 0; > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index fb2de2cb98cb..e539d9ca9d01 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -135,7 +135,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > /* The maximum number of VCPUs is limited by the host's GIC model */ > kvm->max_vcpus = kvm_arm_default_max_vcpus(); > > - kvm_arm_set_default_id_regs(kvm); > + kvm_arm_init_id_regs(kvm); How about picking the name once and for all from the first patch? > kvm_arm_init_hypercalls(kvm); > > return 0; > diff --git a/arch/arm64/kvm/id_regs.c b/arch/arm64/kvm/id_regs.c > index 9956c99d20f7..726b810b6e06 100644 > --- a/arch/arm64/kvm/id_regs.c > +++ b/arch/arm64/kvm/id_regs.c > @@ -18,10 +18,88 @@ > > #include "sys_regs.h" > > +/* > + * Number of entries in id_reg_desc's ftr_bits[] (Number of 4 bits fields > + * in 64 bit register + 1 entry for a terminator entry). > + */ > +#define FTR_FIELDS_NUM 17 Please see SMFR0_EL1 for an example of a sysreg that doesn't follow the 4bits-per-field format. I expect to see more of those in the future. And given that this is always a variable set of fields, why do we need to define this as a fixed array that only bloats the structure? I'd rather see a variable array in a side structure. > + > struct id_reg_desc { > const struct sys_reg_desc reg_desc; > + /* > + * KVM sanitised ID register value. > + * It is the default value for per VM emulated ID register. > + */ > + u64 kvm_sys_val; > + /* > + * Used to validate the ID register values with arm64_check_features(). > + * The last item in the array must be terminated by an item whose > + * width field is zero as that is expected by arm64_check_features(). > + * Only feature bits defined in this array are writable. > + */ > + struct arm64_ftr_bits ftr_bits[FTR_FIELDS_NUM]; > + > + /* > + * Basically init() is used to setup the KVM sanitised value > + * stored in kvm_sys_val. > + */ > + void (*init)(struct id_reg_desc *idr); Given that this callback only builds the value from the sanitised view, and that it is very cheap (only a set of masking), why do we bother keeping the value around? It would also allow this structure to be kept *const*, something that is extremely desirable. Also, why do we need an init() method when each sysreg already have a reset() method? Surely this should be the same thing... 
My gut feeling is that we should only have a callback returning the limit value computed on the fly. > }; > > +static struct id_reg_desc id_reg_descs[]; > + > +/** > + * arm64_check_features() - Check if a feature register value constitutes > + * a subset of features indicated by @limit. > + * > + * @ftrp: Pointer to an array of arm64_ftr_bits. It must be terminated by > + * an item whose width field is zero. > + * @val: The feature register value to check > + * @limit: The limit value of the feature register > + * > + * This function will check if each feature field of @val is the "safe" value > + * against @limit based on @ftrp[], each of which specifies the target field > + * (shift, width), whether or not the field is for a signed value (sign), > + * how the field is determined to be "safe" (type), and the safe value > + * (safe_val) when type == FTR_EXACT (safe_val won't be used by this > + * function when type != FTR_EXACT). Any other fields in arm64_ftr_bits > + * won't be used by this function. If a field value in @val is the same > + * as the one in @limit, it is always considered the safe value regardless > + * of the type. For register fields that are not in @ftrp[], only the value > + * in @limit is considered the safe value. > + * > + * Return: 0 if all the fields are safe. Otherwise, return negative errno. > + */ > +static int arm64_check_features(const struct arm64_ftr_bits *ftrp, u64 val, u64 limit) > +{ > + u64 mask = 0; > + > + for (; ftrp->width; ftrp++) { > + s64 f_val, f_lim, safe_val; > + > + f_val = arm64_ftr_value(ftrp, val); > + f_lim = arm64_ftr_value(ftrp, limit); > + mask |= arm64_ftr_mask(ftrp); > + > + if (f_val == f_lim) > + safe_val = f_val; > + else > + safe_val = arm64_ftr_safe_value(ftrp, f_val, f_lim); > + > + if (safe_val != f_val) > + return -E2BIG; > + } > + > + /* > + * For fields that are not indicated in ftrp, values in limit are the > + * safe values. > + */ > + if ((val & ~mask) != (limit & ~mask)) > + return -E2BIG; > + > + return 0; > +} I have the feeling that the core code already implements something similar... > + > static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) > { > if (kvm_vcpu_has_pmu(vcpu)) > @@ -67,7 +145,6 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) > case SYS_ID_AA64PFR0_EL1: > if (!vcpu_has_sve(vcpu)) > val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE); > - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); > if (kvm_vgic_global_state.type == VGIC_V3) { > val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); > val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); > @@ -94,15 +171,10 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) > val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); > break; > case SYS_ID_AA64DFR0_EL1: > - /* Limit debug to ARMv8.0 */ > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); > - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); > /* Set PMUver to the required version */ > val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), > vcpu_pmuver(vcpu)); > - /* Hide SPE from guests */ > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); > break; > case SYS_ID_DFR0_EL1: > val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > @@ -161,9 +233,15 @@ static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, > static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, > u64 val) > { > - /* This is what we mean by invariant: you can't change it. 
*/ > - if (val != read_id_reg(vcpu, rd)) > - return -EINVAL; > + int ret; > + int id = reg_to_encoding(rd); > + > + ret = arm64_check_features(id_reg_descs[IDREG_IDX(id)].ftr_bits, val, > + id_reg_descs[IDREG_IDX(id)].kvm_sys_val); > + if (ret) > + return ret; > + > + vcpu->kvm->arch.id_regs[IDREG_IDX(id)] = val; > > return 0; > } > @@ -197,12 +275,47 @@ static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, > return id_visibility(vcpu, r); > } > > +static void init_id_reg(struct id_reg_desc *idr) > +{ > + idr->kvm_sys_val = read_sanitised_ftr_reg(reg_to_encoding(&idr->reg_desc)); > +} > + > +static void init_id_aa64pfr0_el1(struct id_reg_desc *idr) > +{ > + u64 val; > + u32 id = reg_to_encoding(&idr->reg_desc); > + > + val = read_sanitised_ftr_reg(id); > + /* > + * The default is to expose CSV2 == 1 if the HW isn't affected. > + * Although this is a per-CPU feature, we make it global because > + * asymmetric systems are just a nuisance. > + * > + * Userspace can override this as long as it doesn't promise > + * the impossible. > + */ > + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) { > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1); > + } > + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) { > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), 1); > + } > + > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); > + > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); What? Why? What if I have a GICv2? What if I have no GIC? > + > + idr->kvm_sys_val = val; > +} How does this compose with the runtime feature reduction that takes place in access_nested_id_reg()? > + > static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, > const struct sys_reg_desc *rd, > u64 val) > { > u8 csv2, csv3; > - u64 sval = val; > > /* > * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as > @@ -220,16 +333,29 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, > (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) > return -EINVAL; > > - /* We can only differ with CSV[23], and anything else is an error */ > - val ^= read_id_reg(vcpu, rd); > - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | > - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); > - if (val) > - return -EINVAL; > + return set_id_reg(vcpu, rd, val); > +} > > - vcpu->kvm->arch.id_regs[IDREG_IDX(reg_to_encoding(rd))] = sval; > +static void init_id_aa64dfr0_el1(struct id_reg_desc *idr) > +{ > + u64 val; > + u32 id = reg_to_encoding(&idr->reg_desc); > > - return 0; > + val = read_sanitised_ftr_reg(id); > + /* Limit debug to ARMv8.0 */ > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); > + /* > + * Initialise the default PMUver before there is a chance to > + * create an actual PMU. 
> + */ > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), > + kvm_arm_pmu_get_pmuver_limit()); > + /* Hide SPE from guests */ > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); > + > + idr->kvm_sys_val = val; > } > > static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > @@ -238,6 +364,7 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > { > u8 pmuver, host_pmuver; > bool valid_pmu; > + int ret; > > host_pmuver = kvm_arm_pmu_get_pmuver_limit(); > > @@ -257,39 +384,58 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) > return -EINVAL; > > - /* We can only differ with PMUver, and anything else is an error */ > - val ^= read_id_reg(vcpu, rd); > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > - if (val) > - return -EINVAL; > - > if (valid_pmu) { > mutex_lock(&vcpu->kvm->lock); > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &= > - ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= > - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), pmuver); > + ret = set_id_reg(vcpu, rd, val); > + if (ret) > + return ret; Next stop, Deadlock City, our final destination. > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &= > ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP( > ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), pmuver_to_perfmon(pmuver)); > mutex_unlock(&vcpu->kvm->lock); > - } else if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) { > - set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > } else { > - clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > + /* We can only differ with PMUver, and anything else is an error */ > + val ^= read_id_reg(vcpu, rd); > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > + if (val) > + return -EINVAL; I find it very odd that you add all this infrastructure to check for writable fields, and yet have to keep this comparison. It makes me think that the data structures are not necessarily the right ones.
> + */ > + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), > + kvm_arm_pmu_get_pmuver_limit()); > + > + idr->kvm_sys_val = val; > +} > + > static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > const struct sys_reg_desc *rd, > u64 val) > { > u8 perfmon, host_perfmon; > bool valid_pmu; > + int ret; > > host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); > > @@ -310,42 +456,46 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) > return -EINVAL; > > - /* We can only differ with PerfMon, and anything else is an error */ > - val ^= read_id_reg(vcpu, rd); > - val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > - if (val) > - return -EINVAL; > - > if (valid_pmu) { > mutex_lock(&vcpu->kvm->lock); > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &= > - ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP( > - ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), perfmon); > + ret = set_id_reg(vcpu, rd, val); > + if (ret) > + return ret; Same player, shoot again. > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &= > ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= FIELD_PREP( > ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), perfmon_to_pmuver(perfmon)); > mutex_unlock(&vcpu->kvm->lock); > - } else if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) { > - set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > } else { > - clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > + /* We can only differ with PerfMon, and anything else is an error */ > + val ^= read_id_reg(vcpu, rd); > + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > + if (val) > + return -EINVAL; > + > + if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) > + set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > + else > + clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > } Same remarks. 
> > return 0; > } > > /* sys_reg_desc initialiser for known cpufeature ID registers */ > +#define SYS_DESC_SANITISED(name) { \ > + SYS_DESC(SYS_##name), \ > + .access = access_id_reg, \ > + .get_user = get_id_reg, \ > + .set_user = set_id_reg, \ > + .visibility = id_visibility, \ > +} > + > #define ID_SANITISED(name) { \ > - .reg_desc = { \ > - SYS_DESC(SYS_##name), \ > - .access = access_id_reg, \ > - .get_user = get_id_reg, \ > - .set_user = set_id_reg, \ > - .visibility = id_visibility, \ > - }, \ > + .reg_desc = SYS_DESC_SANITISED(name), \ > + .ftr_bits = { ARM64_FTR_END, }, \ > + .init = init_id_reg, \ > } > > /* sys_reg_desc initialiser for known cpufeature ID registers */ > @@ -357,6 +507,8 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > .set_user = set_id_reg, \ > .visibility = aa32_id_visibility, \ > }, \ > + .ftr_bits = { ARM64_FTR_END, }, \ > + .init = init_id_reg, \ > } > > /* > @@ -372,6 +524,7 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > .set_user = set_id_reg, \ > .visibility = raz_visibility \ > }, \ > + .ftr_bits = { ARM64_FTR_END, }, \ > } > > /* > @@ -387,9 +540,10 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > .set_user = set_id_reg, \ > .visibility = raz_visibility, \ > }, \ > + .ftr_bits = { ARM64_FTR_END, }, \ > } > > -static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > +static struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > /* > * ID regs: all ID_SANITISED() entries here must have corresponding > * entries in arm64_ftr_regs[]. > @@ -405,6 +559,11 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > .get_user = get_id_reg, > .set_user = set_id_dfr0_el1, > .visibility = aa32_id_visibility, }, > + .ftr_bits = { > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > + ID_DFR0_EL1_PerfMon_SHIFT, ID_DFR0_EL1_PerfMon_WIDTH, 0), > + ARM64_FTR_END, }, > + .init = init_id_dfr0_el1, > }, > ID_HIDDEN(ID_AFR0_EL1), > AA32_ID_SANITISED(ID_MMFR0_EL1), > @@ -439,6 +598,13 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > .access = access_id_reg, > .get_user = get_id_reg, > .set_user = set_id_aa64pfr0_el1, }, > + .ftr_bits = { > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > + ID_AA64PFR0_EL1_CSV2_SHIFT, ID_AA64PFR0_EL1_CSV2_WIDTH, 0), > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > + ID_AA64PFR0_EL1_CSV3_SHIFT, ID_AA64PFR0_EL1_CSV3_WIDTH, 0), > + ARM64_FTR_END, }, It really strikes me that you are 100% duplicating data that is already in ftr_id_aa64pfr0[]. Only that this is a subset of the existing data. You could instead have your 'init()' callback return a pair of values: the default value based on the sanitised one, and a 64bit mask. At this stage, you'll realise that this looks a lot like the feature override, and that you should be able to reuse some of the existing infrastructure. Thanks, M.
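For illustration, the shape Marc is suggesting could look something like the sketch below. The callback name and signature are made up here, not taken from the series; the value/mask pairing mirrors struct arm64_ftr_override, which also couples a u64 val with a u64 mask:

	struct id_reg_desc {
		const struct sys_reg_desc reg_desc;
		/*
		 * Compute the KVM-sanitised limit from the host's sanitised
		 * value on the fly, and report in *mask which fields userspace
		 * may change; everything else must match the limit exactly.
		 */
		u64 (*read_kvm_limit)(const struct id_reg_desc *idr, u64 *mask);
	};

	static u64 aa64pfr0_kvm_limit(const struct id_reg_desc *idr, u64 *mask)
	{
		u64 limit = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);

		/* No AMU exposed to guests */
		limit &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU);
		/* Default CSV2 to 1 when the HW is unaffected (CSV3 likewise) */
		if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) {
			limit &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
			limit |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1);
		}

		*mask = ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) |
			ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
		return limit;
	}

Since nothing is cached, the descriptor table can stay const, and set_id_reg() can recompute the limit each time it has to validate a userspace write.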
Hi Marc, On Mon, Mar 27, 2023 at 6:34 AM Marc Zyngier <maz@kernel.org> wrote: > > On Fri, 17 Mar 2023 05:06:37 +0000, > Jing Zhang <jingzhangos@google.com> wrote: > > > > Save KVM sanitised ID register value in ID descriptor (kvm_sys_val). > > Why do we need to store a separate value *beside* the sanitised value > the kernel already holds? > > > Add an init callback for every ID register to setup kvm_sys_val. > > Same question. It is used to store the value further sanitised by KVM, which might be different from the one held by kernel. But as you suggested later, this isn't necessary, we can create KVM sanitised value on the fly since it is cheap. > > > All per VCPU sanitizations are still handled on the fly during ID > > register read and write from userspace. > > An arm64_ftr_bits array is used to indicate writable feature fields. > > > > Refactor writings for ID_AA64PFR0_EL1.[CSV2|CSV3], > > ID_AA64DFR0_EL1.PMUVer and ID_DFR0_ELF.PerfMon based on utilities > > introduced by ID register descriptor. > > > > No functional change intended. > > > > Co-developed-by: Reiji Watanabe <reijiw@google.com> > > Signed-off-by: Reiji Watanabe <reijiw@google.com> > > Signed-off-by: Jing Zhang <jingzhangos@google.com> > > --- > > arch/arm64/include/asm/cpufeature.h | 25 +++ > > arch/arm64/include/asm/kvm_host.h | 2 +- > > arch/arm64/kernel/cpufeature.c | 26 +-- > > arch/arm64/kvm/arm.c | 2 +- > > arch/arm64/kvm/id_regs.c | 325 ++++++++++++++++++++-------- > > arch/arm64/kvm/sys_regs.c | 3 +- > > arch/arm64/kvm/sys_regs.h | 2 +- > > 7 files changed, 261 insertions(+), 124 deletions(-) > > > > diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h > > index fc2c739f48f1..493ec530eefc 100644 > > --- a/arch/arm64/include/asm/cpufeature.h > > +++ b/arch/arm64/include/asm/cpufeature.h > > @@ -64,6 +64,30 @@ struct arm64_ftr_bits { > > s64 safe_val; /* safe value for FTR_EXACT features */ > > }; > > > > +#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > + { \ > > + .sign = SIGNED, \ > > + .visible = VISIBLE, \ > > + .strict = STRICT, \ > > + .type = TYPE, \ > > + .shift = SHIFT, \ > > + .width = WIDTH, \ > > + .safe_val = SAFE_VAL, \ > > + } > > + > > +/* Define a feature with unsigned values */ > > +#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > + __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > > + > > +/* Define a feature with a signed value */ > > +#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > + __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > > + > > +#define ARM64_FTR_END \ > > + { \ > > + .width = 0, \ > > + } > > + > > /* > > * Describe the early feature override to the core override code: > > * > > @@ -911,6 +935,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) > > return 8; > > } > > > > +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur); > > struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); > > > > extern struct arm64_ftr_override id_aa64mmfr1_override; > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > > index 102860ba896d..aa83dd79e7ff 100644 > > --- a/arch/arm64/include/asm/kvm_host.h > > +++ b/arch/arm64/include/asm/kvm_host.h > > @@ -1013,7 +1013,7 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, > > long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, > > struct kvm_arm_copy_mte_tags *copy_tags); > > > > -void 
kvm_arm_set_default_id_regs(struct kvm *kvm); > > +void kvm_arm_init_id_regs(struct kvm *kvm); > > > > /* Guest/host FPSIMD coordination helpers */ > > int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); > > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > > index 23bd2a926b74..e18848ee4b98 100644 > > --- a/arch/arm64/kernel/cpufeature.c > > +++ b/arch/arm64/kernel/cpufeature.c > > @@ -139,30 +139,6 @@ void dump_cpu_features(void) > > pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); > > } > > > > -#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > - { \ > > - .sign = SIGNED, \ > > - .visible = VISIBLE, \ > > - .strict = STRICT, \ > > - .type = TYPE, \ > > - .shift = SHIFT, \ > > - .width = WIDTH, \ > > - .safe_val = SAFE_VAL, \ > > - } > > - > > -/* Define a feature with unsigned values */ > > -#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > - __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > > - > > -/* Define a feature with a signed value */ > > -#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ > > - __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) > > - > > -#define ARM64_FTR_END \ > > - { \ > > - .width = 0, \ > > - } > > - > > static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); > > > > static bool __system_matches_cap(unsigned int n); > > @@ -790,7 +766,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, > > return reg; > > } > > > > -static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, > > +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, > > s64 cur) > > { > > s64 ret = 0; > > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > > index fb2de2cb98cb..e539d9ca9d01 100644 > > --- a/arch/arm64/kvm/arm.c > > +++ b/arch/arm64/kvm/arm.c > > @@ -135,7 +135,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) > > /* The maximum number of VCPUs is limited by the host's GIC model */ > > kvm->max_vcpus = kvm_arm_default_max_vcpus(); > > > > - kvm_arm_set_default_id_regs(kvm); > > + kvm_arm_init_id_regs(kvm); > > How about picking the name once and for all from the first patch? Sure, will do. > > > kvm_arm_init_hypercalls(kvm); > > > > return 0; > > diff --git a/arch/arm64/kvm/id_regs.c b/arch/arm64/kvm/id_regs.c > > index 9956c99d20f7..726b810b6e06 100644 > > --- a/arch/arm64/kvm/id_regs.c > > +++ b/arch/arm64/kvm/id_regs.c > > @@ -18,10 +18,88 @@ > > > > #include "sys_regs.h" > > > > +/* > > + * Number of entries in id_reg_desc's ftr_bits[] (Number of 4 bits fields > > + * in 64 bit register + 1 entry for a terminator entry). > > + */ > > +#define FTR_FIELDS_NUM 17 > > Please see SMFR0_EL1 for an example of a sysreg that doesn't follow > the 4bits-per-field format. I expect to see more of those in the > future. > > And given that this is always a variable set of fields, why do we need > to define this as a fixed array that only bloats the structure? I'd > rather see a variable array in a side structure. > Yes, it makes more sense to use a variable array here. Do you have any suggestions? xarray? > > + > > struct id_reg_desc { > > const struct sys_reg_desc reg_desc; > > + /* > > + * KVM sanitised ID register value. > > + * It is the default value for per VM emulated ID register. > > + */ > > + u64 kvm_sys_val; > > + /* > > + * Used to validate the ID register values with arm64_check_features(). 
> > + * The last item in the array must be terminated by an item whose > > + * width field is zero as that is expected by arm64_check_features(). > > + * Only feature bits defined in this array are writable. > > + */ > > + struct arm64_ftr_bits ftr_bits[FTR_FIELDS_NUM]; > > + > > + /* > > + * Basically init() is used to setup the KVM sanitised value > > + * stored in kvm_sys_val. > > + */ > > + void (*init)(struct id_reg_desc *idr); > > Given that this callback only builds the value from the sanitised > view, and that it is very cheap (only a set of masking), why do we > bother keeping the value around? It would also allow this structure to > be kept *const*, something that is extremely desirable. > > Also, why do we need an init() method when each sysreg already have a > reset() method? Surely this should be the same thing... > > My gut feeling is that we should only have a callback returning the > limit value computed on the fly. Sure, will use a callback to return the limit value on the fly. > > > }; > > > > +static struct id_reg_desc id_reg_descs[]; > > + > > +/** > > + * arm64_check_features() - Check if a feature register value constitutes > > + * a subset of features indicated by @limit. > > + * > > + * @ftrp: Pointer to an array of arm64_ftr_bits. It must be terminated by > > + * an item whose width field is zero. > > + * @val: The feature register value to check > > + * @limit: The limit value of the feature register > > + * > > + * This function will check if each feature field of @val is the "safe" value > > + * against @limit based on @ftrp[], each of which specifies the target field > > + * (shift, width), whether or not the field is for a signed value (sign), > > + * how the field is determined to be "safe" (type), and the safe value > > + * (safe_val) when type == FTR_EXACT (safe_val won't be used by this > > + * function when type != FTR_EXACT). Any other fields in arm64_ftr_bits > > + * won't be used by this function. If a field value in @val is the same > > + * as the one in @limit, it is always considered the safe value regardless > > + * of the type. For register fields that are not in @ftrp[], only the value > > + * in @limit is considered the safe value. > > + * > > + * Return: 0 if all the fields are safe. Otherwise, return negative errno. > > + */ > > +static int arm64_check_features(const struct arm64_ftr_bits *ftrp, u64 val, u64 limit) > > +{ > > + u64 mask = 0; > > + > > + for (; ftrp->width; ftrp++) { > > + s64 f_val, f_lim, safe_val; > > + > > + f_val = arm64_ftr_value(ftrp, val); > > + f_lim = arm64_ftr_value(ftrp, limit); > > + mask |= arm64_ftr_mask(ftrp); > > + > > + if (f_val == f_lim) > > + safe_val = f_val; > > + else > > + safe_val = arm64_ftr_safe_value(ftrp, f_val, f_lim); > > + > > + if (safe_val != f_val) > > + return -E2BIG; > > + } > > + > > + /* > > + * For fields that are not indicated in ftrp, values in limit are the > > + * safe values. > > + */ > > + if ((val & ~mask) != (limit & ~mask)) > > + return -E2BIG; > > + > > + return 0; > > +} > > I have the feeling that the core code already implements something > similar... Right, it is similar to update_cpu_ftr_reg() in cpufeature.c. But that function can't meet the needs here. 
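As a throwaway illustration of the subset semantics (not part of the patch): with a single unsigned FTR_LOWER_SAFE entry for CSV2 and a limit whose CSV2 field is 1, lowering the field passes, raising it fails, and every bit outside the listed fields has to match the limit exactly:

	static void check_features_demo(void)
	{
		static const struct arm64_ftr_bits csv2_only[] = {
			ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
				       ID_AA64PFR0_EL1_CSV2_SHIFT,
				       ID_AA64PFR0_EL1_CSV2_WIDTH, 0),
			ARM64_FTR_END,
		};
		u64 limit = FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1);

		/* CSV2 == 0 is lower than the limit, hence safe: returns 0 */
		WARN_ON(arm64_check_features(csv2_only, 0, limit) != 0);
		/* Identical value: always safe, returns 0 */
		WARN_ON(arm64_check_features(csv2_only, limit, limit) != 0);
		/* CSV2 == 2 exceeds the limit: returns -E2BIG */
		WARN_ON(arm64_check_features(csv2_only,
				FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 2),
				limit) != -E2BIG);
	}

update_cpu_ftr_reg() instead narrows the system-wide value towards the safe one as CPUs come up; here the caller needs a pass/fail verdict without modifying anything, hence the separate helper.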
> > > + > > static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) > > { > > if (kvm_vcpu_has_pmu(vcpu)) > > @@ -67,7 +145,6 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) > > case SYS_ID_AA64PFR0_EL1: > > if (!vcpu_has_sve(vcpu)) > > val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE); > > - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); > > if (kvm_vgic_global_state.type == VGIC_V3) { > > val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); > > val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); > > @@ -94,15 +171,10 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) > > val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); > > break; > > case SYS_ID_AA64DFR0_EL1: > > - /* Limit debug to ARMv8.0 */ > > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); > > - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); > > /* Set PMUver to the required version */ > > val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), > > vcpu_pmuver(vcpu)); > > - /* Hide SPE from guests */ > > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); > > break; > > case SYS_ID_DFR0_EL1: > > val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > @@ -161,9 +233,15 @@ static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, > > static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, > > u64 val) > > { > > - /* This is what we mean by invariant: you can't change it. */ > > - if (val != read_id_reg(vcpu, rd)) > > - return -EINVAL; > > + int ret; > > + int id = reg_to_encoding(rd); > > + > > + ret = arm64_check_features(id_reg_descs[IDREG_IDX(id)].ftr_bits, val, > > + id_reg_descs[IDREG_IDX(id)].kvm_sys_val); > > + if (ret) > > + return ret; > > + > > + vcpu->kvm->arch.id_regs[IDREG_IDX(id)] = val; > > > > return 0; > > } > > @@ -197,12 +275,47 @@ static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, > > return id_visibility(vcpu, r); > > } > > > > +static void init_id_reg(struct id_reg_desc *idr) > > +{ > > + idr->kvm_sys_val = read_sanitised_ftr_reg(reg_to_encoding(&idr->reg_desc)); > > +} > > + > > +static void init_id_aa64pfr0_el1(struct id_reg_desc *idr) > > +{ > > + u64 val; > > + u32 id = reg_to_encoding(&idr->reg_desc); > > + > > + val = read_sanitised_ftr_reg(id); > > + /* > > + * The default is to expose CSV2 == 1 if the HW isn't affected. > > + * Although this is a per-CPU feature, we make it global because > > + * asymmetric systems are just a nuisance. > > + * > > + * Userspace can override this as long as it doesn't promise > > + * the impossible. > > + */ > > + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) { > > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1); > > + } > > + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) { > > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), 1); > > + } > > + > > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); > > + > > + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); > > What? Why? What if I have a GICv2? What if I have no GIC? :-) Forgot how these two lines got in here. Will remove them. > > > + > > + idr->kvm_sys_val = val; > > +} > > How does this compose with the runtime feature reduction that takes > place in access_nested_id_reg()? 
kvm_sys_val is used as the initial value for the per VM idregs, which is passed into access_nested_id_reg in function access_id_reg(). > > > + > > static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, > > const struct sys_reg_desc *rd, > > u64 val) > > { > > u8 csv2, csv3; > > - u64 sval = val; > > > > /* > > * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as > > @@ -220,16 +333,29 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, > > (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) > > return -EINVAL; > > > > - /* We can only differ with CSV[23], and anything else is an error */ > > - val ^= read_id_reg(vcpu, rd); > > - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | > > - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); > > - if (val) > > - return -EINVAL; > > + return set_id_reg(vcpu, rd, val); > > +} > > > > - vcpu->kvm->arch.id_regs[IDREG_IDX(reg_to_encoding(rd))] = sval; > > +static void init_id_aa64dfr0_el1(struct id_reg_desc *idr) > > +{ > > + u64 val; > > + u32 id = reg_to_encoding(&idr->reg_desc); > > > > - return 0; > > + val = read_sanitised_ftr_reg(id); > > + /* Limit debug to ARMv8.0 */ > > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); > > + /* > > + * Initialise the default PMUver before there is a chance to > > + * create an actual PMU. > > + */ > > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), > > + kvm_arm_pmu_get_pmuver_limit()); > > + /* Hide SPE from guests */ > > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); > > + > > + idr->kvm_sys_val = val; > > } > > > > static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > > @@ -238,6 +364,7 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > > { > > u8 pmuver, host_pmuver; > > bool valid_pmu; > > + int ret; > > > > host_pmuver = kvm_arm_pmu_get_pmuver_limit(); > > > > @@ -257,39 +384,58 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, > > if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) > > return -EINVAL; > > > > - /* We can only differ with PMUver, and anything else is an error */ > > - val ^= read_id_reg(vcpu, rd); > > - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > - if (val) > > - return -EINVAL; > > - > > if (valid_pmu) { > > mutex_lock(&vcpu->kvm->lock); > > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &= > > - ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= > > - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), pmuver); > > + ret = set_id_reg(vcpu, rd, val); > > + if (ret) > > + return ret; > > Next stop, Deadlock City, our final destination. Will fix it. 
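Spelled out, the trap is that the early return leaves kvm->lock held, so the next acquisition of the lock hangs forever. A minimal shape for the fix (sketch only, based on the quoted hunk):

	if (valid_pmu) {
		mutex_lock(&vcpu->kvm->lock);
		ret = set_id_reg(vcpu, rd, val);
		if (ret) {
			/* never return with kvm->lock still held */
			mutex_unlock(&vcpu->kvm->lock);
			return ret;
		}
		/* ... mirror the value into ID_DFR0_EL1.PerfMon ... */
		mutex_unlock(&vcpu->kvm->lock);
	}

The same pattern applies to the ID_DFR0_EL1 accessor that Marc flags below.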
> > > > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &= > > ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP( > > ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), pmuver_to_perfmon(pmuver)); > > mutex_unlock(&vcpu->kvm->lock); > > - } else if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) { > > - set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > } else { > > - clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > + /* We can only differ with PMUver, and anything else is an error */ > > + val ^= read_id_reg(vcpu, rd); > > + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > + if (val) > > + return -EINVAL; > > I find it very odd that you add all this infrastructure to check for > writable fields, and yet have to keep this comparison. It makes me think > that the data structures are not necessarily the right ones. The comparison is still here because this patch doesn't yet make the whole ID register writable: on the invalid-PMU path, set_id_reg() (which performs all the checks) is never called. It can be removed once the whole ID register is made writable. > > > + > > + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) > > + set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > + else > > + clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > + > > } > > > > return 0; > > } > > > > +static void init_id_dfr0_el1(struct id_reg_desc *idr) > > +{ > > + u64 val; > > + u32 id = reg_to_encoding(&idr->reg_desc); > > + > > + val = read_sanitised_ftr_reg(id); > > + /* > > + * Initialise the default PMUver before there is a chance to > > + * create an actual PMU. > > + */ > > + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), > > + kvm_arm_pmu_get_pmuver_limit()); > > + > > + idr->kvm_sys_val = val; > > +} > > + > > static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > > const struct sys_reg_desc *rd, > > u64 val) > > { > > u8 perfmon, host_perfmon; > > bool valid_pmu; > > + int ret; > > > > host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); > > > > @@ -310,42 +456,46 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > > if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) > > return -EINVAL; > > > > - /* We can only differ with PerfMon, and anything else is an error */ > > - val ^= read_id_reg(vcpu, rd); > > - val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > - if (val) > > - return -EINVAL; > > - > > if (valid_pmu) { > > mutex_lock(&vcpu->kvm->lock); > > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &= > > - ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP( > > - ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), perfmon); > > + ret = set_id_reg(vcpu, rd, val); > > + if (ret) > > + return ret; > > Same player, shoot again. Will fix it.
> > > > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &= > > ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); > > vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= FIELD_PREP( > > ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), perfmon_to_pmuver(perfmon)); > > mutex_unlock(&vcpu->kvm->lock); > > - } else if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) { > > - set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > } else { > > - clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > + /* We can only differ with PerfMon, and anything else is an error */ > > + val ^= read_id_reg(vcpu, rd); > > + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); > > + if (val) > > + return -EINVAL; > > + > > + if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) > > + set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > + else > > + clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); > > } > > Same remarks. > > > > > return 0; > > } > > > > /* sys_reg_desc initialiser for known cpufeature ID registers */ > > +#define SYS_DESC_SANITISED(name) { \ > > + SYS_DESC(SYS_##name), \ > > + .access = access_id_reg, \ > > + .get_user = get_id_reg, \ > > + .set_user = set_id_reg, \ > > + .visibility = id_visibility, \ > > +} > > + > > #define ID_SANITISED(name) { \ > > - .reg_desc = { \ > > - SYS_DESC(SYS_##name), \ > > - .access = access_id_reg, \ > > - .get_user = get_id_reg, \ > > - .set_user = set_id_reg, \ > > - .visibility = id_visibility, \ > > - }, \ > > + .reg_desc = SYS_DESC_SANITISED(name), \ > > + .ftr_bits = { ARM64_FTR_END, }, \ > > + .init = init_id_reg, \ > > } > > > > /* sys_reg_desc initialiser for known cpufeature ID registers */ > > @@ -357,6 +507,8 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > > .set_user = set_id_reg, \ > > .visibility = aa32_id_visibility, \ > > }, \ > > + .ftr_bits = { ARM64_FTR_END, }, \ > > + .init = init_id_reg, \ > > } > > > > /* > > @@ -372,6 +524,7 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > > .set_user = set_id_reg, \ > > .visibility = raz_visibility \ > > }, \ > > + .ftr_bits = { ARM64_FTR_END, }, \ > > } > > > > /* > > @@ -387,9 +540,10 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, > > .set_user = set_id_reg, \ > > .visibility = raz_visibility, \ > > }, \ > > + .ftr_bits = { ARM64_FTR_END, }, \ > > } > > > > -static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > > +static struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > > /* > > * ID regs: all ID_SANITISED() entries here must have corresponding > > * entries in arm64_ftr_regs[]. 
> > @@ -405,6 +559,11 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > > .get_user = get_id_reg, > > .set_user = set_id_dfr0_el1, > > .visibility = aa32_id_visibility, }, > > + .ftr_bits = { > > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > > + ID_DFR0_EL1_PerfMon_SHIFT, ID_DFR0_EL1_PerfMon_WIDTH, 0), > > + ARM64_FTR_END, }, > > + .init = init_id_dfr0_el1, > > }, > > ID_HIDDEN(ID_AFR0_EL1), > > AA32_ID_SANITISED(ID_MMFR0_EL1), > > @@ -439,6 +598,13 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = { > > .access = access_id_reg, > > .get_user = get_id_reg, > > .set_user = set_id_aa64pfr0_el1, }, > > + .ftr_bits = { > > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > > + ID_AA64PFR0_EL1_CSV2_SHIFT, ID_AA64PFR0_EL1_CSV2_WIDTH, 0), > > + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, > > + ID_AA64PFR0_EL1_CSV3_SHIFT, ID_AA64PFR0_EL1_CSV3_WIDTH, 0), > > + ARM64_FTR_END, }, > > It really strikes me that you are 100% duplicating data that is > already in ftr_id_aa64pfr0[]. Only that this is a subset of the > existing data. > > You could instead have your 'init()' callback return a pair of values: > the default value based on the sanitised one, and a 64bit mask. At > this stage, you'll realise that this looks a lot like the feature > override, and that you should be able to reuse some of the existing > infrastructure. Sure, will try to improve this by your suggestion. > > Thanks, > > M. > > -- > Without deviation from the norm, progress is not possible. Thanks, Jing
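On the variable-array question raised earlier in the thread, one option that avoids both an xarray and the fixed FTR_FIELDS_NUM sizing is a pointer to a per-register const array (illustrative sketch only; the array name is invented here):

	/* In the descriptor, replace the embedded fixed-size array with: */
	/*	const struct arm64_ftr_bits *ftr_bits;  NULL, or terminated array */

	static const struct arm64_ftr_bits ftr_id_aa64pfr0_writable[] = {
		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
			       ID_AA64PFR0_EL1_CSV2_SHIFT,
			       ID_AA64PFR0_EL1_CSV2_WIDTH, 0),
		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
			       ID_AA64PFR0_EL1_CSV3_SHIFT,
			       ID_AA64PFR0_EL1_CSV3_WIDTH, 0),
		ARM64_FTR_END,
	};

Each register then pays only for the fields it actually exposes, and registers like SMFR0_EL1, whose fields are not uniformly 4 bits wide, need no special sizing.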
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index fc2c739f48f1..493ec530eefc 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -64,6 +64,30 @@ struct arm64_ftr_bits { s64 safe_val; /* safe value for FTR_EXACT features */ }; +#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + { \ + .sign = SIGNED, \ + .visible = VISIBLE, \ + .strict = STRICT, \ + .type = TYPE, \ + .shift = SHIFT, \ + .width = WIDTH, \ + .safe_val = SAFE_VAL, \ + } + +/* Define a feature with unsigned values */ +#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +/* Define a feature with a signed value */ +#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ + __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) + +#define ARM64_FTR_END \ + { \ + .width = 0, \ + } + /* * Describe the early feature override to the core override code: * @@ -911,6 +935,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) return 8; } +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur); struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id); extern struct arm64_ftr_override id_aa64mmfr1_override; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 102860ba896d..aa83dd79e7ff 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -1013,7 +1013,7 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, struct kvm_arm_copy_mte_tags *copy_tags); -void kvm_arm_set_default_id_regs(struct kvm *kvm); +void kvm_arm_init_id_regs(struct kvm *kvm); /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 23bd2a926b74..e18848ee4b98 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -139,30 +139,6 @@ void dump_cpu_features(void) pr_emerg("0x%*pb\n", ARM64_NCAPS, &cpu_hwcaps); } -#define __ARM64_FTR_BITS(SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - { \ - .sign = SIGNED, \ - .visible = VISIBLE, \ - .strict = STRICT, \ - .type = TYPE, \ - .shift = SHIFT, \ - .width = WIDTH, \ - .safe_val = SAFE_VAL, \ - } - -/* Define a feature with unsigned values */ -#define ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_UNSIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) - -/* Define a feature with a signed value */ -#define S_ARM64_FTR_BITS(VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) \ - __ARM64_FTR_BITS(FTR_SIGNED, VISIBLE, STRICT, TYPE, SHIFT, WIDTH, SAFE_VAL) - -#define ARM64_FTR_END \ - { \ - .width = 0, \ - } - static void cpu_enable_cnp(struct arm64_cpu_capabilities const *cap); static bool __system_matches_cap(unsigned int n); @@ -790,7 +766,7 @@ static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, return reg; } -static s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, +s64 arm64_ftr_safe_value(const struct arm64_ftr_bits *ftrp, s64 new, s64 cur) { s64 ret = 0; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fb2de2cb98cb..e539d9ca9d01 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -135,7 +135,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* The maximum number 
of VCPUs is limited by the host's GIC model */ kvm->max_vcpus = kvm_arm_default_max_vcpus(); - kvm_arm_set_default_id_regs(kvm); + kvm_arm_init_id_regs(kvm); kvm_arm_init_hypercalls(kvm); return 0; diff --git a/arch/arm64/kvm/id_regs.c b/arch/arm64/kvm/id_regs.c index 9956c99d20f7..726b810b6e06 100644 --- a/arch/arm64/kvm/id_regs.c +++ b/arch/arm64/kvm/id_regs.c @@ -18,10 +18,88 @@ #include "sys_regs.h" +/* + * Number of entries in id_reg_desc's ftr_bits[] (Number of 4 bits fields + * in 64 bit register + 1 entry for a terminator entry). + */ +#define FTR_FIELDS_NUM 17 + struct id_reg_desc { const struct sys_reg_desc reg_desc; + /* + * KVM sanitised ID register value. + * It is the default value for per VM emulated ID register. + */ + u64 kvm_sys_val; + /* + * Used to validate the ID register values with arm64_check_features(). + * The last item in the array must be terminated by an item whose + * width field is zero as that is expected by arm64_check_features(). + * Only feature bits defined in this array are writable. + */ + struct arm64_ftr_bits ftr_bits[FTR_FIELDS_NUM]; + + /* + * Basically init() is used to setup the KVM sanitised value + * stored in kvm_sys_val. + */ + void (*init)(struct id_reg_desc *idr); }; +static struct id_reg_desc id_reg_descs[]; + +/** + * arm64_check_features() - Check if a feature register value constitutes + * a subset of features indicated by @limit. + * + * @ftrp: Pointer to an array of arm64_ftr_bits. It must be terminated by + * an item whose width field is zero. + * @val: The feature register value to check + * @limit: The limit value of the feature register + * + * This function will check if each feature field of @val is the "safe" value + * against @limit based on @ftrp[], each of which specifies the target field + * (shift, width), whether or not the field is for a signed value (sign), + * how the field is determined to be "safe" (type), and the safe value + * (safe_val) when type == FTR_EXACT (safe_val won't be used by this + * function when type != FTR_EXACT). Any other fields in arm64_ftr_bits + * won't be used by this function. If a field value in @val is the same + * as the one in @limit, it is always considered the safe value regardless + * of the type. For register fields that are not in @ftrp[], only the value + * in @limit is considered the safe value. + * + * Return: 0 if all the fields are safe. Otherwise, return negative errno. + */ +static int arm64_check_features(const struct arm64_ftr_bits *ftrp, u64 val, u64 limit) +{ + u64 mask = 0; + + for (; ftrp->width; ftrp++) { + s64 f_val, f_lim, safe_val; + + f_val = arm64_ftr_value(ftrp, val); + f_lim = arm64_ftr_value(ftrp, limit); + mask |= arm64_ftr_mask(ftrp); + + if (f_val == f_lim) + safe_val = f_val; + else + safe_val = arm64_ftr_safe_value(ftrp, f_val, f_lim); + + if (safe_val != f_val) + return -E2BIG; + } + + /* + * For fields that are not indicated in ftrp, values in limit are the + * safe values. 
+ */ + if ((val & ~mask) != (limit & ~mask)) + return -E2BIG; + + return 0; +} + static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) { if (kvm_vcpu_has_pmu(vcpu)) @@ -67,7 +145,6 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) case SYS_ID_AA64PFR0_EL1: if (!vcpu_has_sve(vcpu)) val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_SVE); - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); if (kvm_vgic_global_state.type == VGIC_V3) { val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); @@ -94,15 +171,10 @@ u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT); break; case SYS_ID_AA64DFR0_EL1: - /* Limit debug to ARMv8.0 */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); /* Set PMUver to the required version */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), vcpu_pmuver(vcpu)); - /* Hide SPE from guests */ - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); @@ -161,9 +233,15 @@ static int get_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { - /* This is what we mean by invariant: you can't change it. */ - if (val != read_id_reg(vcpu, rd)) - return -EINVAL; + int ret; + int id = reg_to_encoding(rd); + + ret = arm64_check_features(id_reg_descs[IDREG_IDX(id)].ftr_bits, val, + id_reg_descs[IDREG_IDX(id)].kvm_sys_val); + if (ret) + return ret; + + vcpu->kvm->arch.id_regs[IDREG_IDX(id)] = val; return 0; } @@ -197,12 +275,47 @@ static unsigned int aa32_id_visibility(const struct kvm_vcpu *vcpu, return id_visibility(vcpu, r); } +static void init_id_reg(struct id_reg_desc *idr) +{ + idr->kvm_sys_val = read_sanitised_ftr_reg(reg_to_encoding(&idr->reg_desc)); +} + +static void init_id_aa64pfr0_el1(struct id_reg_desc *idr) +{ + u64 val; + u32 id = reg_to_encoding(&idr->reg_desc); + + val = read_sanitised_ftr_reg(id); + /* + * The default is to expose CSV2 == 1 if the HW isn't affected. + * Although this is a per-CPU feature, we make it global because + * asymmetric systems are just a nuisance. + * + * Userspace can override this as long as it doesn't promise + * the impossible. 
+ */ + if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1); + } + if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), 1); + } + + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_AMU); + + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), 1); + + idr->kvm_sys_val = val; +} + static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, u64 val) { u8 csv2, csv3; - u64 sval = val; /* * Allow AA64PFR0_EL1.CSV2 to be set from userspace as long as @@ -220,16 +333,29 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, (csv3 && arm64_get_meltdown_state() != SPECTRE_UNAFFECTED)) return -EINVAL; - /* We can only differ with CSV[23], and anything else is an error */ - val ^= read_id_reg(vcpu, rd); - val &= ~(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2) | - ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3)); - if (val) - return -EINVAL; + return set_id_reg(vcpu, rd, val); +} - vcpu->kvm->arch.id_regs[IDREG_IDX(reg_to_encoding(rd))] = sval; +static void init_id_aa64dfr0_el1(struct id_reg_desc *idr) +{ + u64 val; + u32 id = reg_to_encoding(&idr->reg_desc); - return 0; + val = read_sanitised_ftr_reg(id); + /* Limit debug to ARMv8.0 */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); + /* + * Initialise the default PMUver before there is a chance to + * create an actual PMU. + */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), + kvm_arm_pmu_get_pmuver_limit()); + /* Hide SPE from guests */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); + + idr->kvm_sys_val = val; } static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, @@ -238,6 +364,7 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, { u8 pmuver, host_pmuver; bool valid_pmu; + int ret; host_pmuver = kvm_arm_pmu_get_pmuver_limit(); @@ -257,39 +384,58 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) return -EINVAL; - /* We can only differ with PMUver, and anything else is an error */ - val ^= read_id_reg(vcpu, rd); - val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); - if (val) - return -EINVAL; - if (valid_pmu) { mutex_lock(&vcpu->kvm->lock); - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &= - ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); - vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), pmuver); + ret = set_id_reg(vcpu, rd, val); + if (ret) + return ret; vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP( ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), pmuver_to_perfmon(pmuver)); mutex_unlock(&vcpu->kvm->lock); - } else if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) { - set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); } else { - clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags); + /* We can only differ with PMUver, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + if (val) + return -EINVAL; + + if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + 
> @@ -257,39 +384,58 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
>  	if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
>  		return -EINVAL;
>  
> -	/* We can only differ with PMUver, and anything else is an error */
> -	val ^= read_id_reg(vcpu, rd);
> -	val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
> -	if (val)
> -		return -EINVAL;
> -
>  	if (valid_pmu) {
>  		mutex_lock(&vcpu->kvm->lock);
> -		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &=
> -			~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
> -		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |=
> -			FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), pmuver);
> +		ret = set_id_reg(vcpu, rd, val);
> +		if (ret)
> +			return ret;
>  
>  		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &=
>  			~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
>  		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP(
>  			ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
>  			pmuver_to_perfmon(pmuver));
>  		mutex_unlock(&vcpu->kvm->lock);
> -	} else if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) {
> -		set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
>  	} else {
> -		clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
> +		/* We can only differ with PMUver, and anything else is an error */
> +		val ^= read_id_reg(vcpu, rd);
> +		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
> +		if (val)
> +			return -EINVAL;
> +
> +		if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
> +			set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
> +		else
> +			clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
> +	}
>  
>  	return 0;
>  }
>  
> +static void init_id_dfr0_el1(struct id_reg_desc *idr)
> +{
> +	u64 val;
> +	u32 id = reg_to_encoding(&idr->reg_desc);
> +
> +	val = read_sanitised_ftr_reg(id);
> +	/*
> +	 * Initialise the default PMUver before there is a chance to
> +	 * create an actual PMU.
> +	 */
> +	val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
> +	val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon),
> +			  kvm_arm_pmu_get_pmuver_limit());
> +
> +	idr->kvm_sys_val = val;
> +}
> +
>  static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
>  			   const struct sys_reg_desc *rd,
>  			   u64 val)
>  {
>  	u8 perfmon, host_perfmon;
>  	bool valid_pmu;
> +	int ret;
>  
>  	host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
>  
> @@ -310,42 +456,46 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
>  	if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
>  		return -EINVAL;
>  
> -	/* We can only differ with PerfMon, and anything else is an error */
> -	val ^= read_id_reg(vcpu, rd);
> -	val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
> -	if (val)
> -		return -EINVAL;
> -
>  	if (valid_pmu) {
>  		mutex_lock(&vcpu->kvm->lock);
> -		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] &=
> -			~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
> -		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_DFR0_EL1)] |= FIELD_PREP(
> -			ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), perfmon);
> +		ret = set_id_reg(vcpu, rd, val);
> +		if (ret)
> +			return ret;
>  
>  		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &=
>  			~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
>  		vcpu->kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |= FIELD_PREP(
>  			ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
>  			perfmon_to_pmuver(perfmon));
>  		mutex_unlock(&vcpu->kvm->lock);
> -	} else if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF) {
> -		set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
>  	} else {
> -		clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
> +		/* We can only differ with PerfMon, and anything else is an error */
> +		val ^= read_id_reg(vcpu, rd);
> +		val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon);
> +		if (val)
> +			return -EINVAL;
> +
> +		if (perfmon == ID_DFR0_EL1_PerfMon_IMPDEF)
> +			set_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
> +		else
> +			clear_bit(KVM_ARCH_FLAG_VCPU_HAS_IMP_DEF_PMU, &vcpu->kvm->arch.flags);
>  	}
>  
>  	return 0;
>  }
>  
>  /* sys_reg_desc initialiser for known cpufeature ID registers */
> +#define SYS_DESC_SANITISED(name) {			\
> +	SYS_DESC(SYS_##name),				\
> +	.access = access_id_reg,			\
> +	.get_user = get_id_reg,				\
> +	.set_user = set_id_reg,				\
> +	.visibility = id_visibility,			\
> +}
> +
>  #define ID_SANITISED(name) {				\
> -	.reg_desc = {					\
> -		SYS_DESC(SYS_##name),			\
> -		.access = access_id_reg,		\
> -		.get_user = get_id_reg,			\
> -		.set_user = set_id_reg,			\
> -		.visibility = id_visibility,		\
> -	},						\
> +	.reg_desc = SYS_DESC_SANITISED(name),		\
> +	.ftr_bits = { ARM64_FTR_END, },			\
> +	.init = init_id_reg,				\
> }
>  
>  /* sys_reg_desc initialiser for known cpufeature ID registers */
> @@ -357,6 +507,8 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
>  		.set_user = set_id_reg,			\
>  		.visibility = aa32_id_visibility,	\
>  	},						\
> +	.ftr_bits = { ARM64_FTR_END, },			\
> +	.init = init_id_reg,				\
>  }
>  
>  /*
> @@ -372,6 +524,7 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
>  		.set_user = set_id_reg,			\
>  		.visibility = raz_visibility		\
>  	},						\
> +	.ftr_bits = { ARM64_FTR_END, },			\
>  }
>  
>  /*
> @@ -387,9 +540,10 @@ static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
>  		.set_user = set_id_reg,			\
>  		.visibility = raz_visibility,		\
>  	},						\
> +	.ftr_bits = { ARM64_FTR_END, },			\
>  }
>  
> -static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = {
> +static struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = {
>  	/*
>  	 * ID regs: all ID_SANITISED() entries here must have corresponding
>  	 * entries in arm64_ftr_regs[].
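
Since the definition itself lives in an earlier patch of the series, for anyone reviewing this hunk in isolation: reconstructed from its usage here, the descriptor these macros populate should look roughly like the sketch below. Field order may differ, and FTR_FIELDS_NUM is my placeholder for whatever bound the array actually has. Note that the table stops being const precisely because the ->init() hook writes kvm_sys_val back into it.

struct id_reg_desc {
	struct sys_reg_desc	reg_desc;

	/*
	 * KVM sanitised register value: computed once at table init,
	 * then used as the upper limit for userspace writes.
	 */
	u64			kvm_sys_val;

	/* writable fields, terminated by ARM64_FTR_END */
	struct arm64_ftr_bits	ftr_bits[FTR_FIELDS_NUM];

	/* optional per-register setup of kvm_sys_val */
	void			(*init)(struct id_reg_desc *idr);
};
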
> @@ -405,6 +559,11 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = {
>  	  .get_user = get_id_reg,
>  	  .set_user = set_id_dfr0_el1,
>  	  .visibility = aa32_id_visibility, },
> +	  .ftr_bits = {
> +		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
> +			ID_DFR0_EL1_PerfMon_SHIFT, ID_DFR0_EL1_PerfMon_WIDTH, 0),
> +		ARM64_FTR_END, },
> +	  .init = init_id_dfr0_el1,
>  	},
>  	ID_HIDDEN(ID_AFR0_EL1),
>  	AA32_ID_SANITISED(ID_MMFR0_EL1),
> @@ -439,6 +598,13 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = {
>  	  .access = access_id_reg,
>  	  .get_user = get_id_reg,
>  	  .set_user = set_id_aa64pfr0_el1, },
> +	  .ftr_bits = {
> +		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
> +			ID_AA64PFR0_EL1_CSV2_SHIFT, ID_AA64PFR0_EL1_CSV2_WIDTH, 0),
> +		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
> +			ID_AA64PFR0_EL1_CSV3_SHIFT, ID_AA64PFR0_EL1_CSV3_WIDTH, 0),
> +		ARM64_FTR_END, },
> +	  .init = init_id_aa64pfr0_el1,
>  	},
>  	ID_SANITISED(ID_AA64PFR1_EL1),
>  	ID_UNALLOCATED(4, 2),
> @@ -454,6 +620,11 @@ static const struct id_reg_desc id_reg_descs[KVM_ARM_ID_REG_NUM] = {
>  	  .access = access_id_reg,
>  	  .get_user = get_id_reg,
>  	  .set_user = set_id_aa64dfr0_el1, },
> +	  .ftr_bits = {
> +		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
> +			ID_AA64DFR0_EL1_PMUVer_SHIFT, ID_AA64DFR0_EL1_PMUVer_WIDTH, 0),
> +		ARM64_FTR_END, },
> +	  .init = init_id_aa64dfr0_el1,
>  	},
>  	ID_SANITISED(ID_AA64DFR1_EL1),
>  	ID_UNALLOCATED(5, 2),
> @@ -583,7 +754,7 @@ int kvm_arm_set_id_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
>  	return ret;
>  }
>  
> -bool kvm_arm_check_idreg_table(void)
> +bool kvm_arm_idreg_table_init(void)
>  {
>  	unsigned int i;
>  
> @@ -595,6 +766,9 @@ bool kvm_arm_check_idreg_table(void)
>  			       &id_reg_descs[i].reg_desc, i);
>  			return false;
>  		}
> +
> +		if (id_reg_descs[i].init)
> +			id_reg_descs[i].init(&id_reg_descs[i]);
>  	}
>  
>  	return true;
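
Running the ->init() hooks from the same boot-time walk is neat: from here on, exposing another field to userspace is a table edit rather than a new set_user handler. Purely to illustrate the shape (hypothetical, and whether this particular field should be writable is a separate discussion), opening up SVE would presumably look like:

	/* in the ID_AA64PFR0_EL1 entry of id_reg_descs[] */
	.ftr_bits = {
		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
			ID_AA64PFR0_EL1_CSV2_SHIFT, ID_AA64PFR0_EL1_CSV2_WIDTH, 0),
		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
			ID_AA64PFR0_EL1_CSV3_SHIFT, ID_AA64PFR0_EL1_CSV3_WIDTH, 0),
		/* hypothetical addition: let userspace lower SVE */
		ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE,
			ID_AA64PFR0_EL1_SVE_SHIFT, ID_AA64PFR0_EL1_SVE_WIDTH, 0),
		ARM64_FTR_END, },
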
> @@ -618,53 +792,16 @@ int kvm_arm_walk_id_regs(struct kvm_vcpu *vcpu, u64 __user *uind)
>  }
>  
>  /*
> - * Set the guest's ID registers that are defined in id_reg_descs[]
> - * with ID_SANITISED() to the host's sanitized value.
> + * Initialize the guest's ID registers with KVM sanitised values that were setup
> + * during ID register descriptors initialization.
>   */
> -void kvm_arm_set_default_id_regs(struct kvm *kvm)
> +void kvm_arm_init_id_regs(struct kvm *kvm)
>  {
>  	int i;
>  	u32 id;
> -	u64 val;
>  
>  	for (i = 0; i < ARRAY_SIZE(id_reg_descs); i++) {
>  		id = reg_to_encoding(&id_reg_descs[i].reg_desc);
> -
> -		if (id_reg_descs[i].reg_desc.visibility == raz_visibility)
> -			/* Hidden or reserved ID register */
> -			continue;
> -
> -		val = read_sanitised_ftr_reg(id);
> -		kvm->arch.id_regs[IDREG_IDX(id)] = val;
> +		kvm->arch.id_regs[IDREG_IDX(id)] = id_reg_descs[i].kvm_sys_val;
>  	}
> -
> -	/*
> -	 * The default is to expose CSV2 == 1 if the HW isn't affected.
> -	 * Although this is a per-CPU feature, we make it global because
> -	 * asymmetric systems are just a nuisance.
> -	 *
> -	 * Userspace can override this as long as it doesn't promise
> -	 * the impossible.
> -	 */
> -	val = kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)];
> -
> -	if (arm64_get_spectre_v2_state() == SPECTRE_UNAFFECTED) {
> -		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2);
> -		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV2), 1);
> -	}
> -	if (arm64_get_meltdown_state() == SPECTRE_UNAFFECTED) {
> -		val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3);
> -		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_CSV3), 1);
> -	}
> -
> -	kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64PFR0_EL1)] = val;
> -
> -	/*
> -	 * Initialise the default PMUver before there is a chance to
> -	 * create an actual PMU.
> -	 */
> -	kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] &=
> -		~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
> -	kvm->arch.id_regs[IDREG_IDX(SYS_ID_AA64DFR0_EL1)] |=
> -		FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
> -			   kvm_arm_pmu_get_pmuver_limit());
>  }
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 608a0378bdae..61b9adfd5d5e 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -2912,14 +2912,13 @@ int __init kvm_sys_reg_table_init(void)
>  	unsigned int i;
>  
>  	/* Make sure tables are unique and in order. */
> -	valid &= kvm_arm_check_idreg_table();
>  	valid &= check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false);
>  	valid &= check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true);
>  	valid &= check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true);
>  	valid &= check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true);
>  	valid &= check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true);
>  	valid &= check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false);
> -
> +	valid &= kvm_arm_idreg_table_init();
>  	if (!valid)
>  		return -EINVAL;
>  
> diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
> index 8fd0020c67ca..55484d1615be 100644
> --- a/arch/arm64/kvm/sys_regs.h
> +++ b/arch/arm64/kvm/sys_regs.h
> @@ -260,7 +260,7 @@ int emulate_id_reg(struct kvm_vcpu *vcpu, struct sys_reg_params *params);
>  void kvm_arm_reset_id_regs(struct kvm_vcpu *vcpu);
>  int kvm_arm_get_id_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
>  int kvm_arm_set_id_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg);
> -bool kvm_arm_check_idreg_table(void);
> +bool kvm_arm_idreg_table_init(void);
>  int kvm_arm_walk_id_regs(struct kvm_vcpu *vcpu, u64 __user *uind);
>  u64 kvm_arm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id);
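
One last ordering note: moving kvm_arm_idreg_table_init() to the tail of kvm_sys_reg_table_init() reads fine, since the ->init() hooks only depend on the sanitised host values being available, not on the other tables. To double-check my understanding of the resulting lifecycle, here it is as a standalone toy (plain userspace C, obviously not kernel code; one register, one unsigned FTR_LOWER_SAFE-style field filling the whole register):

#include <stdint.h>
#include <stdio.h>

/* one descriptor with a single lower-safe writable field */
struct toy_desc {
	uint64_t kvm_sys_val;		/* limit, computed once at "boot" */
	void (*init)(struct toy_desc *d);
};

static void toy_init(struct toy_desc *d)
{
	d->kvm_sys_val = 0x11;		/* stand-in for the sanitised host value */
}

static struct toy_desc toy_desc = { .init = toy_init };
static uint64_t vm_id_reg;		/* per-VM copy of the register */

static int toy_set_id_reg(uint64_t val)
{
	if (val > toy_desc.kvm_sys_val)	/* arm64_check_features(), morally */
		return -1;		/* -E2BIG in the real thing */
	vm_id_reg = val;
	return 0;
}

int main(void)
{
	toy_desc.init(&toy_desc);		/* kvm_arm_idreg_table_init() */
	vm_id_reg = toy_desc.kvm_sys_val;	/* kvm_arm_init_id_regs() */

	printf("lower: %d\n", toy_set_id_reg(0x10));	/* 0: accepted */
	printf("raise: %d\n", toy_set_id_reg(0x12));	/* -1: rejected */
	return 0;
}

If that matches the intent, the refactor itself looks consistent to me.
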