Message ID | 20240625133508.259829-9-maz@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | KVM: arm64: nv: Add support for address translation instructions | expand |
Hi, On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote: > On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the > combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk. > > However, there is a great deal of complexity coming from combining > the S1 and S2 attributes to report something consistent in PAR_EL1. > > This is an absolute mine field, and I have a splitting headache. > > [..] > +static u8 compute_sh(u8 attr, u64 desc) > +{ > + /* Any form of device, as well as NC has SH[1:0]=0b10 */ > + if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) > + return 0b10; > + > + return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10 (outer-shareable), which seems to be contradicting PAREncodeShareability(). > +} > + > +static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, > + struct kvm_s2_trans *tr) > +{ > + u8 s1_parattr, s2_memattr, final_attr; > + u64 par; > + > + /* If S2 has failed to translate, report the damage */ > + if (tr->esr) { > + par = SYS_PAR_EL1_RES1; > + par |= SYS_PAR_EL1_F; > + par |= SYS_PAR_EL1_S; > + par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr); > + return par; > + } > + > + s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par); > + s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc); > + > + if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) { > + if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP)) > + s2_memattr &= ~BIT(3); > + > + /* Combination of R_VRJSW and R_RHWZM */ > + switch (s2_memattr) { > + case 0b0101: > + if (MEMATTR_IS_DEVICE(s1_parattr)) > + final_attr = s1_parattr; > + else > + final_attr = MEMATTR(NC, NC); > + break; > + case 0b0110: > + case 0b1110: > + final_attr = MEMATTR(WbRaWa, WbRaWa); > + break; > + case 0b0111: > + case 0b1111: > + /* Preserve S1 attribute */ > + final_attr = s1_parattr; > + break; > + case 0b0100: > + case 0b1100: > + case 0b1101: > + /* Reserved, do something non-silly */ > + final_attr = s1_parattr; > + break; > + default: > + /* MemAttr[2]=0, Device from S2 */ > + final_attr = s2_memattr & GENMASK(1,0) << 2; > + } > + } else { > + /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ > + u8 s2_parattr = s2_memattr_to_attr(s2_memattr); > + > + if (MEMATTR_IS_DEVICE(s1_parattr) || > + MEMATTR_IS_DEVICE(s2_parattr)) { > + final_attr = min(s1_parattr, s2_parattr); > + } else { > + /* At this stage, this is memory vs memory */ > + final_attr = combine_s1_s2_attr(s1_parattr & 0xf, > + s2_parattr & 0xf); > + final_attr |= combine_s1_s2_attr(s1_parattr >> 4, > + s2_parattr >> 4) << 4; > + } > + } > + > + if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) && > + !MEMATTR_IS_DEVICE(final_attr)) > + final_attr = MEMATTR(NC, NC); > + > + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); > + par |= tr->output & GENMASK(47, 12); > + par |= FIELD_PREP(SYS_PAR_EL1_SH, > + compute_sh(final_attr, tr->desc)); > + > + return par; > It seems that the code doesn't combine shareability attributes, as per rule RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up calling S2CombineS1Shareability(). Thanks, Alex
On Thu, 18 Jul 2024 16:10:20 +0100, Alexandru Elisei <alexandru.elisei@arm.com> wrote: > > Hi, > > On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote: > > On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the > > combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk. > > > > However, there is a great deal of complexity coming from combining > > the S1 and S2 attributes to report something consistent in PAR_EL1. > > > > This is an absolute mine field, and I have a splitting headache. > > > > [..] > > +static u8 compute_sh(u8 attr, u64 desc) > > +{ > > + /* Any form of device, as well as NC has SH[1:0]=0b10 */ > > + if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) > > + return 0b10; > > + > > + return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; > > If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10 > (outer-shareable), which seems to be contradicting PAREncodeShareability(). Yup, well caught. > > + par |= FIELD_PREP(SYS_PAR_EL1_SH, > > + compute_sh(final_attr, tr->desc)); > > + > > + return par; > > > > It seems that the code doesn't combine shareability attributes, as per rule > RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up > calling S2CombineS1Shareability(). That as well. See below what I'm stashing on top. Thanks, M. diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index e66c97fc1fd3..28c4344d1c34 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -459,13 +459,34 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2) return final; } +#define ATTR_NSH 0b00 +#define ATTR_RSV 0b01 +#define ATTR_OSH 0b10 +#define ATTR_ISH 0b11 + static u8 compute_sh(u8 attr, u64 desc) { + u8 sh; + /* Any form of device, as well as NC has SH[1:0]=0b10 */ if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) - return 0b10; + return ATTR_OSH; + + sh = FIELD_GET(PTE_SHARED, desc); + if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ + sh = ATTR_NSH; + + return sh; +} + +static u8 combine_sh(u8 s1_sh, u8 s2_sh) +{ + if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) + return ATTR_OSH; + if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH) + return ATTR_ISH; - return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; + return ATTR_NSH; } static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, @@ -540,7 +561,8 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); par |= tr->output & GENMASK(47, 12); par |= FIELD_PREP(SYS_PAR_EL1_SH, - compute_sh(final_attr, tr->desc)); + combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), + compute_sh(final_attr, tr->desc))); return par; }
Hi, On Sat, Jul 20, 2024 at 10:49:29AM +0100, Marc Zyngier wrote: > On Thu, 18 Jul 2024 16:10:20 +0100, > Alexandru Elisei <alexandru.elisei@arm.com> wrote: > > > > Hi, > > > > On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote: > > > On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the > > > combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk. > > > > > > However, there is a great deal of complexity coming from combining > > > the S1 and S2 attributes to report something consistent in PAR_EL1. > > > > > > This is an absolute mine field, and I have a splitting headache. > > > > > > [..] > > > +static u8 compute_sh(u8 attr, u64 desc) > > > +{ > > > + /* Any form of device, as well as NC has SH[1:0]=0b10 */ > > > + if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) > > > + return 0b10; > > > + > > > + return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; > > > > If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10 > > (outer-shareable), which seems to be contradicting PAREncodeShareability(). > > Yup, well caught. > > > > + par |= FIELD_PREP(SYS_PAR_EL1_SH, > > > + compute_sh(final_attr, tr->desc)); > > > + > > > + return par; > > > > > > > It seems that the code doesn't combine shareability attributes, as per rule > > RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up > > calling S2CombineS1Shareability(). > > That as well. See below what I'm stashing on top. > > Thanks, > > M. > > diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c > index e66c97fc1fd3..28c4344d1c34 100644 > --- a/arch/arm64/kvm/at.c > +++ b/arch/arm64/kvm/at.c > @@ -459,13 +459,34 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2) > return final; > } > > +#define ATTR_NSH 0b00 > +#define ATTR_RSV 0b01 > +#define ATTR_OSH 0b10 > +#define ATTR_ISH 0b11 Matches Table D8-89 from DDI 0487K.a. > + > static u8 compute_sh(u8 attr, u64 desc) > { > + u8 sh; > + > /* Any form of device, as well as NC has SH[1:0]=0b10 */ > if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) > - return 0b10; > + return ATTR_OSH; > + > + sh = FIELD_GET(PTE_SHARED, desc); > + if (sh == ATTR_RSV) /* Reserved, mapped to NSH */ > + sh = ATTR_NSH; > + > + return sh; > +} Matches PAREncodeShareability(). > + > +static u8 combine_sh(u8 s1_sh, u8 s2_sh) > +{ > + if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH) > + return ATTR_OSH; > + if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH) > + return ATTR_ISH; > > - return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; > + return ATTR_NSH; > } Matches S2CombineS1Shareability(). > > static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, > @@ -540,7 +561,8 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, > par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); > par |= tr->output & GENMASK(47, 12); > par |= FIELD_PREP(SYS_PAR_EL1_SH, > - compute_sh(final_attr, tr->desc)); > + combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par), > + compute_sh(final_attr, tr->desc))); Looks good. Thanks, Alex > > return par; > } > > -- > Without deviation from the norm, progress is not possible.
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 6ec0622969766..b36a3b6cc0116 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -238,6 +238,7 @@ extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding); extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); +extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c index 147df5a9cc4e0..71e3390b43b4c 100644 --- a/arch/arm64/kvm/at.c +++ b/arch/arm64/kvm/at.c @@ -51,6 +51,189 @@ static void __mmu_config_restore(struct mmu_config *config) isb(); } +#define MEMATTR(ic, oc) (MEMATTR_##oc << 4 | MEMATTR_##ic) +#define MEMATTR_NC 0b0100 +#define MEMATTR_Wt 0b1000 +#define MEMATTR_Wb 0b1100 +#define MEMATTR_WbRaWa 0b1111 + +#define MEMATTR_IS_DEVICE(m) (((m) & GENMASK(7, 4)) == 0) + +static u8 s2_memattr_to_attr(u8 memattr) +{ + memattr &= 0b1111; + + switch (memattr) { + case 0b0000: + case 0b0001: + case 0b0010: + case 0b0011: + return memattr << 2; + case 0b0100: + return MEMATTR(Wb, Wb); + case 0b0101: + return MEMATTR(NC, NC); + case 0b0110: + return MEMATTR(Wt, NC); + case 0b0111: + return MEMATTR(Wb, NC); + case 0b1000: + /* Reserved, assume NC */ + return MEMATTR(NC, NC); + case 0b1001: + return MEMATTR(NC, Wt); + case 0b1010: + return MEMATTR(Wt, Wt); + case 0b1011: + return MEMATTR(Wb, Wt); + case 0b1100: + /* Reserved, assume NC */ + return MEMATTR(NC, NC); + case 0b1101: + return MEMATTR(NC, Wb); + case 0b1110: + return MEMATTR(Wt, Wb); + case 0b1111: + return MEMATTR(Wb, Wb); + default: + unreachable(); + } +} + +static u8 combine_s1_s2_attr(u8 s1, u8 s2) +{ + bool transient; + u8 final = 0; + + /* Upgrade transient s1 to non-transient to simplify things */ + switch (s1) { + case 0b0001 ... 0b0011: /* Normal, Write-Through Transient */ + transient = true; + s1 = MEMATTR_Wt | (s1 & GENMASK(1,0)); + break; + case 0b0101 ... 0b0111: /* Normal, Write-Back Transient */ + transient = true; + s1 = MEMATTR_Wb | (s1 & GENMASK(1,0)); + break; + default: + transient = false; + } + + /* S2CombineS1AttrHints() */ + if ((s1 & GENMASK(3, 2)) == MEMATTR_NC || + (s2 & GENMASK(3, 2)) == MEMATTR_NC) + final = MEMATTR_NC; + else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt || + (s2 & GENMASK(3, 2)) == MEMATTR_Wt) + final = MEMATTR_Wt; + else + final = MEMATTR_Wb; + + if (final != MEMATTR_NC) { + /* Inherit RaWa hints form S1 */ + if (transient) { + switch (s1 & GENMASK(3, 2)) { + case MEMATTR_Wt: + final = 0; + break; + case MEMATTR_Wb: + final = MEMATTR_NC; + break; + } + } + + final |= s1 & GENMASK(1, 0); + } + + return final; +} + +static u8 compute_sh(u8 attr, u64 desc) +{ + /* Any form of device, as well as NC has SH[1:0]=0b10 */ + if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC)) + return 0b10; + + return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10; +} + +static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par, + struct kvm_s2_trans *tr) +{ + u8 s1_parattr, s2_memattr, final_attr; + u64 par; + + /* If S2 has failed to translate, report the damage */ + if (tr->esr) { + par = SYS_PAR_EL1_RES1; + par |= SYS_PAR_EL1_F; + par |= SYS_PAR_EL1_S; + par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr); + return par; + } + + s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par); + s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc); + + if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) { + if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP)) + s2_memattr &= ~BIT(3); + + /* Combination of R_VRJSW and R_RHWZM */ + switch (s2_memattr) { + case 0b0101: + if (MEMATTR_IS_DEVICE(s1_parattr)) + final_attr = s1_parattr; + else + final_attr = MEMATTR(NC, NC); + break; + case 0b0110: + case 0b1110: + final_attr = MEMATTR(WbRaWa, WbRaWa); + break; + case 0b0111: + case 0b1111: + /* Preserve S1 attribute */ + final_attr = s1_parattr; + break; + case 0b0100: + case 0b1100: + case 0b1101: + /* Reserved, do something non-silly */ + final_attr = s1_parattr; + break; + default: + /* MemAttr[2]=0, Device from S2 */ + final_attr = s2_memattr & GENMASK(1,0) << 2; + } + } else { + /* Combination of R_HMNDG, R_TNHFM and R_GQFSF */ + u8 s2_parattr = s2_memattr_to_attr(s2_memattr); + + if (MEMATTR_IS_DEVICE(s1_parattr) || + MEMATTR_IS_DEVICE(s2_parattr)) { + final_attr = min(s1_parattr, s2_parattr); + } else { + /* At this stage, this is memory vs memory */ + final_attr = combine_s1_s2_attr(s1_parattr & 0xf, + s2_parattr & 0xf); + final_attr |= combine_s1_s2_attr(s1_parattr >> 4, + s2_parattr >> 4) << 4; + } + } + + if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) && + !MEMATTR_IS_DEVICE(final_attr)) + final_attr = MEMATTR(NC, NC); + + par = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr); + par |= tr->output & GENMASK(47, 12); + par |= FIELD_PREP(SYS_PAR_EL1_SH, + compute_sh(final_attr, tr->desc)); + + return par; +} + static bool check_at_pan(struct kvm_vcpu *vcpu, u64 vaddr, u64 *res) { u64 par_e0; @@ -252,3 +435,62 @@ void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) vcpu_write_sys_reg(vcpu, par, PAR_EL1); } + +void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr) +{ + struct kvm_s2_trans out = {}; + u64 ipa, par; + bool write; + int ret; + + /* Do the stage-1 translation */ + switch (op) { + case OP_AT_S12E1R: + op = OP_AT_S1E1R; + write = false; + break; + case OP_AT_S12E1W: + op = OP_AT_S1E1W; + write = true; + break; + case OP_AT_S12E0R: + op = OP_AT_S1E0R; + write = false; + break; + case OP_AT_S12E0W: + op = OP_AT_S1E0W; + write = true; + break; + default: + WARN_ON_ONCE(1); + return; + } + + __kvm_at_s1e01(vcpu, op, vaddr); + par = vcpu_read_sys_reg(vcpu, PAR_EL1); + if (par & SYS_PAR_EL1_F) + return; + + /* + * If we only have a single stage of translation (E2H=0 or + * TGE=1), exit early. Same thing if {VM,DC}=={0,0}. + */ + if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) || + !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC))) + return; + + /* Do the stage-2 translation */ + ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0)); + out.esr = 0; + ret = kvm_walk_nested_s2(vcpu, ipa, &out); + if (ret < 0) + return; + + /* Check the access permission */ + if (!out.esr && + ((!write && !out.readable) || (write && !out.writable))) + out.esr = ESR_ELx_FSC_PERM | (out.level & 0x3); + + par = compute_par_s12(vcpu, par, &out); + vcpu_write_sys_reg(vcpu, par, PAR_EL1); +}
On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk. However, there is a great deal of complexity coming from combining the S1 and S2 attributes to report something consistent in PAR_EL1. This is an absolute mine field, and I have a splitting headache. Signed-off-by: Marc Zyngier <maz@kernel.org> --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/kvm/at.c | 242 +++++++++++++++++++++++++++++++ 2 files changed, 243 insertions(+)