diff mbox series

[08/12] KVM: arm64: nv: Add emulation of AT S12E{0,1}{R,W}

Message ID 20240625133508.259829-9-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM: arm64: nv: Add support for address translation instructions | expand

Commit Message

Marc Zyngier June 25, 2024, 1:35 p.m. UTC
On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the
combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk.

However, there is a great deal of complexity coming from combining
the S1 and S2 attributes to report something consistent in PAR_EL1.

This is an absolute mine field, and I have a splitting headache.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_asm.h |   1 +
 arch/arm64/kvm/at.c              | 242 +++++++++++++++++++++++++++++++
 2 files changed, 243 insertions(+)

Comments

Alexandru Elisei July 18, 2024, 3:10 p.m. UTC | #1
Hi,

On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote:
> On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the
> combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk.
> 
> However, there is a great deal of complexity coming from combining
> the S1 and S2 attributes to report something consistent in PAR_EL1.
> 
> This is an absolute mine field, and I have a splitting headache.
> 
> [..]
> +static u8 compute_sh(u8 attr, u64 desc)
> +{
> +	/* Any form of device, as well as NC has SH[1:0]=0b10 */
> +	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
> +		return 0b10;
> +
> +	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;

If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10
(outer-shareable), which seems to be contradicting PAREncodeShareability().

> +}
> +
> +static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
> +			   struct kvm_s2_trans *tr)
> +{
> +	u8 s1_parattr, s2_memattr, final_attr;
> +	u64 par;
> +
> +	/* If S2 has failed to translate, report the damage */
> +	if (tr->esr) {
> +		par = SYS_PAR_EL1_RES1;
> +		par |= SYS_PAR_EL1_F;
> +		par |= SYS_PAR_EL1_S;
> +		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
> +		return par;
> +	}
> +
> +	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
> +	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
> +
> +	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
> +		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
> +			s2_memattr &= ~BIT(3);
> +
> +		/* Combination of R_VRJSW and R_RHWZM */
> +		switch (s2_memattr) {
> +		case 0b0101:
> +			if (MEMATTR_IS_DEVICE(s1_parattr))
> +				final_attr = s1_parattr;
> +			else
> +				final_attr = MEMATTR(NC, NC);
> +			break;
> +		case 0b0110:
> +		case 0b1110:
> +			final_attr = MEMATTR(WbRaWa, WbRaWa);
> +			break;
> +		case 0b0111:
> +		case 0b1111:
> +			/* Preserve S1 attribute */
> +			final_attr = s1_parattr;
> +			break;
> +		case 0b0100:
> +		case 0b1100:
> +		case 0b1101:
> +			/* Reserved, do something non-silly */
> +			final_attr = s1_parattr;
> +			break;
> +		default:
> +			/* MemAttr[2]=0, Device from S2 */
> +			final_attr = s2_memattr & GENMASK(1,0) << 2;
> +		}
> +	} else {
> +		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
> +		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
> +
> +		if (MEMATTR_IS_DEVICE(s1_parattr) ||
> +		    MEMATTR_IS_DEVICE(s2_parattr)) {
> +			final_attr = min(s1_parattr, s2_parattr);
> +		} else {
> +			/* At this stage, this is memory vs memory */
> +			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
> +							 s2_parattr & 0xf);
> +			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
> +							 s2_parattr >> 4) << 4;
> +		}
> +	}
> +
> +	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
> +	    !MEMATTR_IS_DEVICE(final_attr))
> +		final_attr = MEMATTR(NC, NC);
> +
> +	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
> +	par |= tr->output & GENMASK(47, 12);
> +	par |= FIELD_PREP(SYS_PAR_EL1_SH,
> +			  compute_sh(final_attr, tr->desc));
> +
> +	return par;
>

It seems that the code doesn't combine shareability attributes, as per rule
RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up
calling S2CombineS1Shareability().

Thanks,
Alex
Marc Zyngier July 20, 2024, 9:49 a.m. UTC | #2
On Thu, 18 Jul 2024 16:10:20 +0100,
Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> 
> Hi,
> 
> On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote:
> > On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the
> > combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk.
> > 
> > However, there is a great deal of complexity coming from combining
> > the S1 and S2 attributes to report something consistent in PAR_EL1.
> > 
> > This is an absolute mine field, and I have a splitting headache.
> > 
> > [..]
> > +static u8 compute_sh(u8 attr, u64 desc)
> > +{
> > +	/* Any form of device, as well as NC has SH[1:0]=0b10 */
> > +	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
> > +		return 0b10;
> > +
> > +	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;
> 
> If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10
> (outer-shareable), which seems to be contradicting PAREncodeShareability().

Yup, well caught.

> > +	par |= FIELD_PREP(SYS_PAR_EL1_SH,
> > +			  compute_sh(final_attr, tr->desc));
> > +
> > +	return par;
> >
> 
> It seems that the code doesn't combine shareability attributes, as per rule
> RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up
> calling S2CombineS1Shareability().

That as well. See below what I'm stashing on top.

Thanks,

	M.

diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index e66c97fc1fd3..28c4344d1c34 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -459,13 +459,34 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
 	return final;
 }
 
+#define ATTR_NSH	0b00
+#define ATTR_RSV	0b01
+#define ATTR_OSH	0b10
+#define ATTR_ISH	0b11
+
 static u8 compute_sh(u8 attr, u64 desc)
 {
+	u8 sh;
+
 	/* Any form of device, as well as NC has SH[1:0]=0b10 */
 	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
-		return 0b10;
+		return ATTR_OSH;
+
+	sh = FIELD_GET(PTE_SHARED, desc);
+	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
+		sh = ATTR_NSH;
+
+	return sh;
+}
+
+static u8 combine_sh(u8 s1_sh, u8 s2_sh)
+{
+	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
+		return ATTR_OSH;
+	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
+		return ATTR_ISH;
 
-	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;
+	return ATTR_NSH;
 }
 
 static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
@@ -540,7 +561,8 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
 	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
 	par |= tr->output & GENMASK(47, 12);
 	par |= FIELD_PREP(SYS_PAR_EL1_SH,
-			  compute_sh(final_attr, tr->desc));
+			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
+				     compute_sh(final_attr, tr->desc)));
 
 	return par;
 }
Alexandru Elisei July 22, 2024, 10:33 a.m. UTC | #3
Hi,

On Sat, Jul 20, 2024 at 10:49:29AM +0100, Marc Zyngier wrote:
> On Thu, 18 Jul 2024 16:10:20 +0100,
> Alexandru Elisei <alexandru.elisei@arm.com> wrote:
> > 
> > Hi,
> > 
> > On Tue, Jun 25, 2024 at 02:35:07PM +0100, Marc Zyngier wrote:
> > > On the face of it, AT S12E{0,1}{R,W} is pretty simple. It is the
> > > combination of AT S1E{0,1}{R,W}, followed by an extra S2 walk.
> > > 
> > > However, there is a great deal of complexity coming from combining
> > > the S1 and S2 attributes to report something consistent in PAR_EL1.
> > > 
> > > This is an absolute mine field, and I have a splitting headache.
> > > 
> > > [..]
> > > +static u8 compute_sh(u8 attr, u64 desc)
> > > +{
> > > +	/* Any form of device, as well as NC has SH[1:0]=0b10 */
> > > +	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
> > > +		return 0b10;
> > > +
> > > +	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;
> > 
> > If shareability is 0b00 (non-shareable), the PAR_EL1.SH field will be 0b10
> > (outer-shareable), which seems to be contradicting PAREncodeShareability().
> 
> Yup, well caught.
> 
> > > +	par |= FIELD_PREP(SYS_PAR_EL1_SH,
> > > +			  compute_sh(final_attr, tr->desc));
> > > +
> > > +	return par;
> > >
> > 
> > It seems that the code doesn't combine shareability attributes, as per rule
> > RGDTNP and S2CombineS1MemAttrs() or S2ApplyFWBMemAttrs(), which both end up
> > calling S2CombineS1Shareability().
> 
> That as well. See below what I'm stashing on top.
> 
> Thanks,
> 
> 	M.
> 
> diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
> index e66c97fc1fd3..28c4344d1c34 100644
> --- a/arch/arm64/kvm/at.c
> +++ b/arch/arm64/kvm/at.c
> @@ -459,13 +459,34 @@ static u8 combine_s1_s2_attr(u8 s1, u8 s2)
>  	return final;
>  }
>  
> +#define ATTR_NSH	0b00
> +#define ATTR_RSV	0b01
> +#define ATTR_OSH	0b10
> +#define ATTR_ISH	0b11

Matches Table D8-89 from DDI 0487K.a.

> +
>  static u8 compute_sh(u8 attr, u64 desc)
>  {
> +	u8 sh;
> +
>  	/* Any form of device, as well as NC has SH[1:0]=0b10 */
>  	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
> -		return 0b10;
> +		return ATTR_OSH;
> +
> +	sh = FIELD_GET(PTE_SHARED, desc);
> +	if (sh == ATTR_RSV)		/* Reserved, mapped to NSH */
> +		sh = ATTR_NSH;
> +
> +	return sh;
> +}

Matches PAREncodeShareability().

> +
> +static u8 combine_sh(u8 s1_sh, u8 s2_sh)
> +{
> +	if (s1_sh == ATTR_OSH || s2_sh == ATTR_OSH)
> +		return ATTR_OSH;
> +	if (s1_sh == ATTR_ISH || s2_sh == ATTR_ISH)
> +		return ATTR_ISH;
>  
> -	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;
> +	return ATTR_NSH;
>  }

Matches S2CombineS1Shareability().

>  
>  static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
> @@ -540,7 +561,8 @@ static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
>  	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
>  	par |= tr->output & GENMASK(47, 12);
>  	par |= FIELD_PREP(SYS_PAR_EL1_SH,
> -			  compute_sh(final_attr, tr->desc));
> +			  combine_sh(FIELD_GET(SYS_PAR_EL1_SH, s1_par),
> +				     compute_sh(final_attr, tr->desc)));

Looks good.

Thanks,
Alex

>  
>  	return par;
>  }
> 
> -- 
> Without deviation from the norm, progress is not possible.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 6ec0622969766..b36a3b6cc0116 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -238,6 +238,7 @@  extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 extern void __kvm_at_s1e01(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
 extern void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
+extern void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
diff --git a/arch/arm64/kvm/at.c b/arch/arm64/kvm/at.c
index 147df5a9cc4e0..71e3390b43b4c 100644
--- a/arch/arm64/kvm/at.c
+++ b/arch/arm64/kvm/at.c
@@ -51,6 +51,189 @@  static void __mmu_config_restore(struct mmu_config *config)
 	isb();
 }
 
+#define MEMATTR(ic, oc)		(MEMATTR_##oc << 4 | MEMATTR_##ic)
+#define MEMATTR_NC		0b0100
+#define MEMATTR_Wt		0b1000
+#define MEMATTR_Wb		0b1100
+#define MEMATTR_WbRaWa		0b1111
+
+#define MEMATTR_IS_DEVICE(m)	(((m) & GENMASK(7, 4)) == 0)
+
+static u8 s2_memattr_to_attr(u8 memattr)
+{
+	memattr &= 0b1111;
+
+	switch (memattr) {
+	case 0b0000:
+	case 0b0001:
+	case 0b0010:
+	case 0b0011:
+		return memattr << 2;
+	case 0b0100:
+		return MEMATTR(Wb, Wb);
+	case 0b0101:
+		return MEMATTR(NC, NC);
+	case 0b0110:
+		return MEMATTR(Wt, NC);
+	case 0b0111:
+		return MEMATTR(Wb, NC);
+	case 0b1000:
+		/* Reserved, assume NC */
+		return MEMATTR(NC, NC);
+	case 0b1001:
+		return MEMATTR(NC, Wt);
+	case 0b1010:
+		return MEMATTR(Wt, Wt);
+	case 0b1011:
+		return MEMATTR(Wb, Wt);
+	case 0b1100:
+		/* Reserved, assume NC */
+		return MEMATTR(NC, NC);
+	case 0b1101:
+		return MEMATTR(NC, Wb);
+	case 0b1110:
+		return MEMATTR(Wt, Wb);
+	case 0b1111:
+		return MEMATTR(Wb, Wb);
+	default:
+		unreachable();
+	}
+}
+
+static u8 combine_s1_s2_attr(u8 s1, u8 s2)
+{
+	bool transient;
+	u8 final = 0;
+
+	/* Upgrade transient s1 to non-transient to simplify things */
+	switch (s1) {
+	case 0b0001 ... 0b0011:	/* Normal, Write-Through Transient */
+		transient = true;
+		s1 = MEMATTR_Wt | (s1 & GENMASK(1,0));
+		break;
+	case 0b0101 ... 0b0111:	/* Normal, Write-Back Transient */
+		transient = true;
+		s1 = MEMATTR_Wb | (s1 & GENMASK(1,0));
+		break;
+	default:
+		transient = false;
+	}
+
+	/* S2CombineS1AttrHints() */
+	if ((s1 & GENMASK(3, 2)) == MEMATTR_NC ||
+	    (s2 & GENMASK(3, 2)) == MEMATTR_NC)
+		final = MEMATTR_NC;
+	else if ((s1 & GENMASK(3, 2)) == MEMATTR_Wt ||
+		 (s2 & GENMASK(3, 2)) == MEMATTR_Wt)
+		final = MEMATTR_Wt;
+	else
+		final = MEMATTR_Wb;
+
+	if (final != MEMATTR_NC) {
+		/* Inherit RaWa hints from S1 */
+		if (transient) {
+			switch (s1 & GENMASK(3, 2)) {
+			case MEMATTR_Wt:
+				final = 0;
+				break;
+			case MEMATTR_Wb:
+				final = MEMATTR_NC;
+				break;
+			}
+		}
+
+		final |= s1 & GENMASK(1, 0);
+	}
+
+	return final;
+}
+
+static u8 compute_sh(u8 attr, u64 desc)
+{
+	/* Any form of device, as well as NC has SH[1:0]=0b10 */
+	if (MEMATTR_IS_DEVICE(attr) || attr == MEMATTR(NC, NC))
+		return 0b10;
+
+	return FIELD_GET(PTE_SHARED, desc) == 0b11 ? 0b11 : 0b10;
+}
+
+static u64 compute_par_s12(struct kvm_vcpu *vcpu, u64 s1_par,
+			   struct kvm_s2_trans *tr)
+{
+	u8 s1_parattr, s2_memattr, final_attr;
+	u64 par;
+
+	/* If S2 has failed to translate, report the damage */
+	if (tr->esr) {
+		par = SYS_PAR_EL1_RES1;
+		par |= SYS_PAR_EL1_F;
+		par |= SYS_PAR_EL1_S;
+		par |= FIELD_PREP(SYS_PAR_EL1_FST, tr->esr);
+		return par;
+	}
+
+	s1_parattr = FIELD_GET(SYS_PAR_EL1_ATTR, s1_par);
+	s2_memattr = FIELD_GET(GENMASK(5, 2), tr->desc);
+
+	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_FWB) {
+		if (!kvm_has_feat(vcpu->kvm, ID_AA64PFR2_EL1, MTEPERM, IMP))
+			s2_memattr &= ~BIT(3);
+
+		/* Combination of R_VRJSW and R_RHWZM */
+		switch (s2_memattr) {
+		case 0b0101:
+			if (MEMATTR_IS_DEVICE(s1_parattr))
+				final_attr = s1_parattr;
+			else
+				final_attr = MEMATTR(NC, NC);
+			break;
+		case 0b0110:
+		case 0b1110:
+			final_attr = MEMATTR(WbRaWa, WbRaWa);
+			break;
+		case 0b0111:
+		case 0b1111:
+			/* Preserve S1 attribute */
+			final_attr = s1_parattr;
+			break;
+		case 0b0100:
+		case 0b1100:
+		case 0b1101:
+			/* Reserved, do something non-silly */
+			final_attr = s1_parattr;
+			break;
+		default:
+			/* MemAttr[2]=0, Device from S2 */
+			final_attr = (s2_memattr & GENMASK(1, 0)) << 2;
+		}
+	} else {
+		/* Combination of R_HMNDG, R_TNHFM and R_GQFSF */
+		u8 s2_parattr = s2_memattr_to_attr(s2_memattr);
+
+		if (MEMATTR_IS_DEVICE(s1_parattr) ||
+		    MEMATTR_IS_DEVICE(s2_parattr)) {
+			final_attr = min(s1_parattr, s2_parattr);
+		} else {
+			/* At this stage, this is memory vs memory */
+			final_attr  = combine_s1_s2_attr(s1_parattr & 0xf,
+							 s2_parattr & 0xf);
+			final_attr |= combine_s1_s2_attr(s1_parattr >> 4,
+							 s2_parattr >> 4) << 4;
+		}
+	}
+
+	if ((__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_CD) &&
+	    !MEMATTR_IS_DEVICE(final_attr))
+		final_attr = MEMATTR(NC, NC);
+
+	par  = FIELD_PREP(SYS_PAR_EL1_ATTR, final_attr);
+	par |= tr->output & GENMASK(47, 12);
+	par |= FIELD_PREP(SYS_PAR_EL1_SH,
+			  compute_sh(final_attr, tr->desc));
+
+	return par;
+}
+
 static bool check_at_pan(struct kvm_vcpu *vcpu, u64 vaddr, u64 *res)
 {
 	u64 par_e0;
@@ -252,3 +435,62 @@  void __kvm_at_s1e2(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
 
 	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
 }
+
+void __kvm_at_s12(struct kvm_vcpu *vcpu, u32 op, u64 vaddr)
+{
+	struct kvm_s2_trans out = {};
+	u64 ipa, par;
+	bool write;
+	int ret;
+
+	/* Do the stage-1 translation */
+	switch (op) {
+	case OP_AT_S12E1R:
+		op = OP_AT_S1E1R;
+		write = false;
+		break;
+	case OP_AT_S12E1W:
+		op = OP_AT_S1E1W;
+		write = true;
+		break;
+	case OP_AT_S12E0R:
+		op = OP_AT_S1E0R;
+		write = false;
+		break;
+	case OP_AT_S12E0W:
+		op = OP_AT_S1E0W;
+		write = true;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	__kvm_at_s1e01(vcpu, op, vaddr);
+	par = vcpu_read_sys_reg(vcpu, PAR_EL1);
+	if (par & SYS_PAR_EL1_F)
+		return;
+
+	/*
+	 * If we only have a single stage of translation (E2H=0 or
+	 * TGE=1), exit early. Same thing if {VM,DC}=={0,0}.
+	 */
+	if (!vcpu_el2_e2h_is_set(vcpu) || vcpu_el2_tge_is_set(vcpu) ||
+	    !(vcpu_read_sys_reg(vcpu, HCR_EL2) & (HCR_VM | HCR_DC)))
+		return;
+
+	/* Do the stage-2 translation */
+	ipa = (par & GENMASK_ULL(47, 12)) | (vaddr & GENMASK_ULL(11, 0));
+	out.esr = 0;
+	ret = kvm_walk_nested_s2(vcpu, ipa, &out);
+	if (ret < 0)
+		return;
+
+	/* Check the access permission */
+	if (!out.esr &&
+	    ((!write && !out.readable) || (write && !out.writable)))
+		out.esr = ESR_ELx_FSC_PERM | (out.level & 0x3);
+
+	par = compute_par_s12(vcpu, par, &out);
+	vcpu_write_sys_reg(vcpu, par, PAR_EL1);
+}