
[v10,08/50] x86/fault: Add helper for dumping RMP entries

Message ID 20231016132819.1002933-9-michael.roth@amd.com (mailing list archive)
State Not Applicable
Delegated to: Herbert Xu
Series: Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support

Commit Message

Michael Roth Oct. 16, 2023, 1:27 p.m. UTC
From: Brijesh Singh <brijesh.singh@amd.com>

Add a helper to dump the raw RMP entry for a particular PFN or HVA. This
information will be useful for debugging things like page faults due to
RMP access violations and RMPUPDATE failures.

Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Ashish Kalra <ashish.kalra@amd.com>
[mdr: move helper to standalone patch]
Signed-off-by: Michael Roth <michael.roth@amd.com>
---
 arch/x86/include/asm/sev-host.h |  2 +
 arch/x86/virt/svm/sev.c         | 77 +++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

Comments

Borislav Petkov Nov. 15, 2023, 4:08 p.m. UTC | #1
On Mon, Oct 16, 2023 at 08:27:37AM -0500, Michael Roth wrote:
> +/*
> + * Dump the raw RMP entry for a particular PFN. These bits are documented in the
> + * PPR for a particular CPU model and provide useful information about how a
> + * particular PFN is being utilized by the kernel/firmware at the time certain
> + * unexpected events occur, such as RMP faults.
> + */
> +static void sev_dump_rmpentry(u64 dumped_pfn)

Just "dump_rmentry"

s/dumped_pfn/pfn/g

> +	struct rmpentry e;
> +	u64 pfn, pfn_end;
> +	int level, ret;
> +	u64 *e_data;
> +
> +	ret = __snp_lookup_rmpentry(dumped_pfn, &e, &level);
> +	if (ret) {
> +		pr_info("Failed to read RMP entry for PFN 0x%llx, error %d\n",
> +			dumped_pfn, ret);
> +		return;
> +	}
> +
> +	e_data = (u64 *)&e;
> +	if (e.assigned) {
> +		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> +			dumped_pfn, e_data[1], e_data[0]);
> +		return;
> +	}
> +
> +	/*
> +	 * If the RMP entry for a particular PFN is not in an assigned state,
> +	 * then it is sometimes useful to get an idea of whether or not any RMP
> +	 * entries for other PFNs within the same 2MB region are assigned, since
> +	 * those too can affect the ability to access a particular PFN in
> +	 * certain situations, such as when the PFN is being accessed via a 2MB
> +	 * mapping in the host page table.
> +	 */
> +	pfn = ALIGN(dumped_pfn, PTRS_PER_PMD);
> +	pfn_end = pfn + PTRS_PER_PMD;
> +
> +	while (pfn < pfn_end) {
> +		ret = __snp_lookup_rmpentry(pfn, &e, &level);
> +		if (ret) {
> +			pr_info_ratelimited("Failed to read RMP entry for PFN 0x%llx\n", pfn);

Why ratelimited?

No need to print anything if you fail to read it - simply dump the range
[pfn, pfn_end], e_data[0], e_data[1] exactly *once* before the loop and
inside the loop dump only the ones you can look up...
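
IOW, something along these lines (completely untested sketch, keeping the
existing __snp_lookup_rmpentry() interface):

	pr_info("PFN 0x%llx unassigned, dumping non-zero entries in 2M PFN region: [0x%llx - 0x%llx]\n",
		dumped_pfn, pfn, pfn_end);

	while (pfn < pfn_end) {
		ret = __snp_lookup_rmpentry(pfn, &e, &level);
		if (ret || (!e_data[0] && !e_data[1])) {
			pfn++;
			continue;
		}

		pr_info("PFN: 0x%llx, [high=0x%016llx low=0x%016llx]\n",
			pfn, e_data[1], e_data[0]);
		pfn++;
	}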

> +			pfn++;
> +			continue;
> +		}
> +
> +		if (e_data[0] || e_data[1]) {
> +			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> +				dumped_pfn, pfn, e_data[1], e_data[0]);
> +			return;
> +		}
> +		pfn++;
> +	}
> +
> +	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
> +		dumped_pfn);

... and then you don't need this one either.

> +}
> +
> +void sev_dump_hva_rmpentry(unsigned long hva)
> +{
> +	unsigned int level;
> +	pgd_t *pgd;
> +	pte_t *pte;
> +
> +	pgd = __va(read_cr3_pa());
> +	pgd += pgd_index(hva);
> +	pte = lookup_address_in_pgd(pgd, hva, &level);

If this is using the current CR3, why aren't you simply using
lookup_address() here without the need to read pgd?
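
i.e. something like (untested):

	pte = lookup_address(hva, &level);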

> +
> +	if (pte) {

	if (!pte)

Doh.

> +		pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
> +		return;
> +	}
> +
> +	sev_dump_rmpentry(pte_pfn(*pte));
> +}
> +EXPORT_SYMBOL_GPL(sev_dump_hva_rmpentry);

Who's going to use this, kvm?

Michael Roth Dec. 19, 2023, 6:08 a.m. UTC | #2
On Wed, Nov 15, 2023 at 05:08:52PM +0100, Borislav Petkov wrote:
> On Mon, Oct 16, 2023 at 08:27:37AM -0500, Michael Roth wrote:
> > +/*
> > + * Dump the raw RMP entry for a particular PFN. These bits are documented in the
> > + * PPR for a particular CPU model and provide useful information about how a
> > + * particular PFN is being utilized by the kernel/firmware at the time certain
> > + * unexpected events occur, such as RMP faults.
> > + */
> > +static void sev_dump_rmpentry(u64 dumped_pfn)
> 
> Just "dump_rmpentry"
> 
> s/dumped_pfn/pfn/g
> 
> > +	struct rmpentry e;
> > +	u64 pfn, pfn_end;
> > +	int level, ret;
> > +	u64 *e_data;
> > +
> > +	ret = __snp_lookup_rmpentry(dumped_pfn, &e, &level);
> > +	if (ret) {
> > +		pr_info("Failed to read RMP entry for PFN 0x%llx, error %d\n",
> > +			dumped_pfn, ret);
> > +		return;
> > +	}
> > +
> > +	e_data = (u64 *)&e;
> > +	if (e.assigned) {
> > +		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> > +			dumped_pfn, e_data[1], e_data[0]);
> > +		return;
> > +	}
> > +
> > +	/*
> > +	 * If the RMP entry for a particular PFN is not in an assigned state,
> > +	 * then it is sometimes useful to get an idea of whether or not any RMP
> > +	 * entries for other PFNs within the same 2MB region are assigned, since
> > +	 * those too can affect the ability to access a particular PFN in
> > +	 * certain situations, such as when the PFN is being accessed via a 2MB
> > +	 * mapping in the host page table.
> > +	 */
> > +	pfn = ALIGN(dumped_pfn, PTRS_PER_PMD);
> > +	pfn_end = pfn + PTRS_PER_PMD;
> > +
> > +	while (pfn < pfn_end) {
> > +		ret = __snp_lookup_rmpentry(pfn, &e, &level);
> > +		if (ret) {
> > +			pr_info_ratelimited("Failed to read RMP entry for PFN 0x%llx\n", pfn);
> 
> Why ratelimited?

Dave had some concerns about potentially printing out ~512 messages
for a particular PFN dump, and this seemed like a potential case where
that might still occur if there was some issue with RMP table access.
But I still wanted to print some indicator if we did hit that case,
since it might be related to whatever caused the dump to get triggered.

> 
> No need to print anything if you fail to read it - simply dump the range
> [pfn, pfn_end], e_data[0], e_data[1] exactly *once* before the loop and
> inside the loop dump only the ones you can look up...

Similar to above, the loop used to print every populated entry in the
2M range if the dumped PFN wasn't itself in an assigned state, but Dave
had some concerns about flooding. So now the loop only prints 1
populated entry to provide some indication that there are entries
present that could explain things like RMP faults for the PFN that caused
the dump.

That makes it a bit awkward to print a header statement, since you end
up with something like:

 PFN is not assigned, so dumping the first populated RMP entry found within the 2MB range (if any)
 PFN_x is populated, contents [high=... low=...]

Or if nothing found:

 PFN is not assigned, so dumping the first populated RMP entry found within the 2MB range (if any)

Whereas the current logic just prints one self-contained statement which
fully explains each of the above cases and doesn't require the user to
infer there was nothing present in the range based on the lack of a
statement. It's a little clearer, a little less verbose, and a little easier
to grep for either situation without needing to get context from surrounding
statements.

> 
> > +			pfn++;
> > +			continue;
> > +		}
> > +
> > +		if (e_data[0] || e_data[1]) {
> > +			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
> > +				dumped_pfn, pfn, e_data[1], e_data[0]);
> > +			return;
> > +		}
> > +		pfn++;
> > +	}
> > +
> > +	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
> > +		dumped_pfn);
> 
> ... and then you don't need this one either.
> 
> > +}
> > +
> > +void sev_dump_hva_rmpentry(unsigned long hva)
> > +{
> > +	unsigned int level;
> > +	pgd_t *pgd;
> > +	pte_t *pte;
> > +
> > +	pgd = __va(read_cr3_pa());
> > +	pgd += pgd_index(hva);
> > +	pte = lookup_address_in_pgd(pgd, hva, &level);
> 
> If this is using the current CR3, why aren't you simply using
> lookup_address() here without the need to read pgd?
> 
> > +
> > +	if (pte) {
> 
> 	if (!pte)
> 
> Doh.

Yikes. Thanks for the catch.

> 
> > +		pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
> > +		return;
> > +	}
> > +
> > +	sev_dump_rmpentry(pte_pfn(*pte));
> > +}
> > +EXPORT_SYMBOL_GPL(sev_dump_hva_rmpentry);
> 
> Who's going to use this, kvm?

This is mainly used by the host #PF handler via show_fault_oops(). It
can happen for both kernel and userspace accesses if there's a bug, so
that's why read_cr3_pa() is needed, since these may be userspace
HVAs. Though I just realized the patch that uses this (next one in the
series) claims to only be for kernel #PFs, so that might cause some
confusion. I'll get that commit message fixed up.
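
For reference, the hookup in the next patch is roughly like this (assuming
the X86_PF_RMP error code bit added earlier in the series; paraphrased, not
the exact hunk):

	static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
				    unsigned long address)
	{
		...
		if (error_code & X86_PF_RMP)
			sev_dump_hva_rmpentry(address);
		...
	}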

Thanks,

Mike

> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette
>

Patch

diff --git a/arch/x86/include/asm/sev-host.h b/arch/x86/include/asm/sev-host.h
index 4c487ce8457f..bb06c57f2909 100644
--- a/arch/x86/include/asm/sev-host.h
+++ b/arch/x86/include/asm/sev-host.h
@@ -15,8 +15,10 @@ 
 
 #ifdef CONFIG_KVM_AMD_SEV
 int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level);
+void sev_dump_hva_rmpentry(unsigned long address);
 #else
 static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENXIO; }
+static inline void sev_dump_hva_rmpentry(unsigned long address) {}
 #endif
 
 #endif
diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c
index 7d3802605376..cac3e311c38f 100644
--- a/arch/x86/virt/svm/sev.c
+++ b/arch/x86/virt/svm/sev.c
@@ -290,3 +290,80 @@  int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(snp_lookup_rmpentry);
+
+/*
+ * Dump the raw RMP entry for a particular PFN. These bits are documented in the
+ * PPR for a particular CPU model and provide useful information about how a
+ * particular PFN is being utilized by the kernel/firmware at the time certain
+ * unexpected events occur, such as RMP faults.
+ */
+static void sev_dump_rmpentry(u64 dumped_pfn)
+{
+	struct rmpentry e;
+	u64 pfn, pfn_end;
+	int level, ret;
+	u64 *e_data;
+
+	ret = __snp_lookup_rmpentry(dumped_pfn, &e, &level);
+	if (ret) {
+		pr_info("Failed to read RMP entry for PFN 0x%llx, error %d\n",
+			dumped_pfn, ret);
+		return;
+	}
+
+	e_data = (u64 *)&e;
+	if (e.assigned) {
+		pr_info("RMP entry for PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
+			dumped_pfn, e_data[1], e_data[0]);
+		return;
+	}
+
+	/*
+	 * If the RMP entry for a particular PFN is not in an assigned state,
+	 * then it is sometimes useful to get an idea of whether or not any RMP
+	 * entries for other PFNs within the same 2MB region are assigned, since
+	 * those too can affect the ability to access a particular PFN in
+	 * certain situations, such as when the PFN is being accessed via a 2MB
+	 * mapping in the host page table.
+	 */
+	pfn = ALIGN(dumped_pfn, PTRS_PER_PMD);
+	pfn_end = pfn + PTRS_PER_PMD;
+
+	while (pfn < pfn_end) {
+		ret = __snp_lookup_rmpentry(pfn, &e, &level);
+		if (ret) {
+			pr_info_ratelimited("Failed to read RMP entry for PFN 0x%llx\n", pfn);
+			pfn++;
+			continue;
+		}
+
+		if (e_data[0] || e_data[1]) {
+			pr_info("No assigned RMP entry for PFN 0x%llx, but the 2MB region contains populated RMP entries, e.g.: PFN 0x%llx: [high=0x%016llx low=0x%016llx]\n",
+				dumped_pfn, pfn, e_data[1], e_data[0]);
+			return;
+		}
+		pfn++;
+	}
+
+	pr_info("No populated RMP entries in the 2MB region containing PFN 0x%llx\n",
+		dumped_pfn);
+}
+
+void sev_dump_hva_rmpentry(unsigned long hva)
+{
+	unsigned int level;
+	pgd_t *pgd;
+	pte_t *pte;
+
+	pgd = __va(read_cr3_pa());
+	pgd += pgd_index(hva);
+	pte = lookup_address_in_pgd(pgd, hva, &level);
+
+	if (pte) {
+		pr_info("Can't dump RMP entry for HVA %lx: no PTE/PFN found\n", hva);
+		return;
+	}
+
+	sev_dump_rmpentry(pte_pfn(*pte));
+}
+EXPORT_SYMBOL_GPL(sev_dump_hva_rmpentry);