@@ -780,8 +780,13 @@ void __noreturn stop_this_cpu(void *dummy)
*
* Test the CPUID bit directly because the machine might've cleared
* X86_FEATURE_SME due to cmdline options.
+ *
+ * The TDX module or guests might have left dirty cachelines
+ * behind. Flush them to avoid corruption from later writeback.
+ * Note that this flushes on all systems where TDX is possible,
+ * but does not actually check that TDX was in use.
*/
- if (cpuid_eax(0x8000001f) & BIT(0))
+ if (cpuid_eax(0x8000001f) & BIT(0) || platform_tdx_enabled())
native_wbinvd();
for (;;) {
/*
@@ -32,6 +32,7 @@
#include <asm/realmode.h>
#include <asm/x86_init.h>
#include <asm/efi.h>
+#include <asm/tdx.h>
/*
* Power off function, if any
@@ -695,6 +696,20 @@ void native_machine_shutdown(void)
local_irq_disable();
stop_other_cpus();
#endif
+ /*
+ * stop_other_cpus() has flushed all dirty cachelines of TDX
+ * private memory on remote cpus. Unlike SME, which does the
+ * cache flush on _this_ cpu in the relocate_kernel(), flush
+ * the cache for _this_ cpu here. This is because on the
+ * platforms with "partial write machine check" erratum the
+ * kernel needs to convert all TDX private pages back to normal
+ * before booting to the new kernel in kexec(), and the cache
+ * flush must be done before that. If the kernel took SME's way,
+ * it would have to muck with the relocate_kernel() assembly to
+ * do memory conversion.
+ */
+ if (platform_tdx_enabled())
+ native_wbinvd();
lapic_shutdown();
restore_boot_irq_mode();