Message ID | 20240913113705.419146-4-Neeraj.Upadhyay@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | AMD: Add Secure AVIC Guest Support | expand |
On Fri, Sep 13, 2024 at 05:06:54PM +0530, Neeraj Upadhyay wrote: > @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) > return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); > } > > +static inline u32 get_reg(char *page, int reg_off) Just "reg" like the other APICs. > +{ > + return READ_ONCE(*((u32 *)(page + reg_off))); > +} > + > +static inline void set_reg(char *page, int reg_off, u32 val) > +{ > + WRITE_ONCE(*((u32 *)(page + reg_off)), val); > +} > + > +#define SAVIC_ALLOWED_IRR_OFFSET 0x204 > + > +static u32 x2apic_savic_read(u32 reg) > +{ > + void *backing_page = this_cpu_read(apic_backing_page); > + > + switch (reg) { > + case APIC_LVTT: > + case APIC_TMICT: > + case APIC_TMCCT: > + case APIC_TDCR: > + case APIC_ID: > + case APIC_LVR: > + case APIC_TASKPRI: > + case APIC_ARBPRI: > + case APIC_PROCPRI: > + case APIC_LDR: > + case APIC_SPIV: > + case APIC_ESR: > + case APIC_ICR: > + case APIC_LVTTHMR: > + case APIC_LVTPC: > + case APIC_LVT0: > + case APIC_LVT1: > + case APIC_LVTERR: > + case APIC_EFEAT: > + case APIC_ECTRL: > + case APIC_SEOI: > + case APIC_IER: I'm sure those can be turned into ranges instead of enumerating every single APIC register... > + case APIC_EILVTn(0) ... APIC_EILVTn(3): Like here. > + return get_reg(backing_page, reg); > + case APIC_ISR ... APIC_ISR + 0x70: > + case APIC_TMR ... APIC_TMR + 0x70: > + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); What's the point of a WARN... > + return get_reg(backing_page, reg); ... and then allowing the register access anyway? > + /* IRR and ALLOWED_IRR offset range */ > + case APIC_IRR ... APIC_IRR + 0x74: > + /* > + * Either aligned at 16 bytes for valid IRR reg offset or a > + * valid Secure AVIC ALLOWED_IRR offset. > + */ > + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), > + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); > + return get_reg(backing_page, reg); Ditto. And below too. > + default: > + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); > + return 0; > + } > +} > + > +#define SAVIC_NMI_REQ_OFFSET 0x278 > + > +static void x2apic_savic_write(u32 reg, u32 data) > +{ > + void *backing_page = this_cpu_read(apic_backing_page); > + > + switch (reg) { > + case APIC_LVTT: > + case APIC_LVT0: > + case APIC_LVT1: > + case APIC_TMICT: > + case APIC_TDCR: > + case APIC_SELF_IPI: > + /* APIC_ID is writable and configured by guest for Secure AVIC */ > + case APIC_ID: > + case APIC_TASKPRI: > + case APIC_EOI: > + case APIC_SPIV: > + case SAVIC_NMI_REQ_OFFSET: > + case APIC_ESR: > + case APIC_ICR: > + case APIC_LVTTHMR: > + case APIC_LVTPC: > + case APIC_LVTERR: > + case APIC_ECTRL: > + case APIC_SEOI: > + case APIC_IER: > + case APIC_EILVTn(0) ... APIC_EILVTn(3): > + set_reg(backing_page, reg, data); > + break; > + /* ALLOWED_IRR offsets are writable */ > + case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: > + if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { > + set_reg(backing_page, reg, data); > + break; > + } > + fallthrough; > + default: > + pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg); > + } > +} > + > static void x2apic_savic_send_IPI(int cpu, int vector) > { > u32 dest = per_cpu(x86_cpu_to_apicid, cpu); > @@ -140,8 +243,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { > .send_IPI_self = x2apic_send_IPI_self, > .nmi_to_offline_cpu = true, > > - .read = native_apic_msr_read, > - .write = native_apic_msr_write, > + .read = x2apic_savic_read, > + .write = x2apic_savic_write, > .eoi = native_apic_msr_eoi, > .icr_read = native_x2apic_icr_read, > .icr_write = native_x2apic_icr_write, > --
Hi Neeraj, On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote: > The x2APIC registers are mapped at an offset within the guest APIC > backing page which is same as their x2APIC MMIO offset. Secure AVIC > adds new registers such as ALLOWED_IRRs (which are at 4-byte offset > within the IRR register offset range) and NMI_REQ to the APIC register > space. In addition, the APIC_ID register is writable and configured by > guest. > > Add read() and write() APIC callback functions to read and write x2APIC > registers directly from the guest APIC backing page. > > The default .read()/.write() callbacks of x2APIC drivers perform > a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled, > these would result in #VC exception (for non-accelerated register > accesses). The #VC exception handler reads/write the x2APIC register > in the guest APIC backing page. Since this would increase the latency > of accessing x2APIC registers, the read() and write() callbacks of > Secure AVIC driver directly reads/writes to the guest APIC backing page. > I think this is important non-obvious information which should be in a comment in the code itself, not just in the commit message. Thanks, Melody
On 11/6/2024 11:46 PM, Borislav Petkov wrote: > On Fri, Sep 13, 2024 at 05:06:54PM +0530, Neeraj Upadhyay wrote: >> @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) >> return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); >> } >> >> +static inline u32 get_reg(char *page, int reg_off) > > Just "reg" like the other APICs. > Ok sure. >> +static u32 x2apic_savic_read(u32 reg) >> +{ >> + void *backing_page = this_cpu_read(apic_backing_page); >> + >> + switch (reg) { >> + case APIC_LVTT: >> + case APIC_TMICT: >> + case APIC_TMCCT: >> + case APIC_TDCR: >> + case APIC_ID: >> + case APIC_LVR: >> + case APIC_TASKPRI: >> + case APIC_ARBPRI: >> + case APIC_PROCPRI: >> + case APIC_LDR: >> + case APIC_SPIV: >> + case APIC_ESR: >> + case APIC_ICR: >> + case APIC_LVTTHMR: >> + case APIC_LVTPC: >> + case APIC_LVT0: >> + case APIC_LVT1: >> + case APIC_LVTERR: >> + case APIC_EFEAT: >> + case APIC_ECTRL: >> + case APIC_SEOI: >> + case APIC_IER: > > I'm sure those can be turned into ranges instead of enumerating every single > APIC register... > Below are the offset of these, as per "Table 16-6. x2APIC Register" in APM vol2: #Reg #offset APIC_LVTT - 0x320 APIC_TMICT - 0x380 APIC_TMCCT - 0x390 APIC_TDCR - 0x3E0 Above timer related registers are read from HV when we reach the end of this patch series. APIC_ID - 20h APIC_LVR - 30h APIC_TASKPRI - 80h APIC_ARBPRI - 90h APIC_PROCPRI - A0h APIC_LDR - D0h APIC_SPIV - F0h APIC_ESR - 280h APIC_ICR - 300h APIC_LVTTHMR - 330h APIC_LVTPC - 340h APIC_LVT0 - 350h APIC_LVT1 - 360h APIC_LVTERR - 370h APIC_EFEAT - 0x400h APIC_ECTRL - 0x410h APIC_SEOI - 0x420h APIC_IER - 0x480h These are few registers like part of LVT (APIC_LVTTHMR ... APIC_LVTERR) , priority (APIC_TASKPRI ... APIC_PROCPRI), extended APIC (APIC_EFEAT ... APIC_ECTRL) which can be grouped. Intention of doing per reg is to be explicit about which registers are accessed from backing page, which from hv and which are not allowed access. As access (and their perms) are per-reg and not range-based, this made sense to me. Also, if ranges are used, I think 16-byte aligned checks are needed for the range. If using ranges looks more logical grouping here, I can update it as per the above range groupings. >> + case APIC_EILVTn(0) ... APIC_EILVTn(3): > > Like here. > As this case is for EILVT register range, these registers were grouped. (I need to add a 16-byte alignment check here). >> + return get_reg(backing_page, reg); >> + case APIC_ISR ... APIC_ISR + 0x70: >> + case APIC_TMR ... APIC_TMR + 0x70: >> + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); > > What's the point of a WARN... > >> + return get_reg(backing_page, reg); > > ... and then allowing the register access anyway? > I will skip access for non-aligned case. >> + /* IRR and ALLOWED_IRR offset range */ >> + case APIC_IRR ... APIC_IRR + 0x74: >> + /* >> + * Either aligned at 16 bytes for valid IRR reg offset or a >> + * valid Secure AVIC ALLOWED_IRR offset. >> + */ >> + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), >> + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); >> + return get_reg(backing_page, reg); > > Ditto. > > And below too. > Same reply as above. - Neeraj >> + default: >> + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); >> + return 0; >> + } >> +} >> +
On 11/7/2024 12:50 AM, Melody (Huibo) Wang wrote: > Hi Neeraj, > > On 9/13/2024 4:36 AM, Neeraj Upadhyay wrote: >> The x2APIC registers are mapped at an offset within the guest APIC >> backing page which is same as their x2APIC MMIO offset. Secure AVIC >> adds new registers such as ALLOWED_IRRs (which are at 4-byte offset >> within the IRR register offset range) and NMI_REQ to the APIC register >> space. In addition, the APIC_ID register is writable and configured by >> guest. >> >> Add read() and write() APIC callback functions to read and write x2APIC >> registers directly from the guest APIC backing page. >> >> The default .read()/.write() callbacks of x2APIC drivers perform >> a rdmsr/wrmsr of the x2APIC registers. When Secure AVIC is enabled, >> these would result in #VC exception (for non-accelerated register >> accesses). The #VC exception handler reads/write the x2APIC register >> in the guest APIC backing page. Since this would increase the latency >> of accessing x2APIC registers, the read() and write() callbacks of >> Secure AVIC driver directly reads/writes to the guest APIC backing page. >> > I think this is important non-obvious information which should be in a comment in the code > itself, not just in the commit message. > Sure, I will add some of this information in the comments. Thanks for the review! - Neeraj > Thanks, > Melody
On Thu, Nov 07, 2024 at 09:02:16AM +0530, Neeraj Upadhyay wrote: > Intention of doing per reg is to be explicit about which registers > are accessed from backing page, which from hv and which are not allowed > access. As access (and their perms) are per-reg and not range-based, this > made sense to me. Also, if ranges are used, I think 16-byte aligned > checks are needed for the range. If using ranges looks more logical grouping > here, I can update it as per the above range groupings. Is this list of registers going to remain or are we going to keep adding to it so that the ranges become contiguous? And yes, there is some merit to explicitly naming them but you can also put that in a comment once above those functions too.
On 11/7/2024 7:58 PM, Borislav Petkov wrote: > On Thu, Nov 07, 2024 at 09:02:16AM +0530, Neeraj Upadhyay wrote: >> Intention of doing per reg is to be explicit about which registers >> are accessed from backing page, which from hv and which are not allowed >> access. As access (and their perms) are per-reg and not range-based, this >> made sense to me. Also, if ranges are used, I think 16-byte aligned >> checks are needed for the range. If using ranges looks more logical grouping >> here, I can update it as per the above range groupings. > > Is this list of registers going to remain or are we going to keep adding to > it so that the ranges become contiguous? > From the APIC architecture details in APM and SDM, I see these gaps are reserved ranges which are present for xapic also. x2apic keeps the register layout consistent. So, these gaps looks to have have remained reserved for long. I don't have information on the uarch reasons (if any) for the reserved space layout. > And yes, there is some merit to explicitly naming them but you can also put > that in a comment once above those functions too. > I understand your point but, for this specific case, to me, each register as a separate "switch case" looks clearer and self-describing than keeping a range of different registers and putting comment about which registers it covers. In addition, while each APIC register is at 16-byte alignment, they are accessed using dword size reads/writes. So, as mentioned in previous reply, using ranges requires alignment checks. One hypothetical example where using range checks could become klugy is when the unused 12 bytes of 16 byte of a register is repurposed for implementation- specific features and the read/write permissions are different for the architecture register and the implementation-defined one. Secure AVIC uses (IRRn + 4) address for ALLOWED_IRRn. However, the r/w permissions are same for IRRn and ALLOWED_IRRn. So, using ranges for IRR register space works fine. However, it may not work if similar register-space-repurposing happens for other features in future. I understand this could be considered as speculative and hand-wavy reasoning. So, I would ask, does above reasoning convince you with the current switch-case layout or you want it to be range-based? - Neeraj
On Fri, Nov 08, 2024 at 02:29:03PM +0530, Neeraj Upadhyay wrote: > From the APIC architecture details in APM and SDM, I see these gaps are reserved What I actually meant here is whether SAVIC enablement is going to keep adding more and more entries here so that it becomes practically *all* possible, each spelled out explicitly. But I went further in your patchset and it doesn't look like it so meh, ok. > I would ask, does above reasoning convince you with the current switch-case layout > or you want it to be range-based? That's fine, let's keep them like they are now and we can always revisit if the list grows too ugly.
On 11/8/2024 4:18 PM, Borislav Petkov wrote: > On Fri, Nov 08, 2024 at 02:29:03PM +0530, Neeraj Upadhyay wrote: >> From the APIC architecture details in APM and SDM, I see these gaps are reserved > > What I actually meant here is whether SAVIC enablement is going to keep adding > more and more entries here so that it becomes practically *all* possible, each > spelled out explicitly. > Ah ok. My bad, I completely misread it and went too far. > But I went further in your patchset and it doesn't look like it so meh, ok. > Yes, the offsets layout I described was with respect to the complete series. Thanks for checking it. >> I would ask, does above reasoning convince you with the current switch-case layout >> or you want it to be range-based? > > That's fine, let's keep them like they are now and we can always revisit if > the list grows too ugly. > Sure, thanks! - Neeraj
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index 094106b6a538..be39a543fbe5 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h @@ -135,6 +135,8 @@ #define APIC_TDR_DIV_128 0xA #define APIC_EFEAT 0x400 #define APIC_ECTRL 0x410 +#define APIC_SEOI 0x420 +#define APIC_IER 0x480 #define APIC_EILVTn(n) (0x500 + 0x10 * n) #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ #define APIC_EILVT_NR_AMD_10H 4 diff --git a/arch/x86/kernel/apic/x2apic_savic.c b/arch/x86/kernel/apic/x2apic_savic.c index d903c35b8b64..6a471bbc3dba 100644 --- a/arch/x86/kernel/apic/x2apic_savic.c +++ b/arch/x86/kernel/apic/x2apic_savic.c @@ -10,6 +10,7 @@ #include <linux/cpumask.h> #include <linux/cc_platform.h> #include <linux/percpu-defs.h> +#include <linux/align.h> #include <asm/apic.h> #include <asm/sev.h> @@ -24,6 +25,108 @@ static int x2apic_savic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return x2apic_enabled() && cc_platform_has(CC_ATTR_SNP_SECURE_AVIC); } +static inline u32 get_reg(char *page, int reg_off) +{ + return READ_ONCE(*((u32 *)(page + reg_off))); +} + +static inline void set_reg(char *page, int reg_off, u32 val) +{ + WRITE_ONCE(*((u32 *)(page + reg_off)), val); +} + +#define SAVIC_ALLOWED_IRR_OFFSET 0x204 + +static u32 x2apic_savic_read(u32 reg) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_TMICT: + case APIC_TMCCT: + case APIC_TDCR: + case APIC_ID: + case APIC_LVR: + case APIC_TASKPRI: + case APIC_ARBPRI: + case APIC_PROCPRI: + case APIC_LDR: + case APIC_SPIV: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVT0: + case APIC_LVT1: + case APIC_LVTERR: + case APIC_EFEAT: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + return get_reg(backing_page, reg); + case APIC_ISR ... APIC_ISR + 0x70: + case APIC_TMR ... APIC_TMR + 0x70: + WARN_ONCE(!IS_ALIGNED(reg, 16), "Reg offset %#x not aligned at 16 bytes", reg); + return get_reg(backing_page, reg); + /* IRR and ALLOWED_IRR offset range */ + case APIC_IRR ... APIC_IRR + 0x74: + /* + * Either aligned at 16 bytes for valid IRR reg offset or a + * valid Secure AVIC ALLOWED_IRR offset. + */ + WARN_ONCE(!(IS_ALIGNED(reg, 16) || IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)), + "Misaligned IRR/ALLOWED_IRR reg offset %#x", reg); + return get_reg(backing_page, reg); + default: + pr_err("Permission denied: read of Secure AVIC reg offset %#x\n", reg); + return 0; + } +} + +#define SAVIC_NMI_REQ_OFFSET 0x278 + +static void x2apic_savic_write(u32 reg, u32 data) +{ + void *backing_page = this_cpu_read(apic_backing_page); + + switch (reg) { + case APIC_LVTT: + case APIC_LVT0: + case APIC_LVT1: + case APIC_TMICT: + case APIC_TDCR: + case APIC_SELF_IPI: + /* APIC_ID is writable and configured by guest for Secure AVIC */ + case APIC_ID: + case APIC_TASKPRI: + case APIC_EOI: + case APIC_SPIV: + case SAVIC_NMI_REQ_OFFSET: + case APIC_ESR: + case APIC_ICR: + case APIC_LVTTHMR: + case APIC_LVTPC: + case APIC_LVTERR: + case APIC_ECTRL: + case APIC_SEOI: + case APIC_IER: + case APIC_EILVTn(0) ... APIC_EILVTn(3): + set_reg(backing_page, reg, data); + break; + /* ALLOWED_IRR offsets are writable */ + case SAVIC_ALLOWED_IRR_OFFSET ... SAVIC_ALLOWED_IRR_OFFSET + 0x70: + if (IS_ALIGNED(reg - SAVIC_ALLOWED_IRR_OFFSET, 16)) { + set_reg(backing_page, reg, data); + break; + } + fallthrough; + default: + pr_err("Permission denied: write to Secure AVIC reg offset %#x\n", reg); + } +} + static void x2apic_savic_send_IPI(int cpu, int vector) { u32 dest = per_cpu(x86_cpu_to_apicid, cpu); @@ -140,8 +243,8 @@ static struct apic apic_x2apic_savic __ro_after_init = { .send_IPI_self = x2apic_send_IPI_self, .nmi_to_offline_cpu = true, - .read = native_apic_msr_read, - .write = native_apic_msr_write, + .read = x2apic_savic_read, + .write = x2apic_savic_write, .eoi = native_apic_msr_eoi, .icr_read = native_x2apic_icr_read, .icr_write = native_x2apic_icr_write,