Message ID | 20230220183847.59159-25-michael.roth@amd.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Hypervisor Support | expand |
On Mon, 20 Feb 2023 12:38:15 -0600 Michael Roth <michael.roth@amd.com> wrote: > From: Brijesh Singh <brijesh.singh@amd.com> > > The behavior and requirement for the SEV-legacy command is altered when > the SNP firmware is in the INIT state. See SEV-SNP firmware specification > for more details. > > Allocate the Trusted Memory Region (TMR) as a 2mb sized/aligned region > when SNP is enabled to satisfy new requirements for the SNP. Continue > allocating a 1mb region for !SNP configuration. > > While at it, provide API that can be used by others to allocate a page > that can be used by the firmware. The immediate user for this API will > be the KVM driver. The KVM driver to need to allocate a firmware context > page during the guest creation. The context page need to be updated > by the firmware. See the SEV-SNP specification for further details. > > Co-developed-by: Ashish Kalra <ashish.kalra@amd.com> > Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > Signed-off-by: Michael Roth <michael.roth@amd.com> > --- > drivers/crypto/ccp/sev-dev.c | 148 +++++++++++++++++++++++++++++++++-- > include/linux/psp-sev.h | 9 +++ > 2 files changed, 149 insertions(+), 8 deletions(-) > > diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c > index eca4e59b0f44..4c12e98a1219 100644 > --- a/drivers/crypto/ccp/sev-dev.c > +++ b/drivers/crypto/ccp/sev-dev.c > @@ -94,6 +94,13 @@ static void *sev_init_ex_buffer; > */ > struct sev_data_range_list *snp_range_list; > > +/* When SEV-SNP is enabled the TMR needs to be 2MB aligned and 2MB size. */ > +#define SEV_SNP_ES_TMR_SIZE (2 * 1024 * 1024) It would be better to re-use the kernel size definition macros. E.g. SZ_2MB. > + > +static size_t sev_es_tmr_size = SEV_ES_TMR_SIZE; > + > +static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret); > + > static inline bool sev_version_greater_or_equal(u8 maj, u8 min) > { > struct sev_device *sev = psp_master->sev_data; > @@ -216,11 +223,134 @@ void snp_mark_pages_offline(unsigned long pfn, unsigned int npages) > } > EXPORT_SYMBOL_GPL(snp_mark_pages_offline); > > +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) > +{ > + /* Cbit maybe set in the paddr */ This is confusing. I suppose C-bit is treated as a attribute of PTE in the kernel not part of the PA. It means only a PTE might carry a C-bit. The paddr is from __pa(page_address()). It is not extracted from a PTE. Thus, the return from them should never have a C-bit. BTW: Wouldn't it be better to have pfn as input param instead of paddr? The caller has struct page, calling snp_reclaim_pages(page_to_pfn(page), xxxxx) would be much clearer than the current conversion: page_address() (struct page is converted to VA), __pa() (VA is converted to PA) in the caller and then PA is converted to pfn here. > + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; > + int ret, err, i, n = 0; > + should be unsigned int i, n; as the input param npage is unsigned int. > + if (!pfn_valid(pfn)) { > + pr_err("%s: Invalid PFN %lx\n", __func__, pfn); > + return 0; > + } > + > + for (i = 0; i < npages; i++, pfn++, n++) { > + paddr = pfn << PAGE_SHIFT; > + > + if (locked) > + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); > + else > + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); > + > + if (ret) > + goto cleanup; > + > + ret = rmp_make_shared(pfn, PG_LEVEL_4K); > + if (ret) > + goto cleanup; > + } > + > + return 0; > + > +cleanup: > + /* > + * If failed to reclaim the page then page is no longer safe to > + * be release back to the system, leak it. > + */ > + snp_mark_pages_offline(pfn, npages - n); > + return ret; > +} > + > +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) The same comment as above. Better take pfn or page instead of paddr with redundant conversions. > +{ > + /* Cbit maybe set in the paddr */ > + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; > + int rc, n = 0, i; > + > + for (i = 0; i < npages; i++, n++, pfn++) { > + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); > + if (rc) > + goto cleanup; > + } > + > + return 0; > + > +cleanup: > + /* > + * Try unrolling the firmware state changes by > + * reclaiming the pages which were already changed to the > + * firmware state. > + */ > + snp_reclaim_pages(paddr, n, locked); > + > + return rc; > +} > + > +static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order, bool locked) > +{ > + unsigned long npages = 1ul << order, paddr; > + struct sev_device *sev; > + struct page *page; > + > + if (!psp_master || !psp_master->sev_data) > + return NULL; > + > + page = alloc_pages(gfp_mask, order); > + if (!page) > + return NULL; > + > + /* If SEV-SNP is initialized then add the page in RMP table. */ > + sev = psp_master->sev_data; > + if (!sev->snp_initialized) > + return page; > + > + paddr = __pa((unsigned long)page_address(page)); > + if (rmp_mark_pages_firmware(paddr, npages, locked)) > + return NULL; > + > + return page; > +} > + > +void *snp_alloc_firmware_page(gfp_t gfp_mask) > +{ > + struct page *page; > + > + page = __snp_alloc_firmware_pages(gfp_mask, 0, false); > + > + return page ? page_address(page) : NULL; > +} > +EXPORT_SYMBOL_GPL(snp_alloc_firmware_page); > + > +static void __snp_free_firmware_pages(struct page *page, int order, bool locked) > +{ > + struct sev_device *sev = psp_master->sev_data; > + unsigned long paddr, npages = 1ul << order; > + > + if (!page) > + return; > + > + paddr = __pa((unsigned long)page_address(page)); > + if (sev->snp_initialized && > + snp_reclaim_pages(paddr, npages, locked)) > + return; > + > + __free_pages(page, order); > +} > + > +void snp_free_firmware_page(void *addr) > +{ > + if (!addr) > + return; > + > + __snp_free_firmware_pages(virt_to_page(addr), 0, false); > +} > +EXPORT_SYMBOL_GPL(snp_free_firmware_page); > + > static void *sev_fw_alloc(unsigned long len) > { > struct page *page; > > - page = alloc_pages(GFP_KERNEL, get_order(len)); > + page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len), false); > if (!page) > return NULL; > > @@ -468,7 +598,7 @@ static int __sev_init_locked(int *error) > data.tmr_address = __pa(sev_es_tmr); > > data.flags |= SEV_INIT_FLAGS_SEV_ES; > - data.tmr_len = SEV_ES_TMR_SIZE; > + data.tmr_len = sev_es_tmr_size; > } > > return __sev_do_cmd_locked(SEV_CMD_INIT, &data, error); > @@ -491,7 +621,7 @@ static int __sev_init_ex_locked(int *error) > data.tmr_address = __pa(sev_es_tmr); > > data.flags |= SEV_INIT_FLAGS_SEV_ES; > - data.tmr_len = SEV_ES_TMR_SIZE; > + data.tmr_len = sev_es_tmr_size; > } > > return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error); > @@ -982,6 +1112,8 @@ static int __sev_snp_init_locked(int *error) > sev->snp_initialized = true; > dev_dbg(sev->dev, "SEV-SNP firmware initialized\n"); > > + sev_es_tmr_size = SEV_SNP_ES_TMR_SIZE; > + > return rc; > } > > @@ -1499,8 +1631,9 @@ static void sev_firmware_shutdown(struct sev_device *sev) > /* The TMR area was encrypted, flush it from the cache */ > wbinvd_on_all_cpus(); > > - free_pages((unsigned long)sev_es_tmr, > - get_order(SEV_ES_TMR_SIZE)); > + __snp_free_firmware_pages(virt_to_page(sev_es_tmr), > + get_order(sev_es_tmr_size), > + false); > sev_es_tmr = NULL; > } > > @@ -1511,8 +1644,7 @@ static void sev_firmware_shutdown(struct sev_device *sev) > } > > if (snp_range_list) { > - free_pages((unsigned long)snp_range_list, > - get_order(PAGE_SIZE)); > + snp_free_firmware_page(snp_range_list); > snp_range_list = NULL; > } > > @@ -1593,7 +1725,7 @@ void sev_pci_init(void) > } > > /* Obtain the TMR memory area for SEV-ES use */ > - sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); > + sev_es_tmr = sev_fw_alloc(sev_es_tmr_size); > if (!sev_es_tmr) > dev_warn(sev->dev, > "SEV: TMR allocation failed, SEV-ES support unavailable\n"); > diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h > index 8edf5c548fbf..d19744807471 100644 > --- a/include/linux/psp-sev.h > +++ b/include/linux/psp-sev.h > @@ -922,6 +922,8 @@ int sev_guest_decommission(struct sev_data_decommission *data, int *error); > int sev_do_cmd(int cmd, void *data, int *psp_ret); > > void *psp_copy_user_blob(u64 uaddr, u32 len); > +void *snp_alloc_firmware_page(gfp_t mask); > +void snp_free_firmware_page(void *addr); > > /** > * sev_mark_pages_offline - insert non-reclaimed firmware/guest pages > @@ -959,6 +961,13 @@ static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_P > > void snp_mark_pages_offline(unsigned long pfn, unsigned int npages) {} > > +static inline void *snp_alloc_firmware_page(gfp_t mask) > +{ > + return NULL; > +} > + > +static inline void snp_free_firmware_page(void *addr) { } > + > #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ > > #endif /* __PSP_SEV_H__ */
>> +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) >> +{ >> + /* Cbit maybe set in the paddr */ > > This is confusing. > > I suppose C-bit is treated as a attribute of PTE in the kernel not part of the > PA. It means only a PTE might carry a C-bit. > snp_reclaim_pages() is also called for reclaiming guest memory, in which case the (guest) paddr will have the C-bit set. Hence this C-bit handling is done within snp_reclaim_pages() so that the callers don't need to handle it explicitly. > The paddr is from __pa(page_address()). It is not extracted from a PTE. Thus, the > return from them should never have a C-bit. > > BTW: Wouldn't it be better to have pfn as input param instead of paddr? > > The caller has struct page, calling snp_reclaim_pages(page_to_pfn(page), xxxxx) > would be much clearer than the current conversion: > page_address() (struct page is converted to VA), __pa() (VA is converted to PA) > in the caller and then PA is converted to pfn here. > >> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; >> + int ret, err, i, n = 0; >> + > > should be unsigned int i, n; as the input param npage is unsigned int. > >> + if (!pfn_valid(pfn)) { >> + pr_err("%s: Invalid PFN %lx\n", __func__, pfn); >> + return 0; >> + } >> + >> + for (i = 0; i < npages; i++, pfn++, n++) { >> + paddr = pfn << PAGE_SHIFT; >> + >> + if (locked) >> + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); >> + else >> + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); >> + >> + if (ret) >> + goto cleanup; >> + >> + ret = rmp_make_shared(pfn, PG_LEVEL_4K); >> + if (ret) >> + goto cleanup; >> + } >> + >> + return 0; >> + >> +cleanup: >> + /* >> + * If failed to reclaim the page then page is no longer safe to >> + * be release back to the system, leak it. >> + */ >> + snp_mark_pages_offline(pfn, npages - n); >> + return ret; >> +} >> + >> +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) > > The same comment as above. Better take pfn or page instead of paddr with > redundant conversions. > Again, the paddr can point to guest memory so it can have C-bit set. Thanks, Ashish >> +{ >> + /* Cbit maybe set in the paddr */ >> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; >> + int rc, n = 0, i; >> + >> + for (i = 0; i < npages; i++, n++, pfn++) { >> + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); >> + if (rc) >> + goto cleanup; >> + } >> + >> + return 0; >> + >> +cleanup: >> + /* >> + * Try unrolling the firmware state changes by >> + * reclaiming the pages which were already changed to the >> + * firmware state. >> + */ >> + snp_reclaim_pages(paddr, n, locked); >> + >> + return rc; >> +} >> +
On Tue, 21 Feb 2023 09:31:01 -0600 "Kalra, Ashish" <ashish.kalra@amd.com> wrote: > >> +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) > >> +{ > >> + /* Cbit maybe set in the paddr */ > > > > This is confusing. > > > > I suppose C-bit is treated as a attribute of PTE in the kernel not part of the > > PA. It means only a PTE might carry a C-bit. > > > > snp_reclaim_pages() is also called for reclaiming guest memory, in which > case the (guest) paddr will have the C-bit set. Hence this C-bit > handling is done within snp_reclaim_pages() so that the callers don't > need to handle it explicitly. Thanks for the explanation. Do you mean it will be used like that in the later patch? Sorry if it is in the later patch as I was making progress slowly. It is quite a big patch set. At least, I don't see that kind of usage in the current patch. Feel free to correct me if I am wrong. The call chains: __snp_free_firmware_page() snp_reclaim_pages(); As __snp_free_firmware_page() takes struct page*, all the follwing coversion from it would not carry C-bit. __snp_alloc_firmware_pages() rmp_mark_pages_firmware() snp_reclaim_pages() As __snp_alloc_firmware_page() allocates page with struct page*, the same conclusion as above. > > > > The paddr is from __pa(page_address()). It is not extracted from a PTE. Thus, the > > return from them should never have a C-bit. > > > > BTW: Wouldn't it be better to have pfn as input param instead of paddr? > > > > The caller has struct page, calling snp_reclaim_pages(page_to_pfn(page), xxxxx) > > would be much clearer than the current conversion: > > page_address() (struct page is converted to VA), __pa() (VA is converted to PA) > > in the caller and then PA is converted to pfn here. > > > >> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; > >> + int ret, err, i, n = 0; > >> + > > > > should be unsigned int i, n; as the input param npage is unsigned int. > > > >> + if (!pfn_valid(pfn)) { > >> + pr_err("%s: Invalid PFN %lx\n", __func__, pfn); > >> + return 0; > >> + } > >> + > >> + for (i = 0; i < npages; i++, pfn++, n++) { > >> + paddr = pfn << PAGE_SHIFT; > >> + > >> + if (locked) > >> + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); > >> + else > >> + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); > >> + > >> + if (ret) > >> + goto cleanup; > >> + > >> + ret = rmp_make_shared(pfn, PG_LEVEL_4K); > >> + if (ret) > >> + goto cleanup; > >> + } > >> + > >> + return 0; > >> + > >> +cleanup: > >> + /* > >> + * If failed to reclaim the page then page is no longer safe to > >> + * be release back to the system, leak it. > >> + */ > >> + snp_mark_pages_offline(pfn, npages - n); > >> + return ret; > >> +} > >> + > >> +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) > > > > The same comment as above. Better take pfn or page instead of paddr with > > redundant conversions. > > > > Again, the paddr can point to guest memory so it can have C-bit set. > > Thanks, > Ashish > > >> +{ > >> + /* Cbit maybe set in the paddr */ > >> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; > >> + int rc, n = 0, i; > >> + > >> + for (i = 0; i < npages; i++, n++, pfn++) { > >> + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); > >> + if (rc) > >> + goto cleanup; > >> + } > >> + > >> + return 0; > >> + > >> +cleanup: > >> + /* > >> + * Try unrolling the firmware state changes by > >> + * reclaiming the pages which were already changed to the > >> + * firmware state. > >> + */ > >> + snp_reclaim_pages(paddr, n, locked); > >> + > >> + return rc; > >> +} > >> +
On 2/21/2023 3:15 PM, Zhi Wang wrote: > On Tue, 21 Feb 2023 09:31:01 -0600 > "Kalra, Ashish" <ashish.kalra@amd.com> wrote: > >>>> +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) >>>> +{ >>>> + /* Cbit maybe set in the paddr */ >>> >>> This is confusing. >>> >>> I suppose C-bit is treated as a attribute of PTE in the kernel not part of the >>> PA. It means only a PTE might carry a C-bit. >>> >> >> snp_reclaim_pages() is also called for reclaiming guest memory, in which >> case the (guest) paddr will have the C-bit set. Hence this C-bit >> handling is done within snp_reclaim_pages() so that the callers don't >> need to handle it explicitly. > > Thanks for the explanation. > > Do you mean it will be used like that in the later patch? Sorry if it is in the > later patch as I was making progress slowly. It is quite a big patch set. > Yes, these are callers in later patches, like the following code path in patch 25: static int unmap_firmware_writeable(u64 *paddr, u32 len, bool guest, struct snp_host_map *map) { unsigned int npages = PAGE_ALIGN(len) >> PAGE_SHIFT; ... /* If paddr points to a guest memory then restore the page state to hypervisor. */ if (guest) { if (snp_reclaim_pages(*paddr, npages, true)) return -EFAULT; goto done; } ... ... Or, the following as part of patch 52: int snp_guest_dbg_decrypt_page(u64 gctx_pfn, u64 src_pfn, u64 dst_pfn, int *error) { ... data.gctx_paddr = sme_me_mask | (gctx_pfn << PAGE_SHIFT); data.src_addr = sme_me_mask | (src_pfn << PAGE_SHIFT); data.dst_addr = sme_me_mask | (dst_pfn << PAGE_SHIFT); /* The destination page must be in the firmware state. */ if (rmp_mark_pages_firmware(data.dst_addr, 1, false)) return -EIO; ret = sev_do_cmd(SEV_CMD_SNP_DBG_DECRYPT, &data, error); /* Restore the page state */ if (snp_reclaim_pages(data.dst_addr, 1, false)) ... ... Thanks, Ashish > At least, I don't see that kind of usage in the current patch. Feel free to > correct me if I am wrong. > > The call chains: > > __snp_free_firmware_page() > snp_reclaim_pages(); > > As __snp_free_firmware_page() takes struct page*, all the follwing coversion > from it would not carry C-bit. > > __snp_alloc_firmware_pages() > rmp_mark_pages_firmware() > snp_reclaim_pages() > > As __snp_alloc_firmware_page() allocates page with struct page*, the same > conclusion as above. > >> >> >>> The paddr is from __pa(page_address()). It is not extracted from a PTE. Thus, the >>> return from them should never have a C-bit. >>> >>> BTW: Wouldn't it be better to have pfn as input param instead of paddr? >>> >>> The caller has struct page, calling snp_reclaim_pages(page_to_pfn(page), xxxxx) >>> would be much clearer than the current conversion: >>> page_address() (struct page is converted to VA), __pa() (VA is converted to PA) >>> in the caller and then PA is converted to pfn here. >>> >>>> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; >>>> + int ret, err, i, n = 0; >>>> + >>> >>> should be unsigned int i, n; as the input param npage is unsigned int. >>> >>>> + if (!pfn_valid(pfn)) { >>>> + pr_err("%s: Invalid PFN %lx\n", __func__, pfn); >>>> + return 0; >>>> + } >>>> + >>>> + for (i = 0; i < npages; i++, pfn++, n++) { >>>> + paddr = pfn << PAGE_SHIFT; >>>> + >>>> + if (locked) >>>> + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); >>>> + else >>>> + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); >>>> + >>>> + if (ret) >>>> + goto cleanup; >>>> + >>>> + ret = rmp_make_shared(pfn, PG_LEVEL_4K); >>>> + if (ret) >>>> + goto cleanup; >>>> + } >>>> + >>>> + return 0; >>>> + >>>> +cleanup: >>>> + /* >>>> + * If failed to reclaim the page then page is no longer safe to >>>> + * be release back to the system, leak it. >>>> + */ >>>> + snp_mark_pages_offline(pfn, npages - n); >>>> + return ret; >>>> +} >>>> + >>>> +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) >>> >>> The same comment as above. Better take pfn or page instead of paddr with >>> redundant conversions. >>> >> >> Again, the paddr can point to guest memory so it can have C-bit set. >> >> Thanks, >> Ashish >> >>>> +{ >>>> + /* Cbit maybe set in the paddr */ >>>> + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; >>>> + int rc, n = 0, i; >>>> + >>>> + for (i = 0; i < npages; i++, n++, pfn++) { >>>> + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); >>>> + if (rc) >>>> + goto cleanup; >>>> + } >>>> + >>>> + return 0; >>>> + >>>> +cleanup: >>>> + /* >>>> + * Try unrolling the firmware state changes by >>>> + * reclaiming the pages which were already changed to the >>>> + * firmware state. >>>> + */ >>>> + snp_reclaim_pages(paddr, n, locked); >>>> + >>>> + return rc; >>>> +} >>>> + >
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index eca4e59b0f44..4c12e98a1219 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -94,6 +94,13 @@ static void *sev_init_ex_buffer; */ struct sev_data_range_list *snp_range_list; +/* When SEV-SNP is enabled the TMR needs to be 2MB aligned and 2MB size. */ +#define SEV_SNP_ES_TMR_SIZE (2 * 1024 * 1024) + +static size_t sev_es_tmr_size = SEV_ES_TMR_SIZE; + +static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret); + static inline bool sev_version_greater_or_equal(u8 maj, u8 min) { struct sev_device *sev = psp_master->sev_data; @@ -216,11 +223,134 @@ void snp_mark_pages_offline(unsigned long pfn, unsigned int npages) } EXPORT_SYMBOL_GPL(snp_mark_pages_offline); +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) +{ + /* Cbit maybe set in the paddr */ + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; + int ret, err, i, n = 0; + + if (!pfn_valid(pfn)) { + pr_err("%s: Invalid PFN %lx\n", __func__, pfn); + return 0; + } + + for (i = 0; i < npages; i++, pfn++, n++) { + paddr = pfn << PAGE_SHIFT; + + if (locked) + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); + else + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &paddr, &err); + + if (ret) + goto cleanup; + + ret = rmp_make_shared(pfn, PG_LEVEL_4K); + if (ret) + goto cleanup; + } + + return 0; + +cleanup: + /* + * If failed to reclaim the page then page is no longer safe to + * be release back to the system, leak it. + */ + snp_mark_pages_offline(pfn, npages - n); + return ret; +} + +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) +{ + /* Cbit maybe set in the paddr */ + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; + int rc, n = 0, i; + + for (i = 0; i < npages; i++, n++, pfn++) { + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); + if (rc) + goto cleanup; + } + + return 0; + +cleanup: + /* + * Try unrolling the firmware state changes by + * reclaiming the pages which were already changed to the + * firmware state. + */ + snp_reclaim_pages(paddr, n, locked); + + return rc; +} + +static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order, bool locked) +{ + unsigned long npages = 1ul << order, paddr; + struct sev_device *sev; + struct page *page; + + if (!psp_master || !psp_master->sev_data) + return NULL; + + page = alloc_pages(gfp_mask, order); + if (!page) + return NULL; + + /* If SEV-SNP is initialized then add the page in RMP table. */ + sev = psp_master->sev_data; + if (!sev->snp_initialized) + return page; + + paddr = __pa((unsigned long)page_address(page)); + if (rmp_mark_pages_firmware(paddr, npages, locked)) + return NULL; + + return page; +} + +void *snp_alloc_firmware_page(gfp_t gfp_mask) +{ + struct page *page; + + page = __snp_alloc_firmware_pages(gfp_mask, 0, false); + + return page ? page_address(page) : NULL; +} +EXPORT_SYMBOL_GPL(snp_alloc_firmware_page); + +static void __snp_free_firmware_pages(struct page *page, int order, bool locked) +{ + struct sev_device *sev = psp_master->sev_data; + unsigned long paddr, npages = 1ul << order; + + if (!page) + return; + + paddr = __pa((unsigned long)page_address(page)); + if (sev->snp_initialized && + snp_reclaim_pages(paddr, npages, locked)) + return; + + __free_pages(page, order); +} + +void snp_free_firmware_page(void *addr) +{ + if (!addr) + return; + + __snp_free_firmware_pages(virt_to_page(addr), 0, false); +} +EXPORT_SYMBOL_GPL(snp_free_firmware_page); + static void *sev_fw_alloc(unsigned long len) { struct page *page; - page = alloc_pages(GFP_KERNEL, get_order(len)); + page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len), false); if (!page) return NULL; @@ -468,7 +598,7 @@ static int __sev_init_locked(int *error) data.tmr_address = __pa(sev_es_tmr); data.flags |= SEV_INIT_FLAGS_SEV_ES; - data.tmr_len = SEV_ES_TMR_SIZE; + data.tmr_len = sev_es_tmr_size; } return __sev_do_cmd_locked(SEV_CMD_INIT, &data, error); @@ -491,7 +621,7 @@ static int __sev_init_ex_locked(int *error) data.tmr_address = __pa(sev_es_tmr); data.flags |= SEV_INIT_FLAGS_SEV_ES; - data.tmr_len = SEV_ES_TMR_SIZE; + data.tmr_len = sev_es_tmr_size; } return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error); @@ -982,6 +1112,8 @@ static int __sev_snp_init_locked(int *error) sev->snp_initialized = true; dev_dbg(sev->dev, "SEV-SNP firmware initialized\n"); + sev_es_tmr_size = SEV_SNP_ES_TMR_SIZE; + return rc; } @@ -1499,8 +1631,9 @@ static void sev_firmware_shutdown(struct sev_device *sev) /* The TMR area was encrypted, flush it from the cache */ wbinvd_on_all_cpus(); - free_pages((unsigned long)sev_es_tmr, - get_order(SEV_ES_TMR_SIZE)); + __snp_free_firmware_pages(virt_to_page(sev_es_tmr), + get_order(sev_es_tmr_size), + false); sev_es_tmr = NULL; } @@ -1511,8 +1644,7 @@ static void sev_firmware_shutdown(struct sev_device *sev) } if (snp_range_list) { - free_pages((unsigned long)snp_range_list, - get_order(PAGE_SIZE)); + snp_free_firmware_page(snp_range_list); snp_range_list = NULL; } @@ -1593,7 +1725,7 @@ void sev_pci_init(void) } /* Obtain the TMR memory area for SEV-ES use */ - sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); + sev_es_tmr = sev_fw_alloc(sev_es_tmr_size); if (!sev_es_tmr) dev_warn(sev->dev, "SEV: TMR allocation failed, SEV-ES support unavailable\n"); diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index 8edf5c548fbf..d19744807471 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -922,6 +922,8 @@ int sev_guest_decommission(struct sev_data_decommission *data, int *error); int sev_do_cmd(int cmd, void *data, int *psp_ret); void *psp_copy_user_blob(u64 uaddr, u32 len); +void *snp_alloc_firmware_page(gfp_t mask); +void snp_free_firmware_page(void *addr); /** * sev_mark_pages_offline - insert non-reclaimed firmware/guest pages @@ -959,6 +961,13 @@ static inline void *psp_copy_user_blob(u64 __user uaddr, u32 len) { return ERR_P void snp_mark_pages_offline(unsigned long pfn, unsigned int npages) {} +static inline void *snp_alloc_firmware_page(gfp_t mask) +{ + return NULL; +} + +static inline void snp_free_firmware_page(void *addr) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */