Message ID | 20211201160257.1003912-2-ltykernel@gmail.com |
---|---|
State | Superseded |
Series | x86/Hyper-V: Add Hyper-V Isolation VM support (Second part) |
On 12/1/21 10:02 AM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>
> In an Isolation VM with AMD SEV, the bounce buffer must be accessed via
> an extra address space above shared_gpa_boundary (e.g. a 39-bit address
> line) reported by the Hyper-V CPUID ISOLATION_CONFIG leaf. The access
> physical address is the original physical address plus
> shared_gpa_boundary. In the AMD SEV-SNP spec, shared_gpa_boundary is
> called the virtual top of memory (vTOM). Memory addresses below vTOM are
> automatically treated as private while memory above vTOM is treated as
> shared.
>
> Expose swiotlb_unencrypted_base for platforms to set the unencrypted
> memory base offset; the platform then calls
> swiotlb_update_mem_attributes() to remap the swiotlb memory to the
> unencrypted address space. memremap() cannot be called in the early boot
> stage, so the remapping code is placed in swiotlb_update_mem_attributes().
> Store the remap address and use it to copy data from/to the swiotlb
> bounce buffer.
>
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>

This patch results in the following stack trace during a bare-metal boot
on my EPYC system with SME active (e.g. mem_encrypt=on):

[    0.123932] BUG: Bad page state in process swapper pfn:108001
[    0.123942] page:(____ptrval____) refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x108001
[    0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
[    0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 0000000000000000
[    0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 0000000000000000
[    0.123955] page dumped because: nonzero mapcount
[    0.123957] Modules linked in:
[    0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 5.16.0-rc3-sos-custom #2
[    0.123964] Hardware name: AMD Corporation
[    0.123967] Call Trace:
[    0.123971]  <TASK>
[    0.123975]  dump_stack_lvl+0x48/0x5e
[    0.123985]  bad_page.cold+0x65/0x96
[    0.123990]  __free_pages_ok+0x3a8/0x410
[    0.123996]  memblock_free_all+0x171/0x1dc
[    0.124005]  mem_init+0x1f/0x14b
[    0.124011]  start_kernel+0x3b5/0x6a1
[    0.124016]  secondary_startup_64_no_verify+0xb0/0xbb
[    0.124022]  </TASK>

I see ~40 of these traces, each for different pfns.

Thanks,
Tom

> ---
> Change since v2:
>         * Leave mem->vaddr as phys_to_virt(mem->start) when remapping
>           the swiotlb memory fails.
>
> Change since v1:
>         * Rework the comment in swiotlb_init_io_tlb_mem()
>         * Make swiotlb_init_io_tlb_mem() return void again.
> ---
>  include/linux/swiotlb.h |  6 ++++++
>  kernel/dma/swiotlb.c    | 47 ++++++++++++++++++++++++++++++++++++-----
>  2 files changed, 48 insertions(+), 5 deletions(-)
>
> [...]
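To make the vTOM arithmetic in the commit message concrete, here is a
minimal sketch of how a platform could consume the new hook.
swiotlb_unencrypted_base and swiotlb_update_mem_attributes() are the real
entry points from the patch; everything prefixed example_, and the
boundary value itself, are hypothetical stand-ins, not the Hyper-V code
from this series.

#include <linux/init.h>
#include <linux/swiotlb.h>

/* Hypothetical boundary: a 39-bit vTOM reported by the hypervisor. */
#define EXAMPLE_SHARED_GPA_BOUNDARY     (1UL << 39)

/*
 * Early boot: record the offset of the shared (unencrypted) alias of
 * system memory. The shared alias of a page at paddr is then
 * paddr + EXAMPLE_SHARED_GPA_BOUNDARY; e.g. 0x1000 aliases to
 * 0x8000001000.
 */
static void __init example_isolation_vm_init(void)
{
        swiotlb_unencrypted_base = EXAMPLE_SHARED_GPA_BOUNDARY;
}

/*
 * Later, once memremap() works (x86 reaches this via mem_encrypt_init()),
 * the swiotlb core remaps the pool at mem->start + swiotlb_unencrypted_base
 * and keeps the result in mem->vaddr for all bounce copies.
 */
static void __init example_remap_pool(void)
{
        swiotlb_update_mem_attributes();
}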
On 12/2/2021 10:42 PM, Tom Lendacky wrote:
> This patch results in the following stack trace during a bare-metal boot
> on my EPYC system with SME active (e.g. mem_encrypt=on):
>
> [    0.123932] BUG: Bad page state in process swapper pfn:108001
> [...]
>
> I see ~40 of these traces, each for different pfns.
>
> Thanks,
> Tom

Hi Tom:
      Thanks for your test. Could you help test the following patch and
check whether it fixes the issue.

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
  * @end:        The end address of the swiotlb memory pool. Used to do a quick
  *              range check to see if the memory was in fact allocated by this
  *              API.
+ * @vaddr:      The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ *              may be remapped in the memory encrypted case and store virtual
+ *              address for bounce buffer operation.
  * @nslabs:     The number of IO TLB blocks (in groups of 64) between @start and
  *              @end. For default swiotlb, this is command line adjustable via
  *              setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
        phys_addr_t start;
        phys_addr_t end;
+       void *vaddr;
        unsigned long nslabs;
        unsigned long used;
        unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
 }
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
+extern phys_addr_t swiotlb_unencrypted_base;
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..34e6ade4f73c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@
 #include <asm/io.h>
 #include <asm/dma.h>
 
+#include <linux/io.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
 
 struct io_tlb_mem io_tlb_default_mem;
 
+phys_addr_t swiotlb_unencrypted_base;
+
 /*
  * Max segment that we can provide which (if pages are contingous) will
  * not be bounced (unless SWIOTLB_FORCE is set).
@@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
        return DIV_ROUND_UP(val, IO_TLB_SIZE);
 }
 
+/*
+ * Remap swiotlb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+       void *vaddr = NULL;
+
+       if (swiotlb_unencrypted_base) {
+               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+               vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+               if (!vaddr)
+                       pr_err("Failed to map the unencrypted memory %llx size %lx.\n",
+                              paddr, bytes);
+       }
+
+       return vaddr;
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
        vaddr = phys_to_virt(mem->start);
        bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
        set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
+
+       mem->vaddr = swiotlb_mem_remap(mem, bytes);
+       if (!mem->vaddr)
+               mem->vaddr = vaddr;
+
+       memset(mem->vaddr, 0, bytes);
 }
 
 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
@@ -196,7 +225,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
        }
+
+       /*
+        * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+        * be remapped and cleared in swiotlb_update_mem_attributes.
+        */
+       if (swiotlb_unencrypted_base)
+               return;
+
        memset(vaddr, 0, bytes);
+       mem->vaddr = vaddr;
+       return;
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        phys_addr_t orig_addr = mem->slots[index].orig_addr;
        size_t alloc_size = mem->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
-       unsigned char *vaddr = phys_to_virt(tlb_addr);
+       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
        unsigned int tlb_offset, orig_addr_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)

Thanks.
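The functional heart of both versions is the swiotlb_bounce() hunk: copies
must go through the recorded mapping rather than phys_to_virt(tlb_addr).
A one-function sketch of that arithmetic follows; the helper itself is
illustrative, only the expression comes from the patch.

#include <linux/swiotlb.h>

/*
 * Illustrative helper, not part of the patch: where a bounce copy for
 * tlb_addr actually lands. mem->vaddr is either phys_to_virt(mem->start)
 * (no remap) or the memremap() of mem->start + swiotlb_unencrypted_base,
 * so offsetting it by (tlb_addr - mem->start) always hits the right slot.
 */
static unsigned char *example_bounce_vaddr(struct io_tlb_mem *mem,
                                           phys_addr_t tlb_addr)
{
        return (unsigned char *)mem->vaddr + (tlb_addr - mem->start);
}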
On 12/3/21 5:20 AM, Tianyu Lan wrote:
> Hi Tom:
>       Thanks for your test. Could you help test the following patch and
> check whether it fixes the issue.
>
> [...]

The patch is mangled. Is the only difference where set_memory_decrypted()
is called?

Thanks,
Tom
On 12/3/21 1:11 PM, Tom Lendacky wrote:
> On 12/3/21 5:20 AM, Tianyu Lan wrote:
>> Hi Tom:
>>       Thanks for your test. Could you help test the following patch and
>> check whether it fixes the issue.
>>
>> [...]
>
> The patch is mangled. Is the only difference where set_memory_decrypted()
> is called?

I de-mangled the patch. No more stack traces with SME active.

Thanks,
Tom
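The two versions do appear to differ only in where set_memory_decrypted()
runs, which is presumably what broke bare-metal SME here: v3 (the patch
archived at the bottom of this page) calls it from
swiotlb_init_io_tlb_mem(), i.e. at memblock time for the default pool,
while the follow-up leaves it at the original, later call sites. A sketch
restating the early-init guard the follow-up keeps (example_* is a
hypothetical name, not kernel API):

#include <linux/string.h>
#include <linux/swiotlb.h>

/*
 * When a remap boundary is configured, leave both the memory attributes
 * and the pool contents alone at early init; swiotlb_update_mem_attributes()
 * decrypts, remaps, and zeroes the pool later, once that is safe.
 */
static void example_finish_early_init(struct io_tlb_mem *mem, void *vaddr,
                                      unsigned long bytes)
{
        if (swiotlb_unencrypted_base)
                return; /* deferred to swiotlb_update_mem_attributes() */

        memset(vaddr, 0, bytes);
        mem->vaddr = vaddr;
}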
On 12/4/2021 4:06 AM, Tom Lendacky wrote:
>>> Hi Tom:
>>>       Thanks for your test. Could you help test the following patch and
>>> check whether it fixes the issue.
>>
>> The patch is mangled. Is the only difference where set_memory_decrypted()
>> is called?
>
> I de-mangled the patch. No more stack traces with SME active.
>
> Thanks,
> Tom

Hi Tom:
      Thanks a lot for your rework and test. I will update it in the next
version.

Thanks.
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
  * @end:        The end address of the swiotlb memory pool. Used to do a quick
  *              range check to see if the memory was in fact allocated by this
  *              API.
+ * @vaddr:      The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ *              may be remapped in the memory encrypted case and store virtual
+ *              address for bounce buffer operation.
  * @nslabs:     The number of IO TLB blocks (in groups of 64) between @start and
  *              @end. For default swiotlb, this is command line adjustable via
  *              setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
        phys_addr_t start;
        phys_addr_t end;
+       void *vaddr;
        unsigned long nslabs;
        unsigned long used;
        unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
 }
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
+extern phys_addr_t swiotlb_unencrypted_base;
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..adb9d06af5c8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@
 #include <asm/io.h>
 #include <asm/dma.h>
 
+#include <linux/io.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
 
 struct io_tlb_mem io_tlb_default_mem;
 
+phys_addr_t swiotlb_unencrypted_base;
+
 /*
  * Max segment that we can provide which (if pages are contingous) will
  * not be bounced (unless SWIOTLB_FORCE is set).
@@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
        return DIV_ROUND_UP(val, IO_TLB_SIZE);
 }
 
+/*
+ * Remap swiotlb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+       void *vaddr = NULL;
+
+       if (swiotlb_unencrypted_base) {
+               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+               vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+               if (!vaddr)
+                       pr_err("Failed to map the unencrypted memory %llx size %lx.\n",
+                              paddr, bytes);
+       }
+
+       return vaddr;
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
        vaddr = phys_to_virt(mem->start);
        bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
        set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
+
+       mem->vaddr = swiotlb_mem_remap(mem, bytes);
+       if (!mem->vaddr)
+               mem->vaddr = vaddr;
+
+       memset(mem->vaddr, 0, bytes);
 }
 
 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
@@ -196,7 +225,18 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
                mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                mem->slots[i].alloc_size = 0;
        }
+
+       /*
+        * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+        * be remapped and cleared in swiotlb_update_mem_attributes.
+        */
+       if (swiotlb_unencrypted_base)
+               return;
+
+       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
        memset(vaddr, 0, bytes);
+       mem->vaddr = vaddr;
+       return;
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -318,7 +358,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
        if (!mem->slots)
                return -ENOMEM;
 
-       set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
        swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
        swiotlb_print_info();
@@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
        phys_addr_t orig_addr = mem->slots[index].orig_addr;
        size_t alloc_size = mem->slots[index].alloc_size;
        unsigned long pfn = PFN_DOWN(orig_addr);
-       unsigned char *vaddr = phys_to_virt(tlb_addr);
+       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
        unsigned int tlb_offset, orig_addr_offset;
 
        if (orig_addr == INVALID_PHYS_ADDR)
@@ -806,8 +845,6 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
                        return -ENOMEM;
                }
 
-               set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
-                                    rmem->size >> PAGE_SHIFT);
                swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
                mem->force_bounce = true;
                mem->for_alloc = true;
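Taken as a whole, the lifecycle this patch sets up for a pool with
swiotlb_unencrypted_base set is: slot metadata is filled at memblock time
with attribute changes and zeroing deferred; swiotlb_update_mem_attributes()
later decrypts the pool, remaps mem->start + swiotlb_unencrypted_base into
mem->vaddr (falling back to phys_to_virt(mem->start) if the remap fails),
and zeroes it; swiotlb_bounce() then copies through mem->vaddr. A
hypothetical sanity check built on that invariant (example_vaddr_in_pool is
not part of the patch):

#include <linux/swiotlb.h>

/*
 * Hypothetical debug helper: with the remap in place, any virtual
 * address used for a bounce copy should fall inside
 * [mem->vaddr, mem->vaddr + pool size).
 */
static bool example_vaddr_in_pool(struct io_tlb_mem *mem, const void *p)
{
        size_t bytes = mem->end - mem->start;

        return p >= mem->vaddr &&
               p < (const void *)((const char *)mem->vaddr + bytes);
}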