diff mbox series

[V3,1/5] Swiotlb: Add Swiotlb bounce buffer remap function for HV IVM

Message ID 20211201160257.1003912-2-ltykernel@gmail.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series x86/Hyper-V: Add Hyper-V Isolation VM support(Second part) | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Tianyu Lan Dec. 1, 2021, 4:02 p.m. UTC
From: Tianyu Lan <Tianyu.Lan@microsoft.com>

In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
extra address space which is above shared_gpa_boundary (E.G 39 bit
address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
physical address will be original physical address + shared_gpa_boundary.
The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
memory(vTOM). Memory addresses below vTOM are automatically treated as
private while memory above vTOM is treated as shared.

Expose swiotlb_unencrypted_base for platforms to set unencrypted
memory base offset and platform calls swiotlb_update_mem_attributes()
to remap swiotlb mem to unencrypted address space. memremap() can
not be called in the early stage and so put remapping code into
swiotlb_update_mem_attributes(). Store remap address and use it to copy
data from/to swiotlb bounce buffer.

Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
Change since v2:
	* Leave mem->vaddr with phys_to_virt(mem->start) when fail
	  to remap swiotlb memory.

Change since v1:
	* Rework comment in the swiotlb_init_io_tlb_mem()
	* Make swiotlb_init_io_tlb_mem() back to return void.
---
 include/linux/swiotlb.h |  6 ++++++
 kernel/dma/swiotlb.c    | 47 ++++++++++++++++++++++++++++++++++++-----
 2 files changed, 48 insertions(+), 5 deletions(-)

Comments

Tom Lendacky Dec. 2, 2021, 2:42 p.m. UTC | #1
On 12/1/21 10:02 AM, Tianyu Lan wrote:
> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
> extra address space which is above shared_gpa_boundary (E.G 39 bit
> address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
> physical address will be original physical address + shared_gpa_boundary.
> The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
> memory(vTOM). Memory addresses below vTOM are automatically treated as
> private while memory above vTOM is treated as shared.
> 
> Expose swiotlb_unencrypted_base for platforms to set unencrypted
> memory base offset and platform calls swiotlb_update_mem_attributes()
> to remap swiotlb mem to unencrypted address space. memremap() can
> not be called in the early stage and so put remapping code into
> swiotlb_update_mem_attributes(). Store remap address and use it to copy
> data from/to swiotlb bounce buffer.
> 
> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>

This patch results in the following stack trace during a bare-metal boot
on my EPYC system with SME active (e.g. mem_encrypt=on):

[    0.123932] BUG: Bad page state in process swapper  pfn:108001
[    0.123942] page:(____ptrval____) refcount:0 mapcount:-128 mapping:0000000000000000 index:0x0 pfn:0x108001
[    0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
[    0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 0000000000000000
[    0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 0000000000000000
[    0.123955] page dumped because: nonzero mapcount
[    0.123957] Modules linked in:
[    0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 5.16.0-rc3-sos-custom #2
[    0.123964] Hardware name: AMD Corporation
[    0.123967] Call Trace:
[    0.123971]  <TASK>
[    0.123975]  dump_stack_lvl+0x48/0x5e
[    0.123985]  bad_page.cold+0x65/0x96
[    0.123990]  __free_pages_ok+0x3a8/0x410
[    0.123996]  memblock_free_all+0x171/0x1dc
[    0.124005]  mem_init+0x1f/0x14b
[    0.124011]  start_kernel+0x3b5/0x6a1
[    0.124016]  secondary_startup_64_no_verify+0xb0/0xbb
[    0.124022]  </TASK>

I see ~40 of these traces, each for different pfns.

Thanks,
Tom

> ---
> Change since v2:
> 	* Leave mem->vaddr with phys_to_virt(mem->start) when fail
> 	  to remap swiotlb memory.
> 
> Change since v1:
> 	* Rework comment in the swiotlb_init_io_tlb_mem()
> 	* Make swiotlb_init_io_tlb_mem() back to return void.
> ---
>   include/linux/swiotlb.h |  6 ++++++
>   kernel/dma/swiotlb.c    | 47 ++++++++++++++++++++++++++++++++++++-----
>   2 files changed, 48 insertions(+), 5 deletions(-)
> 
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 569272871375..f6c3638255d5 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
>    * @end:	The end address of the swiotlb memory pool. Used to do a quick
>    *		range check to see if the memory was in fact allocated by this
>    *		API.
> + * @vaddr:	The vaddr of the swiotlb memory pool. The swiotlb memory pool
> + *		may be remapped in the memory encrypted case and store virtual
> + *		address for bounce buffer operation.
>    * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
>    *		@end. For default swiotlb, this is command line adjustable via
>    *		setup_io_tlb_npages.
> @@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
>   struct io_tlb_mem {
>   	phys_addr_t start;
>   	phys_addr_t end;
> +	void *vaddr;
>   	unsigned long nslabs;
>   	unsigned long used;
>   	unsigned int index;
> @@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device *dev)
>   }
>   #endif /* CONFIG_DMA_RESTRICTED_POOL */
>   
> +extern phys_addr_t swiotlb_unencrypted_base;
> +
>   #endif /* __LINUX_SWIOTLB_H */
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 8e840fbbed7c..adb9d06af5c8 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -50,6 +50,7 @@
>   #include <asm/io.h>
>   #include <asm/dma.h>
>   
> +#include <linux/io.h>
>   #include <linux/init.h>
>   #include <linux/memblock.h>
>   #include <linux/iommu-helper.h>
> @@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
>   
>   struct io_tlb_mem io_tlb_default_mem;
>   
> +phys_addr_t swiotlb_unencrypted_base;
> +
>   /*
>    * Max segment that we can provide which (if pages are contingous) will
>    * not be bounced (unless SWIOTLB_FORCE is set).
> @@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
>   	return DIV_ROUND_UP(val, IO_TLB_SIZE);
>   }
>   
> +/*
> + * Remap swioltb memory in the unencrypted physical address space
> + * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
> + * Isolation VMs).
> + */
> +void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
> +{
> +	void *vaddr = NULL;
> +
> +	if (swiotlb_unencrypted_base) {
> +		phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
> +
> +		vaddr = memremap(paddr, bytes, MEMREMAP_WB);
> +		if (!vaddr)
> +			pr_err("Failed to map the unencrypted memory %llx size %lx.\n",
> +			       paddr, bytes);
> +	}
> +
> +	return vaddr;
> +}
> +
>   /*
>    * Early SWIOTLB allocation may be too early to allow an architecture to
>    * perform the desired operations.  This function allows the architecture to
> @@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
>   	vaddr = phys_to_virt(mem->start);
>   	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
>   	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> -	memset(vaddr, 0, bytes);
> +
> +	mem->vaddr = swiotlb_mem_remap(mem, bytes);
> +	if (!mem->vaddr)
> +		mem->vaddr = vaddr;
> +
> +	memset(mem->vaddr, 0, bytes);
>   }
>   
>   static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
> @@ -196,7 +225,18 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
>   		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>   		mem->slots[i].alloc_size = 0;
>   	}
> +
> +	/*
> +	 * If swiotlb_unencrypted_base is set, the bounce buffer memory will
> +	 * be remapped and cleared in swiotlb_update_mem_attributes.
> +	 */
> +	if (swiotlb_unencrypted_base)
> +		return;
> +
> +	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
>   	memset(vaddr, 0, bytes);
> +	mem->vaddr = vaddr;
> +	return;
>   }
>   
>   int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
> @@ -318,7 +358,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
>   	if (!mem->slots)
>   		return -ENOMEM;
>   
> -	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
>   	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
>   
>   	swiotlb_print_info();
> @@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
>   	phys_addr_t orig_addr = mem->slots[index].orig_addr;
>   	size_t alloc_size = mem->slots[index].alloc_size;
>   	unsigned long pfn = PFN_DOWN(orig_addr);
> -	unsigned char *vaddr = phys_to_virt(tlb_addr);
> +	unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
>   	unsigned int tlb_offset, orig_addr_offset;
>   
>   	if (orig_addr == INVALID_PHYS_ADDR)
> @@ -806,8 +845,6 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
>   			return -ENOMEM;
>   		}
>   
> -		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
> -				     rmem->size >> PAGE_SHIFT);
>   		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
>   		mem->force_bounce = true;
>   		mem->for_alloc = true;
>
Tianyu Lan Dec. 3, 2021, 11:20 a.m. UTC | #2
On 12/2/2021 10:42 PM, Tom Lendacky wrote:
> On 12/1/21 10:02 AM, Tianyu Lan wrote:
>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>
>> In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
>> extra address space which is above shared_gpa_boundary (E.G 39 bit
>> address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
>> physical address will be original physical address + shared_gpa_boundary.
>> The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
>> memory(vTOM). Memory addresses below vTOM are automatically treated as
>> private while memory above vTOM is treated as shared.
>>
>> Expose swiotlb_unencrypted_base for platforms to set unencrypted
>> memory base offset and platform calls swiotlb_update_mem_attributes()
>> to remap swiotlb mem to unencrypted address space. memremap() can
>> not be called in the early stage and so put remapping code into
>> swiotlb_update_mem_attributes(). Store remap address and use it to copy
>> data from/to swiotlb bounce buffer.
>>
>> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
> 
> This patch results in the following stack trace during a bare-metal boot
> on my EPYC system with SME active (e.g. mem_encrypt=on):
> 
> [    0.123932] BUG: Bad page state in process swapper  pfn:108001
> [    0.123942] page:(____ptrval____) refcount:0 mapcount:-128 
> mapping:0000000000000000 index:0x0 pfn:0x108001
> [    0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
> [    0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 
> 0000000000000000
> [    0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 
> 0000000000000000
> [    0.123955] page dumped because: nonzero mapcount
> [    0.123957] Modules linked in:
> [    0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 
> 5.16.0-rc3-sos-custom #2
> [    0.123964] Hardware name: AMD Corporation
> [    0.123967] Call Trace:
> [    0.123971]  <TASK>
> [    0.123975]  dump_stack_lvl+0x48/0x5e
> [    0.123985]  bad_page.cold+0x65/0x96
> [    0.123990]  __free_pages_ok+0x3a8/0x410
> [    0.123996]  memblock_free_all+0x171/0x1dc
> [    0.124005]  mem_init+0x1f/0x14b
> [    0.124011]  start_kernel+0x3b5/0x6a1
> [    0.124016]  secondary_startup_64_no_verify+0xb0/0xbb
> [    0.124022]  </TASK>
> 
> I see ~40 of these traces, each for different pfns.
> 
> Thanks,
> Tom

Hi Tom:
       Thanks for your test. Could you help to test the following patch 
and check whether it can fix the issue.


diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
   * @end:       The end address of the swiotlb memory pool. Used to do 
a quick
   *             range check to see if the memory was in fact allocated 
by this
   *             API.
+ * @vaddr:     The vaddr of the swiotlb memory pool. The swiotlb memory 
pool
+ *             may be remapped in the memory encrypted case and store 
virtual
+ *             address for bounce buffer operation.
   * @nslabs:    The number of IO TLB blocks (in groups of 64) between 
@start and
   *             @end. For default swiotlb, this is command line 
adjustable via
   *             setup_io_tlb_npages.
@@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
  struct io_tlb_mem {
         phys_addr_t start;
         phys_addr_t end;
+       void *vaddr;
         unsigned long nslabs;
         unsigned long used;
         unsigned int index;
@@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct 
device *dev)
  }
  #endif /* CONFIG_DMA_RESTRICTED_POOL */

+extern phys_addr_t swiotlb_unencrypted_base;
+
  #endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..34e6ade4f73c 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@
  #include <asm/io.h>
  #include <asm/dma.h>

+#include <linux/io.h>
  #include <linux/init.h>
  #include <linux/memblock.h>
  #include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;

  struct io_tlb_mem io_tlb_default_mem;

+phys_addr_t swiotlb_unencrypted_base;
+
  /*
   * Max segment that we can provide which (if pages are contingous) will
   * not be bounced (unless SWIOTLB_FORCE is set).
@@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
         return DIV_ROUND_UP(val, IO_TLB_SIZE);
  }

+/*
+ * Remap swioltb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+       void *vaddr = NULL;
+
+       if (swiotlb_unencrypted_base) {
+               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+               vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+               if (!vaddr)
+                       pr_err("Failed to map the unencrypted memory 
%llx size %lx.\n",
+                              paddr, bytes);
+       }
+
+       return vaddr;
+}
+
  /*
   * Early SWIOTLB allocation may be too early to allow an architecture to
   * perform the desired operations.  This function allows the 
architecture to
@@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
         vaddr = phys_to_virt(mem->start);
         bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
         set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
+
+       mem->vaddr = swiotlb_mem_remap(mem, bytes);
+       if (!mem->vaddr)
+               mem->vaddr = vaddr;
+
+       memset(mem->vaddr, 0, bytes);
  }

  static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, 
phys_addr_t start,
@@ -196,7 +225,17 @@ static void swiotlb_init_io_tlb_mem(struct 
io_tlb_mem *mem, phys_addr_t start,
                 mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
                 mem->slots[i].alloc_size = 0;
         }
+
+       /*
+        * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+        * be remapped and cleared in swiotlb_update_mem_attributes.
+        */
+       if (swiotlb_unencrypted_base)
+               return;
+
         memset(vaddr, 0, bytes);
+       mem->vaddr = vaddr;
+       return;
  }

  int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int 
verbose)
@@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, 
phys_addr_t tlb_addr, size_t size
         phys_addr_t orig_addr = mem->slots[index].orig_addr;
         size_t alloc_size = mem->slots[index].alloc_size;
         unsigned long pfn = PFN_DOWN(orig_addr);
-       unsigned char *vaddr = phys_to_virt(tlb_addr);
+       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
         unsigned int tlb_offset, orig_addr_offset;

         if (orig_addr == INVALID_PHYS_ADDR)


Thanks.
Tom Lendacky Dec. 3, 2021, 7:11 p.m. UTC | #3
On 12/3/21 5:20 AM, Tianyu Lan wrote:
> On 12/2/2021 10:42 PM, Tom Lendacky wrote:
>> On 12/1/21 10:02 AM, Tianyu Lan wrote:
>>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>>
>>> In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
>>> extra address space which is above shared_gpa_boundary (E.G 39 bit
>>> address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
>>> physical address will be original physical address + shared_gpa_boundary.
>>> The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
>>> memory(vTOM). Memory addresses below vTOM are automatically treated as
>>> private while memory above vTOM is treated as shared.
>>>
>>> Expose swiotlb_unencrypted_base for platforms to set unencrypted
>>> memory base offset and platform calls swiotlb_update_mem_attributes()
>>> to remap swiotlb mem to unencrypted address space. memremap() can
>>> not be called in the early stage and so put remapping code into
>>> swiotlb_update_mem_attributes(). Store remap address and use it to copy
>>> data from/to swiotlb bounce buffer.
>>>
>>> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>
>> This patch results in the following stack trace during a bare-metal boot
>> on my EPYC system with SME active (e.g. mem_encrypt=on):
>>
>> [    0.123932] BUG: Bad page state in process swapper  pfn:108001
>> [    0.123942] page:(____ptrval____) refcount:0 mapcount:-128 
>> mapping:0000000000000000 index:0x0 pfn:0x108001
>> [    0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
>> [    0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 
>> 0000000000000000
>> [    0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 
>> 0000000000000000
>> [    0.123955] page dumped because: nonzero mapcount
>> [    0.123957] Modules linked in:
>> [    0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 
>> 5.16.0-rc3-sos-custom #2
>> [    0.123964] Hardware name: AMD Corporation
>> [    0.123967] Call Trace:
>> [    0.123971]  <TASK>
>> [    0.123975]  dump_stack_lvl+0x48/0x5e
>> [    0.123985]  bad_page.cold+0x65/0x96
>> [    0.123990]  __free_pages_ok+0x3a8/0x410
>> [    0.123996]  memblock_free_all+0x171/0x1dc
>> [    0.124005]  mem_init+0x1f/0x14b
>> [    0.124011]  start_kernel+0x3b5/0x6a1
>> [    0.124016]  secondary_startup_64_no_verify+0xb0/0xbb
>> [    0.124022]  </TASK>
>>
>> I see ~40 of these traces, each for different pfns.
>>
>> Thanks,
>> Tom
> 
> Hi Tom:
>        Thanks for your test. Could you help to test the following patch 
> and check whether it can fix the issue.

The patch is mangled. Is the only difference where set_memory_decrypted() 
is called?

Thanks,
Tom

> 
> 
> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
> index 569272871375..f6c3638255d5 100644
> --- a/include/linux/swiotlb.h
> +++ b/include/linux/swiotlb.h
> @@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
>    * @end:       The end address of the swiotlb memory pool. Used to do a 
> quick
>    *             range check to see if the memory was in fact allocated by 
> this
>    *             API.
> + * @vaddr:     The vaddr of the swiotlb memory pool. The swiotlb memory pool
> + *             may be remapped in the memory encrypted case and store 
> virtual
> + *             address for bounce buffer operation.
>    * @nslabs:    The number of IO TLB blocks (in groups of 64) between 
> @start and
>    *             @end. For default swiotlb, this is command line 
> adjustable via
>    *             setup_io_tlb_npages.
> @@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
>   struct io_tlb_mem {
>          phys_addr_t start;
>          phys_addr_t end;
> +       void *vaddr;
>          unsigned long nslabs;
>          unsigned long used;
>          unsigned int index;
> @@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct device 
> *dev)
>   }
>   #endif /* CONFIG_DMA_RESTRICTED_POOL */
> 
> +extern phys_addr_t swiotlb_unencrypted_base;
> +
>   #endif /* __LINUX_SWIOTLB_H */
> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
> index 8e840fbbed7c..34e6ade4f73c 100644
> --- a/kernel/dma/swiotlb.c
> +++ b/kernel/dma/swiotlb.c
> @@ -50,6 +50,7 @@
>   #include <asm/io.h>
>   #include <asm/dma.h>
> 
> +#include <linux/io.h>
>   #include <linux/init.h>
>   #include <linux/memblock.h>
>   #include <linux/iommu-helper.h>
> @@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
> 
>   struct io_tlb_mem io_tlb_default_mem;
> 
> +phys_addr_t swiotlb_unencrypted_base;
> +
>   /*
>    * Max segment that we can provide which (if pages are contingous) will
>    * not be bounced (unless SWIOTLB_FORCE is set).
> @@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
>          return DIV_ROUND_UP(val, IO_TLB_SIZE);
>   }
> 
> +/*
> + * Remap swioltb memory in the unencrypted physical address space
> + * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
> + * Isolation VMs).
> + */
> +void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
> +{
> +       void *vaddr = NULL;
> +
> +       if (swiotlb_unencrypted_base) {
> +               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
> +
> +               vaddr = memremap(paddr, bytes, MEMREMAP_WB);
> +               if (!vaddr)
> +                       pr_err("Failed to map the unencrypted memory %llx 
> size %lx.\n",
> +                              paddr, bytes);
> +       }
> +
> +       return vaddr;
> +}
> +
>   /*
>    * Early SWIOTLB allocation may be too early to allow an architecture to
>    * perform the desired operations.  This function allows the 
> architecture to
> @@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
>          vaddr = phys_to_virt(mem->start);
>          bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
>          set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
> -       memset(vaddr, 0, bytes);
> +
> +       mem->vaddr = swiotlb_mem_remap(mem, bytes);
> +       if (!mem->vaddr)
> +               mem->vaddr = vaddr;
> +
> +       memset(mem->vaddr, 0, bytes);
>   }
> 
>   static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t 
> start,
> @@ -196,7 +225,17 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem 
> *mem, phys_addr_t start,
>                  mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>                  mem->slots[i].alloc_size = 0;
>          }
> +
> +       /*
> +        * If swiotlb_unencrypted_base is set, the bounce buffer memory will
> +        * be remapped and cleared in swiotlb_update_mem_attributes.
> +        */
> +       if (swiotlb_unencrypted_base)
> +               return;
> +
>          memset(vaddr, 0, bytes);
> +       mem->vaddr = vaddr;
> +       return;
>   }
> 
>   int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int 
> verbose)
> @@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, 
> phys_addr_t tlb_addr, size_t size
>          phys_addr_t orig_addr = mem->slots[index].orig_addr;
>          size_t alloc_size = mem->slots[index].alloc_size;
>          unsigned long pfn = PFN_DOWN(orig_addr);
> -       unsigned char *vaddr = phys_to_virt(tlb_addr);
> +       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
>          unsigned int tlb_offset, orig_addr_offset;
> 
>          if (orig_addr == INVALID_PHYS_ADDR)
> 
> 
> Thanks.
>
Tom Lendacky Dec. 3, 2021, 8:06 p.m. UTC | #4
On 12/3/21 1:11 PM, Tom Lendacky wrote:
> On 12/3/21 5:20 AM, Tianyu Lan wrote:
>> On 12/2/2021 10:42 PM, Tom Lendacky wrote:
>>> On 12/1/21 10:02 AM, Tianyu Lan wrote:
>>>> From: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>>>
>>>> In Isolation VM with AMD SEV, bounce buffer needs to be accessed via
>>>> extra address space which is above shared_gpa_boundary (E.G 39 bit
>>>> address line) reported by Hyper-V CPUID ISOLATION_CONFIG. The access
>>>> physical address will be original physical address + shared_gpa_boundary.
>>>> The shared_gpa_boundary in the AMD SEV SNP spec is called virtual top of
>>>> memory(vTOM). Memory addresses below vTOM are automatically treated as
>>>> private while memory above vTOM is treated as shared.
>>>>
>>>> Expose swiotlb_unencrypted_base for platforms to set unencrypted
>>>> memory base offset and platform calls swiotlb_update_mem_attributes()
>>>> to remap swiotlb mem to unencrypted address space. memremap() can
>>>> not be called in the early stage and so put remapping code into
>>>> swiotlb_update_mem_attributes(). Store remap address and use it to copy
>>>> data from/to swiotlb bounce buffer.
>>>>
>>>> Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
>>>
>>> This patch results in the following stack trace during a bare-metal boot
>>> on my EPYC system with SME active (e.g. mem_encrypt=on):
>>>
>>> [    0.123932] BUG: Bad page state in process swapper  pfn:108001
>>> [    0.123942] page:(____ptrval____) refcount:0 mapcount:-128 
>>> mapping:0000000000000000 index:0x0 pfn:0x108001
>>> [    0.123946] flags: 0x17ffffc0000000(node=0|zone=2|lastcpupid=0x1fffff)
>>> [    0.123952] raw: 0017ffffc0000000 ffff88904f2d5e80 ffff88904f2d5e80 
>>> 0000000000000000
>>> [    0.123954] raw: 0000000000000000 0000000000000000 00000000ffffff7f 
>>> 0000000000000000
>>> [    0.123955] page dumped because: nonzero mapcount
>>> [    0.123957] Modules linked in:
>>> [    0.123961] CPU: 0 PID: 0 Comm: swapper Not tainted 
>>> 5.16.0-rc3-sos-custom #2
>>> [    0.123964] Hardware name: AMD Corporation
>>> [    0.123967] Call Trace:
>>> [    0.123971]  <TASK>
>>> [    0.123975]  dump_stack_lvl+0x48/0x5e
>>> [    0.123985]  bad_page.cold+0x65/0x96
>>> [    0.123990]  __free_pages_ok+0x3a8/0x410
>>> [    0.123996]  memblock_free_all+0x171/0x1dc
>>> [    0.124005]  mem_init+0x1f/0x14b
>>> [    0.124011]  start_kernel+0x3b5/0x6a1
>>> [    0.124016]  secondary_startup_64_no_verify+0xb0/0xbb
>>> [    0.124022]  </TASK>
>>>
>>> I see ~40 of these traces, each for different pfns.
>>>
>>> Thanks,
>>> Tom
>>
>> Hi Tom:
>>        Thanks for your test. Could you help to test the following patch 
>> and check whether it can fix the issue.
> 
> The patch is mangled. Is the only difference where set_memory_decrypted() 
> is called?

I de-mangled the patch. No more stack traces with SME active.

Thanks,
Tom

> 
> Thanks,
> Tom
> 
>>
>>
>> diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
>> index 569272871375..f6c3638255d5 100644
>> --- a/include/linux/swiotlb.h
>> +++ b/include/linux/swiotlb.h
>> @@ -73,6 +73,9 @@ extern enum swiotlb_force swiotlb_force;
>>    * @end:       The end address of the swiotlb memory pool. Used to do 
>> a quick
>>    *             range check to see if the memory was in fact allocated 
>> by this
>>    *             API.
>> + * @vaddr:     The vaddr of the swiotlb memory pool. The swiotlb memory 
>> pool
>> + *             may be remapped in the memory encrypted case and store 
>> virtual
>> + *             address for bounce buffer operation.
>>    * @nslabs:    The number of IO TLB blocks (in groups of 64) between 
>> @start and
>>    *             @end. For default swiotlb, this is command line 
>> adjustable via
>>    *             setup_io_tlb_npages.
>> @@ -92,6 +95,7 @@ extern enum swiotlb_force swiotlb_force;
>>   struct io_tlb_mem {
>>          phys_addr_t start;
>>          phys_addr_t end;
>> +       void *vaddr;
>>          unsigned long nslabs;
>>          unsigned long used;
>>          unsigned int index;
>> @@ -186,4 +190,6 @@ static inline bool is_swiotlb_for_alloc(struct 
>> device *dev)
>>   }
>>   #endif /* CONFIG_DMA_RESTRICTED_POOL */
>>
>> +extern phys_addr_t swiotlb_unencrypted_base;
>> +
>>   #endif /* __LINUX_SWIOTLB_H */
>> diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
>> index 8e840fbbed7c..34e6ade4f73c 100644
>> --- a/kernel/dma/swiotlb.c
>> +++ b/kernel/dma/swiotlb.c
>> @@ -50,6 +50,7 @@
>>   #include <asm/io.h>
>>   #include <asm/dma.h>
>>
>> +#include <linux/io.h>
>>   #include <linux/init.h>
>>   #include <linux/memblock.h>
>>   #include <linux/iommu-helper.h>
>> @@ -72,6 +73,8 @@ enum swiotlb_force swiotlb_force;
>>
>>   struct io_tlb_mem io_tlb_default_mem;
>>
>> +phys_addr_t swiotlb_unencrypted_base;
>> +
>>   /*
>>    * Max segment that we can provide which (if pages are contingous) will
>>    * not be bounced (unless SWIOTLB_FORCE is set).
>> @@ -155,6 +158,27 @@ static inline unsigned long nr_slots(u64 val)
>>          return DIV_ROUND_UP(val, IO_TLB_SIZE);
>>   }
>>
>> +/*
>> + * Remap swioltb memory in the unencrypted physical address space
>> + * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
>> + * Isolation VMs).
>> + */
>> +void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
>> +{
>> +       void *vaddr = NULL;
>> +
>> +       if (swiotlb_unencrypted_base) {
>> +               phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
>> +
>> +               vaddr = memremap(paddr, bytes, MEMREMAP_WB);
>> +               if (!vaddr)
>> +                       pr_err("Failed to map the unencrypted memory 
>> %llx size %lx.\n",
>> +                              paddr, bytes);
>> +       }
>> +
>> +       return vaddr;
>> +}
>> +
>>   /*
>>    * Early SWIOTLB allocation may be too early to allow an architecture to
>>    * perform the desired operations.  This function allows the 
>> architecture to
>> @@ -172,7 +196,12 @@ void __init swiotlb_update_mem_attributes(void)
>>          vaddr = phys_to_virt(mem->start);
>>          bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
>>          set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
>> -       memset(vaddr, 0, bytes);
>> +
>> +       mem->vaddr = swiotlb_mem_remap(mem, bytes);
>> +       if (!mem->vaddr)
>> +               mem->vaddr = vaddr;
>> +
>> +       memset(mem->vaddr, 0, bytes);
>>   }
>>
>>   static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, 
>> phys_addr_t start,
>> @@ -196,7 +225,17 @@ static void swiotlb_init_io_tlb_mem(struct 
>> io_tlb_mem *mem, phys_addr_t start,
>>                  mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
>>                  mem->slots[i].alloc_size = 0;
>>          }
>> +
>> +       /*
>> +        * If swiotlb_unencrypted_base is set, the bounce buffer memory 
>> will
>> +        * be remapped and cleared in swiotlb_update_mem_attributes.
>> +        */
>> +       if (swiotlb_unencrypted_base)
>> +               return;
>> +
>>          memset(vaddr, 0, bytes);
>> +       mem->vaddr = vaddr;
>> +       return;
>>   }
>>
>>   int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int 
>> verbose)
>> @@ -371,7 +410,7 @@ static void swiotlb_bounce(struct device *dev, 
>> phys_addr_t tlb_addr, size_t size
>>          phys_addr_t orig_addr = mem->slots[index].orig_addr;
>>          size_t alloc_size = mem->slots[index].alloc_size;
>>          unsigned long pfn = PFN_DOWN(orig_addr);
>> -       unsigned char *vaddr = phys_to_virt(tlb_addr);
>> +       unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
>>          unsigned int tlb_offset, orig_addr_offset;
>>
>>          if (orig_addr == INVALID_PHYS_ADDR)
>>
>>
>> Thanks.
>>
Tianyu Lan Dec. 4, 2021, 7:21 a.m. UTC | #5
On 12/4/2021 4:06 AM, Tom Lendacky wrote:
>>> Hi Tom:
>>>        Thanks for your test. Could you help to test the following 
>>> patch and check whether it can fix the issue.
>>
>> The patch is mangled. Is the only difference where 
>> set_memory_decrypted() is called?
> 
> I de-mangled the patch. No more stack traces with SME active.
> 
> Thanks,
> Tom

Hi Tom:
	Thanks a lot for your rework and test. I will update in the next version.

Thanks.
diff mbox series

Patch

diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 569272871375..f6c3638255d5 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -73,6 +73,9 @@  extern enum swiotlb_force swiotlb_force;
  * @end:	The end address of the swiotlb memory pool. Used to do a quick
  *		range check to see if the memory was in fact allocated by this
  *		API.
+ * @vaddr:	The vaddr of the swiotlb memory pool. The swiotlb memory pool
+ *		may be remapped in the memory encrypted case and store virtual
+ *		address for bounce buffer operation.
  * @nslabs:	The number of IO TLB blocks (in groups of 64) between @start and
  *		@end. For default swiotlb, this is command line adjustable via
  *		setup_io_tlb_npages.
@@ -92,6 +95,7 @@  extern enum swiotlb_force swiotlb_force;
 struct io_tlb_mem {
 	phys_addr_t start;
 	phys_addr_t end;
+	void *vaddr;
 	unsigned long nslabs;
 	unsigned long used;
 	unsigned int index;
@@ -186,4 +190,6 @@  static inline bool is_swiotlb_for_alloc(struct device *dev)
 }
 #endif /* CONFIG_DMA_RESTRICTED_POOL */
 
+extern phys_addr_t swiotlb_unencrypted_base;
+
 #endif /* __LINUX_SWIOTLB_H */
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 8e840fbbed7c..adb9d06af5c8 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,6 +50,7 @@ 
 #include <asm/io.h>
 #include <asm/dma.h>
 
+#include <linux/io.h>
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/iommu-helper.h>
@@ -72,6 +73,8 @@  enum swiotlb_force swiotlb_force;
 
 struct io_tlb_mem io_tlb_default_mem;
 
+phys_addr_t swiotlb_unencrypted_base;
+
 /*
  * Max segment that we can provide which (if pages are contingous) will
  * not be bounced (unless SWIOTLB_FORCE is set).
@@ -155,6 +158,27 @@  static inline unsigned long nr_slots(u64 val)
 	return DIV_ROUND_UP(val, IO_TLB_SIZE);
 }
 
+/*
+ * Remap swioltb memory in the unencrypted physical address space
+ * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
+ * Isolation VMs).
+ */
+void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
+{
+	void *vaddr = NULL;
+
+	if (swiotlb_unencrypted_base) {
+		phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
+
+		vaddr = memremap(paddr, bytes, MEMREMAP_WB);
+		if (!vaddr)
+			pr_err("Failed to map the unencrypted memory %llx size %lx.\n",
+			       paddr, bytes);
+	}
+
+	return vaddr;
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -172,7 +196,12 @@  void __init swiotlb_update_mem_attributes(void)
 	vaddr = phys_to_virt(mem->start);
 	bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
 	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-	memset(vaddr, 0, bytes);
+
+	mem->vaddr = swiotlb_mem_remap(mem, bytes);
+	if (!mem->vaddr)
+		mem->vaddr = vaddr;
+
+	memset(mem->vaddr, 0, bytes);
 }
 
 static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
@@ -196,7 +225,18 @@  static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
 		mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
 		mem->slots[i].alloc_size = 0;
 	}
+
+	/*
+	 * If swiotlb_unencrypted_base is set, the bounce buffer memory will
+	 * be remapped and cleared in swiotlb_update_mem_attributes.
+	 */
+	if (swiotlb_unencrypted_base)
+		return;
+
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
+	mem->vaddr = vaddr;
+	return;
 }
 
 int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
@@ -318,7 +358,6 @@  swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!mem->slots)
 		return -ENOMEM;
 
-	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), nslabs, true);
 
 	swiotlb_print_info();
@@ -371,7 +410,7 @@  static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
 	phys_addr_t orig_addr = mem->slots[index].orig_addr;
 	size_t alloc_size = mem->slots[index].alloc_size;
 	unsigned long pfn = PFN_DOWN(orig_addr);
-	unsigned char *vaddr = phys_to_virt(tlb_addr);
+	unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
 	unsigned int tlb_offset, orig_addr_offset;
 
 	if (orig_addr == INVALID_PHYS_ADDR)
@@ -806,8 +845,6 @@  static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
 			return -ENOMEM;
 		}
 
-		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
-				     rmem->size >> PAGE_SHIFT);
 		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
 		mem->force_bounce = true;
 		mem->for_alloc = true;