diff mbox series

ALSA: memalloc: Workaround for Xen PV

Message ID 20230125153104.5527-1-tiwai@suse.de (mailing list archive)
State New, archived
Headers show
Series ALSA: memalloc: Workaround for Xen PV | expand

Commit Message

Takashi Iwai Jan. 25, 2023, 3:31 p.m. UTC
We change recently the memalloc helper to use
dma_alloc_noncontiguous() and the fallback to get_pages().  Although
lots of issues with IOMMU (or non-IOMMU) have been addressed, but
there seems still a regression on Xen PV.  Interestingly, the only
proper way to work is use dma_alloc_coherent().  The use of
dma_alloc_coherent() for SG buffer was dropped as it's problematic on
IOMMU systems.  OTOH, Xen PV has a different way, and it's fine to use
the dma_alloc_coherent().

This patch is a workaround for Xen PV.  It consists of the following
changes:
- For Xen PV, use only the fallback allocation without
  dma_alloc_noncontiguous()
- In the fallback allocation, use dma_alloc_coherent();
  the DMA address from dma_alloc_coherent() is returned in get_addr
  ops
- The DMA addresses are stored in an array; the first entry stores the
  number of allocated pages in lower bits, which are referred at
  releasing pages again

Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again")
Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU")
Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---

Marek, this is another respin of the fix.
Please check this one and report back.  Thanks!

 sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 69 insertions(+), 18 deletions(-)

Comments

Marek Marczykowski-Górecki Jan. 26, 2023, 12:53 p.m. UTC | #1
On Wed, Jan 25, 2023 at 04:31:04PM +0100, Takashi Iwai wrote:
> We change recently the memalloc helper to use
> dma_alloc_noncontiguous() and the fallback to get_pages().  Although
> lots of issues with IOMMU (or non-IOMMU) have been addressed, but
> there seems still a regression on Xen PV.  Interestingly, the only
> proper way to work is use dma_alloc_coherent().  The use of
> dma_alloc_coherent() for SG buffer was dropped as it's problematic on
> IOMMU systems.  OTOH, Xen PV has a different way, and it's fine to use
> the dma_alloc_coherent().
> 
> This patch is a workaround for Xen PV.  It consists of the following
> changes:
> - For Xen PV, use only the fallback allocation without
>   dma_alloc_noncontiguous()
> - In the fallback allocation, use dma_alloc_coherent();
>   the DMA address from dma_alloc_coherent() is returned in get_addr
>   ops
> - The DMA addresses are stored in an array; the first entry stores the
>   number of allocated pages in lower bits, which are referred at
>   releasing pages again
> 
> Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
> Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again")
> Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU")
> Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de
> Signed-off-by: Takashi Iwai <tiwai@suse.de>

Uptime ~20h and it still works, so
Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>

> ---
> 
> Marek, this is another respin of the fix.
> Please check this one and report back.  Thanks!
> 
>  sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++---------
>  1 file changed, 69 insertions(+), 18 deletions(-)
> 
> diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
> index 81025f50a542..f901504b5afc 100644
> --- a/sound/core/memalloc.c
> +++ b/sound/core/memalloc.c
> @@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
>  	struct sg_table *sgt;
>  	void *p;
>  
> +#ifdef CONFIG_SND_DMA_SGBUF
> +	if (cpu_feature_enabled(X86_FEATURE_XENPV))
> +		return snd_dma_sg_fallback_alloc(dmab, size);
> +#endif
>  	sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
>  				      DEFAULT_GFP, 0);
>  #ifdef CONFIG_SND_DMA_SGBUF
> -	if (!sgt && !get_dma_ops(dmab->dev.dev)) {
> -		if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
> -			dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
> -		else
> -			dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
> +	if (!sgt && !get_dma_ops(dmab->dev.dev))
>  		return snd_dma_sg_fallback_alloc(dmab, size);
> -	}
>  #endif
>  	if (!sgt)
>  		return NULL;
> @@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
>  
>  /* Fallback SG-buffer allocations for x86 */
>  struct snd_dma_sg_fallback {
> +	bool use_dma_alloc_coherent;
>  	size_t count;
>  	struct page **pages;
> +	/* DMA address array; the first page contains #pages in ~PAGE_MASK */
> +	dma_addr_t *addrs;
>  };
>  
>  static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab,
>  				       struct snd_dma_sg_fallback *sgbuf)
>  {
> -	bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
> -	size_t i;
> -
> -	for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++)
> -		do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc);
> +	size_t i, size;
> +
> +	if (sgbuf->pages && sgbuf->addrs) {
> +		i = 0;
> +		while (i < sgbuf->count) {
> +			if (!sgbuf->pages[i] || !sgbuf->addrs[i])
> +				break;
> +			size = sgbuf->addrs[i] & ~PAGE_MASK;
> +			if (WARN_ON(!size))
> +				break;
> +			if (sgbuf->use_dma_alloc_coherent)
> +				dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT,
> +						  page_address(sgbuf->pages[i]),
> +						  sgbuf->addrs[i] & PAGE_MASK);
> +			else
> +				do_free_pages(page_address(sgbuf->pages[i]),
> +					      size << PAGE_SHIFT, false);
> +			i += size;
> +		}
> +	}
>  	kvfree(sgbuf->pages);
> +	kvfree(sgbuf->addrs);
>  	kfree(sgbuf);
>  }
>  
> @@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
>  	struct snd_dma_sg_fallback *sgbuf;
>  	struct page **pagep, *curp;
>  	size_t chunk, npages;
> +	dma_addr_t *addrp;
>  	dma_addr_t addr;
>  	void *p;
> -	bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
> +
> +	/* correct the type */
> +	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG)
> +		dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
> +	else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
> +		dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
>  
>  	sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL);
>  	if (!sgbuf)
>  		return NULL;
> +	sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV);
>  	size = PAGE_ALIGN(size);
>  	sgbuf->count = size >> PAGE_SHIFT;
>  	sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL);
> -	if (!sgbuf->pages)
> +	sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL);
> +	if (!sgbuf->pages || !sgbuf->addrs)
>  		goto error;
>  
>  	pagep = sgbuf->pages;
> -	chunk = size;
> +	addrp = sgbuf->addrs;
> +	chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */
>  	while (size > 0) {
>  		chunk = min(size, chunk);
> -		p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc);
> +		if (sgbuf->use_dma_alloc_coherent)
> +			p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP);
> +		else
> +			p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false);
>  		if (!p) {
>  			if (chunk <= PAGE_SIZE)
>  				goto error;
> @@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
>  		size -= chunk;
>  		/* fill pages */
>  		npages = chunk >> PAGE_SHIFT;
> +		*addrp = npages; /* store in lower bits */
>  		curp = virt_to_page(p);
> -		while (npages--)
> +		while (npages--) {
>  			*pagep++ = curp++;
> +			*addrp++ |= addr;
> +			addr += PAGE_SIZE;
> +		}
>  	}
>  
>  	p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL);
>  	if (!p)
>  		goto error;
> +
> +	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
> +		set_pages_array_wc(sgbuf->pages, sgbuf->count);
> +
>  	dmab->private_data = sgbuf;
>  	/* store the first page address for convenience */
> -	dmab->addr = snd_sgbuf_get_addr(dmab, 0);
> +	dmab->addr = sgbuf->addrs[0] & PAGE_MASK;
>  	return p;
>  
>   error:
> @@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
>  
>  static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab)
>  {
> +	struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
> +
> +	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
> +		set_pages_array_wb(sgbuf->pages, sgbuf->count);
>  	vunmap(dmab->area);
>  	__snd_dma_sg_fallback_free(dmab, dmab->private_data);
>  }
>  
> +static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab,
> +					       size_t offset)
> +{
> +	struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
> +	size_t index = offset >> PAGE_SHIFT;
> +
> +	return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK);
> +}
> +
>  static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab,
>  				    struct vm_area_struct *area)
>  {
> @@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = {
>  	.alloc = snd_dma_sg_fallback_alloc,
>  	.free = snd_dma_sg_fallback_free,
>  	.mmap = snd_dma_sg_fallback_mmap,
> +	.get_addr = snd_dma_sg_fallback_get_addr,
>  	/* reuse vmalloc helpers */
> -	.get_addr = snd_dma_vmalloc_get_addr,
>  	.get_page = snd_dma_vmalloc_get_page,
>  	.get_chunk_size = snd_dma_vmalloc_get_chunk_size,
>  };
> -- 
> 2.35.3
>
diff mbox series

Patch

diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c
index 81025f50a542..f901504b5afc 100644
--- a/sound/core/memalloc.c
+++ b/sound/core/memalloc.c
@@ -541,16 +541,15 @@  static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size)
 	struct sg_table *sgt;
 	void *p;
 
+#ifdef CONFIG_SND_DMA_SGBUF
+	if (cpu_feature_enabled(X86_FEATURE_XENPV))
+		return snd_dma_sg_fallback_alloc(dmab, size);
+#endif
 	sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir,
 				      DEFAULT_GFP, 0);
 #ifdef CONFIG_SND_DMA_SGBUF
-	if (!sgt && !get_dma_ops(dmab->dev.dev)) {
-		if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
-			dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
-		else
-			dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
+	if (!sgt && !get_dma_ops(dmab->dev.dev))
 		return snd_dma_sg_fallback_alloc(dmab, size);
-	}
 #endif
 	if (!sgt)
 		return NULL;
@@ -717,19 +716,38 @@  static const struct snd_malloc_ops snd_dma_sg_wc_ops = {
 
 /* Fallback SG-buffer allocations for x86 */
 struct snd_dma_sg_fallback {
+	bool use_dma_alloc_coherent;
 	size_t count;
 	struct page **pages;
+	/* DMA address array; the first page contains #pages in ~PAGE_MASK */
+	dma_addr_t *addrs;
 };
 
 static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab,
 				       struct snd_dma_sg_fallback *sgbuf)
 {
-	bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
-	size_t i;
-
-	for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++)
-		do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc);
+	size_t i, size;
+
+	if (sgbuf->pages && sgbuf->addrs) {
+		i = 0;
+		while (i < sgbuf->count) {
+			if (!sgbuf->pages[i] || !sgbuf->addrs[i])
+				break;
+			size = sgbuf->addrs[i] & ~PAGE_MASK;
+			if (WARN_ON(!size))
+				break;
+			if (sgbuf->use_dma_alloc_coherent)
+				dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT,
+						  page_address(sgbuf->pages[i]),
+						  sgbuf->addrs[i] & PAGE_MASK);
+			else
+				do_free_pages(page_address(sgbuf->pages[i]),
+					      size << PAGE_SHIFT, false);
+			i += size;
+		}
+	}
 	kvfree(sgbuf->pages);
+	kvfree(sgbuf->addrs);
 	kfree(sgbuf);
 }
 
@@ -738,24 +756,36 @@  static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
 	struct snd_dma_sg_fallback *sgbuf;
 	struct page **pagep, *curp;
 	size_t chunk, npages;
+	dma_addr_t *addrp;
 	dma_addr_t addr;
 	void *p;
-	bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
+
+	/* correct the type */
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG)
+		dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK;
+	else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG)
+		dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK;
 
 	sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL);
 	if (!sgbuf)
 		return NULL;
+	sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV);
 	size = PAGE_ALIGN(size);
 	sgbuf->count = size >> PAGE_SHIFT;
 	sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL);
-	if (!sgbuf->pages)
+	sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL);
+	if (!sgbuf->pages || !sgbuf->addrs)
 		goto error;
 
 	pagep = sgbuf->pages;
-	chunk = size;
+	addrp = sgbuf->addrs;
+	chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */
 	while (size > 0) {
 		chunk = min(size, chunk);
-		p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc);
+		if (sgbuf->use_dma_alloc_coherent)
+			p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP);
+		else
+			p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false);
 		if (!p) {
 			if (chunk <= PAGE_SIZE)
 				goto error;
@@ -767,17 +797,25 @@  static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
 		size -= chunk;
 		/* fill pages */
 		npages = chunk >> PAGE_SHIFT;
+		*addrp = npages; /* store in lower bits */
 		curp = virt_to_page(p);
-		while (npages--)
+		while (npages--) {
 			*pagep++ = curp++;
+			*addrp++ |= addr;
+			addr += PAGE_SIZE;
+		}
 	}
 
 	p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL);
 	if (!p)
 		goto error;
+
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+		set_pages_array_wc(sgbuf->pages, sgbuf->count);
+
 	dmab->private_data = sgbuf;
 	/* store the first page address for convenience */
-	dmab->addr = snd_sgbuf_get_addr(dmab, 0);
+	dmab->addr = sgbuf->addrs[0] & PAGE_MASK;
 	return p;
 
  error:
@@ -787,10 +825,23 @@  static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size)
 
 static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab)
 {
+	struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
+
+	if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK)
+		set_pages_array_wb(sgbuf->pages, sgbuf->count);
 	vunmap(dmab->area);
 	__snd_dma_sg_fallback_free(dmab, dmab->private_data);
 }
 
+static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab,
+					       size_t offset)
+{
+	struct snd_dma_sg_fallback *sgbuf = dmab->private_data;
+	size_t index = offset >> PAGE_SHIFT;
+
+	return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK);
+}
+
 static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab,
 				    struct vm_area_struct *area)
 {
@@ -805,8 +856,8 @@  static const struct snd_malloc_ops snd_dma_sg_fallback_ops = {
 	.alloc = snd_dma_sg_fallback_alloc,
 	.free = snd_dma_sg_fallback_free,
 	.mmap = snd_dma_sg_fallback_mmap,
+	.get_addr = snd_dma_sg_fallback_get_addr,
 	/* reuse vmalloc helpers */
-	.get_addr = snd_dma_vmalloc_get_addr,
 	.get_page = snd_dma_vmalloc_get_page,
 	.get_chunk_size = snd_dma_vmalloc_get_chunk_size,
 };