Message ID | 20230125153104.5527-1-tiwai@suse.de (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | ALSA: memalloc: Workaround for Xen PV | expand |
On Wed, Jan 25, 2023 at 04:31:04PM +0100, Takashi Iwai wrote: > We change recently the memalloc helper to use > dma_alloc_noncontiguous() and the fallback to get_pages(). Although > lots of issues with IOMMU (or non-IOMMU) have been addressed, but > there seems still a regression on Xen PV. Interestingly, the only > proper way to work is use dma_alloc_coherent(). The use of > dma_alloc_coherent() for SG buffer was dropped as it's problematic on > IOMMU systems. OTOH, Xen PV has a different way, and it's fine to use > the dma_alloc_coherent(). > > This patch is a workaround for Xen PV. It consists of the following > changes: > - For Xen PV, use only the fallback allocation without > dma_alloc_noncontiguous() > - In the fallback allocation, use dma_alloc_coherent(); > the DMA address from dma_alloc_coherent() is returned in get_addr > ops > - The DMA addresses are stored in an array; the first entry stores the > number of allocated pages in lower bits, which are referred at > releasing pages again > > Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> > Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again") > Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU") > Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de > Signed-off-by: Takashi Iwai <tiwai@suse.de> Uptime ~20h and it still works, so Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> > --- > > Marek, this is another respin of the fix. > Please check this one and report back. Thanks! > > sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++--------- > 1 file changed, 69 insertions(+), 18 deletions(-) > > diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c > index 81025f50a542..f901504b5afc 100644 > --- a/sound/core/memalloc.c > +++ b/sound/core/memalloc.c > @@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) > struct sg_table *sgt; > void *p; > > +#ifdef CONFIG_SND_DMA_SGBUF > + if (cpu_feature_enabled(X86_FEATURE_XENPV)) > + return snd_dma_sg_fallback_alloc(dmab, size); > +#endif > sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, > DEFAULT_GFP, 0); > #ifdef CONFIG_SND_DMA_SGBUF > - if (!sgt && !get_dma_ops(dmab->dev.dev)) { > - if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) > - dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; > - else > - dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; > + if (!sgt && !get_dma_ops(dmab->dev.dev)) > return snd_dma_sg_fallback_alloc(dmab, size); > - } > #endif > if (!sgt) > return NULL; > @@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { > > /* Fallback SG-buffer allocations for x86 */ > struct snd_dma_sg_fallback { > + bool use_dma_alloc_coherent; > size_t count; > struct page **pages; > + /* DMA address array; the first page contains #pages in ~PAGE_MASK */ > + dma_addr_t *addrs; > }; > > static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, > struct snd_dma_sg_fallback *sgbuf) > { > - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; > - size_t i; > - > - for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) > - do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc); > + size_t i, size; > + > + if (sgbuf->pages && sgbuf->addrs) { > + i = 0; > + while (i < sgbuf->count) { > + if (!sgbuf->pages[i] || !sgbuf->addrs[i]) > + break; > + size = sgbuf->addrs[i] & ~PAGE_MASK; > + if (WARN_ON(!size)) > + break; > + if (sgbuf->use_dma_alloc_coherent) > + dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT, > + page_address(sgbuf->pages[i]), > + sgbuf->addrs[i] & PAGE_MASK); > + else > + do_free_pages(page_address(sgbuf->pages[i]), > + size << PAGE_SHIFT, false); > + i += size; > + } > + } > kvfree(sgbuf->pages); > + kvfree(sgbuf->addrs); > kfree(sgbuf); > } > > @@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) > struct snd_dma_sg_fallback *sgbuf; > struct page **pagep, *curp; > size_t chunk, npages; > + dma_addr_t *addrp; > dma_addr_t addr; > void *p; > - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; > + > + /* correct the type */ > + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) > + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; > + else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) > + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; > > sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); > if (!sgbuf) > return NULL; > + sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV); > size = PAGE_ALIGN(size); > sgbuf->count = size >> PAGE_SHIFT; > sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL); > - if (!sgbuf->pages) > + sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL); > + if (!sgbuf->pages || !sgbuf->addrs) > goto error; > > pagep = sgbuf->pages; > - chunk = size; > + addrp = sgbuf->addrs; > + chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */ > while (size > 0) { > chunk = min(size, chunk); > - p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc); > + if (sgbuf->use_dma_alloc_coherent) > + p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP); > + else > + p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false); > if (!p) { > if (chunk <= PAGE_SIZE) > goto error; > @@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) > size -= chunk; > /* fill pages */ > npages = chunk >> PAGE_SHIFT; > + *addrp = npages; /* store in lower bits */ > curp = virt_to_page(p); > - while (npages--) > + while (npages--) { > *pagep++ = curp++; > + *addrp++ |= addr; > + addr += PAGE_SIZE; > + } > } > > p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL); > if (!p) > goto error; > + > + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) > + set_pages_array_wc(sgbuf->pages, sgbuf->count); > + > dmab->private_data = sgbuf; > /* store the first page address for convenience */ > - dmab->addr = snd_sgbuf_get_addr(dmab, 0); > + dmab->addr = sgbuf->addrs[0] & PAGE_MASK; > return p; > > error: > @@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) > > static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) > { > + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; > + > + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) > + set_pages_array_wb(sgbuf->pages, sgbuf->count); > vunmap(dmab->area); > __snd_dma_sg_fallback_free(dmab, dmab->private_data); > } > > +static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab, > + size_t offset) > +{ > + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; > + size_t index = offset >> PAGE_SHIFT; > + > + return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK); > +} > + > static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, > struct vm_area_struct *area) > { > @@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { > .alloc = snd_dma_sg_fallback_alloc, > .free = snd_dma_sg_fallback_free, > .mmap = snd_dma_sg_fallback_mmap, > + .get_addr = snd_dma_sg_fallback_get_addr, > /* reuse vmalloc helpers */ > - .get_addr = snd_dma_vmalloc_get_addr, > .get_page = snd_dma_vmalloc_get_page, > .get_chunk_size = snd_dma_vmalloc_get_chunk_size, > }; > -- > 2.35.3 >
diff --git a/sound/core/memalloc.c b/sound/core/memalloc.c index 81025f50a542..f901504b5afc 100644 --- a/sound/core/memalloc.c +++ b/sound/core/memalloc.c @@ -541,16 +541,15 @@ static void *snd_dma_noncontig_alloc(struct snd_dma_buffer *dmab, size_t size) struct sg_table *sgt; void *p; +#ifdef CONFIG_SND_DMA_SGBUF + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return snd_dma_sg_fallback_alloc(dmab, size); +#endif sgt = dma_alloc_noncontiguous(dmab->dev.dev, size, dmab->dev.dir, DEFAULT_GFP, 0); #ifdef CONFIG_SND_DMA_SGBUF - if (!sgt && !get_dma_ops(dmab->dev.dev)) { - if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) - dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; - else - dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + if (!sgt && !get_dma_ops(dmab->dev.dev)) return snd_dma_sg_fallback_alloc(dmab, size); - } #endif if (!sgt) return NULL; @@ -717,19 +716,38 @@ static const struct snd_malloc_ops snd_dma_sg_wc_ops = { /* Fallback SG-buffer allocations for x86 */ struct snd_dma_sg_fallback { + bool use_dma_alloc_coherent; size_t count; struct page **pages; + /* DMA address array; the first page contains #pages in ~PAGE_MASK */ + dma_addr_t *addrs; }; static void __snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab, struct snd_dma_sg_fallback *sgbuf) { - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; - size_t i; - - for (i = 0; i < sgbuf->count && sgbuf->pages[i]; i++) - do_free_pages(page_address(sgbuf->pages[i]), PAGE_SIZE, wc); + size_t i, size; + + if (sgbuf->pages && sgbuf->addrs) { + i = 0; + while (i < sgbuf->count) { + if (!sgbuf->pages[i] || !sgbuf->addrs[i]) + break; + size = sgbuf->addrs[i] & ~PAGE_MASK; + if (WARN_ON(!size)) + break; + if (sgbuf->use_dma_alloc_coherent) + dma_free_coherent(dmab->dev.dev, size << PAGE_SHIFT, + page_address(sgbuf->pages[i]), + sgbuf->addrs[i] & PAGE_MASK); + else + do_free_pages(page_address(sgbuf->pages[i]), + size << PAGE_SHIFT, false); + i += size; + } + } kvfree(sgbuf->pages); + kvfree(sgbuf->addrs); kfree(sgbuf); } @@ -738,24 +756,36 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) struct snd_dma_sg_fallback *sgbuf; struct page **pagep, *curp; size_t chunk, npages; + dma_addr_t *addrp; dma_addr_t addr; void *p; - bool wc = dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; + + /* correct the type */ + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_SG_FALLBACK; + else if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG) + dmab->dev.type = SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK; sgbuf = kzalloc(sizeof(*sgbuf), GFP_KERNEL); if (!sgbuf) return NULL; + sgbuf->use_dma_alloc_coherent = cpu_feature_enabled(X86_FEATURE_XENPV); size = PAGE_ALIGN(size); sgbuf->count = size >> PAGE_SHIFT; sgbuf->pages = kvcalloc(sgbuf->count, sizeof(*sgbuf->pages), GFP_KERNEL); - if (!sgbuf->pages) + sgbuf->addrs = kvcalloc(sgbuf->count, sizeof(*sgbuf->addrs), GFP_KERNEL); + if (!sgbuf->pages || !sgbuf->addrs) goto error; pagep = sgbuf->pages; - chunk = size; + addrp = sgbuf->addrs; + chunk = (PAGE_SIZE - 1) << PAGE_SHIFT; /* to fit in low bits in addrs */ while (size > 0) { chunk = min(size, chunk); - p = do_alloc_pages(dmab->dev.dev, chunk, &addr, wc); + if (sgbuf->use_dma_alloc_coherent) + p = dma_alloc_coherent(dmab->dev.dev, chunk, &addr, DEFAULT_GFP); + else + p = do_alloc_pages(dmab->dev.dev, chunk, &addr, false); if (!p) { if (chunk <= PAGE_SIZE) goto error; @@ -767,17 +797,25 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) size -= chunk; /* fill pages */ npages = chunk >> PAGE_SHIFT; + *addrp = npages; /* store in lower bits */ curp = virt_to_page(p); - while (npages--) + while (npages--) { *pagep++ = curp++; + *addrp++ |= addr; + addr += PAGE_SIZE; + } } p = vmap(sgbuf->pages, sgbuf->count, VM_MAP, PAGE_KERNEL); if (!p) goto error; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wc(sgbuf->pages, sgbuf->count); + dmab->private_data = sgbuf; /* store the first page address for convenience */ - dmab->addr = snd_sgbuf_get_addr(dmab, 0); + dmab->addr = sgbuf->addrs[0] & PAGE_MASK; return p; error: @@ -787,10 +825,23 @@ static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size) static void snd_dma_sg_fallback_free(struct snd_dma_buffer *dmab) { + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + + if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC_SG_FALLBACK) + set_pages_array_wb(sgbuf->pages, sgbuf->count); vunmap(dmab->area); __snd_dma_sg_fallback_free(dmab, dmab->private_data); } +static dma_addr_t snd_dma_sg_fallback_get_addr(struct snd_dma_buffer *dmab, + size_t offset) +{ + struct snd_dma_sg_fallback *sgbuf = dmab->private_data; + size_t index = offset >> PAGE_SHIFT; + + return (sgbuf->addrs[index] & PAGE_MASK) | (offset & ~PAGE_MASK); +} + static int snd_dma_sg_fallback_mmap(struct snd_dma_buffer *dmab, struct vm_area_struct *area) { @@ -805,8 +856,8 @@ static const struct snd_malloc_ops snd_dma_sg_fallback_ops = { .alloc = snd_dma_sg_fallback_alloc, .free = snd_dma_sg_fallback_free, .mmap = snd_dma_sg_fallback_mmap, + .get_addr = snd_dma_sg_fallback_get_addr, /* reuse vmalloc helpers */ - .get_addr = snd_dma_vmalloc_get_addr, .get_page = snd_dma_vmalloc_get_page, .get_chunk_size = snd_dma_vmalloc_get_chunk_size, };
We change recently the memalloc helper to use dma_alloc_noncontiguous() and the fallback to get_pages(). Although lots of issues with IOMMU (or non-IOMMU) have been addressed, but there seems still a regression on Xen PV. Interestingly, the only proper way to work is use dma_alloc_coherent(). The use of dma_alloc_coherent() for SG buffer was dropped as it's problematic on IOMMU systems. OTOH, Xen PV has a different way, and it's fine to use the dma_alloc_coherent(). This patch is a workaround for Xen PV. It consists of the following changes: - For Xen PV, use only the fallback allocation without dma_alloc_noncontiguous() - In the fallback allocation, use dma_alloc_coherent(); the DMA address from dma_alloc_coherent() is returned in get_addr ops - The DMA addresses are stored in an array; the first entry stores the number of allocated pages in lower bits, which are referred at releasing pages again Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> Fixes: a8d302a0b770 ("ALSA: memalloc: Revive x86-specific WC page allocations again") Fixes: 9736a325137b ("ALSA: memalloc: Don't fall back for SG-buffer with IOMMU") Link: https://lore.kernel.org/r/87tu256lqs.wl-tiwai@suse.de Signed-off-by: Takashi Iwai <tiwai@suse.de> --- Marek, this is another respin of the fix. Please check this one and report back. Thanks! sound/core/memalloc.c | 87 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 69 insertions(+), 18 deletions(-)