[1/2] mm/dmapool: replace linked list with xarray

Message ID 20220428202714.17630-2-kbusch@kernel.org (mailing list archive)
State New
Series dmapool performance enhancements

Commit Message

Keith Busch April 28, 2022, 8:27 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

Store the cached dma pool pages in an xarray instead of a linked list.
This allows constant lookup time to free the page, lockless iteration
while displaying the attributes, and frees up space in 'struct dma_page'.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 mm/dmapool.c | 39 ++++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 23 deletions(-)

Comments

Matthew Wilcox April 28, 2022, 9:59 p.m. UTC | #1
On Thu, Apr 28, 2022 at 02:27:13PM -0600, kbusch@kernel.org wrote:
> Store the cached dma pool pages in an xarray instead of a linked list.
> This allows constant lookup time to free the page, lockless iteration
> while displaying the attributes, and frees up space in 'struct dma_page'.

Hey Keith, this looks great, especially since there's more performance
you can squeeze out of it.

>  struct dma_pool {		/* the pool */
> -	struct list_head page_list;
> +	struct xarray pages;
>  	spinlock_t lock;

A further optimisation you could make is to use the xa_lock to protect
the rest of the data structure.  But that would be a subsequent patch.
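
Purely as a sketch of the pattern (the helper below is invented, not
something in this patch): the xarray embeds its own spinlock, so the places
that currently take pool->lock could take that instead, with any array
modification made under it going through the __xa_*() variants:

static void pool_retire_page(struct dma_pool *pool, struct dma_page *page)
{
	unsigned long flags;

	/* the lock embedded in pool->pages stands in for pool->lock */
	xa_lock_irqsave(&pool->pages, flags);
	/* __xa_erase() expects the xa_lock to already be held */
	__xa_erase(&pool->pages, (unsigned long)page->vaddr);
	xa_unlock_irqrestore(&pool->pages, flags);
	pool_free_page(pool, page);
}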

> @@ -282,7 +281,8 @@ void dma_pool_destroy(struct dma_pool *pool)
>  		device_remove_file(pool->dev, &dev_attr_pools);
>  	mutex_unlock(&pools_reg_lock);
>  
> -	list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
> +	xa_for_each(&pool->pages, i, page) {
> +		xa_erase(&pool->pages, i);
>  		if (is_page_busy(page)) {
>  			if (pool->dev)
>  				dev_err(pool->dev, "%s %s, %p busy\n", __func__,
> @@ -291,12 +291,12 @@ void dma_pool_destroy(struct dma_pool *pool)
>  				pr_err("%s %s, %p busy\n", __func__,
>  				       pool->name, page->vaddr);
>  			/* leak the still-in-use consistent memory */
> -			list_del(&page->page_list);
>  			kfree(page);
>  		} else
>  			pool_free_page(pool, page);
>  	}
>  
> +	xa_destroy(&pool->pages);

If you're erasing the entries as you go, you don't need to call
xa_destroy().  Contrariwise, if you call xa_destroy(), you don't need to
call xa_erase().  I'd probably just call xa_destroy() at the end as it's
less work.
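
i.e., roughly this (a sketch of the dma_pool_destroy() loop with the error
reporting trimmed):

	xa_for_each(&pool->pages, i, page) {
		if (is_page_busy(page))
			kfree(page);	/* leak the still-in-use memory */
		else
			pool_free_page(pool, page);
	}
	/* tears down every node in one pass, so no per-entry xa_erase() */
	xa_destroy(&pool->pages);
	kfree(pool);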

> @@ -316,13 +316,14 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
>  {
>  	unsigned long flags;
>  	struct dma_page *page;
> +	unsigned long i;
>  	size_t offset;
>  	void *retval;
>  
>  	might_alloc(mem_flags);
>  
>  	spin_lock_irqsave(&pool->lock, flags);
> -	list_for_each_entry(page, &pool->page_list, page_list) {
> +	xa_for_each(&pool->pages, i, page) {
>  		if (page->offset < pool->allocation)
>  			goto ready;
>  	}

A further optimisation you could do is use xarray search marks to
search for only pages which have free entries.
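
A rough sketch, assuming a mark is set on every page that still has free
blocks and cleared when its last block is handed out (XA_MARK_0 and the
DMAPOOL_HAS_FREE name are arbitrary choices here, not existing code):

#define DMAPOOL_HAS_FREE	XA_MARK_0

static struct dma_page *pool_find_free_page(struct dma_pool *pool)
{
	unsigned long i = 0;

	/* skips any subtree that contains no marked entries */
	return xa_find(&pool->pages, &i, ULONG_MAX, DMAPOOL_HAS_FREE);
}

with xa_set_mark()/xa_clear_mark() keeping the mark up to date as blocks
are freed and allocated.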

> +	xa_store(&pool->pages, page->vaddr, page, mem_flags);

Oof.  The XArray isn't good at such sparse allocations.  You can improve
it (by a significant amount) by shifting the vaddr by PAGE_SHIFT bits.
Should save you two nodes of tree height and thus two cache lines per
lookup.
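
Something like this, for illustration, keeping the patch's assumption that
vaddr is aligned to pool->allocation (the xarray index is an unsigned long,
hence the cast on the store side):

	/* in dma_pool_alloc(): drop the always-zero low bits */
	xa_store(&pool->pages, (unsigned long)page->vaddr >> PAGE_SHIFT,
		 page, mem_flags);

	/* in pool_find_page(): */
	return xa_load(&pool->pages,
		       (vaddr & ~(pool->allocation - 1)) >> PAGE_SHIFT);
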
Keith Busch April 29, 2022, 1:41 a.m. UTC | #2
On Thu, Apr 28, 2022 at 10:59:49PM +0100, Matthew Wilcox wrote:
> On Thu, Apr 28, 2022 at 02:27:13PM -0600, kbusch@kernel.org wrote:
> > @@ -316,13 +316,14 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
> >  {
> >  	unsigned long flags;
> >  	struct dma_page *page;
> > +	unsigned long i;
> >  	size_t offset;
> >  	void *retval;
> >  
> >  	might_alloc(mem_flags);
> >  
> >  	spin_lock_irqsave(&pool->lock, flags);
> > -	list_for_each_entry(page, &pool->page_list, page_list) {
> > +	xa_for_each(&pool->pages, i, page) {
> >  		if (page->offset < pool->allocation)
> >  			goto ready;
> >  	}
> 
> A further optimisation you could do is use xarray search marks to
> search for only pages which have free entries.

That's an interesting idea. I didn't consider setting marks since patch 2
replaces this search with essentially a stack pop. If a marked entry can be
returned in a similar time, though, I could drop patch 2. I can't tell from the
xarray code whether that lookup is in the same ballpark, so I'll just rerun
the benchmark. :)

Patch

diff --git a/mm/dmapool.c b/mm/dmapool.c
index a7eb5d0eb2da..ac93f58d4654 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -35,13 +35,14 @@ 
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/wait.h>
+#include <linux/xarray.h>
 
 #if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
 #define DMAPOOL_DEBUG 1
 #endif
 
 struct dma_pool {		/* the pool */
-	struct list_head page_list;
+	struct xarray pages;
 	spinlock_t lock;
 	size_t size;
 	struct device *dev;
@@ -52,7 +53,6 @@  struct dma_pool {		/* the pool */
 };
 
 struct dma_page {		/* cacheable header for 'allocation' bytes */
-	struct list_head page_list;
 	void *vaddr;
 	dma_addr_t dma;
 	unsigned int in_use;
@@ -81,13 +81,12 @@  static ssize_t pools_show(struct device *dev, struct device_attribute *attr, cha
 	list_for_each_entry(pool, &dev->dma_pools, pools) {
 		unsigned pages = 0;
 		unsigned blocks = 0;
+		unsigned long i;
 
-		spin_lock_irq(&pool->lock);
-		list_for_each_entry(page, &pool->page_list, page_list) {
+		xa_for_each(&pool->pages, i, page) {
 			pages++;
 			blocks += page->in_use;
 		}
-		spin_unlock_irq(&pool->lock);
 
 		/* per-pool info, no real statistics yet */
 		temp = scnprintf(next, size, "%-16s %4u %4zu %4zu %2u\n",
@@ -160,7 +159,7 @@  struct dma_pool *dma_pool_create(const char *name, struct device *dev,
 
 	retval->dev = dev;
 
-	INIT_LIST_HEAD(&retval->page_list);
+	xa_init(&retval->pages);
 	spin_lock_init(&retval->lock);
 	retval->size = size;
 	retval->boundary = boundary;
@@ -252,7 +251,6 @@  static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
 	memset(page->vaddr, POOL_POISON_FREED, pool->allocation);
 #endif
 	dma_free_coherent(pool->dev, pool->allocation, page->vaddr, dma);
-	list_del(&page->page_list);
 	kfree(page);
 }
 
@@ -266,8 +264,9 @@  static void pool_free_page(struct dma_pool *pool, struct dma_page *page)
  */
 void dma_pool_destroy(struct dma_pool *pool)
 {
-	struct dma_page *page, *tmp;
+	struct dma_page *page;
 	bool empty = false;
+	unsigned long i;
 
 	if (unlikely(!pool))
 		return;
@@ -282,7 +281,8 @@  void dma_pool_destroy(struct dma_pool *pool)
 		device_remove_file(pool->dev, &dev_attr_pools);
 	mutex_unlock(&pools_reg_lock);
 
-	list_for_each_entry_safe(page, tmp, &pool->page_list, page_list) {
+	xa_for_each(&pool->pages, i, page) {
+		xa_erase(&pool->pages, i);
 		if (is_page_busy(page)) {
 			if (pool->dev)
 				dev_err(pool->dev, "%s %s, %p busy\n", __func__,
@@ -291,12 +291,12 @@  void dma_pool_destroy(struct dma_pool *pool)
 				pr_err("%s %s, %p busy\n", __func__,
 				       pool->name, page->vaddr);
 			/* leak the still-in-use consistent memory */
-			list_del(&page->page_list);
 			kfree(page);
 		} else
 			pool_free_page(pool, page);
 	}
 
+	xa_destroy(&pool->pages);
 	kfree(pool);
 }
 EXPORT_SYMBOL(dma_pool_destroy);
@@ -316,13 +316,14 @@  void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 {
 	unsigned long flags;
 	struct dma_page *page;
+	unsigned long i;
 	size_t offset;
 	void *retval;
 
 	might_alloc(mem_flags);
 
 	spin_lock_irqsave(&pool->lock, flags);
-	list_for_each_entry(page, &pool->page_list, page_list) {
+	xa_for_each(&pool->pages, i, page) {
 		if (page->offset < pool->allocation)
 			goto ready;
 	}
@@ -334,9 +335,9 @@  void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 	if (!page)
 		return NULL;
 
+	xa_store(&pool->pages, page->vaddr, page, mem_flags);
 	spin_lock_irqsave(&pool->lock, flags);
 
-	list_add(&page->page_list, &pool->page_list);
  ready:
 	page->in_use++;
 	offset = page->offset;
@@ -379,17 +380,9 @@  void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
 }
 EXPORT_SYMBOL(dma_pool_alloc);
 
-static struct dma_page *pool_find_page(struct dma_pool *pool, dma_addr_t dma)
+static struct dma_page *pool_find_page(struct dma_pool *pool, unsigned long vaddr)
 {
-	struct dma_page *page;
-
-	list_for_each_entry(page, &pool->page_list, page_list) {
-		if (dma < page->dma)
-			continue;
-		if ((dma - page->dma) < pool->allocation)
-			return page;
-	}
-	return NULL;
+	return xa_load(&pool->pages, vaddr & ~(pool->allocation - 1));
 }
 
 /**
@@ -408,7 +401,7 @@  void dma_pool_free(struct dma_pool *pool, void *vaddr, dma_addr_t dma)
 	unsigned int offset;
 
 	spin_lock_irqsave(&pool->lock, flags);
-	page = pool_find_page(pool, dma);
+	page = pool_find_page(pool, vaddr);
 	if (!page) {
 		spin_unlock_irqrestore(&pool->lock, flags);
 		if (pool->dev)