
[09/11] io_uring: delay ZC pool destruction

Message ID 20230826011954.1801099-10-dw@davidwei.uk (mailing list archive)
State New
Series Zero copy network RX using io_uring

Commit Message

David Wei Aug. 26, 2023, 1:19 a.m. UTC
From: David Wei <davidhwei@meta.com>

At any point in time, a ZC buf may be in one of the following places:

* RX queue
* Socket
* One of the ifq ringbufs
* Userspace

The ZC pool region and the pool itself cannot be destroyed until all
bufs have been returned.

This patch changes ZC pool destruction to use delayed work, waiting up to
10 seconds for all bufs to be returned before unconditionally destroying
the pool.
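
In effect, destruction becomes a workqueue-driven polling loop: the work
item rechecks for outstanding bufs once per second and reschedules itself
until either every buf has been returned or the 10 second deadline passes,
at which point the pool is torn down regardless. A minimal sketch of that
pattern, independent of the io_uring specifics below (the struct and
helpers my_pool, all_bufs_returned() and free_pool() are illustrative
only, not part of this patch):

  #include <linux/atomic.h>
  #include <linux/jiffies.h>
  #include <linux/slab.h>
  #include <linux/workqueue.h>

  struct my_pool {
          atomic_t                outstanding;    /* bufs not yet returned */
          unsigned long           delay_end;      /* jiffies deadline for teardown */
          struct delayed_work     destroy_work;
  };

  static bool all_bufs_returned(struct my_pool *p)
  {
          return atomic_read(&p->outstanding) == 0;
  }

  static void free_pool(struct my_pool *p)
  {
          /* stand-in for unmapping the bufs and freeing the region */
          kvfree(p);
  }

  static void pool_destroy_work(struct work_struct *work)
  {
          struct my_pool *p = container_of(to_delayed_work(work),
                                           struct my_pool, destroy_work);

          /* Recheck once a second until the deadline, then free regardless. */
          if (!all_bufs_returned(p) && time_before(jiffies, p->delay_end)) {
                  schedule_delayed_work(&p->destroy_work, HZ);
                  return;
          }
          free_pool(p);
  }

  static void pool_destroy(struct my_pool *p)
  {
          p->delay_end = jiffies + 10 * HZ;
          INIT_DELAYED_WORK(&p->destroy_work, pool_destroy_work);
          schedule_delayed_work(&p->destroy_work, 0);
  }

Rescheduling with a one second period keeps each check cheap while still
bounding how long bufs that are stuck in a socket or in userspace can pin
the pool.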

Signed-off-by: David Wei <davidhwei@meta.com>
Co-developed-by: Jonathan Lemon <jonathan.lemon@gmail.com>
---
 io_uring/zc_rx.c | 50 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 44 insertions(+), 6 deletions(-)

Patch

diff --git a/io_uring/zc_rx.c b/io_uring/zc_rx.c
index b8dd699e2777..70e39f851e47 100644
--- a/io_uring/zc_rx.c
+++ b/io_uring/zc_rx.c
@@ -28,6 +28,10 @@  struct io_zc_rx_pool {
 	u32			cache_count;
 	u32			cache[POOL_CACHE_SIZE];
 
+	/* delayed destruction */
+	unsigned long		delay_end;
+	struct delayed_work	destroy_work;
+
 	/* freelist */
 	spinlock_t		freelist_lock;
 	u32			free_count;
@@ -222,20 +226,56 @@  int io_zc_rx_create_pool(struct io_ring_ctx *ctx,
 	return ret;
 }
 
-static void io_zc_rx_destroy_pool(struct io_zc_rx_pool *pool)
+static void io_zc_rx_destroy_ifq(struct io_zc_rx_ifq *ifq)
+{
+	if (ifq->dev)
+		dev_put(ifq->dev);
+	io_free_rbuf_ring(ifq);
+	kfree(ifq);
+}
+
+static void io_zc_rx_destroy_pool_work(struct work_struct *work)
 {
+	struct io_zc_rx_pool *pool = container_of(
+			to_delayed_work(work), struct io_zc_rx_pool, destroy_work);
 	struct device *dev = netdev2dev(pool->ifq->dev);
 	struct io_zc_rx_buf *buf;
+	int i, refc, count = 0;
 
-	for (int i = 0; i < pool->nr_pages; i++) {
+	for (i = 0; i < pool->nr_pages; i++) {
 		buf = &pool->bufs[i];
+		refc = atomic_read(&buf->refcount) & IO_ZC_RX_KREF_MASK;
+		if (refc) {
+			if (time_before(jiffies, pool->delay_end)) {
+				schedule_delayed_work(&pool->destroy_work, HZ);
+				return;
+			}
+			count++;
+		}
+	}
+
+	if (count)
+		pr_debug("freeing pool with %d/%d outstanding pages\n",
+			 count, pool->nr_pages);
 
+
+	for (i = 0; i < pool->nr_pages; i++) {
+		buf = &pool->bufs[i];
 		io_zc_rx_unmap_buf(dev, buf);
 	}
+
+	io_zc_rx_destroy_ifq(pool->ifq);
 	kvfree(pool->bufs);
 	kvfree(pool);
 }
 
+static void io_zc_rx_destroy_pool(struct io_zc_rx_pool *pool)
+{
+	pool->delay_end = jiffies + HZ * 10;
+	INIT_DELAYED_WORK(&pool->destroy_work, io_zc_rx_destroy_pool_work);
+	schedule_delayed_work(&pool->destroy_work, 0);
+}
+
 static struct io_zc_rx_ifq *io_zc_rx_ifq_alloc(struct io_ring_ctx *ctx)
 {
 	struct io_zc_rx_ifq *ifq;
@@ -256,10 +296,8 @@  static void io_zc_rx_ifq_free(struct io_zc_rx_ifq *ifq)
 		io_close_zc_rxq(ifq);
 	if (ifq->pool)
 		io_zc_rx_destroy_pool(ifq->pool);
-	if (ifq->dev)
-		dev_put(ifq->dev);
-	io_free_rbuf_ring(ifq);
-	kfree(ifq);
+	else
+		io_zc_rx_destroy_ifq(ifq);
 }
 
 int io_register_zc_rx_ifq(struct io_ring_ctx *ctx,