From patchwork Wed Dec 18 00:37:27 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912793 Received: from mail-pf1-f176.google.com (mail-pf1-f176.google.com [209.85.210.176]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 78494FC0E for ; Wed, 18 Dec 2024 00:37:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.176 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482277; cv=none; b=E6SS/AhBKmiaJPeYD6TMxoTY4EB6Xb6slH0nlN2qr1emIsG8k7MJc/5Tsz8cU8HRKxyrwIgpueusABYVdL6kiH1/Lq+1RL19ZMX2Lv5UbutbbXIAOmQ0qGORGQpDefzayQCXWwmJ7HBjJHhm2licHrzpkH66iI2N6dIX97/91rI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482277; c=relaxed/simple; bh=CztbvP9+RIyhpbrNQZVgPuMnd7qkzCOzRjukEnHDodc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=b6FiiALZ3hwoMalnWX9JvKY/YZWcFro2VOr4yDrwIXlmzbSuPFE32fw+7AoSl+V1Rjq5H99znjh6lwJodpOkUalQh9k8JdkmhFy7pX0J8g20XhCLQ7ZV9FBwjsSL3k0RGHyFwNk5WEJeAUtZPZUKSFIPlPaLvw0BQ04THYiG4bM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=AusVc5nf; arc=none smtp.client-ip=209.85.210.176 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="AusVc5nf" Received: by mail-pf1-f176.google.com with SMTP id 
d2e1a72fcca58-725ef0397aeso5281056b3a.2 for ; Tue, 17 Dec 2024 16:37:56 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482276; x=1735087076; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=3FmBtD/9kNoCPyeDOsGVnpCJKDKHQdivfmWBaZAFy/E=; b=AusVc5nfBufnLs22EbWfq+U9VvthWkO3tB3yNSQWJrZc/0bhQNh+95bZ4VquKw7KXH jwBU12DR3nFfF6SABvf5VYMDl0usCTDmQ9gxyYKjIHLi0ZYSfsNOa4mwfPEYmjVnkofu JiHbsEjg36DZVqIG7hBvgJfWvRbjUYHVFZ1fnCMr49UGIfgwvWa4ZycVA5J19oPtPkuH DJJEDzcmW1CJKWi8P1cl7g49JKg0aQ8AR9piBrwTG4nwCOn4U3nh8Ppt9E6xlVC/MJf5 t33HFxFk22+v13iO2Sgo602mxlYuMMgivLV7nnEudoIqjsh0ff2oaNqeIGSKyw0igiHN HLwQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482276; x=1735087076; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=3FmBtD/9kNoCPyeDOsGVnpCJKDKHQdivfmWBaZAFy/E=; b=rBDYcpNlHQ2CfsxeQIZzpehah9GecnF9U4Wyx2CFJV0vk/83pxjbPyiYSZmn+RmEOZ 2HX9W+8DsGaWXZUWu6ley8yNK7WvsNChS5916eKA9wHzR2XVSeYvMZ7vJM1pBf/MFam8 oaOIRQggrcRM8fuJUlPWQBGL2CQDswDpEARO7WK2itHJALs4umS5XLa1ih99ubJHP4jv FiuzLxLDXWYB5G3d0i5owAKIMpqa+IdZCtUhQZcLKxxxj5hl4qv94tDqB/UDJnUy3WZh NRzy4xx9xg4yB400qWMNj9d4VojyN8VmPzCnGgieloX5Cz3+C1Lj3q6IHmL6xoSZ19v2 WcCg== X-Gm-Message-State: AOJu0YwyC2prtpUIGXaoIyXsmVQBll+/njqrpAwrmJhj2W0d1fTvfsHg RSVC/Pz34e6u6jTlPb4uQ0vbWDekwXSaeA17banYaNRTeYsJrcM0FkwZxy1cY7hjtxVRJvKReWr I X-Gm-Gg: ASbGncsMP0PJ42Zkn7qyq+zKE0PXUK/klNwNwGwBt+RgOJ+dvRRhJf8Ns98LeL/7kiA Qgmn9PZqfs0uktmrn2KaGO1zZKZjsJSt/qCgl3dl9F0Mb1kP4g3JTv/coRCNAyiWmdWrUX58qvy DpLqwuWEvYm4GuHoqrF3ao6tKrvJkk29jjYL2Jedvjp4/8vjej1OYnEuGBfmxnrHyXY1O+RMDNl hZIc3JGf5wTwwzlplMUA/wh+0MQwqqpNyyhCoZh X-Google-Smtp-Source: AGHT+IEQ/e/f05KYvugRA8kF98/vSc9zeC46GuK5+HSz3OGGkvguKsaBFClQoh/UbEHWew5KD3fsmg== 
X-Received: by 2002:a05:6a00:4f87:b0:725:e1de:c0bf with SMTP id d2e1a72fcca58-72a8d2377b1mr1351050b3a.9.1734482275820; Tue, 17 Dec 2024 16:37:55 -0800 (PST) Received: from localhost ([2a03:2880:ff:d::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5aa9633sm6338924a12.21.2024.12.17.16.37.55 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:37:55 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 01/20] net: page_pool: don't cast mp param to devmem Date: Tue, 17 Dec 2024 16:37:27 -0800 Message-ID: <20241218003748.796939-2-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov page_pool_check_memory_provider() is a generic path and shouldn't assume anything about the actual type of the memory provider argument. It's fine while devmem is the only provider, but cast away the devmem specific binding types to avoid confusion. 
Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- net/core/page_pool_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 48335766c1bf..8d31c71bea1a 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -353,7 +353,7 @@ void page_pool_unlist(struct page_pool *pool) int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { - struct net_devmem_dmabuf_binding *binding = rxq->mp_params.mp_priv; + void *binding = rxq->mp_params.mp_priv; struct page_pool *pool; struct hlist_node *n; From patchwork Wed Dec 18 00:37:28 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912794 Received: from mail-pf1-f171.google.com (mail-pf1-f171.google.com [209.85.210.171]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B99181CA84 for ; Wed, 18 Dec 2024 00:37:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.171 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482279; cv=none; b=FtvbeqCY1xOQZez1VsuRbX+RKKaLcdTCRw+4pU7t0lp9jCugWwAZAwfVajmwe2bY3WORNtJmzeh2gU3I4v0iKQMSQCvx6cC9lnZS6S3T93PJ9B1N5opiCg/3cHUhMtRWxNHRkRJCkbZPP95DZvRnqnMoJa2MMZcq8IbEKU6Jxcw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482279; c=relaxed/simple; bh=kokGPXOtKmJ8NMnDjbo7lcERhIq+pUTFKWNlFSMMVJE=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jO7FlY4fSVdjshH5kC0PWGrFoTiG6VVZhtpjXSGeTfJjpwqX7KoYNYm+mKprMxPSI2xEgL+QBQ2vmWd0yo0NlzdYt4fEp8WNiWvEAmE3vbXbZzTMaRSAARvjdNd4pObiZAuHm/HC2S6B3l9KqQrLUWc6YGOeXtiGyfx8UtHJ8Dw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none 
smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=0Z+H/K1I; arc=none smtp.client-ip=209.85.210.171 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="0Z+H/K1I" Received: by mail-pf1-f171.google.com with SMTP id d2e1a72fcca58-725abf74334so5188453b3a.3 for ; Tue, 17 Dec 2024 16:37:57 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482277; x=1735087077; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=ysSCaIB4GXTEiUBi/fqYnu5BV5g5/6ElTQwwBF3xc+Y=; b=0Z+H/K1IAW6vINrpYYHp2BxAQHHXvwAgmLLciv3KpGbcxl+cp38lD/ujCpUhb3XB54 rTMgxE+d3iA5QgVTJm5N69ogvB/p17m48mPzKa7ErRDKa2t2GDZyAQ/o+955LphXGdqa 5gwl+2nxbjGTEwrD5jJUHzEJE7DPwsVbS4obFRzVCcgHOT75q0U6QmZIfusC9PL4MZEc T7J0+GvIpgh7M9DgYO972QBTwszHN5SAJ7DairNkRojoH+UOZiDxS0/cWFF9L2ts+VE/ vY6zYDgeZNkD3pmuR2wAh9l6R0VKujUjNmMx6X96DtOrDu98/YkgUhnBlD+UgFz44vnR OQNg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482277; x=1735087077; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=ysSCaIB4GXTEiUBi/fqYnu5BV5g5/6ElTQwwBF3xc+Y=; b=s5Wl2QO7ls6EPyDGIMTeWFwzY4Kx51aN4scIk/QoZucCq2pl7WzDLLbg6jlPAWPchb Q7aCZgCLGP6UsE/yUZV4SZErIJal9VxICihWEejmgAjoLZyTsxBCOscY4MIJ7J/rQKPV ZgR8kQLXw5g7KMdDfn8UfwAk/jdr8DfHFrC9Z/Kwjgd6gMkMFwp0iCIbLDmAYryShIg9 
BJb44vRMijUjoe1dshcqE3sx+av2UX4bILsiQUNZQTqxTnZi+X685pLeQcFs5D2gZbdM L+3865EgYyx9Lu1KeiXCjKP1UJBmHLXt+oucr/4plCgk8o4T7tmERXLRVFdHceikFLpj 3GhQ== X-Gm-Message-State: AOJu0YwYyNyzocp5H5EWLxEQ2oAe6EM2xlW4vxlCOY8U94tNYpKpyQH1 dLM2GRYaQSbyP60VD868wuCm1kA8QVFjn0tWle6uX9mfid+OB1qsi0EXR0IWRQuP2FlSHpLTarL h X-Gm-Gg: ASbGncvpKFNwzC3MYYKD5pZkcT5hRSPDS1n80a5Ycvh6sjT1kl2Us4oh7FSXakuD45d J5ORlsgbGOyQZK6Cp8Xm+rPKXNXLaOg5YacxLNft0/+3NPxeiEyek57I4pP/pPKB9+ytljDNYZ9 ybrSMmE797GS95m2AjB3xudxH7iINtwZJQrokNLnDwe0gmBePLOlHrNXxe4ybrRKmIwwp6Egz/p SFkeLQttvnZ2PZvQ8XcWIpRFqNQ0D0M1vtSJteL5g== X-Google-Smtp-Source: AGHT+IFiduWsFdTMQ7qJ3+csnpW5d6D63RLz/DAzXmw/lXvnVUdd9pwRGFj+/LTxRBsIl63Y1H4L0Q== X-Received: by 2002:a05:6a20:c70a:b0:1e0:cbd1:8046 with SMTP id adf61e73a8af0-1e5b45164f2mr1526795637.0.1734482277120; Tue, 17 Dec 2024 16:37:57 -0800 (PST) Received: from localhost ([2a03:2880:ff:13::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5aaf4a4sm6408269a12.25.2024.12.17.16.37.56 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:37:56 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 02/20] net: prefix devmem specific helpers Date: Tue, 17 Dec 2024 16:37:28 -0800 Message-ID: <20241218003748.796939-3-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Add prefixes to all helpers that are specific to devmem TCP, i.e. net_iov_binding[_id]. 
Reviewed-by: Mina Almasry Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- net/core/devmem.c | 2 +- net/core/devmem.h | 14 +++++++------- net/ipv4/tcp.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/net/core/devmem.c b/net/core/devmem.c index 0b6ed7525b22..5e1a05082ab8 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -93,7 +93,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) void net_devmem_free_dmabuf(struct net_iov *niov) { - struct net_devmem_dmabuf_binding *binding = net_iov_binding(niov); + struct net_devmem_dmabuf_binding *binding = net_devmem_iov_binding(niov); unsigned long dma_addr = net_devmem_get_dma_addr(niov); if (WARN_ON(!gen_pool_has_addr(binding->chunk_pool, dma_addr, diff --git a/net/core/devmem.h b/net/core/devmem.h index 76099ef9c482..99782ddeca40 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -86,11 +86,16 @@ static inline unsigned int net_iov_idx(const struct net_iov *niov) } static inline struct net_devmem_dmabuf_binding * -net_iov_binding(const struct net_iov *niov) +net_devmem_iov_binding(const struct net_iov *niov) { return net_iov_owner(niov)->binding; } +static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) +{ + return net_devmem_iov_binding(niov)->id; +} + static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) { struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); @@ -99,11 +104,6 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); } -static inline u32 net_iov_binding_id(const struct net_iov *niov) -{ - return net_iov_owner(niov)->binding->id; -} - static inline void net_devmem_dmabuf_binding_get(struct net_devmem_dmabuf_binding *binding) { @@ -171,7 +171,7 @@ static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) return 0; } -static inline u32 net_iov_binding_id(const struct net_iov *niov) +static inline u32 
net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0d704bda6c41..b872de9a8271 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2494,7 +2494,7 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, /* Will perform the exchange later */ dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx]; - dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov); + dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov); offset += copy; remaining_len -= copy; From patchwork Wed Dec 18 00:37:29 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912795 Received: from mail-pf1-f178.google.com (mail-pf1-f178.google.com [209.85.210.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2C21F1CA84 for ; Wed, 18 Dec 2024 00:37:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482281; cv=none; b=dDA/iQMsv7lEUHJTI/gXez7c9HTd+QbtaA4TRQPcDS6L8kwRioBlxaggARH3d2djrhX4YuxRACbw2ENdYsqGWPtogmJMq11BwX0FUemEg7k3V/1NAqcLndc8TieD79XrJ1+6Aziuy7/rg6elH5w7EYBngRV1bXfdWW+qPGnJLAw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482281; c=relaxed/simple; bh=pcgA9iCs/ieieDW/xSNKiw2slohdBDWi6PwGr9syOoI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=o43+PPWoed0XDTtIOYCOplqZPCYuOagZPlgdCnb+bAnnorOXc4bMQmxph5PP9vZgUbULsnqksZuMhO5+7fPWwWcK0EEU3mi7QJh49eVCDrKe1djigjJ4jmfwcVOxRM8tYJRw9ewhhtzQjj5GW/VymEAZPAaY3u4+e6ImqvMNnm0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) 
header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=JuNDnaO0; arc=none smtp.client-ip=209.85.210.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="JuNDnaO0" Received: by mail-pf1-f178.google.com with SMTP id d2e1a72fcca58-725ee6f56b4so5193225b3a.3 for ; Tue, 17 Dec 2024 16:37:59 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482278; x=1735087078; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=jpi18jMRooqrSQpnsZ+v4ZIObLSk5Puxja5FgSfrqcQ=; b=JuNDnaO0Vr9kuQ26cOqt+bBlDXQUMKDNoP5uK+wcatZFXOj1F5+HHu5Dabk7tB10j6 U3Ab5rYvKPxli39E0XXJZgPEPyPqqMzJlO6bnz1SVvji1AzsdYSsgWdVOJu9PesQgto6 JzScz+nanhaT2TRiuHrI/zU7TZ0NE2oSYHnQFqg4+KeBAxahAd+yfEbHn1z0hOzbsfdQ JGCAcFZXYQgCaF5+2E+ZdhnyWnCP3Gc2jMqbANJ0asQD3oGCHq2nnuWrd6OkfFF1drkJ VEqNufeZI0eKERDW8cbkFjkwScZhi/3RlUlut6K3S1r3Y+5FM1HJ7HFA8hynlMOZqmBz nxLQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482278; x=1735087078; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=jpi18jMRooqrSQpnsZ+v4ZIObLSk5Puxja5FgSfrqcQ=; b=DBWSN/CwhDTGslWdrj2qNEvbIMC1jrAl+VJ8YQ2xKWumjQpfFkw8CtQ1qKWBdeEttG vM2RhxG701YaTmBDVEHnVnzIRlKi5lEza6KWyLdWPC1VJ7UlwiQx6Cq+ABfhkKfz0FgF 6+gN8RRp2VYn2HftofKjjqXo+PeLd7+SClibN5Fbu59tJnOXGJ79NTP2ZRcHjFvYX/56 eABEM1jLT+TPTJ9krRfkx4jAkBsk7wqdtqtYm6osEkoNTZvtU5Bkqb2ftfHk02IXMz9o 
nfPbRb9Zm4n0KVlnKuiNbueKmNlRtSwXzuS9tw65TRRIi1VMA/cR34QR/MEXJ903tCFE W7Vw== X-Gm-Message-State: AOJu0Yw4KwQpwaG8HZ3QL4nEYP7zgZFMJpGoyETIiG9on5/kDwg0zS5o yV7LsNNDfnTyEkK8L90eFzvks80Y3ooD8R7IFHiqpPIgZN/8ez5mZ6/O0/Pq/TEMoBGQ5XUrRAp X X-Gm-Gg: ASbGncuDGzpATPhGAa9VJBS7ork5/CXQTitamLr7OPrXOM35otCpP60O81NLB9UE9Dd K2Vnr3TjDn2XkzuLG9foR5LO94GFBP6pyzHgz6c8vRCN5qyCqsIgSk1jyJggT0bFzpJl0EF/c0l +2x24lGdgznl/RILSVjfPQz6ffvqHTsqwzhAzqNYyHHte9qkhL8/9TNpPmRnnRwe6t3jc9kOnd3 VBG/nqietF5gj96MYrABEaOQYhis3XwFvmMtA== X-Google-Smtp-Source: AGHT+IEuN2fQggpt46flekPinYzs5Du978oFJPjfx5zWJ0aC8KhrK2bhsLKJQIEuOQgZPvqs+GHrMg== X-Received: by 2002:a05:6a00:812:b0:728:e745:23cd with SMTP id d2e1a72fcca58-72a8d21e3b9mr1382273b3a.3.1734482278472; Tue, 17 Dec 2024 16:37:58 -0800 (PST) Received: from localhost ([2a03:2880:ff::]) by smtp.gmail.com with ESMTPSA id d2e1a72fcca58-72a204619c3sm2865308b3a.125.2024.12.17.16.37.57 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:37:58 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 03/20] net: generalise net_iov chunk owners Date: Tue, 17 Dec 2024 16:37:29 -0800 Message-ID: <20241218003748.796939-4-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Currently net_iov stores a pointer to struct dmabuf_genpool_chunk_owner, which serves as a useful abstraction to share data and provide a context. However, it's too devmem specific, and we want to reuse it for other memory providers, and for that we need to decouple net_iov from devmem. 
Make net_iov point to a new base structure called net_iov_area, which dmabuf_genpool_chunk_owner extends. Reviewed-by: Mina Almasry Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/netmem.h | 21 ++++++++++++++++++++- net/core/devmem.c | 25 +++++++++++++------------ net/core/devmem.h | 25 +++++++++---------------- 3 files changed, 42 insertions(+), 29 deletions(-) diff --git a/include/net/netmem.h b/include/net/netmem.h index 1b58faa4f20f..c61d5b21e7b4 100644 --- a/include/net/netmem.h +++ b/include/net/netmem.h @@ -24,11 +24,20 @@ struct net_iov { unsigned long __unused_padding; unsigned long pp_magic; struct page_pool *pp; - struct dmabuf_genpool_chunk_owner *owner; + struct net_iov_area *owner; unsigned long dma_addr; atomic_long_t pp_ref_count; }; +struct net_iov_area { + /* Array of net_iovs for this area. */ + struct net_iov *niovs; + size_t num_niovs; + + /* Offset into the dma-buf where this chunk starts. */ + unsigned long base_virtual; +}; + /* These fields in struct page are used by the page_pool and net stack: * * struct { @@ -54,6 +63,16 @@ NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET +static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) +{ + return niov->owner; +} + +static inline unsigned int net_iov_idx(const struct net_iov *niov) +{ + return niov - net_iov_owner(niov)->niovs; +} + /* netmem */ /** diff --git a/net/core/devmem.c b/net/core/devmem.c index 5e1a05082ab8..c250db6993d3 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -32,14 +32,15 @@ static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, { struct dmabuf_genpool_chunk_owner *owner = chunk->owner; - kvfree(owner->niovs); + kvfree(owner->area.niovs); kfree(owner); } static dma_addr_t net_devmem_get_dma_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct dmabuf_genpool_chunk_owner 
*owner; + owner = net_devmem_iov_to_chunk_owner(niov); return owner->base_dma_addr + ((dma_addr_t)net_iov_idx(niov) << PAGE_SHIFT); } @@ -82,7 +83,7 @@ net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) offset = dma_addr - owner->base_dma_addr; index = offset / PAGE_SIZE; - niov = &owner->niovs[index]; + niov = &owner->area.niovs[index]; niov->pp_magic = 0; niov->pp = NULL; @@ -250,9 +251,9 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->base_virtual = virtual; + owner->area.base_virtual = virtual; owner->base_dma_addr = dma_addr; - owner->num_niovs = len / PAGE_SIZE; + owner->area.num_niovs = len / PAGE_SIZE; owner->binding = binding; err = gen_pool_add_owner(binding->chunk_pool, dma_addr, @@ -264,17 +265,17 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, goto err_free_chunks; } - owner->niovs = kvmalloc_array(owner->num_niovs, - sizeof(*owner->niovs), - GFP_KERNEL); - if (!owner->niovs) { + owner->area.niovs = kvmalloc_array(owner->area.num_niovs, + sizeof(*owner->area.niovs), + GFP_KERNEL); + if (!owner->area.niovs) { err = -ENOMEM; goto err_free_chunks; } - for (i = 0; i < owner->num_niovs; i++) { - niov = &owner->niovs[i]; - niov->owner = owner; + for (i = 0; i < owner->area.num_niovs; i++) { + niov = &owner->area.niovs[i]; + niov->owner = &owner->area; page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), net_devmem_get_dma_addr(niov)); } diff --git a/net/core/devmem.h b/net/core/devmem.h index 99782ddeca40..a2b9913e9a17 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -10,6 +10,8 @@ #ifndef _NET_DEVMEM_H #define _NET_DEVMEM_H +#include <net/netmem.h> + struct netlink_ext_ack; struct net_devmem_dmabuf_binding { @@ -51,17 +53,11 @@ struct net_devmem_dmabuf_binding { * allocations from this chunk. */ struct dmabuf_genpool_chunk_owner { - /* Offset into the dma-buf where this chunk starts. 
*/ - unsigned long base_virtual; + struct net_iov_area area; + struct net_devmem_dmabuf_binding *binding; /* dma_addr of the start of the chunk. */ dma_addr_t base_dma_addr; - - /* Array of net_iovs for this chunk. */ - struct net_iov *niovs; - size_t num_niovs; - - struct net_devmem_dmabuf_binding *binding; }; void __net_devmem_dmabuf_binding_free(struct net_devmem_dmabuf_binding *binding); @@ -75,20 +71,17 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, void dev_dmabuf_uninstall(struct net_device *dev); static inline struct dmabuf_genpool_chunk_owner * -net_iov_owner(const struct net_iov *niov) +net_devmem_iov_to_chunk_owner(const struct net_iov *niov) { - return niov->owner; -} + struct net_iov_area *owner = net_iov_owner(niov); -static inline unsigned int net_iov_idx(const struct net_iov *niov) -{ - return niov - net_iov_owner(niov)->niovs; + return container_of(owner, struct dmabuf_genpool_chunk_owner, area); } static inline struct net_devmem_dmabuf_binding * net_devmem_iov_binding(const struct net_iov *niov) { - return net_iov_owner(niov)->binding; + return net_devmem_iov_to_chunk_owner(niov)->binding; } static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) @@ -98,7 +91,7 @@ static inline u32 net_devmem_iov_binding_id(const struct net_iov *niov) static inline unsigned long net_iov_virtual_addr(const struct net_iov *niov) { - struct dmabuf_genpool_chunk_owner *owner = net_iov_owner(niov); + struct net_iov_area *owner = net_iov_owner(niov); return owner->base_virtual + ((unsigned long)net_iov_idx(niov) << PAGE_SHIFT); From patchwork Wed Dec 18 00:37:30 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912796 Received: from mail-pf1-f175.google.com (mail-pf1-f175.google.com [209.85.210.175]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by 
smtp.subspace.kernel.org (Postfix) with ESMTPS id 6321A3596A for ; Wed, 18 Dec 2024 00:38:00 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.175 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482281; cv=none; b=Gcns/bV+BoaWYt52sEy8+QGM9Me1pLzLkAIY2GGeHqyElDTXQOKBMqrtbyiPEydL0yVGMolOohsfLuuVkgFbYJo/iFBFnbOhfJqL9rM+EGLIAGuBWfjqus9rw6aCLRn6qFn0m09VFyUiEhkfMyVXEEGI1fOgy2GXN4qlWoVWQLo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482281; c=relaxed/simple; bh=EBvFIsawVGxLxMU3p1j1YX1UqSiCQdW1wkhcOZFyBMQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=m6j4m3sje4iQOXAueqqKQDZJvpww06ydzoh8MS1Oz7Y7pRvUEzi2d056C95VkUwyoiele8Fnq+EzYgkcVAvtOJuRbcDHNYC/C4KRTDRklSrESUVnGXpsr0GNlawSz46KTBW0i8aC2LUh2Y+Qbe5AW+ovOcIGbVUQTXyOWoCY178= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=bxVd30/T; arc=none smtp.client-ip=209.85.210.175 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="bxVd30/T" Received: by mail-pf1-f175.google.com with SMTP id d2e1a72fcca58-725dac69699so5189596b3a.0 for ; Tue, 17 Dec 2024 16:38:00 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482280; x=1735087080; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; 
bh=7+YHcXEAtBvssxU5fzHig35lu0TSHQacf/OaCK3OLTg=; b=bxVd30/TDcvR+Esh7VtUU7P1YMAG2Cqx/q8o6i0lnUUJc6YjRdYjvLl9FnFnH78/t6 HB5h6FD7lHKbBpHoEd+a/xa6W9Q2Lg9exbo/T0CVoCNHgyOxP8XLbePi8MlH6eEqFGFj +ncR4mBvxRZD6gvXjmp0EZMqGdlA6l4UpxonD4lireq/6x72udBrw6EXK39XzNY47zqP M62dTaplEjPPDpX/Z5rBkB5IzT1gB08IQ9mZdJ0blsMXx484whunMktywSBS43H5t4y1 YfXzTVCeXDQieggMhUsW3GFAbDXuAlXBJtY8GqLte51F9QvaokZVFA4rQDgQHw8dj8VH EjCA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482280; x=1735087080; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=7+YHcXEAtBvssxU5fzHig35lu0TSHQacf/OaCK3OLTg=; b=RHIAIQZ6CWawXoXWaYHechKORJuJ0C4VUHQ9O4jwqDMvn59qyOW8R/zmiMpcM5IUdY NT7tndWH5x54Q9VlEWqDTeE8UJz9HfrDzOuyQeQfKkzCnCj/5EFErocijK177mOYMvH+ F85/GXHJt4t90INtd+vgc7ainE1APm0KiHKUfbDHyKzkpzn64erTP1vwwlD+91qmwFMb X8Z8M6z4dc5wCP91P0mR5tMK5aW7Fw94Qrkgvs8Sfp5a6Bav/6tqNBhaicTpV3YGsqF0 PLe0FoEeSPFy2JjXFe/Ta80MBHcfwUJwOUUqU6VrIT4Z69Ff7uZBzKIx8206R8dmtUd+ 4fGQ== X-Gm-Message-State: AOJu0YyGU+66kjhRMk5psREzrOkh6SdD7ye0UG+nNHCEeWXV+7AYDpUo 5aIaOmr5kKWaJ+aBmDK2wvkBmFVrXDorXMm9xVO/Q+gvqRkffUdehLU9/f73fgHu1Cs3vB5Tn/F i X-Gm-Gg: ASbGncucAJuS7TU64MtMLOM8a3OhmpF/QErmeQxNF56dEQnRqGnI/MkF16udzOUlgH5 M0AWLBhjt0DVFVp+33bRsl3x3aieSzcNE8I1oXRSwdgRdlxKLHt/4u2J7AI1wDRsLIC799sYi52 W5IRSfcDglFc9jB5ddpkqkagE5MRyGOXcx4fztpQmVYaUIo8zXv82tmyixXR9wmoHXQCwSATMXL 4QODYRJ6E5D4BD1VSPksFzn1jBE6qpozEuFSw43 X-Google-Smtp-Source: AGHT+IHNaclYzql/KiG+f8dQp/Qdpzpc2xkSKZ9c3EdFR/saiYZ/O/25OFfs9MN3+2HMMxYSmkpHVA== X-Received: by 2002:a05:6a00:3693:b0:725:9f02:489f with SMTP id d2e1a72fcca58-72a8d2dfd73mr1599294b3a.26.1734482279804; Tue, 17 Dec 2024 16:37:59 -0800 (PST) Received: from localhost ([2a03:2880:ff:5::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5a90693sm6473914a12.9.2024.12.17.16.37.59 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:37:59 
-0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 04/20] net: page_pool: create hooks for custom page providers Date: Tue, 17 Dec 2024 16:37:30 -0800 Message-ID: <20241218003748.796939-5-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Jakub Kicinski Page providers which try to reuse the same pages will need to hold onto the ref even if the page gets released from the pool: releasing the page from the pp just transfers the "ownership" reference from the pp to the provider, and the provider will wait for other references to be gone before feeding the page back into the pool. 
Signed-off-by: Jakub Kicinski Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/page_pool/types.h | 9 +++++++++ net/core/devmem.c | 14 +++++++++++++- net/core/page_pool.c | 22 ++++++++++++++-------- 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index ed4cd114180a..d6241e8a5106 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -152,8 +152,16 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct memory_provider_ops { + netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp); + bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem); + int (*init)(struct page_pool *pool); + void (*destroy)(struct page_pool *pool); +}; + struct pp_memory_provider_params { void *mp_priv; + const struct memory_provider_ops *mp_ops; }; struct page_pool { @@ -216,6 +224,7 @@ struct page_pool { struct ptr_ring ring; void *mp_priv; + const struct memory_provider_ops *mp_ops; #ifdef CONFIG_PAGE_POOL_STATS /* recycle stats are per-cpu to avoid locking */ diff --git a/net/core/devmem.c b/net/core/devmem.c index c250db6993d3..48903b7ab215 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -26,6 +26,8 @@ /* Protected by rtnl_lock() */ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); +static const struct memory_provider_ops dmabuf_devmem_ops; + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) @@ -117,6 +119,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) WARN_ON(rxq->mp_params.mp_priv != binding); rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; rxq_idx = get_netdev_rx_queue_index(rxq); @@ -142,7 +145,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, } rxq = __netif_get_rx_queue(dev, rxq_idx); - if (rxq->mp_params.mp_priv) { + if 
(rxq->mp_params.mp_ops) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); return -EEXIST; } @@ -160,6 +163,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return err; rxq->mp_params.mp_priv = binding; + rxq->mp_params.mp_ops = &dmabuf_devmem_ops; err = netdev_rx_queue_restart(dev, rxq_idx); if (err) @@ -169,6 +173,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, err_xa_erase: rxq->mp_params.mp_priv = NULL; + rxq->mp_params.mp_ops = NULL; xa_erase(&binding->bound_rxqs, xa_idx); return err; @@ -388,3 +393,10 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) /* We don't want the page pool put_page()ing our net_iovs. */ return false; } + +static const struct memory_provider_ops dmabuf_devmem_ops = { + .init = mp_dmabuf_devmem_init, + .destroy = mp_dmabuf_devmem_destroy, + .alloc_netmems = mp_dmabuf_devmem_alloc_netmems, + .release_netmem = mp_dmabuf_devmem_release_page, +}; diff --git a/net/core/page_pool.c b/net/core/page_pool.c index e07ad7315955..784a547b2ca4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -285,13 +285,19 @@ static int page_pool_init(struct page_pool *pool, rxq = __netif_get_rx_queue(pool->slow.netdev, pool->slow.queue_idx); pool->mp_priv = rxq->mp_params.mp_priv; + pool->mp_ops = rxq->mp_params.mp_ops; } - if (pool->mp_priv) { + if (pool->mp_ops) { if (!pool->dma_map || !pool->dma_sync) return -EOPNOTSUPP; - err = mp_dmabuf_devmem_init(pool); + if (WARN_ON(!is_kernel_rodata((unsigned long)pool->mp_ops))) { + err = -EFAULT; + goto free_ptr_ring; + } + + err = pool->mp_ops->init(pool); if (err) { pr_warn("%s() mem-provider init failed %d\n", __func__, err); @@ -588,8 +594,8 @@ netmem_ref page_pool_alloc_netmems(struct page_pool *pool, gfp_t gfp) return netmem; /* Slow-path: cache empty, do real allocation */ - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - netmem = 
mp_dmabuf_devmem_alloc_netmems(pool, gfp); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + netmem = pool->mp_ops->alloc_netmems(pool, gfp); else netmem = __page_pool_alloc_pages_slow(pool, gfp); return netmem; @@ -680,8 +686,8 @@ void page_pool_return_page(struct page_pool *pool, netmem_ref netmem) bool put; put = true; - if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_priv) - put = mp_dmabuf_devmem_release_page(pool, netmem); + if (static_branch_unlikely(&page_pool_mem_providers) && pool->mp_ops) + put = pool->mp_ops->release_netmem(pool, netmem); else __page_pool_release_page_dma(pool, netmem); @@ -1049,8 +1055,8 @@ static void __page_pool_destroy(struct page_pool *pool) page_pool_unlist(pool); page_pool_uninit(pool); - if (pool->mp_priv) { - mp_dmabuf_devmem_destroy(pool); + if (pool->mp_ops) { + pool->mp_ops->destroy(pool); static_branch_dec(&page_pool_mem_providers); } From patchwork Wed Dec 18 00:37:31 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912797 Received: from mail-pf1-f170.google.com (mail-pf1-f170.google.com [209.85.210.170]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EA697374D1 for ; Wed, 18 Dec 2024 00:38:01 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.170 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482284; cv=none; b=OboP+WNqeDCJjz77ew8x88W7iIWuq9Epue4WCb17nVMy8KFURg112SDx+phaV4XLegCC6RaI/FwvQaEf6dZf2QE+ZvcutsqRmUSFEJnuHvAbs5Jz8WnxtKKx4n85jkc20eWJeGY6mvYM/GyJZWjvZZvT3+sS3YRy4+VpsLEYWn4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482284; c=relaxed/simple; bh=135g8sWdi14q9q8Itt2VZQPm7R0gFcz1ULHZpEn4dmo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: 
MIME-Version; b=khaPgEVqa5hT6tToLsy+FGTWX8nJtjbLPXH+ZvhowqmjrqYVC3o9Im+1v1PMnIWkuypSXJF4/H2Ur/lglQHhSnquebtFrh6taJQ7bNnPxuhUAXVfiouCyr+XfEYUppNVzQ0YK73I2USeXvxYIihlWAlpdqvWRwXVpLp3f5fp0XQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=Avswi1MT; arc=none smtp.client-ip=209.85.210.170 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="Avswi1MT" Received: by mail-pf1-f170.google.com with SMTP id d2e1a72fcca58-7265c18d79bso6295701b3a.3 for ; Tue, 17 Dec 2024 16:38:01 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482281; x=1735087081; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=KQ73wFKVt12qccWHG7cPPzOfiVyAwI1MB/XTBNYCLeg=; b=Avswi1MT3cwCmp8ZmdnQvv6eSNnUtLFSxWtgryWPBUBQv3fnmraImOQdxuyzCx8zqb cDmVA1irUZ5SNO5S+VRr+O3Xb5d6RL4rXN06t+iPlPR3LbHlhwc5cR4PvAwYAXVCpXrQ Fp9gUV3jy5EgVYLp/fUo66PerxeZUctk6SbpUkH2KVeig71w0VR1zllAWmrd26KkUuUr a8ldijZFoD4c983kq4Vfk6FULYoe/Fkltj27Ig/vIhC45NlmaGqVmu72a6KTXEUYBvXS dkceQTUVzFcbd1CHllKp2qRfQOk7JsFGveFbsJmFlHxk5h3p6yKEkJZe6lA/u/xHIPkg Bacg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482281; x=1735087081; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; 
bh=KQ73wFKVt12qccWHG7cPPzOfiVyAwI1MB/XTBNYCLeg=; b=kReUGZwmQ51Y6HUM3mu3Ue3DT1xEp/N5DvLlwfeyIZxjrczZk3ac5BcjD1tZO4cEGg yxBbuKILry44iof7VJiDFpLE9hRAi3stTgHCrg631Lw+Rxm6jPBYCo8nUZgMaiQTObZ9 Z/EprRM6V334PpYuWWqTsNkNo1lStwBBP8D73j39kYF1Y8L/34jsPTuYr5BpGX3jyJF9 gtvPxEiofceG/IKcPfYTuZfMdNbAiFwn88oeT+2obPWyKg86fS4sshoj3MjdLzsS8MiI lNkOR/5iSWvwG3tdO3+KQVoSeDTPlbdp8/QHQt2QCoSoXLM4ULElZYjQhhZraJBSANtG ba/g== X-Gm-Message-State: AOJu0YyvBWKIY6VDU3nRz6WPXaVVkYE2Mhd+86Mj/RzIIXeGS7n7ioG9 I/3pS+o1oQmaoopKmNnBF4vArSlHqM3z4QzldqY0jiBjBxE4vIKjPB6Nu0EXRuwUBxkfe+nOayt X X-Gm-Gg: ASbGncsZM9cIjBHi8w0IhbL072CpDbs9jT6k5U5DXJ26DOH4iD7y5YRlkRHRUN77TOo TfdshXolroclg3uKMrY/VqSniVbTfEKBg4VxvENhV/TmSucy9q+zvY+xwWBbLqoirGLSM66O5sS qIBEzKw/nUwKj1PfJcGTRQZtFvs68JpmVcutmpHNO+mZX7T1LcK0fAAo/+y46vqpX4FV7CKsjjD BFCZ3D0Pe+uGRTKY+F3Nn98b7Y33GR+dgpvbgtj+w== X-Google-Smtp-Source: AGHT+IEncnAy4AjWnukyzHbHIB+xx0R8pVx/ioXleyWjgeqxb6nBK1dbkbwXfUfkrn1tzDRwDHd5ig== X-Received: by 2002:a05:6a21:c94:b0:1e1:a647:8a3f with SMTP id adf61e73a8af0-1e5b4824307mr2076401637.22.1734482281227; Tue, 17 Dec 2024 16:38:01 -0800 (PST) Received: from localhost ([2a03:2880:ff:72::]) by smtp.gmail.com with ESMTPSA id d2e1a72fcca58-72918bad9f8sm7329764b3a.150.2024.12.17.16.38.00 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:00 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 05/20] net: page_pool: add mp op for netlink reporting Date: Tue, 17 Dec 2024 16:37:31 -0800 Message-ID: <20241218003748.796939-6-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Add a mandatory memory provider callback that prints information about the provider. Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/page_pool/types.h | 1 + net/core/devmem.c | 9 +++++++++ net/core/page_pool_user.c | 3 +-- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index d6241e8a5106..a473ea0c48c4 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -157,6 +157,7 @@ struct memory_provider_ops { bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem); int (*init)(struct page_pool *pool); void (*destroy)(struct page_pool *pool); + int (*nl_report)(const struct page_pool *pool, struct sk_buff *rsp); }; struct pp_memory_provider_params { diff --git a/net/core/devmem.c b/net/core/devmem.c index 48903b7ab215..df51a6c312db 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -394,9 +394,18 @@ bool mp_dmabuf_devmem_release_page(struct page_pool *pool, netmem_ref netmem) return false; } +static int mp_dmabuf_devmem_nl_report(const struct page_pool *pool, + struct sk_buff *rsp) +{ + const struct net_devmem_dmabuf_binding *binding = pool->mp_priv; + + return nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id); +} + static const struct memory_provider_ops dmabuf_devmem_ops = { .init = mp_dmabuf_devmem_init, .destroy = mp_dmabuf_devmem_destroy, 
.alloc_netmems = mp_dmabuf_devmem_alloc_netmems, .release_netmem = mp_dmabuf_devmem_release_page, + .nl_report = mp_dmabuf_devmem_nl_report, }; diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 8d31c71bea1a..61212f388bc8 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -214,7 +214,6 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { - struct net_devmem_dmabuf_binding *binding = pool->mp_priv; size_t inflight, refsz; void *hdr; @@ -244,7 +243,7 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, pool->user.detach_time)) goto err_cancel; - if (binding && nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id)) + if (pool->mp_ops && pool->mp_ops->nl_report(pool, rsp)) goto err_cancel; genlmsg_end(rsp, hdr); From patchwork Wed Dec 18 00:37:32 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912798 Received: from mail-pj1-f45.google.com (mail-pj1-f45.google.com [209.85.216.45]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 49FF34436E for ; Wed, 18 Dec 2024 00:38:03 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.216.45 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482284; cv=none; b=NnzRmSx60TipbIzg2K26+lC641MMJ/27BzJP9+4WqjNSGfwvpDzQSkurZM9AJSYl8t2+zRnlh4SRcDcBX1XpH2B7htrtexba2WXRkw9qbfpN8Ik/jCc689U+gx7PmpZsKaa1s/7JQQdQV2aDc9sumX6lUpdC6S+Qsh5AQWCJFAA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482284; c=relaxed/simple; bh=rCxxW/itp5x+499AxEs1p7a9ucFkJqEo/t2FhOUVYr8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=sJdDgGOO3V6Tonzgol8MhErE6SrSnNOaolVtSseAjgAQLITL1GILPVsjrPB/ZH/bAI/babin5vgdoCkR9ScG11zRr71xC/hlu9ePRHkjFq79X520f4EWAH29uoJn7iliqVDRx7S3mzK6xY/1x1d4n+oYG6M8R2E/KAdCWgQ/liQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=IizXIM2T; arc=none smtp.client-ip=209.85.216.45 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="IizXIM2T" Received: by mail-pj1-f45.google.com with SMTP id 98e67ed59e1d1-2ef8c012913so4256744a91.3 for ; Tue, 17 Dec 2024 16:38:03 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482282; x=1735087082; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=6AfGsKFHOkdqeOf3FsE461f86oQPOHTvgRIj0UuaaTA=; b=IizXIM2TxHQthfI2Nt5WB82fKY3ajugmqa6MrH9htFN/5JhnAwuQzH+1bAbWmN7W/k 8etCF+ZWOw6K7Re6OCMo/YYlgPnQWYL98wk+z1ZGhhQxSzULOeBc0vme6kjK1CRaioWE gvN8m39njxL4VipouV3I5N8JwVCLtcw7MmbWNiTmsFRIY4G2xzHme9huMUYCYuPxtOyA 9S3DD5bhIXE2LW0A9HQD33uD6PS38BpU37vRZesw7ZtpBOYnuUa1B8QiZ7fCU7+pM8Nh gbkPlFz/LWId6DN1TGr2eq0n6nEKEo/x8rAcLxTAk4Rb2SuUeirJf0lklhTcwxldG8E7 gQ8A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482282; x=1735087082; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; 
bh=6AfGsKFHOkdqeOf3FsE461f86oQPOHTvgRIj0UuaaTA=; b=RXqF5f9hbqB4fD3H8HfcBL760Qt+AdFtAJT4820nrBm7kTJL1/jzoxmiIh19lbf8V8 5GgiJ1Wu07iPr/lO2vYOzxKMt8u3T/DSXk643jhsrtv75d8ar8jfVEPgutt5dOILb+SB Korf3bc5QEN0UnyottPxdfTZzNe/7mttvXdLnEKY1QeVDgaoqXqSU6jakhOKvaBziGjT 8eeUXV4MB0BlH9j4geMhbVVfzhlj5+f1ZCqKz3tw+aZM2Jr57ebRgorLzi76Oj2/h+P1 RAJ0TFmfhk08WliVcD2pkUIOppkNQ0DimkYWDvBmgyoB4spviUjC1OMKuNkK92bONOiZ m4fg== X-Gm-Message-State: AOJu0Yy3yhLhOMD3VeA0txBiLlZsuCghJbDELxvqcfKFKzCNaVICNbMm klvCPkyZXYN1MXpp5nibwoR3SFx6prLC5K/WLWVUX9CiLm8egXe7h5dI/WznD1SV2oAEMi5Jpqr j X-Gm-Gg: ASbGncsCAH29VK0lHOxBoS/lswLC1KrlmxMUb4wFihXb09VUhuN/5bjXIrOTatlAxzA wF0wEbz8wnUXbunOthFIDhZPA7scbuHDAq6lbTHEhKJ3j/jgy41rktK6zf7frAnoisyQuiIWQIS 9hOU1Iv69iyxpz4x2B1sfUeq+KZVxfD6kcd1Pu7/15f0kTxEw4nBpopcYeMlR7dvpYIk8a+qH4K wTcrVTP0qXoL/CbNBAnGqIyN39jNnZgquQbhEe9 X-Google-Smtp-Source: AGHT+IGR5LtCiHdpBMT9BbBFtuBIuICGu+bP7i6GvYwDVpp1zApjpbwEqvUEmYFj/DD4IWaUS2GokQ== X-Received: by 2002:a17:90b:2b87:b0:2ee:45fe:63b5 with SMTP id 98e67ed59e1d1-2f2e91c4fcbmr1321471a91.3.1734482282595; Tue, 17 Dec 2024 16:38:02 -0800 (PST) Received: from localhost ([2a03:2880:ff:c::]) by smtp.gmail.com with ESMTPSA id 98e67ed59e1d1-2f2ed52cdc3sm111852a91.3.2024.12.17.16.38.01 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:02 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 06/20] net: page_pool: add a mp hook to unregister_netdevice* Date: Tue, 17 Dec 2024 16:37:32 -0800 Message-ID: <20241218003748.796939-7-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Devmem TCP needs a hook in unregister_netdevice_many_notify() to maintain the set of queues it's bound to, i.e. ->bound_rxqs. Instead of devmem sticking directly out of the generic path, add a mp function. Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/page_pool/types.h | 3 +++ net/core/dev.c | 15 ++++++++++++++- net/core/devmem.c | 36 ++++++++++++++++------------------- net/core/devmem.h | 5 ----- 4 files changed, 33 insertions(+), 26 deletions(-) diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index a473ea0c48c4..140fec6857c6 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -152,12 +152,15 @@ struct page_pool_stats { */ #define PAGE_POOL_FRAG_GROUP_ALIGN (4 * sizeof(long)) +struct netdev_rx_queue; + struct memory_provider_ops { netmem_ref (*alloc_netmems)(struct page_pool *pool, gfp_t gfp); bool (*release_netmem)(struct page_pool *pool, netmem_ref netmem); int (*init)(struct page_pool *pool); void (*destroy)(struct page_pool *pool); int (*nl_report)(const struct page_pool *pool, struct sk_buff *rsp); + void (*uninstall)(void *mp_priv, struct netdev_rx_queue *rxq); }; struct pp_memory_provider_params { diff --git a/net/core/dev.c b/net/core/dev.c index c7f3dea3e0eb..aa082770ab1c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11464,6 +11464,19 @@ void unregister_netdevice_queue(struct net_device
*dev, struct list_head *head) } EXPORT_SYMBOL(unregister_netdevice_queue); +static void dev_memory_provider_uninstall(struct net_device *dev) +{ + unsigned int i; + + for (i = 0; i < dev->real_num_rx_queues; i++) { + struct netdev_rx_queue *rxq = &dev->_rx[i]; + struct pp_memory_provider_params *p = &rxq->mp_params; + + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + } +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { @@ -11516,7 +11529,7 @@ void unregister_netdevice_many_notify(struct list_head *head, dev_tcx_uninstall(dev); dev_xdp_uninstall(dev); bpf_dev_bound_netdev_unregister(dev); - dev_dmabuf_uninstall(dev); + dev_memory_provider_uninstall(dev); netdev_offload_xstats_disable_all(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c index df51a6c312db..4ef67b63ea74 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -308,26 +308,6 @@ net_devmem_bind_dmabuf(struct net_device *dev, unsigned int dmabuf_fd, return ERR_PTR(err); } -void dev_dmabuf_uninstall(struct net_device *dev) -{ - struct net_devmem_dmabuf_binding *binding; - struct netdev_rx_queue *rxq; - unsigned long xa_idx; - unsigned int i; - - for (i = 0; i < dev->real_num_rx_queues; i++) { - binding = dev->_rx[i].mp_params.mp_priv; - if (!binding) - continue; - - xa_for_each(&binding->bound_rxqs, xa_idx, rxq) - if (rxq == &dev->_rx[i]) { - xa_erase(&binding->bound_rxqs, xa_idx); - break; - } - } -} - /*** "Dmabuf devmem memory provider" ***/ int mp_dmabuf_devmem_init(struct page_pool *pool) @@ -402,10 +382,26 @@ static int mp_dmabuf_devmem_nl_report(const struct page_pool *pool, return nla_put_u32(rsp, NETDEV_A_PAGE_POOL_DMABUF, binding->id); } +static void mp_dmabuf_devmem_uninstall(void *mp_priv, + struct netdev_rx_queue *rxq) +{ + struct net_devmem_dmabuf_binding *binding = mp_priv; + struct netdev_rx_queue *bound_rxq; + unsigned long xa_idx; + + xa_for_each(&binding->bound_rxqs, xa_idx, 
bound_rxq) { + if (bound_rxq == rxq) { + xa_erase(&binding->bound_rxqs, xa_idx); + break; + } + } +} + static const struct memory_provider_ops dmabuf_devmem_ops = { .init = mp_dmabuf_devmem_init, .destroy = mp_dmabuf_devmem_destroy, .alloc_netmems = mp_dmabuf_devmem_alloc_netmems, .release_netmem = mp_dmabuf_devmem_release_page, .nl_report = mp_dmabuf_devmem_nl_report, + .uninstall = mp_dmabuf_devmem_uninstall, }; diff --git a/net/core/devmem.h b/net/core/devmem.h index a2b9913e9a17..8e999fe2ae67 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -68,7 +68,6 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding); int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, struct net_devmem_dmabuf_binding *binding, struct netlink_ext_ack *extack); -void dev_dmabuf_uninstall(struct net_device *dev); static inline struct dmabuf_genpool_chunk_owner * net_devmem_iov_to_chunk_owner(const struct net_iov *niov) @@ -145,10 +144,6 @@ net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -EOPNOTSUPP; } -static inline void dev_dmabuf_uninstall(struct net_device *dev) -{ -} - static inline struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding) { From patchwork Wed Dec 18 00:37:33 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912799 Received: from mail-pf1-f175.google.com (mail-pf1-f175.google.com [209.85.210.175]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7F1CC57C93 for ; Wed, 18 Dec 2024 00:38:04 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.175 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482286; cv=none; 
b=Z9HjrgYF6PLmaK9Ci3JXQlXEBPnABkBFw8Q3S/zPeYsqbZ0lvgJFyGWfcbG6/S7b4ViCt8fugfSk0dmqTM0cn8WZsJ/2ewScqZ8HIT3gzmpwdCeNvLvJQ/RrZHWowTpVkZtW1Twx55h3GnH8c0X7UpAJURQQRLjd70hUdjo87J0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482286; c=relaxed/simple; bh=u8mGZDNB9Qjmsc/UlJUfk9+yqa87F2hF6bBOxlT29Z8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=o9PquNEJKgGBMQMpW8jKqI5gYiGFIaBRbPEVgEB2xHWYkZ2CTDWdyL+CLc8rxah4+PpwpOUf/prgAMcklmvzFxRjaN17QjHp22s0Pq8tTClFkws6p9dkYKhm1NRNmthGcUKp8zVx3iFDrYchGXbhPYX19coo7/yk4PgIqGVCATg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=y/uNsJon; arc=none smtp.client-ip=209.85.210.175 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="y/uNsJon" Received: by mail-pf1-f175.google.com with SMTP id d2e1a72fcca58-725f2f79ed9so4716646b3a.2 for ; Tue, 17 Dec 2024 16:38:04 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482284; x=1735087084; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=SfBsLg62G3udJQ4BEA3v9yLwP4UTZUhvXQSSmczl4Gc=; b=y/uNsJonVkGAR4Y9RJlTtMp5RjcIPPaddX//xp21xRWymBP2JlBIoKCe9K5EHoL618 sF2f7RGPNjhspZ2oqOJXENnFPQhUUQTFaB9cvOWlrnqjdtvsuPxdczW9CRIL1yp7Oh5u 4H5MqbgP4ID3p20qT/TEF05g06zR+SBVtXs7YETrPYpBO5WsnePvZxJ0WCUgoGdmvV6H 
BhQvJzxKCOYZwYnJgkFMZIWBgDsyQImK2+z/fqbvBOuBf1+XuWElRaeaBgyJrBgTd8ei 7IBSpWssqMUeyRD6dUTVOFcgkc0nfiSHYQ+GCPI0yUGDx/QEHW9Pgbw75CR7Fo+0k2+i E7ag== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482284; x=1735087084; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=SfBsLg62G3udJQ4BEA3v9yLwP4UTZUhvXQSSmczl4Gc=; b=bJ0Zb4p6Ilwq2qJwHQLgO+HFvMINe8ob1fkZBWhgSESG/r/L49394KWJJdE2O//ky1 pvbLEe3IXbRNjgH7SHH61Msz+VWcBHKel3yeNHVilempDKxbXQrvnPz5au2TONum1MpE PpRlp1oKNM8zP30raxA53Zfw+FYE2ey8ubByJLluPONla5T0No51dxZpjcCivl3rnpYR IVYgzNgaVsHs+P2VPYIsj06eESJVGXLNRKwfO/4cMCpGWl8H80ssSsjE/ZM81MmjQA+7 4OzoZnMUrs+pRl/9R74hC37iV4M5OJEKDpRzKe/iKvedt7Ed6dmscc4DgV80+heG6W8I hIgw== X-Gm-Message-State: AOJu0YxnfHq7AiNa7q7w2Bz9jW1/bB/nS62Mzb7uddvzqJlTAtVyq+wB a7MuzgX/Mwm+/0PIChNlpCycJ6xyoIT2xlu9mABA55OdR30te9aQgY4TXqJX2wPCDhjQuQK3jMH V X-Gm-Gg: ASbGncth+R9p2gJVixDZ9rjsZBu8z7SXiMU7ZAdzK5AONRuEcMbRtNuiK9qV016H/0v ya2L6Yg0YEYDUW99srOVXBD5EbPCZ7ldoHnr8nrpxSTWPgCN2W/R0s7KUG2CcZx06/Q4teqSr3X s512aad+t7gKMhYbuBL/Wwun0rb6wZKrGG8hk0u59r/I3GzeYNk1nzVSxTIQvjOpcOrCGpZWGNW fC80pKOUflx9NgSVKoB3Odm6BMQwEp5+cUpv+2wSA== X-Google-Smtp-Source: AGHT+IEw9r6pYaV+WZpZxDFOXvkhuSj4oVM1TjX5jqIldm+QJwQg57nxRHAvaycdcT0mPs5JISR8Fg== X-Received: by 2002:a05:6a20:8408:b0:1e1:c26d:d7fd with SMTP id adf61e73a8af0-1e5b48a0f20mr1498011637.37.1734482283788; Tue, 17 Dec 2024 16:38:03 -0800 (PST) Received: from localhost ([2a03:2880:ff:10::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5ad1db8sm6414671a12.44.2024.12.17.16.38.03 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:03 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 07/20] net: prepare for non devmem TCP memory providers Date: Tue, 17 Dec 2024 16:37:33 -0800 Message-ID: <20241218003748.796939-8-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov There is a good bunch of places in generic paths assuming that the only page pool memory provider is devmem TCP. As we want to reuse the net_iov and provider infrastructure, we need to patch it up and explicitly check the provider type when we branch into devmem TCP code. Reviewed-by: Mina Almasry Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- net/core/devmem.c | 5 +++++ net/core/devmem.h | 8 ++++++++ net/ipv4/tcp.c | 5 +++++ 3 files changed, 18 insertions(+) diff --git a/net/core/devmem.c b/net/core/devmem.c index 4ef67b63ea74..fd2be02564dc 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -28,6 +28,11 @@ static DEFINE_XARRAY_FLAGS(net_devmem_dmabuf_bindings, XA_FLAGS_ALLOC1); static const struct memory_provider_ops dmabuf_devmem_ops; +bool net_is_devmem_iov(struct net_iov *niov) +{ + return niov->pp->mp_ops == &dmabuf_devmem_ops; +} + static void net_devmem_dmabuf_free_chunk_owner(struct gen_pool *genpool, struct gen_pool_chunk *chunk, void *not_used) diff --git a/net/core/devmem.h b/net/core/devmem.h index 8e999fe2ae67..5ecc1b2877e4 100644 --- a/net/core/devmem.h +++ b/net/core/devmem.h @@ -115,6 +115,8 @@ struct net_iov * net_devmem_alloc_dmabuf(struct net_devmem_dmabuf_binding *binding); void net_devmem_free_dmabuf(struct net_iov *ppiov); +bool net_is_devmem_iov(struct net_iov *niov); + #else struct net_devmem_dmabuf_binding; @@ -163,6 +165,12 @@ static inline u32 
net_devmem_iov_binding_id(const struct net_iov *niov) { return 0; } + +static inline bool +net_is_devmem_iov(struct net_iov *niov) +{ + return false; +} #endif #endif /* _NET_DEVMEM_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index b872de9a8271..7f43d31c9400 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2476,6 +2476,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb, } niov = skb_frag_net_iov(frag); + if (!net_is_devmem_iov(niov)) { + err = -ENODEV; + goto out; + } + end = start + skb_frag_size(frag); copy = end - offset; From patchwork Wed Dec 18 00:37:34 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912800 Received: from mail-pl1-f178.google.com (mail-pl1-f178.google.com [209.85.214.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A8ED06026A for ; Wed, 18 Dec 2024 00:38:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482287; cv=none; b=e6fXSRsZ4uCP5Ww9FJvISlGGYs6BqUk0ROEaw8cN36lf13xVTAFr5K0BQABxPFNVZ9nsWXQzZog/uTgvJRku6NqpAMiNzkP6h1ZGWmVWMAILvCke2VYFMWEdOroZ5EUt/Fb1z6nJF6rCOAeEVgITTSIRu1Yy+XhioIt6D2gSbe8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482287; c=relaxed/simple; bh=uPSiaeh6DtHd5VRMdyGgXwFwrt2HnMjLjuGJrg5Ylco=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=MfCQOEa9EYwY8zu5lrWJAq3oXY4WUR9S5U+srjZe4QppiFlHm732G5uQKHoZAkCopu+94wQMzMWWB6SEA/SRvxQRQDet+/L56EZA3lA2suj5d5r2wP7WfrgFvSV5PJDe35KMHL1dxnyeFOZURLJ9L6scu2xW+bDYM+OjM++ZvEQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit
key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=wlYzYFlw; arc=none smtp.client-ip=209.85.214.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="wlYzYFlw" Received: by mail-pl1-f178.google.com with SMTP id d9443c01a7336-216401de828so47565045ad.3 for ; Tue, 17 Dec 2024 16:38:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482285; x=1735087085; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=GQ0gxue1w/Dl+jX0cfhVJPh/MBAFHEZUjUzY9/gknQk=; b=wlYzYFlwzC7B7N1t+bkxXGKgez2Qq78Q5HHqq/NGw35f0VrYNJPz8WiSa7LrfWkbBU dhGWORqE1kUYn7wb7yc2eiJ3PVnvF3mPTqV/BEdNBEY4qrGiWGLmsgOcLgzqdT7iGQcb dvUp8+mnkuvh1xUQ8PL4zYPg2CzW4yZSZpw4pJLRVN6vijHhgc3P/haRJkoeP+hEiQzQ AAMitippFXNiX8gKaePp/qrMqZIRKc3ZCtcetSbHvIOPIsWcSg/S7MxPXiPlEIt2DLjU H5uTavLV7sap3SUugaEoKRRh3ijjt0+HvQvG5Q7lzYcIatSxYzsKDn1EqNG/KrqDsPNM cZ+A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482285; x=1735087085; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=GQ0gxue1w/Dl+jX0cfhVJPh/MBAFHEZUjUzY9/gknQk=; b=c9C/DOGi+YLW7zu5ierLp6UUJMN43G+MBEHSV+JJAC8sRNtmP1jteGT32J3+YvQ3A8 PtRANtwGIgTb8Qx2coWdPlZxrMFB7Tx58pD4HZeu2ugtzxMa7CGvXv2YgXyobOf4P6uS 6zpbUGrNISwDS2BsRqMDB2oFySDxxz5Q/TNF9J4VySfMJW2HpQhD7WxTmKmD/vNkrqZz m7FQNY8NGHTZNK4NJf6pNRS51ipl5+MoJOs+iMOY5VnhxFWmbeiQIc20EPFEq2UZ2cJR 
MFP5gsUrkH3tHsncMsbeHomHHvlNPCQG2ClMLUlpu8q0ZRjv3FBQFp/MjZKPMPeDRK08 jOXg== X-Gm-Message-State: AOJu0Yxr7LLi8FYN4OgVH9Fxw2UpfomuE6uSmMEXvcSbeiT5l9BLsBlo axU7DpjtzGZ5NCQQpVcWiMgqX2zbZq9PrHJMjG/n4BvEr3dj449aH5JASypKX2FuZ9LgWOx2jGO A X-Gm-Gg: ASbGnct7wzA0MS6CyXQvRYmu2QB3/iJWNp19DDrkGQuVJKeOIcUYjPE9RbS8h1ImEle JNHysAtlX+dgmc2WZg0LpRvkZw/6eL6WmH9Xj5oO4wsoBkNd9fknyST8u4HfQf+y8oBSt9wnn0r hSTgdzDWuYapjz2uutWI1u+34izsFa+o+4PlcrXcZyPhnuOgU36n153PQ86TRBhW+AOVaq7p0NU TJD0euKyVd1rGC8CAY4tfU1wRNdYBwUvFjHCv+hpw== X-Google-Smtp-Source: AGHT+IF263LqxEpg2kL0USvJQC0hWKInn1I93+k0bqBXdHYZk1LYQyelqDgccvsESwAXqztJWYW15A== X-Received: by 2002:a17:902:e5ca:b0:216:3436:b87e with SMTP id d9443c01a7336-218d72437ccmr11411065ad.44.1734482285091; Tue, 17 Dec 2024 16:38:05 -0800 (PST) Received: from localhost ([2a03:2880:ff:20::]) by smtp.gmail.com with ESMTPSA id d9443c01a7336-218a1e5011bsm65265705ad.152.2024.12.17.16.38.04 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:04 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 08/20] net: expose page_pool_{set,clear}_pp_info Date: Tue, 17 Dec 2024 16:37:34 -0800 Message-ID: <20241218003748.796939-9-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Memory providers need to set page pool to its net_iovs on allocation, so expose page_pool_{set,clear}_pp_info to providers outside net/. 
Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/page_pool/helpers.h | 13 +++++++++++++ net/core/page_pool_priv.h | 9 --------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index e555921e5233..872947179bae 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -483,4 +483,17 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) page_pool_update_nid(pool, new_nid); } +#if defined(CONFIG_PAGE_POOL) +void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem); +void page_pool_clear_pp_info(netmem_ref netmem); +#else +static inline void page_pool_set_pp_info(struct page_pool *pool, + netmem_ref netmem) +{ +} +static inline void page_pool_clear_pp_info(netmem_ref netmem) +{ +} +#endif + #endif /* _NET_PAGE_POOL_HELPERS_H */ diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 57439787b9c2..11a45a5f3c9c 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -36,18 +36,9 @@ static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) } #if defined(CONFIG_PAGE_POOL) -void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem); -void page_pool_clear_pp_info(netmem_ref netmem); int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq); #else -static inline void page_pool_set_pp_info(struct page_pool *pool, - netmem_ref netmem) -{ -} -static inline void page_pool_clear_pp_info(netmem_ref netmem) -{ -} static inline int page_pool_check_memory_provider(struct net_device *dev, struct netdev_rx_queue *rxq) { From patchwork Wed Dec 18 00:37:35 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912801 Received: from mail-pf1-f175.google.com (mail-pf1-f175.google.com [209.85.210.175]) (using TLSv1.2 with cipher 
ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E2ADB1D555 for ; Wed, 18 Dec 2024 00:38:06 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.175 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482288; cv=none; b=jIEpatVpcJ1FemCXm5phLTNfG9ZMpjiHTZg2gaalT0kcFxYYe8WlmAwfFPPUEChoUR9tNpjjuPvVlDAmFWUQuKmiFf1X5GKN4YfjCn6rui6HvIthVB2T3qSzjLuSuDQSxeFGfeGiPlnEstm+yGpKG/3MGHgZZad710yXEUk7VVQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482288; c=relaxed/simple; bh=vpb1RqE2oItfF+cd5OEWt41VjC/VVahEOuu/EmRc8Gg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=hzEIZijhnZx6rfhzNq/9ERsCSrlhKw5nuzabb48q7j9rSRvzLUBWf4BF2TVsrzps3NrdSGZbXtIdYuPqv9hEldb58lUL1qstfA5uasdftIW+ouyeXnLFCxsLk6JaWBMxRyWBG+xCtjaTKSBqsBbFTUGc5uEssyr05Xmk582xzlc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=H50uD3W4; arc=none smtp.client-ip=209.85.210.175 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="H50uD3W4" Received: by mail-pf1-f175.google.com with SMTP id d2e1a72fcca58-728f337a921so6146342b3a.3 for ; Tue, 17 Dec 2024 16:38:06 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482286; x=1735087086; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to 
:message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=vqHdgehgC1Ic8m6ep7wqnpAK9OPeHiEms51AX93eYRM=; b=H50uD3W4yo6bR4A2vdp28ASN52jXlz2Aclyi8Y7ddhOWZirV0J80NzSQLxrHpvtXBy sXZjpvYxFIYnMCweRnfL88YSzm9XOa+L1RicTGeh89yXiNoeif3loqFMH6efYNGODc8X nuDiYCpK0P2jqA0kzuxVoE/ELx1sZCWn8V/i31/uu0z89+o/T141CW0zjmYkaspIfTjU 16QthKVwQvsRPrMWnAQwzcZJoqibLJVRD5HWyhiYdc5UxTDLHw4k8qtH3hSPraaoxtUU nc94V9x4q+bkWnsDdZMoecIGwK1hhHIwRRdZ9yxEiyrnQ38vJPvNHm7fPwjKJ+xTagh8 mQtA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482286; x=1735087086; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=vqHdgehgC1Ic8m6ep7wqnpAK9OPeHiEms51AX93eYRM=; b=rq2cDxhsaO3EaZfsMlxj1vpN/KQbuVdUoc41jCVn/1exs2ubIo9MuDRr1cR+NGDbnd m1swLaapYW57juvEmtwoO5pgqczIxxQijR3x8DHh8tn6gfSjokGhDeUHN3OuWwIRVH9E JvlRkZNz8Bqg9q3oiAXxyVhQC1wjw6Rxy8LTGrOm9UArxB+lep8TUXAuHeh+01d+cmne 3V61JgSu8+alLHNwl0G5KmO1HrS3Ni3rI0H+gDcj3NAHDnRPQQfrpKdkH6yGCfoJ9zuv m7jl7KzqhzLEhT17ED7EbhPsUxODyQKKe8jLpQzHrhxY64U20GDNAirtcM7yZ8YuVi/U VxNA== X-Gm-Message-State: AOJu0YyH4RjkpsiXePJ2ByAcLRufmgKxZMEj9tdr54GOSi0dTqvh0D70 ACGDJDPStyM/6K/XN2QRJM55B6Qc8JIqlAS3eJl6cbesLpHLcJaafmD3y671qGNPtJL5K3ZbAED 4 X-Gm-Gg: ASbGncscudRFaetzhUdE16m6wz+pZ8hG+voWlIp+sJD28U5u95ACC+6Z20sXG25jI9C stf/35beEC21Xc3rCGgKiPzyhpT3fwv3KGQJ1Lby/yjCKktB96u54GDeRJy5FuRHeE1p8f8UU+K 77KTjRKWvx8HD5nqzkalKf8y1kJw6nJzyztITIvJ6/ZX2YHC3gH24F6aWV/vxzzCY34KRdQAhAl DeGAwwcFx6ixZ7sxf4sx071PQXikYeBK8hHdmgE X-Google-Smtp-Source: AGHT+IFFQoNzZ7s9BEUd/ES5OjK9CbE7OXHMUBrVHctTHOMC4qslzXHcsbqarIdlq0Oi0TFpPqvcqg== X-Received: by 2002:a05:6a21:8dc4:b0:1db:eead:c588 with SMTP id adf61e73a8af0-1e5b48733acmr1859163637.29.1734482286294; Tue, 17 Dec 2024 16:38:06 -0800 (PST) Received: from localhost ([2a03:2880:ff:b::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5c0effasm6428842a12.60.2024.12.17.16.38.05 
(version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:05 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 09/20] net: page_pool: introduce page_pool_mp_return_in_cache Date: Tue, 17 Dec 2024 16:37:35 -0800 Message-ID: <20241218003748.796939-10-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Add a helper that allows a page pool memory provider to efficiently return a netmem off the allocation callback. Reviewed-by: Mina Almasry Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/net/page_pool/helpers.h | 2 ++ net/core/page_pool.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 872947179bae..d968eebc4322 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -486,6 +486,8 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) #if defined(CONFIG_PAGE_POOL) void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem); void page_pool_clear_pp_info(netmem_ref netmem); + +void page_pool_mp_return_in_cache(struct page_pool *pool, netmem_ref netmem); #else static inline void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 784a547b2ca4..bd7f33d02652 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -1195,3 +1195,18 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid) } } 
EXPORT_SYMBOL(page_pool_update_nid); + +/* + * page_pool_mp_return_in_cache() - return a netmem to the allocation cache. + * @pool: pool from which pages were allocated + * @netmem: netmem to return + * + * Return already allocated and accounted netmem to the page pool's allocation + * cache. The function doesn't provide synchronisation and can only be called + * off the mp_ops->alloc_netmems() path. + */ +void page_pool_mp_return_in_cache(struct page_pool *pool, netmem_ref netmem) +{ + page_pool_dma_sync_for_device(pool, netmem, -1); + pool->alloc.cache[pool->alloc.count++] = netmem; +} From patchwork Wed Dec 18 00:37:36 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912802 Received: from mail-pl1-f174.google.com (mail-pl1-f174.google.com [209.85.214.174]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 41E7785C5E for ; Wed, 18 Dec 2024 00:38:08 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.174 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482290; cv=none; b=CtFK/XxzsKe1djqmoBwDqAn9T34TVnt0IPEsJaT5A3V7FPD/52o0pSgtZOMuXsxQixISXuLBnExjK6tTu9dW2s5WnK9LssLEBMYSKtcyf+fFe4vsM1Z1y8UrvBTMX8ePR6Quhl+7QCmJunMl4OGYWCqlPY53YEeyvnKrPdxP5dQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482290; c=relaxed/simple; bh=TKQgqH3umws+EvCqYunz2O6oBf0U3w1G3UiBBXL0v88=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jgohCcfDMGmHYijG9+OXVTOug9RhSV8bfk9cSVfUaivyKg+gp4aa/1oPexHLd5Wf1pBIGginIZCepDHwJNlA6i7BI4IbWSyt4KMi0GYF1sc7CYQndvFOssByWMNVHMqPZcV+MVRp0Qv8jbTrCwWxgyLbJqsqqIDbL71TChOgd/g= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; 
dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=2EiZ33KC; arc=none smtp.client-ip=209.85.214.174 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="2EiZ33KC" Received: by mail-pl1-f174.google.com with SMTP id d9443c01a7336-2166022c5caso47324605ad.2 for ; Tue, 17 Dec 2024 16:38:08 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482288; x=1735087088; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=IY+b9Izm5nQ820K+FXzhLCMFxkV3K5pzUzni12NfYIo=; b=2EiZ33KCAoWcKaQdC4G9fQYRWNuFEWPLVpUG5JvszHWk6UdmfHmYcjhIedg97gEfiK nxW111DWGlkTQKj7NVQ6/v/3pDfynsH075uzFoF5ujuI5kP6ztXGki1GVky3JzjIzZ+/ KqBQGYdBfGHXkUgZIPC+hKOEh9C6wgCEEuVf2JCQbSJkTi/4xyXQII+SVpSnl3dIAc7W BTMUc1S73kscpUtvFvq3e2UtL9L3dg1cKBMWle+1PpSyDKuOr6jGRCii28jlRWCyudwS ykoVIq79A0EpRh4I8U8UvvX4cGalTiTKc2UaxXdz/cCT91VfB2jHvgpvix1adkkDtzZ9 uB5w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482288; x=1735087088; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=IY+b9Izm5nQ820K+FXzhLCMFxkV3K5pzUzni12NfYIo=; b=P2wQXjVAc+lQcTSROlhToCarQcRfBN93aNjFuiwS0UXoKLRp3PfaPz+wptZdghXHLc vlUwqDHXgPdhXl9pns5etymyb/qt2KyTp/SOpmWfSrlDQU/ZW7/et+nnUoSVThW1j/Zt W2WXBRqpW4Rx9MDgOLVE2eibiWHFXrTAaHCJaArtkNu7oYSY/zs2Pt9ZqOdTp8wOnkkW Qf5yiItuLRlp5MaKPQsPWWg3yx/ezQ0v93x92qPIMZN/R62fMnNJM37wk+IDEryYxFep 
xttc+XQNyWP4LoeQhcYwJIVga1VNEpdAMOeNQBs4dUsE157qAAISUwhTeVxPOpRcAiNN XJaw== X-Gm-Message-State: AOJu0YyivsiC8geTgKYUoXDJ8njctx544tJRwfWX686WW+Bswh/YZHq+ pi7iSaF3b7LSnVRN1cwEGVSbXsAzGSZVPx/P9OJheSQ5cch2Jxoahm05839NzKn4cTi1ZMs2qsR E X-Gm-Gg: ASbGncv26EPScY2BAP0N4ysGBTfnBH+rtzWGszqeZqj5DKwtEod3V2dfpZHePhRm9KH kQDbrbZuUm8tc4pXClxvMs4UaZzer3FBJYvrWv6TzH3xHE/i9Hqs4ncoiHct4J54oYfOicKuXT8 lteyv74DUG9zddSZZBkEDiD4FgZirV6V+Jyotw3gghc0SJN4w7yoIEnWzL1YDJV7lJTQdELuqIa pxjbFpkQJPMsbNHMvNzmvCplakKeV4mta9WDZzFNQ== X-Google-Smtp-Source: AGHT+IFLyAz1sPu/hY5q1UwwL4pUNZSyj+xHLABGfTP5TeGDBuiAnsUSCzMCfYxI9C6//ZHV5yBnYg== X-Received: by 2002:a17:902:d2c8:b0:215:94e0:17 with SMTP id d9443c01a7336-218d720e064mr10116055ad.23.1734482287565; Tue, 17 Dec 2024 16:38:07 -0800 (PST) Received: from localhost ([2a03:2880:ff:71::]) by smtp.gmail.com with ESMTPSA id d9443c01a7336-218a1e5c57bsm65129335ad.185.2024.12.17.16.38.06 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:07 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 10/20] io_uring/zcrx: add interface queue and refill queue Date: Tue, 17 Dec 2024 16:37:36 -0800 Message-ID: <20241218003748.796939-11-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add a new object called an interface queue (ifq) that represents a net rx queue that has been configured for zero copy. Each ifq is registered using a new registration opcode IORING_REGISTER_ZCRX_IFQ. 
The refill queue is allocated by the kernel and mapped by userspace using a new offset IORING_OFF_RQ_RING, in a similar fashion to the main SQ/CQ. It is used by userspace to return buffers that it is done with, which will then be re-used by the netdev again. The main CQ ring is used to notify userspace of received data by using the upper 16 bytes of a big CQE as a new struct io_uring_zcrx_cqe. Each entry contains the offset + len to the data. For now, each io_uring instance only has a single ifq. Signed-off-by: David Wei --- Kconfig | 2 + include/linux/io_uring_types.h | 6 ++ include/uapi/linux/io_uring.h | 43 +++++++++- io_uring/KConfig | 10 +++ io_uring/Makefile | 1 + io_uring/io_uring.c | 7 ++ io_uring/memmap.h | 1 + io_uring/register.c | 7 ++ io_uring/zcrx.c | 149 +++++++++++++++++++++++++++++++++ io_uring/zcrx.h | 35 ++++++++ 10 files changed, 260 insertions(+), 1 deletion(-) create mode 100644 io_uring/KConfig create mode 100644 io_uring/zcrx.c create mode 100644 io_uring/zcrx.h diff --git a/Kconfig b/Kconfig index 745bc773f567..529ea7694ba9 100644 --- a/Kconfig +++ b/Kconfig @@ -30,3 +30,5 @@ source "lib/Kconfig" source "lib/Kconfig.debug" source "Documentation/Kconfig" + +source "io_uring/KConfig" diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index 493a8f7fa8e4..f0c6e18d176a 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -40,6 +40,8 @@ enum io_uring_cmd_flags { IO_URING_F_TASK_DEAD = (1 << 13), }; +struct io_zcrx_ifq; + struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -384,6 +386,8 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; + struct io_zcrx_ifq *ifq; + u32 pers_next; struct xarray personalities; @@ -436,6 +440,8 @@ struct io_ring_ctx { struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; + /* just one zcrx per ring for now, will move to 
io_zcrx_ifq eventually */ + struct io_mapped_region zcrx_region; }; struct io_tw_state { diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 38f0d6b10eaf..3af8b7a19824 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -638,7 +638,8 @@ enum io_uring_register_op { /* send MSG_RING without having a ring */ IORING_REGISTER_SEND_MSG_RING = 31, - /* 32 reserved for zc rx */ + /* register a netdev hw rx queue for zerocopy */ + IORING_REGISTER_ZCRX_IFQ = 32, /* resize CQ ring */ IORING_REGISTER_RESIZE_RINGS = 33, @@ -955,6 +956,46 @@ enum io_uring_socket_op { SOCKET_URING_OP_SETSOCKOPT, }; +/* Zero copy receive refill queue entry */ +struct io_uring_zcrx_rqe { + __u64 off; + __u32 len; + __u32 __pad; +}; + +struct io_uring_zcrx_cqe { + __u64 off; + __u64 __pad; +}; + +/* The bit from which area id is encoded into offsets */ +#define IORING_ZCRX_AREA_SHIFT 48 +#define IORING_ZCRX_AREA_MASK (~(((__u64)1 << IORING_ZCRX_AREA_SHIFT) - 1)) + +struct io_uring_zcrx_offsets { + __u32 head; + __u32 tail; + __u32 rqes; + __u32 __resv2; + __u64 __resv[2]; +}; + +/* + * Argument for IORING_REGISTER_ZCRX_IFQ + */ +struct io_uring_zcrx_ifq_reg { + __u32 if_idx; + __u32 if_rxq; + __u32 rq_entries; + __u32 flags; + + __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */ + __u64 region_ptr; /* struct io_uring_region_desc * */ + + struct io_uring_zcrx_offsets offsets; + __u64 __resv[4]; +}; + #ifdef __cplusplus } #endif diff --git a/io_uring/KConfig b/io_uring/KConfig new file mode 100644 index 000000000000..9e2a4beba1ef --- /dev/null +++ b/io_uring/KConfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# io_uring configuration +# + +config IO_URING_ZCRX + def_bool y + depends on PAGE_POOL + depends on INET + depends on NET_RX_BUSY_POLL diff --git a/io_uring/Makefile b/io_uring/Makefile index 53167bef37d7..a95b0b8229c9 100644 --- a/io_uring/Makefile +++ b/io_uring/Makefile @@ -14,6 +14,7 @@ 
obj-$(CONFIG_IO_URING) += io_uring.o opdef.o kbuf.o rsrc.o notif.o \ epoll.o statx.o timeout.o fdinfo.o \ cancel.o waitid.o register.o \ truncate.o memmap.o +obj-$(CONFIG_IO_URING_ZCRX) += zcrx.o obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FUTEX) += futex.o obj-$(CONFIG_NET_RX_BUSY_POLL) += napi.o diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 5535a72b0ce1..0c02a2e97f01 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -97,6 +97,7 @@ #include "uring_cmd.h" #include "msg_ring.h" #include "memmap.h" +#include "zcrx.h" #include "timeout.h" #include "poll.h" @@ -2686,6 +2687,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) mutex_lock(&ctx->uring_lock); io_sqe_buffers_unregister(ctx); io_sqe_files_unregister(ctx); + io_unregister_zcrx_ifqs(ctx); io_cqring_overflow_kill(ctx); io_eventfd_unregister(ctx); io_alloc_cache_free(&ctx->apoll_cache, kfree); @@ -2851,6 +2853,11 @@ static __cold void io_ring_exit_work(struct work_struct *work) io_cqring_overflow_kill(ctx); mutex_unlock(&ctx->uring_lock); } + if (ctx->ifq) { + mutex_lock(&ctx->uring_lock); + io_shutdown_zcrx_ifqs(ctx); + mutex_unlock(&ctx->uring_lock); + } if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) io_move_task_work_from_local(ctx); diff --git a/io_uring/memmap.h b/io_uring/memmap.h index c898dcba2b4e..dad0aa5b1b45 100644 --- a/io_uring/memmap.h +++ b/io_uring/memmap.h @@ -2,6 +2,7 @@ #define IO_URING_MEMMAP_H #define IORING_MAP_OFF_PARAM_REGION 0x20000000ULL +#define IORING_MAP_OFF_ZCRX_REGION 0x30000000ULL struct page **io_pin_pages(unsigned long ubuf, unsigned long len, int *npages); diff --git a/io_uring/register.c b/io_uring/register.c index f1698c18c7cb..f9dfdca79a80 100644 --- a/io_uring/register.c +++ b/io_uring/register.c @@ -30,6 +30,7 @@ #include "eventfd.h" #include "msg_ring.h" #include "memmap.h" +#include "zcrx.h" #define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ IORING_REGISTER_LAST + IORING_OP_LAST) @@ -798,6 +799,12 @@ static int 
__io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_register_clone_buffers(ctx, arg); break; + case IORING_REGISTER_ZCRX_IFQ: + ret = -EINVAL; + if (!arg || nr_args != 1) + break; + ret = io_register_zcrx_ifq(ctx, arg); + break; case IORING_REGISTER_RESIZE_RINGS: ret = -EINVAL; if (!arg || nr_args != 1) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c new file mode 100644 index 000000000000..f3ace7e8264d --- /dev/null +++ b/io_uring/zcrx.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include + +#include "io_uring.h" +#include "kbuf.h" +#include "memmap.h" +#include "zcrx.h" + +#define IO_RQ_MAX_ENTRIES 32768 + +static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq, + struct io_uring_zcrx_ifq_reg *reg, + struct io_uring_region_desc *rd) +{ + size_t off, size; + void *ptr; + int ret; + + off = sizeof(struct io_uring); + size = off + sizeof(struct io_uring_zcrx_rqe) * reg->rq_entries; + if (size > rd->size) + return -EINVAL; + + ret = io_create_region_mmap_safe(ifq->ctx, &ifq->ctx->zcrx_region, rd, + IORING_MAP_OFF_ZCRX_REGION); + if (ret < 0) + return ret; + + ptr = io_region_get_ptr(&ifq->ctx->zcrx_region); + ifq->rq_ring = (struct io_uring *)ptr; + ifq->rqes = (struct io_uring_zcrx_rqe *)(ptr + off); + return 0; +} + +static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) +{ + io_free_region(ifq->ctx, &ifq->ctx->zcrx_region); + ifq->rq_ring = NULL; + ifq->rqes = NULL; +} + +static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) +{ + struct io_zcrx_ifq *ifq; + + ifq = kzalloc(sizeof(*ifq), GFP_KERNEL); + if (!ifq) + return NULL; + + ifq->if_rxq = -1; + ifq->ctx = ctx; + return ifq; +} + +static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) +{ + io_free_rbuf_ring(ifq); + kfree(ifq); +} + +int io_register_zcrx_ifq(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg) +{ + struct io_uring_zcrx_ifq_reg reg; + struct io_uring_region_desc rd; + struct 
io_zcrx_ifq *ifq; + int ret; + + /* + * 1. Interface queue allocation. + * 2. It can observe data destined for sockets of other tasks. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* mandatory io_uring features for zc rx */ + if (!(ctx->flags & IORING_SETUP_DEFER_TASKRUN && + ctx->flags & IORING_SETUP_CQE32)) + return -EINVAL; + if (ctx->ifq) + return -EBUSY; + if (copy_from_user(&reg, arg, sizeof(reg))) + return -EFAULT; + if (copy_from_user(&rd, u64_to_user_ptr(reg.region_ptr), sizeof(rd))) + return -EFAULT; + if (memchr_inv(&reg.__resv, 0, sizeof(reg.__resv))) + return -EINVAL; + if (reg.if_rxq == -1 || !reg.rq_entries || reg.flags) + return -EINVAL; + if (reg.rq_entries > IO_RQ_MAX_ENTRIES) { + if (!(ctx->flags & IORING_SETUP_CLAMP)) + return -EINVAL; + reg.rq_entries = IO_RQ_MAX_ENTRIES; + } + reg.rq_entries = roundup_pow_of_two(reg.rq_entries); + + if (!reg.area_ptr) + return -EFAULT; + + ifq = io_zcrx_ifq_alloc(ctx); + if (!ifq) + return -ENOMEM; + + ret = io_allocate_rbuf_ring(ifq, &reg, &rd); + if (ret) + goto err; + + ifq->rq_entries = reg.rq_entries; + ifq->if_rxq = reg.if_rxq; + + reg.offsets.rqes = sizeof(struct io_uring); + reg.offsets.head = offsetof(struct io_uring, head); + reg.offsets.tail = offsetof(struct io_uring, tail); + + if (copy_to_user(arg, &reg, sizeof(reg)) || + copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) { + ret = -EFAULT; + goto err; + } + + ctx->ifq = ifq; + return 0; +err: + io_zcrx_ifq_free(ifq); + return ret; +} + +void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) +{ + struct io_zcrx_ifq *ifq = ctx->ifq; + + lockdep_assert_held(&ctx->uring_lock); + + if (!ifq) + return; + + ctx->ifq = NULL; + io_zcrx_ifq_free(ifq); +} + +void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) +{ + lockdep_assert_held(&ctx->uring_lock); +} diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h new file mode 100644 index 000000000000..58e4ab6c6083 --- /dev/null +++ b/io_uring/zcrx.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0
+#ifndef IOU_ZC_RX_H +#define IOU_ZC_RX_H + +#include + +struct io_zcrx_ifq { + struct io_ring_ctx *ctx; + struct io_uring *rq_ring; + struct io_uring_zcrx_rqe *rqes; + u32 rq_entries; + + u32 if_rxq; +}; + +#if defined(CONFIG_IO_URING_ZCRX) +int io_register_zcrx_ifq(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg); +void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); +void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx); +#else +static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx, + struct io_uring_zcrx_ifq_reg __user *arg) +{ + return -EOPNOTSUPP; +} +static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) +{ +} +static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) +{ +} +#endif + +#endif From patchwork Wed Dec 18 00:37:37 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912803 Received: from mail-pf1-f180.google.com (mail-pf1-f180.google.com [209.85.210.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8D607136671 for ; Wed, 18 Dec 2024 00:38:09 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482291; cv=none; b=lXHMLYxOTf1WHIWLQbtFQvQywDD/Nmwf/qgRqSJrxCat7zJtKo1CFsVlbdqBZKDRprTXE9kGIcD5e6ZxMYI5UIRLCTW2WMfnppNKNNUHz7RysmtSidTq/jk4+OJj9+ed/7YVOa+31KP2F+WyE/kkNmyEnNUNZVN3g5al/1NI2EM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482291; c=relaxed/simple; bh=XVYbxLhQjPLjRXvHRi0K/kcnMc22GtAVQg7MWFvQ9yk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=YU4qtqvNwofQAEKyCcUqQcaC6HTX8mOmYXu6zLzDnqS69Mgx/8m6Vj3zYetFvsLMA6RKsfdGWUwSbHpDQjneqAZUI4e/wY8F+ueoelK+86iU4K3KSihwOtTtxqbnIu7KRXUgIg0Okri7Ep1OTyJ9CTrZEdy4eOKFxP9TPnU7ouY= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=wI2Bf7vk; arc=none smtp.client-ip=209.85.210.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="wI2Bf7vk" Received: by mail-pf1-f180.google.com with SMTP id d2e1a72fcca58-7273967f2f0so6378560b3a.1 for ; Tue, 17 Dec 2024 16:38:09 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482289; x=1735087089; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=HANf/wnrk/pvHPDftSyBB7C0ozx5QMtCSP2F71f05Iw=; b=wI2Bf7vkEgNSMpNGiJieO4cjUkpsU46eQtAJzuECjeH+wTEPm27o408YgF4eQp7uzN Xuwkqo10mrX4tfVjwON/hJNasQsaqizdMGFARCQXiq7A6RRdt8pDh3kHDgRzIHYBI9wO SYzVKbTHPrWYzHQjtFFcl5mVM+A+Nwy23QdOC9PaoJKR4VTj0BRZYK1fqjvvsEOqImID YJ9gEB/NfFJAjE2PPFX58E8I6GQNp4u98vOdM/vdU6RClRZ9Mduato0PySkkTBKln8YN 0kzlqbDanEMzrdV9WNSUl1qui29PaYDbMHSg26idfWPg+JCbdbZF2vK30SMqlpOIadkJ NszA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482289; x=1735087089; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; 
bh=HANf/wnrk/pvHPDftSyBB7C0ozx5QMtCSP2F71f05Iw=; b=ZQ1fotjFE++FlTv+LKUyVTIDg5soVoCheh1IJ2u4cuRF8pRPiHWJDnkF6hA+/fT/8U 0jKZ9XpYbS4uAqWhrz/rsgzt2S5BdOhLALbepTNtOVmADZWN8EYKS0wwauW5jbKmwwOw Hkc3JBRhFNvMU0fTg6V1ejKpqE4Qcg+wyDi3DyqAvuIc3+fVgxOBSpr2+/Em4UwgtJ4e nxm5DM8o19lvP9HXltrEhIDaXDptKE/ewhlq4ScaqMtRLFffxCpHkwKiET50ENcSXc21 iA7GkM/uLHRl7QtsRE3BdyoCKtnoqlKTfFioilCua/JLhbP1q6WGb5gXid2sG8uHG5xF FwNw== X-Gm-Message-State: AOJu0YwRrhADBWDTQ32GQJB/BPcpXjpxh1o3yJDu0UDZ90FJJ3C21NWX APlFT+KrCHP3ZQAbmswCalE6duuLEQWKVWhOHJlx5XlU9B6QPTu5guWFaaujWmEnlFTz6u13AEn u X-Gm-Gg: ASbGnctoHyikYvtLoODdfOgiy6OZogqFIaQNMct/gPxs6Bzgo7bVU633ET7vS1WloCz kk+zrF4bltWoOXaSasbUo8kLV5wDUsTEENQ40DBq9KqoXWo9K+zh3r3jDDtlFwFBNqPNkbtxK4V pBEZZYe42fWbbp1Hn2ubyqjc9nRm+dTWyKd57aYp42asuQSMm/2lGJ9Bxej+yy92pp6D3HX82ug vyVJL2ijDYQ2R8zxyR2KuLxbEpCPwjRjWfzy7DmFQ== X-Google-Smtp-Source: AGHT+IEJ4oMREpx4hC8kdpG2v5TKjl5T0w2x7ZOX3BVlIgvY0lWq99NTd4GiF/rpFhPK3mVC2Bitmg== X-Received: by 2002:a05:6a00:3a28:b0:725:e05a:c975 with SMTP id d2e1a72fcca58-72a8d2c9a60mr1680795b3a.19.1734482288800; Tue, 17 Dec 2024 16:38:08 -0800 (PST) Received: from localhost ([2a03:2880:ff:1d::]) by smtp.gmail.com with ESMTPSA id d2e1a72fcca58-72918bb4635sm7321271b3a.169.2024.12.17.16.38.08 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:08 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 11/20] io_uring/zcrx: add io_zcrx_area Date: Tue, 17 Dec 2024 16:37:37 -0800 Message-ID: <20241218003748.796939-12-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add io_zcrx_area that represents a region of userspace memory that is used for zero copy. During ifq registration, userspace passes in the uaddr and len of userspace memory, which is then pinned by the kernel. Each net_iov is mapped to one of these pages. The freelist is a spinlock protected list that keeps track of all the net_iovs/pages that aren't used. For now, there is only one area per ifq and area registration happens implicitly as part of ifq registration. There is no API for adding/removing areas yet. The struct for area registration is there for future extensibility once we support multiple areas and TCP devmem. 
Reviewed-by: Jens Axboe Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/uapi/linux/io_uring.h | 9 ++++ io_uring/rsrc.c | 2 +- io_uring/rsrc.h | 1 + io_uring/zcrx.c | 89 ++++++++++++++++++++++++++++++++++- io_uring/zcrx.h | 16 +++++++ 5 files changed, 114 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 3af8b7a19824..e251f28507ce 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -980,6 +980,15 @@ struct io_uring_zcrx_offsets { __u64 __resv[2]; }; +struct io_uring_zcrx_area_reg { + __u64 addr; + __u64 len; + __u64 rq_area_token; + __u32 flags; + __u32 __resv1; + __u64 __resv2[2]; +}; + /* * Argument for IORING_REGISTER_ZCRX_IFQ */ diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index f2ff108485c8..d0f11b5aec0d 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -77,7 +77,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) return 0; } -static int io_buffer_validate(struct iovec *iov) +int io_buffer_validate(struct iovec *iov) { unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); diff --git a/io_uring/rsrc.h b/io_uring/rsrc.h index c8b093584461..0ae54ddeb1fd 100644 --- a/io_uring/rsrc.h +++ b/io_uring/rsrc.h @@ -66,6 +66,7 @@ int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, unsigned size, unsigned type); int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, unsigned int size, unsigned int type); +int io_buffer_validate(struct iovec *iov); bool io_check_coalesce_buffer(struct page **page_array, int nr_pages, struct io_imu_folio_data *data); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index f3ace7e8264d..04883a3ae80c 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -10,6 +10,7 @@ #include "kbuf.h" #include "memmap.h" #include "zcrx.h" +#include "rsrc.h" #define IO_RQ_MAX_ENTRIES 32768 @@ -44,6 +45,79 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) ifq->rqes = NULL; } 
+static void io_zcrx_free_area(struct io_zcrx_area *area) +{ + kvfree(area->freelist); + kvfree(area->nia.niovs); + if (area->pages) { + unpin_user_pages(area->pages, area->nia.num_niovs); + kvfree(area->pages); + } + kfree(area); +} + +static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, + struct io_zcrx_area **res, + struct io_uring_zcrx_area_reg *area_reg) +{ + struct io_zcrx_area *area; + int i, ret, nr_pages; + struct iovec iov; + + if (area_reg->flags || area_reg->rq_area_token) + return -EINVAL; + if (area_reg->__resv1 || area_reg->__resv2[0] || area_reg->__resv2[1]) + return -EINVAL; + if (area_reg->addr & ~PAGE_MASK || area_reg->len & ~PAGE_MASK) + return -EINVAL; + + iov.iov_base = u64_to_user_ptr(area_reg->addr); + iov.iov_len = area_reg->len; + ret = io_buffer_validate(&iov); + if (ret) + return ret; + + ret = -ENOMEM; + area = kzalloc(sizeof(*area), GFP_KERNEL); + if (!area) + goto err; + + area->pages = io_pin_pages((unsigned long)area_reg->addr, area_reg->len, + &nr_pages); + if (IS_ERR(area->pages)) { + ret = PTR_ERR(area->pages); + area->pages = NULL; + goto err; + } + area->nia.num_niovs = nr_pages; + + area->nia.niovs = kvmalloc_array(nr_pages, sizeof(area->nia.niovs[0]), + GFP_KERNEL | __GFP_ZERO); + if (!area->nia.niovs) + goto err; + + area->freelist = kvmalloc_array(nr_pages, sizeof(area->freelist[0]), + GFP_KERNEL | __GFP_ZERO); + if (!area->freelist) + goto err; + + for (i = 0; i < nr_pages; i++) + area->freelist[i] = i; + + area->free_count = nr_pages; + area->ifq = ifq; + /* we're only supporting one area per ifq for now */ + area->area_id = 0; + area_reg->rq_area_token = (u64)area->area_id << IORING_ZCRX_AREA_SHIFT; + spin_lock_init(&area->freelist_lock); + *res = area; + return 0; +err: + if (area) + io_zcrx_free_area(area); + return ret; +} + static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) { struct io_zcrx_ifq *ifq; @@ -59,6 +133,9 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) static 
void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) { + if (ifq->area) + io_zcrx_free_area(ifq->area); + io_free_rbuf_ring(ifq); kfree(ifq); } @@ -66,6 +143,7 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg) { + struct io_uring_zcrx_area_reg area; struct io_uring_zcrx_ifq_reg reg; struct io_uring_region_desc rd; struct io_zcrx_ifq *ifq; @@ -99,7 +177,7 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, } reg.rq_entries = roundup_pow_of_two(reg.rq_entries); - if (!reg.area_ptr) + if (copy_from_user(&area, u64_to_user_ptr(reg.area_ptr), sizeof(area))) return -EFAULT; ifq = io_zcrx_ifq_alloc(ctx); @@ -110,6 +188,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto err; + ret = io_zcrx_create_area(ifq, &ifq->area, &area); + if (ret) + goto err; + ifq->rq_entries = reg.rq_entries; ifq->if_rxq = reg.if_rxq; @@ -122,7 +204,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, ret = -EFAULT; goto err; } - + if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) { + ret = -EFAULT; + goto err; + } ctx->ifq = ifq; return 0; err: diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 58e4ab6c6083..53fd94b65b38 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -3,9 +3,25 @@ #define IOU_ZC_RX_H #include +#include + +struct io_zcrx_area { + struct net_iov_area nia; + struct io_zcrx_ifq *ifq; + + u16 area_id; + struct page **pages; + + /* freelist */ + spinlock_t freelist_lock ____cacheline_aligned_in_smp; + u32 free_count; + u32 *freelist; +}; struct io_zcrx_ifq { struct io_ring_ctx *ctx; + struct io_zcrx_area *area; + struct io_uring *rq_ring; struct io_uring_zcrx_rqe *rqes; u32 rq_entries; From patchwork Wed Dec 18 00:37:38 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912804 Received: from mail-pf1-f180.google.com (mail-pf1-f180.google.com 
[209.85.210.180]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AC9E5137930 for ; Wed, 18 Dec 2024 00:38:10 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.180 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482292; cv=none; b=bcp6Nbf0tqJGRYazjwP1L9e2kYJTEP09+rLEWzVBEeTMP/o4K0xsUi68vPefmfthHpqkvrq+l+p5aImMGy2GEL4FGkCkLn5rpt7C+TRzUNWYetU5Rb05xXqiEz+E+ouOVxFCCkOAmxTzoi56ScEABD32qKSSwZbqsbMfbEqTupY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482292; c=relaxed/simple; bh=NRFL2+PeYQBpke3MuIFNDap9yjTLOeiLgLS3XvL+I/Q=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=jPe3nNCZydHNkvUdwhDkUEmcw5N2f4TUKCQFHjrCNFj8MCQ3sXI6zYxx5kRSTmQ8RR9naE1eyRyHw/VvUtBrWm6slgd0ykdJezsFdvCFDMHmZF8RU/BGDw33unfmG1ASZCvyYfXXrEXtW/RPy1fuVwalBgrSfcGPzZwSwKxaOb8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=gYj/39d2; arc=none smtp.client-ip=209.85.210.180 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="gYj/39d2" Received: by mail-pf1-f180.google.com with SMTP id d2e1a72fcca58-728e729562fso5116295b3a.0 for ; Tue, 17 Dec 2024 16:38:10 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482290; x=1735087090; darn=vger.kernel.org; 
h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=N2rr3bFZ4xcAeCxIuffGlTrB+I+DvsKrMkXA8KeXORw=; b=gYj/39d23fYDT5eAQfjh6vLk3Vf1QtN40DtLx7/rDFVaFJh1GRMsnq5fgcZwHKbndm 52JvI2b0E3ZJE9iAoADjoR/24WGUJjXywJCW2yT5QcdzZrfTHaxvIC5ghTKkMbiHRben 1BztSd3Ui9nF5Z9wFi9i7Le2wbv0q+nbzm2b0MA2pwdk+Jj2/g+SwvLUter9JCSWPtlG /5li0izdRnn9YJotiaMvnX8solZ9SJ7Adu0Al54IkD6dMcxaYFX1aIga9DX0Yx5wb8EJ CYLwGqGvdv0cEVaGAJJTSEzvgcFMpNk9ysXpIxcM/YrX7X2Auq1nS4G+wKwjJgDwvUEs mDkA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482290; x=1735087090; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=N2rr3bFZ4xcAeCxIuffGlTrB+I+DvsKrMkXA8KeXORw=; b=Hll+IOxYyXP+yHKlw4rcHmhPDDZXsAVuliJyrHu2BHd2imzDERk9bQcPXxjiTIeImL 4Oz3vRTqI75CFrZp1V3IvNkQeof0cOHlxI77Ute9yoNIpJtm9yWCkm9a74sl4cGwk2pw GvGCLzammqQDBfNkNyYEcKY5U6z75jqnyETtgh4Xy4biS9/YKEan2cjWfCGr7n0sk6nZ NIuwVIrm5wEeivFMNkMnvNSZspGmPTguphAk11BI3y6aSwxQ7PAIgugkZ2+L13klUtkT luYxmYlRCN9hAQGFmbR4e9EYarDerTOdmSwss8hggwkqbnTQ+QspckKeOxWDIa2/rC7n 6LaQ== X-Gm-Message-State: AOJu0YzSqvwywmwwA2vxw/PsdgPJbA8LWQh0UhEF0YgcSLTTQpgvo48r gwwhW7i0JbQIslk20KXMh+5uVFeGEHQaEu4A21rqEIZcFOvObiwCGTd9x2shg2iUrBjEWtaVGMd o X-Gm-Gg: ASbGncsMC0DmuHRLwP/qZBZwidNYhUhbBIonLO4EjpNfJmSPQpZq8gvSivl335SsvyY MwK4yeoHOspHqxFWxa3mThpGN/rYLpclirC8KVIEH0RsvmRHVfMUmbfRZbAhiZVlpX9We4GHJFi pVgM9IaGy+ctS1gQeTkv+JkJbcDHgdrzRcWA2BTe4KKGMvr5X6KfPyYf3UHujHzNze3YoYeAPgb xAaRjjuZ1uNkAkvRyUSGT/tqpZ1AmlOtdYK3v+pPQ== X-Google-Smtp-Source: AGHT+IE5LWeBYUo39S/rAUJIC66HtimKMqRa3eFWsBxg5kxpIdN2gzU1mmJgkXRFCD1/tZShgt7iow== X-Received: by 2002:a05:6a00:1948:b0:728:9d19:d2ea with SMTP id d2e1a72fcca58-72a8d2611e6mr1662236b3a.13.1734482290015; Tue, 17 Dec 2024 16:38:10 -0800 (PST) Received: from localhost ([2a03:2880:ff:1b::]) by smtp.gmail.com with ESMTPSA id 
d2e1a72fcca58-72918af0f01sm7332001b3a.87.2024.12.17.16.38.09 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:09 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 12/20] io_uring/zcrx: grab a net device Date: Tue, 17 Dec 2024 16:37:38 -0800 Message-ID: <20241218003748.796939-13-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Zerocopy receive needs a net device in order to bind to its rx queue and to DMA-map buffers. In preparation for the following patches, resolve a net device from the if_idx parameter, with no functional changes otherwise. 
Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- io_uring/zcrx.c | 10 ++++++++++ io_uring/zcrx.h | 3 +++ 2 files changed, 13 insertions(+) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 04883a3ae80c..e6cca6747148 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include @@ -136,6 +138,8 @@ static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) if (ifq->area) io_zcrx_free_area(ifq->area); + if (ifq->dev) + netdev_put(ifq->dev, &ifq->netdev_tracker); io_free_rbuf_ring(ifq); kfree(ifq); } @@ -195,6 +199,12 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, ifq->rq_entries = reg.rq_entries; ifq->if_rxq = reg.if_rxq; + ret = -ENODEV; + ifq->dev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx, + &ifq->netdev_tracker, GFP_KERNEL); + if (!ifq->dev) + goto err; + reg.offsets.rqes = sizeof(struct io_uring); reg.offsets.head = offsetof(struct io_uring, head); reg.offsets.tail = offsetof(struct io_uring, tail); diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 53fd94b65b38..46988a1dbd54 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -4,6 +4,7 @@ #include #include +#include struct io_zcrx_area { struct net_iov_area nia; @@ -27,6 +28,8 @@ struct io_zcrx_ifq { u32 rq_entries; u32 if_rxq; + struct net_device *dev; + netdevice_tracker netdev_tracker; }; #if defined(CONFIG_IO_URING_ZCRX) From patchwork Wed Dec 18 00:37:39 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912805 Received: from mail-pf1-f178.google.com (mail-pf1-f178.google.com [209.85.210.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E238B13BAEE for ; Wed, 18 Dec 2024 00:38:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.178 ARC-Seal: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1734482293; cv=none; b=BdFnN4I6nUBKOLdmNcsphOzg0ToGuPe2GFE5qebwqBjftOT1XEqa/EX6+jwm3JMxs6k3vsg+rgetVTOGC9dBjMpJrecxo2dUFKyihaNGsvoi6XV8ffTOkOUMfySAK0I43f+NElUYxNcx9Q0NRmHkH43VlgR2Li5itCcZd++TlJk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482293; c=relaxed/simple; bh=3FYWOzv7GI9lx7mwhuutMTOmOihGpo6sHz92R4r6kzA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=kxcfXF03ivpdbliLRR/1rUg/2GlOGT98uCEhrD9p5ANyzvpCppMdND67EgSjGWbeFporE3PSeAn5n2WQ5mUX2dr9yR6kl41PHGbkbgpZC+ym6QTEabGG8Rk0d9jgAyxGYjsnwiJZ5+VaauAicvCq5goKei3pwM/gafVD7mtlHro= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=SMaVC01T; arc=none smtp.client-ip=209.85.210.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="SMaVC01T" Received: by mail-pf1-f178.google.com with SMTP id d2e1a72fcca58-728e3826211so4743354b3a.0 for ; Tue, 17 Dec 2024 16:38:11 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482291; x=1735087091; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=VHQGzO085Dbg8z+SUztmKbhehQYfjKmzsCXFZJE+G38=; b=SMaVC01TISE3JoQWpkZqkJYEoFnnpLfZjmHua9Vb+iNL3VZwPIB0FiyLWE+o7kL8Sz 2O1ZNkoqDBWxxFJVmdDOHrtSS5GDMwLr5ABlNo/xQJbaQkokOJQ2O1QRumH4DHIQuoLy 
dVs7s6nDl/l7hbYbqt5tYMuSTk+4WhJexLymhWD2lb95ChbpB57h5I++/Dz3KLm1maIW ZRvPjWkMch/XGoUXM3E5nCIHwJLJR8xniCFvxLcqRUHZMKU8KO5fwVuQasXj77C0stKT iopwfmt9pl4TMJKviUbLSIgHY+bRqCKjM9rMXI7xN143IVW90U85YKyzX52S0YwTvsTw NIjg== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482291; x=1735087091; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=VHQGzO085Dbg8z+SUztmKbhehQYfjKmzsCXFZJE+G38=; b=AKnxJO6Rd1JKhk8gQtA3yYFM9E5dY+JUQwheHXumjVJ3Og5yckIo1rd9DrBnuEamot DKT1iHOXC0OlnKEwGReCQ374VfEcYDos9uboPLS5kNxmnrEK8+JclXAbS3YGDyX/rhrY nRIQERIaATKixHnRj9W42k3u3L9MmbvdE9t2SKCD1cUrGr1d9Z1akW1qz5LzL/Yh6Wsr WP4bBhnxSR2LbI+D0dvuyHjqt6riwlJGSp8nq7w9fsXeVTa+iue8KB8Ygv8BGCVgBXM4 TIWCbyvKX9RbBmrlg4AEV5ZveUOnNXBFF132OiLk4Mo2T4mOb9QsW1d9NZPemX1S87to Xvpw== X-Gm-Message-State: AOJu0Yyu5Kvgf5B8PnqyE9LaYu4WmM2SfJK0Fjjz1YxjZ3keSGOMRbSS OXcE75eVamI3lRmE/OwrhlvUxPaIYzBr/xKf7HmSHQCJzQXarXjoVQgFYbsRGr6GDdKiTqCQtQR 5 X-Gm-Gg: ASbGnct5gdLb5dC6/7OvXVmDZV9EvlSNa9s593stZx+HJilRgffecx5NofZGR67Avad N9YAMdW+Hn7i+EOknESYIcyOfDvFvoxlYhPqLRxtvfKfUQM9kNzsb3iKZW1JWVoC3deJDUEBeJd C7qQ68oe4oeHHQXImo8mmTHa4zjCPbDDCM4Qnqv8hgkBM23yM9j+WNuidU9LFRfxJ05r34MX2qe 853k/3TT9tT2NIW8c7jvZE7kLIsiW6yPLNLQgOxcw== X-Google-Smtp-Source: AGHT+IHyJo80ER+2cYHjOYQYCiTLHf0WPuVqPkbdGqdarTU8UyCaxQIIjztwSgcQbCsUmwCDhcsOoA== X-Received: by 2002:a17:90a:d00b:b0:2ee:ba84:5cac with SMTP id 98e67ed59e1d1-2f2e91a9adbmr1275798a91.7.1734482291218; Tue, 17 Dec 2024 16:38:11 -0800 (PST) Received: from localhost ([2a03:2880:ff:74::]) by smtp.gmail.com with ESMTPSA id 98e67ed59e1d1-2f2ed62cdabsm131945a91.14.2024.12.17.16.38.10 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:10 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 13/20] net: page pool: export page_pool_set_dma_addr_netmem() Date: Tue, 17 Dec 2024 16:37:39 -0800 Message-ID: <20241218003748.796939-14-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Export page_pool_set_dma_addr_netmem() in page_pool/helpers.h. This is needed by memory provider implementations that are outside of net/ to be able to set the dma addrs on net_iovs during alloc/free. Signed-off-by: David Wei --- include/net/page_pool/helpers.h | 7 +++++++ net/core/page_pool.c | 16 ++++++++++++++++ net/core/page_pool_priv.h | 17 ----------------- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index d968eebc4322..00eea5dd6f88 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -486,6 +486,7 @@ static inline void page_pool_nid_changed(struct page_pool *pool, int new_nid) #if defined(CONFIG_PAGE_POOL) void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem); void page_pool_clear_pp_info(netmem_ref netmem); +bool page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr); void page_pool_mp_return_in_cache(struct page_pool *pool, netmem_ref netmem); #else @@ -493,6 +494,12 @@ static inline void page_pool_set_pp_info(struct page_pool *pool, netmem_ref netmem) { } +static inline bool page_pool_set_dma_addr_netmem(netmem_ref netmem, + dma_addr_t addr) +{ + /* No page pool to record the address in: report failure (true). */ + return true; +} static inline void page_pool_clear_pp_info(netmem_ref netmem) { } diff --git a/net/core/page_pool.c b/net/core/page_pool.c index bd7f33d02652..3d1ed8b8f79e 100644 --- a/net/core/page_pool.c +++
b/net/core/page_pool.c @@ -655,6 +655,22 @@ void page_pool_clear_pp_info(netmem_ref netmem) netmem_set_pp(netmem, NULL); } +bool page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr) +{ + if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { + netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT); + + /* We assume page alignment to shave off bottom bits, + * if this "compression" doesn't work we need to drop. + */ + return addr != (dma_addr_t)netmem_get_dma_addr(netmem) + << PAGE_SHIFT; + } + + netmem_set_dma_addr(netmem, addr); + return false; +} + static __always_inline void __page_pool_release_page_dma(struct page_pool *pool, netmem_ref netmem) { diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 11a45a5f3c9c..cac300c83e29 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -13,23 +13,6 @@ int page_pool_list(struct page_pool *pool); void page_pool_detached(struct page_pool *pool); void page_pool_unlist(struct page_pool *pool); -static inline bool -page_pool_set_dma_addr_netmem(netmem_ref netmem, dma_addr_t addr) -{ - if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA) { - netmem_set_dma_addr(netmem, addr >> PAGE_SHIFT); - - /* We assume page alignment to shave off bottom bits, - * if this "compression" doesn't work we need to drop. 
- */ - return addr != (dma_addr_t)netmem_get_dma_addr(netmem) - << PAGE_SHIFT; - } - - netmem_set_dma_addr(netmem, addr); - return false; -} - static inline bool page_pool_set_dma_addr(struct page *page, dma_addr_t addr) { return page_pool_set_dma_addr_netmem(page_to_netmem(page), addr); From patchwork Wed Dec 18 00:37:40 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912806 Received: from mail-pl1-f178.google.com (mail-pl1-f178.google.com [209.85.214.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 6B42A13DDD3 for ; Wed, 18 Dec 2024 00:38:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482295; cv=none; b=p2Sf5sgVc1eADtn84B/2GucS4KfanHC851xIwm47CXuNiIKc8Y1aVBCcPpuZ5pWwkIMpkBJF2+oG7c6MutUTMIk9auZ+1Dx5nga1Kh4xwhwqfZ3Iyg4IH4RtywqK+aafDNBwUUQNAQKOFRgQlI0xSZ9oMNQPTqAuu4dahiFl/R0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482295; c=relaxed/simple; bh=ayZO1p/xdCKMs+Fe9n0Jxpv0eY9Egs/CfQ+ZAezZ7So=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=EHS/jHeS0pDNwXUXVO+MSdPSmtbeWKNwtEVjHIvENRtRGxsWwzdu4Hq2CCtNuDbaR9+BrAIjvfJl4UcXsNlciqRv5AXRn1GHo7mLe6/WFFj/NrLChyhdYlQHRt4Uw6D/jkxKHnQPCtpEqLmf4iJK8ifIsb4SwoEdTRK/hxFGFr0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=HETCjp6A; arc=none smtp.client-ip=209.85.214.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: 
smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="HETCjp6A" Received: by mail-pl1-f178.google.com with SMTP id d9443c01a7336-2164b662090so47543895ad.1 for ; Tue, 17 Dec 2024 16:38:13 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482293; x=1735087093; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=hFDW8/jDpFissyoMjOJTF6aMSnlKbLXD4WQxy4V2rHU=; b=HETCjp6AzLJyCRvoF5llJBcRGxryj6zJgPPpAjARtTfo2OXx1z91ajSzhSt/0ljp50 SEkhl7vqtKYieK6rFRzKmdWIjDcUsHel6OOxdz3pta+EOKWhHO1g/wra1DJyL31G8h/E aEmof9lngno8wYlO3MQHN1j06V2uL0+FfrDqU6U3Y5yhBS16ViptiEfbf8DZkn2Gto/1 DcBF/cvTkK/d7LHnVMPOsBNiEQpUvoFNBeO+JQKShzk3C6ZgiDbemEdmMEJ9shIoItI/ a0JIxL94oUu9Jku0pgf+w23hX0HMDgreGC1+s+uKPJzBqs24zg4by+mzzzfFj33RbLzM J/oQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482293; x=1735087093; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=hFDW8/jDpFissyoMjOJTF6aMSnlKbLXD4WQxy4V2rHU=; b=pZ+tD36SmkCVpIU/EPCbj/20R3xRbYmNvVg1qp6iK4/ZbTgFYVsTOAFGHC1bDg1IZL D8uRtDS8MMKguTEklC+NAzY874X1MwCjFBcIiP0LY0XzG9r7fT1n9MGyZYwMjYoBrblm COADGuA9kZSt3jIscpGF/SXgzTbpaPMwE4iwuQjgE/NyZi/WXZMj9PxhrmVFxUFvnWVK OrR4B5bkwd1QvOgfvoVgrjNVCYROnw66wnDW4cNDrttYtAOnBsN8oR2YjhMOZ7/DxaeR IRsKIcPmpa032IcJ9mDr4BGftTVpYzH9tp+/WE5pZk1x/0Wv0CwbhddzzabGHhS3ReTh ZhLg== X-Gm-Message-State: AOJu0YwUNmowM/SEY8bk94P20gj2V8czumNtfYYrLzHAEk0wIF6ah437 HptrIEFKChY7AH4r8d92mjPqoIBKlarys7wykVhYcIRXbuNgIMR7/4BkEpGLxZoTghk2xApkQaA A X-Gm-Gg: ASbGncsCD6xiBvbL6LRUrBYGw6cpbu4EUGxUYfi42Zo7yn4iiDVFQS6ilCivH4mZOG2 
8q/FI8ucQQgzOk5E0RKj6IDoZka7vqfbLIcV7+xKtsuEATx47Hr844AWFtScI7mu+n9+GXaDWJr WpodOF4YTXC35IlS4SDI5j/POdoRnwwJU4te3qAygVvnuZEMZfV5R6mhGWdGh8EucSqetrJImhJ PXXZ2VJU3H8GLoXYTczfMXFndidWTI9jUXDqw9O X-Google-Smtp-Source: AGHT+IEZxdWvw9DF77b/0R+5QvSxNjbMl61C+f+Zcul99UQRWHgdhC+BURHFRDgerMUTC9sWt2Tmlg== X-Received: by 2002:a17:902:eccd:b0:215:7b06:90ca with SMTP id d9443c01a7336-218d70d9630mr12054035ad.17.1734482292413; Tue, 17 Dec 2024 16:38:12 -0800 (PST) Received: from localhost ([2a03:2880:ff:3::]) by smtp.gmail.com with ESMTPSA id d9443c01a7336-218a1e5cf5asm65456855ad.178.2024.12.17.16.38.11 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:11 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 14/20] io_uring/zcrx: dma-map area for the device Date: Tue, 17 Dec 2024 16:37:40 -0800 Message-ID: <20241218003748.796939-15-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov Set up DMA mappings for the area into which we intend to receive data later on. We know the device we want to attach to even before we get a page pool, so we can map the area in advance. All net_iovs are synchronised for the device when allocated; see page_pool_mp_return_in_cache(). 
Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- include/uapi/linux/netdev.h | 1 + io_uring/zcrx.c | 320 ++++++++++++++++++++++++++++++++++++ io_uring/zcrx.h | 4 + 3 files changed, 325 insertions(+) diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e4be227d3ad6..13d810a28ed6 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -94,6 +94,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, NETDEV_A_PAGE_POOL_DMABUF, + NETDEV_A_PAGE_POOL_IO_URING, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index e6cca6747148..42098bc1a60f 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -1,11 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include +#include #include #include #include +#include +#include + +#include + #include #include "io_uring.h" @@ -14,8 +21,92 @@ #include "zcrx.h" #include "rsrc.h" +#define IO_DMA_ATTR (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING) + +static void __io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, + struct io_zcrx_area *area, int nr_mapped) +{ + struct device *dev = ifq->dev->dev.parent; + int i; + + for (i = 0; i < nr_mapped; i++) { + struct net_iov *niov = &area->nia.niovs[i]; + dma_addr_t dma; + + dma = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov)); + dma_unmap_page_attrs(dev, dma, PAGE_SIZE, DMA_FROM_DEVICE, + IO_DMA_ATTR); + page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), 0); + } +} + +static void io_zcrx_unmap_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) +{ + if (area->is_mapped) + __io_zcrx_unmap_area(ifq, area, area->nia.num_niovs); +} + +static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) +{ + struct device *dev = ifq->dev->dev.parent; + int i; + + if (!dev) + return -EINVAL; + + for (i = 0; i < area->nia.num_niovs; i++) { + struct net_iov *niov = &area->nia.niovs[i]; + dma_addr_t dma; + 
+ dma = dma_map_page_attrs(dev, area->pages[i], 0, PAGE_SIZE, + DMA_FROM_DEVICE, IO_DMA_ATTR); + if (dma_mapping_error(dev, dma)) + break; + if (page_pool_set_dma_addr_netmem(net_iov_to_netmem(niov), dma)) { + dma_unmap_page_attrs(dev, dma, PAGE_SIZE, + DMA_FROM_DEVICE, IO_DMA_ATTR); + break; + } + } + + if (i != area->nia.num_niovs) { + __io_zcrx_unmap_area(ifq, area, i); + return -EINVAL; + } + + area->is_mapped = true; + return 0; +} + #define IO_RQ_MAX_ENTRIES 32768 +__maybe_unused +static const struct memory_provider_ops io_uring_pp_zc_ops; + +static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov) +{ + struct net_iov_area *owner = net_iov_owner(niov); + + return container_of(owner, struct io_zcrx_area, nia); +} + +static inline atomic_t *io_get_user_counter(struct net_iov *niov) +{ + struct io_zcrx_area *area = io_zcrx_iov_to_area(niov); + + return &area->user_refs[net_iov_idx(niov)]; +} + +static bool io_zcrx_put_niov_uref(struct net_iov *niov) +{ + atomic_t *uref = io_get_user_counter(niov); + + if (unlikely(!atomic_read(uref))) + return false; + atomic_dec(uref); + return true; +} + static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq, struct io_uring_zcrx_ifq_reg *reg, struct io_uring_region_desc *rd) @@ -49,8 +140,11 @@ static void io_free_rbuf_ring(struct io_zcrx_ifq *ifq) static void io_zcrx_free_area(struct io_zcrx_area *area) { + io_zcrx_unmap_area(area->ifq, area); + kvfree(area->freelist); kvfree(area->nia.niovs); + kvfree(area->user_refs); if (area->pages) { unpin_user_pages(area->pages, area->nia.num_niovs); kvfree(area->pages); @@ -106,6 +200,19 @@ static int io_zcrx_create_area(struct io_zcrx_ifq *ifq, for (i = 0; i < nr_pages; i++) area->freelist[i] = i; + area->user_refs = kvmalloc_array(nr_pages, sizeof(area->user_refs[0]), + GFP_KERNEL | __GFP_ZERO); + if (!area->user_refs) + goto err; + + for (i = 0; i < nr_pages; i++) { + struct net_iov *niov = &area->nia.niovs[i]; + + niov->owner = &area->nia; + 
area->freelist[i] = i; + atomic_set(&area->user_refs[i], 0); + } + area->free_count = nr_pages; area->ifq = ifq; /* we're only supporting one area per ifq for now */ @@ -130,6 +237,7 @@ static struct io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) ifq->if_rxq = -1; ifq->ctx = ctx; + spin_lock_init(&ifq->rq_lock); return ifq; } @@ -205,6 +313,10 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (!ifq->dev) goto err; + ret = io_zcrx_map_area(ifq, ifq->area); + if (ret) + goto err; + reg.offsets.rqes = sizeof(struct io_uring); reg.offsets.head = offsetof(struct io_uring, head); reg.offsets.tail = offsetof(struct io_uring, tail); @@ -238,7 +350,215 @@ void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) io_zcrx_ifq_free(ifq); } +static struct net_iov *__io_zcrx_get_free_niov(struct io_zcrx_area *area) +{ + unsigned niov_idx; + + lockdep_assert_held(&area->freelist_lock); + + niov_idx = area->freelist[--area->free_count]; + return &area->nia.niovs[niov_idx]; +} + +static void io_zcrx_return_niov_freelist(struct net_iov *niov) +{ + struct io_zcrx_area *area = io_zcrx_iov_to_area(niov); + + spin_lock_bh(&area->freelist_lock); + area->freelist[area->free_count++] = net_iov_idx(niov); + spin_unlock_bh(&area->freelist_lock); +} + +static void io_zcrx_return_niov(struct net_iov *niov) +{ + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false); +} + +static void io_zcrx_scrub(struct io_zcrx_ifq *ifq) +{ + struct io_zcrx_area *area = ifq->area; + int i; + + if (!area) + return; + + /* Reclaim back all buffers given to the user space. 
*/ + for (i = 0; i < area->nia.num_niovs; i++) { + struct net_iov *niov = &area->nia.niovs[i]; + int nr; + + if (!atomic_read(io_get_user_counter(niov))) + continue; + nr = atomic_xchg(io_get_user_counter(niov), 0); + if (nr && !page_pool_unref_netmem(net_iov_to_netmem(niov), nr)) + io_zcrx_return_niov(niov); + } +} + void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) { lockdep_assert_held(&ctx->uring_lock); + + if (ctx->ifq) + io_zcrx_scrub(ctx->ifq); +} + +static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq) +{ + u32 entries; + + entries = smp_load_acquire(&ifq->rq_ring->tail) - ifq->cached_rq_head; + return min(entries, ifq->rq_entries); +} + +static struct io_uring_zcrx_rqe *io_zcrx_get_rqe(struct io_zcrx_ifq *ifq, + unsigned mask) +{ + unsigned int idx = ifq->cached_rq_head++ & mask; + + return &ifq->rqes[idx]; +} + +static void io_zcrx_ring_refill(struct page_pool *pp, + struct io_zcrx_ifq *ifq) +{ + unsigned int mask = ifq->rq_entries - 1; + unsigned int entries; + netmem_ref netmem; + + spin_lock_bh(&ifq->rq_lock); + + entries = io_zcrx_rqring_entries(ifq); + entries = min_t(unsigned, entries, PP_ALLOC_CACHE_REFILL - pp->alloc.count); + if (unlikely(!entries)) { + spin_unlock_bh(&ifq->rq_lock); + return; + } + + do { + struct io_uring_zcrx_rqe *rqe = io_zcrx_get_rqe(ifq, mask); + struct io_zcrx_area *area; + struct net_iov *niov; + unsigned niov_idx, area_idx; + + area_idx = rqe->off >> IORING_ZCRX_AREA_SHIFT; + niov_idx = (rqe->off & ~IORING_ZCRX_AREA_MASK) >> PAGE_SHIFT; + + if (unlikely(rqe->__pad || area_idx)) + continue; + area = ifq->area; + + if (unlikely(niov_idx >= area->nia.num_niovs)) + continue; + niov_idx = array_index_nospec(niov_idx, area->nia.num_niovs); + + niov = &area->nia.niovs[niov_idx]; + if (!io_zcrx_put_niov_uref(niov)) + continue; + + netmem = net_iov_to_netmem(niov); + if (page_pool_unref_netmem(netmem, 1) != 0) + continue; + + if (unlikely(niov->pp != pp)) { + io_zcrx_return_niov(niov); + continue; + } + + 
page_pool_mp_return_in_cache(pp, netmem); + } while (--entries); + + smp_store_release(&ifq->rq_ring->head, ifq->cached_rq_head); + spin_unlock_bh(&ifq->rq_lock); +} + +static void io_zcrx_refill_slow(struct page_pool *pp, struct io_zcrx_ifq *ifq) +{ + struct io_zcrx_area *area = ifq->area; + + spin_lock_bh(&area->freelist_lock); + while (area->free_count && pp->alloc.count < PP_ALLOC_CACHE_REFILL) { + struct net_iov *niov = __io_zcrx_get_free_niov(area); + netmem_ref netmem = net_iov_to_netmem(niov); + + page_pool_set_pp_info(pp, netmem); + page_pool_mp_return_in_cache(pp, netmem); + + pp->pages_state_hold_cnt++; + trace_page_pool_state_hold(pp, netmem, pp->pages_state_hold_cnt); + } + spin_unlock_bh(&area->freelist_lock); +} + +static netmem_ref io_pp_zc_alloc_netmems(struct page_pool *pp, gfp_t gfp) +{ + struct io_zcrx_ifq *ifq = pp->mp_priv; + + /* pp should already be ensuring that */ + if (unlikely(pp->alloc.count)) + goto out_return; + + io_zcrx_ring_refill(pp, ifq); + if (likely(pp->alloc.count)) + goto out_return; + + io_zcrx_refill_slow(pp, ifq); + if (!pp->alloc.count) + return 0; +out_return: + return pp->alloc.cache[--pp->alloc.count]; +} + +static bool io_pp_zc_release_netmem(struct page_pool *pp, netmem_ref netmem) +{ + if (WARN_ON_ONCE(!netmem_is_net_iov(netmem))) + return false; + + if (page_pool_unref_netmem(netmem, 1) == 0) + io_zcrx_return_niov_freelist(netmem_to_net_iov(netmem)); + return false; } + +static int io_pp_zc_init(struct page_pool *pp) +{ + struct io_zcrx_ifq *ifq = pp->mp_priv; + + if (WARN_ON_ONCE(!ifq)) + return -EINVAL; + if (WARN_ON_ONCE(ifq->dev != pp->slow.netdev)) + return -EINVAL; + if (pp->dma_map) + return -EOPNOTSUPP; + if (pp->p.order != 0) + return -EOPNOTSUPP; + if (pp->p.dma_dir != DMA_FROM_DEVICE) + return -EOPNOTSUPP; + + percpu_ref_get(&ifq->ctx->refs); + return 0; +} + +static void io_pp_zc_destroy(struct page_pool *pp) +{ + struct io_zcrx_ifq *ifq = pp->mp_priv; + struct io_zcrx_area *area = ifq->area; + + if 
(WARN_ON_ONCE(area->free_count != area->nia.num_niovs)) + return; + percpu_ref_put(&ifq->ctx->refs); +} + +static int io_pp_nl_report(const struct page_pool *pool, struct sk_buff *rsp) +{ + return nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IO_URING, 0); +} + +static const struct memory_provider_ops io_uring_pp_zc_ops = { + .alloc_netmems = io_pp_zc_alloc_netmems, + .release_netmem = io_pp_zc_release_netmem, + .init = io_pp_zc_init, + .destroy = io_pp_zc_destroy, + .nl_report = io_pp_nl_report, +}; diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index 46988a1dbd54..beacf1ea6380 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -9,7 +9,9 @@ struct io_zcrx_area { struct net_iov_area nia; struct io_zcrx_ifq *ifq; + atomic_t *user_refs; + bool is_mapped; u16 area_id; struct page **pages; @@ -26,6 +28,8 @@ struct io_zcrx_ifq { struct io_uring *rq_ring; struct io_uring_zcrx_rqe *rqes; u32 rq_entries; + u32 cached_rq_head; + spinlock_t rq_lock; u32 if_rxq; struct net_device *dev; From patchwork Wed Dec 18 00:37:41 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912807 Received: from mail-pf1-f176.google.com (mail-pf1-f176.google.com [209.85.210.176]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 834131448E3 for ; Wed, 18 Dec 2024 00:38:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.176 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482297; cv=none; b=PpoN6ZPNJjoitoMPLXnmFdtERECmo3ixF5ia0hjwaY34zvgq4tMbN6Si8F4/UDZsvY2yzo5HiRPobD8ySqCvUeZzlhiVbhNEh2F3YgG+Y97CY2mDrXPTk9Q++bg0QffmWA9UIc0sUaiXUGyi61zDCdlMtrRbvPJZSOuqLu8cgT0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482297; c=relaxed/simple; bh=r5+Q6NGI0Im5ymImLQcEPiVdLQVXnkj7xCojU3Na86U=; 
h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=faJOHFA9YeCigzpu7vuAXFabCqYED5ffsEVvwbkOUlCObUCu0WmdmQfrOnAaN1PECt+4eowtFG/rJWCjo+USgLsjsCjXXM30evan6f/AKpaIY4oxBiWsp/Mbn2l7MnglP1t/egr2uuPvoOa3cVqQbu01ux7lUzQFWK+zb91QAzM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=TZixSjn4; arc=none smtp.client-ip=209.85.210.176 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="TZixSjn4" Received: by mail-pf1-f176.google.com with SMTP id d2e1a72fcca58-725ef0397aeso5281214b3a.2 for ; Tue, 17 Dec 2024 16:38:14 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482294; x=1735087094; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=04opTDRkAJr0Hlhy5ocRRUaRwKdBBJ3CEiTXa5KmQuM=; b=TZixSjn4MsceysCP+mupwiXK+ycs2/bfUElGzw69xruYsMYA0WAdiIsEcO0Ugo/M7e hz4ayOVckdk+FrGHidP8+PUw8KHKgZJbenztovz2PuIV3xi491dGy3UP6aWQnmhDB4QE sAEVFNgQVbdKJokbBBVAlhQxtliBj7epYHcQK/5/QZTa2nFDYeDuYmu7YjIN2ljRQrEi Nl2QvGesSiFo6XqoxO9fnjwss9wxpXSkDkS8EsFOphMpjXxbjH4ijEkqD2oMds2r7Ihd uvTa21XhaejfxBodkXxd/tpq5FgolYuJ0RhDfa/rh8/TJzkF0pVbGsM8t2FBL7aNksAs SYJw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482294; x=1735087094; h=content-transfer-encoding:mime-version:references:in-reply-to 
:message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=04opTDRkAJr0Hlhy5ocRRUaRwKdBBJ3CEiTXa5KmQuM=; b=CIYQYEKpMfxJLQFGWF+EuxLFeozhQ4dCHtkdetKlwajlFdvHFgnEgq5fybbzQjDThB Kd0BM4dLtuHUfKB9v7Tyrj6MydvKkGF6li7ZoFvp/L7+irE+Ue1zFbooOyUEaVz9Hb26 aLMtXo9Sqajdn24QZykxlkundVLoEwYPxte2i8V26b85Mqb34UMQr1+0SgDAZd+irv0W 5IOc3eVcAtyJJWFmxs2nGQSsrg+YgcNnYSIuo7RXNN3X/AzN4vJdNfEDQLwQ49i+AxSW TwNGIEwIz+azK1Nl1dSOhjJHtFCQ+zrJVG3WnnqmMDg9/SH7fwK8D1gB0W9pWURRCOot CSjg== X-Gm-Message-State: AOJu0YybZaEW3KjEZ1DZ5kLGm/a7oQe6+uOgRhFO6rxeuOY7hBKbd9nj M4M/Jq5KLFnFFPK5nYbupRRvIEGqOrLcSnK4fdqbUnrJilX6wc0R9es08AOEEFeh2xTUe6IbpXa A X-Gm-Gg: ASbGncuVCUzjpVAQ5LQOOuwbt6nqEZlVYOuvXnEO5602k4ZubQ5O6ww06pLCX7/NrXy N1nnWjTgwA5sRlmgBuGXkHNKWX2eg2lKRzj7zA/FQIuA7SK2KcG+N6ZuqWhHi+CHj7jLtscGbD5 LLpZ5xK9iyQLzse8MgEoQmUT7Zg9PA6CirJUNS+QVF/U+/p4UfRXoyLfVT5bw8T/uADlaXCkW7O ZIz+ncpQOhJQyr1dKmWzQUjX5BF8XFxEzHaXITQxQ== X-Google-Smtp-Source: AGHT+IHupOIfvDpdra086GB6dEqtjUtJeWxrGr8XLigJnszqrDxr+pXvJbao4SDX9DBx0TvTt+XTXg== X-Received: by 2002:a05:6a00:32cf:b0:725:dab9:f734 with SMTP id d2e1a72fcca58-72a8d23774amr1569568b3a.6.1734482293709; Tue, 17 Dec 2024 16:38:13 -0800 (PST) Received: from localhost ([2a03:2880:ff:1e::]) by smtp.gmail.com with ESMTPSA id d2e1a72fcca58-72918b78f0esm7546507b3a.121.2024.12.17.16.38.13 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:13 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 15/20] io_uring/zcrx: add io_recvzc request Date: Tue, 17 Dec 2024 16:37:41 -0800 Message-ID: <20241218003748.796939-16-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add io_uring opcode OP_RECV_ZC for doing zero copy reads out of a socket. Only the connection should land on the specific rx queue set up for zero copy, and the socket must be handled by the io_uring instance that the rx queue was registered for zero copy with. That's because net_iovs / buffers from our queue cannot be read by outside applications, nor is zero copy possible if traffic for the zero copy connection goes to another queue. This coordination is outside of the scope of this patch series. Also, any traffic directed to the zero copy enabled queue is immediately visible to the application, which is why CAP_NET_ADMIN is required at the registration step. Of course, no data is actually read out of the socket; it has already been copied by the netdev into userspace memory via DMA. OP_RECV_ZC reads skbs out of the socket and checks that their frags are indeed net_iovs that belong to io_uring. A cqe is queued for each one of these frags. Recall that each cqe is a big cqe, with the top half being an io_uring_zcrx_cqe. The cqe res field contains the len or error. The lower IORING_ZCRX_AREA_SHIFT bits of the struct io_uring_zcrx_cqe::off field contain the offset relative to the start of the zero copy area. The upper part of the off field is trivially zero, and will be used to carry the area id. For now, there is no limit as to how much work each OP_RECV_ZC request does. 
It will attempt to drain a socket of all available data. This request always operates in multishot mode. Signed-off-by: David Wei --- include/uapi/linux/io_uring.h | 2 + io_uring/io_uring.h | 10 ++ io_uring/net.c | 72 +++++++++++++ io_uring/opdef.c | 16 +++ io_uring/zcrx.c | 190 +++++++++++++++++++++++++++++++++- io_uring/zcrx.h | 13 +++ 6 files changed, 302 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index e251f28507ce..b919a541d44f 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -87,6 +87,7 @@ struct io_uring_sqe { union { __s32 splice_fd_in; __u32 file_index; + __u32 zcrx_ifq_idx; __u32 optlen; struct { __u16 addr_len; @@ -278,6 +279,7 @@ enum io_uring_op { IORING_OP_FTRUNCATE, IORING_OP_BIND, IORING_OP_LISTEN, + IORING_OP_RECV_ZC, /* this goes last, obviously */ IORING_OP_LAST, diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 032758b28d78..2b1ce5539bfe 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -184,6 +184,16 @@ static inline bool io_get_cqe(struct io_ring_ctx *ctx, struct io_uring_cqe **ret return io_get_cqe_overflow(ctx, ret, false); } +static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx, + struct io_uring_cqe **cqe_ret) +{ + io_lockdep_assert_cq_locked(ctx); + + ctx->cq_extra++; + ctx->submit_state.cq_flush = true; + return io_get_cqe(ctx, cqe_ret); +} + static __always_inline bool io_fill_cqe_req(struct io_ring_ctx *ctx, struct io_kiocb *req) { diff --git a/io_uring/net.c b/io_uring/net.c index 8457408194e7..5d8b9a016766 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -16,6 +16,7 @@ #include "net.h" #include "notif.h" #include "rsrc.h" +#include "zcrx.h" #if defined(CONFIG_NET) struct io_shutdown { @@ -88,6 +89,13 @@ struct io_sr_msg { */ #define MULTISHOT_MAX_RETRY 32 +struct io_recvzc { + struct file *file; + unsigned msg_flags; + u16 flags; + struct io_zcrx_ifq *ifq; +}; + int io_shutdown_prep(struct io_kiocb 
*req, const struct io_uring_sqe *sqe) { struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -1209,6 +1217,70 @@ int io_recv(struct io_kiocb *req, unsigned int issue_flags) return ret; } +int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc); + unsigned ifq_idx; + + if (unlikely(sqe->file_index || sqe->addr2 || sqe->addr || + sqe->len || sqe->addr3)) + return -EINVAL; + + ifq_idx = READ_ONCE(sqe->zcrx_ifq_idx); + if (ifq_idx != 0) + return -EINVAL; + zc->ifq = req->ctx->ifq; + if (!zc->ifq) + return -EINVAL; + + zc->flags = READ_ONCE(sqe->ioprio); + zc->msg_flags = READ_ONCE(sqe->msg_flags); + if (zc->msg_flags) + return -EINVAL; + if (zc->flags & ~(IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT)) + return -EINVAL; + /* multishot required */ + if (!(zc->flags & IORING_RECV_MULTISHOT)) + return -EINVAL; + /* All data completions are posted as aux CQEs. */ + req->flags |= REQ_F_APOLL_MULTISHOT; + + return 0; +} + +int io_recvzc(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_recvzc *zc = io_kiocb_to_cmd(req, struct io_recvzc); + struct socket *sock; + int ret; + + if (!(req->flags & REQ_F_POLLED) && + (zc->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + ret = io_zcrx_recv(req, zc->ifq, sock, zc->msg_flags | MSG_DONTWAIT, + issue_flags); + if (unlikely(ret <= 0) && ret != -EAGAIN) { + if (ret == -ERESTARTSYS) + ret = -EINTR; + + req_set_fail(req); + io_req_set_res(req, ret, 0); + + if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_STOP_MULTISHOT; + return IOU_OK; + } + + if (issue_flags & IO_URING_F_MULTISHOT) + return IOU_ISSUE_SKIP_COMPLETE; + return -EAGAIN; +} + void io_send_zc_cleanup(struct io_kiocb *req) { struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg); diff --git a/io_uring/opdef.c b/io_uring/opdef.c index 
3de75eca1c92..6ae00c0af9a8 100644 --- a/io_uring/opdef.c +++ b/io_uring/opdef.c @@ -36,6 +36,7 @@ #include "waitid.h" #include "futex.h" #include "truncate.h" +#include "zcrx.h" static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags) { @@ -513,6 +514,18 @@ const struct io_issue_def io_issue_defs[] = { .async_size = sizeof(struct io_async_msghdr), #else .prep = io_eopnotsupp_prep, +#endif + }, + [IORING_OP_RECV_ZC] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .ioprio = 1, +#if defined(CONFIG_NET) + .prep = io_recvzc_prep, + .issue = io_recvzc, +#else + .prep = io_eopnotsupp_prep, #endif }, }; @@ -744,6 +757,9 @@ const struct io_cold_def io_cold_defs[] = { [IORING_OP_LISTEN] = { .name = "LISTEN", }, + [IORING_OP_RECV_ZC] = { + .name = "RECV_ZC", + }, }; const char *io_uring_get_opcode(u8 opcode) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 42098bc1a60f..1122c80502d6 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -12,6 +12,8 @@ #include #include +#include +#include #include @@ -80,7 +82,12 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) #define IO_RQ_MAX_ENTRIES 32768 -__maybe_unused +struct io_zcrx_args { + struct io_kiocb *req; + struct io_zcrx_ifq *ifq; + struct socket *sock; +}; + static const struct memory_provider_ops io_uring_pp_zc_ops; static inline struct io_zcrx_area *io_zcrx_iov_to_area(const struct net_iov *niov) @@ -107,6 +114,11 @@ static bool io_zcrx_put_niov_uref(struct net_iov *niov) return true; } +static void io_zcrx_get_niov_uref(struct net_iov *niov) +{ + atomic_inc(io_get_user_counter(niov)); +} + static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq, struct io_uring_zcrx_ifq_reg *reg, struct io_uring_region_desc *rd) @@ -562,3 +574,179 @@ static const struct memory_provider_ops io_uring_pp_zc_ops = { .destroy = io_pp_zc_destroy, .nl_report = io_pp_nl_report, }; + +static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, + struct 
io_zcrx_ifq *ifq, int off, int len) +{ + struct io_uring_zcrx_cqe *rcqe; + struct io_zcrx_area *area; + struct io_uring_cqe *cqe; + u64 offset; + + if (!io_defer_get_uncommited_cqe(req->ctx, &cqe)) + return false; + + cqe->user_data = req->cqe.user_data; + cqe->res = len; + cqe->flags = IORING_CQE_F_MORE; + + area = io_zcrx_iov_to_area(niov); + offset = off + (net_iov_idx(niov) << PAGE_SHIFT); + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + rcqe->off = offset + ((u64)area->area_id << IORING_ZCRX_AREA_SHIFT); + rcqe->__pad = 0; + return true; +} + +static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + const skb_frag_t *frag, int off, int len) +{ + struct net_iov *niov; + + if (unlikely(!skb_frag_is_net_iov(frag))) + return -EOPNOTSUPP; + + niov = netmem_to_net_iov(frag->netmem); + if (niov->pp->mp_ops != &io_uring_pp_zc_ops || + niov->pp->mp_priv != ifq) + return -EFAULT; + + if (!io_zcrx_queue_cqe(req, niov, ifq, off + skb_frag_off(frag), len)) + return -ENOSPC; + + /* + * Prevent it from being recycled while user is accessing it. + * It has to be done before grabbing a user reference. 
+ */ + page_pool_ref_netmem(net_iov_to_netmem(niov)); + io_zcrx_get_niov_uref(niov); + return len; +} + +static int +io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, + unsigned int offset, size_t len) +{ + struct io_zcrx_args *args = desc->arg.data; + struct io_zcrx_ifq *ifq = args->ifq; + struct io_kiocb *req = args->req; + struct sk_buff *frag_iter; + unsigned start, start_off; + int i, copy, end, off; + int ret = 0; + + start = skb_headlen(skb); + start_off = offset; + + if (offset < start) + return -EOPNOTSUPP; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag; + + if (WARN_ON(start > offset + len)) + return -EFAULT; + + frag = &skb_shinfo(skb)->frags[i]; + end = start + skb_frag_size(frag); + + if (offset < end) { + copy = end - offset; + if (copy > len) + copy = len; + + off = offset - start; + ret = io_zcrx_recv_frag(req, ifq, frag, off, copy); + if (ret < 0) + goto out; + + offset += ret; + len -= ret; + if (len == 0 || ret != copy) + goto out; + } + start = end; + } + + skb_walk_frags(skb, frag_iter) { + if (WARN_ON(start > offset + len)) + return -EFAULT; + + end = start + frag_iter->len; + if (offset < end) { + copy = end - offset; + if (copy > len) + copy = len; + + off = offset - start; + ret = io_zcrx_recv_skb(desc, frag_iter, off, copy); + if (ret < 0) + goto out; + + offset += ret; + len -= ret; + if (len == 0 || ret != copy) + goto out; + } + start = end; + } + +out: + if (offset == start_off) + return ret; + return offset - start_off; +} + +static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct sock *sk, int flags, + unsigned issue_flags) +{ + struct io_zcrx_args args = { + .req = req, + .ifq = ifq, + .sock = sk->sk_socket, + }; + read_descriptor_t rd_desc = { + .count = 1, + .arg.data = &args, + }; + int ret; + + lock_sock(sk); + ret = tcp_read_sock(sk, &rd_desc, io_zcrx_recv_skb); + if (ret <= 0) { + if (ret < 0 || sock_flag(sk, SOCK_DONE)) + goto out; + if (sk->sk_err) + 
ret = sock_error(sk); + else if (sk->sk_shutdown & RCV_SHUTDOWN) + goto out; + else if (sk->sk_state == TCP_CLOSE) + ret = -ENOTCONN; + else + ret = -EAGAIN; + } else if (sock_flag(sk, SOCK_DONE)) { + /* Make it to retry until it finally gets 0. */ + if (issue_flags & IO_URING_F_MULTISHOT) + ret = IOU_REQUEUE; + else + ret = -EAGAIN; + } +out: + release_sock(sk); + return ret; +} + +int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct socket *sock, unsigned int flags, + unsigned issue_flags) +{ + struct sock *sk = sock->sk; + const struct proto *prot = READ_ONCE(sk->sk_prot); + + if (prot->recvmsg != tcp_recvmsg) + return -EPROTONOSUPPORT; + + sock_rps_record_flow(sk); + return io_zcrx_tcp_recvmsg(req, ifq, sk, flags, issue_flags); +} diff --git a/io_uring/zcrx.h b/io_uring/zcrx.h index beacf1ea6380..65e92756720f 100644 --- a/io_uring/zcrx.h +++ b/io_uring/zcrx.h @@ -3,6 +3,7 @@ #define IOU_ZC_RX_H #include +#include #include #include @@ -41,6 +42,9 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg); void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx); void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx); +int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct socket *sock, unsigned int flags, + unsigned issue_flags); #else static inline int io_register_zcrx_ifq(struct io_ring_ctx *ctx, struct io_uring_zcrx_ifq_reg __user *arg) @@ -53,6 +57,15 @@ static inline void io_unregister_zcrx_ifqs(struct io_ring_ctx *ctx) static inline void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) { } +static inline int io_zcrx_recv(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + struct socket *sock, unsigned int flags, + unsigned issue_flags) +{ + return -EOPNOTSUPP; +} #endif +int io_recvzc(struct io_kiocb *req, unsigned int issue_flags); +int io_recvzc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe); + #endif From patchwork Wed Dec 18 00:37:42 2024 Content-Type: text/plain; 
charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912808 Received: from mail-pf1-f177.google.com (mail-pf1-f177.google.com [209.85.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9D0991465B3 for ; Wed, 18 Dec 2024 00:38:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.177 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482298; cv=none; b=eipkf89cTd11azF82f5qMt5XFOLejXdoF4EivGoppCCmUDpnKk+QrlJ23zIuSQuBn0vAzICxvTg1LTg/4KW511Bid67C+RhJMXKJTR4TfB9TKfQPANHjsWBH7Hvlq5a6lCrUX2TGZ/oSPR2w2ZtrSOVoSByMWLjpgXJZ0kRy34k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482298; c=relaxed/simple; bh=NN6l70wJTo+V3AXDIpdVN8AsIz5j8mOAPtjJ/ZcVfqo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=cQ0RjqXOYj+aeyw8vRfviGbuGtZUUAIoAVKFzlTibhRNv6t0qI1QNSLwTZ7QDoKRWNhEU1F4yZEQtZlQw2wBQDNmFJ7FueHGNtkHmPdsrjoyc1pFSif6p+eWj0XxrNUXvp0s2WoSOncyCn09rewi/MvLy+1M7IZ2DWp1gFjwIXk= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=UU+catRN; arc=none smtp.client-ip=209.85.210.177 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="UU+catRN" Received: by mail-pf1-f177.google.com with SMTP id d2e1a72fcca58-725f4623df7so5267531b3a.2 for ; Tue, 17 Dec 2024 
16:38:15 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482295; x=1735087095; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=eMOrlUp6jAnTkKL1sSea4dlKz2vU1sfuxmAW5sBPRZc=; b=UU+catRNRM7NL9gQG1SQuvh9L3Fyf9SWU/sdsKwAWkdn2SRiD3AVKDLD2qtDkyPexn E6Wl3MadEcKnrHElY3+qioBEGzT0Df9Ppz5K+tFBGfqJBinOnKYobDha8KjnnRiyPOxV dPd0vWDyPtObWFlcxflJjbILmD5yN5OH6UKFXugUymLlaZgVXz15aoMQWlrjdszd8LmA 7zS+v32/KxSfII4mY1OdGaSdQHkyIxxnUbi2s0znQMKbfpMSpbEqHSu04fh2rDQavaaW QyewwKa4Jmph2kn1uuNg/xZsQE7cHm4jBoinDnzy16fxKlgzfGotUViNGnNlOHhFkHbg JYEw== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482295; x=1735087095; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=eMOrlUp6jAnTkKL1sSea4dlKz2vU1sfuxmAW5sBPRZc=; b=eAstW+C5xRXGXlNBBzw5a7R52kkLx1SVPxis9EMHcprlKKelG4V1pK/wDRw7JUMpEH ATYgIzpsaAU0F/LxahHkivq0eiFSBpgGLGt7zhqk5oE9q5DsquT8b7XlN7acjXjdFRKa sJ0yWAEyAgXuKY2J7n1ue2TGmb7NvPMYw4LRsE7udyiqeTxoJO0ipjLkgotD5Gxt2QF9 f71SF9uhhesd37FmdHrKYbAydAX8zIRrvZ334EnG90LYFym8bUjgCSwgdNj3eJCC0fKz gSA0a9DHE5+y5A6wsJOmXVzkFJB8OU67HN2VySotA5Y8/cn1rstNgTxj6P9jWyJqf7ZX Mhcw== X-Gm-Message-State: AOJu0YzzZTyI/5rVXx/X0//O+LMGtZANxajrlJ/YFR0/aF+RojZN1B+x CXa4BZdD+QMLCcTOQ2JzjI967any/OtW6KlMXWkjPRDCRzm5zssoBjGDVIHN3oQ+gPh3WXYHWvg 8 X-Gm-Gg: ASbGnctNSV9MzMv25yP7+NDS4HOUh7vxWT8zLssOAgEkid3bdpOIAcOgqCFYGJ28Ri+ 58hDA8FtcMIEqB1w2vdrM/QX7OQ7YgKYgWksXoYTy67al4xNjEPDlIPMrrYPioS1AC31k18WFgb W5IJsdCYv58VF5UYDKyEJ5fhH0ZqRFIEerjXX/TqWBM/zp3WvMzjchuly/l2fH40RmjthGcYE1u dXaN1P6eogglTaaxLLA2p3P1gFbB/NpoEEaIhJV X-Google-Smtp-Source: AGHT+IGvs56gJHOktJseiAi7TrAKx7QXPL0L6ug4z9uFCs5Je1RvXtfDkS7+qoqOQ3y1lhNxg5hYlw== X-Received: by 2002:a17:90b:2e4a:b0:2ee:90a1:5d42 with SMTP id 
98e67ed59e1d1-2f2e8f55a1dmr1520309a91.0.1734482294978; Tue, 17 Dec 2024 16:38:14 -0800 (PST) Received: from localhost ([2a03:2880:ff:c::]) by smtp.gmail.com with ESMTPSA id 98e67ed59e1d1-2f2ed0d84d0sm123242a91.0.2024.12.17.16.38.14 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:14 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 16/20] io_uring/zcrx: set pp memory provider for an rx queue Date: Tue, 17 Dec 2024 16:37:42 -0800 Message-ID: <20241218003748.796939-17-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Set the page pool memory provider for the rx queue configured for zero copy to io_uring. Then the rx queue is reset using netdev_rx_queue_restart() and netdev core + page pool will take care of filling the rx queue from the io_uring zero copy memory provider. For now, there is only one ifq so its destruction happens implicitly during io_uring cleanup. 
Reviewed-by: Jens Axboe Signed-off-by: David Wei --- io_uring/zcrx.c | 83 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 75 insertions(+), 8 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 1122c80502d6..756c78c0920e 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -10,11 +10,12 @@ #include #include - -#include +#include #include #include +#include + #include #include "io_uring.h" @@ -119,6 +120,65 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov) atomic_inc(io_get_user_counter(niov)); } +static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx) +{ + struct netdev_rx_queue *rxq; + struct net_device *dev = ifq->dev; + int ret; + + ASSERT_RTNL(); + + if (ifq_idx >= dev->num_rx_queues) + return -EINVAL; + ifq_idx = array_index_nospec(ifq_idx, dev->num_rx_queues); + + rxq = __netif_get_rx_queue(ifq->dev, ifq_idx); + if (rxq->mp_params.mp_priv) + return -EEXIST; + + ifq->if_rxq = ifq_idx; + rxq->mp_params.mp_ops = &io_uring_pp_zc_ops; + rxq->mp_params.mp_priv = ifq; + ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq); + if (ret) + goto fail; + return 0; +fail: + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + ifq->if_rxq = -1; + return ret; +} + +static void io_close_zc_rxq(struct io_zcrx_ifq *ifq) +{ + struct netdev_rx_queue *rxq; + int err; + + if (ifq->if_rxq == -1) + return; + + rtnl_lock(); + if (WARN_ON_ONCE(ifq->if_rxq >= ifq->dev->num_rx_queues)) { + rtnl_unlock(); + return; + } + + rxq = __netif_get_rx_queue(ifq->dev, ifq->if_rxq); + + WARN_ON_ONCE(rxq->mp_params.mp_priv != ifq); + + rxq->mp_params.mp_ops = NULL; + rxq->mp_params.mp_priv = NULL; + + err = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq); + if (err) + pr_devel("io_uring: can't restart a queue on zcrx close\n"); + + rtnl_unlock(); + ifq->if_rxq = -1; +} + static int io_allocate_rbuf_ring(struct io_zcrx_ifq *ifq, struct io_uring_zcrx_ifq_reg *reg, struct io_uring_region_desc *rd) @@ -255,6 +315,8 @@ static struct 
io_zcrx_ifq *io_zcrx_ifq_alloc(struct io_ring_ctx *ctx) static void io_zcrx_ifq_free(struct io_zcrx_ifq *ifq) { + io_close_zc_rxq(ifq); + if (ifq->area) io_zcrx_free_area(ifq->area); @@ -317,7 +379,6 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, goto err; ifq->rq_entries = reg.rq_entries; - ifq->if_rxq = reg.if_rxq; ret = -ENODEV; ifq->dev = netdev_get_by_index(current->nsproxy->net_ns, reg.if_idx, @@ -329,16 +390,20 @@ int io_register_zcrx_ifq(struct io_ring_ctx *ctx, if (ret) goto err; + rtnl_lock(); + ret = io_open_zc_rxq(ifq, reg.if_rxq); + rtnl_unlock(); + if (ret) + goto err; + reg.offsets.rqes = sizeof(struct io_uring); reg.offsets.head = offsetof(struct io_uring, head); reg.offsets.tail = offsetof(struct io_uring, tail); if (copy_to_user(arg, &reg, sizeof(reg)) || - copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd))) { - ret = -EFAULT; - goto err; - } - if (copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) { + copy_to_user(u64_to_user_ptr(reg.region_ptr), &rd, sizeof(rd)) || + copy_to_user(u64_to_user_ptr(reg.area_ptr), &area, sizeof(area))) { + io_close_zc_rxq(ifq); ret = -EFAULT; goto err; } @@ -415,6 +480,8 @@ void io_shutdown_zcrx_ifqs(struct io_ring_ctx *ctx) if (ctx->ifq) io_zcrx_scrub(ctx->ifq); + + io_close_zc_rxq(ctx->ifq); } static inline u32 io_zcrx_rqring_entries(struct io_zcrx_ifq *ifq) From patchwork Wed Dec 18 00:37:43 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912809 Received: from mail-pf1-f172.google.com (mail-pf1-f172.google.com [209.85.210.172]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id E6BEA146A68 for ; Wed, 18 Dec 2024 00:38:16 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; 
s=arc-20240116; t=1734482299; cv=none; b=ktndEaUt/oy+k6dAYCLmbRs/O1IyVhYaWXDy37JMvDKFVvfmsUQowlNuFUD95gqnM8xybuAeEzfSqo7OOiqqzxDhvJzJ8ehpeSmheopa7qBdt4w3usUMgTejdK2C4iuhzMAoK32P+8eYw8GEwdAQbNngMz3IVHwrva0y9RCZZUQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482299; c=relaxed/simple; bh=lVCOuuWDfuKOXGIrHnPB8CsTsDheB1WzUxlQN8EhV+M=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=NUEf3fwjBujEBaah38HT29e0bPYCWak90/67oEDvbNhiYewoVmWq8JOJki9PNQ8GEb7Ghp2nbnPQHd8gFeCGT9rRyXVSqT4IlFhbLaCS+UuoBubGYyO5gBCW0cuf8yJ8e5TS4+gBrOZjIZKOQS+q8J0NzKUXj8SC+Uy7eum2bI8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=kHGnU2RC; arc=none smtp.client-ip=209.85.210.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="kHGnU2RC" Received: by mail-pf1-f172.google.com with SMTP id d2e1a72fcca58-728ea1e0bdbso4629158b3a.0 for ; Tue, 17 Dec 2024 16:38:16 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482296; x=1735087096; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=AAmVgJjEX2zRkXc1WMFyYK6FynX6DxypvcjR34FvDLQ=; b=kHGnU2RCcKoHe04FV86GSQFM6UfD1bAvIYKBh7116FMC0BdfQP+bqHGXC/x+EXGW6E FsBAjKKNL0/U3M63HxXYlGp+zpeS7guqQ9PRwBLyApKyHQulebHsbLiqyV8Ka5FkwGdz 
MTGDpbQvjTJlD1XyiUv3+85cLH8h8OD+XhUKHkQSGW595TF8kyjQbyhYWWPc42z0wUh6 c4tqR8XVI2G7qyEgp6Yn5APF0Iv5pWOsvCjXVbXHWQYT7za2mphJImecjtTsXYxHHDMm 9voALYEaFSSiM1bDpXugs4AJNZhX35X9Dbtp/KAFnOX1W7XIcgGZ7wkA2xf4n+dmsx0/ xY6w== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482296; x=1735087096; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=AAmVgJjEX2zRkXc1WMFyYK6FynX6DxypvcjR34FvDLQ=; b=lswvriKREsHDw74AaBtkBfCw52hcy8JbEmPlcywG2seSDxjGAf0Vn6LyuPeqZLlsP3 jOxWmLGKT2kLW1y/C8OxG+4m0jtc7ox6OSW3CeAADNrNM1v8Oh+sPrGVWOBNHi37bOcT yuu0SqG1t8BQsnsMJlS0VfUJRXJSvJtzaxEbWLI4SM+KpluZQkJ2XDoXBsEFQhRehj4n BQnIMHZnpddsOSpbo7v/0nJQ7xSNIsd8+wR+vHS4amwzF3m5ZdQovVIg7sgpeT9KxYj0 hTF/9ThafbOl0Vr9FGN8uDgDtDxQv+L/FWrKh+UFCjbWl/hlEidYXyPt61fzFuIFWt7B Oyyg== X-Gm-Message-State: AOJu0YxwAWCamD/XmV5MO+uU9vs8MZAdVPATpvMlfids7o+Aqc/zOpCP Tnipr+U1g0rraYNu/XgXAtkpwiE8CWo5ZUSDz2V+VxK/LkYwXW7Ybwpx3+VmEi5jF213RfRZf2z m X-Gm-Gg: ASbGnctkTT7fDFa+qBm13GZ0TX+FniXI4LMP1kKxBFKeSy9KJtkyRO6raEV5LktYGja IaWK3R9YSYiXYA8dCe6W7T8Ta5H92kOKxZ1BcplCqkV0OFkdBqt+st4WxBnqoXocIEf3fdxwkq2 FAXg2lDF4J/H7oaU6v5s+d+TUHuEkAmz9Qy7zwxXP2hPoJsut3gAry8r9jQ/KcpJ2bZ/5G5eT1A wNW3ceinsIuv/bRpYd8ndIik2NHToVrqJYt4WDgEg== X-Google-Smtp-Source: AGHT+IGBYf6cB6qBpXVxmhnTAAXi9dKtoUVUeDwYV+80YBoMFlVtpBJajP377jN3aOi0ncdTXU6FdA== X-Received: by 2002:a05:6a20:4309:b0:1e0:d8c1:cfe2 with SMTP id adf61e73a8af0-1e5b487df65mr1755623637.34.1734482296299; Tue, 17 Dec 2024 16:38:16 -0800 (PST) Received: from localhost ([2a03:2880:ff:13::]) by smtp.gmail.com with ESMTPSA id d2e1a72fcca58-72918af0eb1sm7315461b3a.84.2024.12.17.16.38.15 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:15 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 17/20] io_uring/zcrx: throttle receive requests Date: Tue, 17 Dec 2024 16:37:43 -0800 Message-ID: <20241218003748.796939-18-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov io_zcrx_tcp_recvmsg() continues until it fails or there is nothing to receive. If the other side sends fast enough, we might get stuck in io_zcrx_tcp_recvmsg() producing more and more CQEs but not letting the user handle them, leading to unbounded latencies. Break out of it based on an arbitrarily chosen limit; the upper layer will either return to userspace or requeue the request. Reviewed-by: Jens Axboe Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- io_uring/net.c | 2 ++ io_uring/zcrx.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index 5d8b9a016766..86eaba37e739 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -1267,6 +1267,8 @@ int io_recvzc(struct io_kiocb *req, unsigned int issue_flags) if (unlikely(ret <= 0) && ret != -EAGAIN) { if (ret == -ERESTARTSYS) ret = -EINTR; + if (ret == IOU_REQUEUE) + return IOU_REQUEUE; req_set_fail(req); io_req_set_res(req, ret, 0); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 756c78c0920e..ffa388fbb1e4 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -83,10 +83,13 @@ static int io_zcrx_map_area(struct io_zcrx_ifq *ifq, struct io_zcrx_area *area) #define IO_RQ_MAX_ENTRIES 32768 +#define IO_SKBS_PER_CALL_LIMIT 20 + struct io_zcrx_args { struct io_kiocb *req; struct io_zcrx_ifq *ifq; struct socket *sock; + unsigned nr_skbs; }; static const struct memory_provider_ops io_uring_pp_zc_ops; @@ 
-702,6 +705,9 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, int i, copy, end, off; int ret = 0; + if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT)) + return -EAGAIN; + start = skb_headlen(skb); start_off = offset; @@ -792,6 +798,9 @@ static int io_zcrx_tcp_recvmsg(struct io_kiocb *req, struct io_zcrx_ifq *ifq, ret = -ENOTCONN; else ret = -EAGAIN; + } else if (unlikely(args.nr_skbs > IO_SKBS_PER_CALL_LIMIT) && + (issue_flags & IO_URING_F_MULTISHOT)) { + ret = IOU_REQUEUE; } else if (sock_flag(sk, SOCK_DONE)) { /* Make it to retry until it finally gets 0. */ if (issue_flags & IO_URING_F_MULTISHOT) From patchwork Wed Dec 18 00:37:44 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912810 Received: from mail-pl1-f175.google.com (mail-pl1-f175.google.com [209.85.214.175]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 475791494BF for ; Wed, 18 Dec 2024 00:38:18 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.175 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482299; cv=none; b=R5IS/T4CCe6LpKanV+PB9NFOh30GxqygPhOoDwQJ0xw+d0hdbBtsJLhxpDjfd6hhkdUJSZBq0S79fjCIGfrO8ZTHApdlsRTJkEsCiunY+6O0iUlIly0shvf+Mo1DIS8EXIK/Yn2Z3+jUoj5bDYOG0T4gbA9ioAYlA6aI3tvc7IU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482299; c=relaxed/simple; bh=JKJzvHyt5sznHV1sOjLpEDcqYe2RMVMKXPgYpcTrlOo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=cm8hJi0/Q3Dv4JF+a8lK7/YSrJZfahGhBAymkAWZX1jNDEDx0rIFBOO8G7u5wF0V/W0M5TDWFrLFrNzZ7Dhdt7Be41iwJaGw4UF293LN/mrehgrQ+ORdHbVsoIRNwdJvnmNiKoRCZTwVa3kj/sRLKr913GiiXlqX8xTdELYgP34= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; 
spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=sxsESeaC; arc=none smtp.client-ip=209.85.214.175 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="sxsESeaC" Received: by mail-pl1-f175.google.com with SMTP id d9443c01a7336-21644aca3a0so70193855ad.3 for ; Tue, 17 Dec 2024 16:38:18 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482298; x=1735087098; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=IhSWTqbUE9kI0iLFqMR6WJU8T9mdHQ80JyDSL7z9+fU=; b=sxsESeaCtk1BxmPNQVI2iZdz3i6B1mNy1NDWWhbJPybuZdoSQz9SmnRFGyeLZOJQo3 HBrwO0neNELrip7Rc3+sEydqxUXFvzRrob7H5h+o9i/7JknmCydGKz5wyLc0ACVAl1dd 53aTHbvALxOPrBJrvXqsvriUAbUJwGBYsDDDEimqyHDWL3trP5434/8YjgqyultchfDA 6yaHyX68Hvo2ESo2q86+QnylAXUZQsRYzm7gRsgRi1yfFBBq8I56xkh0b3PLkyn6wCW3 4ingwzHd9Nec/M4prDTwZcu2YFvqgXusnIN3aE6FVDPRs6hB6OnQBUde8Dq2/w7ZiDrF L39Q== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482298; x=1735087098; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=IhSWTqbUE9kI0iLFqMR6WJU8T9mdHQ80JyDSL7z9+fU=; b=t+vRdfQ4FVkACkYe6vPlR0gviUFptxE6n2kCQ8fp74owxy4K1zRT16W01AaVS6ieVq dsr7eYw39xmbmLoaG7fqwT0807HNnchu3ylEc/BtvBkb8CqXzGjFpfnRd+oh54mR5Vji MG/H4NQQbXrBFgMUpF7d1yTJtB2CUZAjpjEs1cwxqiFcHo4gumOYstRDdgoL2Yww0f4m 
UKebLmcK/NBZ9z19npbVDZW2XOFLC0lh0oB3YXCAcpgIuWRMkSck4zfqrQgYIy1VyO8P qq0Dr8exS1oCjq/Ri7vu6INVnpQGsoRF2Y1yYOwctCMkUyZgsl4vIDAelIS/gp9z4ZID w2VA== X-Gm-Message-State: AOJu0Ywehk63h40QNeecj5sJZn/bxM6oqxfSujtNwMtmyw23LGZLuRWM u2xrNRPBvO6RuMf/0ggCjt0SHkUamD/mdqlIUP9kLEIpINxJFYRorovkh+V2GKFyPnoSmWfdYXD a X-Gm-Gg: ASbGncthj89KoZidLxXdtmFMf+C+kjmGOpcYY8eEG19yFK03Uj/jziWyYqB1L6DrhMd ee4EiRJffB4EKmwxr7emi/ThpuLxq7/C9MwV0InwDfJMoSTWPx0KjlcnMOAbvF6JqlFRUwYVGVg qje6UXuliMgX//2dRg+XXQlPnwDud4utGPilxgS2f5tG7M9B1ku0k73p/pLeCOKa/Hu3z28fVCO ZB5CX2x29B9Gtryiul6Nc2q5DOklHgeaPaVodUw X-Google-Smtp-Source: AGHT+IGue6sNC7s3+IMK5xCRtkElyUkrFt1nXyfkNZq489VEfSmxfnyMXPvKqFVicKTKOfcl3zjMmw== X-Received: by 2002:a17:902:db0f:b0:216:3083:d03d with SMTP id d9443c01a7336-218d7247a16mr12881395ad.44.1734482297508; Tue, 17 Dec 2024 16:38:17 -0800 (PST) Received: from localhost ([2a03:2880:ff:b::]) by smtp.gmail.com with ESMTPSA id d9443c01a7336-218a1e728c1sm65113635ad.278.2024.12.17.16.38.16 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:17 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 18/20] io_uring/zcrx: add copy fallback Date: Tue, 17 Dec 2024 16:37:44 -0800 Message-ID: <20241218003748.796939-19-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 From: Pavel Begunkov There are scenarios in which the zerocopy path can get a kernel buffer instead of a net_iov and needs to copy it to the user, whether it is because of mis-steering or simply getting an skb with the linear part. 
In this case, grab a net_iov, copy into it and return it to the user as normally. At the moment the user doesn't get any indication whether there was a copy or not, which is left for follow up work. Signed-off-by: Pavel Begunkov Signed-off-by: David Wei --- io_uring/zcrx.c | 123 +++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index ffa388fbb1e4..92b4d91f97f7 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -123,6 +124,13 @@ static void io_zcrx_get_niov_uref(struct net_iov *niov) atomic_inc(io_get_user_counter(niov)); } +static inline struct page *io_zcrx_iov_page(const struct net_iov *niov) +{ + struct io_zcrx_area *area = io_zcrx_iov_to_area(niov); + + return area->pages[net_iov_idx(niov)]; +} + static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx) { struct netdev_rx_queue *rxq; @@ -145,6 +153,7 @@ static int io_open_zc_rxq(struct io_zcrx_ifq *ifq, unsigned ifq_idx) ret = netdev_rx_queue_restart(ifq->dev, ifq->if_rxq); if (ret) goto fail; + return 0; fail: rxq->mp_params.mp_ops = NULL; @@ -453,6 +462,11 @@ static void io_zcrx_return_niov(struct net_iov *niov) { netmem_ref netmem = net_iov_to_netmem(niov); + if (!niov->pp) { + /* copy fallback allocated niovs */ + io_zcrx_return_niov_freelist(niov); + return; + } page_pool_put_unrefed_netmem(niov->pp, netmem, -1, false); } @@ -668,13 +682,95 @@ static bool io_zcrx_queue_cqe(struct io_kiocb *req, struct net_iov *niov, return true; } +static struct net_iov *io_zcrx_alloc_fallback(struct io_zcrx_area *area) +{ + struct net_iov *niov = NULL; + + spin_lock_bh(&area->freelist_lock); + if (area->free_count) + niov = __io_zcrx_get_free_niov(area); + spin_unlock_bh(&area->freelist_lock); + + if (niov) { + page_pool_fragment_netmem(net_iov_to_netmem(niov), 1); + page_pool_clear_pp_info(net_iov_to_netmem(niov)); + } + return niov; +} + 
+static ssize_t io_zcrx_copy_chunk(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + void *src_base, struct page *src_page, + unsigned int src_offset, size_t len) +{ + struct io_zcrx_area *area = ifq->area; + size_t copied = 0; + int ret = 0; + + while (len) { + size_t copy_size = min_t(size_t, PAGE_SIZE, len); + const int dst_off = 0; + struct net_iov *niov; + struct page *dst_page; + void *dst_addr; + + niov = io_zcrx_alloc_fallback(area); + if (!niov) { + ret = -ENOMEM; + break; + } + + dst_page = io_zcrx_iov_page(niov); + dst_addr = kmap_local_page(dst_page); + if (src_page) + src_base = kmap_local_page(src_page); + + memcpy(dst_addr, src_base + src_offset, copy_size); + + if (src_page) + kunmap_local(src_base); + kunmap_local(dst_addr); + + if (!io_zcrx_queue_cqe(req, niov, ifq, dst_off, copy_size)) { + io_zcrx_return_niov(niov); + ret = -ENOSPC; + break; + } + + io_zcrx_get_niov_uref(niov); + src_offset += copy_size; + len -= copy_size; + copied += copy_size; + } + + return copied ? copied : ret; +} + +static int io_zcrx_copy_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, + const skb_frag_t *frag, int off, int len) +{ + struct page *page = skb_frag_page(frag); + u32 p_off, p_len, t, copied = 0; + int ret = 0; + + off += skb_frag_off(frag); + + skb_frag_foreach_page(frag, off, len, + page, p_off, p_len, t) { + ret = io_zcrx_copy_chunk(req, ifq, NULL, page, p_off, p_len); + if (ret < 0) + return copied ? 
copied : ret; + copied += ret; + } + return copied; +} + static int io_zcrx_recv_frag(struct io_kiocb *req, struct io_zcrx_ifq *ifq, const skb_frag_t *frag, int off, int len) { struct net_iov *niov; if (unlikely(!skb_frag_is_net_iov(frag))) - return -EOPNOTSUPP; + return io_zcrx_copy_frag(req, ifq, frag, off, len); niov = netmem_to_net_iov(frag->netmem); if (niov->pp->mp_ops != &io_uring_pp_zc_ops || @@ -701,18 +797,33 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, struct io_zcrx_ifq *ifq = args->ifq; struct io_kiocb *req = args->req; struct sk_buff *frag_iter; - unsigned start, start_off; + unsigned start, start_off = offset; int i, copy, end, off; int ret = 0; if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT)) return -EAGAIN; - start = skb_headlen(skb); - start_off = offset; + if (unlikely(offset < skb_headlen(skb))) { + ssize_t copied; + size_t to_copy; - if (offset < start) - return -EOPNOTSUPP; + to_copy = min_t(size_t, skb_headlen(skb) - offset, len); + copied = io_zcrx_copy_chunk(req, ifq, skb->data, NULL, + offset, to_copy); + if (copied < 0) { + ret = copied; + goto out; + } + offset += copied; + len -= copied; + if (!len) + goto out; + if (offset != skb_headlen(skb)) + goto out; + } + + start = skb_headlen(skb); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { const skb_frag_t *frag; From patchwork Wed Dec 18 00:37:45 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912811 Received: from mail-pf1-f177.google.com (mail-pf1-f177.google.com [209.85.210.177]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7D8B735954 for ; Wed, 18 Dec 2024 00:38:19 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.210.177 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482301; 
cv=none; b=QDrUbmONQMhCsGlNpPuFjShhqCEV80Zm8+FLMR9U8r11JvEk2GTK+mFbqPPzthgL2ihzw4PVfBXWvWR99fUskPvLgcDEr5ldkqm5YW2mTabZJm9uDJ7ftIjE9gvG2B8+WE01MTE5GDjlYnhGDSaPJQk+PwhwgSubgJkz5JhIUWk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482301; c=relaxed/simple; bh=Fti+Fru2O/FtINq8EuHguEm2wBZezE166gy2vOhc4kc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=ZqOX23c1zxdU1ET23OZ8qcz327sFJ79aIt7L3HvEykbIxa3jiop4ln+ZqBRdd9U8Os6y1O1fFD+/QBhfEv1vqbGfY2A5CMb+LvaWYYdq7RTTliOlRP/xRUBpLemYEoDVl1J7CENZdvX35rCTwh+iNr2o1uXVv1XYvi1I+bN+sVQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=qWMQZMCe; arc=none smtp.client-ip=209.85.210.177 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="qWMQZMCe" Received: by mail-pf1-f177.google.com with SMTP id d2e1a72fcca58-725e71a11f7so179180b3a.1 for ; Tue, 17 Dec 2024 16:38:19 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482299; x=1735087099; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=eCciZZc3yq3EVqcC94j9luI0Vp+16mZd5MSngMUL+ao=; b=qWMQZMCeCFZp+tnzEePTxEFyI8kaFjsUjB2ND+acwmZIn5WJNWQbMvjtv2n1ufkMgy sr1/abzkccIUi1dnWEVpgOy96uW130CN1lyb8SP5YdBWBAy0R96C4SsNHrwiF9f8Hyf0 Xz/0SFQuq+anMG3SIxn0K0SX7SXX5nW1gZX7FyfPuKRLnQG5G/Qmvo33odPoda5THiXM 
DnB9sQnyD4ryyh4p8xJj3IcYtqT7Syc+dSoXxcXB7kmgiMsxZbMRqY56jSRUKYfJHZCb QKXAWjA5PQGIfwDYx6Mx3+c6gNEY0E2/hgZXGCUVj9SCICw0ZOZ2XuIwytgenIp9DSur oU3A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482299; x=1735087099; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=eCciZZc3yq3EVqcC94j9luI0Vp+16mZd5MSngMUL+ao=; b=dr9rHzMX/Uk8SVlGWKsIb08p3VmFY+ccvwpE7qFXsKSY7yudRnOyjvgEQeiz8G1/o0 BopSaX9r+EL5CUx3ruk2ldB3R5exRUld1kf/9XFvxfSdCDBpVkSptBbd16NRGY8WTOBv r42LDAfaOyc+tyi9MzQQSDEEnsoxKKuxpmJ56drh013rT7PbLJ7FRufWUpkXyVY2ET2v Fv+dES171ak/rEcMqnpIo+LbyWKth86x5qna2YoMklD3FJGAoRLnLrsczhg/AjSiAELk QrGOr72kM4JF4OBnrkWH4JcLgLVg+LRb8G4SHj6Zrs4GYDq++pspPiXTpx9csPmJ5sgo MF/Q== X-Gm-Message-State: AOJu0YyNeybweAha+HNGwJmMZGmV3fnOcGQw29s1DVJF97jmgVS66N0A PsVdsHzGNeJA1MjCY43fKcujE+rjpAqeYdiEMVaiqq1mJhF+3LhzNHtMynqiKHQtYFtsETiRpos C X-Gm-Gg: ASbGnctFNM1iUHgPSpCYYeOOqsuKfhiZHdO71j63Hf5/ZluIwnNbWcFZuIJv7vTl5pc bUO2RpL5z0I8sghAm996/hrGDigJZ7CRBbioy/3rDbcFhJyMoFk6kV0yr1/qkl9FRp23c9K1mPI KDUvA1wjkYC45hqbr0yq+xulu9KggjjC6vJGPkG5a2By2yNtWuPH6G2+Favcd1yDlHlu2oagPPL jBwejKvG0BbFG/oATf6fmQtpF9LiNNfPxVy7GS5Fw== X-Google-Smtp-Source: AGHT+IE/aF0H+vfJ03/915bxOxU5FrppyJM5Zop0EY0U+bzqWnbOjPe7+5sXEtJSGHhhJjkk34drdQ== X-Received: by 2002:a05:6a20:6a0b:b0:1d8:c74d:1ca0 with SMTP id adf61e73a8af0-1e4e7782a9fmr7641838637.11.1734482298849; Tue, 17 Dec 2024 16:38:18 -0800 (PST) Received: from localhost ([2a03:2880:ff:21::]) by smtp.gmail.com with ESMTPSA id 41be03b00d2f7-801d5aaf567sm6416992a12.18.2024.12.17.16.38.18 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:18 -0800 (PST) From: David Wei To: io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. 
Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 19/20] net: add documentation for io_uring zcrx Date: Tue, 17 Dec 2024 16:37:45 -0800 Message-ID: <20241218003748.796939-20-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add documentation for io_uring zero copy Rx that explains requirements and the user API. Signed-off-by: David Wei --- Documentation/networking/index.rst | 1 + Documentation/networking/iou-zcrx.rst | 201 ++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 Documentation/networking/iou-zcrx.rst diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 46c178e564b3..d6ce9c5f179c 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -63,6 +63,7 @@ Contents: gtp ila ioam6-sysctl + iou-zcrx ip_dynaddr ipsec ip-sysctl diff --git a/Documentation/networking/iou-zcrx.rst b/Documentation/networking/iou-zcrx.rst new file mode 100644 index 000000000000..7f6b7c072b59 --- /dev/null +++ b/Documentation/networking/iou-zcrx.rst @@ -0,0 +1,201 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +io_uring zero copy Rx +===================== + +Introduction +============ + +io_uring zero copy Rx (ZC Rx) is a feature that removes kernel-to-user copy on +the network receive path, allowing packet data to be received directly into +userspace memory. This feature is different to TCP_ZEROCOPY_RECEIVE in that +there are no strict alignment requirements and no need to mmap()/munmap(). +Compared to kernel bypass solutions such as e.g. DPDK, the packet headers are +processed by the kernel TCP stack as normal. 
+ +NIC HW Requirements +=================== + +Several NIC HW features are required for io_uring ZC Rx to work. For now the +kernel API does not configure the NIC and it must be done by the user. + +Header/data split +----------------- + +Required to split packets at the L4 boundary into a header and a payload. +Headers are received into kernel memory as normal and processed by the TCP +stack as normal. Payloads are received into userspace memory directly. + +Flow steering +------------- + +Specific HW Rx queues are configured for this feature, but modern NICs +typically distribute flows across all HW Rx queues. Flow steering is required +to ensure that only desired flows are directed towards HW queues that are +configured for io_uring ZC Rx. + +RSS +--- + +In addition to flow steering above, RSS is required to steer all other non-zero +copy flows away from queues that are configured for io_uring ZC Rx. + +Usage +===== + +Setup NIC +--------- + +Must be done out of band for now. + +Ensure there are at least two queues:: + + ethtool -L eth0 combined 2 + +Enable header/data split:: + + ethtool -G eth0 tcp-data-split on + +Carve out half of the HW Rx queues for zero copy using RSS:: + + ethtool -X eth0 equal 1 + +Set up flow steering, bearing in mind that queues are 0-indexed:: + + ethtool -N eth0 flow-type tcp6 ... action 1 + +Setup io_uring +-------------- + +This section describes the low level io_uring kernel API. Please refer to +liburing documentation for how to use the higher level API. 
+ +Create an io_uring instance with the following required setup flags:: + + IORING_SETUP_SINGLE_ISSUER + IORING_SETUP_DEFER_TASKRUN + IORING_SETUP_CQE32 + +Create memory area +------------------ + +Allocate userspace memory area for receiving zero copy data:: + + void *area_ptr = mmap(NULL, area_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, 0); + +Create refill ring +------------------ + +Allocate memory for a shared ringbuf used for returning consumed buffers:: + + void *ring_ptr = mmap(NULL, ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, 0); + +This refill ring consists of some space for the header, followed by an array of +``struct io_uring_zcrx_rqe``:: + + size_t rq_entries = 4096; + size_t ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe) + PAGE_SIZE; + /* align to page size */ + ring_size = (ring_size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1); + +Register ZC Rx +-------------- + +Fill in registration structs:: + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = area_size, + .flags = 0, + }; + + struct io_uring_region_desc region_reg = { + .user_addr = (__u64)(unsigned long)ring_ptr, + .size = ring_size, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_ifq_reg reg = { + .if_idx = if_nametoindex("eth0"), + /* this is the HW queue with desired flow steered into it */ + .if_rxq = 1, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)&region_reg, + }; + +Register with kernel:: + + io_uring_register_ifq(ring, &reg); + +Map refill ring +--------------- + +The kernel fills in fields for the refill ring in the registration ``struct +io_uring_zcrx_ifq_reg``.
Map it into userspace:: + + struct io_uring_zcrx_rq refill_ring; + + refill_ring.khead = (unsigned *)((char *)ring_ptr + reg.offsets.head); + refill_ring.ktail = (unsigned *)((char *)ring_ptr + reg.offsets.tail); + refill_ring.rqes = + (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + refill_ring.rq_tail = 0; + refill_ring.ring_ptr = ring_ptr; + +Receiving data +-------------- + +Prepare a zero copy recv request:: + + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, fd, NULL, 0, 0); + sqe->ioprio |= IORING_RECV_MULTISHOT; + +Now, submit and wait:: + + io_uring_submit_and_wait(ring, 1); + +Finally, process completions:: + + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + + io_uring_for_each_cqe(ring, head, cqe) { + struct io_uring_zcrx_cqe *rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + unsigned char *data = area_ptr + (rcqe->off & IORING_ZCRX_AREA_MASK); + /* do something with the data */ + + count++; + } + io_uring_cq_advance(ring, count); + +Recycling buffers +----------------- + +Return buffers back to the kernel to be used again:: + + struct io_uring_zcrx_rqe *rqe; + unsigned mask = refill_ring.ring_entries - 1; + rqe = &refill_ring.rqes[refill_ring.rq_tail & mask]; + + area_offset = rcqe->off & IORING_ZCRX_AREA_MASK; + rqe->off = area_offset | area_reg.rq_area_token; + rqe->len = cqe->res; + IO_URING_WRITE_ONCE(*refill_ring.ktail, ++refill_ring.rq_tail); + +Testing +======= + +See ``tools/testing/selftests/drivers/net/hw/iou-zcrx.c`` From patchwork Wed Dec 18 00:37:46 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: David Wei X-Patchwork-Id: 13912812 Received: from mail-pl1-f181.google.com (mail-pl1-f181.google.com [209.85.214.181]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id
04229142900 for ; Wed, 18 Dec 2024 00:38:20 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=209.85.214.181 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482303; cv=none; b=GnQ66E5EAhmMV9hKvXGm2t5Hx7gT36+vuQhWdPYSvj3G3Au1WkjnrtfzzIMLYvpE5Rz9i5CQqIGfytogcL4RXcPdJi+5QDQCr0dUVLXlr31f2CjZg4c73d78ON4Wmdg4VWt4kd3iL5+C/xeTypFMGL38fj/ERLKJWut4S/JN8qI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1734482303; c=relaxed/simple; bh=lcMzOgRhe6foeYQtk6KkeLA/bpoDBr4lIE7RfzJXV8c=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Y/6QRAaJ3LrZurShAR4Ol1E3lvy+rTI2Dpig2Bwca3lKYhDfZV1BnQ8nRhUzA4TaQwNOc10flsXh7x3Ne30fn8YReb6mxfbyU636YWlCMIW2t3hGPgIbK7u8ejd1xQ+M869bfts3WLWnkA3C46y2uiJxMth+rRXhKOCiRx5EDx8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk; spf=none smtp.mailfrom=davidwei.uk; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b=shyj43gp; arc=none smtp.client-ip=209.85.214.181 Authentication-Results: smtp.subspace.kernel.org; dmarc=none (p=none dis=none) header.from=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; spf=none smtp.mailfrom=davidwei.uk Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=davidwei-uk.20230601.gappssmtp.com header.i=@davidwei-uk.20230601.gappssmtp.com header.b="shyj43gp" Received: by mail-pl1-f181.google.com with SMTP id d9443c01a7336-215770613dbso40305365ad.2 for ; Tue, 17 Dec 2024 16:38:20 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=davidwei-uk.20230601.gappssmtp.com; s=20230601; t=1734482300; x=1735087100; darn=vger.kernel.org; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:from:to:cc:subject:date :message-id:reply-to; bh=Z8jcULxy4J4nEWnjbRQtdx9qrQ+/W+M/aJAJhHd6jtw=; 
b=shyj43gpJAFYGUrdxfEUsB5gzfN+gelcykMY7RpZEW6UDElgIeW1DqtW09/JR9sPLw xNYeLpdirKMHvhSx3/Jn2NkwlqmccwONHT+wYh/58aqF2MkIOFBBbOuXljhOAWZqgPv4 AIajGx+nYyT3TuXBEViz4BRKOxFOtGAp63a9TRGRZVACb7w9BPRwy31HwIgpyvr2mx1H d56hJh9U5Kff7pB3slS4h91WlL6F3VCeKzYnTvmSBbLU6sFf51cat3jOWKKSRHHqcCJN nIVY/9Vufc24itEA0ilSvrMFSLGdtbTC/4gW6MS9PaSs1Vmq4LaE/5SER3cAgvESvcAL qQnA== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1734482300; x=1735087100; h=content-transfer-encoding:mime-version:references:in-reply-to :message-id:date:subject:cc:to:from:x-gm-message-state:from:to:cc :subject:date:message-id:reply-to; bh=Z8jcULxy4J4nEWnjbRQtdx9qrQ+/W+M/aJAJhHd6jtw=; b=SLM7hwSggLQDRR0qdFE1e+YUlgPh6zT3LSwG4SQ3DTOnAjD41u35R6X4ge04wNicZ1 kJIuFeevxX2eH/S6D3v7yLJw6tOu71oRsuj51hI0/vaZKTikmkpBoSb6zyD3IA9KRu3u wwSNF8db3kL/DJwmpdkl8d2RVafrLZ4s4gVAtHw79ejlOXwvSsa/W7ZKy1kwpt84rEB8 Gkxt9/hzbLP9SuG+iKVqnpvmADNcotCuWr1OBcylz+s5ok606/JO0zZq7xSS3gkegViW qssbMsxFM4zCA2deZKGICbz/ilitqud4aJcIxYaloQuFYpVxjWoMKoc5Xf8Ln8gwteDs vHpg== X-Gm-Message-State: AOJu0YwM0yR5xsWR2SZjXDL12gQCZIQ3iXCGzYTlokIDdVtp7AWoQbwr gYNxIxprRWvnkxUHJLpc0i2MHKrh9qC2fM6858SlXToPlrzUfMA/+WfP4AEI3v3LRgKrGPAeYg+ 1 X-Gm-Gg: ASbGncsHRy7FSY44LJTNm08PZbIxd5KDYl+4FpOlNQsrVoI36qCHGNavGTVlVovXJfo Mr7qkZ0cM6VGlPnK5KehH217juuYW06WMazmSv9IxRsGkZYiRdWDl+khFBTo4ZTZ4OIZVhZN/mw H6Tvf+Aj85w6g2f6JOGCYwDoFPcDSqX4nNFtMRt6AkJthmWlSQ6xV+Q8h4O7NMuABTDFstOEQx3 FPSCfy75Iwgs08phOcmDdtYlALb+Rut55OyaLfg X-Google-Smtp-Source: AGHT+IHhbs8hnUkijpIBsA19GBm92dgIfwCbdbCV150kccfSg1woTlw1lhnJ56BZr04LHs9VOVNg3A== X-Received: by 2002:a17:902:e5ca:b0:215:4394:40b5 with SMTP id d9443c01a7336-218d724abe7mr10351165ad.43.1734482300179; Tue, 17 Dec 2024 16:38:20 -0800 (PST) Received: from localhost ([2a03:2880:ff:4::]) by smtp.gmail.com with ESMTPSA id d9443c01a7336-218a1e50aadsm65065375ad.156.2024.12.17.16.38.19 (version=TLS1_3 cipher=TLS_AES_256_GCM_SHA384 bits=256/256); Tue, 17 Dec 2024 16:38:19 -0800 (PST) From: David Wei To: 
io-uring@vger.kernel.org, netdev@vger.kernel.org Cc: Jens Axboe , Pavel Begunkov , Jakub Kicinski , Paolo Abeni , "David S. Miller" , Eric Dumazet , Jesper Dangaard Brouer , David Ahern , Mina Almasry , Stanislav Fomichev , Joe Damato , Pedro Tammela Subject: [PATCH net-next v9 20/20] io_uring/zcrx: add selftest Date: Tue, 17 Dec 2024 16:37:46 -0800 Message-ID: <20241218003748.796939-21-dw@davidwei.uk> X-Mailer: git-send-email 2.43.5 In-Reply-To: <20241218003748.796939-1-dw@davidwei.uk> References: <20241218003748.796939-1-dw@davidwei.uk> Precedence: bulk X-Mailing-List: io-uring@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Add a selftest for io_uring zero copy Rx. This test cannot run locally and requires a remote host to be configured in net.config. The remote host must have hardware support for zero copy Rx as listed in the documentation page. The test will restore the NIC config back to before the test and is idempotent. liburing is required to compile the test and be installed on the remote host running the test. 
Signed-off-by: David Wei --- .../selftests/drivers/net/hw/.gitignore | 2 + .../testing/selftests/drivers/net/hw/Makefile | 6 + .../selftests/drivers/net/hw/iou-zcrx.c | 432 ++++++++++++++++++ .../selftests/drivers/net/hw/iou-zcrx.py | 64 +++ 4 files changed, 504 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/hw/iou-zcrx.c create mode 100755 tools/testing/selftests/drivers/net/hw/iou-zcrx.py diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore index e9fe6ede681a..6942bf575497 100644 --- a/tools/testing/selftests/drivers/net/hw/.gitignore +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -1 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +iou-zcrx ncdevmem diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 21ba64ce1e34..5431af8e8210 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -1,5 +1,7 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT +TEST_GEN_FILES = iou-zcrx + TEST_PROGS = \ csum.py \ devlink_port_split.py \ @@ -10,6 +12,7 @@ TEST_PROGS = \ ethtool_rmon.sh \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ + iou-zcrx.py \ loopback.sh \ nic_link_layer.py \ nic_performance.py \ @@ -38,3 +41,6 @@ include ../../../lib.mk # YNL build YNL_GENS := ethtool netdev include ../../../net/ynl.mk + +$(OUTPUT)/iou-zcrx: CFLAGS += -I/usr/include/ +$(OUTPUT)/iou-zcrx: LDLIBS += -luring diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c new file mode 100644 index 000000000000..12f71e3e111e --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define PAGE_SIZE (4096) +#define AREA_SIZE (8192 * PAGE_SIZE) +#define SEND_SIZE (512 * 4096) +#define min(a, b) \ + ({ \ + typeof(a) _a = (a); \ + typeof(b) _b = (b); \ + _a < _b ? _a : _b; \ + }) +#define min_t(t, a, b) \ + ({ \ + t _ta = (a); \ + t _tb = (b); \ + min(_ta, _tb); \ + }) + +#define ALIGN_UP(v, align) (((v) + (align) - 1) & ~((align) - 1)) + +static int cfg_family = PF_UNSPEC; +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static int cfg_payload_len; +static const char *cfg_ifname; +static int cfg_queue_id = -1; + +static socklen_t cfg_alen; +static struct sockaddr_storage cfg_addr; + +static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE))); +static void *area_ptr; +static void *ring_ptr; +static size_t ring_size; +static struct io_uring_zcrx_rq rq_ring; +static unsigned long area_token; +static int connfd; +static bool stop; +static size_t received; + +static unsigned long gettimeofday_ms(void) +{ + struct timeval tv; + + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000) + (tv.tv_usec / 1000); +} + +static inline size_t get_refill_ring_size(unsigned int rq_entries) +{ + size_t size; + + ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe); + /* add space for the header (head/tail/etc.) 
*/ + ring_size += PAGE_SIZE; + return ALIGN_UP(ring_size, 4096); +} + +static void setup_zcrx(struct io_uring *ring) +{ + unsigned int ifindex; + unsigned int rq_entries = 4096; + int ret; + + ifindex = if_nametoindex(cfg_ifname); + if (!ifindex) + error(1, 0, "bad interface name: %s", cfg_ifname); + + area_ptr = mmap(NULL, + AREA_SIZE, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + if (area_ptr == MAP_FAILED) + error(1, 0, "mmap(): zero copy area"); + + ring_size = get_refill_ring_size(rq_entries); + ring_ptr = mmap(NULL, + ring_size, + PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE, + 0, + 0); + + struct io_uring_region_desc region_reg = { + .size = ring_size, + .user_addr = (__u64)(unsigned long)ring_ptr, + .flags = IORING_MEM_REGION_TYPE_USER, + }; + + struct io_uring_zcrx_area_reg area_reg = { + .addr = (__u64)(unsigned long)area_ptr, + .len = AREA_SIZE, + .flags = 0, + }; + + struct io_uring_zcrx_ifq_reg reg = { + .if_idx = ifindex, + .if_rxq = cfg_queue_id, + .rq_entries = rq_entries, + .area_ptr = (__u64)(unsigned long)&area_reg, + .region_ptr = (__u64)(unsigned long)&region_reg, + }; + + ret = io_uring_register_ifq(ring, &reg); + if (ret) + error(1, 0, "io_uring_register_ifq(): %d", ret); + + rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head); + rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail); + rq_ring.rqes = (struct io_uring_zcrx_rqe *)((char *)ring_ptr + reg.offsets.rqes); + rq_ring.rq_tail = 0; + rq_ring.ring_entries = reg.rq_entries; + + area_token = area_reg.rq_area_token; +} + +static void add_accept(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_accept(sqe, sockfd, NULL, NULL, 0); + sqe->user_data = 1; +} + +static void add_recvzc(struct io_uring *ring, int sockfd) +{ + struct io_uring_sqe *sqe; + + sqe = io_uring_get_sqe(ring); + + io_uring_prep_rw(IORING_OP_RECV_ZC, sqe, sockfd, NULL, 0, 0); + sqe->ioprio |=
IORING_RECV_MULTISHOT; + sqe->user_data = 2; +} + +static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + if (cqe->res < 0) + error(1, 0, "accept()"); + if (connfd) + error(1, 0, "Unexpected second connection"); + + connfd = cqe->res; + add_recvzc(ring, connfd); +} + +static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe) +{ + unsigned rq_mask = rq_ring.ring_entries - 1; + struct io_uring_zcrx_cqe *rcqe; + struct io_uring_zcrx_rqe *rqe; + struct io_uring_sqe *sqe; + uint64_t mask; + char *data; + ssize_t n; + int i; + + if (cqe->res == 0 && cqe->flags == 0) { + stop = true; + return; + } + + if (cqe->res < 0) + error(1, 0, "recvzc(): %d", cqe->res); + + if (!(cqe->flags & IORING_CQE_F_MORE)) + add_recvzc(ring, connfd); + + rcqe = (struct io_uring_zcrx_cqe *)(cqe + 1); + + n = cqe->res; + mask = (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; + data = (char *)area_ptr + (rcqe->off & mask); + + for (i = 0; i < n; i++) { + if (*(data + i) != payload[(received + i)]) + error(1, 0, "payload mismatch"); + } + received += n; + + rqe = &rq_ring.rqes[(rq_ring.rq_tail & rq_mask)]; + rqe->off = (rcqe->off & IORING_ZCRX_AREA_MASK) | area_token; + rqe->len = cqe->res; + IO_URING_WRITE_ONCE(*rq_ring.ktail, ++rq_ring.rq_tail); +} + +static void server_loop(struct io_uring *ring) +{ + struct io_uring_cqe *cqe; + unsigned int count = 0; + unsigned int head; + int i, ret; + + io_uring_submit_and_wait(ring, 1); + + io_uring_for_each_cqe(ring, head, cqe) { + if (cqe->user_data == 1) + process_accept(ring, cqe); + else if (cqe->user_data == 2) + process_recvzc(ring, cqe); + else + error(1, 0, "unknown cqe"); + count++; + } + io_uring_cq_advance(ring, count); +} + +static void run_server(void) +{ + unsigned int flags = 0; + struct io_uring ring; + int fd, enable, ret; + uint64_t tstop; + + fd = socket(cfg_family, SOCK_STREAM, 0); + if (fd == -1) + error(1, 0, "socket()"); + + enable = 1; + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &enable, 
sizeof(int));
+	if (ret < 0)
+		error(1, 0, "setsockopt(SO_REUSEADDR)");
+
+	/* Use the per-family length, matching what connect() is given. */
+	ret = bind(fd, (const struct sockaddr *)&cfg_addr, cfg_alen);
+	if (ret < 0)
+		error(1, 0, "bind()");
+
+	if (listen(fd, 1024) < 0)
+		error(1, 0, "listen()");
+
+	flags |= IORING_SETUP_COOP_TASKRUN;
+	flags |= IORING_SETUP_SINGLE_ISSUER;
+	flags |= IORING_SETUP_DEFER_TASKRUN;
+	flags |= IORING_SETUP_SUBMIT_ALL;
+	flags |= IORING_SETUP_CQE32;
+
+	/* Every later call uses the ring, so a failed init must be fatal. */
+	ret = io_uring_queue_init(512, &ring, flags);
+	if (ret)
+		error(1, 0, "io_uring_queue_init(): %d", ret);
+
+	setup_zcrx(&ring);
+
+	add_accept(&ring, fd);
+
+	/* Give the whole transfer at most five seconds. */
+	tstop = gettimeofday_ms() + 5000;
+	while (!stop && gettimeofday_ms() < tstop)
+		server_loop(&ring);
+
+	/* error() appends the newline itself. */
+	if (!stop)
+		error(1, 0, "test failed");
+}
+
+/*
+ * Sender side: connect to cfg_addr and send SEND_SIZE bytes of payload[]
+ * in chunks of at most cfg_payload_len bytes, then close the socket.
+ */
+static void run_client(void)
+{
+	ssize_t to_send = SEND_SIZE;
+	ssize_t sent = 0;
+	ssize_t chunk, res;
+	int fd;
+
+	fd = socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, 0, "socket()");
+
+	if (connect(fd, (void *)&cfg_addr, cfg_alen))
+		error(1, 0, "connect()");
+
+	while (to_send) {
+		void *src = &payload[sent];
+
+		chunk = min_t(ssize_t, cfg_payload_len, to_send);
+		res = send(fd, src, chunk, 0);
+		/* sent is ssize_t, so it needs %zd rather than %d. */
+		if (res < 0)
+			error(1, 0, "send(): %zd", sent);
+		sent += res;
+		to_send -= res;
+	}
+
+	close(fd);
+}
+
+/* Print the command-line synopsis for @filepath through error(1, ...). */
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h<addr> -p<port> "
+		    "-l<payload_len> -i<ifname> -q<queue_id>", filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* globals and fill cfg_addr for the
+ * selected address family. Invalid combinations are reported via error().
+ */
+static void parse_opts(int argc, char **argv)
+{
+	const int max_payload_len = sizeof(payload) -
+				    sizeof(struct ipv6hdr) -
+				    sizeof(struct tcphdr) -
+				    40 /* max tcp options */;
+	struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+	struct sockaddr_in *addr4 = (void *) &cfg_addr;
+	char *addr = NULL;
+	int c;
+
+	if (argc <= 1)
+		usage(argv[0]);
+	cfg_payload_len = max_payload_len;
+
+	while ((c = getopt(argc, argv, "46sch:p:l:i:q:")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+
error(1, 0, "Pass one of -4 or -6"); + cfg_family = PF_INET6; + cfg_alen = sizeof(struct sockaddr_in6); + break; + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 0); + break; + case 'i': + cfg_ifname = optarg; + break; + case 'q': + cfg_queue_id = strtoul(optarg, NULL, 0); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Receiver cannot have -h specified"); + + switch (cfg_family) { + case PF_INET: + memset(addr4, 0, sizeof(*addr4)); + addr4->sin_family = AF_INET; + addr4->sin_port = htons(cfg_port); + addr4->sin_addr.s_addr = htonl(INADDR_ANY); + + if (addr && + inet_pton(AF_INET, addr, &(addr4->sin_addr)) != 1) + error(1, 0, "ipv4 parse error: %s", addr); + break; + case PF_INET6: + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + + if (addr && + inet_pton(AF_INET6, addr, &(addr6->sin6_addr)) != 1) + error(1, 0, "ipv6 parse error: %s", addr); + break; + default: + error(1, 0, "illegal domain"); + } + + if (cfg_payload_len > max_payload_len) + error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); +} + +int main(int argc, char **argv) +{ + const char *cfg_test = argv[argc - 1]; + int i; + + parse_opts(argc, argv); + + for (i = 0; i < SEND_SIZE; i++) + payload[i] = 'a' + (i % 26); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py new file mode 100755 index 000000000000..3998d0ad504f --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python3 +# 
SPDX-License-Identifier: GPL-2.0 + +from os import path +from lib.py import ksft_run, ksft_exit +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, wait_port_listen + + +def _get_rx_ring_entries(cfg): + eth_cmd = "ethtool -g {} | awk '/RX:/ {{count++}} count == 2 {{print $2; exit}}'" + res = cmd(eth_cmd.format(cfg.ifname), host=cfg.remote) + return int(res.stdout) + + +def _get_combined_channels(cfg): + eth_cmd = "ethtool -l {} | awk '/Combined:/ {{count++}} count == 2 {{print $2; exit}}'" + res = cmd(eth_cmd.format(cfg.ifname), host=cfg.remote) + return int(res.stdout) + + +def _set_flow_rule(cfg, chan): + eth_cmd = "ethtool -N {} flow-type tcp6 dst-port 9999 action {} | awk '{{print $NF}}'" + res = cmd(eth_cmd.format(cfg.ifname, chan), host=cfg.remote) + return int(res.stdout) + + +def test_zcrx(cfg) -> None: + cfg.require_v6() + cfg.require_cmd("awk", remote=True) + + combined_chans = _get_combined_channels(cfg) + if combined_chans < 2: + raise KsftSkipEx('at least 2 combined channels required') + rx_ring = _get_rx_ring_entries(cfg) + + rx_cmd = f"{cfg.bin_remote} -6 -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_local} -6 -c -h {cfg.remote_v6} -p 9999 -l 12840" + + try: + cmd(f"ethtool -G {cfg.ifname} rx 64", host=cfg.remote) + cmd(f"ethtool -X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) + flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) + + with bkg(rx_cmd, host=cfg.remote, exit_wait=True): + wait_port_listen(9999, proto="tcp", host=cfg.remote) + cmd(tx_cmd) + finally: + cmd(f"ethtool -N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + cmd(f"ethtool -X {cfg.ifname} default", host=cfg.remote) + cmd(f"ethtool -G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx") + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + ksft_run(globs=globals(), 
case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main()