diff mbox series

[v1,1/2] netdevsim: add NAPI support

Message ID 20240416051527.1657233-2-dw@davidwei.uk (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series netdevsim: add NAPI support | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Guessed tree name to be net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 926 this patch: 926
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: bpf@vger.kernel.org
netdev/build_clang success Errors and warnings before: 937 this patch: 937
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 937 this patch: 937
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 315 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

David Wei April 16, 2024, 5:15 a.m. UTC
Add NAPI support to netdevim, similar to veth.

* Add a nsim_rq rx queue structure to hold a NAPI instance and a skb
  queue.
* During xmit, store the skb in the peer skb queue and schedule NAPI.
* During napi_poll(), drain the skb queue and pass up the stack.
* Add assoc between rxq and NAPI instance using netif_queue_set_napi().

Signed-off-by: David Wei <dw@davidwei.uk>
---
 drivers/net/netdevsim/netdev.c    | 227 ++++++++++++++++++++++++++++--
 drivers/net/netdevsim/netdevsim.h |   7 +
 2 files changed, 223 insertions(+), 11 deletions(-)

Comments

Jakub Kicinski April 17, 2024, 12:27 a.m. UTC | #1
On Mon, 15 Apr 2024 22:15:26 -0700 David Wei wrote:
> Add NAPI support to netdevim, similar to veth.
> 
> * Add a nsim_rq rx queue structure to hold a NAPI instance and a skb
>   queue.
> * During xmit, store the skb in the peer skb queue and schedule NAPI.
> * During napi_poll(), drain the skb queue and pass up the stack.
> * Add assoc between rxq and NAPI instance using netif_queue_set_napi().

> +#define NSIM_RING_SIZE		256
> +
> +static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
> +{
> +	if (list_count_nodes(&rq->skb_queue) > NSIM_RING_SIZE) {
> +		dev_kfree_skb_any(skb);
> +		return NET_RX_DROP;
> +	}
> +
> +	list_add_tail(&skb->list, &rq->skb_queue);

Why not use struct sk_buff_head ?
It has a purge helper for freeing, and remembers its own length.

> +static int nsim_poll(struct napi_struct *napi, int budget)
> +{
> +	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
> +	int done;
> +
> +	done = nsim_rcv(rq, budget);
> +
> +	if (done < budget && napi_complete_done(napi, done)) {
> +		if (unlikely(!list_empty(&rq->skb_queue)))
> +			napi_schedule(&rq->napi);

I think you can drop the re-check, NAPI has a built in protection 
for this kind of race.

> +	}
> +
> +	return done;
> +}

>  static int nsim_open(struct net_device *dev)
>  {
>  	struct netdevsim *ns = netdev_priv(dev);
> -	struct page_pool_params pp = { 0 };
> +	int err;
> +
> +	err = nsim_init_napi(ns);
> +	if (err)
> +		return err;
> +
> +	nsim_enable_napi(ns);
>  
> -	pp.pool_size = 128;
> -	pp.dev = &dev->dev;
> -	pp.dma_dir = DMA_BIDIRECTIONAL;
> -	pp.netdev = dev;
> +	netif_carrier_on(dev);

Why the carrier? It's on by default.
Should be a separate patch if needed.

> -	ns->pp = page_pool_create(&pp);
> -	return PTR_ERR_OR_ZERO(ns->pp);
> +	return 0;
> +}

> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
> index 7664ab823e29..87bf45ec4dd2 100644
> --- a/drivers/net/netdevsim/netdevsim.h
> +++ b/drivers/net/netdevsim/netdevsim.h
> @@ -90,11 +90,18 @@ struct nsim_ethtool {
>  	struct ethtool_fecparam fec;
>  };
>  
> +struct nsim_rq {
> +	struct napi_struct napi;
> +	struct list_head skb_queue;
> +	struct page_pool *page_pool;

You added the new page_pool pointer but didn't delete the one
I added recently to the device?

> +};
> +
>  struct netdevsim {
>  	struct net_device *netdev;
>  	struct nsim_dev *nsim_dev;
>  	struct nsim_dev_port *nsim_dev_port;
>  	struct mock_phc *phc;
> +	struct nsim_rq *rq;
>  
>  	u64 tx_packets;
>  	u64 tx_bytes;
David Wei April 19, 2024, 4:08 p.m. UTC | #2
On 2024-04-16 5:27 pm, Jakub Kicinski wrote:
> On Mon, 15 Apr 2024 22:15:26 -0700 David Wei wrote:
>> Add NAPI support to netdevim, similar to veth.
>>
>> * Add a nsim_rq rx queue structure to hold a NAPI instance and a skb
>>   queue.
>> * During xmit, store the skb in the peer skb queue and schedule NAPI.
>> * During napi_poll(), drain the skb queue and pass up the stack.
>> * Add assoc between rxq and NAPI instance using netif_queue_set_napi().
> 
>> +#define NSIM_RING_SIZE		256
>> +
>> +static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
>> +{
>> +	if (list_count_nodes(&rq->skb_queue) > NSIM_RING_SIZE) {
>> +		dev_kfree_skb_any(skb);
>> +		return NET_RX_DROP;
>> +	}
>> +
>> +	list_add_tail(&skb->list, &rq->skb_queue);
> 
> Why not use struct sk_buff_head ?
> It has a purge helper for freeing, and remembers its own length.

Didn't know about sk_buff_head! Thanks, it's much better.

> 
>> +static int nsim_poll(struct napi_struct *napi, int budget)
>> +{
>> +	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
>> +	int done;
>> +
>> +	done = nsim_rcv(rq, budget);
>> +
>> +	if (done < budget && napi_complete_done(napi, done)) {
>> +		if (unlikely(!list_empty(&rq->skb_queue)))
>> +			napi_schedule(&rq->napi);
> 
> I think you can drop the re-check, NAPI has a built in protection 
> for this kind of race.

Would veth also want this dropped, or does it serve a different purpose
there?

> 
>> +	}
>> +
>> +	return done;
>> +}
> 
>>  static int nsim_open(struct net_device *dev)
>>  {
>>  	struct netdevsim *ns = netdev_priv(dev);
>> -	struct page_pool_params pp = { 0 };
>> +	int err;
>> +
>> +	err = nsim_init_napi(ns);
>> +	if (err)
>> +		return err;
>> +
>> +	nsim_enable_napi(ns);
>>  
>> -	pp.pool_size = 128;
>> -	pp.dev = &dev->dev;
>> -	pp.dma_dir = DMA_BIDIRECTIONAL;
>> -	pp.netdev = dev;
>> +	netif_carrier_on(dev);
> 
> Why the carrier? It's on by default.
> Should be a separate patch if needed.

Symmetry, not sure if it was needed or not. Will remove.

> 
>> -	ns->pp = page_pool_create(&pp);
>> -	return PTR_ERR_OR_ZERO(ns->pp);
>> +	return 0;
>> +}
> 
>> diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
>> index 7664ab823e29..87bf45ec4dd2 100644
>> --- a/drivers/net/netdevsim/netdevsim.h
>> +++ b/drivers/net/netdevsim/netdevsim.h
>> @@ -90,11 +90,18 @@ struct nsim_ethtool {
>>  	struct ethtool_fecparam fec;
>>  };
>>  
>> +struct nsim_rq {
>> +	struct napi_struct napi;
>> +	struct list_head skb_queue;
>> +	struct page_pool *page_pool;
> 
> You added the new page_pool pointer but didn't delete the one
> I added recently to the device?

Yeah, sorry that slipped through the rebase.

> 
>> +};
>> +
>>  struct netdevsim {
>>  	struct net_device *netdev;
>>  	struct nsim_dev *nsim_dev;
>>  	struct nsim_dev_port *nsim_dev_port;
>>  	struct mock_phc *phc;
>> +	struct nsim_rq *rq;
>>  
>>  	u64 tx_packets;
>>  	u64 tx_bytes;
>
diff mbox series

Patch

diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c
index d127856f8f36..c1a99825be91 100644
--- a/drivers/net/netdevsim/netdev.c
+++ b/drivers/net/netdevsim/netdev.c
@@ -28,11 +28,33 @@ 
 
 #include "netdevsim.h"
 
+#define NSIM_RING_SIZE		256
+
+static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
+{
+	if (list_count_nodes(&rq->skb_queue) > NSIM_RING_SIZE) {
+		dev_kfree_skb_any(skb);
+		return NET_RX_DROP;
+	}
+
+	list_add_tail(&skb->list, &rq->skb_queue);
+	return NET_RX_SUCCESS;
+}
+
+static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
+			    struct nsim_rq *rq)
+{
+	return __dev_forward_skb(dev, skb) ?: nsim_napi_rx(rq, skb);
+}
+
 static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
+	struct net_device *peer_dev;
 	unsigned int len = skb->len;
 	struct netdevsim *peer_ns;
+	struct nsim_rq *rq;
+	int rxq;
 
 	rcu_read_lock();
 	if (!nsim_ipsec_tx(ns, skb))
@@ -42,10 +64,18 @@  static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!peer_ns)
 		goto out_drop_free;
 
+	peer_dev = peer_ns->netdev;
+	rxq = skb_get_queue_mapping(skb);
+	if (rxq >= peer_dev->num_rx_queues)
+		rxq = rxq % peer_dev->num_rx_queues;
+	rq = &peer_ns->rq[rxq];
+
 	skb_tx_timestamp(skb);
-	if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP))
+	if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP))
 		goto out_drop_cnt;
 
+	napi_schedule(&rq->napi);
+
 	rcu_read_unlock();
 	u64_stats_update_begin(&ns->syncp);
 	ns->tx_packets++;
@@ -300,25 +330,153 @@  static int nsim_get_iflink(const struct net_device *dev)
 	return iflink;
 }
 
+static int nsim_rcv(struct nsim_rq *rq, int budget)
+{
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < budget; i++) {
+		if (list_empty(&rq->skb_queue))
+			break;
+
+		skb = list_first_entry(&rq->skb_queue, struct sk_buff, list);
+		list_del(&skb->list);
+
+		netif_receive_skb(skb);
+	}
+
+	return i;
+}
+
+static int nsim_poll(struct napi_struct *napi, int budget)
+{
+	struct nsim_rq *rq = container_of(napi, struct nsim_rq, napi);
+	int done;
+
+	done = nsim_rcv(rq, budget);
+
+	if (done < budget && napi_complete_done(napi, done)) {
+		if (unlikely(!list_empty(&rq->skb_queue)))
+			napi_schedule(&rq->napi);
+	}
+
+	return done;
+}
+
+static int nsim_create_page_pool(struct nsim_rq *rq)
+{
+	struct page_pool_params p = {
+		.order = 0,
+		.pool_size = NSIM_RING_SIZE,
+		.nid = NUMA_NO_NODE,
+		.dev = &rq->napi.dev->dev,
+		.napi = &rq->napi,
+		.dma_dir = DMA_BIDIRECTIONAL,
+		.netdev = rq->napi.dev,
+	};
+
+	rq->page_pool = page_pool_create(&p);
+	if (IS_ERR(rq->page_pool)) {
+		int err = PTR_ERR(rq->page_pool);
+
+		rq->page_pool = NULL;
+		return err;
+	}
+	return 0;
+}
+
+static int nsim_init_napi(struct netdevsim *ns)
+{
+	struct net_device *dev = ns->netdev;
+	struct nsim_rq *rq;
+	int err, i;
+
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		rq = &ns->rq[i];
+
+		netif_napi_add(dev, &rq->napi, nsim_poll);
+	}
+
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		rq = &ns->rq[i];
+
+		err = nsim_create_page_pool(rq);
+		if (err)
+			goto err_pp_destroy;
+	}
+
+	return 0;
+
+err_pp_destroy:
+	while (i--) {
+		page_pool_destroy(ns->rq[i].page_pool);
+		ns->rq[i].page_pool = NULL;
+	}
+
+	for (i = 0; i < dev->num_rx_queues; i++)
+		__netif_napi_del(&ns->rq[i].napi);
+
+	return err;
+}
+
+static void nsim_enable_napi(struct netdevsim *ns)
+{
+	int i;
+
+	for (i = 0; i < ns->netdev->num_rx_queues; i++) {
+		struct nsim_rq *rq = &ns->rq[i];
+
+		netif_queue_set_napi(ns->netdev, i,
+				     NETDEV_QUEUE_TYPE_RX, &rq->napi);
+		napi_enable(&rq->napi);
+	}
+}
+
 static int nsim_open(struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
-	struct page_pool_params pp = { 0 };
+	int err;
+
+	err = nsim_init_napi(ns);
+	if (err)
+		return err;
+
+	nsim_enable_napi(ns);
 
-	pp.pool_size = 128;
-	pp.dev = &dev->dev;
-	pp.dma_dir = DMA_BIDIRECTIONAL;
-	pp.netdev = dev;
+	netif_carrier_on(dev);
 
-	ns->pp = page_pool_create(&pp);
-	return PTR_ERR_OR_ZERO(ns->pp);
+	return 0;
+}
+
+static void nsim_del_napi(struct netdevsim *ns)
+{
+	struct net_device *dev = ns->netdev;
+	int i;
+
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		struct nsim_rq *rq = &ns->rq[i];
+
+		napi_disable(&rq->napi);
+		__netif_napi_del(&rq->napi);
+	}
+	synchronize_net();
+
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		page_pool_destroy(ns->rq[i].page_pool);
+		ns->rq[i].page_pool = NULL;
+	}
 }
 
 static int nsim_stop(struct net_device *dev)
 {
 	struct netdevsim *ns = netdev_priv(dev);
+	struct netdevsim *peer = rtnl_dereference(ns->peer);
+
+	netif_carrier_off(dev);
+	if (peer)
+		netif_carrier_off(peer->netdev);
 
-	page_pool_destroy(ns->pp);
+	nsim_del_napi(ns);
 
 	return 0;
 }
@@ -437,7 +595,7 @@  nsim_pp_hold_write(struct file *file, const char __user *data,
 	if (!netif_running(ns->netdev) && val) {
 		ret = -ENETDOWN;
 	} else if (val) {
-		ns->page = page_pool_dev_alloc_pages(ns->pp);
+		ns->page = page_pool_dev_alloc_pages(ns->rq[0].page_pool);
 		if (!ns->page)
 			ret = -ENOMEM;
 	} else {
@@ -477,6 +635,46 @@  static void nsim_setup(struct net_device *dev)
 	dev->xdp_features = NETDEV_XDP_ACT_HW_OFFLOAD;
 }
 
+static int nsim_queue_init(struct netdevsim *ns)
+{
+	struct net_device *dev = ns->netdev;
+	int i;
+
+	ns->rq = kvcalloc(dev->num_rx_queues, sizeof(*ns->rq),
+			  GFP_KERNEL_ACCOUNT | __GFP_RETRY_MAYFAIL);
+	if (!ns->rq)
+		return -ENOMEM;
+
+	for (i = 0; i < dev->num_rx_queues; i++)
+		INIT_LIST_HEAD(&ns->rq[i].skb_queue);
+
+	return 0;
+}
+
+static void __nsim_skb_queue_purge(struct list_head *head)
+{
+	struct sk_buff *skb, *tmp;
+
+	list_for_each_entry_safe(skb, tmp, head, list) {
+		list_del(&skb->list);
+		dev_kfree_skb_any(skb);
+	}
+}
+
+static void nsim_queue_free(struct netdevsim *ns)
+{
+	struct net_device *dev = ns->netdev;
+	int i;
+
+	for (i = 0; i < dev->num_rx_queues; i++) {
+		if (!list_empty(&ns->rq[i].skb_queue))
+			__nsim_skb_queue_purge(&ns->rq[i].skb_queue);
+	}
+
+	kvfree(ns->rq);
+	ns->rq = NULL;
+}
+
 static int nsim_init_netdevsim(struct netdevsim *ns)
 {
 	struct mock_phc *phc;
@@ -495,10 +693,14 @@  static int nsim_init_netdevsim(struct netdevsim *ns)
 		goto err_phc_destroy;
 
 	rtnl_lock();
-	err = nsim_bpf_init(ns);
+	err = nsim_queue_init(ns);
 	if (err)
 		goto err_utn_destroy;
 
+	err = nsim_bpf_init(ns);
+	if (err)
+		goto err_rq_destroy;
+
 	nsim_macsec_init(ns);
 	nsim_ipsec_init(ns);
 
@@ -512,6 +714,8 @@  static int nsim_init_netdevsim(struct netdevsim *ns)
 	nsim_ipsec_teardown(ns);
 	nsim_macsec_teardown(ns);
 	nsim_bpf_uninit(ns);
+err_rq_destroy:
+	nsim_queue_free(ns);
 err_utn_destroy:
 	rtnl_unlock();
 	nsim_udp_tunnels_info_destroy(ns->netdev);
@@ -594,6 +798,7 @@  void nsim_destroy(struct netdevsim *ns)
 		nsim_ipsec_teardown(ns);
 		nsim_bpf_uninit(ns);
 	}
+	nsim_queue_free(ns);
 	rtnl_unlock();
 	if (nsim_dev_port_is_pf(ns->nsim_dev_port))
 		nsim_exit_netdevsim(ns);
diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h
index 7664ab823e29..87bf45ec4dd2 100644
--- a/drivers/net/netdevsim/netdevsim.h
+++ b/drivers/net/netdevsim/netdevsim.h
@@ -90,11 +90,18 @@  struct nsim_ethtool {
 	struct ethtool_fecparam fec;
 };
 
+struct nsim_rq {
+	struct napi_struct napi;
+	struct list_head skb_queue;
+	struct page_pool *page_pool;
+};
+
 struct netdevsim {
 	struct net_device *netdev;
 	struct nsim_dev *nsim_dev;
 	struct nsim_dev_port *nsim_dev_port;
 	struct mock_phc *phc;
+	struct nsim_rq *rq;
 
 	u64 tx_packets;
 	u64 tx_bytes;