[iwl-net,11/11] ice: protect ring configuration with a mutex

Message ID 20240528131429.3012910-12-maciej.fijalkowski@intel.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series ice: fix AF_XDP ZC timeout and concurrency issues

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net
netdev/apply fail Patch does not apply to net-0

Commit Message

Maciej Fijalkowski May 28, 2024, 1:14 p.m. UTC
From: Larysa Zaremba <larysa.zaremba@intel.com>

Add a ring_lock mutex to protect the sections where software rings are
affected, particularly to prevent a system crash when tx_timeout
and .ndo_bpf() happen at the same time.

Fixes: 2d4238f55697 ("ice: Add support for AF_XDP")
Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h      |  2 ++
 drivers/net/ethernet/intel/ice/ice_lib.c  | 23 ++++++++++---
 drivers/net/ethernet/intel/ice/ice_main.c | 39 ++++++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_xsk.c  | 13 ++------
 4 files changed, 57 insertions(+), 20 deletions(-)
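
The locking in this revision is built on the ICE_CFG_BUSY and
ICE_RTNL_WAITS_FOR_RESET state bits rather than a struct mutex. Below is a
condensed sketch of the serialization between .ndo_bpf() and the reset
rebuild path, using the bit names from the patch; the sketch_* helpers are
illustrative only, not part of the driver.

/* .ndo_bpf() is invoked by the core with rtnl_lock() already held. */
static int sketch_ndo_bpf(struct ice_pf *pf)
{
	/* Wait out any in-flight rebuild; advertise that rtnl is held by a
	 * waiter so the reset path does not block on rtnl and deadlock.
	 */
	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
		set_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state);
		usleep_range(1000, 2000);
	}
	clear_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state);

	/* ... XDP prog / XSK pool (re)configuration ... */

	clear_bit(ICE_CFG_BUSY, pf->state);
	return 0;
}

/* Reset path: owns ICE_CFG_BUSY while the PF VSI rings are rebuilt. */
static void sketch_rebuild_pf_vsi(struct ice_pf *pf)
{
	while (test_and_set_bit(ICE_CFG_BUSY, pf->state))
		usleep_range(1000, 2000);

	/* ... ice_vsi_rebuild_by_type(pf, ICE_VSI_PF) ... */

	clear_bit(ICE_CFG_BUSY, pf->state);
}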

Comments

Larysa Zaremba May 28, 2024, 2:11 p.m. UTC | #1
On Tue, May 28, 2024 at 03:14:29PM +0200, Maciej Fijalkowski wrote:
> From: Larysa Zaremba <larysa.zaremba@intel.com>
> 
> Add a ring_lock mutex to protect the sections where software rings are
> affected, particularly to prevent a system crash when tx_timeout
> and .ndo_bpf() happen at the same time.
> 
> Fixes: 2d4238f55697 ("ice: Add support for AF_XDP")
> Signed-off-by: Larysa Zaremba <larysa.zaremba@intel.com>

This is not the latest version of my patches. The code is the same, but the
newer version has better patch division and more relevant commit messages.
Maciej will update the patches in v2.

Patch

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 701a61d791dd..7c1e24afa34b 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -307,6 +307,7 @@  enum ice_pf_state {
 	ICE_PHY_INIT_COMPLETE,
 	ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
 	ICE_AUX_ERR_PENDING,
+	ICE_RTNL_WAITS_FOR_RESET,
 	ICE_STATE_NBITS		/* must be last */
 };
 
@@ -941,6 +942,7 @@  int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog,
 			  enum ice_xdp_cfg cfg_type);
 int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type);
 void ice_map_xdp_rings(struct ice_vsi *vsi);
+bool ice_rebuild_pending(struct ice_vsi *vsi);
 int
 ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 	     u32 flags);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 7629b0190578..a5dc6fc6e63d 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2426,7 +2426,10 @@  void ice_vsi_decfg(struct ice_vsi *vsi)
 		dev_err(ice_pf_to_dev(pf), "Failed to remove RDMA scheduler config for VSI %u, err %d\n",
 			vsi->vsi_num, err);
 
-	if (ice_is_xdp_ena_vsi(vsi))
+	/* xdp_rings can be absent if the program was attached amid reset;
+	 * VSI rebuild is supposed to create them later
+	 */
+	if (ice_is_xdp_ena_vsi(vsi) && vsi->xdp_rings)
 		/* return value check can be skipped here, it always returns
 		 * 0 if reset is in progress
 		 */
@@ -2737,12 +2740,24 @@  ice_queue_set_napi(struct ice_vsi *vsi, unsigned int queue_index,
 	if (current_work() == &pf->serv_task ||
 	    test_bit(ICE_PREPARED_FOR_RESET, pf->state) ||
 	    test_bit(ICE_DOWN, pf->state) ||
-	    test_bit(ICE_SUSPENDED, pf->state))
+	    test_bit(ICE_SUSPENDED, pf->state)) {
+		bool rtnl_held_here = true;
+
+		while (!rtnl_trylock()) {
+			if (test_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state)) {
+				rtnl_held_here = false;
+				break;
+			}
+			usleep_range(1000, 2000);
+		}
 		__ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
-				     false);
-	else
+				     true);
+		if (rtnl_held_here)
+			rtnl_unlock();
+	} else {
 		__ice_queue_set_napi(vsi->netdev, queue_index, type, napi,
 				     true);
+	}
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 15a6805ac2a1..7724ed8fc1b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2986,6 +2986,20 @@  static int ice_max_xdp_frame_size(struct ice_vsi *vsi)
 		return ICE_RXBUF_3072;
 }
 
+/**
+ * ice_rebuild_pending - check if ice_vsi_rebuild() will run once locks are released
+ * @vsi: VSI to check
+ *
+ * ice_vsi_close() in the reset path is called under rtnl_lock(),
+ * so it happens strictly before or after .ndo_bpf().
+ * In case it has happened before, nothing is attached to the rings yet.
+ */
+bool ice_rebuild_pending(struct ice_vsi *vsi)
+{
+	return ice_is_reset_in_progress(vsi->back->state) &&
+	       !vsi->rx_rings[0]->desc;
+}
+
 /**
  * ice_xdp_setup_prog - Add or remove XDP eBPF program
  * @vsi: VSI to setup XDP for
@@ -3009,7 +3023,7 @@  ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 	}
 
 	/* hot swap progs and avoid toggling link */
-	if (ice_is_xdp_ena_vsi(vsi) == !!prog) {
+	if (ice_is_xdp_ena_vsi(vsi) == !!prog || ice_rebuild_pending(vsi)) {
 		ice_vsi_assign_bpf_prog(vsi, prog);
 		return 0;
 	}
@@ -3081,21 +3095,33 @@  static int ice_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 {
 	struct ice_netdev_priv *np = netdev_priv(dev);
 	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	int ret;
 
 	if (vsi->type != ICE_VSI_PF) {
 		NL_SET_ERR_MSG_MOD(xdp->extack, "XDP can be loaded only on PF VSI");
 		return -EINVAL;
 	}
 
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
+		set_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state);
+		usleep_range(1000, 2000);
+	}
+	clear_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state);
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
-		return ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+		ret = ice_xdp_setup_prog(vsi, xdp->prog, xdp->extack);
+		break;
 	case XDP_SETUP_XSK_POOL:
-		return ice_xsk_pool_setup(vsi, xdp->xsk.pool,
-					  xdp->xsk.queue_id);
+		ret = ice_xsk_pool_setup(vsi, xdp->xsk.pool, xdp->xsk.queue_id);
+		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
 	}
+
+	clear_bit(ICE_CFG_BUSY, pf->state);
+	return ret;
 }
 
 /**
@@ -7672,7 +7698,10 @@  static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 		ice_gnss_init(pf);
 
 	/* rebuild PF VSI */
+	while (test_and_set_bit(ICE_CFG_BUSY, pf->state))
+		usleep_range(1000, 2000);
 	err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF);
+	clear_bit(ICE_CFG_BUSY, pf->state);
 	if (err) {
 		dev_err(dev, "PF VSI rebuild failed: %d\n", err);
 		goto err_vsi_rebuild;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 225d027d3d7a..962af14f9fd5 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -370,7 +370,6 @@  int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 {
 	bool if_running, pool_present = !!pool;
 	int ret = 0, pool_failure = 0;
-	struct ice_pf *pf = vsi->back;
 
 	if (qid >= vsi->num_rxq || qid >= vsi->num_txq) {
 		netdev_err(vsi->netdev, "Please use queue id in scope of combined queues count\n");
@@ -378,18 +377,11 @@  int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 		goto failure;
 	}
 
-	if_running = netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi);
+	if_running = !ice_rebuild_pending(vsi) &&
+		     (netif_running(vsi->netdev) && ice_is_xdp_ena_vsi(vsi));
 
 	if (if_running) {
 		struct ice_rx_ring *rx_ring = vsi->rx_rings[qid];
-		int timeout = 50;
-
-		while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) {
-			timeout--;
-			if (!timeout)
-				return -EBUSY;
-			usleep_range(1000, 2000);
-		}
 
 		ret = ice_qp_dis(vsi, qid);
 		if (ret) {
@@ -412,7 +404,6 @@  int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid)
 			napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi);
 		else if (ret)
 			netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret);
-		clear_bit(ICE_CFG_BUSY, pf->state);
 	}
 
 failure:
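
The ice_queue_set_napi() hunk above is the other half of the scheme: the reset
path owns ICE_CFG_BUSY but may need rtnl, while .ndo_bpf() owns rtnl and spins
on ICE_CFG_BUSY. A simplified sketch of that handshake, assuming only the bits
and semantics introduced by this patch (sketch_set_napi_in_reset() is
illustrative, not driver code):

/* Called from the reset path, which already holds ICE_CFG_BUSY. */
static void sketch_set_napi_in_reset(struct ice_pf *pf)
{
	bool rtnl_held_here = true;

	while (!rtnl_trylock()) {
		if (test_bit(ICE_RTNL_WAITS_FOR_RESET, pf->state)) {
			/* rtnl holder is .ndo_bpf() spinning on ICE_CFG_BUSY,
			 * i.e. waiting on us; proceed without rtnl instead of
			 * deadlocking.
			 */
			rtnl_held_here = false;
			break;
		}
		usleep_range(1000, 2000);
	}

	/* ... __ice_queue_set_napi(netdev, queue_index, type, napi, true) ... */

	if (rtnl_held_here)
		rtnl_unlock();
}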