diff mbox series

[2/2] ipoib: Add tx timeout work to recover queue stop situation

Message ID 20231120203501.321587-3-jinpu.wang@ionos.com (mailing list archive)
State Superseded
Headers show
Series bugfix for ipoib | expand

Commit Message

Jinpu Wang Nov. 20, 2023, 8:35 p.m. UTC
As we sometime run into tx timeout from ipoib, queue seems stopped
and can't recover. Diff with mellanox OFED show
mellanox driver has timeout work to recover in such case.

Add tx timeout work/napi work to recover such case.

Also increase the watchdog_timeo to 10 seconds, so more tolerant to
error.

Signed-off-by: Jack Wang <jinpu.wang@ionos.com>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |  4 +++
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   | 26 +++++++++++++++++-
 drivers/infiniband/ulp/ipoib/ipoib_main.c | 33 +++++++++++++++++++++--
 3 files changed, 60 insertions(+), 3 deletions(-)

Comments

kernel test robot Nov. 21, 2023, 4:20 a.m. UTC | #1
Hi Jack,

kernel test robot noticed the following build errors:

[auto build test ERROR on rdma/for-next]
[also build test ERROR on linus/master v6.7-rc2 next-20231120]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jack-Wang/ipoib-Fix-error-code-return-in-ipoib_mcast_join/20231121-044240
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20231120203501.321587-3-jinpu.wang%40ionos.com
patch subject: [PATCH 2/2] ipoib: Add tx timeout work to recover queue stop situation
config: x86_64-buildonly-randconfig-001-20231121 (https://download.01.org/0day-ci/archive/20231121/202311211231.oyOBtdMM-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231121/202311211231.oyOBtdMM-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311211231.oyOBtdMM-lkp@intel.com/

All errors (new ones prefixed by >>):

   drivers/infiniband/ulp/ipoib/ipoib_ib.c: In function 'ipoib_napi_schedule_work':
>> drivers/infiniband/ulp/ipoib/ipoib_ib.c:542:9: error: implicit declaration of function 'napi_reschedule'; did you mean 'napi_schedule'? [-Werror=implicit-function-declaration]
     542 |   ret = napi_reschedule(&priv->send_napi);
         |         ^~~~~~~~~~~~~~~
         |         napi_schedule
   cc1: some warnings being treated as errors


vim +542 drivers/infiniband/ulp/ipoib/ipoib_ib.c

   533	
   534	/* The function will force napi_schedule */
   535	void ipoib_napi_schedule_work(struct work_struct *work)
   536	{
   537		struct ipoib_dev_priv *priv =
   538			container_of(work, struct ipoib_dev_priv, reschedule_napi_work);
   539		bool ret;
   540	
   541		do {
 > 542			ret = napi_reschedule(&priv->send_napi);
   543			if (!ret)
   544				msleep(3);
   545		} while (!ret && netif_queue_stopped(priv->dev) &&
   546			 test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags));
   547	}
   548
kernel test robot Nov. 21, 2023, 8:21 a.m. UTC | #2
Hi Jack,

kernel test robot noticed the following build errors:

[auto build test ERROR on rdma/for-next]
[also build test ERROR on linus/master v6.7-rc2 next-20231121]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Jack-Wang/ipoib-Fix-error-code-return-in-ipoib_mcast_join/20231121-044240
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git for-next
patch link:    https://lore.kernel.org/r/20231120203501.321587-3-jinpu.wang%40ionos.com
patch subject: [PATCH 2/2] ipoib: Add tx timeout work to recover queue stop situation
config: i386-allmodconfig (https://download.01.org/0day-ci/archive/20231121/202311211607.9eJYoznW-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231121/202311211607.9eJYoznW-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311211607.9eJYoznW-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/infiniband/ulp/ipoib/ipoib_ib.c:542:9: error: call to undeclared function 'napi_reschedule'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
                   ret = napi_reschedule(&priv->send_napi);
                         ^
   drivers/infiniband/ulp/ipoib/ipoib_ib.c:542:9: note: did you mean 'napi_schedule'?
   include/linux/netdevice.h:520:20: note: 'napi_schedule' declared here
   static inline bool napi_schedule(struct napi_struct *n)
                      ^
   drivers/infiniband/ulp/ipoib/ipoib_ib.c:554:8: error: call to undeclared function 'napi_reschedule'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
           ret = napi_reschedule(&priv->send_napi);
                 ^
   2 errors generated.


vim +/napi_reschedule +542 drivers/infiniband/ulp/ipoib/ipoib_ib.c

   533	
   534	/* The function will force napi_schedule */
   535	void ipoib_napi_schedule_work(struct work_struct *work)
   536	{
   537		struct ipoib_dev_priv *priv =
   538			container_of(work, struct ipoib_dev_priv, reschedule_napi_work);
   539		bool ret;
   540	
   541		do {
 > 542			ret = napi_reschedule(&priv->send_napi);
   543			if (!ret)
   544				msleep(3);
   545		} while (!ret && netif_queue_stopped(priv->dev) &&
   546			 test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags));
   547	}
   548
diff mbox series

Patch

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 35e9c8a330e2..963e936da5e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -351,10 +351,12 @@  struct ipoib_dev_priv {
 	struct workqueue_struct *wq;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
+	struct work_struct reschedule_napi_work;
 	struct work_struct flush_light;
 	struct work_struct flush_normal;
 	struct work_struct flush_heavy;
 	struct work_struct restart_task;
+	struct work_struct tx_timeout_work;
 	struct delayed_work ah_reap_task;
 	struct delayed_work neigh_reap_task;
 	struct ib_device *ca;
@@ -499,6 +501,7 @@  int ipoib_send(struct net_device *dev, struct sk_buff *skb,
 	       struct ib_ah *address, u32 dqpn);
 void ipoib_reap_ah(struct work_struct *work);
 
+void ipoib_napi_schedule_work(struct work_struct *work);
 struct ipoib_path *__path_find(struct net_device *dev, void *gid);
 void ipoib_mark_paths_invalid(struct net_device *dev);
 void ipoib_flush_paths(struct net_device *dev);
@@ -510,6 +513,7 @@  void ipoib_ib_tx_timer_func(struct timer_list *t);
 void ipoib_ib_dev_flush_light(struct work_struct *work);
 void ipoib_ib_dev_flush_normal(struct work_struct *work);
 void ipoib_ib_dev_flush_heavy(struct work_struct *work);
+void ipoib_ib_tx_timeout_work(struct work_struct *work);
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 7f84d9866cef..29ba2e443738 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -531,11 +531,35 @@  void ipoib_ib_rx_completion(struct ib_cq *cq, void *ctx_ptr)
 	napi_schedule(&priv->recv_napi);
 }
 
+/* The function will force napi_schedule */
+void ipoib_napi_schedule_work(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv =
+		container_of(work, struct ipoib_dev_priv, reschedule_napi_work);
+	bool ret;
+
+	do {
+		ret = napi_reschedule(&priv->send_napi);
+		if (!ret)
+			msleep(3);
+	} while (!ret && netif_queue_stopped(priv->dev) &&
+		 test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags));
+}
+
 void ipoib_ib_tx_completion(struct ib_cq *cq, void *ctx_ptr)
 {
 	struct ipoib_dev_priv *priv = ctx_ptr;
+	bool ret;
 
-	napi_schedule(&priv->send_napi);
+	ret = napi_reschedule(&priv->send_napi);
+	/*
+	 * if the queue is closed the driver must be able to schedule napi,
+	 * otherwise we can end with closed queue forever, because no new
+	 * packets to send and napi callback might not get new event after
+	 * its re-arm of the napi.
+	 */
+	if (!ret && netif_queue_stopped(priv->dev))
+		schedule_work(&priv->reschedule_napi_work);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 967004ccad98..7a5be705d718 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1200,7 +1200,34 @@  static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
 		   netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
 		   priv->global_tx_head, priv->global_tx_tail);
 
-	/* XXX reset QP, etc. */
+
+	schedule_work(&priv->tx_timeout_work);
+}
+
+void ipoib_ib_tx_timeout_work(struct work_struct *work)
+{
+	struct ipoib_dev_priv *priv = container_of(work,
+						   struct ipoib_dev_priv,
+						   tx_timeout_work);
+	int err;
+
+	rtnl_lock();
+
+	if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+		goto unlock;
+
+	ipoib_stop(priv->dev);
+	err = ipoib_open(priv->dev);
+	if (err) {
+		ipoib_warn(priv, "ipoib_open failed recovering from a tx_timeout, err(%d).\n",
+				err);
+		goto unlock;
+	}
+
+	netif_tx_wake_all_queues(priv->dev);
+unlock:
+	rtnl_unlock();
+
 }
 
 static int ipoib_hard_header(struct sk_buff *skb,
@@ -2112,7 +2139,7 @@  void ipoib_setup_common(struct net_device *dev)
 
 	ipoib_set_ethtool_ops(dev);
 
-	dev->watchdog_timeo	 = HZ;
+	dev->watchdog_timeo	 = 10 * HZ;
 
 	dev->flags		|= IFF_BROADCAST | IFF_MULTICAST;
 
@@ -2150,10 +2177,12 @@  static void ipoib_build_priv(struct net_device *dev)
 
 	INIT_DELAYED_WORK(&priv->mcast_task,   ipoib_mcast_join_task);
 	INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task);
+	INIT_WORK(&priv->reschedule_napi_work, ipoib_napi_schedule_work);
 	INIT_WORK(&priv->flush_light,   ipoib_ib_dev_flush_light);
 	INIT_WORK(&priv->flush_normal,   ipoib_ib_dev_flush_normal);
 	INIT_WORK(&priv->flush_heavy,   ipoib_ib_dev_flush_heavy);
 	INIT_WORK(&priv->restart_task, ipoib_mcast_restart_task);
+	INIT_WORK(&priv->tx_timeout_work, ipoib_ib_tx_timeout_work);
 	INIT_DELAYED_WORK(&priv->ah_reap_task, ipoib_reap_ah);
 	INIT_DELAYED_WORK(&priv->neigh_reap_task, ipoib_reap_neigh);
 }