Message ID | 20210917084004.44332-1-tonylu@linux.alibaba.com (mailing list archive) |
---|---|
State | Accepted |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio_net: introduce TX timeout watchdog | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Guessed tree name to be net-next |
netdev/subject_prefix | warning | Target tree name not specified in the subject |
netdev/cc_maintainers | warning | 2 maintainers not CCed: kuba@kernel.org virtualization@lists.linux-foundation.org |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 0 this patch: 0 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 64 lines checked |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 0 this patch: 0 |
netdev/header_inline | success | Link |
On Fri, Sep 17, 2021 at 4:45 PM tonylu_linux <tonylu@linux.alibaba.com> wrote:
>
> From: Tony Lu <tony.ly@linux.alibaba.com>
>
> This implements ndo_tx_timeout handler and put this into stats. When
> there is something wrong to send out packets, we could notice tx timeout
> events and total timeout counter.
>
> We have suffered send timeout issues due to the backends hung. With this,
> we can find the details, and collect the counters by monitor systems.
>
> Signed-off-by: Tony Lu <tony.ly@linux.alibaba.com>

Note that we support non tx interrupt mode (which could be turned on
via ethtool). I wonder if this can work well in that case. Or maybe it's
the time to remove the non tx interrupt mode completely. Want to do that?

Thanks

> ---
>  drivers/net/virtio_net.c | 22 +++++++++++++++++++++-
>  1 file changed, 21 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 271d38c1d9f8..90fed0fdc40f 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -80,6 +80,7 @@ struct virtnet_sq_stats {
>  	u64 xdp_tx;
>  	u64 xdp_tx_drops;
>  	u64 kicks;
> +	u64 tx_timeouts;
>  };
>
>  struct virtnet_rq_stats {
> @@ -103,6 +104,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
>  	{ "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) },
>  	{ "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) },
>  	{ "kicks", VIRTNET_SQ_STAT(kicks) },
> +	{ "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) },
>  };
>
>  static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
> @@ -1856,7 +1858,7 @@ static void virtnet_stats(struct net_device *dev,
>  	int i;
>
>  	for (i = 0; i < vi->max_queue_pairs; i++) {
> -		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
> +		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
>  		struct receive_queue *rq = &vi->rq[i];
>  		struct send_queue *sq = &vi->sq[i];
>
> @@ -1864,6 +1866,7 @@ static void virtnet_stats(struct net_device *dev,
>  			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
>  			tpackets = sq->stats.packets;
>  			tbytes = sq->stats.bytes;
> +			terrors = sq->stats.tx_timeouts;
>  		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
>
>  		do {
> @@ -1878,6 +1881,7 @@ static void virtnet_stats(struct net_device *dev,
>  		tot->rx_bytes += rbytes;
>  		tot->tx_bytes += tbytes;
>  		tot->rx_dropped += rdrops;
> +		tot->tx_errors += terrors;
>  	}
>
>  	tot->tx_dropped = dev->stats.tx_dropped;
> @@ -2659,6 +2663,21 @@ static int virtnet_set_features(struct net_device *dev,
>  	return 0;
>  }
>
> +static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
> +{
> +	struct virtnet_info *priv = netdev_priv(dev);
> +	struct send_queue *sq = &priv->sq[txqueue];
> +	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
> +
> +	u64_stats_update_begin(&sq->stats.syncp);
> +	sq->stats.tx_timeouts++;
> +	u64_stats_update_end(&sq->stats.syncp);
> +
> +	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
> +		   txqueue, sq->name, sq->vq->index, sq->vq->name,
> +		   jiffies_to_usecs(jiffies - txq->trans_start));
> +}
> +
>  static const struct net_device_ops virtnet_netdev = {
>  	.ndo_open = virtnet_open,
>  	.ndo_stop = virtnet_close,
> @@ -2674,6 +2693,7 @@ static const struct net_device_ops virtnet_netdev = {
>  	.ndo_features_check = passthru_features_check,
>  	.ndo_get_phys_port_name = virtnet_get_phys_port_name,
>  	.ndo_set_features = virtnet_set_features,
> +	.ndo_tx_timeout = virtnet_tx_timeout,
>  };
>
>  static void virtnet_config_changed_work(struct work_struct *work)
> --
> 2.19.1.6.gb485710b
>
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 271d38c1d9f8..90fed0fdc40f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -80,6 +80,7 @@ struct virtnet_sq_stats {
 	u64 xdp_tx;
 	u64 xdp_tx_drops;
 	u64 kicks;
+	u64 tx_timeouts;
 };
 
 struct virtnet_rq_stats {
@@ -103,6 +104,7 @@ static const struct virtnet_stat_desc virtnet_sq_stats_desc[] = {
 	{ "xdp_tx", VIRTNET_SQ_STAT(xdp_tx) },
 	{ "xdp_tx_drops", VIRTNET_SQ_STAT(xdp_tx_drops) },
 	{ "kicks", VIRTNET_SQ_STAT(kicks) },
+	{ "tx_timeouts", VIRTNET_SQ_STAT(tx_timeouts) },
 };
 
 static const struct virtnet_stat_desc virtnet_rq_stats_desc[] = {
@@ -1856,7 +1858,7 @@ static void virtnet_stats(struct net_device *dev,
 	int i;
 
 	for (i = 0; i < vi->max_queue_pairs; i++) {
-		u64 tpackets, tbytes, rpackets, rbytes, rdrops;
+		u64 tpackets, tbytes, terrors, rpackets, rbytes, rdrops;
 		struct receive_queue *rq = &vi->rq[i];
 		struct send_queue *sq = &vi->sq[i];
 
@@ -1864,6 +1866,7 @@ static void virtnet_stats(struct net_device *dev,
 			start = u64_stats_fetch_begin_irq(&sq->stats.syncp);
 			tpackets = sq->stats.packets;
 			tbytes = sq->stats.bytes;
+			terrors = sq->stats.tx_timeouts;
 		} while (u64_stats_fetch_retry_irq(&sq->stats.syncp, start));
 
 		do {
@@ -1878,6 +1881,7 @@ static void virtnet_stats(struct net_device *dev,
 		tot->rx_bytes += rbytes;
 		tot->tx_bytes += tbytes;
 		tot->rx_dropped += rdrops;
+		tot->tx_errors += terrors;
 	}
 
 	tot->tx_dropped = dev->stats.tx_dropped;
@@ -2659,6 +2663,21 @@ static int virtnet_set_features(struct net_device *dev,
 	return 0;
 }
 
+static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct virtnet_info *priv = netdev_priv(dev);
+	struct send_queue *sq = &priv->sq[txqueue];
+	struct netdev_queue *txq = netdev_get_tx_queue(dev, txqueue);
+
+	u64_stats_update_begin(&sq->stats.syncp);
+	sq->stats.tx_timeouts++;
+	u64_stats_update_end(&sq->stats.syncp);
+
+	netdev_err(dev, "TX timeout on queue: %u, sq: %s, vq: 0x%x, name: %s, %u usecs ago\n",
+		   txqueue, sq->name, sq->vq->index, sq->vq->name,
+		   jiffies_to_usecs(jiffies - txq->trans_start));
+}
+
 static const struct net_device_ops virtnet_netdev = {
 	.ndo_open = virtnet_open,
 	.ndo_stop = virtnet_close,
@@ -2674,6 +2693,7 @@ static const struct net_device_ops virtnet_netdev = {
 	.ndo_features_check = passthru_features_check,
 	.ndo_get_phys_port_name = virtnet_get_phys_port_name,
 	.ndo_set_features = virtnet_set_features,
+	.ndo_tx_timeout = virtnet_tx_timeout,
 };
 
 static void virtnet_config_changed_work(struct work_struct *work)