diff mbox

[v2,4/5] net: thunderx: Fix VF driver's interface statistics

Message ID 1479211696-14501-1-git-send-email-sunil.kovvuri@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Sunil Kovvuri Nov. 15, 2016, 12:08 p.m. UTC
From: Sunil Goutham <sgoutham@cavium.com>

This patch fixes multiple issues
1. Convert all driver statistics to percpu counters for accuracy.
2. To avoid multiple CQEs posted by a TSO packet appended to HW,
   TSO pkt's SQE has 'post_cqe' not set but a dummy SQE is added
   for getting HW transmit completion notification. This dummy
   SQE has 'dont_send' set and HW drops the pkt pointed to in this
   thus Tx drop counter increases. This patch fixes this by subtracting
   SW tx tso counter from HW Tx drop counter for actual packet drop counter.
3. Reset all individual queue's and VNIC HW stats when interface is going down.
4. Getrid off unnecessary counters in hot path.
5. Bringout all CQE error stats i.e both Rx and Tx.

Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
---
 drivers/net/ethernet/cavium/thunder/nic.h          |  61 +++++++-----
 drivers/net/ethernet/cavium/thunder/nic_main.c     |   1 +
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c    | 105 +++++++++++---------
 drivers/net/ethernet/cavium/thunder/nicvf_main.c   | 106 +++++++++++----------
 drivers/net/ethernet/cavium/thunder/nicvf_queues.c |  96 +++++++++----------
 drivers/net/ethernet/cavium/thunder/nicvf_queues.h |  24 +----
 6 files changed, 197 insertions(+), 196 deletions(-)
diff mbox

Patch

diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index cd2d379..86bd93c 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -178,11 +178,11 @@  enum tx_stats_reg_offset {
 
 struct nicvf_hw_stats {
 	u64 rx_bytes;
+	u64 rx_frames;
 	u64 rx_ucast_frames;
 	u64 rx_bcast_frames;
 	u64 rx_mcast_frames;
-	u64 rx_fcs_errors;
-	u64 rx_l2_errors;
+	u64 rx_drops;
 	u64 rx_drop_red;
 	u64 rx_drop_red_bytes;
 	u64 rx_drop_overrun;
@@ -191,6 +191,19 @@  struct nicvf_hw_stats {
 	u64 rx_drop_mcast;
 	u64 rx_drop_l3_bcast;
 	u64 rx_drop_l3_mcast;
+	u64 rx_fcs_errors;
+	u64 rx_l2_errors;
+
+	u64 tx_bytes;
+	u64 tx_frames;
+	u64 tx_ucast_frames;
+	u64 tx_bcast_frames;
+	u64 tx_mcast_frames;
+	u64 tx_drops;
+};
+
+struct nicvf_drv_stats {
+	/* CQE Rx errs */
 	u64 rx_bgx_truncated_pkts;
 	u64 rx_jabber_errs;
 	u64 rx_fcs_errs;
@@ -216,34 +229,30 @@  struct nicvf_hw_stats {
 	u64 rx_l4_pclp;
 	u64 rx_truncated_pkts;
 
-	u64 tx_bytes_ok;
-	u64 tx_ucast_frames_ok;
-	u64 tx_bcast_frames_ok;
-	u64 tx_mcast_frames_ok;
-	u64 tx_drops;
-};
-
-struct nicvf_drv_stats {
-	/* Rx */
-	u64 rx_frames_ok;
-	u64 rx_frames_64;
-	u64 rx_frames_127;
-	u64 rx_frames_255;
-	u64 rx_frames_511;
-	u64 rx_frames_1023;
-	u64 rx_frames_1518;
-	u64 rx_frames_jumbo;
-	u64 rx_drops;
-
+	/* CQE Tx errs */
+	u64 tx_desc_fault;
+	u64 tx_hdr_cons_err;
+	u64 tx_subdesc_err;
+	u64 tx_max_size_exceeded;
+	u64 tx_imm_size_oflow;
+	u64 tx_data_seq_err;
+	u64 tx_mem_seq_err;
+	u64 tx_lock_viol;
+	u64 tx_data_fault;
+	u64 tx_tstmp_conflict;
+	u64 tx_tstmp_timeout;
+	u64 tx_mem_fault;
+	u64 tx_csum_overlap;
+	u64 tx_csum_overflow;
+
+	/* driver debug stats */
 	u64 rcv_buffer_alloc_failures;
-
-	/* Tx */
-	u64 tx_frames_ok;
-	u64 tx_drops;
 	u64 tx_tso;
 	u64 tx_timeout;
 	u64 txq_stop;
 	u64 txq_wake;
+
+	struct u64_stats_sync   syncp;
 };
 
 struct nicvf {
@@ -297,7 +306,7 @@  struct nicvf {
 
 	/* Stats */
 	struct nicvf_hw_stats   hw_stats;
-	struct nicvf_drv_stats  drv_stats;
+	struct nicvf_drv_stats  __percpu *drv_stats;
 	struct bgx_stats	bgx_stats;
 
 	/* MSI-X  */
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 85c9e62..6677b96 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -851,6 +851,7 @@  static int nic_reset_stat_counters(struct nicpf *nic,
 			nic_reg_write(nic, reg_addr, 0);
 		}
 	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index ad4fddb..432bf6b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -36,11 +36,11 @@  struct nicvf_stat {
 
 static const struct nicvf_stat nicvf_hw_stats[] = {
 	NICVF_HW_STAT(rx_bytes),
+	NICVF_HW_STAT(rx_frames),
 	NICVF_HW_STAT(rx_ucast_frames),
 	NICVF_HW_STAT(rx_bcast_frames),
 	NICVF_HW_STAT(rx_mcast_frames),
-	NICVF_HW_STAT(rx_fcs_errors),
-	NICVF_HW_STAT(rx_l2_errors),
+	NICVF_HW_STAT(rx_drops),
 	NICVF_HW_STAT(rx_drop_red),
 	NICVF_HW_STAT(rx_drop_red_bytes),
 	NICVF_HW_STAT(rx_drop_overrun),
@@ -49,50 +49,59 @@  static const struct nicvf_stat nicvf_hw_stats[] = {
 	NICVF_HW_STAT(rx_drop_mcast),
 	NICVF_HW_STAT(rx_drop_l3_bcast),
 	NICVF_HW_STAT(rx_drop_l3_mcast),
-	NICVF_HW_STAT(rx_bgx_truncated_pkts),
-	NICVF_HW_STAT(rx_jabber_errs),
-	NICVF_HW_STAT(rx_fcs_errs),
-	NICVF_HW_STAT(rx_bgx_errs),
-	NICVF_HW_STAT(rx_prel2_errs),
-	NICVF_HW_STAT(rx_l2_hdr_malformed),
-	NICVF_HW_STAT(rx_oversize),
-	NICVF_HW_STAT(rx_undersize),
-	NICVF_HW_STAT(rx_l2_len_mismatch),
-	NICVF_HW_STAT(rx_l2_pclp),
-	NICVF_HW_STAT(rx_ip_ver_errs),
-	NICVF_HW_STAT(rx_ip_csum_errs),
-	NICVF_HW_STAT(rx_ip_hdr_malformed),
-	NICVF_HW_STAT(rx_ip_payload_malformed),
-	NICVF_HW_STAT(rx_ip_ttl_errs),
-	NICVF_HW_STAT(rx_l3_pclp),
-	NICVF_HW_STAT(rx_l4_malformed),
-	NICVF_HW_STAT(rx_l4_csum_errs),
-	NICVF_HW_STAT(rx_udp_len_errs),
-	NICVF_HW_STAT(rx_l4_port_errs),
-	NICVF_HW_STAT(rx_tcp_flag_errs),
-	NICVF_HW_STAT(rx_tcp_offset_errs),
-	NICVF_HW_STAT(rx_l4_pclp),
-	NICVF_HW_STAT(rx_truncated_pkts),
-	NICVF_HW_STAT(tx_bytes_ok),
-	NICVF_HW_STAT(tx_ucast_frames_ok),
-	NICVF_HW_STAT(tx_bcast_frames_ok),
-	NICVF_HW_STAT(tx_mcast_frames_ok),
+	NICVF_HW_STAT(rx_fcs_errors),
+	NICVF_HW_STAT(rx_l2_errors),
+	NICVF_HW_STAT(tx_bytes),
+	NICVF_HW_STAT(tx_frames),
+	NICVF_HW_STAT(tx_ucast_frames),
+	NICVF_HW_STAT(tx_bcast_frames),
+	NICVF_HW_STAT(tx_mcast_frames),
+	NICVF_HW_STAT(tx_drops),
 };
 
 static const struct nicvf_stat nicvf_drv_stats[] = {
-	NICVF_DRV_STAT(rx_frames_ok),
-	NICVF_DRV_STAT(rx_frames_64),
-	NICVF_DRV_STAT(rx_frames_127),
-	NICVF_DRV_STAT(rx_frames_255),
-	NICVF_DRV_STAT(rx_frames_511),
-	NICVF_DRV_STAT(rx_frames_1023),
-	NICVF_DRV_STAT(rx_frames_1518),
-	NICVF_DRV_STAT(rx_frames_jumbo),
-	NICVF_DRV_STAT(rx_drops),
+	NICVF_DRV_STAT(rx_bgx_truncated_pkts),
+	NICVF_DRV_STAT(rx_jabber_errs),
+	NICVF_DRV_STAT(rx_fcs_errs),
+	NICVF_DRV_STAT(rx_bgx_errs),
+	NICVF_DRV_STAT(rx_prel2_errs),
+	NICVF_DRV_STAT(rx_l2_hdr_malformed),
+	NICVF_DRV_STAT(rx_oversize),
+	NICVF_DRV_STAT(rx_undersize),
+	NICVF_DRV_STAT(rx_l2_len_mismatch),
+	NICVF_DRV_STAT(rx_l2_pclp),
+	NICVF_DRV_STAT(rx_ip_ver_errs),
+	NICVF_DRV_STAT(rx_ip_csum_errs),
+	NICVF_DRV_STAT(rx_ip_hdr_malformed),
+	NICVF_DRV_STAT(rx_ip_payload_malformed),
+	NICVF_DRV_STAT(rx_ip_ttl_errs),
+	NICVF_DRV_STAT(rx_l3_pclp),
+	NICVF_DRV_STAT(rx_l4_malformed),
+	NICVF_DRV_STAT(rx_l4_csum_errs),
+	NICVF_DRV_STAT(rx_udp_len_errs),
+	NICVF_DRV_STAT(rx_l4_port_errs),
+	NICVF_DRV_STAT(rx_tcp_flag_errs),
+	NICVF_DRV_STAT(rx_tcp_offset_errs),
+	NICVF_DRV_STAT(rx_l4_pclp),
+	NICVF_DRV_STAT(rx_truncated_pkts),
+
+	NICVF_DRV_STAT(tx_desc_fault),
+	NICVF_DRV_STAT(tx_hdr_cons_err),
+	NICVF_DRV_STAT(tx_subdesc_err),
+	NICVF_DRV_STAT(tx_max_size_exceeded),
+	NICVF_DRV_STAT(tx_imm_size_oflow),
+	NICVF_DRV_STAT(tx_data_seq_err),
+	NICVF_DRV_STAT(tx_mem_seq_err),
+	NICVF_DRV_STAT(tx_lock_viol),
+	NICVF_DRV_STAT(tx_data_fault),
+	NICVF_DRV_STAT(tx_tstmp_conflict),
+	NICVF_DRV_STAT(tx_tstmp_timeout),
+	NICVF_DRV_STAT(tx_mem_fault),
+	NICVF_DRV_STAT(tx_csum_overlap),
+	NICVF_DRV_STAT(tx_csum_overflow),
+
 	NICVF_DRV_STAT(rcv_buffer_alloc_failures),
-	NICVF_DRV_STAT(tx_frames_ok),
 	NICVF_DRV_STAT(tx_tso),
-	NICVF_DRV_STAT(tx_drops),
 	NICVF_DRV_STAT(tx_timeout),
 	NICVF_DRV_STAT(txq_stop),
 	NICVF_DRV_STAT(txq_wake),
@@ -278,8 +287,8 @@  static void nicvf_get_ethtool_stats(struct net_device *netdev,
 				    struct ethtool_stats *stats, u64 *data)
 {
 	struct nicvf *nic = netdev_priv(netdev);
-	int stat;
-	int sqs;
+	int stat, tmp_stats;
+	int sqs, cpu;
 
 	nicvf_update_stats(nic);
 
@@ -289,9 +298,13 @@  static void nicvf_get_ethtool_stats(struct net_device *netdev,
 	for (stat = 0; stat < nicvf_n_hw_stats; stat++)
 		*(data++) = ((u64 *)&nic->hw_stats)
 				[nicvf_hw_stats[stat].index];
-	for (stat = 0; stat < nicvf_n_drv_stats; stat++)
-		*(data++) = ((u64 *)&nic->drv_stats)
-				[nicvf_drv_stats[stat].index];
+	for (stat = 0; stat < nicvf_n_drv_stats; stat++) {
+		tmp_stats = 0;
+		for_each_possible_cpu(cpu)
+			tmp_stats += ((u64 *)per_cpu_ptr(nic->drv_stats, cpu))
+				     [nicvf_drv_stats[stat].index];
+		*(data++) = tmp_stats;
+	}
 
 	nicvf_get_qset_stats(nic, stats, &data);
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 8f83361..9dc79c05 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -69,25 +69,6 @@  static inline u8 nicvf_netdev_qidx(struct nicvf *nic, u8 qidx)
 		return qidx;
 }
 
-static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic,
-					  struct sk_buff *skb)
-{
-	if (skb->len <= 64)
-		nic->drv_stats.rx_frames_64++;
-	else if (skb->len <= 127)
-		nic->drv_stats.rx_frames_127++;
-	else if (skb->len <= 255)
-		nic->drv_stats.rx_frames_255++;
-	else if (skb->len <= 511)
-		nic->drv_stats.rx_frames_511++;
-	else if (skb->len <= 1023)
-		nic->drv_stats.rx_frames_1023++;
-	else if (skb->len <= 1518)
-		nic->drv_stats.rx_frames_1518++;
-	else
-		nic->drv_stats.rx_frames_jumbo++;
-}
-
 /* The Cavium ThunderX network controller can *only* be found in SoCs
  * containing the ThunderX ARM64 CPU implementation.  All accesses to the device
  * registers on this platform are implicitly strongly ordered with respect
@@ -514,7 +495,6 @@  static int nicvf_init_resources(struct nicvf *nic)
 }
 
 static void nicvf_snd_pkt_handler(struct net_device *netdev,
-				  struct cmp_queue *cq,
 				  struct cqe_send_t *cqe_tx,
 				  int cqe_type, int budget,
 				  unsigned int *tx_pkts, unsigned int *tx_bytes)
@@ -536,7 +516,7 @@  static void nicvf_snd_pkt_handler(struct net_device *netdev,
 		   __func__, cqe_tx->sq_qs, cqe_tx->sq_idx,
 		   cqe_tx->sqe_ptr, hdr->subdesc_cnt);
 
-	nicvf_check_cqe_tx_errs(nic, cq, cqe_tx);
+	nicvf_check_cqe_tx_errs(nic, cqe_tx);
 	skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr];
 	if (skb) {
 		/* Check for dummy descriptor used for HW TSO offload on 88xx */
@@ -630,8 +610,6 @@  static void nicvf_rcv_pkt_handler(struct net_device *netdev,
 		return;
 	}
 
-	nicvf_set_rx_frame_cnt(nic, skb);
-
 	nicvf_set_rxhash(netdev, cqe_rx, skb);
 
 	skb_record_rx_queue(skb, rq_idx);
@@ -703,7 +681,7 @@  static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 			work_done++;
 		break;
 		case CQE_TYPE_SEND:
-			nicvf_snd_pkt_handler(netdev, cq,
+			nicvf_snd_pkt_handler(netdev,
 					      (void *)cq_desc, CQE_TYPE_SEND,
 					      budget, &tx_pkts, &tx_bytes);
 			tx_done++;
@@ -740,7 +718,7 @@  static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 		nic = nic->pnicvf;
 		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
 			netif_tx_start_queue(txq);
-			nic->drv_stats.txq_wake++;
+			this_cpu_inc(nic->drv_stats->txq_wake);
 			if (netif_msg_tx_err(nic))
 				netdev_warn(netdev,
 					    "%s: Transmit queue wakeup SQ%d\n",
@@ -1084,7 +1062,7 @@  static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
 		netif_tx_stop_queue(txq);
-		nic->drv_stats.txq_stop++;
+		this_cpu_inc(nic->drv_stats->txq_stop);
 		if (netif_msg_tx_err(nic))
 			netdev_warn(netdev,
 				    "%s: Transmit ring full, stopping SQ%d\n",
@@ -1202,7 +1180,7 @@  static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
 
 int nicvf_open(struct net_device *netdev)
 {
-	int err, qidx;
+	int cpu, err, qidx;
 	struct nicvf *nic = netdev_priv(netdev);
 	struct queue_set *qs = nic->qs;
 	struct nicvf_cq_poll *cq_poll = NULL;
@@ -1262,6 +1240,11 @@  int nicvf_open(struct net_device *netdev)
 		nicvf_rss_init(nic);
 		if (nicvf_update_hw_max_frs(nic, netdev->mtu))
 			goto cleanup;
+
+		/* Clear percpu stats */
+		for_each_possible_cpu(cpu)
+			memset(per_cpu_ptr(nic->drv_stats, cpu), 0,
+			       sizeof(struct nicvf_drv_stats));
 	}
 
 	err = nicvf_register_interrupts(nic);
@@ -1288,9 +1271,6 @@  int nicvf_open(struct net_device *netdev)
 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
 
-	nic->drv_stats.txq_stop = 0;
-	nic->drv_stats.txq_wake = 0;
-
 	return 0;
 cleanup:
 	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
@@ -1383,9 +1363,10 @@  void nicvf_update_lmac_stats(struct nicvf *nic)
 
 void nicvf_update_stats(struct nicvf *nic)
 {
-	int qidx;
+	int qidx, cpu;
+	u64 tmp_stats = 0;
 	struct nicvf_hw_stats *stats = &nic->hw_stats;
-	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
+	struct nicvf_drv_stats *drv_stats;
 	struct queue_set *qs = nic->qs;
 
 #define GET_RX_STATS(reg) \
@@ -1408,21 +1389,33 @@  void nicvf_update_stats(struct nicvf *nic)
 	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
 	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
 
-	stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
-	stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
-	stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
-	stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
+	stats->tx_bytes = GET_TX_STATS(TX_OCTS);
+	stats->tx_ucast_frames = GET_TX_STATS(TX_UCAST);
+	stats->tx_bcast_frames = GET_TX_STATS(TX_BCAST);
+	stats->tx_mcast_frames = GET_TX_STATS(TX_MCAST);
 	stats->tx_drops = GET_TX_STATS(TX_DROP);
 
-	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
-				  stats->tx_bcast_frames_ok +
-				  stats->tx_mcast_frames_ok;
-	drv_stats->rx_frames_ok = stats->rx_ucast_frames +
-				  stats->rx_bcast_frames +
-				  stats->rx_mcast_frames;
-	drv_stats->rx_drops = stats->rx_drop_red +
-			      stats->rx_drop_overrun;
-	drv_stats->tx_drops = stats->tx_drops;
+	/* On T88 pass 2.0, the dummy SQE added for TSO notification
+	 * via CQE has 'dont_send' set. Hence HW drops the pkt pointed
+	 * pointed by dummy SQE and results in tx_drops counter being
+	 * incremented. Subtracting it from tx_tso counter will give
+	 * exact tx_drops counter.
+	 */
+	if (nic->t88 && nic->hw_tso) {
+		for_each_possible_cpu(cpu) {
+			drv_stats = per_cpu_ptr(nic->drv_stats, cpu);
+			tmp_stats += drv_stats->tx_tso;
+		}
+		stats->tx_drops = tmp_stats - stats->tx_drops;
+	}
+	stats->tx_frames = stats->tx_ucast_frames +
+			   stats->tx_bcast_frames +
+			   stats->tx_mcast_frames;
+	stats->rx_frames = stats->rx_ucast_frames +
+			   stats->rx_bcast_frames +
+			   stats->rx_mcast_frames;
+	stats->rx_drops = stats->rx_drop_red +
+			  stats->rx_drop_overrun;
 
 	/* Update RQ and SQ stats */
 	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
@@ -1436,18 +1429,17 @@  static struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev,
 {
 	struct nicvf *nic = netdev_priv(netdev);
 	struct nicvf_hw_stats *hw_stats = &nic->hw_stats;
-	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
 
 	nicvf_update_stats(nic);
 
 	stats->rx_bytes = hw_stats->rx_bytes;
-	stats->rx_packets = drv_stats->rx_frames_ok;
-	stats->rx_dropped = drv_stats->rx_drops;
+	stats->rx_packets = hw_stats->rx_frames;
+	stats->rx_dropped = hw_stats->rx_drops;
 	stats->multicast = hw_stats->rx_mcast_frames;
 
-	stats->tx_bytes = hw_stats->tx_bytes_ok;
-	stats->tx_packets = drv_stats->tx_frames_ok;
-	stats->tx_dropped = drv_stats->tx_drops;
+	stats->tx_bytes = hw_stats->tx_bytes;
+	stats->tx_packets = hw_stats->tx_frames;
+	stats->tx_dropped = hw_stats->tx_drops;
 
 	return stats;
 }
@@ -1460,7 +1452,7 @@  static void nicvf_tx_timeout(struct net_device *dev)
 		netdev_warn(dev, "%s: Transmit timed out, resetting\n",
 			    dev->name);
 
-	nic->drv_stats.tx_timeout++;
+	this_cpu_inc(nic->drv_stats->tx_timeout);
 	schedule_work(&nic->reset_task);
 }
 
@@ -1594,6 +1586,12 @@  static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_free_netdev;
 	}
 
+	nic->drv_stats = netdev_alloc_pcpu_stats(struct nicvf_drv_stats);
+	if (!nic->drv_stats) {
+		err = -ENOMEM;
+		goto err_free_netdev;
+	}
+
 	err = nicvf_set_qset_resources(nic);
 	if (err)
 		goto err_free_netdev;
@@ -1652,6 +1650,8 @@  static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	nicvf_unregister_interrupts(nic);
 err_free_netdev:
 	pci_set_drvdata(pdev, NULL);
+	if (nic->drv_stats)
+		free_percpu(nic->drv_stats);
 	free_netdev(netdev);
 err_release_regions:
 	pci_release_regions(pdev);
@@ -1679,6 +1679,8 @@  static void nicvf_remove(struct pci_dev *pdev)
 		unregister_netdev(pnetdev);
 	nicvf_unregister_interrupts(nic);
 	pci_set_drvdata(pdev, NULL);
+	if (nic->drv_stats)
+		free_percpu(nic->drv_stats);
 	free_netdev(netdev);
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index f914eef..bdce591 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -104,7 +104,8 @@  static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp,
 		nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
 					   order);
 		if (!nic->rb_page) {
-			nic->drv_stats.rcv_buffer_alloc_failures++;
+			this_cpu_inc(nic->pnicvf->drv_stats->
+				     rcv_buffer_alloc_failures);
 			return -ENOMEM;
 		}
 		nic->rb_page_offset = 0;
@@ -483,9 +484,12 @@  static void nicvf_reset_rcv_queue_stats(struct nicvf *nic)
 {
 	union nic_mbx mbx = {};
 
-	/* Reset all RXQ's stats */
+	/* Reset all RQ/SQ and VF stats */
 	mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER;
+	mbx.reset_stat.rx_stat_mask = 0x3FFF;
+	mbx.reset_stat.tx_stat_mask = 0x1F;
 	mbx.reset_stat.rq_stat_mask = 0xFFFF;
+	mbx.reset_stat.sq_stat_mask = 0xFFFF;
 	nicvf_send_msg_to_pf(nic, &mbx);
 }
 
@@ -1032,7 +1036,7 @@  nicvf_sq_add_hdr_subdesc(struct nicvf *nic, struct snd_queue *sq, int qentry,
 		hdr->tso_max_paysize = skb_shinfo(skb)->gso_size;
 		/* For non-tunneled pkts, point this to L2 ethertype */
 		hdr->inner_l3_offset = skb_network_offset(skb) - 2;
-		nic->drv_stats.tx_tso++;
+		this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
 	}
 }
 
@@ -1164,7 +1168,7 @@  static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
 
 	nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt);
 
-	nic->drv_stats.tx_tso++;
+	this_cpu_inc(nic->pnicvf->drv_stats->tx_tso);
 	return 1;
 }
 
@@ -1425,8 +1429,6 @@  void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx)
 /* Check for errors in the receive cmp.queue entry */
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 {
-	struct nicvf_hw_stats *stats = &nic->hw_stats;
-
 	if (!cqe_rx->err_level && !cqe_rx->err_opcode)
 		return 0;
 
@@ -1438,76 +1440,76 @@  int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 
 	switch (cqe_rx->err_opcode) {
 	case CQ_RX_ERROP_RE_PARTIAL:
-		stats->rx_bgx_truncated_pkts++;
+		this_cpu_inc(nic->drv_stats->rx_bgx_truncated_pkts);
 		break;
 	case CQ_RX_ERROP_RE_JABBER:
-		stats->rx_jabber_errs++;
+		this_cpu_inc(nic->drv_stats->rx_jabber_errs);
 		break;
 	case CQ_RX_ERROP_RE_FCS:
-		stats->rx_fcs_errs++;
+		this_cpu_inc(nic->drv_stats->rx_fcs_errs);
 		break;
 	case CQ_RX_ERROP_RE_RX_CTL:
-		stats->rx_bgx_errs++;
+		this_cpu_inc(nic->drv_stats->rx_bgx_errs);
 		break;
 	case CQ_RX_ERROP_PREL2_ERR:
-		stats->rx_prel2_errs++;
+		this_cpu_inc(nic->drv_stats->rx_prel2_errs);
 		break;
 	case CQ_RX_ERROP_L2_MAL:
-		stats->rx_l2_hdr_malformed++;
+		this_cpu_inc(nic->drv_stats->rx_l2_hdr_malformed);
 		break;
 	case CQ_RX_ERROP_L2_OVERSIZE:
-		stats->rx_oversize++;
+		this_cpu_inc(nic->drv_stats->rx_oversize);
 		break;
 	case CQ_RX_ERROP_L2_UNDERSIZE:
-		stats->rx_undersize++;
+		this_cpu_inc(nic->drv_stats->rx_undersize);
 		break;
 	case CQ_RX_ERROP_L2_LENMISM:
-		stats->rx_l2_len_mismatch++;
+		this_cpu_inc(nic->drv_stats->rx_l2_len_mismatch);
 		break;
 	case CQ_RX_ERROP_L2_PCLP:
-		stats->rx_l2_pclp++;
+		this_cpu_inc(nic->drv_stats->rx_l2_pclp);
 		break;
 	case CQ_RX_ERROP_IP_NOT:
-		stats->rx_ip_ver_errs++;
+		this_cpu_inc(nic->drv_stats->rx_ip_ver_errs);
 		break;
 	case CQ_RX_ERROP_IP_CSUM_ERR:
-		stats->rx_ip_csum_errs++;
+		this_cpu_inc(nic->drv_stats->rx_ip_csum_errs);
 		break;
 	case CQ_RX_ERROP_IP_MAL:
-		stats->rx_ip_hdr_malformed++;
+		this_cpu_inc(nic->drv_stats->rx_ip_hdr_malformed);
 		break;
 	case CQ_RX_ERROP_IP_MALD:
-		stats->rx_ip_payload_malformed++;
+		this_cpu_inc(nic->drv_stats->rx_ip_payload_malformed);
 		break;
 	case CQ_RX_ERROP_IP_HOP:
-		stats->rx_ip_ttl_errs++;
+		this_cpu_inc(nic->drv_stats->rx_ip_ttl_errs);
 		break;
 	case CQ_RX_ERROP_L3_PCLP:
-		stats->rx_l3_pclp++;
+		this_cpu_inc(nic->drv_stats->rx_l3_pclp);
 		break;
 	case CQ_RX_ERROP_L4_MAL:
-		stats->rx_l4_malformed++;
+		this_cpu_inc(nic->drv_stats->rx_l4_malformed);
 		break;
 	case CQ_RX_ERROP_L4_CHK:
-		stats->rx_l4_csum_errs++;
+		this_cpu_inc(nic->drv_stats->rx_l4_csum_errs);
 		break;
 	case CQ_RX_ERROP_UDP_LEN:
-		stats->rx_udp_len_errs++;
+		this_cpu_inc(nic->drv_stats->rx_udp_len_errs);
 		break;
 	case CQ_RX_ERROP_L4_PORT:
-		stats->rx_l4_port_errs++;
+		this_cpu_inc(nic->drv_stats->rx_l4_port_errs);
 		break;
 	case CQ_RX_ERROP_TCP_FLAG:
-		stats->rx_tcp_flag_errs++;
+		this_cpu_inc(nic->drv_stats->rx_tcp_flag_errs);
 		break;
 	case CQ_RX_ERROP_TCP_OFFSET:
-		stats->rx_tcp_offset_errs++;
+		this_cpu_inc(nic->drv_stats->rx_tcp_offset_errs);
 		break;
 	case CQ_RX_ERROP_L4_PCLP:
-		stats->rx_l4_pclp++;
+		this_cpu_inc(nic->drv_stats->rx_l4_pclp);
 		break;
 	case CQ_RX_ERROP_RBDR_TRUNC:
-		stats->rx_truncated_pkts++;
+		this_cpu_inc(nic->drv_stats->rx_truncated_pkts);
 		break;
 	}
 
@@ -1515,56 +1517,52 @@  int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx)
 }
 
 /* Check for errors in the send cmp.queue entry */
-int nicvf_check_cqe_tx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_send_t *cqe_tx)
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx)
 {
-	struct cmp_queue_stats *stats = &cq->stats;
-
 	switch (cqe_tx->send_status) {
 	case CQ_TX_ERROP_GOOD:
-		stats->tx.good++;
 		return 0;
 	case CQ_TX_ERROP_DESC_FAULT:
-		stats->tx.desc_fault++;
+		this_cpu_inc(nic->drv_stats->tx_desc_fault);
 		break;
 	case CQ_TX_ERROP_HDR_CONS_ERR:
-		stats->tx.hdr_cons_err++;
+		this_cpu_inc(nic->drv_stats->tx_hdr_cons_err);
 		break;
 	case CQ_TX_ERROP_SUBDC_ERR:
-		stats->tx.subdesc_err++;
+		this_cpu_inc(nic->drv_stats->tx_subdesc_err);
 		break;
 	case CQ_TX_ERROP_MAX_SIZE_VIOL:
-		stats->tx.max_size_exceeded++;
+		this_cpu_inc(nic->drv_stats->tx_max_size_exceeded);
 		break;
 	case CQ_TX_ERROP_IMM_SIZE_OFLOW:
-		stats->tx.imm_size_oflow++;
+		this_cpu_inc(nic->drv_stats->tx_imm_size_oflow);
 		break;
 	case CQ_TX_ERROP_DATA_SEQUENCE_ERR:
-		stats->tx.data_seq_err++;
+		this_cpu_inc(nic->drv_stats->tx_data_seq_err);
 		break;
 	case CQ_TX_ERROP_MEM_SEQUENCE_ERR:
-		stats->tx.mem_seq_err++;
+		this_cpu_inc(nic->drv_stats->tx_mem_seq_err);
 		break;
 	case CQ_TX_ERROP_LOCK_VIOL:
-		stats->tx.lock_viol++;
+		this_cpu_inc(nic->drv_stats->tx_lock_viol);
 		break;
 	case CQ_TX_ERROP_DATA_FAULT:
-		stats->tx.data_fault++;
+		this_cpu_inc(nic->drv_stats->tx_data_fault);
 		break;
 	case CQ_TX_ERROP_TSTMP_CONFLICT:
-		stats->tx.tstmp_conflict++;
+		this_cpu_inc(nic->drv_stats->tx_tstmp_conflict);
 		break;
 	case CQ_TX_ERROP_TSTMP_TIMEOUT:
-		stats->tx.tstmp_timeout++;
+		this_cpu_inc(nic->drv_stats->tx_tstmp_timeout);
 		break;
 	case CQ_TX_ERROP_MEM_FAULT:
-		stats->tx.mem_fault++;
+		this_cpu_inc(nic->drv_stats->tx_mem_fault);
 		break;
 	case CQ_TX_ERROP_CK_OVERLAP:
-		stats->tx.csum_overlap++;
+		this_cpu_inc(nic->drv_stats->tx_csum_overlap);
 		break;
 	case CQ_TX_ERROP_CK_OFLOW:
-		stats->tx.csum_overflow++;
+		this_cpu_inc(nic->drv_stats->tx_csum_overflow);
 		break;
 	}
 
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 8f4718e..2e3c940 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -172,26 +172,6 @@  enum CQ_TX_ERROP_E {
 	CQ_TX_ERROP_ENUM_LAST = 0x8a,
 };
 
-struct cmp_queue_stats {
-	struct tx_stats {
-		u64 good;
-		u64 desc_fault;
-		u64 hdr_cons_err;
-		u64 subdesc_err;
-		u64 max_size_exceeded;
-		u64 imm_size_oflow;
-		u64 data_seq_err;
-		u64 mem_seq_err;
-		u64 lock_viol;
-		u64 data_fault;
-		u64 tstmp_conflict;
-		u64 tstmp_timeout;
-		u64 mem_fault;
-		u64 csum_overlap;
-		u64 csum_overflow;
-	} tx;
-} ____cacheline_aligned_in_smp;
-
 enum RQ_SQ_STATS {
 	RQ_SQ_STATS_OCTS,
 	RQ_SQ_STATS_PKTS,
@@ -243,7 +223,6 @@  struct cmp_queue {
 	spinlock_t	lock;  /* lock to serialize processing CQEs */
 	void		*desc;
 	struct q_desc_mem   dmem;
-	struct cmp_queue_stats	stats;
 	int		irq;
 } ____cacheline_aligned_in_smp;
 
@@ -338,6 +317,5 @@  u64  nicvf_queue_reg_read(struct nicvf *nic,
 void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx);
 void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx);
 int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
-int nicvf_check_cqe_tx_errs(struct nicvf *nic,
-			    struct cmp_queue *cq, struct cqe_send_t *cqe_tx);
+int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx);
 #endif /* NICVF_QUEUES_H */