diff mbox series

[net] octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT]

Message ID 20221102031113.1919861-1-rkannoth@marvell.com (mailing list archive)
State Accepted
Commit 51afe9026d0c63263abe9840e629f118d7405b36
Delegated to: Netdev Maintainers
Headers show
Series [net] octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT] | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net, async
netdev/fixes_present success Fixes tag present in non-next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers fail 2 blamed authors not CCed: amakarov@marvell.com gakula@marvell.com; 4 maintainers not CCed: sbhatta@marvell.com hkelam@marvell.com gakula@marvell.com amakarov@marvell.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success Fixes tag looks correct
netdev/build_allmodconfig_warn success Errors and warnings before: 14 this patch: 14
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Ratheesh Kannoth Nov. 2, 2022, 3:11 a.m. UTC
In scenarios where multiple errors have occurred
for a SQ before SW starts handling error interrupt,
SQ_CTX[OP_INT] may get overwritten leading to
NIX_LF_SQ_OP_INT returning incorrect value.
To workaround this read LMT, MNQ and SQ individual
error status registers to determine the cause of error.

Fixes: 4ff7d1488a84 ("octeontx2-pf: Error handling support")
Signed-off-by: Ratheesh Kannoth <rkannoth@marvell.com>
Reviewed-by: Sunil Kovvuri Goutham <sgoutham@marvell.com>
---
 .../ethernet/marvell/octeontx2/nic/otx2_pf.c  | 135 ++++++++++++++----
 .../marvell/octeontx2/nic/otx2_struct.h       |  57 ++++++++
 2 files changed, 162 insertions(+), 30 deletions(-)

Comments

patchwork-bot+netdevbpf@kernel.org Nov. 4, 2022, 11 a.m. UTC | #1
Hello:

This patch was applied to netdev/net.git (master)
by David S. Miller <davem@davemloft.net>:

On Wed, 2 Nov 2022 08:41:13 +0530 you wrote:
> In scenarios where multiple errors have occurred
> for a SQ before SW starts handling error interrupt,
> SQ_CTX[OP_INT] may get overwritten leading to
> NIX_LF_SQ_OP_INT returning incorrect value.
> To workaround this read LMT, MNQ and SQ individual
> error status registers to determine the cause of error.
> 
> [...]

Here is the summary with links:
  - [net] octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT]
    https://git.kernel.org/netdev/net/c/51afe9026d0c

You are awesome, thank you!
diff mbox series

Patch

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 892ca88e0cf4..303930499a4c 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -15,6 +15,7 @@ 
 #include <net/ip.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
+#include <linux/bitfield.h>
 
 #include "otx2_reg.h"
 #include "otx2_common.h"
@@ -1171,6 +1172,59 @@  int otx2_set_real_num_queues(struct net_device *netdev,
 }
 EXPORT_SYMBOL(otx2_set_real_num_queues);
 
+static char *nix_sqoperr_e_str[NIX_SQOPERR_MAX] = {
+	"NIX_SQOPERR_OOR",
+	"NIX_SQOPERR_CTX_FAULT",
+	"NIX_SQOPERR_CTX_POISON",
+	"NIX_SQOPERR_DISABLED",
+	"NIX_SQOPERR_SIZE_ERR",
+	"NIX_SQOPERR_OFLOW",
+	"NIX_SQOPERR_SQB_NULL",
+	"NIX_SQOPERR_SQB_FAULT",
+	"NIX_SQOPERR_SQE_SZ_ZERO",
+};
+
+static char *nix_mnqerr_e_str[NIX_MNQERR_MAX] = {
+	"NIX_MNQERR_SQ_CTX_FAULT",
+	"NIX_MNQERR_SQ_CTX_POISON",
+	"NIX_MNQERR_SQB_FAULT",
+	"NIX_MNQERR_SQB_POISON",
+	"NIX_MNQERR_TOTAL_ERR",
+	"NIX_MNQERR_LSO_ERR",
+	"NIX_MNQERR_CQ_QUERY_ERR",
+	"NIX_MNQERR_MAX_SQE_SIZE_ERR",
+	"NIX_MNQERR_MAXLEN_ERR",
+	"NIX_MNQERR_SQE_SIZEM1_ZERO",
+};
+
+static char *nix_snd_status_e_str[NIX_SND_STATUS_MAX] =  {
+	"NIX_SND_STATUS_GOOD",
+	"NIX_SND_STATUS_SQ_CTX_FAULT",
+	"NIX_SND_STATUS_SQ_CTX_POISON",
+	"NIX_SND_STATUS_SQB_FAULT",
+	"NIX_SND_STATUS_SQB_POISON",
+	"NIX_SND_STATUS_HDR_ERR",
+	"NIX_SND_STATUS_EXT_ERR",
+	"NIX_SND_STATUS_JUMP_FAULT",
+	"NIX_SND_STATUS_JUMP_POISON",
+	"NIX_SND_STATUS_CRC_ERR",
+	"NIX_SND_STATUS_IMM_ERR",
+	"NIX_SND_STATUS_SG_ERR",
+	"NIX_SND_STATUS_MEM_ERR",
+	"NIX_SND_STATUS_INVALID_SUBDC",
+	"NIX_SND_STATUS_SUBDC_ORDER_ERR",
+	"NIX_SND_STATUS_DATA_FAULT",
+	"NIX_SND_STATUS_DATA_POISON",
+	"NIX_SND_STATUS_NPC_DROP_ACTION",
+	"NIX_SND_STATUS_LOCK_VIOL",
+	"NIX_SND_STATUS_NPC_UCAST_CHAN_ERR",
+	"NIX_SND_STATUS_NPC_MCAST_CHAN_ERR",
+	"NIX_SND_STATUS_NPC_MCAST_ABORT",
+	"NIX_SND_STATUS_NPC_VTAG_PTR_ERR",
+	"NIX_SND_STATUS_NPC_VTAG_SIZE_ERR",
+	"NIX_SND_STATUS_SEND_STATS_ERR",
+};
+
 static irqreturn_t otx2_q_intr_handler(int irq, void *data)
 {
 	struct otx2_nic *pf = data;
@@ -1204,46 +1258,67 @@  static irqreturn_t otx2_q_intr_handler(int irq, void *data)
 
 	/* SQ */
 	for (qidx = 0; qidx < pf->hw.tot_tx_queues; qidx++) {
+		u64 sq_op_err_dbg, mnq_err_dbg, snd_err_dbg;
+		u8 sq_op_err_code, mnq_err_code, snd_err_code;
+
+		/* Below debug registers captures first errors corresponding to
+		 * those registers. We don't have to check against SQ qid as
+		 * these are fatal errors.
+		 */
+
 		ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT);
 		val = otx2_atomic64_add((qidx << 44), ptr);
 		otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) |
 			     (val & NIX_SQINT_BITS));
 
-		if (!(val & (NIX_SQINT_BITS | BIT_ULL(42))))
-			continue;
-
 		if (val & BIT_ULL(42)) {
 			netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
 				   qidx, otx2_read64(pf, NIX_LF_ERR_INT));
-		} else {
-			if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) {
-				netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx",
-					   qidx,
-					   otx2_read64(pf,
-						       NIX_LF_SQ_OP_ERR_DBG));
-				otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG,
-					     BIT_ULL(44));
-			}
-			if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) {
-				netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n",
-					   qidx,
-					   otx2_read64(pf, NIX_LF_MNQ_ERR_DBG));
-				otx2_write64(pf, NIX_LF_MNQ_ERR_DBG,
-					     BIT_ULL(44));
-			}
-			if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) {
-				netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx",
-					   qidx,
-					   otx2_read64(pf,
-						       NIX_LF_SEND_ERR_DBG));
-				otx2_write64(pf, NIX_LF_SEND_ERR_DBG,
-					     BIT_ULL(44));
-			}
-			if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
-				netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
-					   qidx);
+			goto done;
 		}
 
+		sq_op_err_dbg = otx2_read64(pf, NIX_LF_SQ_OP_ERR_DBG);
+		if (!(sq_op_err_dbg & BIT(44)))
+			goto chk_mnq_err_dbg;
+
+		sq_op_err_code = FIELD_GET(GENMASK(7, 0), sq_op_err_dbg);
+		netdev_err(pf->netdev, "SQ%lld: NIX_LF_SQ_OP_ERR_DBG(%llx)  err=%s\n",
+			   qidx, sq_op_err_dbg, nix_sqoperr_e_str[sq_op_err_code]);
+
+		otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG, BIT_ULL(44));
+
+		if (sq_op_err_code == NIX_SQOPERR_SQB_NULL)
+			goto chk_mnq_err_dbg;
+
+		/* Err is not NIX_SQOPERR_SQB_NULL, call aq function to read SQ structure.
+		 * TODO: But we are in irq context. How to call mbox functions which does sleep
+		 */
+
+chk_mnq_err_dbg:
+		mnq_err_dbg = otx2_read64(pf, NIX_LF_MNQ_ERR_DBG);
+		if (!(mnq_err_dbg & BIT(44)))
+			goto chk_snd_err_dbg;
+
+		mnq_err_code = FIELD_GET(GENMASK(7, 0), mnq_err_dbg);
+		netdev_err(pf->netdev, "SQ%lld: NIX_LF_MNQ_ERR_DBG(%llx)  err=%s\n",
+			   qidx, mnq_err_dbg,  nix_mnqerr_e_str[mnq_err_code]);
+		otx2_write64(pf, NIX_LF_MNQ_ERR_DBG, BIT_ULL(44));
+
+chk_snd_err_dbg:
+		snd_err_dbg = otx2_read64(pf, NIX_LF_SEND_ERR_DBG);
+		if (snd_err_dbg & BIT(44)) {
+			snd_err_code = FIELD_GET(GENMASK(7, 0), snd_err_dbg);
+			netdev_err(pf->netdev, "SQ%lld: NIX_LF_SND_ERR_DBG:0x%llx err=%s\n",
+				   qidx, snd_err_dbg, nix_snd_status_e_str[snd_err_code]);
+			otx2_write64(pf, NIX_LF_SEND_ERR_DBG, BIT_ULL(44));
+		}
+
+done:
+		/* Print values and reset */
+		if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
+			netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
+				   qidx);
+
 		schedule_work(&pf->reset_task);
 	}
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
index aa205a0d158f..fa37b9f312ca 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h
@@ -281,4 +281,61 @@  enum nix_sqint_e {
 			BIT_ULL(NIX_SQINT_SEND_ERR) | \
 			BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
 
+enum nix_sqoperr_e {
+	NIX_SQOPERR_OOR = 0,
+	NIX_SQOPERR_CTX_FAULT = 1,
+	NIX_SQOPERR_CTX_POISON = 2,
+	NIX_SQOPERR_DISABLED = 3,
+	NIX_SQOPERR_SIZE_ERR = 4,
+	NIX_SQOPERR_OFLOW = 5,
+	NIX_SQOPERR_SQB_NULL = 6,
+	NIX_SQOPERR_SQB_FAULT = 7,
+	NIX_SQOPERR_SQE_SZ_ZERO = 8,
+	NIX_SQOPERR_MAX,
+};
+
+enum nix_mnqerr_e {
+	NIX_MNQERR_SQ_CTX_FAULT = 0,
+	NIX_MNQERR_SQ_CTX_POISON = 1,
+	NIX_MNQERR_SQB_FAULT = 2,
+	NIX_MNQERR_SQB_POISON = 3,
+	NIX_MNQERR_TOTAL_ERR = 4,
+	NIX_MNQERR_LSO_ERR = 5,
+	NIX_MNQERR_CQ_QUERY_ERR = 6,
+	NIX_MNQERR_MAX_SQE_SIZE_ERR = 7,
+	NIX_MNQERR_MAXLEN_ERR = 8,
+	NIX_MNQERR_SQE_SIZEM1_ZERO = 9,
+	NIX_MNQERR_MAX,
+};
+
+enum nix_snd_status_e {
+	NIX_SND_STATUS_GOOD = 0x0,
+	NIX_SND_STATUS_SQ_CTX_FAULT = 0x1,
+	NIX_SND_STATUS_SQ_CTX_POISON = 0x2,
+	NIX_SND_STATUS_SQB_FAULT = 0x3,
+	NIX_SND_STATUS_SQB_POISON = 0x4,
+	NIX_SND_STATUS_HDR_ERR = 0x5,
+	NIX_SND_STATUS_EXT_ERR = 0x6,
+	NIX_SND_STATUS_JUMP_FAULT = 0x7,
+	NIX_SND_STATUS_JUMP_POISON = 0x8,
+	NIX_SND_STATUS_CRC_ERR = 0x9,
+	NIX_SND_STATUS_IMM_ERR = 0x10,
+	NIX_SND_STATUS_SG_ERR = 0x11,
+	NIX_SND_STATUS_MEM_ERR = 0x12,
+	NIX_SND_STATUS_INVALID_SUBDC = 0x13,
+	NIX_SND_STATUS_SUBDC_ORDER_ERR = 0x14,
+	NIX_SND_STATUS_DATA_FAULT = 0x15,
+	NIX_SND_STATUS_DATA_POISON = 0x16,
+	NIX_SND_STATUS_NPC_DROP_ACTION = 0x17,
+	NIX_SND_STATUS_LOCK_VIOL = 0x18,
+	NIX_SND_STATUS_NPC_UCAST_CHAN_ERR = 0x19,
+	NIX_SND_STATUS_NPC_MCAST_CHAN_ERR = 0x20,
+	NIX_SND_STATUS_NPC_MCAST_ABORT = 0x21,
+	NIX_SND_STATUS_NPC_VTAG_PTR_ERR = 0x22,
+	NIX_SND_STATUS_NPC_VTAG_SIZE_ERR = 0x23,
+	NIX_SND_STATUS_SEND_MEM_FAULT = 0x24,
+	NIX_SND_STATUS_SEND_STATS_ERR = 0x25,
+	NIX_SND_STATUS_MAX,
+};
+
 #endif /* OTX2_STRUCT_H */