diff mbox series

[rdma-next,1/9] RDMA/bnxt_re: Add support for collecting the Queue dumps

Message ID 1740076496-14227-2-git-send-email-selvin.xavier@broadcom.com (mailing list archive)
State Not Applicable
Headers show
Series RDMA/bnxt_re: Driver Debug Enhancements | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

Selvin Xavier Feb. 20, 2025, 6:34 p.m. UTC
From: Saravanan Vajravel <saravanan.vajravel@broadcom.com>

As part of enhancing the debug data collection, allocate
a few data structures to hold the resources after the queues
are destroyed.

Initialize the data structures to capture the data. By default,
the driver will cache the info of the QPs that are in error state.
The dump level can be changed through a debugfs hook added in a
later patch. The driver caches the info of the last 1024 entries only.

Signed-off-by: Saravanan Vajravel <saravanan.vajravel@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h | 64 +++++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/main.c    | 18 ++++++++++
 2 files changed, 82 insertions(+)
diff mbox series

Patch

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index b33b04e..5818db1 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -173,6 +173,67 @@  static inline bool bnxt_re_chip_gen_p7(u16 chip_num)
 		chip_num == CHIP_NUM_57608);
 }
 
+#define BNXT_RE_MAX_QDUMP_ENTRIES 1024 /* slots in bnxt_re_qdump_head.qdump */
+
+struct qdump_qpinfo { /* cached QP attributes; nothing in this patch fills them */
+	u32 id; /* presumably the QP number - TODO confirm */
+	u32 dest_qpid;
+	u64 qp_handle;
+	u32 mtu;
+	u8  type;
+	u8  wqe_mode;
+	u8  state;
+	u8  is_user; /* u8 flag; qdump_element.is_user_qp is a bool */
+	u64 scq_handle; /* scq_/rcq_: presumably send and receive CQ - TODO confirm */
+	u64 rcq_handle;
+	u32 scq_id;
+	u32 rcq_id;
+};
+
+struct qdump_mrinfo { /* cached MR attributes; nothing in this patch fills them */
+	int type; /* plain int, unlike the u8 type in qdump_qpinfo */
+	u32 lkey;
+	u32 rkey;
+	u64 total_size; /* NOTE(review): unit not visible here, presumably bytes */
+	u64 mr_handle;
+};
+
+struct qdump_element { /* not referenced by any other code in this patch */
+	struct bnxt_qplib_pbl pbl[PBL_LVL_MAX]; /* presumably one entry per PBL level */
+	enum bnxt_qplib_pbl_lvl level;
+	struct bnxt_qplib_hwq *hwq;
+	struct bnxt_re_dev *rdev;
+	struct ib_umem *umem;
+	bool is_user_qp;
+	char des[32]; /* presumably a short description string - TODO confirm */
+	char *buf; /* buf/len: presumably the dump buffer and its size - TODO confirm */
+	size_t len;
+	u16 stride;
+	u32 prod; /* prod/cons: presumably producer and consumer indices - TODO confirm */
+	u32 cons;
+};
+
+struct qdump_array { /* element type of bnxt_re_qdump_head.qdump */
+	struct qdump_qpinfo qpinfo;
+	struct qdump_mrinfo mrinfo;
+	bool valid; /* false after the zeroing allocation; not set in this patch */
+	bool is_mr; /* presumably selects mrinfo over qpinfo - TODO confirm */
+};
+
+struct bnxt_re_qdump_head { /* embedded in struct bnxt_re_dev as qdump_head */
+	struct qdump_array *qdump; /* zeroed vmalloc array, max_elements entries */
+	u32 max_elements; /* set to BNXT_RE_MAX_QDUMP_ENTRIES at init */
+	struct mutex lock; /* lock qdump array elements */
+	u32 index; /* starts at 0; presumably the next slot to use - TODO confirm */
+};
+
+enum { /* levels stored in bnxt_re_dev.snapdump_dbg_lvl */
+	BNXT_RE_SNAPDUMP_NONE = 0,
+	BNXT_RE_SNAPDUMP_ERR, /* default set by bnxt_re_init_qdump() */
+	/* Add new entry before this */
+	BNXT_RE_SNAPDUMP_ALL
+};
+
 struct bnxt_re_dev {
 	struct ib_device		ibdev;
 	struct list_head		list;
@@ -232,6 +293,9 @@  struct bnxt_re_dev {
 	unsigned long			event_bitmap;
 	struct bnxt_qplib_cc_param	cc_param;
 	struct workqueue_struct		*dcb_wq;
+	/* Head tracking all cached QP dump entries */
+	struct bnxt_re_qdump_head qdump_head;
+	u8 snapdump_dbg_lvl;
 	struct dentry                   *cc_config;
 	struct bnxt_re_dbg_cc_config_params *cc_config_params;
 };
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index e9e4da4..87fdf69 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -2008,6 +2008,11 @@  static void bnxt_re_free_nqr_mem(struct bnxt_re_dev *rdev)
 	rdev->nqr = NULL;
 }
 
+static void bnxt_re_clean_qdump(struct bnxt_re_dev *rdev)
+{
+	vfree(rdev->qdump_head.qdump); /* frees the dump array; pointer is not reset to NULL */
+}
+
 static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
 {
 	u8 type;
@@ -2018,6 +2023,7 @@  static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev, u8 op_type)
 	bnxt_re_net_unregister_async_event(rdev);
 	bnxt_re_uninit_dcb_wq(rdev);
 
+	bnxt_re_clean_qdump(rdev);
 	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
 		cancel_delayed_work_sync(&rdev->worker);
 
@@ -2063,6 +2069,16 @@  static void bnxt_re_worker(struct work_struct *work)
 	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
 }
 
+static void bnxt_re_init_qdump(struct bnxt_re_dev *rdev)
+{
+	rdev->qdump_head.max_elements = BNXT_RE_MAX_QDUMP_ENTRIES; /* array capacity */
+	rdev->qdump_head.index = 0;
+	rdev->snapdump_dbg_lvl = BNXT_RE_SNAPDUMP_ERR; /* default: error QPs only */
+	mutex_init(&rdev->qdump_head.lock);
+	rdev->qdump_head.qdump = vcalloc(rdev->qdump_head.max_elements, /* NULL on failure */
+					 sizeof(*rdev->qdump_head.qdump));
+}
+
 static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
 {
 	struct bnxt_re_ring_attr rattr = {};
@@ -2235,6 +2251,8 @@  static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 op_type)
 		hash_init(rdev->srq_hash);
 
 	bnxt_re_debugfs_add_pdev(rdev);
+	if (bnxt_qplib_is_chip_gen_p5_p7(rdev->chip_ctx))
+		bnxt_re_init_qdump(rdev);
 
 	bnxt_re_init_dcb_wq(rdev);
 	bnxt_re_net_register_async_event(rdev);