diff mbox series

[net-next,10/15] net/mlx5: Add support for sync reset using hot reset

Message ID 20240911201757.1505453-11-saeed@kernel.org (mailing list archive)
State Accepted
Commit 57502f62678ced0149d415324931bde37b42885a
Delegated to: Netdev Maintainers
Headers show
Series [net-next,01/15] net/mlx5: HWS, updated API functions comments to kernel doc | expand

Checks

Context Check Description
netdev/series_format success Pull request is its own cover letter
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 16 this patch: 16
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 4 of 4 maintainers
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 16 this patch: 16
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-09-12--00-00 (tests: 764)

Commit Message

Saeed Mahameed Sept. 11, 2024, 8:17 p.m. UTC
From: Moshe Shemesh <moshe@nvidia.com>

On device that supports sync reset for firmware activate using hot
reset, the driver queries the required reset method while handling the
sync reset request. If the required reset method is hot reset, the
driver will use pci_reset_bus() to reset the PCI link instead of the
link toggle.

Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/fw_reset.c    | 82 +++++++++++++++----
 .../net/ethernet/mellanox/mlx5/core/main.c    |  3 +
 2 files changed, 69 insertions(+), 16 deletions(-)

Comments

Jacob Keller Sept. 12, 2024, 7:12 p.m. UTC | #1
On 9/11/2024 1:17 PM, Saeed Mahameed wrote:
> From: Moshe Shemesh <moshe@nvidia.com>
> 
> On device that supports sync reset for firmware activate using hot
> reset, the driver queries the required reset method while handling the
> sync reset request. If the required reset method is hot reset, the
> driver will use pci_reset_bus() to reset the PCI link instead of the
> link toggle.
> 
> Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> ---
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
index b43ca0b762c3..bda74cb9c975 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c
@@ -26,6 +26,7 @@  struct mlx5_fw_reset {
 	struct work_struct reset_now_work;
 	struct work_struct reset_abort_work;
 	unsigned long reset_flags;
+	u8 reset_method;
 	struct timer_list timer;
 	struct completion done;
 	int ret;
@@ -95,7 +96,7 @@  static int mlx5_reg_mfrl_set(struct mlx5_core_dev *dev, u8 reset_level,
 }
 
 static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
-			       u8 *reset_type, u8 *reset_state)
+			       u8 *reset_type, u8 *reset_state, u8 *reset_method)
 {
 	u32 out[MLX5_ST_SZ_DW(mfrl_reg)] = {};
 	u32 in[MLX5_ST_SZ_DW(mfrl_reg)] = {};
@@ -111,13 +112,26 @@  static int mlx5_reg_mfrl_query(struct mlx5_core_dev *dev, u8 *reset_level,
 		*reset_type = MLX5_GET(mfrl_reg, out, reset_type);
 	if (reset_state)
 		*reset_state = MLX5_GET(mfrl_reg, out, reset_state);
+	if (reset_method)
+		*reset_method = MLX5_GET(mfrl_reg, out, pci_reset_req_method);
 
 	return 0;
 }
 
 int mlx5_fw_reset_query(struct mlx5_core_dev *dev, u8 *reset_level, u8 *reset_type)
 {
-	return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL);
+	return mlx5_reg_mfrl_query(dev, reset_level, reset_type, NULL, NULL);
+}
+
+static int mlx5_fw_reset_get_reset_method(struct mlx5_core_dev *dev,
+					  u8 *reset_method)
+{
+	if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method)) {
+		*reset_method = MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE;
+		return 0;
+	}
+
+	return mlx5_reg_mfrl_query(dev, NULL, NULL, NULL, reset_method);
 }
 
 static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
@@ -125,7 +139,7 @@  static int mlx5_fw_reset_get_reset_state_err(struct mlx5_core_dev *dev,
 {
 	u8 reset_state;
 
-	if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state))
+	if (mlx5_reg_mfrl_query(dev, NULL, NULL, &reset_state, NULL))
 		goto out;
 
 	if (!reset_state)
@@ -427,7 +441,11 @@  static void mlx5_sync_reset_request_event(struct work_struct *work)
 	struct mlx5_core_dev *dev = fw_reset->dev;
 	int err;
 
-	if (test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
+	err = mlx5_fw_reset_get_reset_method(dev, &fw_reset->reset_method);
+	if (err)
+		mlx5_core_warn(dev, "Failed reading MFRL, err %d\n", err);
+
+	if (err || test_bit(MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, &fw_reset->reset_flags) ||
 	    !mlx5_is_reset_now_capable(dev)) {
 		err = mlx5_fw_reset_set_reset_sync_nack(dev);
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Nack %s",
@@ -444,21 +462,15 @@  static void mlx5_sync_reset_request_event(struct work_struct *work)
 		mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack. Device reset is expected.\n");
 }
 
-static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
+static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev, u16 dev_id)
 {
 	struct pci_bus *bridge_bus = dev->pdev->bus;
 	struct pci_dev *bridge = bridge_bus->self;
 	unsigned long timeout;
 	struct pci_dev *sdev;
-	u16 reg16, dev_id;
 	int cap, err;
+	u16 reg16;
 
-	err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
-	if (err)
-		return pcibios_err_to_errno(err);
-	err = mlx5_check_dev_ids(dev, dev_id);
-	if (err)
-		return err;
 	cap = pci_find_capability(bridge, PCI_CAP_ID_EXP);
 	if (!cap)
 		return -EOPNOTSUPP;
@@ -528,6 +540,44 @@  static int mlx5_pci_link_toggle(struct mlx5_core_dev *dev)
 	return err;
 }
 
+static int mlx5_pci_reset_bus(struct mlx5_core_dev *dev)
+{
+	if (!MLX5_CAP_GEN(dev, pcie_reset_using_hotreset_method))
+		return -EOPNOTSUPP;
+
+	return pci_reset_bus(dev->pdev);
+}
+
+static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method)
+{
+	u16 dev_id;
+	int err;
+
+	err = pci_read_config_word(dev->pdev, PCI_DEVICE_ID, &dev_id);
+	if (err)
+		return pcibios_err_to_errno(err);
+	err = mlx5_check_dev_ids(dev, dev_id);
+	if (err)
+		return err;
+
+	switch (reset_method) {
+	case MLX5_MFRL_REG_PCI_RESET_METHOD_LINK_TOGGLE:
+		err = mlx5_pci_link_toggle(dev, dev_id);
+		if (err)
+			mlx5_core_warn(dev, "mlx5_pci_link_toggle failed\n");
+		break;
+	case MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET:
+		err = mlx5_pci_reset_bus(dev);
+		if (err)
+			mlx5_core_warn(dev, "mlx5_pci_reset_bus failed\n");
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
 static void mlx5_sync_reset_now_event(struct work_struct *work)
 {
 	struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset,
@@ -546,9 +596,9 @@  static void mlx5_sync_reset_now_event(struct work_struct *work)
 		goto done;
 	}
 
-	err = mlx5_pci_link_toggle(dev);
+	err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
 	if (err) {
-		mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
+		mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, no reset done, err %d\n", err);
 		set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
 	}
 
@@ -610,9 +660,9 @@  static void mlx5_sync_reset_unload_event(struct work_struct *work)
 
 	mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", rst_state);
 	if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) {
-		err = mlx5_pci_link_toggle(dev);
+		err = mlx5_sync_pci_reset(dev, fw_reset->reset_method);
 		if (err) {
-			mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, err %d\n", err);
+			mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err);
 			fw_reset->ret = err;
 		}
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 4b88d969a66c..285e8fa1b7b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -619,6 +619,9 @@  static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 	if (MLX5_CAP_GEN_MAX(dev, pci_sync_for_fw_update_with_driver_unload))
 		MLX5_SET(cmd_hca_cap, set_hca_cap,
 			 pci_sync_for_fw_update_with_driver_unload, 1);
+	if (MLX5_CAP_GEN_MAX(dev, pcie_reset_using_hotreset_method))
+		MLX5_SET(cmd_hca_cap, set_hca_cap,
+			 pcie_reset_using_hotreset_method, 1);
 
 	if (MLX5_CAP_GEN_MAX(dev, num_vhca_ports))
 		MLX5_SET(cmd_hca_cap,