diff mbox series

[net-next,05/14] net/mlx5: Implement devlink total_vfs parameter

Message ID 20250228021227.871993-6-saeed@kernel.org (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series devlink, mlx5: Add new parameters for link management and SRIOV/eSwitch configurations | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl fail Generated files up to date; build failed; build has 10 warnings/errors; GEN HAS DIFF 2 files changed, 12664 deletions(-);
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 4 maintainers not CCed: linux-doc@vger.kernel.org andrew+netdev@lunn.ch horms@kernel.org corbet@lwn.net
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Saeed Mahameed Feb. 28, 2025, 2:12 a.m. UTC
From: Vlad Dumitrescu <vdumitrescu@nvidia.com>

Some devices support both symmetric (same value for all PFs) and
asymmetric, while others only support symmetric configuration. This
implementation prefers asymmetric, since it is closer to the devlink
model (per function settings), but falls back to symmetric when needed.

Example usage:
  devlink dev param set pci/0000:01:00.0 name total_vfs value <u16> cmode permanent
  devlink dev reload pci/0000:01:00.0 action fw_activate
  echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
  echo 1 >/sys/bus/pci/rescan
  cat /sys/bus/pci/devices/0000:01:00.0/sriov_totalvfs

Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
---
 Documentation/networking/devlink/mlx5.rst     |  22 +++
 .../mellanox/mlx5/core/lib/nv_param.c         | 125 ++++++++++++++++++
 2 files changed, 147 insertions(+)

Comments

Kamal Heib March 4, 2025, 4:45 p.m. UTC | #1
On Thu, Feb 27, 2025 at 06:12:18PM -0800, Saeed Mahameed wrote:
> From: Vlad Dumitrescu <vdumitrescu@nvidia.com>
> 
> Some devices support both symmetric (same value for all PFs) and
> asymmetric, while others only support symmetric configuration. This
> implementation prefers asymmetric, since it is closer to the devlink
> model (per function settings), but falls back to symmetric when needed.
> 
> Example usage:
>   devlink dev param set pci/0000:01:00.0 name total_vfs value <u16> cmode permanent
>   devlink dev reload pci/0000:01:00.0 action fw_activate
>   echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
>   echo 1 >/sys/bus/pci/rescan
>   cat /sys/bus/pci/devices/0000:01:00.0/sriov_totalvfs
> 
> Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
> Reviewed-by: Jiri Pirko <jiri@nvidia.com>

Tested-by: Kamal Heib <kheib@redhat.com>

> ---
>  Documentation/networking/devlink/mlx5.rst     |  22 +++
>  .../mellanox/mlx5/core/lib/nv_param.c         | 125 ++++++++++++++++++
>  2 files changed, 147 insertions(+)
> 
> diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
> index 587e0200c1cd..00a43324dec2 100644
> --- a/Documentation/networking/devlink/mlx5.rst
> +++ b/Documentation/networking/devlink/mlx5.rst
> @@ -40,6 +40,28 @@ Parameters
>       - Boolean
>       - Applies to each physical function (PF) independently, if the device
>         supports it. Otherwise, it applies symmetrically to all PFs.
> +   * - ``total_vfs``
> +     - permanent
> +     - The range is between 1 and a device-specific max.
> +     - Applies to each physical function (PF) independently, if the device
> +       supports it. Otherwise, it applies symmetrically to all PFs.
> +
> +Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect
> +
> +.. code-block:: bash
> +
> +   # setup parameters
> +   devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent
> +   devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent
> +
> +   # Fw reset
> +   devlink dev reload pci/0000:01:00.0 action fw_activate
> +
> +   # for PCI related config such as sriov PCI reset/rescan is required:
> +   echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
> +   echo 1 >/sys/bus/pci/rescan
> +   grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
> +
>  
>  The ``mlx5`` driver also implements the following driver-specific
>  parameters.
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
> index 6b63fc110e2d..97d74d890582 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
> @@ -387,10 +387,135 @@ static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id,
>  	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
>  }
>  
> +static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id,
> +				      struct devlink_param_gset_ctx *ctx)
> +{
> +	struct mlx5_core_dev *dev = devlink_priv(devlink);
> +	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
> +	void *data;
> +	int err;
> +
> +	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
> +
> +	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
> +		ctx->val.vu32 = 0;
> +		return 0;
> +	}
> +
> +	memset(mnvda, 0, sizeof(mnvda));
> +	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) {
> +		ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, data, total_vfs);
> +		return 0;
> +	}
> +
> +	/* SRIOV is per PF */
> +	memset(mnvda, 0, sizeof(mnvda));
> +	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, data, total_vf);
> +
> +	return 0;
> +}
> +
> +static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id,
> +				      struct devlink_param_gset_ctx *ctx,
> +				      struct netlink_ext_ack *extack)
> +{
> +	struct mlx5_core_dev *dev = devlink_priv(devlink);
> +	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)];
> +	bool per_pf_support;
> +	void *data;
> +	int err;
> +
> +	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
> +	if (err) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap");
> +		return err;
> +	}
> +
> +	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
> +	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
> +		NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	per_pf_support = MLX5_GET(nv_global_pci_cap, data, per_pf_total_vf_supported);
> +	memset(mnvda, 0, sizeof(mnvda));
> +
> +	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1);
> +	MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support);
> +
> +	if (!per_pf_support) {
> +		MLX5_SET(nv_global_pci_conf, data, total_vfs, ctx->val.vu32);
> +		return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
> +	}
> +
> +	/* SRIOV is per PF */
> +	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	memset(mnvda, 0, sizeof(mnvda));
> +	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
> +	if (err)
> +		return err;
> +
> +	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
> +	MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, 1);
> +	MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32);
> +	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
> +}
> +
> +static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id,
> +					   union devlink_param_value val,
> +					   struct netlink_ext_ack *extack)
> +{
> +	struct mlx5_core_dev *dev = devlink_priv(devlink);
> +	u32 cap[MLX5_ST_SZ_DW(mnvda_reg)];
> +	void *data;
> +	u16 max;
> +	int err;
> +
> +	data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data);
> +
> +	err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap));
> +	if (err)
> +		return err;
> +
> +	if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid))
> +		return 0; /* optimistic, but set might fail later */
> +
> +	max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf);
> +	if (val.vu16 > max) {
> +		NL_SET_ERR_MSG_FMT_MOD(extack,
> +				       "Max allowed by device is %u", max);
> +		return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
>  static const struct devlink_param mlx5_nv_param_devlink_params[] = {
>  	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
>  			      mlx5_devlink_enable_sriov_get,
>  			      mlx5_devlink_enable_sriov_set, NULL),
> +	DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
> +			      mlx5_devlink_total_vfs_get, mlx5_devlink_total_vfs_set,
> +			      mlx5_devlink_total_vfs_validate),
>  	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
>  			     "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
>  			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),
> -- 
> 2.48.1
> 
>
diff mbox series

Patch

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 587e0200c1cd..00a43324dec2 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -40,6 +40,28 @@  Parameters
      - Boolean
      - Applies to each physical function (PF) independently, if the device
        supports it. Otherwise, it applies symmetrically to all PFs.
+   * - ``total_vfs``
+     - permanent
+     - The range is between 1 and a device-specific max.
+     - Applies to each physical function (PF) independently, if the device
+       supports it. Otherwise, it applies symmetrically to all PFs.
+
+Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require a FW reset to take effect.
+
+.. code-block:: bash
+
+   # setup parameters
+   devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent
+   devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent
+
+   # FW reset
+   devlink dev reload pci/0000:01:00.0 action fw_activate
+
+   # For PCI-related config such as SR-IOV, a PCI reset/rescan is also required:
+   echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
+   echo 1 >/sys/bus/pci/rescan
+   grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
+
 
 The ``mlx5`` driver also implements the following driver-specific
 parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
index 6b63fc110e2d..97d74d890582 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
@@ -387,10 +387,135 @@  static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id,
 	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
 }
 
+/*
+ * devlink get callback for the permanent "total_vfs" parameter.
+ *
+ * Reports 0 when the global PCI capabilities have sriov_support clear.
+ * Otherwise the global PCI configuration decides where the value comes
+ * from: if per_pf_total_vf is clear, the global total_vfs field is
+ * reported; if it is set, total_vf from the per-PF configuration is
+ * reported instead.
+ *
+ * The result is stored in ctx->val.vu32. Returns 0 on success or the
+ * error returned by the failing NV parameter read helper.
+ */
+static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id,
+				      struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *mdev = devlink_priv(devlink);
+	u32 reg[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	void *item;
+	int ret;
+
+	/* Every read below lands in the same buffer, so one pointer serves. */
+	item = MLX5_ADDR_OF(mnvda_reg, reg, configuration_item_data);
+
+	ret = mlx5_nv_param_read_global_pci_cap(mdev, reg, sizeof(reg));
+	if (ret)
+		return ret;
+
+	if (!MLX5_GET(nv_global_pci_cap, item, sriov_support)) {
+		ctx->val.vu32 = 0;
+		return 0;
+	}
+
+	/* Start each register read from a clean buffer. */
+	memset(reg, 0, sizeof(reg));
+	ret = mlx5_nv_param_read_global_pci_conf(mdev, reg, sizeof(reg));
+	if (ret)
+		return ret;
+
+	if (MLX5_GET(nv_global_pci_conf, item, per_pf_total_vf)) {
+		/* SRIOV is per PF */
+		memset(reg, 0, sizeof(reg));
+		ret = mlx5_nv_param_read_per_host_pf_conf(mdev, reg,
+							  sizeof(reg));
+		if (ret)
+			return ret;
+
+		ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, item, total_vf);
+	} else {
+		ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, item, total_vfs);
+	}
+
+	return 0;
+}
+
+/*
+ * devlink set callback for the permanent "total_vfs" parameter.
+ *
+ * Reads the global PCI capabilities and rejects the request with
+ * -EOPNOTSUPP when sriov_support is clear. Otherwise the global PCI
+ * configuration is read, sriov_valid is set, and per_pf_total_vf is set
+ * to the per_pf_total_vf_supported capability. When per-PF totals are not
+ * supported, ctx->val.vu32 is written to the global total_vfs field. When
+ * they are, the global configuration is written first and the value is
+ * then stored in the per-PF configuration (total_vf, with pf_total_vf_en
+ * set).
+ *
+ * In the per-PF flow the global configuration written first is not rolled
+ * back if a later read or write fails.
+ *
+ * Returns 0 on success, -EOPNOTSUPP if SR-IOV is not supported, or the
+ * error returned by the failing NV parameter read/write helper. An extack
+ * message is set on every failure path.
+ */
+static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id,
+				      struct devlink_param_gset_ctx *ctx,
+				      struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	/* Zeroed so no stale stack contents are passed to the register read. */
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	bool per_pf_support;
+	void *data;
+	int err;
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap");
+		return err;
+	}
+
+	/* All reads and writes reuse mnvda, so data stays valid throughout. */
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
+		NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device");
+		return -EOPNOTSUPP;
+	}
+
+	per_pf_support = MLX5_GET(nv_global_pci_cap, data,
+				  per_pf_total_vf_supported);
+	memset(mnvda, 0, sizeof(mnvda));
+
+	/* Read-modify-write: start from the current global configuration. */
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci conf");
+		return err;
+	}
+
+	MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1);
+	MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support);
+
+	/* Symmetric devices carry the value in the global configuration. */
+	if (!per_pf_support)
+		MLX5_SET(nv_global_pci_conf, data, total_vfs, ctx->val.vu32);
+
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to write global pci conf");
+		return err;
+	}
+
+	if (!per_pf_support)
+		return 0;
+
+	/* SRIOV is per PF */
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read per host PF conf");
+		return err;
+	}
+
+	MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, 1);
+	MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32);
+
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Failed to write per host PF conf");
+
+	return err;
+}
+
+/*
+ * devlink validate callback for the "total_vfs" parameter.
+ *
+ * Reads the global PCI capabilities and, when max_vfs_per_pf_valid is
+ * set, rejects values larger than max_vfs_per_pf with -EINVAL. When the
+ * device does not report a valid maximum, the value is accepted and the
+ * later set operation may still fail.
+ *
+ * The value is read as val.vu32, the same union member that the get and
+ * set callbacks of this parameter use, so the whole requested value is
+ * range-checked.
+ * NOTE(review): assumes the generic total_vfs parameter is declared as
+ * u32 - confirm against the devlink parameter definition.
+ *
+ * Returns 0 if the value is acceptable, -EINVAL if it exceeds the device
+ * maximum, or the error returned by the capability read.
+ */
+static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id,
+					   union devlink_param_value val,
+					   struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	/* Zeroed so no stale stack contents are passed to the register read. */
+	u32 cap[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	void *data;
+	u16 max;
+	int err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data);
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap");
+		return err;
+	}
+
+	if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid))
+		return 0; /* optimistic, but set might fail later */
+
+	max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf);
+	if (val.vu32 > max) {
+		NL_SET_ERR_MSG_FMT_MOD(extack,
+				       "Max allowed by device is %u", max);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct devlink_param mlx5_nv_param_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
 			      mlx5_devlink_enable_sriov_get,
 			      mlx5_devlink_enable_sriov_set, NULL),
+	DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      mlx5_devlink_total_vfs_get, mlx5_devlink_total_vfs_set,
+			      mlx5_devlink_total_vfs_validate),
 	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
 			     "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),