diff mbox series

[net-next,04/14] net/mlx5: Implement devlink enable_sriov parameter

Message ID 20250228021227.871993-5-saeed@kernel.org (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series devlink, mlx5: Add new parameters for link management and SRIOV/eSwitch configurations | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl fail Generated files up to date; build failed; build has 10 warnings/errors; GEN HAS DIFF 2 files changed, 12664 deletions(-);
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 4 maintainers not CCed: linux-doc@vger.kernel.org andrew+netdev@lunn.ch horms@kernel.org corbet@lwn.net
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 83 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Saeed Mahameed Feb. 28, 2025, 2:12 a.m. UTC
From: Vlad Dumitrescu <vdumitrescu@nvidia.com>

Example usage:
  devlink dev param set pci/0000:01:00.0 name enable_sriov value {true, false} cmode permanent
  devlink dev reload pci/0000:01:00.0 action fw_activate
  echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
  echo 1 >/sys/bus/pci/rescan
  grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
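
As a possible follow-up check (illustrative only; the VF count of 4 is
arbitrary):
  devlink dev param show pci/0000:01:00.0 name enable_sriov
  cat /sys/bus/pci/devices/0000:01:00.0/sriov_totalvfs
  echo 4 >/sys/bus/pci/devices/0000:01:00.0/sriov_numvfs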

Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 Documentation/networking/devlink/mlx5.rst     |  14 +-
 .../net/ethernet/mellanox/mlx5/core/devlink.c |   1 +
 .../mellanox/mlx5/core/lib/nv_param.c         | 184 ++++++++++++++++++
 3 files changed, 196 insertions(+), 3 deletions(-)

Comments

Jiri Pirko Feb. 28, 2025, 12:46 p.m. UTC | #1
Fri, Feb 28, 2025 at 03:12:17AM +0100, saeed@kernel.org wrote:
>[...]
>
>+	if (!per_pf_support)


Hmm, given the discussion we have in parallel about some shared-PF
devlink instance, perhaps it would be a good idea to allow only per-PF
configuration here for now and let the "global" per-device configuration
knob be attached to the shared-PF devlink, when/if it lands. What do you
think?


Saeed Mahameed Feb. 28, 2025, 6:19 p.m. UTC | #2
On 28 Feb 13:46, Jiri Pirko wrote:
>Fri, Feb 28, 2025 at 03:12:17AM +0100, saeed@kernel.org wrote:
>>[...]
>>
>>+	if (!per_pf_support)
>
>
>Hmm, given the discussion we have in parallel about some shared-PF
>devlink instance, perhaps it would be good idea to allow only per-pf
>configuration here for now and let the "global" per-device configuration
>knob to be attached on the shared-PF devlink, when/if it lands. What do
>you think?

Do we have an RFC? Can you point me to it?

I am just worried about the conflicts between per-PF and global configs
this will introduce. Currently it is driver best effort; later we might
want to pick one direction, global vs per-PF, if they end up as separate
knobs, and we would probably go with per-PF. Most CX devices support both
modes and it is up to the driver to choose. So why do both global and
per-PF when you can almost always do per-PF?

kernel test robot March 3, 2025, 2:27 a.m. UTC | #3
Hi Saeed,

kernel test robot noticed the following build warnings:

[auto build test WARNING on net-next/main]

url:    https://github.com/intel-lab-lkp/linux/commits/Saeed-Mahameed/devlink-define-enum-for-attr-types-of-dynamic-attributes/20250228-101818
base:   net-next/main
patch link:    https://lore.kernel.org/r/20250228021227.871993-5-saeed%40kernel.org
patch subject: [PATCH net-next 04/14] net/mlx5: Implement devlink enable_sriov parameter
compiler: clang version 19.1.7 (https://github.com/llvm/llvm-project cd708029e0b2869e80abe31ddb175f7c35361f90)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202503030926.KNBxmdVW-lkp@intel.com/

includecheck warnings: (new ones prefixed by >>)
>> drivers/net/ethernet/mellanox/mlx5/core/devlink.c: lib/nv_param.h is included more than once.

vim +11 drivers/net/ethernet/mellanox/mlx5/core/devlink.c

     5	
     6	#include "mlx5_core.h"
     7	#include "fw_reset.h"
     8	#include "fs_core.h"
     9	#include "eswitch.h"
    10	#include "esw/qos.h"
  > 11	#include "lib/nv_param.h"
    12	#include "sf/dev/dev.h"
    13	#include "sf/sf.h"
  > 14	#include "lib/nv_param.h"
    15
Jiri Pirko March 3, 2025, 11:35 a.m. UTC | #4
Fri, Feb 28, 2025 at 07:19:19PM +0100, saeedm@nvidia.com wrote:
>On 28 Feb 13:46, Jiri Pirko wrote:
>> Fri, Feb 28, 2025 at 03:12:17AM +0100, saeed@kernel.org wrote:
>> > [...]
>> >
>> > +	if (!per_pf_support)
>> 
>> 
>> Hmm, given the discussion we have in parallel about some shared-PF
>> devlink instance, perhaps it would be good idea to allow only per-pf
>> configuration here for now and let the "global" per-device configuration
>> knob to be attached on the shared-PF devlink, when/if it lands. What do
>> you think?
>
>Do we have an RFC? can you point me to it?

https://lore.kernel.org/all/20250219164410.35665-2-przemyslaw.kitszel@intel.com/


>
>I am just worried about the conflicts between per-pf and global configs
>this will introduce, currently it is driver best effort, after that we
>might want to pick one direction, global vs per-pf if it will be separate
>knobs, and we probably will go with per-pf. Most CX devices support both
>modes and it is up to the driver to chose. So why do both global and
>per-pf when you can almost always do per-pf?

I was thinking that if the device supports per-PF, only the per-PF knob
will be present. And if not, only the "global" knob will be present on
the shared devlink instance.

Okay. So let's implement per-PF only now. And if the global knob is
needed in the future for older devices and we have a devlink instance to
hang it on, let's add it later. Makes sense?
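
For illustration, per-PF configuration would mean each PF's devlink
instance is set independently (same syntax as the commit message example;
the second PF address is assumed):
  devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent
  devlink dev param set pci/0000:01:00.1 name enable_sriov value false cmode permanent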

>
>> 
>> 
>> > +		return 0;
>> > +
>> > +	/* SRIOV is per PF */
>> > +	memset(mnvda, 0, sizeof(mnvda));
>> > +	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
>> > +	if (err) {
>> > +		NL_SET_ERR_MSG_MOD(extack, "Unable to read per host PF configuration");
>> > +		return err;
>> > +	}
>> > +	MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, ctx->val.vbool);
>> > +	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
>> > +}
>> > +
>> > static const struct devlink_param mlx5_nv_param_devlink_params[] = {
>> > +	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
>> > +			      mlx5_devlink_enable_sriov_get,
>> > +			      mlx5_devlink_enable_sriov_set, NULL),
>> > 	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
>> > 			     "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
>> > 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),
>> > --
>> > 2.48.1
>> > 
>> >
Kamal Heib March 4, 2025, 4:43 p.m. UTC | #5
On Thu, Feb 27, 2025 at 06:12:17PM -0800, Saeed Mahameed wrote:
> From: Vlad Dumitrescu <vdumitrescu@nvidia.com>
> 
> Example usage:
>   devlink dev param set pci/0000:01:00.0 name enable_sriov value {true, false} cmode permanent
>   devlink dev reload pci/0000:01:00.0 action fw_activate
>   echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
>   echo 1 >/sys/bus/pci/rescan
>   grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
> 
> Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com>
> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>

Tested-by: Kamal Heib <kheib@redhat.com>

> [...]
diff mbox series

Patch

diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 417e5cdcd35d..587e0200c1cd 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -15,23 +15,31 @@  Parameters
    * - Name
      - Mode
      - Validation
+     - Notes
    * - ``enable_roce``
      - driverinit
-     - Type: Boolean
-
-       If the device supports RoCE disablement, RoCE enablement state controls
+     - Boolean
+     - If the device supports RoCE disablement, RoCE enablement state controls
        device support for RoCE capability. Otherwise, the control occurs in the
        driver stack. When RoCE is disabled at the driver level, only raw
        ethernet QPs are supported.
    * - ``io_eq_size``
      - driverinit
      - The range is between 64 and 4096.
+     -
    * - ``event_eq_size``
      - driverinit
      - The range is between 64 and 4096.
+     -
    * - ``max_macs``
      - driverinit
      - The range is between 1 and 2^31. Only power of 2 values are supported.
+     -
+   * - ``enable_sriov``
+     - permanent
+     - Boolean
+     - Applies to each physical function (PF) independently, if the device
+       supports it. Otherwise, it applies symmetrically to all PFs.
 
 The ``mlx5`` driver also implements the following driver-specific
 parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index 1f764ae4f4aa..7a702d84f19a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -8,6 +8,7 @@ 
 #include "fs_core.h"
 #include "eswitch.h"
 #include "esw/qos.h"
+#include "lib/nv_param.h"
 #include "sf/dev/dev.h"
 #include "sf/sf.h"
 #include "lib/nv_param.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
index 5ab37a88c260..6b63fc110e2d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
@@ -5,7 +5,11 @@ 
 #include "mlx5_core.h"
 
 enum {
+	MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF               = 0x80,
+	MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP                = 0x81,
 	MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG             = 0x10a,
+
+	MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF                   = 0x80,
 };
 
 struct mlx5_ifc_configuration_item_type_class_global_bits {
@@ -13,9 +17,18 @@  struct mlx5_ifc_configuration_item_type_class_global_bits {
 	u8         parameter_index[0x18];
 };
 
+struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits {
+	u8         type_class[0x8];
+	u8         pf_index[0x6];
+	u8         pci_bus_index[0x8];
+	u8         parameter_index[0xa];
+};
+
 union mlx5_ifc_config_item_type_auto_bits {
 	struct mlx5_ifc_configuration_item_type_class_global_bits
 				configuration_item_type_class_global;
+	struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits
+				configuration_item_type_class_per_host_pf;
 	u8 reserved_at_0[0x20];
 };
 
@@ -45,6 +58,45 @@  struct mlx5_ifc_mnvda_reg_bits {
 	u8         configuration_item_data[64][0x20];
 };
 
+struct mlx5_ifc_nv_global_pci_conf_bits {
+	u8         sriov_valid[0x1];
+	u8         reserved_at_1[0x10];
+	u8         per_pf_total_vf[0x1];
+	u8         reserved_at_12[0xe];
+
+	u8         sriov_en[0x1];
+	u8         reserved_at_21[0xf];
+	u8         total_vfs[0x10];
+
+	u8         reserved_at_40[0x20];
+};
+
+struct mlx5_ifc_nv_global_pci_cap_bits {
+	u8         max_vfs_per_pf_valid[0x1];
+	u8         reserved_at_1[0x13];
+	u8         per_pf_total_vf_supported[0x1];
+	u8         reserved_at_15[0xb];
+
+	u8         sriov_support[0x1];
+	u8         reserved_at_21[0xf];
+	u8         max_vfs_per_pf[0x10];
+
+	u8         reserved_at_40[0x60];
+};
+
+struct mlx5_ifc_nv_pf_pci_conf_bits {
+	u8         reserved_at_0[0x9];
+	u8         pf_total_vf_en[0x1];
+	u8         reserved_at_a[0x16];
+
+	u8         reserved_at_20[0x20];
+
+	u8         reserved_at_40[0x10];
+	u8         total_vf[0x10];
+
+	u8         reserved_at_60[0x20];
+};
+
 struct mlx5_ifc_nv_sw_offload_conf_bits {
 	u8         ip_over_vxlan_port[0x10];
 	u8         tunnel_ecn_copy_offload_disable[0x1];
@@ -206,7 +258,139 @@  static int mlx5_nv_param_devlink_cqe_compress_set(struct devlink *devlink, u32 i
 	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
 }
 
+static int
+mlx5_nv_param_read_global_pci_conf(struct mlx5_core_dev *dev, void *mnvda, size_t len)
+{
+	MLX5_SET_CONFIG_ITEM_TYPE(global, mnvda, type_class, 0);
+	MLX5_SET_CONFIG_ITEM_TYPE(global, mnvda, parameter_index,
+				  MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF);
+	MLX5_SET_CONFIG_HDR_LEN(mnvda, nv_global_pci_conf);
+
+	return mlx5_nv_param_read(dev, mnvda, len);
+}
+
+static int
+mlx5_nv_param_read_global_pci_cap(struct mlx5_core_dev *dev, void *mnvda, size_t len)
+{
+	MLX5_SET_CONFIG_ITEM_TYPE(global, mnvda, type_class, 0);
+	MLX5_SET_CONFIG_ITEM_TYPE(global, mnvda, parameter_index,
+				  MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP);
+	MLX5_SET_CONFIG_HDR_LEN(mnvda, nv_global_pci_cap);
+
+	return mlx5_nv_param_read(dev, mnvda, len);
+}
+
+static int
+mlx5_nv_param_read_per_host_pf_conf(struct mlx5_core_dev *dev, void *mnvda, size_t len)
+{
+	MLX5_SET_CONFIG_ITEM_TYPE(per_host_pf, mnvda, type_class, 3);
+	MLX5_SET_CONFIG_ITEM_TYPE(per_host_pf, mnvda, parameter_index,
+				  MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF);
+	MLX5_SET_CONFIG_HDR_LEN(mnvda, nv_pf_pci_conf);
+
+	return mlx5_nv_param_read(dev, mnvda, len);
+}
+
+static int mlx5_devlink_enable_sriov_get(struct devlink *devlink, u32 id,
+					 struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	void *data;
+	int err;
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
+		ctx->val.vbool = false;
+		return 0;
+	}
+
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) {
+		ctx->val.vbool = MLX5_GET(nv_global_pci_conf, data, sriov_en);
+		return 0;
+	}
+
+	/* SRIOV is per PF */
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	ctx->val.vbool = MLX5_GET(nv_pf_pci_conf, data, pf_total_vf_en);
+	return 0;
+}
+
+static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id,
+					 struct devlink_param_gset_ctx *ctx,
+					 struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	bool per_pf_support;
+	void *cap, *data;
+	int err;
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read global PCI capability");
+		return err;
+	}
+
+	cap = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	per_pf_support = MLX5_GET(nv_global_pci_cap, cap, per_pf_total_vf_supported);
+
+	if (!MLX5_GET(nv_global_pci_cap, cap, sriov_support)) {
+		NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device");
+		return -EOPNOTSUPP;
+	}
+
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to read global PCI configuration");
+		return err;
+	}
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1);
+	MLX5_SET(nv_global_pci_conf, data, sriov_en, ctx->val.vbool);
+	MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support);
+
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to write global PCI configuration");
+		return err;
+	}
+
+	if (!per_pf_support)
+		return 0;
+
+	/* SRIOV is per PF */
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to read per host PF configuration");
+		return err;
+	}
+	MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, ctx->val.vbool);
+	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+}
+
 static const struct devlink_param mlx5_nv_param_devlink_params[] = {
+	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      mlx5_devlink_enable_sriov_get,
+			      mlx5_devlink_enable_sriov_set, NULL),
 	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
 			     "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),