Message ID | 20250228021227.871993-6-saeed@kernel.org (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | devlink, mlx5: Add new parameters for link management and SRIOV/eSwitch configurations | expand |
On Thu, Feb 27, 2025 at 06:12:18PM -0800, Saeed Mahameed wrote: > From: Vlad Dumitrescu <vdumitrescu@nvidia.com> > > Some devices support both symmetric (same value for all PFs) and > asymmetric, while others only support symmetric configuration. This > implementation prefers asymmetric, since it is closer to the devlink > model (per function settings), but falls back to symmetric when needed. > > Example usage: > devlink dev param set pci/0000:01:00.0 name total_vfs value <u16> cmode permanent > devlink dev reload pci/0000:01:00.0 action fw_activate > echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove > echo 1 >/sys/bus/pci/rescan > cat /sys/bus/pci/devices/0000:01:00.0/sriov_totalvfs > > Signed-off-by: Vlad Dumitrescu <vdumitrescu@nvidia.com> > Signed-off-by: Saeed Mahameed <saeedm@nvidia.com> > Reviewed-by: Jiri Pirko <jiri@nvidia.com> Tested-by: Kamal Heib <kheib@redhat.com> > --- > Documentation/networking/devlink/mlx5.rst | 22 +++ > .../mellanox/mlx5/core/lib/nv_param.c | 125 ++++++++++++++++++ > 2 files changed, 147 insertions(+) > > diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst > index 587e0200c1cd..00a43324dec2 100644 > --- a/Documentation/networking/devlink/mlx5.rst > +++ b/Documentation/networking/devlink/mlx5.rst > @@ -40,6 +40,28 @@ Parameters > - Boolean > - Applies to each physical function (PF) independently, if the device > supports it. Otherwise, it applies symmetrically to all PFs. > + * - ``total_vfs`` > + - permanent > + - The range is between 1 and a device-specific max. > + - Applies to each physical function (PF) independently, if the device > + supports it. Otherwise, it applies symmetrically to all PFs. > + > +Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect > + > +.. 
code-block:: bash > + > + # setup parameters > + devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent > + devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent > + > + # Fw reset > + devlink dev reload pci/0000:01:00.0 action fw_activate > + > + # for PCI related config such as sriov PCI reset/rescan is required: > + echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove > + echo 1 >/sys/bus/pci/rescan > + grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_* > + > > The ``mlx5`` driver also implements the following driver-specific > parameters. > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c > index 6b63fc110e2d..97d74d890582 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c > @@ -387,10 +387,135 @@ static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id, > return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); > } > > +static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id, > + struct devlink_param_gset_ctx *ctx) > +{ > + struct mlx5_core_dev *dev = devlink_priv(devlink); > + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; > + void *data; > + int err; > + > + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); > + > + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { > + ctx->val.vu32 = 0; > + return 0; > + } > + > + memset(mnvda, 0, sizeof(mnvda)); > + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) { > + ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, data, total_vfs); > + return 0; > + } > + > + /* SRIOV is per PF */ > + memset(mnvda, 0, sizeof(mnvda)); > + err = 
mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, data, total_vf); > + > + return 0; > +} > + > +static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id, > + struct devlink_param_gset_ctx *ctx, > + struct netlink_ext_ack *extack) > +{ > + struct mlx5_core_dev *dev = devlink_priv(devlink); > + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)]; > + bool per_pf_support; > + void *data; > + int err; > + > + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); > + if (err) { > + NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap"); > + return err; > + } > + > + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); > + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { > + NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device"); > + return -EOPNOTSUPP; > + } > + > + per_pf_support = MLX5_GET(nv_global_pci_cap, data, per_pf_total_vf_supported); > + memset(mnvda, 0, sizeof(mnvda)); > + > + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1); > + MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support); > + > + if (!per_pf_support) { > + MLX5_SET(nv_global_pci_conf, data, total_vfs, ctx->val.vu32); > + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); > + } > + > + /* SRIOV is per PF */ > + err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + memset(mnvda, 0, sizeof(mnvda)); > + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); > + if (err) > + return err; > + > + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); > + MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, 1); > + MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32); > + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); > +} > + > +static int mlx5_devlink_total_vfs_validate(struct 
devlink *devlink, u32 id, > + union devlink_param_value val, > + struct netlink_ext_ack *extack) > +{ > + struct mlx5_core_dev *dev = devlink_priv(devlink); > + u32 cap[MLX5_ST_SZ_DW(mnvda_reg)]; > + void *data; > + u16 max; > + int err; > + > + data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data); > + > + err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap)); > + if (err) > + return err; > + > + if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid)) > + return 0; /* optimistic, but set might fail later */ > + > + max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf); > + if (val.vu16 > max) { > + NL_SET_ERR_MSG_FMT_MOD(extack, > + "Max allowed by device is %u", max); > + return -EINVAL; > + } > + > + return 0; > +} > + > static const struct devlink_param mlx5_nv_param_devlink_params[] = { > DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), > mlx5_devlink_enable_sriov_get, > mlx5_devlink_enable_sriov_set, NULL), > + DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT), > + mlx5_devlink_total_vfs_get, mlx5_devlink_total_vfs_set, > + mlx5_devlink_total_vfs_validate), > DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, > "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING, > BIT(DEVLINK_PARAM_CMODE_PERMANENT), > -- > 2.48.1 > >
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 587e0200c1cd..00a43324dec2 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -40,6 +40,28 @@ Parameters
      - Boolean
      - Applies to each physical function (PF) independently, if the device
        supports it. Otherwise, it applies symmetrically to all PFs.
+   * - ``total_vfs``
+     - permanent
+     - The range is between 1 and a device-specific max.
+     - Applies to each physical function (PF) independently, if the device
+       supports it. Otherwise, it applies symmetrically to all PFs.
+
+Note: permanent parameters such as ``enable_sriov`` and ``total_vfs`` require FW reset to take effect.
+
+.. code-block:: bash
+
+   # setup parameters
+   devlink dev param set pci/0000:01:00.0 name enable_sriov value true cmode permanent
+   devlink dev param set pci/0000:01:00.0 name total_vfs value 8 cmode permanent
+
+   # FW reset
+   devlink dev reload pci/0000:01:00.0 action fw_activate
+
+   # for PCI-related config such as SR-IOV, a PCI remove/rescan is also required:
+   echo 1 >/sys/bus/pci/devices/0000:01:00.0/remove
+   echo 1 >/sys/bus/pci/rescan
+   grep ^ /sys/bus/pci/devices/0000:01:00.0/sriov_*
+
 
 The ``mlx5`` driver also implements the following driver-specific
 parameters.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
index 6b63fc110e2d..97d74d890582 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c
@@ -387,10 +387,155 @@ static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id,
 	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
 }
 
+/* devlink "total_vfs" get callback.
+ *
+ * Reports 0 when the global PCI capability item has sriov_support clear.
+ * Otherwise reports the global total_vfs value, or the per-PF total_vf value
+ * when per_pf_total_vf is set in the global PCI configuration.
+ * Returns 0 on success or the error from the NV parameter read.
+ */
+static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id,
+				      struct devlink_param_gset_ctx *ctx)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	void *data;
+	int err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
+		ctx->val.vu32 = 0;
+		return 0;
+	}
+
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) {
+		ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, data, total_vfs);
+		return 0;
+	}
+
+	/* SRIOV is per PF */
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, data, total_vf);
+
+	return 0;
+}
+
+/* devlink "total_vfs" set callback.
+ *
+ * Fails with -EOPNOTSUPP when the device reports no SRIOV support. Writes the
+ * value to the global PCI configuration when per-PF totals are not supported,
+ * otherwise enables per-PF mode globally and writes the per-PF total_vf item.
+ * Returns 0 on success or the error from the NV parameter read/write.
+ */
+static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id,
+				      struct devlink_param_gset_ctx *ctx,
+				      struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	bool per_pf_support;
+	void *data;
+	int err;
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda));
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap");
+		return err;
+	}
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) {
+		NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device");
+		return -EOPNOTSUPP;
+	}
+
+	per_pf_support = MLX5_GET(nv_global_pci_cap, data, per_pf_total_vf_supported);
+	memset(mnvda, 0, sizeof(mnvda));
+
+	err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1);
+	MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, per_pf_support);
+
+	if (!per_pf_support) {
+		MLX5_SET(nv_global_pci_conf, data, total_vfs, ctx->val.vu32);
+		return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	}
+
+	/* SRIOV is per PF */
+	err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	memset(mnvda, 0, sizeof(mnvda));
+	err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda));
+	if (err)
+		return err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data);
+	MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, 1);
+	MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32);
+	return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda));
+}
+
+/* devlink "total_vfs" validate callback.
+ *
+ * Rejects values above max_vfs_per_pf with -EINVAL when the device marks that
+ * limit as valid; accepts any value otherwise. The value is read through the
+ * vu32 member, the same one the get and set callbacks use.
+ */
+static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id,
+					   union devlink_param_value val,
+					   struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 cap[MLX5_ST_SZ_DW(mnvda_reg)] = {};
+	void *data;
+	u16 max;
+	int err;
+
+	data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data);
+
+	err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap));
+	if (err)
+		return err;
+
+	if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid))
+		return 0; /* optimistic, but set might fail later */
+
+	max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf);
+	if (val.vu32 > max) {
+		NL_SET_ERR_MSG_FMT_MOD(extack,
+				       "Max allowed by device is %u", max);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static const struct devlink_param mlx5_nv_param_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
 			      mlx5_devlink_enable_sriov_get,
 			      mlx5_devlink_enable_sriov_set, NULL),
+	DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT),
+			      mlx5_devlink_total_vfs_get, mlx5_devlink_total_vfs_set,
+			      mlx5_devlink_total_vfs_validate),
 	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE,
 			     "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING,
 			     BIT(DEVLINK_PARAM_CMODE_PERMANENT),