diff mbox series

[net-next,v3,2/2] mlx5/core: Support max_io_eqs for a function

Message ID 20240403174133.37587-3-parav@nvidia.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series devlink: Add port function attribute for IO EQs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 943 this patch: 943
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 954 this patch: 954
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 954 this patch: 954
netdev/checkpatch warning WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 1 this patch: 1
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-04-05--00-00 (tests: 949)

Commit Message

Parav Pandit April 3, 2024, 5:41 p.m. UTC
Implement get and set for the maximum IO event queues for SF and VF.
This enables administrator on the hypervisor to control the maximum
IO event queues which are typically used to derive the maximum and
default number of net device channels or rdma device completion vectors.

Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Parav Pandit <parav@nvidia.com>
---
changelog:
v2->v3:
- limited to 80 chars per line in devlink
- fixed comments from Jakub in mlx5 driver to fix missing mutex unlock
  on error path
v1->v2:
- fixed comments from Kalesh
- fixed missing kfree in get call
- returning error code for get cmd failure
- fixed error msg copy paste error in set on cmd failure
- limited code to 80 chars limit
- fixed set function variables for reverse christmas tree
---
 .../mellanox/mlx5/core/esw/devlink_port.c     |  4 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
 .../mellanox/mlx5/core/eswitch_offloads.c     | 97 +++++++++++++++++++
 3 files changed, 108 insertions(+)

Comments

Kalesh Anakkur Purayil April 4, 2024, 3:51 a.m. UTC | #1
On Wed, Apr 3, 2024 at 11:12 PM Parav Pandit <parav@nvidia.com> wrote:
>
> Implement get and set for the maximum IO event queues for SF and VF.
> This enables administrator on the hypervisor to control the maximum
> IO event queues which are typically used to derive the maximum and
> default number of net device channels or rdma device completion vectors.
>
> Reviewed-by: Shay Drory <shayd@nvidia.com>
> Signed-off-by: Parav Pandit <parav@nvidia.com>

Thanks Parav for addressing the comments. Changes look good to me.

Reviewed-by: Kalesh AP <kalesh-anakkur.purayil@broadcom.com>
> ---
> changelog:
> v2->v3:
> - limited to 80 chars per line in devlink
> - fixed comments from Jakub in mlx5 driver to fix missing mutex unlock
>   on error path
> v1->v2:
> - fixed comments from Kalesh
> - fixed missing kfree in get call
> - returning error code for get cmd failure
> - fixed error msg copy paste error in set on cmd failure
> - limited code to 80 chars limit
> - fixed set function variables for reverse christmas tree
> ---
>  .../mellanox/mlx5/core/esw/devlink_port.c     |  4 +
>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
>  .../mellanox/mlx5/core/eswitch_offloads.c     | 97 +++++++++++++++++++
>  3 files changed, 108 insertions(+)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> index d8e739cbcbce..f8869c9b6802 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> @@ -98,6 +98,8 @@ static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = {
>         .port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get,
>         .port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set,
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +       .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> +       .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>  };
>
>  static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw,
> @@ -143,6 +145,8 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = {
>         .port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
>         .port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
>  #endif
> +       .port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> +       .port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>  };
>
>  int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 349e28a6dd8d..50ce1ea20dd4 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_en
>  int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable,
>                                           struct netlink_ext_ack *extack);
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port,
> +                                       u32 *max_io_eqs,
> +                                       struct netlink_ext_ack *extack);
> +int mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port,
> +                                       u32 max_io_eqs,
> +                                       struct netlink_ext_ack *extack);
> +
>  void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
>
>  int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index baaae628b0a0..2ad50634b401 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -66,6 +66,8 @@
>
>  #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
>
> +#define MLX5_ESW_MAX_CTRL_EQS 4
> +
>  static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
>         .max_fte = MLX5_ESW_VPORT_TBL_SIZE,
>         .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
> @@ -4557,3 +4559,98 @@ int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
>         return err;
>  }
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +
> +int
> +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
> +                                   struct netlink_ext_ack *extack)
> +{
> +       struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> +       int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> +       u16 vport_num = vport->vport;
> +       struct mlx5_eswitch *esw;
> +       void *query_ctx;
> +       void *hca_caps;
> +       u32 max_eqs;
> +       int err;
> +
> +       esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> +       if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> +               NL_SET_ERR_MSG_MOD(extack,
> +                                  "Device doesn't support VHCA management");
> +               return -EOPNOTSUPP;
> +       }
> +
> +       query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> +       if (!query_ctx)
> +               return -ENOMEM;
> +
> +       mutex_lock(&esw->state_lock);
> +       err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
> +                                           MLX5_CAP_GENERAL);
> +       if (err) {
> +               NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> +               goto out;
> +       }
> +
> +       hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
> +       max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
> +       if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
> +               *max_io_eqs = 0;
> +       else
> +               *max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
> +out:
> +       mutex_unlock(&esw->state_lock);
> +       kfree(query_ctx);
> +       return err;
> +}
> +
> +int
> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
> +                                   struct netlink_ext_ack *extack)
> +{
> +       struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> +       int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> +       u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS;
> +       u16 vport_num = vport->vport;
> +       struct mlx5_eswitch *esw;
> +       void *query_ctx;
> +       void *hca_caps;
> +       int err;
> +
> +       esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> +       if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> +               NL_SET_ERR_MSG_MOD(extack,
> +                                  "Device doesn't support VHCA management");
> +               return -EOPNOTSUPP;
> +       }
> +
> +       if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) {
> +               NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
> +               return -EINVAL;
> +       }
> +
> +       query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> +       if (!query_ctx)
> +               return -ENOMEM;
> +
> +       mutex_lock(&esw->state_lock);
> +       err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
> +                                           MLX5_CAP_GENERAL);
> +       if (err) {
> +               NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> +               goto out;
> +       }
> +
> +       hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
> +       MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
> +
> +       err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
> +                                           MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> +       if (err)
> +               NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
> +
> +out:
> +       mutex_unlock(&esw->state_lock);
> +       kfree(query_ctx);
> +       return err;
> +}
> --
> 2.26.2
>
David Wei April 4, 2024, 4:57 p.m. UTC | #2
On 2024-04-03 10:41, Parav Pandit wrote:
> Implement get and set for the maximum IO event queues for SF and VF.
> This enables administrator on the hypervisor to control the maximum
> IO event queues which are typically used to derive the maximum and
> default number of net device channels or rdma device completion vectors.
> 
> Reviewed-by: Shay Drory <shayd@nvidia.com>
> Signed-off-by: Parav Pandit <parav@nvidia.com>
> ---
> changelog:
> v2->v3:
> - limited to 80 chars per line in devlink
> - fixed comments from Jakub in mlx5 driver to fix missing mutex unlock
>   on error path
> v1->v2:
> - fixed comments from Kalesh
> - fixed missing kfree in get call
> - returning error code for get cmd failure
> - fixed error msg copy paste error in set on cmd failure
> - limited code to 80 chars limit
> - fixed set function variables for reverse christmas tree
> ---
>  .../mellanox/mlx5/core/esw/devlink_port.c     |  4 +
>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
>  .../mellanox/mlx5/core/eswitch_offloads.c     | 97 +++++++++++++++++++
>  3 files changed, 108 insertions(+)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> index d8e739cbcbce..f8869c9b6802 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> @@ -98,6 +98,8 @@ static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = {
>  	.port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get,
>  	.port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set,
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>  };
>  
>  static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw,
> @@ -143,6 +145,8 @@ static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = {
>  	.port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
>  	.port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
>  #endif
> +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>  };
>  
>  int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 349e28a6dd8d..50ce1ea20dd4 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_en
>  int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable,
>  					  struct netlink_ext_ack *extack);
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port,
> +					u32 *max_io_eqs,
> +					struct netlink_ext_ack *extack);
> +int mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port,
> +					u32 max_io_eqs,
> +					struct netlink_ext_ack *extack);
> +
>  void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
>  
>  int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index baaae628b0a0..2ad50634b401 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -66,6 +66,8 @@
>  
>  #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
>  
> +#define MLX5_ESW_MAX_CTRL_EQS 4
> +
>  static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
>  	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
>  	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
> @@ -4557,3 +4559,98 @@ int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
>  	return err;
>  }
>  #endif /* CONFIG_XFRM_OFFLOAD */
> +
> +int
> +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
> +				    struct netlink_ext_ack *extack)
> +{
> +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> +	u16 vport_num = vport->vport;
> +	struct mlx5_eswitch *esw;
> +	void *query_ctx;
> +	void *hca_caps;
> +	u32 max_eqs;
> +	int err;
> +
> +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> +		NL_SET_ERR_MSG_MOD(extack,
> +				   "Device doesn't support VHCA management");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> +	if (!query_ctx)
> +		return -ENOMEM;
> +
> +	mutex_lock(&esw->state_lock);
> +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
> +					    MLX5_CAP_GENERAL);
> +	if (err) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> +		goto out;
> +	}
> +
> +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
> +	max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
> +	if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
> +		*max_io_eqs = 0;
> +	else
> +		*max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
> +out:
> +	mutex_unlock(&esw->state_lock);
> +	kfree(query_ctx);
> +	return err;
> +}
> +
> +int
> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
> +				    struct netlink_ext_ack *extack)
> +{
> +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> +	u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS;
> +	u16 vport_num = vport->vport;
> +	struct mlx5_eswitch *esw;
> +	void *query_ctx;
> +	void *hca_caps;
> +	int err;
> +
> +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> +		NL_SET_ERR_MSG_MOD(extack,
> +				   "Device doesn't support VHCA management");
> +		return -EOPNOTSUPP;
> +	}
> +
> +	if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) {

nit: this is the same as max_eqs

> +		NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
> +		return -EINVAL;
> +	}
> +
> +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> +	if (!query_ctx)
> +		return -ENOMEM;
> +
> +	mutex_lock(&esw->state_lock);
> +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
> +					    MLX5_CAP_GENERAL);
> +	if (err) {
> +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> +		goto out;
> +	}
> +
> +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
> +	MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
> +
> +	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
> +					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> +	if (err)
> +		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
> +
> +out:
> +	mutex_unlock(&esw->state_lock);
> +	kfree(query_ctx);
> +	return err;
> +}
Parav Pandit April 4, 2024, 6:05 p.m. UTC | #3
> From: David Wei <dw@davidwei.uk>
> Sent: Thursday, April 4, 2024 10:27 PM
> 
> On 2024-04-03 10:41, Parav Pandit wrote:
> > Implement get and set for the maximum IO event queues for SF and VF.
> > This enables administrator on the hypervisor to control the maximum IO
> > event queues which are typically used to derive the maximum and
> > default number of net device channels or rdma device completion vectors.
> >
> > Reviewed-by: Shay Drory <shayd@nvidia.com>
> > Signed-off-by: Parav Pandit <parav@nvidia.com>
> > ---
> > changelog:
> > v2->v3:
> > - limited to 80 chars per line in devlink
> > - fixed comments from Jakub in mlx5 driver to fix missing mutex unlock
> >   on error path
> > v1->v2:
> > - fixed comments from Kalesh
> > - fixed missing kfree in get call
> > - returning error code for get cmd failure
> > - fixed error msg copy paste error in set on cmd failure
> > - limited code to 80 chars limit
> > - fixed set function variables for reverse christmas tree
> > ---
> >  .../mellanox/mlx5/core/esw/devlink_port.c     |  4 +
> >  .../net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
> >  .../mellanox/mlx5/core/eswitch_offloads.c     | 97 +++++++++++++++++++
> >  3 files changed, 108 insertions(+)
> >
> > diff --git
> > a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> > b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> > index d8e739cbcbce..f8869c9b6802 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
> > @@ -98,6 +98,8 @@ static const struct devlink_port_ops
> mlx5_esw_pf_vf_dl_port_ops = {
> >  	.port_fn_ipsec_packet_get =
> mlx5_devlink_port_fn_ipsec_packet_get,
> >  	.port_fn_ipsec_packet_set =
> mlx5_devlink_port_fn_ipsec_packet_set,
> >  #endif /* CONFIG_XFRM_OFFLOAD */
> > +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> > +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
> >  };
> >
> >  static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct
> > mlx5_eswitch *esw, @@ -143,6 +145,8 @@ static const struct
> devlink_port_ops mlx5_esw_dl_sf_port_ops = {
> >  	.port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
> >  	.port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
> >  #endif
> > +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
> > +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
> >  };
> >
> >  int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw,
> > struct mlx5_vport *vport) diff --git
> > a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> > b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> > index 349e28a6dd8d..50ce1ea20dd4 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> > @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct
> > devlink_port *port, bool *is_en  int
> mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool
> enable,
> >  					  struct netlink_ext_ack *extack);
> #endif /*
> > CONFIG_XFRM_OFFLOAD */
> > +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port,
> > +					u32 *max_io_eqs,
> > +					struct netlink_ext_ack *extack); int
> > +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port,
> > +					u32 max_io_eqs,
> > +					struct netlink_ext_ack *extack);
> > +
> >  void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8
> > rep_type);
> >
> >  int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, diff
> > --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> > b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> > index baaae628b0a0..2ad50634b401 100644
> > --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> > @@ -66,6 +66,8 @@
> >
> >  #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
> >
> > +#define MLX5_ESW_MAX_CTRL_EQS 4
> > +
> >  static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
> >  	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
> >  	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@ -
> 4557,3 +4559,98
> > @@ int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
> >  	return err;
> >  }
> >  #endif /* CONFIG_XFRM_OFFLOAD */
> > +
> > +int
> > +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32
> *max_io_eqs,
> > +				    struct netlink_ext_ack *extack) {
> > +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> > +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> > +	u16 vport_num = vport->vport;
> > +	struct mlx5_eswitch *esw;
> > +	void *query_ctx;
> > +	void *hca_caps;
> > +	u32 max_eqs;
> > +	int err;
> > +
> > +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> > +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> > +		NL_SET_ERR_MSG_MOD(extack,
> > +				   "Device doesn't support VHCA
> management");
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> > +	if (!query_ctx)
> > +		return -ENOMEM;
> > +
> > +	mutex_lock(&esw->state_lock);
> > +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num,
> query_ctx,
> > +					    MLX5_CAP_GENERAL);
> > +	if (err) {
> > +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> > +		goto out;
> > +	}
> > +
> > +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx,
> capability);
> > +	max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
> > +	if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
> > +		*max_io_eqs = 0;
> > +	else
> > +		*max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
> > +out:
> > +	mutex_unlock(&esw->state_lock);
> > +	kfree(query_ctx);
> > +	return err;
> > +}
> > +
> > +int
> > +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32
> max_io_eqs,
> > +				    struct netlink_ext_ack *extack) {
> > +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
> > +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
> > +	u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS;
> > +	u16 vport_num = vport->vport;
> > +	struct mlx5_eswitch *esw;
> > +	void *query_ctx;
> > +	void *hca_caps;
> > +	int err;
> > +
> > +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
> > +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
> > +		NL_SET_ERR_MSG_MOD(extack,
> > +				   "Device doesn't support VHCA
> management");
> > +		return -EOPNOTSUPP;
> > +	}
> > +
> > +	if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) {
> 
> nit: this is the same as max_eqs
>
Yes.
max_eqs may overflow and we may miss the overflow check by reusing max_eqs.
Above if condition avoids that bug.
That reminds me that I can replace above max_eqs line and this with check_add_overflow() and store the result in max_io_eqs.
And save one line. :)

Shall I resend with this change?

> > +		NL_SET_ERR_MSG_MOD(extack, "Supplied value out of
> range");
> > +		return -EINVAL;
> > +	}
> > +
> > +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
> > +	if (!query_ctx)
> > +		return -ENOMEM;
> > +
> > +	mutex_lock(&esw->state_lock);
> > +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num,
> query_ctx,
> > +					    MLX5_CAP_GENERAL);
> > +	if (err) {
> > +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
> > +		goto out;
> > +	}
> > +
> > +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx,
> capability);
> > +	MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
> > +
> > +	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps,
> vport_num,
> > +
> MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
> > +	if (err)
> > +		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
> > +
> > +out:
> > +	mutex_unlock(&esw->state_lock);
> > +	kfree(query_ctx);
> > +	return err;
> > +}
David Wei April 4, 2024, 6:44 p.m. UTC | #4
On 2024-04-04 11:05, Parav Pandit wrote:
> 
> 
>> From: David Wei <dw@davidwei.uk>
>> Sent: Thursday, April 4, 2024 10:27 PM
>>
>> On 2024-04-03 10:41, Parav Pandit wrote:
>>> Implement get and set for the maximum IO event queues for SF and VF.
>>> This enables administrator on the hypervisor to control the maximum IO
>>> event queues which are typically used to derive the maximum and
>>> default number of net device channels or rdma device completion vectors.
>>>
>>> Reviewed-by: Shay Drory <shayd@nvidia.com>
>>> Signed-off-by: Parav Pandit <parav@nvidia.com>
>>> ---
>>> changelog:
>>> v2->v3:
>>> - limited to 80 chars per line in devlink
>>> - fixed comments from Jakub in mlx5 driver to fix missing mutex unlock
>>>   on error path
>>> v1->v2:
>>> - fixed comments from Kalesh
>>> - fixed missing kfree in get call
>>> - returning error code for get cmd failure
>>> - fixed error msg copy paste error in set on cmd failure
>>> - limited code to 80 chars limit
>>> - fixed set function variables for reverse christmas tree
>>> ---
>>>  .../mellanox/mlx5/core/esw/devlink_port.c     |  4 +
>>>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |  7 ++
>>>  .../mellanox/mlx5/core/eswitch_offloads.c     | 97 +++++++++++++++++++
>>>  3 files changed, 108 insertions(+)
>>>
>>> diff --git
>>> a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
>>> b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
>>> index d8e739cbcbce..f8869c9b6802 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
>>> @@ -98,6 +98,8 @@ static const struct devlink_port_ops
>> mlx5_esw_pf_vf_dl_port_ops = {
>>>  	.port_fn_ipsec_packet_get =
>> mlx5_devlink_port_fn_ipsec_packet_get,
>>>  	.port_fn_ipsec_packet_set =
>> mlx5_devlink_port_fn_ipsec_packet_set,
>>>  #endif /* CONFIG_XFRM_OFFLOAD */
>>> +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
>>> +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>>>  };
>>>
>>>  static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct
>>> mlx5_eswitch *esw, @@ -143,6 +145,8 @@ static const struct
>> devlink_port_ops mlx5_esw_dl_sf_port_ops = {
>>>  	.port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
>>>  	.port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
>>>  #endif
>>> +	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
>>> +	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
>>>  };
>>>
>>>  int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw,
>>> struct mlx5_vport *vport) diff --git
>>> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>>> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>>> index 349e28a6dd8d..50ce1ea20dd4 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
>>> @@ -573,6 +573,13 @@ int mlx5_devlink_port_fn_ipsec_packet_get(struct
>>> devlink_port *port, bool *is_en  int
>> mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool
>> enable,
>>>  					  struct netlink_ext_ack *extack);
>> #endif /*
>>> CONFIG_XFRM_OFFLOAD */
>>> +int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port,
>>> +					u32 *max_io_eqs,
>>> +					struct netlink_ext_ack *extack); int
>>> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port,
>>> +					u32 max_io_eqs,
>>> +					struct netlink_ext_ack *extack);
>>> +
>>>  void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8
>>> rep_type);
>>>
>>>  int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, diff
>>> --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>>> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>>> index baaae628b0a0..2ad50634b401 100644
>>> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>>> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
>>> @@ -66,6 +66,8 @@
>>>
>>>  #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
>>>
>>> +#define MLX5_ESW_MAX_CTRL_EQS 4
>>> +
>>>  static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
>>>  	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
>>>  	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@ -
>> 4557,3 +4559,98
>>> @@ int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
>>>  	return err;
>>>  }
>>>  #endif /* CONFIG_XFRM_OFFLOAD */
>>> +
>>> +int
>>> +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32
>> *max_io_eqs,
>>> +				    struct netlink_ext_ack *extack) {
>>> +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
>>> +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
>>> +	u16 vport_num = vport->vport;
>>> +	struct mlx5_eswitch *esw;
>>> +	void *query_ctx;
>>> +	void *hca_caps;
>>> +	u32 max_eqs;
>>> +	int err;
>>> +
>>> +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
>>> +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
>>> +		NL_SET_ERR_MSG_MOD(extack,
>>> +				   "Device doesn't support VHCA
>> management");
>>> +		return -EOPNOTSUPP;
>>> +	}
>>> +
>>> +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
>>> +	if (!query_ctx)
>>> +		return -ENOMEM;
>>> +
>>> +	mutex_lock(&esw->state_lock);
>>> +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num,
>> query_ctx,
>>> +					    MLX5_CAP_GENERAL);
>>> +	if (err) {
>>> +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
>>> +		goto out;
>>> +	}
>>> +
>>> +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx,
>> capability);
>>> +	max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
>>> +	if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
>>> +		*max_io_eqs = 0;
>>> +	else
>>> +		*max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
>>> +out:
>>> +	mutex_unlock(&esw->state_lock);
>>> +	kfree(query_ctx);
>>> +	return err;
>>> +}
>>> +
>>> +int
>>> +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32
>> max_io_eqs,
>>> +				    struct netlink_ext_ack *extack) {
>>> +	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
>>> +	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
>>> +	u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS;
>>> +	u16 vport_num = vport->vport;
>>> +	struct mlx5_eswitch *esw;
>>> +	void *query_ctx;
>>> +	void *hca_caps;
>>> +	int err;
>>> +
>>> +	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
>>> +	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
>>> +		NL_SET_ERR_MSG_MOD(extack,
>>> +				   "Device doesn't support VHCA
>> management");
>>> +		return -EOPNOTSUPP;
>>> +	}
>>> +
>>> +	if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) {
>>
>> nit: this is the same as max_eqs
>>
> Yes.
> max_eqs may overflow and we may miss the overflow check by reusing max_eqs.
> Above if condition avoids that bug.

Thanks, I missed that max_io_eqs is u32.

> That reminds me that I can replace above max_eqs line and this with check_add_overflow() and store the result in max_io_eqs.
> And save one line. :)
> 
> Shall I resend with this change?
Yes that sounds good, thank you.

> 
>>> +		NL_SET_ERR_MSG_MOD(extack, "Supplied value out of
>> range");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
>>> +	if (!query_ctx)
>>> +		return -ENOMEM;
>>> +
>>> +	mutex_lock(&esw->state_lock);
>>> +	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num,
>> query_ctx,
>>> +					    MLX5_CAP_GENERAL);
>>> +	if (err) {
>>> +		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
>>> +		goto out;
>>> +	}
>>> +
>>> +	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx,
>> capability);
>>> +	MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
>>> +
>>> +	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps,
>> vport_num,
>>> +
>> MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
>>> +	if (err)
>>> +		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
>>> +
>>> +out:
>>> +	mutex_unlock(&esw->state_lock);
>>> +	kfree(query_ctx);
>>> +	return err;
>>> +}
diff mbox series

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index d8e739cbcbce..f8869c9b6802 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -98,6 +98,8 @@  static const struct devlink_port_ops mlx5_esw_pf_vf_dl_port_ops = {
 	.port_fn_ipsec_packet_get = mlx5_devlink_port_fn_ipsec_packet_get,
 	.port_fn_ipsec_packet_set = mlx5_devlink_port_fn_ipsec_packet_set,
 #endif /* CONFIG_XFRM_OFFLOAD */
+	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
+	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
 };
 
 static void mlx5_esw_offloads_sf_devlink_port_attrs_set(struct mlx5_eswitch *esw,
@@ -143,6 +145,8 @@  static const struct devlink_port_ops mlx5_esw_dl_sf_port_ops = {
 	.port_fn_state_get = mlx5_devlink_sf_port_fn_state_get,
 	.port_fn_state_set = mlx5_devlink_sf_port_fn_state_set,
 #endif
+	.port_fn_max_io_eqs_get = mlx5_devlink_port_fn_max_io_eqs_get,
+	.port_fn_max_io_eqs_set = mlx5_devlink_port_fn_max_io_eqs_set,
 };
 
 int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 349e28a6dd8d..50ce1ea20dd4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -573,6 +573,13 @@  int mlx5_devlink_port_fn_ipsec_packet_get(struct devlink_port *port, bool *is_en
 int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port, bool enable,
 					  struct netlink_ext_ack *extack);
 #endif /* CONFIG_XFRM_OFFLOAD */
+int mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port,
+					u32 *max_io_eqs,
+					struct netlink_ext_ack *extack);
+int mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port,
+					u32 max_io_eqs,
+					struct netlink_ext_ack *extack);
+
 void *mlx5_eswitch_get_uplink_priv(struct mlx5_eswitch *esw, u8 rep_type);
 
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index baaae628b0a0..2ad50634b401 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -66,6 +66,8 @@ 
 
 #define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
 
+#define MLX5_ESW_MAX_CTRL_EQS 4
+
 static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
 	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
 	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
@@ -4557,3 +4559,98 @@  int mlx5_devlink_port_fn_ipsec_packet_set(struct devlink_port *port,
 	return err;
 }
 #endif /* CONFIG_XFRM_OFFLOAD */
+
+int
+mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs,
+				    struct netlink_ext_ack *extack)
+{
+	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
+	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	u16 vport_num = vport->vport;
+	struct mlx5_eswitch *esw;
+	void *query_ctx;
+	void *hca_caps;
+	u32 max_eqs;
+	int err;
+
+	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
+	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Device doesn't support VHCA management");
+		return -EOPNOTSUPP;
+	}
+
+	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+	if (!query_ctx)
+		return -ENOMEM;
+
+	mutex_lock(&esw->state_lock);
+	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
+					    MLX5_CAP_GENERAL);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+		goto out;
+	}
+
+	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+	max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs);
+	if (max_eqs < MLX5_ESW_MAX_CTRL_EQS)
+		*max_io_eqs = 0;
+	else
+		*max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS;
+out:
+	mutex_unlock(&esw->state_lock);
+	kfree(query_ctx);
+	return err;
+}
+
+int
+mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs,
+				    struct netlink_ext_ack *extack)
+{
+	struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port);
+	int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	u16 max_eqs = max_io_eqs + MLX5_ESW_MAX_CTRL_EQS;
+	u16 vport_num = vport->vport;
+	struct mlx5_eswitch *esw;
+	void *query_ctx;
+	void *hca_caps;
+	int err;
+
+	esw = mlx5_devlink_eswitch_nocheck_get(port->devlink);
+	if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Device doesn't support VHCA management");
+		return -EOPNOTSUPP;
+	}
+
+	if (max_io_eqs + MLX5_ESW_MAX_CTRL_EQS > USHRT_MAX) {
+		NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range");
+		return -EINVAL;
+	}
+
+	query_ctx = kzalloc(query_out_sz, GFP_KERNEL);
+	if (!query_ctx)
+		return -ENOMEM;
+
+	mutex_lock(&esw->state_lock);
+	err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx,
+					    MLX5_CAP_GENERAL);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps");
+		goto out;
+	}
+
+	hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+	MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs);
+
+	err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num,
+					    MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps");
+
+out:
+	mutex_unlock(&esw->state_lock);
+	kfree(query_ctx);
+	return err;
+}