diff mbox series

[net-next,v3,03/15] ice: add basic devlink subfunctions support

Message ID 20240808173104.385094-4-anthony.l.nguyen@intel.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series ice: support devlink subfunction | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 29 this patch: 29
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 5 of 5 maintainers
netdev/build_clang success Errors and warnings before: 29 this patch: 29
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 31 this patch: 31
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns WARNING: line length of 99 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 217 this patch: 217
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-08-09--15-00 (tests: 705)

Commit Message

Tony Nguyen Aug. 8, 2024, 5:30 p.m. UTC
From: Piotr Raczynski <piotr.raczynski@intel.com>

Implement devlink port handlers responsible for ethernet type devlink
subfunctions. Create subfunction devlink port and setup all resources
needed for a subfunction netdev to operate. Configure new VSI for each
new subfunction, initialize and configure interrupts and Tx/Rx resources.
Set correct MAC filters and create new netdev.

For now, subfunction is limited to only one Tx/Rx queue pair.

Only allocate new subfunction VSI with devlink port new command.
Allocate and free subfunction MSIX interrupt vectors using new API
calls with pci_msix_alloc_irq_at and pci_msix_free_irq.

Support both automatic and manual subfunction numbers. If no subfunction
number is provided, use xa_alloc to pick a number automatically. This
will find the first free index and use that as the number. This reduces
burden on users in the simple case where a specific number is not
required. It may also be slightly faster to check that a number exists
since xarray lookup should be faster than a linear scan of the dyn_ports
xarray.

Reviewed-by: Simon Horman <horms@kernel.org>
Reviewed-by: Wojciech Drewek <wojciech.drewek@intel.com>
Co-developed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Piotr Raczynski <piotr.raczynski@intel.com>
Signed-off-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 .../net/ethernet/intel/ice/devlink/devlink.c  |   3 +
 .../ethernet/intel/ice/devlink/devlink_port.c | 288 ++++++++++++++++++
 .../ethernet/intel/ice/devlink/devlink_port.h |  34 +++
 drivers/net/ethernet/intel/ice/ice.h          |   4 +
 drivers/net/ethernet/intel/ice/ice_lib.c      |   5 +-
 drivers/net/ethernet/intel/ice/ice_lib.h      |   2 +
 drivers/net/ethernet/intel/ice/ice_main.c     |   7 +
 7 files changed, 341 insertions(+), 2 deletions(-)

Comments

Jiri Pirko Aug. 9, 2024, 11:16 a.m. UTC | #1
Thu, Aug 08, 2024 at 07:30:49PM CEST, anthony.l.nguyen@intel.com wrote:
>From: Piotr Raczynski <piotr.raczynski@intel.com>
>

[...]

>+static int
>+ice_devlink_port_new_check_attr(struct ice_pf *pf,
>+				const struct devlink_port_new_attrs *new_attr,
>+				struct netlink_ext_ack *extack)
>+{
>+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
>+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
>+		return -EOPNOTSUPP;
>+	}
>+
>+	if (new_attr->controller_valid) {
>+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
>+		return -EOPNOTSUPP;
>+	}
>+
>+	if (new_attr->port_index_valid) {
>+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");

Nope, it is actually valid, but your driver does not support that.
Please fix the message.


>+		return -EOPNOTSUPP;
>+	}
>+
>+	if (new_attr->pfnum != pf->hw.bus.func) {

hw.bus.func, hmm. How about if you pass-through the PF to VM, can't the
func be anything? Will this still make sense? I don't think so. If you
get the PF number like this in the rest of the driver, you need to fix
this.



>+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
>+		return -EINVAL;
>+	}
>+
>+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
>+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
>+		return -EOPNOTSUPP;
>+	}
>+
>+	return 0;
>+}

[...]


>+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
>+{
>+	struct devlink_port_attrs attrs = {};
>+	struct devlink_port *devlink_port;
>+	struct devlink *devlink;
>+	struct ice_vsi *vsi;
>+	struct ice_pf *pf;
>+
>+	vsi = dyn_port->vsi;
>+	pf = dyn_port->pf;
>+
>+	devlink_port = &dyn_port->devlink_port;
>+
>+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
>+	attrs.pci_sf.pf = pf->hw.bus.func;

Same here.


>+	attrs.pci_sf.sf = dyn_port->sfnum;
>+
>+	devlink_port_attrs_set(devlink_port, &attrs);
>+	devlink = priv_to_devlink(pf);
>+
>+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
>+					   &ice_devlink_port_sf_ops);
>+}
>+

[...]
Michal Swiatkowski Aug. 9, 2024, 11:47 a.m. UTC | #2
On Fri, Aug 09, 2024 at 01:16:28PM +0200, Jiri Pirko wrote:
> Thu, Aug 08, 2024 at 07:30:49PM CEST, anthony.l.nguyen@intel.com wrote:
> >From: Piotr Raczynski <piotr.raczynski@intel.com>
> >
> 
> [...]
> 
> >+static int
> >+ice_devlink_port_new_check_attr(struct ice_pf *pf,
> >+				const struct devlink_port_new_attrs *new_attr,
> >+				struct netlink_ext_ack *extack)
> >+{
> >+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
> >+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
> >+		return -EOPNOTSUPP;
> >+	}
> >+
> >+	if (new_attr->controller_valid) {
> >+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
> >+		return -EOPNOTSUPP;
> >+	}
> >+
> >+	if (new_attr->port_index_valid) {
> >+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");
> 
> Nope, it is actually valid, but your driver does not support that.
> Please fix the message.
>

Ok

> 
> >+		return -EOPNOTSUPP;
> >+	}
> >+
> >+	if (new_attr->pfnum != pf->hw.bus.func) {
> 
> hw.bus.func, hmm. How about if you pass-through the PF to VM, can't the
> func be anything? Will this still make sense? I don't think so. If you
> get the PF number like this in the rest of the driver, you need to fix
> this.
>

I can change it to our internal value. I wonder if it will be better. If
I understand correctly, now:
PF0 - xx.xx.0; PF1 - xx.xx.1

I am doing pass-through PF1
PF0 (on host) - xx.xx.0

PF1 (on VM) - xx.xx.0 (there is one PF on VM, so for me it is more
intuitive to have func 0)

after I change:
PF0 - xx.xx.0; PF1 - xx.xx.1

pass-through PF1
PF0 (on host) - xx.xx.0

PF1 (on VM) - xx.xx.0, but user will have to use 1 as pf num

Correct me if I am wrong.

> 
> 
> >+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
> >+		return -EINVAL;
> >+	}
> >+
> >+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
> >+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
> >+		return -EOPNOTSUPP;
> >+	}
> >+
> >+	return 0;
> >+}
> 
> [...]
> 
> 
> >+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
> >+{
> >+	struct devlink_port_attrs attrs = {};
> >+	struct devlink_port *devlink_port;
> >+	struct devlink *devlink;
> >+	struct ice_vsi *vsi;
> >+	struct ice_pf *pf;
> >+
> >+	vsi = dyn_port->vsi;
> >+	pf = dyn_port->pf;
> >+
> >+	devlink_port = &dyn_port->devlink_port;
> >+
> >+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
> >+	attrs.pci_sf.pf = pf->hw.bus.func;
> 
> Same here.
> 
> 
> >+	attrs.pci_sf.sf = dyn_port->sfnum;
> >+
> >+	devlink_port_attrs_set(devlink_port, &attrs);
> >+	devlink = priv_to_devlink(pf);
> >+
> >+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
> >+					   &ice_devlink_port_sf_ops);
> >+}
> >+
> 
> [...]
Jiri Pirko Aug. 10, 2024, 6:50 a.m. UTC | #3
Fri, Aug 09, 2024 at 01:47:38PM CEST, michal.swiatkowski@linux.intel.com wrote:
>On Fri, Aug 09, 2024 at 01:16:28PM +0200, Jiri Pirko wrote:
>> Thu, Aug 08, 2024 at 07:30:49PM CEST, anthony.l.nguyen@intel.com wrote:
>> >From: Piotr Raczynski <piotr.raczynski@intel.com>
>> >
>> 
>> [...]
>> 
>> >+static int
>> >+ice_devlink_port_new_check_attr(struct ice_pf *pf,
>> >+				const struct devlink_port_new_attrs *new_attr,
>> >+				struct netlink_ext_ack *extack)
>> >+{
>> >+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
>> >+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	if (new_attr->controller_valid) {
>> >+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	if (new_attr->port_index_valid) {
>> >+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");
>> 
>> Nope, it is actually valid, but your driver does not support that.
>> Please fix the message.
>>
>
>Ok
>
>> 
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	if (new_attr->pfnum != pf->hw.bus.func) {
>> 
>> hw.bus.func, hmm. How about if you pass-through the PF to VM, can't the
>> func be anything? Will this still make sense? I don't think so. If you
>> get the PF number like this in the rest of the driver, you need to fix
>> this.
>>
>
>I can change it to our internal value. I wonder if it will be better. If
>I understand correctly, now:
>PF0 - xx.xx.0; PF1 - xx.xx.1
>
>I am doing pass-through PF1
>PF0 (on host) - xx.xx.0
>
>PF1 (on VM) - xx.xx.0 (there is one PF on VM, so for me it is more
>intuitive to have func 0)
>
>after I change:
>PF0 - xx.xx.0; PF1 - xx.xx.1
>
>pass-through PF1
>PF0 (on host) - xx.xx.0
>
>PF1 (on VM) - xx.xx.0, but user will have to use 1 as pf num
>
>Correct me if I am wrong.

Did you try this? I mean, you can check that easily with vng:
vng --qemu-opts="-device vfio-pci,host=5e:01.0,addr=01.04"

Then in the VM you see:
0000:00:01.4

Then, by your code, the pf number is 4, isn't it?

What am I missing?



>
>> 
>> 
>> >+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
>> >+		return -EINVAL;
>> >+	}
>> >+
>> >+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
>> >+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
>> >+		return -EOPNOTSUPP;
>> >+	}
>> >+
>> >+	return 0;
>> >+}
>> 
>> [...]
>> 
>> 
>> >+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
>> >+{
>> >+	struct devlink_port_attrs attrs = {};
>> >+	struct devlink_port *devlink_port;
>> >+	struct devlink *devlink;
>> >+	struct ice_vsi *vsi;
>> >+	struct ice_pf *pf;
>> >+
>> >+	vsi = dyn_port->vsi;
>> >+	pf = dyn_port->pf;
>> >+
>> >+	devlink_port = &dyn_port->devlink_port;
>> >+
>> >+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
>> >+	attrs.pci_sf.pf = pf->hw.bus.func;
>> 
>> Same here.
>> 
>> 
>> >+	attrs.pci_sf.sf = dyn_port->sfnum;
>> >+
>> >+	devlink_port_attrs_set(devlink_port, &attrs);
>> >+	devlink = priv_to_devlink(pf);
>> >+
>> >+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
>> >+					   &ice_devlink_port_sf_ops);
>> >+}
>> >+
>> 
>> [...]
Michal Swiatkowski Aug. 12, 2024, 4:24 a.m. UTC | #4
On Sat, Aug 10, 2024 at 08:50:38AM +0200, Jiri Pirko wrote:
> Fri, Aug 09, 2024 at 01:47:38PM CEST, michal.swiatkowski@linux.intel.com wrote:
> >On Fri, Aug 09, 2024 at 01:16:28PM +0200, Jiri Pirko wrote:
> >> Thu, Aug 08, 2024 at 07:30:49PM CEST, anthony.l.nguyen@intel.com wrote:
> >> >From: Piotr Raczynski <piotr.raczynski@intel.com>
> >> >
> >> 
> >> [...]
> >> 
> >> >+static int
> >> >+ice_devlink_port_new_check_attr(struct ice_pf *pf,
> >> >+				const struct devlink_port_new_attrs *new_attr,
> >> >+				struct netlink_ext_ack *extack)
> >> >+{
> >> >+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
> >> >+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
> >> >+		return -EOPNOTSUPP;
> >> >+	}
> >> >+
> >> >+	if (new_attr->controller_valid) {
> >> >+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
> >> >+		return -EOPNOTSUPP;
> >> >+	}
> >> >+
> >> >+	if (new_attr->port_index_valid) {
> >> >+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");
> >> 
> >> Nope, it is actually valid, but your driver does not support that.
> >> Please fix the message.
> >>
> >
> >Ok
> >
> >> 
> >> >+		return -EOPNOTSUPP;
> >> >+	}
> >> >+
> >> >+	if (new_attr->pfnum != pf->hw.bus.func) {
> >> 
> >> hw.bus.func, hmm. How about if you pass-through the PF to VM, can't the
> >> func be anything? Will this still make sense? I don't think so. If you
> >> get the PF number like this in the rest of the driver, you need to fix
> >> this.
> >>
> >
> >I can change it to our internal value. I wonder if it will be better. If
> >I understand correctly, now:
> >PF0 - xx.xx.0; PF1 - xx.xx.1
> >
> >I am doing pass-through PF1
> >PF0 (on host) - xx.xx.0
> >
> >PF1 (on VM) - xx.xx.0 (there is one PF on VM, so for me it is more
> >intuitive to have func 0)
> >
> >after I change:
> >PF0 - xx.xx.0; PF1 - xx.xx.1
> >
> >pass-through PF1
> >PF0 (on host) - xx.xx.0
> >
> >PF1 (on VM) - xx.xx.0, but user will have to use 1 as pf num
> >
> >Correct me if I am wrong.
> 
> Did you try this? I mean, you can check that easily with vng:
> vng --qemu-opts="-device vfio-pci,host=5e:01.0,addr=01.04"
> 
> Then in the VM you see:
> 0000:00:01.4
> 
> Then, by your code, the pf number is 4, isn't it?
> 
> What am I missing?

I didn't know about addr parameter. I will change it to always have the
same number so.

> 
> 
> 
> >
> >> 
> >> 
> >> >+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
> >> >+		return -EINVAL;
> >> >+	}
> >> >+
> >> >+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
> >> >+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
> >> >+		return -EOPNOTSUPP;
> >> >+	}
> >> >+
> >> >+	return 0;
> >> >+}
> >> 
> >> [...]
> >> 
> >> 
> >> >+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
> >> >+{
> >> >+	struct devlink_port_attrs attrs = {};
> >> >+	struct devlink_port *devlink_port;
> >> >+	struct devlink *devlink;
> >> >+	struct ice_vsi *vsi;
> >> >+	struct ice_pf *pf;
> >> >+
> >> >+	vsi = dyn_port->vsi;
> >> >+	pf = dyn_port->pf;
> >> >+
> >> >+	devlink_port = &dyn_port->devlink_port;
> >> >+
> >> >+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
> >> >+	attrs.pci_sf.pf = pf->hw.bus.func;
> >> 
> >> Same here.
> >> 
> >> 
> >> >+	attrs.pci_sf.sf = dyn_port->sfnum;
> >> >+
> >> >+	devlink_port_attrs_set(devlink_port, &attrs);
> >> >+	devlink = priv_to_devlink(pf);
> >> >+
> >> >+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
> >> >+					   &ice_devlink_port_sf_ops);
> >> >+}
> >> >+
> >> 
> >> [...]
Jiri Pirko Aug. 12, 2024, 9:53 a.m. UTC | #5
Mon, Aug 12, 2024 at 06:24:12AM CEST, michal.swiatkowski@linux.intel.com wrote:
>On Sat, Aug 10, 2024 at 08:50:38AM +0200, Jiri Pirko wrote:
>> Fri, Aug 09, 2024 at 01:47:38PM CEST, michal.swiatkowski@linux.intel.com wrote:
>> >On Fri, Aug 09, 2024 at 01:16:28PM +0200, Jiri Pirko wrote:
>> >> Thu, Aug 08, 2024 at 07:30:49PM CEST, anthony.l.nguyen@intel.com wrote:
>> >> >From: Piotr Raczynski <piotr.raczynski@intel.com>
>> >> >
>> >> 
>> >> [...]
>> >> 
>> >> >+static int
>> >> >+ice_devlink_port_new_check_attr(struct ice_pf *pf,
>> >> >+				const struct devlink_port_new_attrs *new_attr,
>> >> >+				struct netlink_ext_ack *extack)
>> >> >+{
>> >> >+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
>> >> >+		return -EOPNOTSUPP;
>> >> >+	}
>> >> >+
>> >> >+	if (new_attr->controller_valid) {
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
>> >> >+		return -EOPNOTSUPP;
>> >> >+	}
>> >> >+
>> >> >+	if (new_attr->port_index_valid) {
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");
>> >> 
>> >> Nope, it is actually valid, but your driver does not support that.
>> >> Please fix the message.
>> >>
>> >
>> >Ok
>> >
>> >> 
>> >> >+		return -EOPNOTSUPP;
>> >> >+	}
>> >> >+
>> >> >+	if (new_attr->pfnum != pf->hw.bus.func) {
>> >> 
>> >> hw.bus.func, hmm. How about if you pass-through the PF to VM, can't the
>> >> func be anything? Will this still make sense? I don't think so. If you
>> >> get the PF number like this in the rest of the driver, you need to fix
>> >> this.
>> >>
>> >
>> >I can change it to our internal value. I wonder if it will be better. If
>> >I understand correctly, now:
>> >PF0 - xx.xx.0; PF1 - xx.xx.1
>> >
>> >I am doing pass-through PF1
>> >PF0 (on host) - xx.xx.0
>> >
>> >PF1 (on VM) - xx.xx.0 (there is one PF on VM, so for me it is more
>> >intuitive to have func 0)
>> >
>> >after I change:
>> >PF0 - xx.xx.0; PF1 - xx.xx.1
>> >
>> >pass-through PF1
>> >PF0 (on host) - xx.xx.0
>> >
>> >PF1 (on VM) - xx.xx.0, but user will have to use 1 as pf num
>> >
>> >Correct me if I am wrong.
>> 
>> Did you try this? I mean, you can check that easily with vng:
>> vng --qemu-opts="-device vfio-pci,host=5e:01.0,addr=01.04"
>> 
>> Then in the VM you see:
>> 0000:00:01.4
>> 
>> Then, by your code, the pf number is 4, isn't it?
>> 
>> What am I missing?
>
>I didn't know about addr parameter. I will change it to always have the
>same number so.

Please make sure this is fixed in other ice places:

$ git grep "bus.func" drivers/net/ethernet/intel/ice/
drivers/net/ethernet/intel/ice/devlink/devlink_port.c:  attrs.phys.port_number = pf->hw.bus.func;
drivers/net/ethernet/intel/ice/devlink/devlink_port.c:  attrs.pci_vf.pf = pf->hw.bus.func;
drivers/net/ethernet/intel/ice/ice_debugfs.c:   if (pf->hw.bus.func)
drivers/net/ethernet/intel/ice/ice_fwlog.c:     if (hw->bus.func)
drivers/net/ethernet/intel/ice/ice_fwlog.c:     if (hw->bus.func)
drivers/net/ethernet/intel/ice/ice_main.c:      hw->bus.func = PCI_FUNC(pdev->devfn);

Calls for a -net/stable fix IMO.

Also, check other intel drivers as well. I see this is a pattern.

Thanks!


>
>> 
>> 
>> 
>> >
>> >> 
>> >> 
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
>> >> >+		return -EINVAL;
>> >> >+	}
>> >> >+
>> >> >+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
>> >> >+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
>> >> >+		return -EOPNOTSUPP;
>> >> >+	}
>> >> >+
>> >> >+	return 0;
>> >> >+}
>> >> 
>> >> [...]
>> >> 
>> >> 
>> >> >+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
>> >> >+{
>> >> >+	struct devlink_port_attrs attrs = {};
>> >> >+	struct devlink_port *devlink_port;
>> >> >+	struct devlink *devlink;
>> >> >+	struct ice_vsi *vsi;
>> >> >+	struct ice_pf *pf;
>> >> >+
>> >> >+	vsi = dyn_port->vsi;
>> >> >+	pf = dyn_port->pf;
>> >> >+
>> >> >+	devlink_port = &dyn_port->devlink_port;
>> >> >+
>> >> >+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
>> >> >+	attrs.pci_sf.pf = pf->hw.bus.func;
>> >> 
>> >> Same here.
>> >> 
>> >> 
>> >> >+	attrs.pci_sf.sf = dyn_port->sfnum;
>> >> >+
>> >> >+	devlink_port_attrs_set(devlink_port, &attrs);
>> >> >+	devlink = priv_to_devlink(pf);
>> >> >+
>> >> >+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
>> >> >+					   &ice_devlink_port_sf_ops);
>> >> >+}
>> >> >+
>> >> 
>> >> [...]
diff mbox series

Patch

diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c
index 810a901d7afd..b7eb1b56f2c6 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c
@@ -6,6 +6,7 @@ 
 #include "ice.h"
 #include "ice_lib.h"
 #include "devlink.h"
+#include "devlink_port.h"
 #include "ice_eswitch.h"
 #include "ice_fw_update.h"
 #include "ice_dcb_lib.h"
@@ -1277,6 +1278,8 @@  static const struct devlink_ops ice_devlink_ops = {
 
 	.rate_leaf_parent_set = ice_devlink_set_parent,
 	.rate_node_parent_set = ice_devlink_set_parent,
+
+	.port_new = ice_devlink_port_new,
 };
 
 static int
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
index 00fed5a61d62..5d1fe08e4bab 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c
@@ -5,6 +5,9 @@ 
 
 #include "ice.h"
 #include "devlink.h"
+#include "devlink_port.h"
+#include "ice_lib.h"
+#include "ice_fltr.h"
 
 static int ice_active_port_option = -1;
 
@@ -485,3 +488,288 @@  void ice_devlink_destroy_vf_port(struct ice_vf *vf)
 	devl_rate_leaf_destroy(&vf->devlink_port);
 	devl_port_unregister(&vf->devlink_port);
 }
+
+/**
+ * ice_dealloc_dynamic_port - Deallocate and remove a dynamic port
+ * @dyn_port: dynamic port instance to deallocate
+ *
+ * Free resources associated with a dynamically added devlink port. Will
+ * deactivate the port if its currently active.
+ */
+static void ice_dealloc_dynamic_port(struct ice_dynamic_port *dyn_port)
+{
+	struct devlink_port *devlink_port = &dyn_port->devlink_port;
+	struct ice_pf *pf = dyn_port->pf;
+
+	xa_erase(&pf->sf_nums, devlink_port->attrs.pci_sf.sf);
+	devl_port_unregister(devlink_port);
+	ice_vsi_free(dyn_port->vsi);
+	xa_erase(&pf->dyn_ports, dyn_port->vsi->idx);
+	kfree(dyn_port);
+}
+
+/**
+ * ice_dealloc_all_dynamic_ports - Deallocate all dynamic devlink ports
+ * @pf: pointer to the pf structure
+ */
+void ice_dealloc_all_dynamic_ports(struct ice_pf *pf)
+{
+	struct ice_dynamic_port *dyn_port;
+	unsigned long index;
+
+	xa_for_each(&pf->dyn_ports, index, dyn_port)
+		ice_dealloc_dynamic_port(dyn_port);
+}
+
+/**
+ * ice_devlink_port_new_check_attr - Check that new port attributes are valid
+ * @pf: pointer to the PF structure
+ * @new_attr: the attributes for the new port
+ * @extack: extack for reporting error messages
+ *
+ * Check that the attributes for the new port are valid before continuing to
+ * allocate the devlink port.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_new_check_attr(struct ice_pf *pf,
+				const struct devlink_port_new_attrs *new_attr,
+				struct netlink_ext_ack *extack)
+{
+	if (new_attr->flavour != DEVLINK_PORT_FLAVOUR_PCI_SF) {
+		NL_SET_ERR_MSG_MOD(extack, "Flavour other than pcisf is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (new_attr->controller_valid) {
+		NL_SET_ERR_MSG_MOD(extack, "Setting controller is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (new_attr->port_index_valid) {
+		NL_SET_ERR_MSG_MOD(extack, "Port index is invalid");
+		return -EOPNOTSUPP;
+	}
+
+	if (new_attr->pfnum != pf->hw.bus.func) {
+		NL_SET_ERR_MSG_MOD(extack, "Incorrect pfnum supplied");
+		return -EINVAL;
+	}
+
+	if (!pci_msix_can_alloc_dyn(pf->pdev)) {
+		NL_SET_ERR_MSG_MOD(extack, "Dynamic MSIX-X interrupt allocation is not supported");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_devlink_port_del - devlink handler for port delete
+ * @devlink: pointer to devlink
+ * @port: devlink port to be deleted
+ * @extack: pointer to extack
+ *
+ * Deletes devlink port and deallocates all resources associated with
+ * created subfunction.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_devlink_port_del(struct devlink *devlink, struct devlink_port *port,
+		     struct netlink_ext_ack *extack)
+{
+	struct ice_dynamic_port *dyn_port;
+
+	dyn_port = ice_devlink_port_to_dyn(port);
+	ice_dealloc_dynamic_port(dyn_port);
+
+	return 0;
+}
+
+static const struct devlink_port_ops ice_devlink_port_sf_ops = {
+	.port_del = ice_devlink_port_del,
+};
+
+/**
+ * ice_reserve_sf_num - Reserve a subfunction number for this port
+ * @pf: pointer to the pf structure
+ * @new_attr: devlink port attributes requested
+ * @extack: extack for reporting error messages
+ * @sfnum: on success, the sf number reserved
+ *
+ * Reserve a subfunction number for this port. Only called for
+ * DEVLINK_PORT_FLAVOUR_PCI_SF ports.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_reserve_sf_num(struct ice_pf *pf,
+		   const struct devlink_port_new_attrs *new_attr,
+		   struct netlink_ext_ack *extack, u32 *sfnum)
+{
+	int err;
+
+	/* If user didn't request an explicit number, pick one */
+	if (!new_attr->sfnum_valid)
+		return xa_alloc(&pf->sf_nums, sfnum, NULL, xa_limit_32b,
+				GFP_KERNEL);
+
+	/* Otherwise, check and use the number provided */
+	err = xa_insert(&pf->sf_nums, new_attr->sfnum, NULL, GFP_KERNEL);
+	if (err) {
+		if (err == -EBUSY)
+			NL_SET_ERR_MSG_MOD(extack, "Subfunction with given sfnum already exists");
+		return err;
+	}
+
+	*sfnum = new_attr->sfnum;
+
+	return 0;
+}
+
+/**
+ * ice_devlink_create_sf_port - Register PCI subfunction devlink port
+ * @dyn_port: the dynamic port instance structure for this subfunction
+ *
+ * Register PCI subfunction flavour devlink port for a dynamically added
+ * subfunction port.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port)
+{
+	struct devlink_port_attrs attrs = {};
+	struct devlink_port *devlink_port;
+	struct devlink *devlink;
+	struct ice_vsi *vsi;
+	struct ice_pf *pf;
+
+	vsi = dyn_port->vsi;
+	pf = dyn_port->pf;
+
+	devlink_port = &dyn_port->devlink_port;
+
+	attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_SF;
+	attrs.pci_sf.pf = pf->hw.bus.func;
+	attrs.pci_sf.sf = dyn_port->sfnum;
+
+	devlink_port_attrs_set(devlink_port, &attrs);
+	devlink = priv_to_devlink(pf);
+
+	return devl_port_register_with_ops(devlink, devlink_port, vsi->idx,
+					   &ice_devlink_port_sf_ops);
+}
+
+/**
+ * ice_devlink_destroy_sf_port - Destroy the devlink_port for this SF
+ * @dyn_port: the dynamic port instance structure for this subfunction
+ *
+ * Unregisters the devlink_port structure associated with this SF.
+ */
+void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port)
+{
+	devl_port_unregister(&dyn_port->devlink_port);
+}
+
+/**
+ * ice_alloc_dynamic_port - Allocate new dynamic port
+ * @pf: pointer to the pf structure
+ * @new_attr: devlink port attributes requested
+ * @extack: extack for reporting error messages
+ * @devlink_port: index of newly created devlink port
+ *
+ * Allocate a new dynamic port instance and prepare it for configuration
+ * with devlink.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+static int
+ice_alloc_dynamic_port(struct ice_pf *pf,
+		       const struct devlink_port_new_attrs *new_attr,
+		       struct netlink_ext_ack *extack,
+		       struct devlink_port **devlink_port)
+{
+	struct ice_dynamic_port *dyn_port;
+	struct ice_vsi *vsi;
+	u32 sfnum;
+	int err;
+
+	err = ice_reserve_sf_num(pf, new_attr, extack, &sfnum);
+	if (err)
+		return err;
+
+	dyn_port = kzalloc(sizeof(*dyn_port), GFP_KERNEL);
+	if (!dyn_port) {
+		err = -ENOMEM;
+		goto unroll_reserve_sf_num;
+	}
+
+	vsi = ice_vsi_alloc(pf);
+	if (!vsi) {
+		NL_SET_ERR_MSG_MOD(extack, "Unable to allocate VSI");
+		err = -ENOMEM;
+		goto unroll_dyn_port_alloc;
+	}
+
+	dyn_port->vsi = vsi;
+	dyn_port->pf = pf;
+	dyn_port->sfnum = sfnum;
+	eth_random_addr(dyn_port->hw_addr);
+
+	err = xa_insert(&pf->dyn_ports, vsi->idx, dyn_port, GFP_KERNEL);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Port index reservation failed");
+		goto unroll_vsi_alloc;
+	}
+
+	err = ice_devlink_create_sf_port(dyn_port);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Port registration failed");
+		goto unroll_xa_insert;
+	}
+
+	*devlink_port = &dyn_port->devlink_port;
+
+	return 0;
+
+unroll_xa_insert:
+	xa_erase(&pf->dyn_ports, vsi->idx);
+unroll_vsi_alloc:
+	ice_vsi_free(vsi);
+unroll_dyn_port_alloc:
+	kfree(dyn_port);
+unroll_reserve_sf_num:
+	xa_erase(&pf->sf_nums, sfnum);
+
+	return err;
+}
+
+/**
+ * ice_devlink_port_new - devlink handler for the new port
+ * @devlink: pointer to devlink
+ * @new_attr: pointer to the port new attributes
+ * @extack: extack for reporting error messages
+ * @devlink_port: pointer to a new port
+ *
+ * Creates new devlink port, checks new port attributes and reject
+ * any unsupported parameters, allocates new subfunction for that port.
+ *
+ * Return: zero on success or an error code on failure.
+ */
+int
+ice_devlink_port_new(struct devlink *devlink,
+		     const struct devlink_port_new_attrs *new_attr,
+		     struct netlink_ext_ack *extack,
+		     struct devlink_port **devlink_port)
+{
+	struct ice_pf *pf = devlink_priv(devlink);
+	int err;
+
+	err = ice_devlink_port_new_check_attr(pf, new_attr, extack);
+	if (err)
+		return err;
+
+	return ice_alloc_dynamic_port(pf, new_attr, extack, devlink_port);
+}
diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h
index 9223bcdb6444..08ebf56664a5 100644
--- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.h
+++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.h
@@ -4,9 +4,43 @@ 
 #ifndef _DEVLINK_PORT_H_
 #define _DEVLINK_PORT_H_
 
+#include "../ice.h"
+
+/**
+ * struct ice_dynamic_port - Track dynamically added devlink port instance
+ * @hw_addr: the HW address for this port
+ * @active: true if the port has been activated
+ * @devlink_port: the associated devlink port structure
+ * @pf: pointer to the PF private structure
+ * @vsi: the VSI associated with this port
+ * @sfnum: the subfunction ID
+ *
+ * An instance of a dynamically added devlink port. Each port flavour
+ */
+struct ice_dynamic_port {
+	u8 hw_addr[ETH_ALEN];
+	u8 active: 1;
+	struct devlink_port devlink_port;
+	struct ice_pf *pf;
+	struct ice_vsi *vsi;
+	u32 sfnum;
+};
+
+void ice_dealloc_all_dynamic_ports(struct ice_pf *pf);
+
 int ice_devlink_create_pf_port(struct ice_pf *pf);
 void ice_devlink_destroy_pf_port(struct ice_pf *pf);
 int ice_devlink_create_vf_port(struct ice_vf *vf);
 void ice_devlink_destroy_vf_port(struct ice_vf *vf);
+int ice_devlink_create_sf_port(struct ice_dynamic_port *dyn_port);
+void ice_devlink_destroy_sf_port(struct ice_dynamic_port *dyn_port);
+
+#define ice_devlink_port_to_dyn(port) \
+	container_of(port, struct ice_dynamic_port, devlink_port)
 
+int
+ice_devlink_port_new(struct devlink *devlink,
+		     const struct devlink_port_new_attrs *new_attr,
+		     struct netlink_ext_ack *extack,
+		     struct devlink_port **devlink_port);
 #endif /* _DEVLINK_PORT_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 59885228cccf..d61f8e602cac 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -650,6 +650,9 @@  struct ice_pf {
 	struct ice_eswitch eswitch;
 	struct ice_esw_br_port *br_port;
 
+	struct xarray dyn_ports;
+	struct xarray sf_nums;
+
 #define ICE_INVALID_AGG_NODE_ID		0
 #define ICE_PF_AGG_NODE_ID_START	1
 #define ICE_MAX_PF_AGG_NODES		32
@@ -916,6 +919,7 @@  int ice_vsi_open(struct ice_vsi *vsi);
 void ice_set_ethtool_ops(struct net_device *netdev);
 void ice_set_ethtool_repr_ops(struct net_device *netdev);
 void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
+void ice_set_ethtool_sf_ops(struct net_device *netdev);
 u16 ice_get_avail_txq_count(struct ice_pf *pf);
 u16 ice_get_avail_rxq_count(struct ice_pf *pf);
 int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 56bbc3ebf9dc..eabdaf624793 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -7,6 +7,7 @@ 
 #include "ice_lib.h"
 #include "ice_fltr.h"
 #include "ice_dcb_lib.h"
+#include "ice_type.h"
 #include "ice_vsi_vlan_ops.h"
 
 /**
@@ -432,7 +433,7 @@  static int ice_vsi_alloc_ring_stats(struct ice_vsi *vsi)
  * This deallocates the VSI's queue resources, removes it from the PF's
  * VSI array if necessary, and deallocates the VSI
  */
-static void ice_vsi_free(struct ice_vsi *vsi)
+void ice_vsi_free(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = NULL;
 	struct device *dev;
@@ -604,7 +605,7 @@  ice_vsi_alloc_def(struct ice_vsi *vsi, struct ice_channel *ch)
  *
  * returns a pointer to a VSI on success, NULL on failure.
  */
-static struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf)
+struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf)
 {
 	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_vsi *vsi = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index f9ee461c5c06..5de0cc50552c 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -66,6 +66,8 @@  void ice_dis_vsi(struct ice_vsi *vsi, bool locked);
 
 int ice_vsi_rebuild(struct ice_vsi *vsi, u32 vsi_flags);
 int ice_vsi_cfg(struct ice_vsi *vsi);
+struct ice_vsi *ice_vsi_alloc(struct ice_pf *pf);
+void ice_vsi_free(struct ice_vsi *vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
 int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d8a89a6152a1..7ce025b6efe4 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3987,6 +3987,9 @@  static void ice_deinit_pf(struct ice_pf *pf)
 
 	if (pf->ptp.clock)
 		ptp_clock_unregister(pf->ptp.clock);
+
+	xa_destroy(&pf->dyn_ports);
+	xa_destroy(&pf->sf_nums);
 }
 
 /**
@@ -4080,6 +4083,9 @@  static int ice_init_pf(struct ice_pf *pf)
 	hash_init(pf->vfs.table);
 	ice_mbx_init_snapshot(&pf->hw);
 
+	xa_init(&pf->dyn_ports);
+	xa_init(&pf->sf_nums);
+
 	return 0;
 }
 
@@ -5422,6 +5428,7 @@  static void ice_remove(struct pci_dev *pdev)
 		ice_remove_arfs(pf);
 
 	devl_lock(priv_to_devlink(pf));
+	ice_dealloc_all_dynamic_ports(pf);
 	ice_deinit_devlink(pf);
 
 	ice_unload(pf);