
[net-next,3/8] net/funeth: probing and netdev ops

Message ID 20211230163909.160269-4-dmichail@fungible.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Series new Fungible Ethernet driver

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 10 maintainers not CCed: andrii@kernel.org hawk@kernel.org kpsingh@kernel.org daniel@iogearbox.net john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com bpf@vger.kernel.org ast@kernel.org yhs@fb.com
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param fail Was 0 now: 1
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: From:/Signed-off-by: email address mismatch: 'From: Dimitris Michailidis <d.michailidis@fungible.com>' != 'Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>' WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Dimitris Michailidis Dec. 30, 2021, 4:39 p.m. UTC
This is the first part of the Fungible ethernet driver. It deals with
device probing, net_device creation, and netdev ops.

Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
---
 drivers/net/ethernet/fungible/funeth/funeth.h |  153 ++
 .../ethernet/fungible/funeth/funeth_main.c    | 1772 +++++++++++++++++
 2 files changed, 1925 insertions(+)
 create mode 100644 drivers/net/ethernet/fungible/funeth/funeth.h
 create mode 100644 drivers/net/ethernet/fungible/funeth/funeth_main.c

Comments

Andrew Lunn Dec. 30, 2021, 5:40 p.m. UTC | #1
> +static int msg_enable;
> +module_param(msg_enable, int, 0644);
> +MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
> +

Module params are not liked. Please implement the ethtool op, if you
have not already done so.

     Andrew
Dimitris Michailidis Dec. 30, 2021, 6:33 p.m. UTC | #2
On Thu, Dec 30, 2021 at 9:40 AM Andrew Lunn <andrew@lunn.ch> wrote:
>
> > +static int msg_enable;
> > +module_param(msg_enable, int, 0644);
> > +MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
> > +
>
> Module params are not liked. Please implement the ethtool op, if you
> have not already done so.

The associated ethtool op is implemented. I think this module param is
fairly common
to control messages during probe and generally before the ethtool path
is available.

>
>      Andrew
Andrew Lunn Dec. 30, 2021, 7:02 p.m. UTC | #3
On Thu, Dec 30, 2021 at 10:33:03AM -0800, Dimitris Michailidis wrote:
> On Thu, Dec 30, 2021 at 9:40 AM Andrew Lunn <andrew@lunn.ch> wrote:
> >
> > > +static int msg_enable;
> > > +module_param(msg_enable, int, 0644);
> > > +MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
> > > +
> >
> > Module params are not liked. Please implement the ethtool op, if you
> > have not already done so.
> 
> The associated ethtool op is implemented. I think this module param is
> fairly common
> to control messages during probe and generally before the ethtool path
> is available.

It is common in older drivers, but in general new drivers don't have
module parameters at all. Anybody debugging code before ethtool is
available from user space is probably also capable of recompiling the
driver to change the default value.

       Andrew
Dimitris Michailidis Dec. 30, 2021, 8:05 p.m. UTC | #4
On Thu, Dec 30, 2021 at 11:02 AM Andrew Lunn <andrew@lunn.ch> wrote:
>
> On Thu, Dec 30, 2021 at 10:33:03AM -0800, Dimitris Michailidis wrote:
> > On Thu, Dec 30, 2021 at 9:40 AM Andrew Lunn <andrew@lunn.ch> wrote:
> > >
> > > > +static int msg_enable;
> > > > +module_param(msg_enable, int, 0644);
> > > > +MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
> > > > +
> > >
> > > Module params are not liked. Please implement the ethtool op, if you
> > > have not already done so.
> >
> > The associated ethtool op is implemented. I think this module param is
> > fairly common
> > to control messages during probe and generally before the ethtool path
> > is available.
>
> It is common in older drivers, but in general new drivers don't have
> module parameters at all. Anybody debugging code before ethtool is
> available from user space is probably also capable of recompiling the
> driver to change the default value.

OK, will remove it.

>        Andrew
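
For reference, the ethtool op under discussion is the msglevel pair. A minimal sketch only, assuming the funeth_priv::msg_enable field added by this patch; the driver's actual ethtool implementation lives in a separate patch of this series:

/* Sketch: msglevel hooks backed by funeth_priv::msg_enable. */
static u32 fun_get_msglevel(struct net_device *netdev)
{
	const struct funeth_priv *fp = netdev_priv(netdev);

	return fp->msg_enable;
}

static void fun_set_msglevel(struct net_device *netdev, u32 value)
{
	struct funeth_priv *fp = netdev_priv(netdev);

	fp->msg_enable = value;
}

static const struct ethtool_ops fun_ethtool_ops = {
	.get_msglevel	= fun_get_msglevel,
	.set_msglevel	= fun_set_msglevel,
	/* ... remaining ops ... */
};

With these hooks, `ethtool -s <dev> msglvl <bitmap>` replaces the module parameter once the interface exists.
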
Heiner Kallweit Dec. 31, 2021, 11:14 a.m. UTC | #5
On 30.12.2021 17:39, Dimitris Michailidis wrote:
> This is the first part of the Fungible ethernet driver. It deals with
> device probing, net_device creation, and netdev ops.
> 
> Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
> ---
>  drivers/net/ethernet/fungible/funeth/funeth.h |  153 ++
>  .../ethernet/fungible/funeth/funeth_main.c    | 1772 +++++++++++++++++
>  2 files changed, 1925 insertions(+)
>  create mode 100644 drivers/net/ethernet/fungible/funeth/funeth.h
>  create mode 100644 drivers/net/ethernet/fungible/funeth/funeth_main.c
> 
> diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h
> new file mode 100644
> index 000000000000..0c089f685c7f
> --- /dev/null
> +++ b/drivers/net/ethernet/fungible/funeth/funeth.h
> @@ -0,0 +1,153 @@
> +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
> +
> +#ifndef _FUNETH_H
> +#define _FUNETH_H
> +
> +#include <uapi/linux/if_ether.h>
> +#include <uapi/linux/net_tstamp.h>
> +#include <linux/seqlock.h>
> +#include <net/devlink.h>
> +#include "fun_dev.h"
> +
> +#define ADMIN_SQE_SIZE SZ_128
> +#define ADMIN_CQE_SIZE SZ_64
> +#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info))
> +
> +#define FUN_MAX_MTU 9024
> +
> +#define SQ_DEPTH 512U
> +#define CQ_DEPTH 1024U
> +#define RQ_DEPTH (512U / (PAGE_SIZE / 4096))
> +
> +#define CQ_INTCOAL_USEC 10
> +#define CQ_INTCOAL_NPKT 16
> +#define SQ_INTCOAL_USEC 10
> +#define SQ_INTCOAL_NPKT 16
> +
> +#define INVALID_LPORT 0xffff
> +
> +#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE)
> +
> +struct fun_vport_info {
> +	u8 mac[ETH_ALEN];
> +	u16 vlan;
> +	__be16 vlan_proto;
> +	u8 qos;
> +	u8 spoofchk:1;
> +	u8 trusted:1;
> +	unsigned int max_rate;
> +};
> +
> +/* "subclass" of fun_dev for Ethernet functions */
> +struct fun_ethdev {
> +	struct fun_dev fdev;
> +
> +	/* the function's network ports */
> +	struct net_device **netdevs;
> +	unsigned int num_ports;
> +
> +	/* configuration for the function's virtual ports */
> +	unsigned int num_vports;
> +	struct fun_vport_info *vport_info;
> +
> +	unsigned int nsqs_per_port;
> +};
> +
> +static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p)
> +{
> +	return container_of(p, struct fun_ethdev, fdev);
> +}
> +
> +/* Per netdevice driver state, i.e., netdev_priv. */
> +struct funeth_priv {
> +	struct fun_dev *fdev;
> +	struct pci_dev *pdev;
> +	struct net_device *netdev;
> +
> +	struct funeth_rxq * __rcu *rxqs;
> +	struct funeth_txq **txqs;
> +	struct funeth_txq **xdpqs;
> +
> +	struct fun_irq *irqs;
> +	unsigned int num_irqs;
> +	unsigned int num_tx_irqs;
> +
> +	unsigned int lane_attrs;
> +	u16 lport;
> +
> +	/* link settings */
> +	u64 port_caps;
> +	u64 advertising;
> +	u64 lp_advertising;
> +	unsigned int link_speed;

Any specific reason for handling this manually?
Why not use phylib/phylink?

> +	u8 xcvr_type;
> +	u8 active_fc;
> +	u8 active_fec;
> +	u8 link_down_reason;
> +	seqcount_t link_seq;
> +
> +	u32 msg_enable;
> +
> +	unsigned int ethid_start;
> +
> +	unsigned int num_xdpqs;
> +
> +	/* ethtool, etc. config parameters */
> +	unsigned int sq_depth;
> +	unsigned int rq_depth;
> +	unsigned int cq_depth;
> +	unsigned int cq_irq_db;
> +	u8 tx_coal_usec;
> +	u8 tx_coal_count;
> +	u8 rx_coal_usec;
> +	u8 rx_coal_count;
> +
> +	struct hwtstamp_config hwtstamp_cfg;
> +
> +	/* cumulative queue stats from earlier queue instances */
> +	u64 tx_packets;
> +	u64 tx_bytes;
> +	u64 tx_dropped;
> +	u64 rx_packets;
> +	u64 rx_bytes;
> +	u64 rx_dropped;
> +
> +	/* RSS */
> +	unsigned int rss_hw_id;
> +	enum fun_eth_hash_alg hash_algo;
> +	u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE];
> +	unsigned int indir_table_nentries;
> +	u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT];
> +	dma_addr_t rss_dma_addr;
> +	void *rss_cfg;
> +
> +	/* DMA area for port stats */
> +	dma_addr_t stats_dma_addr;
> +	u64 *stats;
> +
> +	struct bpf_prog *xdp_prog;
> +
> +	struct devlink_port dl_port;
> +
> +	/* kTLS state */
> +	unsigned int ktls_id;
> +	atomic64_t tx_tls_add;
> +	atomic64_t tx_tls_del;
> +	atomic64_t tx_tls_resync;
> +};
> +
> +void fun_set_ethtool_ops(struct net_device *netdev);
> +int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
> +		       const int *keys, u64 *data);
> +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data);
> +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data);
> +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 ethid, u32 sqid);
> +void fun_reset_rss_indir(struct net_device *dev);
> +int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
> +		   const u32 *qtable, u8 op);
> +
> +int fun_alloc_rings(struct net_device *netdev);
> +void fun_free_rings(struct net_device *netdev);
> +int fun_alloc_queue_irqs(struct net_device *dev);
> +
> +#endif /* _FUNETH_H */
> diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c
> new file mode 100644
> index 000000000000..11823006cce5
> --- /dev/null
> +++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c
> @@ -0,0 +1,1772 @@
> +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
> +
> +#include <linux/bpf.h>
> +#include <linux/crash_dump.h>
> +#include <linux/etherdevice.h>
> +#include <linux/ethtool.h>
> +#include <linux/filter.h>
> +#include <linux/idr.h>
> +#include <linux/if_vlan.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <linux/pci.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/inetdevice.h>
> +
> +#include "funeth.h"
> +#include "funeth_devlink.h"
> +#include "funeth_ktls.h"
> +#include "fun_port.h"
> +#include "fun_queue.h"
> +#include "funeth_txrx.h"
> +
> +#define ADMIN_SQ_DEPTH 32
> +#define ADMIN_CQ_DEPTH 64
> +#define ADMIN_RQ_DEPTH 16
> +
> +/* Default number of Tx/Rx queues. */
> +#define FUN_DFLT_QUEUES 16U
> +
> +enum {
> +	FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL,
> +	FUN_SERV_DEL_PORTS,
> +};
> +
> +static int msg_enable;
> +module_param(msg_enable, int, 0644);
> +MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
> +
> +static const struct pci_device_id funeth_id_table[] = {
> +	{ PCI_VDEVICE(FUNGIBLE, 0x0101) },
> +	{ PCI_VDEVICE(FUNGIBLE, 0x0181) },
> +	{ 0, }
> +};
> +
> +/* Issue a port write admin command with @n key/value pairs. */
> +int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n,
> +			const int *keys, const u64 *data)
> +{
> +	unsigned int cmd_size, i;
> +	union {
> +		struct fun_admin_port_req req;
> +		struct fun_admin_port_rsp rsp;
> +		u8 v[ADMIN_SQE_SIZE];
> +	} cmd;
> +
> +	cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) +
> +		n * sizeof(struct fun_admin_write48_req);
> +	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
> +		return -EINVAL;
> +
> +	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
> +						    cmd_size);
> +	cmd.req.u.write =
> +		FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0,
> +					      fp->netdev->dev_port);
> +	for (i = 0; i < n; i++)
> +		cmd.req.u.write.write48[i] =
> +			FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]);
> +
> +	return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
> +					 &cmd.rsp, cmd_size, 0);
> +}
> +
> +int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data)
> +{
> +	return fun_port_write_cmds(fp, 1, &key, &data);
> +}
> +
> +/* Issue a port read admin command with @n key/value pairs. */
> +int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
> +		       const int *keys, u64 *data)
> +{
> +	const struct fun_admin_read48_rsp *r48rsp;
> +	unsigned int cmd_size, i;
> +	int rc;
> +	union {
> +		struct fun_admin_port_req req;
> +		struct fun_admin_port_rsp rsp;
> +		u8 v[ADMIN_SQE_SIZE];
> +	} cmd;
> +
> +	cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) +
> +		n * sizeof(struct fun_admin_read48_req);
> +	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
> +		return -EINVAL;
> +
> +	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
> +						    cmd_size);
> +	cmd.req.u.read =
> +		FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0,
> +					     fp->netdev->dev_port);
> +	for (i = 0; i < n; i++)
> +		cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]);
> +
> +	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
> +				       &cmd.rsp, cmd_size, 0);
> +	if (rc)
> +		return rc;
> +
> +	for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) {
> +		data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data);
> +		dev_dbg(fp->fdev->dev,
> +			"port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld",
> +			fp->lport, r48rsp->key_to_data, keys[i], data[i],
> +			FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data));
> +	}
> +	return 0;
> +}
> +
> +int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data)
> +{
> +	return fun_port_read_cmds(fp, 1, &key, data);
> +}
> +
> +static void fun_report_link(struct net_device *netdev)
> +{
> +	if (netif_carrier_ok(netdev)) {
> +		const struct funeth_priv *fp = netdev_priv(netdev);
> +		const char *fec = "", *pause = "";
> +		int speed = fp->link_speed;
> +		char unit = 'M';
> +
> +		if (fp->link_speed >= SPEED_1000) {
> +			speed /= 1000;
> +			unit = 'G';
> +		}
> +
> +		if (fp->active_fec & FUN_PORT_FEC_RS)
> +			fec = ", RS-FEC";
> +		else if (fp->active_fec & FUN_PORT_FEC_FC)
> +			fec = ", BASER-FEC";
> +
> +		if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK)
> +			pause = ", Tx/Rx PAUSE";
> +		else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE)
> +			pause = ", Rx PAUSE";
> +		else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE)
> +			pause = ", Tx PAUSE";
> +
> +		netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s%s\n",
> +			    speed, unit, pause, fec,
> +			    netif_dormant(netdev) ? ", dormant" : "");
> +	} else {
> +		netdev_info(netdev, "Link down\n");
> +	}
> +}
> +
> +static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr,
> +			 unsigned int adi_id, const struct fun_adi_param *param)
> +{
> +	struct fun_admin_adi_req req = {
> +		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI,
> +						     sizeof(req)),
> +		.u.write = FUN_ADMIN_ADI_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE,
> +							attr, adi_id),
> +		.u.write.param = *param
> +	};
> +
> +	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
> +}
> +
> +/* Configure RSS for the given port. @op determines whether a new RSS context
> + * is to be created or whether an existing one should be reconfigured. The
> + * remaining parameters specify the hashing algorithm, key, and indirection
> + * table.
> + *
> + * This initiates packet delivery to the Rx queues set in the indirection
> + * table.
> + */
> +int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
> +		   const u32 *qtable, u8 op)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	unsigned int table_len = fp->indir_table_nentries;
> +	unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len;
> +	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
> +	u32 *indir_tab;
> +	u16 flags;
> +	int rc;
> +	union {
> +		struct {
> +			struct fun_admin_rss_req req;
> +			struct fun_dataop_gl gl;
> +		};
> +		struct fun_admin_generic_create_rsp rsp;
> +	} cmd;
> +
> +	if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID)
> +		return -EINVAL;
> +
> +	flags = op == FUN_ADMIN_SUBOP_CREATE ?
> +			FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0;
> +	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS,
> +						    sizeof(cmd));
> +	cmd.req.u.create =
> +		FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id,
> +					      dev->dev_port, algo,
> +					      FUN_ETH_RSS_MAX_KEY_SIZE,
> +					      table_len, 0,
> +					      FUN_ETH_RSS_MAX_KEY_SIZE);
> +	cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
> +	fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr);
> +
> +	/* write the key and indirection table into the RSS DMA area */
> +	memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE);
> +	indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE;
> +	for (rc = 0; rc < table_len; rc++)
> +		*indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid);
> +
> +	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
> +				       &cmd.rsp, sizeof(cmd.rsp), 0);
> +	if (!rc && op == FUN_ADMIN_SUBOP_CREATE)
> +		fp->rss_hw_id = be32_to_cpu(cmd.rsp.id);
> +	return rc;
> +}
> +
> +/* Destroy the HW RSS context associated with the given port. This also stops
> + * all packet delivery to our Rx queues.
> + */
> +static int fun_destroy_rss(struct funeth_priv *fp)
> +{
> +	int rc;
> +
> +	if (fp->rss_hw_id == FUN_HCI_ID_INVALID)
> +		return 0;
> +
> +	rc = fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id);
> +	fp->rss_hw_id = FUN_HCI_ID_INVALID;
> +	return rc;
> +}
> +
> +static void free_txqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct funeth_txq **txqs = fp->txqs;
> +	unsigned int i;
> +
> +	for (i = 0; i < dev->real_num_tx_queues && txqs[i]; i++) {
> +		fp->irqs[txqs[i]->irq_idx].txq = NULL;
> +		funeth_txq_free(txqs[i]);
> +		txqs[i] = NULL;
> +	}
> +}
> +
> +static int alloc_txqs(struct net_device *dev, unsigned int start_irq)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct funeth_txq **txqs = fp->txqs, *q;
> +	unsigned int i;
> +
> +	for (i = 0; i < dev->real_num_tx_queues; i++) {
> +		q = funeth_txq_create(dev, i, fp->sq_depth,
> +				      &fp->irqs[start_irq + i]);
> +		if (IS_ERR(q)) {
> +			free_txqs(dev);
> +			return PTR_ERR(q);
> +		}
> +		txqs[i] = q;
> +	}
> +	return 0;
> +}
> +
> +static void free_rxqs(struct net_device *dev, struct funeth_rxq **rxqs)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	unsigned int i;
> +
> +	for (i = 0; i < dev->real_num_rx_queues && rxqs[i]; i++) {
> +		fp->irqs[rxqs[i]->irq_idx].rxq = NULL;
> +		funeth_rxq_free(rxqs[i]);
> +		rxqs[i] = NULL;
> +	}
> +}
> +
> +static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs,
> +		      unsigned int start_irq)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct funeth_rxq *q;
> +	unsigned int i;
> +
> +	for (i = 0; i < dev->real_num_rx_queues; i++) {
> +		q = funeth_rxq_create(dev, i, fp->cq_depth, fp->rq_depth,
> +				      &fp->irqs[start_irq + i]);
> +		if (IS_ERR(q)) {
> +			free_rxqs(dev, rxqs);
> +			return PTR_ERR(q);
> +		}
> +		rxqs[i] = q;
> +	}
> +	return 0;
> +}
> +
> +static void free_xdpqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct funeth_txq **xdpqs = fp->xdpqs;
> +	unsigned int i;
> +
> +	for (i = 0; i < fp->num_xdpqs && xdpqs[i]; i++) {
> +		funeth_txq_free(xdpqs[i]);
> +		xdpqs[i] = NULL;
> +	}
> +}
> +
> +static int alloc_xdpqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct funeth_txq **xdpqs = fp->xdpqs, *q;
> +	unsigned int i;
> +
> +	for (i = 0; i < fp->num_xdpqs; i++) {
> +		q = funeth_txq_create(dev, i, fp->sq_depth, NULL);
> +		if (IS_ERR(q)) {
> +			free_xdpqs(dev);
> +			return PTR_ERR(q);
> +		}
> +		xdpqs[i] = q;
> +	}
> +	return 0;
> +}
> +
> +void fun_free_rings(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
> +
> +	if (!rxqs)
> +		return;
> +
> +	rcu_assign_pointer(fp->rxqs, NULL);
> +	synchronize_net();
> +
> +	free_rxqs(netdev, rxqs);
> +	free_txqs(netdev);
> +	fp->txqs = NULL;
> +	free_xdpqs(netdev);
> +	fp->xdpqs = NULL;
> +	kfree(rxqs);
> +}
> +
> +int fun_alloc_rings(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	struct funeth_rxq **rxqs;
> +	unsigned int total_qs;
> +	int err;
> +
> +	total_qs = netdev->real_num_tx_queues + netdev->real_num_rx_queues +
> +		   fp->num_xdpqs;
> +
> +	rxqs = kcalloc(total_qs, sizeof(*rxqs), GFP_KERNEL);
> +	if (!rxqs)
> +		return -ENOMEM;
> +
> +	fp->txqs = (struct funeth_txq **)&rxqs[netdev->real_num_rx_queues];
> +	err = alloc_txqs(netdev, 0);
> +	if (err)
> +		goto free_qvec;
> +
> +	if (fp->num_xdpqs) {
> +		fp->xdpqs = (struct funeth_txq **)&rxqs[total_qs - fp->num_xdpqs];
> +		err = alloc_xdpqs(netdev);
> +		if (err)
> +			goto free_txqs;
> +	}
> +
> +	err = alloc_rxqs(netdev, rxqs, netdev->real_num_tx_queues);
> +	if (err)
> +		goto free_xdpqs;
> +
> +	rcu_assign_pointer(fp->rxqs, rxqs);
> +	return 0;
> +
> +free_xdpqs:
> +	free_xdpqs(netdev);
> +free_txqs:
> +	free_txqs(netdev);
> +free_qvec:
> +	fp->txqs = NULL;
> +	fp->xdpqs = NULL;
> +	kfree(rxqs);
> +	return err;
> +}
> +
> +static int fun_port_create(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	union {
> +		struct fun_admin_port_req req;
> +		struct fun_admin_port_rsp rsp;
> +	} cmd;
> +	int rc;
> +
> +	if (fp->lport != INVALID_LPORT)
> +		return 0;
> +
> +	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
> +						    sizeof(cmd.req));
> +	cmd.req.u.create =
> +		FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
> +					       netdev->dev_port);
> +
> +	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
> +				       sizeof(cmd.rsp), 0);
> +
> +	if (!rc)
> +		fp->lport = be16_to_cpu(cmd.rsp.u.create.lport);
> +	return rc;
> +}
> +
> +static int fun_port_destroy(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +
> +	if (fp->lport == INVALID_LPORT)
> +		return 0;
> +
> +	fp->lport = INVALID_LPORT;
> +	return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0,
> +			       netdev->dev_port);
> +}
> +
> +static int fun_eth_create(struct funeth_priv *fp, u32 ethid)
> +{
> +	struct fun_admin_eth_req req = {
> +		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH,
> +						     sizeof(req)),
> +		.u.create =
> +			FUN_ADMIN_ETH_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
> +						      ethid,
> +						      fp->netdev->dev_port)
> +	};
> +
> +	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
> +}
> +
> +static int fun_vi_create(struct funeth_priv *fp)
> +{
> +	struct fun_admin_vi_req req = {
> +		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI,
> +						     sizeof(req)),
> +		.u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE,
> +							 0,
> +							 fp->netdev->dev_port,
> +							 fp->netdev->dev_port)
> +	};
> +
> +	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
> +}
> +
> +/* helper to create an ETH flow and bind an SQ to it */
> +int fun_create_and_bind_tx(struct funeth_priv *fp, u32 ethid, u32 sqid)
> +{
> +	int rc;
> +
> +	netif_info(fp, ifup, fp->netdev,
> +		   "creating ETH flow %u and binding SQ id %u\n", ethid, sqid);
> +	rc = fun_eth_create(fp, ethid);
> +	if (!rc) {
> +		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
> +			      FUN_ADMIN_BIND_TYPE_ETH, ethid);
> +		if (rc)
> +			fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid);
> +	}
> +	return rc;
> +}
> +
> +static void fun_irq_aff_notify(struct irq_affinity_notify *notify,
> +			       const cpumask_t *mask)
> +{
> +	struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify);
> +
> +	cpumask_copy(&p->affinity_mask, mask);
> +}
> +
> +static void fun_irq_aff_release(struct kref __always_unused *ref)
> +{
> +}
> +
> +static void fun_init_irq(struct fun_irq *p, int node, int idx)
> +{
> +	cpumask_set_cpu(cpumask_local_spread(idx, node), &p->affinity_mask);
> +	p->aff_notify.notify = fun_irq_aff_notify;
> +	p->aff_notify.release = fun_irq_aff_release;
> +}
> +
> +static void fun_free_irqs_from(struct funeth_priv *fp, unsigned int start)
> +{
> +	struct fun_irq *p = fp->irqs + start;
> +
> +	for ( ; start < fp->num_irqs; start++, p++) {
> +		netif_napi_del(&p->napi);
> +		fun_release_irqs(fp->fdev, 1, &p->irq_idx);
> +	}
> +}
> +
> +/* Release the IRQ vectors reserved for Tx/Rx queues. */
> +static void fun_free_queue_irqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +
> +	if (fp->num_irqs) {
> +		netif_info(fp, intr, dev, "Releasing %u queue IRQs\n",
> +			   fp->num_irqs);
> +		fun_free_irqs_from(fp, 0);
> +		kfree(fp->irqs);
> +		fp->irqs = NULL;
> +		fp->num_irqs = 0;
> +		fp->num_tx_irqs = 0;
> +	}
> +}
> +
> +/* Reserve IRQ vectors, one per queue. We hold on to allocated vectors until
> + * the total number of queues changes.
> + */
> +int fun_alloc_queue_irqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	unsigned int i, copy, irqs_needed;
> +	struct fun_irq *irqs, *p;
> +	int node, res = -ENOMEM;
> +	u16 *irq_idx;
> +
> +	irqs_needed = dev->real_num_rx_queues + dev->real_num_tx_queues;
> +	if (irqs_needed == fp->num_irqs &&
> +	    fp->num_tx_irqs == dev->real_num_tx_queues)
> +		return 0;
> +
> +	/* IRQ needs have changed, reallocate. */
> +	irqs = kcalloc(irqs_needed, sizeof(*irqs), GFP_KERNEL);
> +	if (!irqs)
> +		return -ENOMEM;
> +
> +	irq_idx = kcalloc(irqs_needed, sizeof(u16), GFP_KERNEL);
> +	if (!irq_idx)
> +		goto free;
> +
> +	/* keep as many existing IRQs as possible */
> +	copy = min(irqs_needed, fp->num_irqs);
> +	for (i = 0; i < copy; i++)
> +		irq_idx[i] = fp->irqs[i].irq_idx;
> +
> +	/* get additional IRQs */
> +	if (irqs_needed > fp->num_irqs) {
> +		unsigned int addl_irqs = irqs_needed - fp->num_irqs;
> +
> +		res = fun_reserve_irqs(fp->fdev, addl_irqs, irq_idx + copy);
> +		if (res != addl_irqs)
> +			goto free;
> +	}
> +
> +	/* release excess IRQs */
> +	fun_free_irqs_from(fp, copy);
> +
> +	for (i = 0; i < copy; i++)
> +		netif_napi_del(&fp->irqs[i].napi);
> +
> +	/* new Tx IRQs */
> +	copy = min(dev->real_num_tx_queues, fp->num_tx_irqs);
> +	memcpy(irqs, fp->irqs, copy * sizeof(*p));
> +
> +	node = dev_to_node(&fp->pdev->dev);
> +	for (p = irqs + copy, i = copy; i < dev->real_num_tx_queues; i++, p++)
> +		fun_init_irq(p, node, i);
> +
> +	/* new Rx IRQs */
> +	copy = min(dev->real_num_rx_queues, fp->num_irqs - fp->num_tx_irqs);
> +	memcpy(p, fp->irqs + fp->num_tx_irqs, copy * sizeof(*p));
> +	p += copy;
> +
> +	for (i = copy; i < dev->real_num_rx_queues; i++, p++)
> +		fun_init_irq(p, node, i);
> +
> +	/* assign IRQ vectors and register NAPI */
> +	for (i = 0; i < irqs_needed; i++) {
> +		irqs[i].irq_idx = irq_idx[i];
> +		irqs[i].irq = pci_irq_vector(fp->pdev, irq_idx[i]);
> +	}
> +
> +	for (p = irqs, i = 0; i < dev->real_num_tx_queues; i++, p++)
> +		netif_tx_napi_add(dev, &p->napi, fun_txq_napi_poll,
> +				  NAPI_POLL_WEIGHT);
> +
> +	for (i = 0; i < dev->real_num_rx_queues; i++, p++)
> +		netif_napi_add(dev, &p->napi, fun_rxq_napi_poll,
> +			       NAPI_POLL_WEIGHT);
> +
> +	kfree(irq_idx);
> +	kfree(fp->irqs);
> +
> +	fp->irqs = irqs;
> +	fp->num_irqs = irqs_needed;
> +	fp->num_tx_irqs = dev->real_num_tx_queues;
> +	netif_info(fp, intr, dev, "Reserved %u IRQs for Tx/Rx queues\n",
> +		   irqs_needed);
> +	return 0;
> +
> +free:
> +	kfree(irq_idx);
> +	kfree(irqs);
> +	return res;
> +}
> +
> +static irqreturn_t fun_queue_irq_handler(int irq, void *data)
> +{
> +	struct fun_irq *p = data;
> +
> +	if (p->rxq) {
> +		prefetch(p->rxq->next_cqe_info);
> +		p->rxq->irq_cnt++;
> +	}
> +	napi_schedule_irqoff(&p->napi);
> +	return IRQ_HANDLED;
> +}
> +
> +static int fun_enable_irqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	unsigned int i, qidx;
> +	struct fun_irq *p;
> +	const char *qtype;
> +	int err;
> +
> +	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
> +		if (p->txq) {
> +			qtype = "tx";
> +			qidx = p->txq->qidx;
> +		} else if (p->rxq) {
> +			qtype = "rx";
> +			qidx = p->rxq->qidx;
> +		} else {
> +			continue;
> +		}
> +
> +		snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name,
> +			 qtype, qidx);
> +		err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p);
> +		if (err) {
> +			netdev_err(dev, "Failed to allocate IRQ %u, err %d\n",
> +				   p->irq, err);
> +			goto unroll;
> +		}
> +	}
> +
> +	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
> +		if (!p->txq && !p->rxq)
> +			continue;
> +		irq_set_affinity_notifier(p->irq, &p->aff_notify);
> +		irq_set_affinity_hint(p->irq, &p->affinity_mask);
> +		napi_enable(&p->napi);
> +	}
> +
> +	return 0;
> +
> +unroll:
> +	while (i--) {
> +		p--;
> +		free_irq(p->irq, p);
> +	}
> +	return err;
> +}
> +
> +static void fun_disable_irqs(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct fun_irq *p;
> +	unsigned int i;
> +
> +	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
> +		if (!p->txq && !p->rxq)
> +			continue;
> +
> +		napi_disable(&p->napi);
> +		irq_set_affinity_notifier(p->irq, NULL);
> +		irq_set_affinity_hint(p->irq, NULL);
> +		free_irq(p->irq, p);
> +	}
> +}
> +
> +static int funeth_open(struct net_device *netdev)
> +{
> +	static const int port_keys[] = {
> +		FUN_ADMIN_PORT_KEY_STATS_DMA_LOW,
> +		FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH,
> +		FUN_ADMIN_PORT_KEY_ENABLE
> +	};
> +
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	u64 vals[] = {
> +		lower_32_bits(fp->stats_dma_addr),
> +		upper_32_bits(fp->stats_dma_addr),
> +		FUN_PORT_FLAG_ENABLE_NOTIFY
> +	};
> +	int rc;
> +
> +	rc = fun_alloc_queue_irqs(netdev);
> +	if (rc)
> +		return rc;
> +
> +	rc = fun_alloc_rings(netdev);
> +	if (rc)
> +		return rc;
> +
> +	rc = fun_vi_create(fp);
> +	if (rc)
> +		goto free_queues;
> +
> +	rc = fun_enable_irqs(netdev);
> +	if (rc)
> +		goto destroy_vi;
> +
> +	if (fp->rss_cfg) {
> +		rc = fun_config_rss(netdev, fp->hash_algo, fp->rss_key,
> +				    fp->indir_table, FUN_ADMIN_SUBOP_CREATE);
> +	} else {
> +		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
> +
> +		/* The non-RSS case has only 1 queue. */
> +		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI,
> +			      netdev->dev_port, FUN_ADMIN_BIND_TYPE_EPCQ,
> +			      rxqs[0]->hw_cqid);
> +	}
> +	if (rc)
> +		goto disable_irqs;
> +
> +	rc = fun_port_write_cmds(fp, 3, port_keys, vals);
> +	if (rc)
> +		goto free_rss;
> +
> +	netif_tx_start_all_queues(netdev);
> +	return 0;
> +
> +free_rss:
> +	fun_destroy_rss(fp);
> +disable_irqs:
> +	fun_disable_irqs(netdev);
> +destroy_vi:
> +	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, netdev->dev_port);
> +free_queues:
> +	fun_free_rings(netdev);
> +	return rc;
> +}
> +
> +static int funeth_close(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +
> +	/* HW admin disable port */
> +	fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0);
> +
> +	netif_carrier_off(netdev);
> +	netif_tx_disable(netdev);
> +
> +	fun_destroy_rss(fp);
> +	if (fp->txqs)
> +		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, netdev->dev_port);
> +	fun_disable_irqs(netdev);
> +	fun_free_rings(netdev);
> +	return 0;
> +}
> +
> +static void fun_get_stats64(struct net_device *netdev,
> +			    struct rtnl_link_stats64 *stats)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	struct funeth_rxq **rxqs;
> +	unsigned int i, start;
> +
> +	stats->tx_packets = fp->tx_packets;
> +	stats->tx_bytes   = fp->tx_bytes;
> +	stats->tx_dropped = fp->tx_dropped;
> +
> +	stats->rx_packets = fp->rx_packets;
> +	stats->rx_bytes   = fp->rx_bytes;
> +	stats->rx_dropped = fp->rx_dropped;
> +
> +	rcu_read_lock();
> +	rxqs = rcu_dereference(fp->rxqs);
> +	if (!rxqs)
> +		goto unlock;
> +
> +	for (i = 0; i < netdev->real_num_tx_queues; i++) {
> +		struct funeth_txq_stats txs;
> +
> +		FUN_QSTAT_READ(fp->txqs[i], start, txs);
> +		stats->tx_packets += txs.tx_pkts;
> +		stats->tx_bytes   += txs.tx_bytes;
> +		stats->tx_dropped += txs.tx_map_err + txs.tx_len_err;
> +	}
> +
> +	for (i = 0; i < fp->num_xdpqs; i++) {
> +		struct funeth_txq_stats txs;
> +
> +		FUN_QSTAT_READ(fp->xdpqs[i], start, txs);
> +		stats->tx_packets += txs.tx_pkts;
> +		stats->tx_bytes   += txs.tx_bytes;
> +	}
> +
> +	for (i = 0; i < netdev->real_num_rx_queues; i++) {
> +		struct funeth_rxq_stats rxs;
> +
> +		FUN_QSTAT_READ(rxqs[i], start, rxs);
> +		stats->rx_packets += rxs.rx_pkts;
> +		stats->rx_bytes   += rxs.rx_bytes;
> +		stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops;
> +	}
> +unlock:
> +	rcu_read_unlock();
> +}
> +
> +static int fun_change_mtu(struct net_device *netdev, int new_mtu)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	int rc;
> +
> +	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu);
> +	if (!rc)
> +		netdev->mtu = new_mtu;
> +	return rc;
> +}
> +
> +static int fun_set_macaddr(struct net_device *netdev, void *addr)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	struct sockaddr *saddr = addr;
> +	int rc;
> +
> +	if (!is_valid_ether_addr(saddr->sa_data))
> +		return -EADDRNOTAVAIL;
> +
> +	if (ether_addr_equal(netdev->dev_addr, saddr->sa_data))
> +		return 0;
> +
> +	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
> +				ether_addr_to_u64(saddr->sa_data));
> +	if (!rc)
> +		eth_hw_addr_set(netdev, saddr->sa_data);
> +	return rc;
> +}
> +
> +static int fun_get_port_attributes(struct net_device *netdev)
> +{
> +	static const int keys[] = {
> +		FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES,
> +		FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU
> +	};
> +	static const int phys_keys[] = {
> +		FUN_ADMIN_PORT_KEY_LANE_ATTRS,
> +	};
> +
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	u64 data[ARRAY_SIZE(keys)];
> +	u8 mac[ETH_ALEN];
> +	int i, rc;
> +
> +	rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data);
> +	if (rc)
> +		return rc;
> +
> +	for (i = 0; i < ARRAY_SIZE(keys); i++) {
> +		switch (keys[i]) {
> +		case FUN_ADMIN_PORT_KEY_MACADDR:
> +			u64_to_ether_addr(data[i], mac);
> +			if (is_zero_ether_addr(mac)) {
> +				eth_hw_addr_random(netdev);
> +			} else if (is_valid_ether_addr(mac)) {
> +				eth_hw_addr_set(netdev, mac);
> +			} else {
> +				netdev_err(netdev,
> +					   "device provided a bad MAC address %pM\n",
> +					   mac);
> +				return -EINVAL;
> +			}
> +			break;
> +
> +		case FUN_ADMIN_PORT_KEY_CAPABILITIES:
> +			fp->port_caps = data[i];
> +			break;
> +
> +		case FUN_ADMIN_PORT_KEY_ADVERT:
> +			fp->advertising = data[i];
> +			break;
> +
> +		case FUN_ADMIN_PORT_KEY_MTU:
> +			netdev->mtu = data[i];
> +			break;
> +		}
> +	}
> +
> +	if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) {
> +		rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys,
> +					data);
> +		if (rc)
> +			return rc;
> +
> +		fp->lane_attrs = data[0];
> +	}
> +
> +	if (netdev->addr_assign_type == NET_ADDR_RANDOM)
> +		return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
> +					  ether_addr_to_u64(netdev->dev_addr));
> +	return 0;
> +}
> +
> +static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
> +{
> +	const struct funeth_priv *fp = netdev_priv(dev);
> +
> +	return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg,
> +			    sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0;
> +}
> +
> +static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct hwtstamp_config cfg;
> +
> +	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
> +		return -EFAULT;
> +
> +	if (cfg.flags)           /* flags is reserved, must be 0 */
> +		return -EINVAL;
> +
> +	/* no TX HW timestamps */
> +	cfg.tx_type = HWTSTAMP_TX_OFF;
> +
> +	switch (cfg.rx_filter) {
> +	case HWTSTAMP_FILTER_NONE:
> +		break;
> +	case HWTSTAMP_FILTER_ALL:
> +	case HWTSTAMP_FILTER_SOME:
> +	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
> +	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
> +	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
> +	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
> +	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
> +	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
> +	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
> +	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
> +	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
> +	case HWTSTAMP_FILTER_PTP_V2_EVENT:
> +	case HWTSTAMP_FILTER_PTP_V2_SYNC:
> +	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
> +	case HWTSTAMP_FILTER_NTP_ALL:
> +		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
> +		break;
> +	default:
> +		return -ERANGE;
> +	}
> +
> +	fp->hwtstamp_cfg = cfg;
> +	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
> +}
> +
> +static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
> +{
> +	switch (cmd) {
> +	case SIOCSHWTSTAMP:
> +		return fun_hwtstamp_set(dev, ifr);
> +	case SIOCGHWTSTAMP:
> +		return fun_hwtstamp_get(dev, ifr);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
> +#define XDP_MAX_MTU \
> +	(PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM)
> +
> +static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> +	struct bpf_prog *old_prog, *prog = xdp->prog;
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	bool reconfig;
> +	int rc, i;
> +
> +	/* XDP uses at most one buffer */
> +	if (prog && dev->mtu > XDP_MAX_MTU) {
> +		netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu);
> +		NL_SET_ERR_MSG_MOD(xdp->extack,
> +				   "Device MTU too large for XDP");
> +		return -EINVAL;
> +	}
> +
> +	reconfig = netif_running(dev) && (!!fp->xdp_prog ^ !!prog);
> +	if (reconfig) {
> +		rc = funeth_close(dev);
> +		if (rc) {
> +			NL_SET_ERR_MSG_MOD(xdp->extack,
> +					   "Failed to reconfigure Rx queues.");
> +			return rc;
> +		}
> +	}
> +
> +	dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU;
> +	fp->num_xdpqs = prog ? num_online_cpus() : 0;
> +	old_prog = xchg(&fp->xdp_prog, prog);
> +
> +	if (reconfig) {
> +		rc = funeth_open(dev);
> +		if (rc) {
> +			NL_SET_ERR_MSG_MOD(xdp->extack,
> +					   "Failed to reconfigure Rx queues.");
> +			dev->max_mtu = old_prog ? XDP_MAX_MTU : FUN_MAX_MTU;
> +			fp->num_xdpqs = old_prog ? num_online_cpus() : 0;
> +			xchg(&fp->xdp_prog, old_prog);
> +			return rc;
> +		}
> +	} else if (netif_running(dev)) {
> +		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
> +
> +		for (i = 0; i < dev->real_num_rx_queues; i++)
> +			WRITE_ONCE(rxqs[i]->xdp_prog, prog);
> +	}
> +
> +	if (old_prog)
> +		bpf_prog_put(old_prog);
> +	return 0;
> +}
> +
> +static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> +	switch (xdp->command) {
> +	case XDP_SETUP_PROG:
> +		return fun_xdp_setup(dev, xdp);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +struct devlink_port *fun_get_devlink_port(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +
> +	return &fp->dl_port;
> +}
> +
> +static int fun_init_vports(struct fun_ethdev *ed, unsigned int n)
> +{
> +	if (ed->num_vports)
> +		return -EINVAL;
> +
> +	ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL);
> +	if (!ed->vport_info)
> +		return -ENOMEM;
> +	ed->num_vports = n;
> +	return 0;
> +}
> +
> +static void fun_free_vports(struct fun_ethdev *ed)
> +{
> +	kvfree(ed->vport_info);
> +	ed->vport_info = NULL;
> +	ed->num_vports = 0;
> +}
> +
> +static struct fun_vport_info *fun_get_vport(struct fun_dev *fdev,
> +					    unsigned int vport)
> +{
> +	struct fun_ethdev *ed = to_fun_ethdev(fdev);
> +
> +	if (!ed->vport_info || vport >= ed->num_vports)
> +		return NULL;
> +
> +	return ed->vport_info + vport;
> +}
> +
> +int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct fun_dev *fdev = fp->fdev;
> +	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
> +	struct fun_adi_param mac_param = {};
> +	int rc;
> +
> +	if (!vi)
> +		return -EINVAL;
> +	if (is_multicast_ether_addr(mac))
> +		return -EINVAL;
> +
> +	mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac));
> +	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1,
> +			   &mac_param);
> +	if (!rc)
> +		ether_addr_copy(vi->mac, mac);
> +	return rc;
> +}
> +
> +static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
> +			   __be16 vlan_proto)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct fun_dev *fdev = fp->fdev;
> +	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
> +	struct fun_adi_param vlan_param = {};
> +	int rc;
> +
> +	if (!vi)
> +		return -EINVAL;
> +	if (vlan > 4095 || qos > 7)
> +		return -EINVAL;
> +	if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) &&
> +	    vlan_proto != htons(ETH_P_8021AD))
> +		return -EINVAL;
> +
> +	vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto),
> +					      ((u16)qos << VLAN_PRIO_SHIFT) | vlan);
> +	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param);
> +	if (rc)
> +		return rc;
> +
> +	vi->vlan = vlan;
> +	vi->qos = qos;
> +	vi->vlan_proto = vlan_proto;
> +	return 0;
> +}
> +
> +static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
> +			   int max_tx_rate)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct fun_dev *fdev = fp->fdev;
> +	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
> +	struct fun_adi_param rate_param = {};
> +	int rc;
> +
> +	if (!vi || min_tx_rate)
> +		return -EINVAL;
> +
> +	rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate);
> +	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param);
> +	if (rc)
> +		return rc;
> +
> +	vi->max_rate = max_tx_rate;
> +	return 0;
> +}
> +
> +int fun_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	struct fun_dev *fdev = fp->fdev;
> +	const struct fun_vport_info *vi = fun_get_vport(fdev, vf);
> +
> +	if (!vi)
> +		return -EINVAL;
> +
> +	memset(ivi, 0, sizeof(*ivi));
> +	ivi->vf = vf;
> +	ether_addr_copy(ivi->mac, vi->mac);
> +	ivi->vlan = vi->vlan;
> +	ivi->qos = vi->qos;
> +	ivi->vlan_proto = vi->vlan_proto;
> +	ivi->max_tx_rate = vi->max_rate;
> +	ivi->spoofchk = vi->spoofchk;
> +	return 0;
> +}
> +
> +static const struct net_device_ops fun_netdev_ops = {
> +	.ndo_open		= funeth_open,
> +	.ndo_stop		= funeth_close,
> +	.ndo_start_xmit		= fun_start_xmit,
> +	.ndo_get_stats64	= fun_get_stats64,
> +	.ndo_change_mtu		= fun_change_mtu,
> +	.ndo_set_mac_address	= fun_set_macaddr,
> +	.ndo_validate_addr	= eth_validate_addr,
> +	.ndo_do_ioctl		= fun_ioctl,
> +	.ndo_uninit		= fun_free_queue_irqs,
> +	.ndo_bpf		= fun_xdp,
> +	.ndo_xdp_xmit		= fun_xdp_xmit_frames,
> +	.ndo_set_vf_mac		= fun_set_vf_mac,
> +	.ndo_set_vf_vlan	= fun_set_vf_vlan,
> +	.ndo_set_vf_rate	= fun_set_vf_rate,
> +	.ndo_get_vf_config	= fun_get_vf_config,
> +	.ndo_get_devlink_port	= fun_get_devlink_port,
> +};
> +
> +#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \
> +			 NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \
> +			 NETIF_F_GSO_UDP_TUNNEL_CSUM)
> +#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
> +#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \
> +		   GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA)
> +
> +static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < fp->indir_table_nentries; i++)
> +		fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx);
> +}
> +
> +/* Reset the RSS indirection table to equal distribution across the current
> + * number of Rx queues. Called at init time and whenever the number of Rx
> + * queues changes subsequently. Note that this may also resize the indirection
> + * table.
> + */
> +void fun_reset_rss_indir(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +
> +	if (!fp->rss_cfg)
> +		return;
> +
> +	/* Set the table size to the max possible that allows an equal number
> +	 * of occurrences of each CQ.
> +	 */
> +	fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT,
> +					     dev->real_num_rx_queues);
> +	fun_dflt_rss_indir(fp, dev->real_num_rx_queues);
> +}
> +
> +/* Allocate the DMA area for the RSS configuration commands to the device, and
> + * initialize the hash, hash key, indirection table size and its entries to
> + * their defaults. The indirection table defaults to equal distribution across
> + * the Rx queues.
> + */
> +static int fun_init_rss(struct net_device *dev)
> +{
> +	struct funeth_priv *fp = netdev_priv(dev);
> +	size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table);
> +
> +	fp->rss_hw_id = FUN_HCI_ID_INVALID;
> +	if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS))
> +		return 0;
> +
> +	fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size,
> +					 &fp->rss_dma_addr, GFP_KERNEL);
> +	if (!fp->rss_cfg)
> +		return -ENOMEM;
> +
> +	fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
> +	netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key));
> +	fun_reset_rss_indir(dev);
> +	return 0;
> +}
> +
> +static void fun_free_rss(struct funeth_priv *fp)
> +{
> +	if (fp->rss_cfg) {
> +		dma_free_coherent(&fp->pdev->dev,
> +				  sizeof(fp->rss_key) + sizeof(fp->indir_table),
> +				  fp->rss_cfg, fp->rss_dma_addr);
> +		fp->rss_cfg = NULL;
> +	}
> +}
> +
> +static int fun_init_stats_area(struct funeth_priv *fp)
> +{
> +	unsigned int nstats;
> +
> +	if (!(fp->port_caps & FUN_PORT_CAP_STATS))
> +		return 0;
> +
> +	nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
> +		 PORT_MAC_FEC_STATS_MAX;
> +
> +	fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64),
> +				       &fp->stats_dma_addr, GFP_KERNEL);
> +	if (!fp->stats)
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +static void fun_free_stats_area(struct funeth_priv *fp)
> +{
> +	unsigned int nstats;
> +
> +	if (fp->stats) {
> +		nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
> +			 PORT_MAC_FEC_STATS_MAX;
> +		dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64),
> +				  fp->stats, fp->stats_dma_addr);
> +		fp->stats = NULL;
> +	}
> +}
> +
> +static int fun_dl_port_register(struct net_device *netdev)
> +{
> +	struct funeth_priv *fp = netdev_priv(netdev);
> +	struct devlink *dl = priv_to_devlink(fp->fdev);
> +	struct devlink_port_attrs attrs = {};
> +
> +	if (fp->port_caps & FUN_PORT_CAP_VPORT) {
> +		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
> +	} else {
> +		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
> +		attrs.lanes = fp->lane_attrs & 7;
> +		if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) {
> +			attrs.split = 1;
> +			attrs.phys.split_subport_number = fp->lport & 3;
> +		}
> +	}
> +	attrs.phys.port_number = fp->lport;
> +
> +	devlink_port_attrs_set(&fp->dl_port, &attrs);
> +
> +	return devlink_port_register(dl, &fp->dl_port, netdev->dev_port);
> +}
> +
> +/* Determine the max Tx/Rx queues for a port. */
> +static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx,
> +		      unsigned int *nrx)
> +{
> +	int neth;
> +
> +	if (ed->num_ports > 1 || is_kdump_kernel()) {
> +		*ntx = 1;
> +		*nrx = 1;
> +		return 0;
> +	}
> +
> +	neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH);
> +	if (neth < 0)
> +		return neth;
> +
> +	/* We determine the max number of queues based on the CPU
> +	 * cores, device interrupts and queues, RSS size, and device Tx flows.
> +	 *
> +	 * - At least 1 Rx and 1 Tx queues.
> +	 * - At most 1 Rx/Tx queue per core.
> +	 * - Each Rx/Tx queue needs 1 SQ.
> +	 */
> +	*ntx = min(ed->nsqs_per_port - 1, num_online_cpus());
> +	*nrx = *ntx;
> +	if (*ntx > neth)
> +		*ntx = neth;
> +	if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT)
> +		*nrx = FUN_ETH_RSS_MAX_INDIR_ENT;
> +	return 0;
> +}
> +
> +static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs)
> +{
> +	unsigned int ntx, nrx;
> +
> +	ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES);
> +	nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES);
> +	if (ntx <= nrx) {
> +		ntx = min(ntx, nsqs / 2);
> +		nrx = min(nrx, nsqs - ntx);
> +	} else {
> +		nrx = min(nrx, nsqs / 2);
> +		ntx = min(ntx, nsqs - nrx);
> +	}
> +
> +	netif_set_real_num_tx_queues(dev, ntx);
> +	netif_set_real_num_rx_queues(dev, nrx);
> +}
> +
> +static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid)
> +{
> +	struct fun_dev *fdev = &ed->fdev;
> +	struct net_device *netdev;
> +	unsigned int ntx, nrx;
> +	struct funeth_priv *fp;
> +	int rc;
> +
> +	rc = fun_max_qs(ed, &ntx, &nrx);
> +	if (rc)
> +		return rc;
> +
> +	netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx);
> +	if (!netdev) {
> +		rc = -ENOMEM;
> +		goto done;
> +	}
> +
> +	netdev->dev_port = portid;
> +	fun_queue_defaults(netdev, ed->nsqs_per_port);
> +
> +	fp = netdev_priv(netdev);
> +	fp->fdev = fdev;
> +	fp->pdev = to_pci_dev(fdev->dev);
> +	fp->netdev = netdev;
> +	fp->msg_enable = msg_enable;
> +	fp->ethid_start = portid;
> +	seqcount_init(&fp->link_seq);
> +
> +	fp->lport = INVALID_LPORT;
> +	rc = fun_port_create(netdev);
> +	if (rc)
> +		goto free_netdev;
> +
> +	/* bind port to admin CQ for async events */
> +	rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid,
> +		      FUN_ADMIN_BIND_TYPE_EPCQ, 0);
> +	if (rc)
> +		goto destroy_port;
> +
> +	rc = fun_get_port_attributes(netdev);
> +	if (rc)
> +		goto destroy_port;
> +
> +	rc = fun_init_rss(netdev);
> +	if (rc)
> +		goto destroy_port;
> +
> +	rc = fun_init_stats_area(fp);
> +	if (rc)
> +		goto free_rss;
> +
> +	SET_NETDEV_DEV(netdev, fdev->dev);
> +	netdev->netdev_ops = &fun_netdev_ops;
> +
> +	netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM;
> +	if (fp->port_caps & FUN_PORT_CAP_OFFLOADS)
> +		netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS;
> +	if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS)
> +		netdev->hw_features |= GSO_ENCAP_FLAGS;
> +
> +	netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA;
> +	netdev->vlan_features = netdev->features & VLAN_FEAT;
> +	netdev->mpls_features = netdev->vlan_features;
> +	netdev->hw_enc_features = netdev->hw_features;
> +
> +	netdev->min_mtu = ETH_MIN_MTU;
> +	netdev->max_mtu = FUN_MAX_MTU;
> +	netdev->watchdog_timeo = 15 * HZ;
> +
> +	fun_set_ethtool_ops(netdev);
> +
> +	/* configurable parameters */
> +	fp->sq_depth = min(SQ_DEPTH, fdev->q_depth);
> +	fp->cq_depth = min(CQ_DEPTH, fdev->q_depth);
> +	fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth);
> +	fp->rx_coal_usec  = CQ_INTCOAL_USEC;
> +	fp->rx_coal_count = CQ_INTCOAL_NPKT;
> +	fp->tx_coal_usec  = SQ_INTCOAL_USEC;
> +	fp->tx_coal_count = SQ_INTCOAL_NPKT;
> +	fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);
> +
> +	rc = fun_dl_port_register(netdev);
> +	if (rc)
> +		goto free_stats;
> +
> +	fp->ktls_id = FUN_HCI_ID_INVALID;
> +	fun_ktls_init(netdev);            /* optional, failure OK */
> +
> +	netif_carrier_off(netdev);
> +	ed->netdevs[portid] = netdev;
> +	rc = register_netdev(netdev);
> +	if (rc)
> +		goto unreg_devlink;
> +
> +	if (fp->dl_port.devlink)
> +		devlink_port_type_eth_set(&fp->dl_port, netdev);
> +
> +	return 0;
> +
> +unreg_devlink:
> +	ed->netdevs[portid] = NULL;
> +	fun_ktls_cleanup(fp);
> +	if (fp->dl_port.devlink)
> +		devlink_port_unregister(&fp->dl_port);
> +free_stats:
> +	fun_free_stats_area(fp);
> +free_rss:
> +	fun_free_rss(fp);
> +destroy_port:
> +	fun_port_destroy(netdev);
> +free_netdev:
> +	free_netdev(netdev);
> +done:
> +	dev_err(fdev->dev, "couldn't allocate port %u, error %d", portid, rc);
> +	return rc;
> +}
> +
> +static void fun_destroy_netdev(struct net_device *netdev)
> +{
> +	if (likely(netdev)) {
> +		struct funeth_priv *fp = netdev_priv(netdev);
> +
> +		if (fp->dl_port.devlink) {
> +			devlink_port_type_clear(&fp->dl_port);
> +			devlink_port_unregister(&fp->dl_port);
> +		}
> +		unregister_netdev(netdev);
> +		fun_ktls_cleanup(fp);
> +		fun_free_stats_area(fp);
> +		fun_free_rss(fp);
> +		fun_port_destroy(netdev);
> +		free_netdev(netdev);
> +	}
> +}
> +
> +static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports)
> +{
> +	struct fun_dev *fd = &ed->fdev;
> +	int i, rc;
> +
> +	/* The admin queue takes 1 IRQ and 2 SQs. */
> +	ed->nsqs_per_port = min(fd->num_irqs - 1,
> +				fd->kern_end_qid - 2) / nports;
> +	if (ed->nsqs_per_port < 2) {
> +		dev_err(fd->dev, "Too few SQs for %u ports", nports);
> +		return -EINVAL;
> +	}
> +
> +	ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL);
> +	if (!ed->netdevs)
> +		return -ENOMEM;
> +
> +	ed->num_ports = nports;
> +	for (i = 0; i < nports; i++) {
> +		rc = fun_create_netdev(ed, i);
> +		if (rc)
> +			goto free_netdevs;
> +	}
> +
> +	return 0;
> +
> +free_netdevs:
> +	while (i)
> +		fun_destroy_netdev(ed->netdevs[--i]);
> +	kfree(ed->netdevs);
> +	ed->netdevs = NULL;
> +	ed->num_ports = 0;
> +	return rc;
> +}
> +
> +static void fun_destroy_ports(struct fun_ethdev *ed)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < ed->num_ports; i++)
> +		fun_destroy_netdev(ed->netdevs[i]);
> +
> +	kfree(ed->netdevs);
> +	ed->netdevs = NULL;
> +	ed->num_ports = 0;
> +}
> +
> +static void fun_update_link_state(const struct fun_ethdev *ed,
> +				  const struct fun_admin_port_notif *notif)
> +{
> +	unsigned int port_idx = be16_to_cpu(notif->id);
> +	struct net_device *netdev;
> +	struct funeth_priv *fp;
> +
> +	if (port_idx >= ed->num_ports)
> +		return;
> +
> +	netdev = ed->netdevs[port_idx];
> +	fp = netdev_priv(netdev);
> +
> +	write_seqcount_begin(&fp->link_seq);
> +	fp->link_speed = be32_to_cpu(notif->speed) * 10;  /* 10 Mbps->Mbps */
> +	fp->active_fc = notif->flow_ctrl;
> +	fp->active_fec = notif->fec;
> +	fp->xcvr_type = notif->xcvr_type;
> +	fp->link_down_reason = notif->link_down_reason;
> +	fp->lp_advertising = be64_to_cpu(notif->lp_advertising);
> +
> +	if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN)
> +		netif_carrier_off(netdev);
> +	if (notif->link_state & FUN_PORT_FLAG_NH_DOWN)
> +		netif_dormant_on(netdev);
> +	if (notif->link_state & FUN_PORT_FLAG_NH_UP)
> +		netif_dormant_off(netdev);
> +	if (notif->link_state & FUN_PORT_FLAG_MAC_UP)
> +		netif_carrier_on(netdev);
> +
> +	write_seqcount_end(&fp->link_seq);
> +	fun_report_link(netdev);
> +}
> +
> +/* handler for async events delivered through the admin CQ */
> +static void fun_event_cb(struct fun_dev *fdev, void *entry)
> +{
> +	u8 op = ((struct fun_admin_rsp_common *)entry)->op;
> +
> +	if (op == FUN_ADMIN_OP_PORT) {
> +		const struct fun_admin_port_notif *rsp = entry;
> +
> +		if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) {
> +			fun_update_link_state(to_fun_ethdev(fdev), rsp);
> +		} else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) {
> +			const struct fun_admin_res_count_rsp *r = entry;
> +
> +			if (r->count.data)
> +				set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags);
> +			else
> +				set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags);
> +			fun_serv_sched(fdev);
> +		} else {
> +			dev_info(fdev->dev, "adminq event unexpected op %u subop %u",
> +				 op, rsp->subop);
> +		}
> +	} else {
> +		dev_info(fdev->dev, "adminq event unexpected op %u", op);
> +	}
> +}
> +
> +/* handler for pending work managed by the service task */
> +static void fun_service_cb(struct fun_dev *fdev)
> +{
> +	struct fun_ethdev *ed = to_fun_ethdev(fdev);
> +	int rc;
> +
> +	if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags))
> +		fun_destroy_ports(ed);
> +
> +	if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags))
> +		return;
> +
> +	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
> +	if (rc < 0 || rc == ed->num_ports)
> +		return;
> +
> +	if (ed->num_ports)
> +		fun_destroy_ports(ed);
> +	if (rc)
> +		fun_create_ports(ed, rc);
> +}
> +
> +static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs)
> +{
> +	struct fun_dev *fdev = pci_get_drvdata(pdev);
> +	struct fun_ethdev *ed = to_fun_ethdev(fdev);
> +	int rc;
> +
> +	if (nvfs == 0) {
> +		if (pci_vfs_assigned(pdev)) {
> +			dev_warn(&pdev->dev,
> +				 "Cannot disable SR-IOV while VFs are assigned\n");
> +			return -EPERM;
> +		}
> +
> +		pci_disable_sriov(pdev);
> +		fun_free_vports(ed);
> +		return 0;
> +	}
> +
> +	rc = fun_init_vports(ed, nvfs);
> +	if (rc)
> +		return rc;
> +
> +	rc = pci_enable_sriov(pdev, nvfs);
> +	if (rc) {
> +		fun_free_vports(ed);
> +		return rc;
> +	}
> +
> +	return nvfs;
> +}
> +
> +static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> +{
> +	struct devlink *devlink;
> +	struct fun_ethdev *ed;
> +	struct fun_dev *fdev;
> +	int rc;
> +
> +	struct fun_dev_params aqreq = {
> +		.cqe_size_log2 = ilog2(ADMIN_CQE_SIZE),
> +		.sqe_size_log2 = ilog2(ADMIN_SQE_SIZE),
> +		.cq_depth      = ADMIN_CQ_DEPTH,
> +		.sq_depth      = ADMIN_SQ_DEPTH,
> +		.rq_depth      = ADMIN_RQ_DEPTH,
> +		.min_msix      = 2,              /* 1 Rx + 1 Tx */
> +		.event_cb      = fun_event_cb,
> +		.serv_cb       = fun_service_cb,
> +	};
> +
> +	devlink = fun_devlink_alloc(&pdev->dev);
> +	if (!devlink) {
> +		dev_err(&pdev->dev, "devlink alloc failed\n");
> +		return -ENOMEM;
> +	}
> +
> +	ed = devlink_priv(devlink);
> +
> +	fdev = &ed->fdev;
> +	rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME);
> +	if (rc)
> +		goto free_devlink;
> +
> +	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
> +	if (rc > 0)
> +		rc = fun_create_ports(ed, rc);
> +	if (rc < 0)
> +		goto disable_dev;
> +
> +	fun_serv_restart(fdev);
> +	fun_devlink_register(devlink);
> +	return 0;
> +
> +disable_dev:
> +	fun_dev_disable(fdev);
> +free_devlink:
> +	fun_devlink_free(devlink);
> +	return rc;
> +}
> +
> +static void __funeth_remove(struct pci_dev *pdev)
> +{
> +	struct fun_dev *fdev = pci_get_drvdata(pdev);
> +	struct devlink *devlink;
> +	struct fun_ethdev *ed;
> +
> +	if (!fdev)
> +		return;
> +
> +	ed = to_fun_ethdev(fdev);
> +	devlink = priv_to_devlink(ed);
> +	fun_devlink_unregister(devlink);
> +
> +#ifdef CONFIG_PCI_IOV
> +	funeth_sriov_configure(pdev, 0);
> +#endif
> +
> +	fun_serv_stop(fdev);
> +	fun_destroy_ports(ed);
> +	fun_dev_disable(fdev);
> +
> +	fun_devlink_free(devlink);
> +}
> +
> +static void funeth_remove(struct pci_dev *pdev)
> +{
> +	__funeth_remove(pdev);
> +}
> +
> +static void funeth_shutdown(struct pci_dev *pdev)
> +{
> +	__funeth_remove(pdev);
> +}
> +
> +static struct pci_driver funeth_driver = {
> +	.name		 = KBUILD_MODNAME,
> +	.id_table	 = funeth_id_table,
> +	.probe		 = funeth_probe,
> +	.remove		 = funeth_remove,
> +	.shutdown	 = funeth_shutdown,
> +	.sriov_configure = funeth_sriov_configure,
> +};
> +
> +static int __init funeth_init(void)
> +{
> +	int ret;
> +
> +	ret = pci_register_driver(&funeth_driver);
> +	if (ret) {
> +		pr_err("%s pci_register_driver failed ret %d\n",
> +		       KBUILD_MODNAME, ret);
> +	}
> +	return ret;
> +}
> +
> +static void __exit funeth_exit(void)
> +{
> +	pci_unregister_driver(&funeth_driver);
> +}
> +
> +module_init(funeth_init);
> +module_exit(funeth_exit);
> +
> +MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
> +MODULE_DESCRIPTION("Fungible Ethernet Network Driver");
> +MODULE_LICENSE("Dual BSD/GPL");
> +MODULE_DEVICE_TABLE(pci, funeth_id_table);
Dimitris Michailidis Jan. 3, 2022, 10:11 p.m. UTC | #6
On Fri, Dec 31, 2021 at 3:15 AM Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 30.12.2021 17:39, Dimitris Michailidis wrote:
> > This is the first part of the Fungible ethernet driver. It deals with
> > device probing, net_device creation, and netdev ops.
> >
> > Signed-off-by: Dimitris Michailidis <dmichail@fungible.com>
> > ---
> >  drivers/net/ethernet/fungible/funeth/funeth.h |  153 ++
> >  .../ethernet/fungible/funeth/funeth_main.c    | 1772 +++++++++++++++++
> >  2 files changed, 1925 insertions(+)
> >  create mode 100644 drivers/net/ethernet/fungible/funeth/funeth.h
> >  create mode 100644 drivers/net/ethernet/fungible/funeth/funeth_main.c
> >
> > diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h
> > new file mode 100644
> > index 000000000000..0c089f685c7f
> > --- /dev/null
> > +++ b/drivers/net/ethernet/fungible/funeth/funeth.h
> > @@ -0,0 +1,153 @@
> > +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
> > +
> > +#ifndef _FUNETH_H
> > +#define _FUNETH_H
> > +
> > +#include <uapi/linux/if_ether.h>
> > +#include <uapi/linux/net_tstamp.h>
> > +#include <linux/seqlock.h>
> > +#include <net/devlink.h>
> > +#include "fun_dev.h"
> > +
> > +#define ADMIN_SQE_SIZE SZ_128
> > +#define ADMIN_CQE_SIZE SZ_64
> > +#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info))
> > +
> > +#define FUN_MAX_MTU 9024
> > +
> > +#define SQ_DEPTH 512U
> > +#define CQ_DEPTH 1024U
> > +#define RQ_DEPTH (512U / (PAGE_SIZE / 4096))
> > +
> > +#define CQ_INTCOAL_USEC 10
> > +#define CQ_INTCOAL_NPKT 16
> > +#define SQ_INTCOAL_USEC 10
> > +#define SQ_INTCOAL_NPKT 16
> > +
> > +#define INVALID_LPORT 0xffff
> > +
> > +#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE)
> > +
> > +struct fun_vport_info {
> > +     u8 mac[ETH_ALEN];
> > +     u16 vlan;
> > +     __be16 vlan_proto;
> > +     u8 qos;
> > +     u8 spoofchk:1;
> > +     u8 trusted:1;
> > +     unsigned int max_rate;
> > +};
> > +
> > +/* "subclass" of fun_dev for Ethernet functions */
> > +struct fun_ethdev {
> > +     struct fun_dev fdev;
> > +
> > +     /* the function's network ports */
> > +     struct net_device **netdevs;
> > +     unsigned int num_ports;
> > +
> > +     /* configuration for the function's virtual ports */
> > +     unsigned int num_vports;
> > +     struct fun_vport_info *vport_info;
> > +
> > +     unsigned int nsqs_per_port;
> > +};
> > +
> > +static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p)
> > +{
> > +     return container_of(p, struct fun_ethdev, fdev);
> > +}
> > +
> > +/* Per netdevice driver state, i.e., netdev_priv. */
> > +struct funeth_priv {
> > +     struct fun_dev *fdev;
> > +     struct pci_dev *pdev;
> > +     struct net_device *netdev;
> > +
> > +     struct funeth_rxq * __rcu *rxqs;
> > +     struct funeth_txq **txqs;
> > +     struct funeth_txq **xdpqs;
> > +
> > +     struct fun_irq *irqs;
> > +     unsigned int num_irqs;
> > +     unsigned int num_tx_irqs;
> > +
> > +     unsigned int lane_attrs;
> > +     u16 lport;
> > +
> > +     /* link settings */
> > +     u64 port_caps;
> > +     u64 advertising;
> > +     u64 lp_advertising;
> > +     unsigned int link_speed;
>
> Any specific reason for handling this manually?
> Why not using phylib/phylink?

Linux here doesn't have access to the MAC/PHY. They are handled
by FW. The driver for the most part sits between FW and ethtool,
converting commands and state between them; these fields store
either what FW has reported or what ethtool has requested.
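
To make that split concrete, below is a minimal sketch (not part of the
patch) of how an ethtool handler could consume the FW-reported fields
cached in struct funeth_priv. The handler name and the exact field
mapping are assumptions for illustration; the driver's real ethtool ops
arrive in a separate patch of this series.

/* Illustrative sketch only -- fun_get_link_ksettings() is a hypothetical
 * name here; it shows the intended read side of fp->link_seq, paired with
 * the write side in fun_update_link_state() below.
 */
static int fun_get_link_ksettings(struct net_device *netdev,
				  struct ethtool_link_ksettings *ks)
{
	const struct funeth_priv *fp = netdev_priv(netdev);
	unsigned int seq;
	u32 speed;

	do {
		seq = read_seqcount_begin(&fp->link_seq);
		speed = fp->link_speed;		/* already in Mb/s */
	} while (read_seqcount_retry(&fp->link_seq, seq));

	ks->base.duplex = DUPLEX_FULL;
	ks->base.speed = netif_carrier_ok(netdev) ? speed : SPEED_UNKNOWN;
	return 0;
}
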
diff mbox series

Patch

diff --git a/drivers/net/ethernet/fungible/funeth/funeth.h b/drivers/net/ethernet/fungible/funeth/funeth.h
new file mode 100644
index 000000000000..0c089f685c7f
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth.h
@@ -0,0 +1,153 @@ 
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+
+#ifndef _FUNETH_H
+#define _FUNETH_H
+
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/net_tstamp.h>
+#include <linux/seqlock.h>
+#include <net/devlink.h>
+#include "fun_dev.h"
+
+#define ADMIN_SQE_SIZE SZ_128
+#define ADMIN_CQE_SIZE SZ_64
+#define ADMIN_RSP_MAX_LEN (ADMIN_CQE_SIZE - sizeof(struct fun_cqe_info))
+
+#define FUN_MAX_MTU 9024
+
+#define SQ_DEPTH 512U
+#define CQ_DEPTH 1024U
+#define RQ_DEPTH (512U / (PAGE_SIZE / 4096))
+
+#define CQ_INTCOAL_USEC 10
+#define CQ_INTCOAL_NPKT 16
+#define SQ_INTCOAL_USEC 10
+#define SQ_INTCOAL_NPKT 16
+
+#define INVALID_LPORT 0xffff
+
+#define FUN_PORT_CAP_PAUSE_MASK (FUN_PORT_CAP_TX_PAUSE | FUN_PORT_CAP_RX_PAUSE)
+
+struct fun_vport_info {
+	u8 mac[ETH_ALEN];
+	u16 vlan;
+	__be16 vlan_proto;
+	u8 qos;
+	u8 spoofchk:1;
+	u8 trusted:1;
+	unsigned int max_rate;
+};
+
+/* "subclass" of fun_dev for Ethernet functions */
+struct fun_ethdev {
+	struct fun_dev fdev;
+
+	/* the function's network ports */
+	struct net_device **netdevs;
+	unsigned int num_ports;
+
+	/* configuration for the function's virtual ports */
+	unsigned int num_vports;
+	struct fun_vport_info *vport_info;
+
+	unsigned int nsqs_per_port;
+};
+
+static inline struct fun_ethdev *to_fun_ethdev(struct fun_dev *p)
+{
+	return container_of(p, struct fun_ethdev, fdev);
+}
+
+/* Per netdevice driver state, i.e., netdev_priv. */
+struct funeth_priv {
+	struct fun_dev *fdev;
+	struct pci_dev *pdev;
+	struct net_device *netdev;
+
+	struct funeth_rxq * __rcu *rxqs;
+	struct funeth_txq **txqs;
+	struct funeth_txq **xdpqs;
+
+	struct fun_irq *irqs;
+	unsigned int num_irqs;
+	unsigned int num_tx_irqs;
+
+	unsigned int lane_attrs;
+	u16 lport;
+
+	/* link settings */
+	u64 port_caps;
+	u64 advertising;
+	u64 lp_advertising;
+	unsigned int link_speed;
+	u8 xcvr_type;
+	u8 active_fc;
+	u8 active_fec;
+	u8 link_down_reason;
+	seqcount_t link_seq;
+
+	u32 msg_enable;
+
+	unsigned int ethid_start;
+
+	unsigned int num_xdpqs;
+
+	/* ethtool, etc. config parameters */
+	unsigned int sq_depth;
+	unsigned int rq_depth;
+	unsigned int cq_depth;
+	unsigned int cq_irq_db;
+	u8 tx_coal_usec;
+	u8 tx_coal_count;
+	u8 rx_coal_usec;
+	u8 rx_coal_count;
+
+	struct hwtstamp_config hwtstamp_cfg;
+
+	/* cumulative queue stats from earlier queue instances */
+	u64 tx_packets;
+	u64 tx_bytes;
+	u64 tx_dropped;
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_dropped;
+
+	/* RSS */
+	unsigned int rss_hw_id;
+	enum fun_eth_hash_alg hash_algo;
+	u8 rss_key[FUN_ETH_RSS_MAX_KEY_SIZE];
+	unsigned int indir_table_nentries;
+	u32 indir_table[FUN_ETH_RSS_MAX_INDIR_ENT];
+	dma_addr_t rss_dma_addr;
+	void *rss_cfg;
+
+	/* DMA area for port stats */
+	dma_addr_t stats_dma_addr;
+	u64 *stats;
+
+	struct bpf_prog *xdp_prog;
+
+	struct devlink_port dl_port;
+
+	/* kTLS state */
+	unsigned int ktls_id;
+	atomic64_t tx_tls_add;
+	atomic64_t tx_tls_del;
+	atomic64_t tx_tls_resync;
+};
+
+void fun_set_ethtool_ops(struct net_device *netdev);
+int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
+		       const int *keys, u64 *data);
+int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data);
+int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data);
+int fun_create_and_bind_tx(struct funeth_priv *fp, u32 ethid, u32 sqid);
+void fun_reset_rss_indir(struct net_device *dev);
+int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
+		   const u32 *qtable, u8 op);
+
+int fun_alloc_rings(struct net_device *netdev);
+void fun_free_rings(struct net_device *netdev);
+int fun_alloc_queue_irqs(struct net_device *dev);
+
+#endif /* _FUNETH_H */
diff --git a/drivers/net/ethernet/fungible/funeth/funeth_main.c b/drivers/net/ethernet/fungible/funeth/funeth_main.c
new file mode 100644
index 000000000000..11823006cce5
--- /dev/null
+++ b/drivers/net/ethernet/fungible/funeth/funeth_main.c
@@ -0,0 +1,1772 @@ 
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+
+#include <linux/bpf.h>
+#include <linux/crash_dump.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/idr.h>
+#include <linux/if_vlan.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/rtnetlink.h>
+#include <linux/inetdevice.h>
+
+#include "funeth.h"
+#include "funeth_devlink.h"
+#include "funeth_ktls.h"
+#include "fun_port.h"
+#include "fun_queue.h"
+#include "funeth_txrx.h"
+
+#define ADMIN_SQ_DEPTH 32
+#define ADMIN_CQ_DEPTH 64
+#define ADMIN_RQ_DEPTH 16
+
+/* Default number of Tx/Rx queues. */
+#define FUN_DFLT_QUEUES 16U
+
+enum {
+	FUN_SERV_RES_CHANGE = FUN_SERV_FIRST_AVAIL,
+	FUN_SERV_DEL_PORTS,
+};
+
+static int msg_enable;
+module_param(msg_enable, int, 0644);
+MODULE_PARM_DESC(msg_enable, "bitmap of NETIF_MSG_* enables");
+
+static const struct pci_device_id funeth_id_table[] = {
+	{ PCI_VDEVICE(FUNGIBLE, 0x0101) },
+	{ PCI_VDEVICE(FUNGIBLE, 0x0181) },
+	{ 0, }
+};
+
+/* Issue a port write admin command with @n key/value pairs. */
+int fun_port_write_cmds(struct funeth_priv *fp, unsigned int n,
+			const int *keys, const u64 *data)
+{
+	unsigned int cmd_size, i;
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+		u8 v[ADMIN_SQE_SIZE];
+	} cmd;
+
+	cmd_size = offsetof(struct fun_admin_port_req, u.write.write48) +
+		n * sizeof(struct fun_admin_write48_req);
+	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
+		return -EINVAL;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+						    cmd_size);
+	cmd.req.u.write =
+		FUN_ADMIN_PORT_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE, 0,
+					      fp->netdev->dev_port);
+	for (i = 0; i < n; i++)
+		cmd.req.u.write.write48[i] =
+			FUN_ADMIN_WRITE48_REQ_INIT(keys[i], data[i]);
+
+	return fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+					 &cmd.rsp, cmd_size, 0);
+}
+
+int fun_port_write_cmd(struct funeth_priv *fp, int key, u64 data)
+{
+	return fun_port_write_cmds(fp, 1, &key, &data);
+}
+
+/* Issue a port read admin command with @n key/value pairs. */
+int fun_port_read_cmds(struct funeth_priv *fp, unsigned int n,
+		       const int *keys, u64 *data)
+{
+	const struct fun_admin_read48_rsp *r48rsp;
+	unsigned int cmd_size, i;
+	int rc;
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+		u8 v[ADMIN_SQE_SIZE];
+	} cmd;
+
+	cmd_size = offsetof(struct fun_admin_port_req, u.read.read48) +
+		n * sizeof(struct fun_admin_read48_req);
+	if (cmd_size > sizeof(cmd) || cmd_size > ADMIN_RSP_MAX_LEN)
+		return -EINVAL;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+						    cmd_size);
+	cmd.req.u.read =
+		FUN_ADMIN_PORT_READ_REQ_INIT(FUN_ADMIN_SUBOP_READ, 0,
+					     fp->netdev->dev_port);
+	for (i = 0; i < n; i++)
+		cmd.req.u.read.read48[i] = FUN_ADMIN_READ48_REQ_INIT(keys[i]);
+
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+				       &cmd.rsp, cmd_size, 0);
+	if (rc)
+		return rc;
+
+	for (r48rsp = cmd.rsp.u.read.read48, i = 0; i < n; i++, r48rsp++) {
+		data[i] = FUN_ADMIN_READ48_RSP_DATA_G(r48rsp->key_to_data);
+		dev_dbg(fp->fdev->dev,
+			"port_read_rsp lport=%u (key_to_data=0x%llx) key=%d data:%lld retval:%lld",
+			fp->lport, r48rsp->key_to_data, keys[i], data[i],
+			FUN_ADMIN_READ48_RSP_RET_G(r48rsp->key_to_data));
+	}
+	return 0;
+}
+
+int fun_port_read_cmd(struct funeth_priv *fp, int key, u64 *data)
+{
+	return fun_port_read_cmds(fp, 1, &key, data);
+}
+
+static void fun_report_link(struct net_device *netdev)
+{
+	if (netif_carrier_ok(netdev)) {
+		const struct funeth_priv *fp = netdev_priv(netdev);
+		const char *fec = "", *pause = "";
+		int speed = fp->link_speed;
+		char unit = 'M';
+
+		if (fp->link_speed >= SPEED_1000) {
+			speed /= 1000;
+			unit = 'G';
+		}
+
+		if (fp->active_fec & FUN_PORT_FEC_RS)
+			fec = ", RS-FEC";
+		else if (fp->active_fec & FUN_PORT_FEC_FC)
+			fec = ", BASER-FEC";
+
+		if ((fp->active_fc & FUN_PORT_CAP_PAUSE_MASK) == FUN_PORT_CAP_PAUSE_MASK)
+			pause = ", Tx/Rx PAUSE";
+		else if (fp->active_fc & FUN_PORT_CAP_RX_PAUSE)
+			pause = ", Rx PAUSE";
+		else if (fp->active_fc & FUN_PORT_CAP_TX_PAUSE)
+			pause = ", Tx PAUSE";
+
+		netdev_info(netdev, "Link up at %d %cb/s full-duplex%s%s%s\n",
+			    speed, unit, pause, fec,
+			    netif_dormant(netdev) ? ", dormant" : "");
+	} else {
+		netdev_info(netdev, "Link down\n");
+	}
+}
+
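+/* Write a single ADI attribute. In this file it is used to program
+ * per-VF settings (MAC address, VLAN, Tx rate) via the admin queue.
+ */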
+static int fun_adi_write(struct fun_dev *fdev, enum fun_admin_adi_attr attr,
+			 unsigned int adi_id, const struct fun_adi_param *param)
+{
+	struct fun_admin_adi_req req = {
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ADI,
+						     sizeof(req)),
+		.u.write = FUN_ADMIN_ADI_WRITE_REQ_INIT(FUN_ADMIN_SUBOP_WRITE,
+							attr, adi_id),
+		.u.write.param = *param
+	};
+
+	return fun_submit_admin_sync_cmd(fdev, &req.common, NULL, 0, 0);
+}
+
+/* Configure RSS for the given port. @op determines whether a new RSS context
+ * is to be created or whether an existing one should be reconfigured. The
+ * remaining parameters specify the hashing algorithm, key, and indirection
+ * table.
+ *
+ * This initiates packet delivery to the Rx queues set in the indirection
+ * table.
+ */
+int fun_config_rss(struct net_device *dev, int algo, const u8 *key,
+		   const u32 *qtable, u8 op)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int table_len = fp->indir_table_nentries;
+	unsigned int len = FUN_ETH_RSS_MAX_KEY_SIZE + sizeof(u32) * table_len;
+	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+	__be32 *indir_tab;
+	u16 flags;
+	int rc;
+	union {
+		struct {
+			struct fun_admin_rss_req req;
+			struct fun_dataop_gl gl;
+		};
+		struct fun_admin_generic_create_rsp rsp;
+	} cmd;
+
+	if (op != FUN_ADMIN_SUBOP_CREATE && fp->rss_hw_id == FUN_HCI_ID_INVALID)
+		return -EINVAL;
+
+	flags = op == FUN_ADMIN_SUBOP_CREATE ?
+			FUN_ADMIN_RES_CREATE_FLAG_ALLOCATOR : 0;
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_RSS,
+						    sizeof(cmd));
+	cmd.req.u.create =
+		FUN_ADMIN_RSS_CREATE_REQ_INIT(op, flags, fp->rss_hw_id,
+					      dev->dev_port, algo,
+					      FUN_ETH_RSS_MAX_KEY_SIZE,
+					      table_len, 0,
+					      FUN_ETH_RSS_MAX_KEY_SIZE);
+	cmd.req.u.create.dataop = FUN_DATAOP_HDR_INIT(1, 0, 1, 0, len);
+	fun_dataop_gl_init(&cmd.gl, 0, 0, len, fp->rss_dma_addr);
+
+	/* write the key and indirection table into the RSS DMA area */
+	memcpy(fp->rss_cfg, key, FUN_ETH_RSS_MAX_KEY_SIZE);
+	indir_tab = fp->rss_cfg + FUN_ETH_RSS_MAX_KEY_SIZE;
+	for (rc = 0; rc < table_len; rc++)
+		*indir_tab++ = cpu_to_be32(rxqs[*qtable++]->hw_cqid);
+
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common,
+				       &cmd.rsp, sizeof(cmd.rsp), 0);
+	if (!rc && op == FUN_ADMIN_SUBOP_CREATE)
+		fp->rss_hw_id = be32_to_cpu(cmd.rsp.id);
+	return rc;
+}
+
+/* Destroy the HW RSS context associated with the given port. This also stops
+ * all packet delivery to our Rx queues.
+ */
+static int fun_destroy_rss(struct funeth_priv *fp)
+{
+	int rc;
+
+	if (fp->rss_hw_id == FUN_HCI_ID_INVALID)
+		return 0;
+
+	rc = fun_res_destroy(fp->fdev, FUN_ADMIN_OP_RSS, 0, fp->rss_hw_id);
+	fp->rss_hw_id = FUN_HCI_ID_INVALID;
+	return rc;
+}
+
+static void free_txqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_txq **txqs = fp->txqs;
+	unsigned int i;
+
+	for (i = 0; i < dev->real_num_tx_queues && txqs[i]; i++) {
+		fp->irqs[txqs[i]->irq_idx].txq = NULL;
+		funeth_txq_free(txqs[i]);
+		txqs[i] = NULL;
+	}
+}
+
+static int alloc_txqs(struct net_device *dev, unsigned int start_irq)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_txq **txqs = fp->txqs, *q;
+	unsigned int i;
+
+	for (i = 0; i < dev->real_num_tx_queues; i++) {
+		q = funeth_txq_create(dev, i, fp->sq_depth,
+				      &fp->irqs[start_irq + i]);
+		if (IS_ERR(q)) {
+			free_txqs(dev);
+			return PTR_ERR(q);
+		}
+		txqs[i] = q;
+	}
+	return 0;
+}
+
+static void free_rxqs(struct net_device *dev, struct funeth_rxq **rxqs)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int i;
+
+	for (i = 0; i < dev->real_num_rx_queues && rxqs[i]; i++) {
+		fp->irqs[rxqs[i]->irq_idx].rxq = NULL;
+		funeth_rxq_free(rxqs[i]);
+		rxqs[i] = NULL;
+	}
+}
+
+static int alloc_rxqs(struct net_device *dev, struct funeth_rxq **rxqs,
+		      unsigned int start_irq)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_rxq *q;
+	unsigned int i;
+
+	for (i = 0; i < dev->real_num_rx_queues; i++) {
+		q = funeth_rxq_create(dev, i, fp->cq_depth, fp->rq_depth,
+				      &fp->irqs[start_irq + i]);
+		if (IS_ERR(q)) {
+			free_rxqs(dev, rxqs);
+			return PTR_ERR(q);
+		}
+		rxqs[i] = q;
+	}
+	return 0;
+}
+
+static void free_xdpqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_txq **xdpqs = fp->xdpqs;
+	unsigned int i;
+
+	for (i = 0; i < fp->num_xdpqs && xdpqs[i]; i++) {
+		funeth_txq_free(xdpqs[i]);
+		xdpqs[i] = NULL;
+	}
+}
+
+static int alloc_xdpqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct funeth_txq **xdpqs = fp->xdpqs, *q;
+	unsigned int i;
+
+	for (i = 0; i < fp->num_xdpqs; i++) {
+		q = funeth_txq_create(dev, i, fp->sq_depth, NULL);
+		if (IS_ERR(q)) {
+			free_xdpqs(dev);
+			return PTR_ERR(q);
+		}
+		xdpqs[i] = q;
+	}
+	return 0;
+}
+
+void fun_free_rings(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+
+	if (!rxqs)
+		return;
+
+	rcu_assign_pointer(fp->rxqs, NULL);
+	synchronize_net();
+
+	free_rxqs(netdev, rxqs);
+	free_txqs(netdev);
+	fp->txqs = NULL;
+	free_xdpqs(netdev);
+	fp->xdpqs = NULL;
+	kfree(rxqs);
+}
+
+int fun_alloc_rings(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct funeth_rxq **rxqs;
+	unsigned int total_qs;
+	int err;
+
+	total_qs = netdev->real_num_tx_queues + netdev->real_num_rx_queues +
+		   fp->num_xdpqs;
+
+	rxqs = kcalloc(total_qs, sizeof(*rxqs), GFP_KERNEL);
+	if (!rxqs)
+		return -ENOMEM;
+
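+	/* The rxqs allocation above holds all three pointer arrays:
+	 * Rx queue pointers first, then Tx, then (optionally) XDP Tx.
+	 */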
+	fp->txqs = (struct funeth_txq **)&rxqs[netdev->real_num_rx_queues];
+	err = alloc_txqs(netdev, 0);
+	if (err)
+		goto free_qvec;
+
+	if (fp->num_xdpqs) {
+		fp->xdpqs = (struct funeth_txq **)&rxqs[total_qs - fp->num_xdpqs];
+		err = alloc_xdpqs(netdev);
+		if (err)
+			goto free_txqs;
+	}
+
+	err = alloc_rxqs(netdev, rxqs, netdev->real_num_tx_queues);
+	if (err)
+		goto free_xdpqs;
+
+	rcu_assign_pointer(fp->rxqs, rxqs);
+	return 0;
+
+free_xdpqs:
+	free_xdpqs(netdev);
+free_txqs:
+	free_txqs(netdev);
+free_qvec:
+	fp->txqs = NULL;
+	fp->xdpqs = NULL;
+	kfree(rxqs);
+	return err;
+}
+
+static int fun_port_create(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	union {
+		struct fun_admin_port_req req;
+		struct fun_admin_port_rsp rsp;
+	} cmd;
+	int rc;
+
+	if (fp->lport != INVALID_LPORT)
+		return 0;
+
+	cmd.req.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_PORT,
+						    sizeof(cmd.req));
+	cmd.req.u.create =
+		FUN_ADMIN_PORT_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
+					       netdev->dev_port);
+
+	rc = fun_submit_admin_sync_cmd(fp->fdev, &cmd.req.common, &cmd.rsp,
+				       sizeof(cmd.rsp), 0);
+
+	if (!rc)
+		fp->lport = be16_to_cpu(cmd.rsp.u.create.lport);
+	return rc;
+}
+
+static int fun_port_destroy(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+
+	if (fp->lport == INVALID_LPORT)
+		return 0;
+
+	fp->lport = INVALID_LPORT;
+	return fun_res_destroy(fp->fdev, FUN_ADMIN_OP_PORT, 0,
+			       netdev->dev_port);
+}
+
+static int fun_eth_create(struct funeth_priv *fp, u32 ethid)
+{
+	struct fun_admin_eth_req req = {
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_ETH,
+						     sizeof(req)),
+		.u.create =
+			FUN_ADMIN_ETH_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE, 0,
+						      ethid,
+						      fp->netdev->dev_port)
+	};
+
+	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
+}
+
+static int fun_vi_create(struct funeth_priv *fp)
+{
+	struct fun_admin_vi_req req = {
+		.common = FUN_ADMIN_REQ_COMMON_INIT2(FUN_ADMIN_OP_VI,
+						     sizeof(req)),
+		.u.create = FUN_ADMIN_VI_CREATE_REQ_INIT(FUN_ADMIN_SUBOP_CREATE,
+							 0,
+							 fp->netdev->dev_port,
+							 fp->netdev->dev_port)
+	};
+
+	return fun_submit_admin_sync_cmd(fp->fdev, &req.common, NULL, 0, 0);
+}
+
+/* helper to create an ETH flow and bind an SQ to it */
+int fun_create_and_bind_tx(struct funeth_priv *fp, u32 ethid, u32 sqid)
+{
+	int rc;
+
+	netif_info(fp, ifup, fp->netdev,
+		   "creating ETH flow %u and binding SQ id %u\n", ethid, sqid);
+	rc = fun_eth_create(fp, ethid);
+	if (!rc) {
+		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_EPSQ, sqid,
+			      FUN_ADMIN_BIND_TYPE_ETH, ethid);
+		if (rc)
+			fun_res_destroy(fp->fdev, FUN_ADMIN_OP_ETH, 0, ethid);
+	}
+	return rc;
+}
+
+static void fun_irq_aff_notify(struct irq_affinity_notify *notify,
+			       const cpumask_t *mask)
+{
+	struct fun_irq *p = container_of(notify, struct fun_irq, aff_notify);
+
+	cpumask_copy(&p->affinity_mask, mask);
+}
+
+static void fun_irq_aff_release(struct kref __always_unused *ref)
+{
+}
+
+static void fun_init_irq(struct fun_irq *p, int node, int idx)
+{
+	cpumask_set_cpu(cpumask_local_spread(idx, node), &p->affinity_mask);
+	p->aff_notify.notify = fun_irq_aff_notify;
+	p->aff_notify.release = fun_irq_aff_release;
+}
+
+static void fun_free_irqs_from(struct funeth_priv *fp, unsigned int start)
+{
+	struct fun_irq *p = fp->irqs + start;
+
+	for ( ; start < fp->num_irqs; start++, p++) {
+		netif_napi_del(&p->napi);
+		fun_release_irqs(fp->fdev, 1, &p->irq_idx);
+	}
+}
+
+/* Release the IRQ vectors reserved for Tx/Rx queues. */
+static void fun_free_queue_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+
+	if (fp->num_irqs) {
+		netif_info(fp, intr, dev, "Releasing %u queue IRQs\n",
+			   fp->num_irqs);
+		fun_free_irqs_from(fp, 0);
+		kfree(fp->irqs);
+		fp->irqs = NULL;
+		fp->num_irqs = 0;
+		fp->num_tx_irqs = 0;
+	}
+}
+
+/* Reserve IRQ vectors, one per queue. We hold on to allocated vectors until
+ * the total number of queues changes.
+ */
+int fun_alloc_queue_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int i, copy, irqs_needed;
+	struct fun_irq *irqs, *p;
+	int node, res = -ENOMEM;
+	u16 *irq_idx;
+
+	irqs_needed = dev->real_num_rx_queues + dev->real_num_tx_queues;
+	if (irqs_needed == fp->num_irqs &&
+	    fp->num_tx_irqs == dev->real_num_tx_queues)
+		return 0;
+
+	/* IRQ needs have changed, reallocate. */
+	irqs = kcalloc(irqs_needed, sizeof(*irqs), GFP_KERNEL);
+	if (!irqs)
+		return -ENOMEM;
+
+	irq_idx = kcalloc(irqs_needed, sizeof(u16), GFP_KERNEL);
+	if (!irq_idx)
+		goto free;
+
+	/* keep as many existing IRQs as possible */
+	copy = min(irqs_needed, fp->num_irqs);
+	for (i = 0; i < copy; i++)
+		irq_idx[i] = fp->irqs[i].irq_idx;
+
+	/* get additional IRQs */
+	if (irqs_needed > fp->num_irqs) {
+		unsigned int addl_irqs = irqs_needed - fp->num_irqs;
+
+		res = fun_reserve_irqs(fp->fdev, addl_irqs, irq_idx + copy);
+		if (res != addl_irqs)
+			goto free;
+	}
+
+	/* release excess IRQs */
+	fun_free_irqs_from(fp, copy);
+
+	for (i = 0; i < copy; i++)
+		netif_napi_del(&fp->irqs[i].napi);
+
+	/* new Tx IRQs */
+	copy = min(dev->real_num_tx_queues, fp->num_tx_irqs);
+	memcpy(irqs, fp->irqs, copy * sizeof(*p));
+
+	node = dev_to_node(&fp->pdev->dev);
+	for (p = irqs + copy, i = copy; i < dev->real_num_tx_queues; i++, p++)
+		fun_init_irq(p, node, i);
+
+	/* new Rx IRQs */
+	copy = min(dev->real_num_rx_queues, fp->num_irqs - fp->num_tx_irqs);
+	memcpy(p, fp->irqs + fp->num_tx_irqs, copy * sizeof(*p));
+	p += copy;
+
+	for (i = copy; i < dev->real_num_rx_queues; i++, p++)
+		fun_init_irq(p, node, i);
+
+	/* assign IRQ vectors and register NAPI */
+	for (i = 0; i < irqs_needed; i++) {
+		irqs[i].irq_idx = irq_idx[i];
+		irqs[i].irq = pci_irq_vector(fp->pdev, irq_idx[i]);
+	}
+
+	for (p = irqs, i = 0; i < dev->real_num_tx_queues; i++, p++)
+		netif_tx_napi_add(dev, &p->napi, fun_txq_napi_poll,
+				  NAPI_POLL_WEIGHT);
+
+	for (i = 0; i < dev->real_num_rx_queues; i++, p++)
+		netif_napi_add(dev, &p->napi, fun_rxq_napi_poll,
+			       NAPI_POLL_WEIGHT);
+
+	kfree(irq_idx);
+	kfree(fp->irqs);
+
+	fp->irqs = irqs;
+	fp->num_irqs = irqs_needed;
+	fp->num_tx_irqs = dev->real_num_tx_queues;
+	netif_info(fp, intr, dev, "Reserved %u IRQs for Tx/Rx queues\n",
+		   irqs_needed);
+	return 0;
+
+free:
+	kfree(irq_idx);
+	kfree(irqs);
+	return res;
+}
+
+static irqreturn_t fun_queue_irq_handler(int irq, void *data)
+{
+	struct fun_irq *p = data;
+
+	if (p->rxq) {
+		prefetch(p->rxq->next_cqe_info);
+		p->rxq->irq_cnt++;
+	}
+	napi_schedule_irqoff(&p->napi);
+	return IRQ_HANDLED;
+}
+
+static int fun_enable_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	unsigned int i, qidx;
+	struct fun_irq *p;
+	const char *qtype;
+	int err;
+
+	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
+		if (p->txq) {
+			qtype = "tx";
+			qidx = p->txq->qidx;
+		} else if (p->rxq) {
+			qtype = "rx";
+			qidx = p->rxq->qidx;
+		} else {
+			continue;
+		}
+
+		snprintf(p->name, sizeof(p->name) - 1, "%s-%s-%u", dev->name,
+			 qtype, qidx);
+		err = request_irq(p->irq, fun_queue_irq_handler, 0, p->name, p);
+		if (err) {
+			netdev_err(dev, "Failed to allocate IRQ %u, err %d\n",
+				   p->irq, err);
+			goto unroll;
+		}
+	}
+
+	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
+		if (!p->txq && !p->rxq)
+			continue;
+		irq_set_affinity_notifier(p->irq, &p->aff_notify);
+		irq_set_affinity_hint(p->irq, &p->affinity_mask);
+		napi_enable(&p->napi);
+	}
+
+	return 0;
+
+unroll:
+	while (i--) {
+		p--;
+		free_irq(p->irq, p);
+	}
+	return err;
+}
+
+static void fun_disable_irqs(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct fun_irq *p;
+	unsigned int i;
+
+	for (p = fp->irqs, i = 0; i < fp->num_irqs; i++, p++) {
+		if (!p->txq && !p->rxq)
+			continue;
+
+		napi_disable(&p->napi);
+		irq_set_affinity_notifier(p->irq, NULL);
+		irq_set_affinity_hint(p->irq, NULL);
+		free_irq(p->irq, p);
+	}
+}
+
+static int funeth_open(struct net_device *netdev)
+{
+	static const int port_keys[] = {
+		FUN_ADMIN_PORT_KEY_STATS_DMA_LOW,
+		FUN_ADMIN_PORT_KEY_STATS_DMA_HIGH,
+		FUN_ADMIN_PORT_KEY_ENABLE
+	};
+
+	struct funeth_priv *fp = netdev_priv(netdev);
+	u64 vals[] = {
+		lower_32_bits(fp->stats_dma_addr),
+		upper_32_bits(fp->stats_dma_addr),
+		FUN_PORT_FLAG_ENABLE_NOTIFY
+	};
+	int rc;
+
+	rc = fun_alloc_queue_irqs(netdev);
+	if (rc)
+		return rc;
+
+	rc = fun_alloc_rings(netdev);
+	if (rc)
+		return rc;
+
+	rc = fun_vi_create(fp);
+	if (rc)
+		goto free_queues;
+
+	rc = fun_enable_irqs(netdev);
+	if (rc)
+		goto destroy_vi;
+
+	if (fp->rss_cfg) {
+		rc = fun_config_rss(netdev, fp->hash_algo, fp->rss_key,
+				    fp->indir_table, FUN_ADMIN_SUBOP_CREATE);
+	} else {
+		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+
+		/* The non-RSS case has only 1 queue. */
+		rc = fun_bind(fp->fdev, FUN_ADMIN_BIND_TYPE_VI,
+			      netdev->dev_port, FUN_ADMIN_BIND_TYPE_EPCQ,
+			      rxqs[0]->hw_cqid);
+	}
+	if (rc)
+		goto disable_irqs;
+
+	rc = fun_port_write_cmds(fp, 3, port_keys, vals);
+	if (rc)
+		goto free_rss;
+
+	netif_tx_start_all_queues(netdev);
+	return 0;
+
+free_rss:
+	fun_destroy_rss(fp);
+disable_irqs:
+	fun_disable_irqs(netdev);
+destroy_vi:
+	fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, netdev->dev_port);
+free_queues:
+	fun_free_rings(netdev);
+	return rc;
+}
+
+static int funeth_close(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+
+	/* HW admin disable port */
+	fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_DISABLE, 0);
+
+	netif_carrier_off(netdev);
+	netif_tx_disable(netdev);
+
+	fun_destroy_rss(fp);
+	if (fp->txqs)
+		fun_res_destroy(fp->fdev, FUN_ADMIN_OP_VI, 0, netdev->dev_port);
+	fun_disable_irqs(netdev);
+	fun_free_rings(netdev);
+	return 0;
+}
+
+static void fun_get_stats64(struct net_device *netdev,
+			    struct rtnl_link_stats64 *stats)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct funeth_rxq **rxqs;
+	unsigned int i, start;
+
+	stats->tx_packets = fp->tx_packets;
+	stats->tx_bytes   = fp->tx_bytes;
+	stats->tx_dropped = fp->tx_dropped;
+
+	stats->rx_packets = fp->rx_packets;
+	stats->rx_bytes   = fp->rx_bytes;
+	stats->rx_dropped = fp->rx_dropped;
+
+	rcu_read_lock();
+	rxqs = rcu_dereference(fp->rxqs);
+	if (!rxqs)
+		goto unlock;
+
+	for (i = 0; i < netdev->real_num_tx_queues; i++) {
+		struct funeth_txq_stats txs;
+
+		FUN_QSTAT_READ(fp->txqs[i], start, txs);
+		stats->tx_packets += txs.tx_pkts;
+		stats->tx_bytes   += txs.tx_bytes;
+		stats->tx_dropped += txs.tx_map_err + txs.tx_len_err;
+	}
+
+	for (i = 0; i < fp->num_xdpqs; i++) {
+		struct funeth_txq_stats txs;
+
+		FUN_QSTAT_READ(fp->xdpqs[i], start, txs);
+		stats->tx_packets += txs.tx_pkts;
+		stats->tx_bytes   += txs.tx_bytes;
+	}
+
+	for (i = 0; i < netdev->real_num_rx_queues; i++) {
+		struct funeth_rxq_stats rxs;
+
+		FUN_QSTAT_READ(rxqs[i], start, rxs);
+		stats->rx_packets += rxs.rx_pkts;
+		stats->rx_bytes   += rxs.rx_bytes;
+		stats->rx_dropped += rxs.rx_map_err + rxs.rx_mem_drops;
+	}
+unlock:
+	rcu_read_unlock();
+}
+
+static int fun_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	int rc;
+
+	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MTU, new_mtu);
+	if (!rc)
+		netdev->mtu = new_mtu;
+	return rc;
+}
+
+static int fun_set_macaddr(struct net_device *netdev, void *addr)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct sockaddr *saddr = addr;
+	int rc;
+
+	if (!is_valid_ether_addr(saddr->sa_data))
+		return -EADDRNOTAVAIL;
+
+	if (ether_addr_equal(netdev->dev_addr, saddr->sa_data))
+		return 0;
+
+	rc = fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
+				ether_addr_to_u64(saddr->sa_data));
+	if (!rc)
+		eth_hw_addr_set(netdev, saddr->sa_data);
+	return rc;
+}
+
+static int fun_get_port_attributes(struct net_device *netdev)
+{
+	static const int keys[] = {
+		FUN_ADMIN_PORT_KEY_MACADDR, FUN_ADMIN_PORT_KEY_CAPABILITIES,
+		FUN_ADMIN_PORT_KEY_ADVERT, FUN_ADMIN_PORT_KEY_MTU
+	};
+	static const int phys_keys[] = {
+		FUN_ADMIN_PORT_KEY_LANE_ATTRS,
+	};
+
+	struct funeth_priv *fp = netdev_priv(netdev);
+	u64 data[ARRAY_SIZE(keys)];
+	u8 mac[ETH_ALEN];
+	int i, rc;
+
+	rc = fun_port_read_cmds(fp, ARRAY_SIZE(keys), keys, data);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < ARRAY_SIZE(keys); i++) {
+		switch (keys[i]) {
+		case FUN_ADMIN_PORT_KEY_MACADDR:
+			u64_to_ether_addr(data[i], mac);
+			if (is_zero_ether_addr(mac)) {
+				eth_hw_addr_random(netdev);
+			} else if (is_valid_ether_addr(mac)) {
+				eth_hw_addr_set(netdev, mac);
+			} else {
+				netdev_err(netdev,
+					   "device provided a bad MAC address %pM\n",
+					   mac);
+				return -EINVAL;
+			}
+			break;
+
+		case FUN_ADMIN_PORT_KEY_CAPABILITIES:
+			fp->port_caps = data[i];
+			break;
+
+		case FUN_ADMIN_PORT_KEY_ADVERT:
+			fp->advertising = data[i];
+			break;
+
+		case FUN_ADMIN_PORT_KEY_MTU:
+			netdev->mtu = data[i];
+			break;
+		}
+	}
+
+	if (!(fp->port_caps & FUN_PORT_CAP_VPORT)) {
+		rc = fun_port_read_cmds(fp, ARRAY_SIZE(phys_keys), phys_keys,
+					data);
+		if (rc)
+			return rc;
+
+		fp->lane_attrs = data[0];
+	}
+
+	if (netdev->addr_assign_type == NET_ADDR_RANDOM)
+		return fun_port_write_cmd(fp, FUN_ADMIN_PORT_KEY_MACADDR,
+					  ether_addr_to_u64(netdev->dev_addr));
+	return 0;
+}
+
+static int fun_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	const struct funeth_priv *fp = netdev_priv(dev);
+
+	return copy_to_user(ifr->ifr_data, &fp->hwtstamp_cfg,
+			    sizeof(fp->hwtstamp_cfg)) ? -EFAULT : 0;
+}
+
+static int fun_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	if (cfg.flags)           /* flags is reserved, must be 0 */
+		return -EINVAL;
+
+	/* no TX HW timestamps */
+	cfg.tx_type = HWTSTAMP_TX_OFF;
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	fp->hwtstamp_cfg = cfg;
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int fun_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return fun_hwtstamp_set(dev, ifr);
+	case SIOCGHWTSTAMP:
+		return fun_hwtstamp_get(dev, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
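+/* Largest MTU for which a received frame plus XDP headroom and tailroom
+ * still fits in a single page, since XDP uses at most one Rx buffer.
+ */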
+#define XDP_MAX_MTU \
+	(PAGE_SIZE - FUN_XDP_HEADROOM - VLAN_ETH_HLEN - FUN_RX_TAILROOM)
+
+static int fun_xdp_setup(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	struct bpf_prog *old_prog, *prog = xdp->prog;
+	struct funeth_priv *fp = netdev_priv(dev);
+	bool reconfig;
+	int rc, i;
+
+	/* XDP uses at most one buffer */
+	if (prog && dev->mtu > XDP_MAX_MTU) {
+		netdev_err(dev, "device MTU %u too large for XDP\n", dev->mtu);
+		NL_SET_ERR_MSG_MOD(xdp->extack,
+				   "Device MTU too large for XDP");
+		return -EINVAL;
+	}
+
+	reconfig = netif_running(dev) && (!!fp->xdp_prog ^ !!prog);
+	if (reconfig) {
+		rc = funeth_close(dev);
+		if (rc) {
+			NL_SET_ERR_MSG_MOD(xdp->extack,
+					   "Failed to reconfigure Rx queues.");
+			return rc;
+		}
+	}
+
+	dev->max_mtu = prog ? XDP_MAX_MTU : FUN_MAX_MTU;
+	fp->num_xdpqs = prog ? num_online_cpus() : 0;
+	old_prog = xchg(&fp->xdp_prog, prog);
+
+	if (reconfig) {
+		rc = funeth_open(dev);
+		if (rc) {
+			NL_SET_ERR_MSG_MOD(xdp->extack,
+					   "Failed to reconfigure Rx queues.");
+			dev->max_mtu = old_prog ? XDP_MAX_MTU : FUN_MAX_MTU;
+			fp->num_xdpqs = old_prog ? num_online_cpus() : 0;
+			xchg(&fp->xdp_prog, old_prog);
+			return rc;
+		}
+	} else if (netif_running(dev)) {
+		struct funeth_rxq **rxqs = rtnl_dereference(fp->rxqs);
+
+		for (i = 0; i < dev->real_num_rx_queues; i++)
+			WRITE_ONCE(rxqs[i]->xdp_prog, prog);
+	}
+
+	if (old_prog)
+		bpf_prog_put(old_prog);
+	return 0;
+}
+
+static int fun_xdp(struct net_device *dev, struct netdev_bpf *xdp)
+{
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return fun_xdp_setup(dev, xdp);
+	default:
+		return -EINVAL;
+	}
+}
+
+struct devlink_port *fun_get_devlink_port(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+
+	return &fp->dl_port;
+}
+
+static int fun_init_vports(struct fun_ethdev *ed, unsigned int n)
+{
+	if (ed->num_vports)
+		return -EINVAL;
+
+	ed->vport_info = kvcalloc(n, sizeof(*ed->vport_info), GFP_KERNEL);
+	if (!ed->vport_info)
+		return -ENOMEM;
+	ed->num_vports = n;
+	return 0;
+}
+
+static void fun_free_vports(struct fun_ethdev *ed)
+{
+	kvfree(ed->vport_info);
+	ed->vport_info = NULL;
+	ed->num_vports = 0;
+}
+
+static struct fun_vport_info *fun_get_vport(struct fun_dev *fdev,
+					    unsigned int vport)
+{
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+
+	if (!ed->vport_info || vport >= ed->num_vports)
+		return NULL;
+
+	return ed->vport_info + vport;
+}
+
+int fun_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct fun_dev *fdev = fp->fdev;
+	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
+	struct fun_adi_param mac_param = {};
+	int rc;
+
+	if (!vi)
+		return -EINVAL;
+	if (is_multicast_ether_addr(mac))
+		return -EINVAL;
+
+	mac_param.u.mac = FUN_ADI_MAC_INIT(ether_addr_to_u64(mac));
+	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_MACADDR, vf + 1,
+			   &mac_param);
+	if (!rc)
+		ether_addr_copy(vi->mac, mac);
+	return rc;
+}
+
+static int fun_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
+			   __be16 vlan_proto)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct fun_dev *fdev = fp->fdev;
+	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
+	struct fun_adi_param vlan_param = {};
+	int rc;
+
+	if (!vi)
+		return -EINVAL;
+	if (vlan > 4095 || qos > 7)
+		return -EINVAL;
+	if (vlan_proto && vlan_proto != htons(ETH_P_8021Q) &&
+	    vlan_proto != htons(ETH_P_8021AD))
+		return -EINVAL;
+
+	vlan_param.u.vlan = FUN_ADI_VLAN_INIT(be16_to_cpu(vlan_proto),
+					      ((u16)qos << VLAN_PRIO_SHIFT) | vlan);
+	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_VLAN, vf + 1, &vlan_param);
+	if (rc)
+		return rc;
+
+	vi->vlan = vlan;
+	vi->qos = qos;
+	vi->vlan_proto = vlan_proto;
+	return 0;
+}
+
+static int fun_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+			   int max_tx_rate)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct fun_dev *fdev = fp->fdev;
+	struct fun_vport_info *vi = fun_get_vport(fdev, vf);
+	struct fun_adi_param rate_param = {};
+	int rc;
+
+	if (!vi || min_tx_rate)
+		return -EINVAL;
+
+	rate_param.u.rate = FUN_ADI_RATE_INIT(max_tx_rate);
+	rc = fun_adi_write(fdev, FUN_ADMIN_ADI_ATTR_RATE, vf + 1, &rate_param);
+	if (rc)
+		return rc;
+
+	vi->max_rate = max_tx_rate;
+	return 0;
+}
+
+int fun_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	struct fun_dev *fdev = fp->fdev;
+	const struct fun_vport_info *vi = fun_get_vport(fdev, vf);
+
+	if (!vi)
+		return -EINVAL;
+
+	memset(ivi, 0, sizeof(*ivi));
+	ivi->vf = vf;
+	ether_addr_copy(ivi->mac, vi->mac);
+	ivi->vlan = vi->vlan;
+	ivi->qos = vi->qos;
+	ivi->vlan_proto = vi->vlan_proto;
+	ivi->max_tx_rate = vi->max_rate;
+	ivi->spoofchk = vi->spoofchk;
+	return 0;
+}
+
+static const struct net_device_ops fun_netdev_ops = {
+	.ndo_open		= funeth_open,
+	.ndo_stop		= funeth_close,
+	.ndo_start_xmit		= fun_start_xmit,
+	.ndo_get_stats64	= fun_get_stats64,
+	.ndo_change_mtu		= fun_change_mtu,
+	.ndo_set_mac_address	= fun_set_macaddr,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_do_ioctl		= fun_ioctl,
+	.ndo_uninit		= fun_free_queue_irqs,
+	.ndo_bpf		= fun_xdp,
+	.ndo_xdp_xmit		= fun_xdp_xmit_frames,
+	.ndo_set_vf_mac		= fun_set_vf_mac,
+	.ndo_set_vf_vlan	= fun_set_vf_vlan,
+	.ndo_set_vf_rate	= fun_set_vf_rate,
+	.ndo_get_vf_config	= fun_get_vf_config,
+	.ndo_get_devlink_port	= fun_get_devlink_port,
+};
+
+#define GSO_ENCAP_FLAGS (NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | \
+			 NETIF_F_GSO_IPXIP6 | NETIF_F_GSO_UDP_TUNNEL | \
+			 NETIF_F_GSO_UDP_TUNNEL_CSUM)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define VLAN_FEAT (NETIF_F_SG | NETIF_F_HW_CSUM | TSO_FLAGS | \
+		   GSO_ENCAP_FLAGS | NETIF_F_HIGHDMA)
+
+static void fun_dflt_rss_indir(struct funeth_priv *fp, unsigned int nrx)
+{
+	unsigned int i;
+
+	for (i = 0; i < fp->indir_table_nentries; i++)
+		fp->indir_table[i] = ethtool_rxfh_indir_default(i, nrx);
+}
+
+/* Reset the RSS indirection table to equal distribution across the current
+ * number of Rx queues. Called at init time and whenever the number of Rx
+ * queues changes subsequently. Note that this may also resize the indirection
+ * table.
+ */
+void fun_reset_rss_indir(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+
+	if (!fp->rss_cfg)
+		return;
+
+	/* Set the table size to the max possible that allows an equal number
+	 * of occurrences of each CQ.
+	 */
+	fp->indir_table_nentries = rounddown(FUN_ETH_RSS_MAX_INDIR_ENT,
+					     dev->real_num_rx_queues);
+	fun_dflt_rss_indir(fp, dev->real_num_rx_queues);
+}
+
+/* Allocate the DMA area for the RSS configuration commands to the device, and
+ * initialize the hash, hash key, indirection table size and its entries to
+ * their defaults. The indirection table defaults to equal distribution across
+ * the Rx queues.
+ */
+static int fun_init_rss(struct net_device *dev)
+{
+	struct funeth_priv *fp = netdev_priv(dev);
+	size_t size = sizeof(fp->rss_key) + sizeof(fp->indir_table);
+
+	fp->rss_hw_id = FUN_HCI_ID_INVALID;
+	if (!(fp->port_caps & FUN_PORT_CAP_OFFLOADS))
+		return 0;
+
+	fp->rss_cfg = dma_alloc_coherent(&fp->pdev->dev, size,
+					 &fp->rss_dma_addr, GFP_KERNEL);
+	if (!fp->rss_cfg)
+		return -ENOMEM;
+
+	fp->hash_algo = FUN_ETH_RSS_ALG_TOEPLITZ;
+	netdev_rss_key_fill(fp->rss_key, sizeof(fp->rss_key));
+	fun_reset_rss_indir(dev);
+	return 0;
+}
+
+static void fun_free_rss(struct funeth_priv *fp)
+{
+	if (fp->rss_cfg) {
+		dma_free_coherent(&fp->pdev->dev,
+				  sizeof(fp->rss_key) + sizeof(fp->indir_table),
+				  fp->rss_cfg, fp->rss_dma_addr);
+		fp->rss_cfg = NULL;
+	}
+}
+
+static int fun_init_stats_area(struct funeth_priv *fp)
+{
+	unsigned int nstats;
+
+	if (!(fp->port_caps & FUN_PORT_CAP_STATS))
+		return 0;
+
+	nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
+		 PORT_MAC_FEC_STATS_MAX;
+
+	fp->stats = dma_alloc_coherent(&fp->pdev->dev, nstats * sizeof(u64),
+				       &fp->stats_dma_addr, GFP_KERNEL);
+	if (!fp->stats)
+		return -ENOMEM;
+	return 0;
+}
+
+static void fun_free_stats_area(struct funeth_priv *fp)
+{
+	unsigned int nstats;
+
+	if (fp->stats) {
+		nstats = PORT_MAC_RX_STATS_MAX + PORT_MAC_TX_STATS_MAX +
+			 PORT_MAC_FEC_STATS_MAX;
+		dma_free_coherent(&fp->pdev->dev, nstats * sizeof(u64),
+				  fp->stats, fp->stats_dma_addr);
+		fp->stats = NULL;
+	}
+}
+
+static int fun_dl_port_register(struct net_device *netdev)
+{
+	struct funeth_priv *fp = netdev_priv(netdev);
+	struct devlink *dl = priv_to_devlink(fp->fdev);
+	struct devlink_port_attrs attrs = {};
+
+	if (fp->port_caps & FUN_PORT_CAP_VPORT) {
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_VIRTUAL;
+	} else {
+		attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL;
+		attrs.lanes = fp->lane_attrs & 7;
+		if (fp->lane_attrs & FUN_PORT_LANE_SPLIT) {
+			attrs.split = 1;
+			attrs.phys.split_subport_number = fp->lport & 3;
+		}
+	}
+	attrs.phys.port_number = fp->lport;
+
+	devlink_port_attrs_set(&fp->dl_port, &attrs);
+
+	return devlink_port_register(dl, &fp->dl_port, netdev->dev_port);
+}
+
+/* Determine the max Tx/Rx queues for a port. */
+static int fun_max_qs(struct fun_ethdev *ed, unsigned int *ntx,
+		      unsigned int *nrx)
+{
+	int neth;
+
+	if (ed->num_ports > 1 || is_kdump_kernel()) {
+		*ntx = 1;
+		*nrx = 1;
+		return 0;
+	}
+
+	neth = fun_get_res_count(&ed->fdev, FUN_ADMIN_OP_ETH);
+	if (neth < 0)
+		return neth;
+
+	/* We determine the max number of queues based on the CPU
+	 * cores, device interrupts and queues, RSS size, and device Tx flows.
+	 *
+ * - At least 1 Rx and 1 Tx queue.
+	 * - At most 1 Rx/Tx queue per core.
+	 * - Each Rx/Tx queue needs 1 SQ.
+	 */
+	*ntx = min(ed->nsqs_per_port - 1, num_online_cpus());
+	*nrx = *ntx;
+	if (*ntx > neth)
+		*ntx = neth;
+	if (*nrx > FUN_ETH_RSS_MAX_INDIR_ENT)
+		*nrx = FUN_ETH_RSS_MAX_INDIR_ENT;
+	return 0;
+}
+
+static void fun_queue_defaults(struct net_device *dev, unsigned int nsqs)
+{
+	unsigned int ntx, nrx;
+
+	ntx = min(dev->num_tx_queues, FUN_DFLT_QUEUES);
+	nrx = min(dev->num_rx_queues, FUN_DFLT_QUEUES);
+	if (ntx <= nrx) {
+		ntx = min(ntx, nsqs / 2);
+		nrx = min(nrx, nsqs - ntx);
+	} else {
+		nrx = min(nrx, nsqs / 2);
+		ntx = min(ntx, nsqs - nrx);
+	}
+
+	netif_set_real_num_tx_queues(dev, ntx);
+	netif_set_real_num_rx_queues(dev, nrx);
+}
+
+static int fun_create_netdev(struct fun_ethdev *ed, unsigned int portid)
+{
+	struct fun_dev *fdev = &ed->fdev;
+	struct net_device *netdev;
+	unsigned int ntx, nrx;
+	struct funeth_priv *fp;
+	int rc;
+
+	rc = fun_max_qs(ed, &ntx, &nrx);
+	if (rc)
+		return rc;
+
+	netdev = alloc_etherdev_mqs(sizeof(*fp), ntx, nrx);
+	if (!netdev) {
+		rc = -ENOMEM;
+		goto done;
+	}
+
+	netdev->dev_port = portid;
+	fun_queue_defaults(netdev, ed->nsqs_per_port);
+
+	fp = netdev_priv(netdev);
+	fp->fdev = fdev;
+	fp->pdev = to_pci_dev(fdev->dev);
+	fp->netdev = netdev;
+	fp->msg_enable = msg_enable;
+	fp->ethid_start = portid;
+	seqcount_init(&fp->link_seq);
+
+	fp->lport = INVALID_LPORT;
+	rc = fun_port_create(netdev);
+	if (rc)
+		goto free_netdev;
+
+	/* bind port to admin CQ for async events */
+	rc = fun_bind(fdev, FUN_ADMIN_BIND_TYPE_PORT, portid,
+		      FUN_ADMIN_BIND_TYPE_EPCQ, 0);
+	if (rc)
+		goto destroy_port;
+
+	rc = fun_get_port_attributes(netdev);
+	if (rc)
+		goto destroy_port;
+
+	rc = fun_init_rss(netdev);
+	if (rc)
+		goto destroy_port;
+
+	rc = fun_init_stats_area(fp);
+	if (rc)
+		goto free_rss;
+
+	SET_NETDEV_DEV(netdev, fdev->dev);
+	netdev->netdev_ops = &fun_netdev_ops;
+
+	netdev->hw_features = NETIF_F_SG | NETIF_F_RXHASH | NETIF_F_RXCSUM;
+	if (fp->port_caps & FUN_PORT_CAP_OFFLOADS)
+		netdev->hw_features |= NETIF_F_HW_CSUM | TSO_FLAGS;
+	if (fp->port_caps & FUN_PORT_CAP_ENCAP_OFFLOADS)
+		netdev->hw_features |= GSO_ENCAP_FLAGS;
+
+	netdev->features |= netdev->hw_features | NETIF_F_HIGHDMA;
+	netdev->vlan_features = netdev->features & VLAN_FEAT;
+	netdev->mpls_features = netdev->vlan_features;
+	netdev->hw_enc_features = netdev->hw_features;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = FUN_MAX_MTU;
+	netdev->watchdog_timeo = 15 * HZ;
+
+	fun_set_ethtool_ops(netdev);
+
+	/* configurable parameters */
+	fp->sq_depth = min(SQ_DEPTH, fdev->q_depth);
+	fp->cq_depth = min(CQ_DEPTH, fdev->q_depth);
+	fp->rq_depth = min_t(unsigned int, RQ_DEPTH, fdev->q_depth);
+	fp->rx_coal_usec  = CQ_INTCOAL_USEC;
+	fp->rx_coal_count = CQ_INTCOAL_NPKT;
+	fp->tx_coal_usec  = SQ_INTCOAL_USEC;
+	fp->tx_coal_count = SQ_INTCOAL_NPKT;
+	fp->cq_irq_db = FUN_IRQ_CQ_DB(fp->rx_coal_usec, fp->rx_coal_count);
+
+	rc = fun_dl_port_register(netdev);
+	if (rc)
+		goto free_stats;
+
+	fp->ktls_id = FUN_HCI_ID_INVALID;
+	fun_ktls_init(netdev);            /* optional, failure OK */
+
+	netif_carrier_off(netdev);
+	ed->netdevs[portid] = netdev;
+	rc = register_netdev(netdev);
+	if (rc)
+		goto unreg_devlink;
+
+	if (fp->dl_port.devlink)
+		devlink_port_type_eth_set(&fp->dl_port, netdev);
+
+	return 0;
+
+unreg_devlink:
+	ed->netdevs[portid] = NULL;
+	fun_ktls_cleanup(fp);
+	if (fp->dl_port.devlink)
+		devlink_port_unregister(&fp->dl_port);
+free_stats:
+	fun_free_stats_area(fp);
+free_rss:
+	fun_free_rss(fp);
+destroy_port:
+	fun_port_destroy(netdev);
+free_netdev:
+	free_netdev(netdev);
+done:
+	dev_err(fdev->dev, "couldn't allocate port %u, error %d\n", portid, rc);
+	return rc;
+}
+
+static void fun_destroy_netdev(struct net_device *netdev)
+{
+	if (likely(netdev)) {
+		struct funeth_priv *fp = netdev_priv(netdev);
+
+		if (fp->dl_port.devlink) {
+			devlink_port_type_clear(&fp->dl_port);
+			devlink_port_unregister(&fp->dl_port);
+		}
+		unregister_netdev(netdev);
+		fun_ktls_cleanup(fp);
+		fun_free_stats_area(fp);
+		fun_free_rss(fp);
+		fun_port_destroy(netdev);
+		free_netdev(netdev);
+	}
+}
+
+static int fun_create_ports(struct fun_ethdev *ed, unsigned int nports)
+{
+	struct fun_dev *fd = &ed->fdev;
+	int i, rc;
+
+	/* The admin queue takes 1 IRQ and 2 SQs. */
+	ed->nsqs_per_port = min(fd->num_irqs - 1,
+				fd->kern_end_qid - 2) / nports;
+	if (ed->nsqs_per_port < 2) {
+		dev_err(fd->dev, "Too few SQs for %u ports\n", nports);
+		return -EINVAL;
+	}
+
+	ed->netdevs = kcalloc(nports, sizeof(*ed->netdevs), GFP_KERNEL);
+	if (!ed->netdevs)
+		return -ENOMEM;
+
+	ed->num_ports = nports;
+	for (i = 0; i < nports; i++) {
+		rc = fun_create_netdev(ed, i);
+		if (rc)
+			goto free_netdevs;
+	}
+
+	return 0;
+
+free_netdevs:
+	while (i)
+		fun_destroy_netdev(ed->netdevs[--i]);
+	kfree(ed->netdevs);
+	ed->netdevs = NULL;
+	ed->num_ports = 0;
+	return rc;
+}
+
+static void fun_destroy_ports(struct fun_ethdev *ed)
+{
+	unsigned int i;
+
+	for (i = 0; i < ed->num_ports; i++)
+		fun_destroy_netdev(ed->netdevs[i]);
+
+	kfree(ed->netdevs);
+	ed->netdevs = NULL;
+	ed->num_ports = 0;
+}
+
+static void fun_update_link_state(const struct fun_ethdev *ed,
+				  const struct fun_admin_port_notif *notif)
+{
+	unsigned int port_idx = be16_to_cpu(notif->id);
+	struct net_device *netdev;
+	struct funeth_priv *fp;
+
+	if (port_idx >= ed->num_ports)
+		return;
+
+	netdev = ed->netdevs[port_idx];
+	fp = netdev_priv(netdev);
+
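+	/* Publish the FW-reported link attributes under link_seq so that
+	 * readers always see a consistent snapshot.
+	 */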
+	write_seqcount_begin(&fp->link_seq);
+	fp->link_speed = be32_to_cpu(notif->speed) * 10;  /* 10 Mbps->Mbps */
+	fp->active_fc = notif->flow_ctrl;
+	fp->active_fec = notif->fec;
+	fp->xcvr_type = notif->xcvr_type;
+	fp->link_down_reason = notif->link_down_reason;
+	fp->lp_advertising = be64_to_cpu(notif->lp_advertising);
+
+	if ((notif->link_state | notif->missed_events) & FUN_PORT_FLAG_MAC_DOWN)
+		netif_carrier_off(netdev);
+	if (notif->link_state & FUN_PORT_FLAG_NH_DOWN)
+		netif_dormant_on(netdev);
+	if (notif->link_state & FUN_PORT_FLAG_NH_UP)
+		netif_dormant_off(netdev);
+	if (notif->link_state & FUN_PORT_FLAG_MAC_UP)
+		netif_carrier_on(netdev);
+
+	write_seqcount_end(&fp->link_seq);
+	fun_report_link(netdev);
+}
+
+/* handler for async events delivered through the admin CQ */
+static void fun_event_cb(struct fun_dev *fdev, void *entry)
+{
+	u8 op = ((struct fun_admin_rsp_common *)entry)->op;
+
+	if (op == FUN_ADMIN_OP_PORT) {
+		const struct fun_admin_port_notif *rsp = entry;
+
+		if (rsp->subop == FUN_ADMIN_SUBOP_NOTIFY) {
+			fun_update_link_state(to_fun_ethdev(fdev), rsp);
+		} else if (rsp->subop == FUN_ADMIN_SUBOP_RES_COUNT) {
+			const struct fun_admin_res_count_rsp *r = entry;
+
+			if (r->count.data)
+				set_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags);
+			else
+				set_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags);
+			fun_serv_sched(fdev);
+		} else {
+			dev_info(fdev->dev, "adminq event unexpected op %u subop %u\n",
+				 op, rsp->subop);
+		}
+	} else {
+		dev_info(fdev->dev, "adminq event unexpected op %u\n", op);
+	}
+}
+
+/* handler for pending work managed by the service task */
+static void fun_service_cb(struct fun_dev *fdev)
+{
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	int rc;
+
+	if (test_and_clear_bit(FUN_SERV_DEL_PORTS, &fdev->service_flags))
+		fun_destroy_ports(ed);
+
+	if (!test_and_clear_bit(FUN_SERV_RES_CHANGE, &fdev->service_flags))
+		return;
+
+	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
+	if (rc < 0 || rc == ed->num_ports)
+		return;
+
+	if (ed->num_ports)
+		fun_destroy_ports(ed);
+	if (rc)
+		fun_create_ports(ed, rc);
+}
+
+static int funeth_sriov_configure(struct pci_dev *pdev, int nvfs)
+{
+	struct fun_dev *fdev = pci_get_drvdata(pdev);
+	struct fun_ethdev *ed = to_fun_ethdev(fdev);
+	int rc;
+
+	if (nvfs == 0) {
+		if (pci_vfs_assigned(pdev)) {
+			dev_warn(&pdev->dev,
+				 "Cannot disable SR-IOV while VFs are assigned\n");
+			return -EPERM;
+		}
+
+		pci_disable_sriov(pdev);
+		fun_free_vports(ed);
+		return 0;
+	}
+
+	rc = fun_init_vports(ed, nvfs);
+	if (rc)
+		return rc;
+
+	rc = pci_enable_sriov(pdev, nvfs);
+	if (rc) {
+		fun_free_vports(ed);
+		return rc;
+	}
+
+	return nvfs;
+}
+
+static int funeth_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct devlink *devlink;
+	struct fun_ethdev *ed;
+	struct fun_dev *fdev;
+	int rc;
+
+	struct fun_dev_params aqreq = {
+		.cqe_size_log2 = ilog2(ADMIN_CQE_SIZE),
+		.sqe_size_log2 = ilog2(ADMIN_SQE_SIZE),
+		.cq_depth      = ADMIN_CQ_DEPTH,
+		.sq_depth      = ADMIN_SQ_DEPTH,
+		.rq_depth      = ADMIN_RQ_DEPTH,
+		.min_msix      = 2,              /* 1 Rx + 1 Tx */
+		.event_cb      = fun_event_cb,
+		.serv_cb       = fun_service_cb,
+	};
+
+	devlink = fun_devlink_alloc(&pdev->dev);
+	if (!devlink) {
+		dev_err(&pdev->dev, "devlink alloc failed\n");
+		return -ENOMEM;
+	}
+
+	ed = devlink_priv(devlink);
+
+	fdev = &ed->fdev;
+	rc = fun_dev_enable(fdev, pdev, &aqreq, KBUILD_MODNAME);
+	if (rc)
+		goto free_devlink;
+
+	rc = fun_get_res_count(fdev, FUN_ADMIN_OP_PORT);
+	if (rc > 0)
+		rc = fun_create_ports(ed, rc);
+	if (rc < 0)
+		goto disable_dev;
+
+	fun_serv_restart(fdev);
+	fun_devlink_register(devlink);
+	return 0;
+
+disable_dev:
+	fun_dev_disable(fdev);
+free_devlink:
+	fun_devlink_free(devlink);
+	return rc;
+}
+
+static void __funeth_remove(struct pci_dev *pdev)
+{
+	struct fun_dev *fdev = pci_get_drvdata(pdev);
+	struct devlink *devlink;
+	struct fun_ethdev *ed;
+
+	if (!fdev)
+		return;
+
+	ed = to_fun_ethdev(fdev);
+	devlink = priv_to_devlink(ed);
+	fun_devlink_unregister(devlink);
+
+#ifdef CONFIG_PCI_IOV
+	funeth_sriov_configure(pdev, 0);
+#endif
+
+	fun_serv_stop(fdev);
+	fun_destroy_ports(ed);
+	fun_dev_disable(fdev);
+
+	fun_devlink_free(devlink);
+}
+
+static void funeth_remove(struct pci_dev *pdev)
+{
+	__funeth_remove(pdev);
+}
+
+static void funeth_shutdown(struct pci_dev *pdev)
+{
+	__funeth_remove(pdev);
+}
+
+static struct pci_driver funeth_driver = {
+	.name		 = KBUILD_MODNAME,
+	.id_table	 = funeth_id_table,
+	.probe		 = funeth_probe,
+	.remove		 = funeth_remove,
+	.shutdown	 = funeth_shutdown,
+	.sriov_configure = funeth_sriov_configure,
+};
+
+static int __init funeth_init(void)
+{
+	int ret;
+
+	ret = pci_register_driver(&funeth_driver);
+	if (ret) {
+		pr_err("%s pci_register_driver failed ret %d\n",
+		       KBUILD_MODNAME, ret);
+	}
+	return ret;
+}
+
+static void __exit funeth_exit(void)
+{
+	pci_unregister_driver(&funeth_driver);
+}
+
+module_init(funeth_init);
+module_exit(funeth_exit);
+
+MODULE_AUTHOR("Dimitris Michailidis <dmichail@fungible.com>");
+MODULE_DESCRIPTION("Fungible Ethernet Network Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DEVICE_TABLE(pci, funeth_id_table);