diff mbox series

[net-next] net: fec: add CBS offload support

Message ID 20230209092422.1655062-1-wei.fang@nxp.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [net-next] net: fec: add CBS offload support | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 9 of 9 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Wei Fang Feb. 9, 2023, 9:24 a.m. UTC
From: Wei Fang <wei.fang@nxp.com>

The FEC hardware supports the Credit-based shaper (CBS) which control
the bandwidth distribution between normal traffic and time-sensitive
traffic with respect to the total link bandwidth available.
But notice that the bandwidth allocation of hardware is restricted to
certain values. Below is the equation which is used to calculate the
BW (bandwidth) fraction for per class:
	BW fraction = 1 / (1 + 512 / idle_slope)

For values of idle_slope less than 128, idle_slope = 2 ^ n, when n =
0,1,2,...,6. For values equal to or greater than 128, idle_slope =
128 * m, where m = 1,2,3,...,12.
Example 1. idle_slope = 64, therefore BW fraction = 0.111.
Example 2. idle_slope = 128, therefore BW fraction = 0.200.

Here is an example command to set 200Mbps bandwidth on 1000Mbps port
for TC 2 and 111Mbps for TC 3.
tc qdisc add dev eth0 parent root handle 100 mqprio num_tc 3 map \
0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@0 1@1 1@2 hw 0
tc qdisc replace dev eth0 parent 100:2 cbs idleslope 200000 \
sendslope -800000 hicredit 153 locredit -1389 offload 1
tc qdisc replace dev eth0 parent 100:3 cbs idleslope 111000 \
sendslope -889000 hicredit 90 locredit -892 offload 1

Signed-off-by: Wei Fang <wei.fang@nxp.com>
---
 drivers/net/ethernet/freescale/fec.h      |   4 +
 drivers/net/ethernet/freescale/fec_main.c | 106 ++++++++++++++++++++++
 2 files changed, 110 insertions(+)

Comments

Simon Horman Feb. 9, 2023, 4:14 p.m. UTC | #1
On Thu, Feb 09, 2023 at 05:24:22PM +0800, wei.fang@nxp.com wrote:
> From: Wei Fang <wei.fang@nxp.com>
> 
> The FEC hardware supports the Credit-based shaper (CBS) which control
> the bandwidth distribution between normal traffic and time-sensitive
> traffic with respect to the total link bandwidth available.
> But notice that the bandwidth allocation of hardware is restricted to
> certain values. Below is the equation which is used to calculate the
> BW (bandwidth) fraction for per class:
> 	BW fraction = 1 / (1 + 512 / idle_slope)
> 
> For values of idle_slope less than 128, idle_slope = 2 ^ n, when n =
> 0,1,2,...,6. For values equal to or greater than 128, idle_slope =
> 128 * m, where m = 1,2,3,...,12.
> Example 1. idle_slope = 64, therefore BW fraction = 0.111.
> Example 2. idle_slope = 128, therefore BW fraction = 0.200.
> 
> Here is an example command to set 200Mbps bandwidth on 1000Mbps port
> for TC 2 and 111Mbps for TC 3.
> tc qdisc add dev eth0 parent root handle 100 mqprio num_tc 3 map \
> 0 0 2 1 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@0 1@1 1@2 hw 0
> tc qdisc replace dev eth0 parent 100:2 cbs idleslope 200000 \
> sendslope -800000 hicredit 153 locredit -1389 offload 1
> tc qdisc replace dev eth0 parent 100:3 cbs idleslope 111000 \
> sendslope -889000 hicredit 90 locredit -892 offload 1
> 
> Signed-off-by: Wei Fang <wei.fang@nxp.com>
> ---
>  drivers/net/ethernet/freescale/fec.h      |   4 +
>  drivers/net/ethernet/freescale/fec_main.c | 106 ++++++++++++++++++++++
>  2 files changed, 110 insertions(+)
> 
> diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
> index 5ba1e0d71c68..ad5f968aa086 100644
> --- a/drivers/net/ethernet/freescale/fec.h
> +++ b/drivers/net/ethernet/freescale/fec.h
> @@ -340,6 +340,10 @@ struct bufdesc_ex {
>  #define RCMR_CMP(X)		(((X) == 1) ? RCMR_CMP_1 : RCMR_CMP_2)
>  #define FEC_TX_BD_FTYPE(X)	(((X) & 0xf) << 20)
>  
> +#define FEC_QOS_TX_SHEME_MASK	GENMASK(2, 0)
> +#define CREDIT_BASED_SCHEME	0
> +#define ROUND_ROBIN_SCHEME	1
> +
>  /* The number of Tx and Rx buffers.  These are allocated from the page
>   * pool.  The code may assume these are power of two, so it it best
>   * to keep them that size.
> diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
> index c73e25f8995e..3bb3a071fa0c 100644
> --- a/drivers/net/ethernet/freescale/fec_main.c
> +++ b/drivers/net/ethernet/freescale/fec_main.c
> @@ -66,6 +66,7 @@
>  #include <linux/mfd/syscon.h>
>  #include <linux/regmap.h>
>  #include <soc/imx/cpuidle.h>
> +#include <net/pkt_sched.h>
>  #include <linux/filter.h>
>  #include <linux/bpf.h>
>  
> @@ -3232,6 +3233,110 @@ static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
>  	return phy_mii_ioctl(phydev, rq, cmd);
>  }
>  
> +static u32 fec_enet_get_idle_slope(u8 bw)
> +{
> +	u32 idle_slope, quotient, msb;
> +
> +	/* Convert bw to hardware idle slope */
> +	idle_slope = (512 * bw) / (100 - bw);
> +
> +	if (idle_slope >= 128) {
> +		/* For values equal to or greater than 128, idle_slope = 128 * m,
> +		 * where m = 1, 2, 3, ...12. Here we use the rounding method.
> +		 */

	Perhaps the following would be clearer?

	 For values greater than or equal to 128,
	 idle_slope is rounded to the nearest multiple of 128.

> +		quotient = idle_slope / 128;
> +		if (idle_slope >= quotient * 128 + 64)
> +			idle_slope = 128 * (quotient + 1);
> +		else
> +			idle_slope = 128 * quotient;

	Maybe there is a helper that does this, but if
	not, perhaps:

	idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;


> +
> +		goto end;

Maybe return here

> +	}

Or an else here is nicer?

> +
> +	/* For values less than 128, idle_slope = 2 ^ n, where

	Perhaps the following would be clearer?

	 For values less than 128, idle_slope is rounded
	 to the nearest power of 2.

> +	 * n = 0, 1, 2, ...6. Here we use the rounding method.

         n is 7 for input idle_slope around 128 (2^7)

> +	 * So the minimum of idle_slope is 1.
> +	 */
> +	msb = fls(idle_slope);
> +
> +	if (msb == 0 || msb == 1) {
> +		idle_slope = 1;
> +		goto end;
> +	}

nit: maybe this is nicer

	if (msb <= 1)
		return 1;

> +
> +	msb -= 1;
> +	if (idle_slope >= (1 << msb) + (1 << (msb - 1)))
> +		idle_slope = 1 << (msb + 1);
> +	else
> +		idle_slope = 1 << msb;

	In the same vein as the suggestion for the >= 128 case, perhaps:

	u32 d;

	d = BIT(fls(idle_slope));
	idle_slope = DIV_ROUND_CLOSEST(idle_slope, d) * d;

> +
> +end:
> +	return idle_slope;
> +}
> +
> +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
> +{
> +	struct fec_enet_private *fep = netdev_priv(ndev);
> +	struct tc_cbs_qopt_offload *cbs = type_data;
> +	int queue =  cbs->queue;

nit: extra space after '='

> +	u32 speed = fep->speed;
> +	u32 val, idle_slope;
> +	u8 bw;
> +
> +	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> +		return -EOPNOTSUPP;
> +
> +	/* Queue 1 for Class A, Queue 2 for Class B, so the ENET must has

nit: s/has/have/

> +	 * three queues.
> +	 */
> +	if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> +		return -EOPNOTSUPP;
> +
> +	/* Queue 0 is not AVB capable */
> +	if (queue <= 0 || queue >= fep->num_tx_queues)
> +		return -EINVAL;
> +
> +	val = readl(fep->hwp + FEC_QOS_SCHEME);
> +	val &= ~FEC_QOS_TX_SHEME_MASK;
> +	if (!cbs->enable) {
> +		val |= ROUND_ROBIN_SCHEME;
> +		writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> +		return 0;
> +	}
> +
> +	val |= CREDIT_BASED_SCHEME;
> +	writel(val, fep->hwp + FEC_QOS_SCHEME);
> +
> +	/* cbs->idleslope is in kilobits per second. speed is the port rate
> +	 * in megabits per second. So bandwidth ratio bw = (idleslope /
> +	 * (speed * 1000)) * 100, the unit is percentage.
> +	 */

suggestion:

	/* cbs->idleslope is in kilobits per second.
	 * Speed is the port rate in megabits per second.
	 * So bandwidth the ratio, bw, is (idleslope / (speed * 1000)) * 100.
	 * The unit of bw is a percentage.
	 */

> +	bw = cbs->idleslope / (speed * 10UL);
> +	/* bw% can not >= 100% */
> +	if (bw >= 100)
> +		return -EINVAL;

nit: maybe the above calculation and check fits better inside
     fec_enet_get_idle_slope()

> +	idle_slope = fec_enet_get_idle_slope(bw);
> +
> +	val = readl(fep->hwp + FEC_DMA_CFG(queue));
> +	val &= ~IDLE_SLOPE_MASK;
> +	val |= idle_slope & IDLE_SLOPE_MASK;
> +	writel(val, fep->hwp + FEC_DMA_CFG(queue));
> +
> +	return 0;
> +}
> +
> +static int fec_enet_setup_tc(struct net_device *ndev, enum tc_setup_type type,
> +			     void *type_data)
> +{
> +	switch (type) {
> +	case TC_SETUP_QDISC_CBS:
> +		return fec_enet_setup_tc_cbs(ndev, type_data);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
>  static void fec_enet_free_buffers(struct net_device *ndev)
>  {
>  	struct fec_enet_private *fep = netdev_priv(ndev);
> @@ -3882,6 +3987,7 @@ static const struct net_device_ops fec_netdev_ops = {
>  	.ndo_tx_timeout		= fec_timeout,
>  	.ndo_set_mac_address	= fec_set_mac_address,
>  	.ndo_eth_ioctl		= fec_enet_ioctl,
> +	.ndo_setup_tc	= fec_enet_setup_tc,
>  #ifdef CONFIG_NET_POLL_CONTROLLER
>  	.ndo_poll_controller	= fec_poll_controller,
>  #endif
> -- 
> 2.25.1
>
Andrew Lunn Feb. 9, 2023, 8:38 p.m. UTC | #2
> +	/* cbs->idleslope is in kilobits per second. speed is the port rate
> +	 * in megabits per second. So bandwidth ratio bw = (idleslope /
> +	 * (speed * 1000)) * 100, the unit is percentage.
> +	 */
> +	bw = cbs->idleslope / (speed * 10UL);

This appears to be a / 0 when the link is not up yet?  Also, if the
link goes does, fep->speed keeps the old value, so if it comes up
again at a different speed, your calculations are all wrong. So i
think you need fec_enet_adjust_link() involved in this.


> +	/* bw% can not >= 100% */
> +	if (bw >= 100)
> +		return -EINVAL;

Well > 100% could happen when the link goes from 1G to 10Half, or even
100Full. You should probably document the policy of what you do
then. Do you dedicate all the available bandwidth to the high priority
queue, or do you go back to best effort? Is it possible to indicate in
the tc show command that the configuration is no longer possible?

Presumably other drivers have already addressed all these issues, so
you just need to find out what they do.

    Andrew
Wei Fang Feb. 10, 2023, 7:24 a.m. UTC | #3
> -----Original Message-----
> From: Simon Horman <simon.horman@corigine.com>
> Sent: 2023年2月10日 0:14
> To: Wei Fang <wei.fang@nxp.com>
> Cc: Shenwei Wang <shenwei.wang@nxp.com>; Clark Wang
> <xiaoning.wang@nxp.com>; davem@davemloft.net; edumazet@google.com;
> kuba@kernel.org; pabeni@redhat.com; netdev@vger.kernel.org; dl-linux-imx
> <linux-imx@nxp.com>; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add CBS offload support
> 
> On Thu, Feb 09, 2023 at 05:24:22PM +0800, wei.fang@nxp.com wrote:
> > From: Wei Fang <wei.fang@nxp.com>
> >
> > +
> > +	if (idle_slope >= 128) {
> > +		/* For values equal to or greater than 128, idle_slope = 128 * m,
> > +		 * where m = 1, 2, 3, ...12. Here we use the rounding method.
> > +		 */
> 
> 	Perhaps the following would be clearer?
> 
> 	 For values greater than or equal to 128,
> 	 idle_slope is rounded to the nearest multiple of 128.
> 
> > +		quotient = idle_slope / 128;
> > +		if (idle_slope >= quotient * 128 + 64)
> > +			idle_slope = 128 * (quotient + 1);
> > +		else
> > +			idle_slope = 128 * quotient;
> 
> 	Maybe there is a helper that does this, but if
> 	not, perhaps:
> 
> 	idle_slope = DIV_ROUND_CLOSEST(idle_slope, 128U) * 128U;
> 
> 
> > +
> > +		goto end;
> 
> Maybe return here
> 
> > +	}
> 
> Or an else here is nicer?
> 
> > +
> > +	/* For values less than 128, idle_slope = 2 ^ n, where
> 
> 	Perhaps the following would be clearer?
> 
> 	 For values less than 128, idle_slope is rounded
> 	 to the nearest power of 2.
> 
> > +	 * n = 0, 1, 2, ...6. Here we use the rounding method.
> 
>          n is 7 for input idle_slope around 128 (2^7)
> 
> > +	 * So the minimum of idle_slope is 1.
> > +	 */
> > +	msb = fls(idle_slope);
> > +
> > +	if (msb == 0 || msb == 1) {
> > +		idle_slope = 1;
> > +		goto end;
> > +	}
> 
> nit: maybe this is nicer
> 
> 	if (msb <= 1)
> 		return 1;
> 
> > +
> > +	msb -= 1;
> > +	if (idle_slope >= (1 << msb) + (1 << (msb - 1)))
> > +		idle_slope = 1 << (msb + 1);
> > +	else
> > +		idle_slope = 1 << msb;
> 
> 	In the same vein as the suggestion for the >= 128 case, perhaps:
> 
> 	u32 d;
> 
> 	d = BIT(fls(idle_slope));
> 	idle_slope = DIV_ROUND_CLOSEST(idle_slope, d) * d;
> 
> > +
> > +end:
> > +	return idle_slope;
> > +}
> > +
> > +static int fec_enet_setup_tc_cbs(struct net_device *ndev, void
> > +*type_data) {
> > +	struct fec_enet_private *fep = netdev_priv(ndev);
> > +	struct tc_cbs_qopt_offload *cbs = type_data;
> > +	int queue =  cbs->queue;
> 
> nit: extra space after '='
> 
> > +	u32 speed = fep->speed;
> > +	u32 val, idle_slope;
> > +	u8 bw;
> > +
> > +	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
> > +		return -EOPNOTSUPP;
> > +
> > +	/* Queue 1 for Class A, Queue 2 for Class B, so the ENET must has
> 
> nit: s/has/have/
> 
> > +	 * three queues.
> > +	 */
> > +	if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
> > +		return -EOPNOTSUPP;
> > +
> > +	/* Queue 0 is not AVB capable */
> > +	if (queue <= 0 || queue >= fep->num_tx_queues)
> > +		return -EINVAL;
> > +
> > +	val = readl(fep->hwp + FEC_QOS_SCHEME);
> > +	val &= ~FEC_QOS_TX_SHEME_MASK;
> > +	if (!cbs->enable) {
> > +		val |= ROUND_ROBIN_SCHEME;
> > +		writel(val, fep->hwp + FEC_QOS_SCHEME);
> > +
> > +		return 0;
> > +	}
> > +
> > +	val |= CREDIT_BASED_SCHEME;
> > +	writel(val, fep->hwp + FEC_QOS_SCHEME);
> > +
> > +	/* cbs->idleslope is in kilobits per second. speed is the port rate
> > +	 * in megabits per second. So bandwidth ratio bw = (idleslope /
> > +	 * (speed * 1000)) * 100, the unit is percentage.
> > +	 */
> 
> suggestion:
> 
> 	/* cbs->idleslope is in kilobits per second.
> 	 * Speed is the port rate in megabits per second.
> 	 * So bandwidth the ratio, bw, is (idleslope / (speed * 1000)) * 100.
> 	 * The unit of bw is a percentage.
> 	 */
> 
> > +	bw = cbs->idleslope / (speed * 10UL);
> > +	/* bw% can not >= 100% */
> > +	if (bw >= 100)
> > +		return -EINVAL;
> 
> nit: maybe the above calculation and check fits better inside
>      fec_enet_get_idle_slope()
> 

Your suggestions are very helpful, I'll amend the patch in v2.
Thanks a lot.
Wei Fang Feb. 10, 2023, 8:04 a.m. UTC | #4
> -----Original Message-----
> From: Andrew Lunn <andrew@lunn.ch>
> Sent: 2023年2月10日 4:38
> To: Wei Fang <wei.fang@nxp.com>
> Cc: Shenwei Wang <shenwei.wang@nxp.com>; Clark Wang
> <xiaoning.wang@nxp.com>; davem@davemloft.net; edumazet@google.com;
> kuba@kernel.org; pabeni@redhat.com; netdev@vger.kernel.org; dl-linux-imx
> <linux-imx@nxp.com>; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH net-next] net: fec: add CBS offload support
> 
> > +	/* cbs->idleslope is in kilobits per second. speed is the port rate
> > +	 * in megabits per second. So bandwidth ratio bw = (idleslope /
> > +	 * (speed * 1000)) * 100, the unit is percentage.
> > +	 */
> > +	bw = cbs->idleslope / (speed * 10UL);
> 
> This appears to be a / 0 when the link is not up yet?  Also, if the link goes
> does, fep->speed keeps the old value, so if it comes up again at a different
> speed, your calculations are all wrong. So i think you need
> fec_enet_adjust_link() involved in this.
> 
Yes, speed = 0 is indeed a problem, we should check the value first.
For speed change, I'll think about how to handle this situation.

> 
> > +	/* bw% can not >= 100% */
> > +	if (bw >= 100)
> > +		return -EINVAL;
> 
> Well > 100% could happen when the link goes from 1G to 10Half, or even
> 100Full. You should probably document the policy of what you do then. Do
> you dedicate all the available bandwidth to the high priority queue, or do you
> go back to best effort? 
Actually, the FEC has always used the credit-based shaper by default. So I think
it's better to fall back the default setting and return error if the bw > 100%.

>Is it possible to indicate in the tc show command that
> the configuration is no longer possible?
> 
Sorry, I have no knowledge about the tc show command.

> Presumably other drivers have already addressed all these issues, so you just
> need to find out what they do.
> 
>     Andrew
diff mbox series

Patch

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index 5ba1e0d71c68..ad5f968aa086 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -340,6 +340,10 @@  struct bufdesc_ex {
 #define RCMR_CMP(X)		(((X) == 1) ? RCMR_CMP_1 : RCMR_CMP_2)
 #define FEC_TX_BD_FTYPE(X)	(((X) & 0xf) << 20)
 
+#define FEC_QOS_TX_SHEME_MASK	GENMASK(2, 0)
+#define CREDIT_BASED_SCHEME	0
+#define ROUND_ROBIN_SCHEME	1
+
 /* The number of Tx and Rx buffers.  These are allocated from the page
  * pool.  The code may assume these are power of two, so it it best
  * to keep them that size.
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c73e25f8995e..3bb3a071fa0c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -66,6 +66,7 @@ 
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <soc/imx/cpuidle.h>
+#include <net/pkt_sched.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 
@@ -3232,6 +3233,110 @@  static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd)
 	return phy_mii_ioctl(phydev, rq, cmd);
 }
 
+static u32 fec_enet_get_idle_slope(u8 bw)
+{
+	u32 idle_slope, quotient, msb;
+
+	/* Convert bw to hardware idle slope */
+	idle_slope = (512 * bw) / (100 - bw);
+
+	if (idle_slope >= 128) {
+		/* For values equal to or greater than 128, idle_slope = 128 * m,
+		 * where m = 1, 2, 3, ...12. Here we use the rounding method.
+		 */
+		quotient = idle_slope / 128;
+		if (idle_slope >= quotient * 128 + 64)
+			idle_slope = 128 * (quotient + 1);
+		else
+			idle_slope = 128 * quotient;
+
+		goto end;
+	}
+
+	/* For values less than 128, idle_slope = 2 ^ n, where
+	 * n = 0, 1, 2, ...6. Here we use the rounding method.
+	 * So the minimum of idle_slope is 1.
+	 */
+	msb = fls(idle_slope);
+
+	if (msb == 0 || msb == 1) {
+		idle_slope = 1;
+		goto end;
+	}
+
+	msb -= 1;
+	if (idle_slope >= (1 << msb) + (1 << (msb - 1)))
+		idle_slope = 1 << (msb + 1);
+	else
+		idle_slope = 1 << msb;
+
+end:
+	return idle_slope;
+}
+
+static int fec_enet_setup_tc_cbs(struct net_device *ndev, void *type_data)
+{
+	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct tc_cbs_qopt_offload *cbs = type_data;
+	int queue =  cbs->queue;
+	u32 speed = fep->speed;
+	u32 val, idle_slope;
+	u8 bw;
+
+	if (!(fep->quirks & FEC_QUIRK_HAS_AVB))
+		return -EOPNOTSUPP;
+
+	/* Queue 1 for Class A, Queue 2 for Class B, so the ENET must has
+	 * three queues.
+	 */
+	if (fep->num_tx_queues != FEC_ENET_MAX_TX_QS)
+		return -EOPNOTSUPP;
+
+	/* Queue 0 is not AVB capable */
+	if (queue <= 0 || queue >= fep->num_tx_queues)
+		return -EINVAL;
+
+	val = readl(fep->hwp + FEC_QOS_SCHEME);
+	val &= ~FEC_QOS_TX_SHEME_MASK;
+	if (!cbs->enable) {
+		val |= ROUND_ROBIN_SCHEME;
+		writel(val, fep->hwp + FEC_QOS_SCHEME);
+
+		return 0;
+	}
+
+	val |= CREDIT_BASED_SCHEME;
+	writel(val, fep->hwp + FEC_QOS_SCHEME);
+
+	/* cbs->idleslope is in kilobits per second. speed is the port rate
+	 * in megabits per second. So bandwidth ratio bw = (idleslope /
+	 * (speed * 1000)) * 100, the unit is percentage.
+	 */
+	bw = cbs->idleslope / (speed * 10UL);
+	/* bw% can not >= 100% */
+	if (bw >= 100)
+		return -EINVAL;
+	idle_slope = fec_enet_get_idle_slope(bw);
+
+	val = readl(fep->hwp + FEC_DMA_CFG(queue));
+	val &= ~IDLE_SLOPE_MASK;
+	val |= idle_slope & IDLE_SLOPE_MASK;
+	writel(val, fep->hwp + FEC_DMA_CFG(queue));
+
+	return 0;
+}
+
+static int fec_enet_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			     void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_CBS:
+		return fec_enet_setup_tc_cbs(ndev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static void fec_enet_free_buffers(struct net_device *ndev)
 {
 	struct fec_enet_private *fep = netdev_priv(ndev);
@@ -3882,6 +3987,7 @@  static const struct net_device_ops fec_netdev_ops = {
 	.ndo_tx_timeout		= fec_timeout,
 	.ndo_set_mac_address	= fec_set_mac_address,
 	.ndo_eth_ioctl		= fec_enet_ioctl,
+	.ndo_setup_tc	= fec_enet_setup_tc,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= fec_poll_controller,
 #endif