diff mbox series

[RFC,1/3] veth: implement support for set_channel ethtool op

Message ID 681c32be3a9172e9468893a89fb928b46c5c5ee6.1625823139.git.pabeni@redhat.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series veth: more flexible channels number configuration | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Guessed tree name to be net-next
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cc_maintainers warning 5 maintainers not CCed: hawk@kernel.org daniel@iogearbox.net bpf@vger.kernel.org ast@kernel.org john.fastabend@gmail.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit fail Errors and warnings before: 0 this patch: 3
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 82 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 88 exceeds 80 columns
netdev/build_allmodconfig_warn fail Errors and warnings before: 0 this patch: 3
netdev/header_inline success Link

Commit Message

Paolo Abeni July 9, 2021, 9:39 a.m. UTC
This change implements the set_channel() ethtool operation,
preserving the current defaults values and allowing up set
the number of queues in the range set ad device creation
time.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/veth.c | 62 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 4 deletions(-)

Comments

Toke Høiland-Jørgensen July 9, 2021, 10:15 a.m. UTC | #1
Paolo Abeni <pabeni@redhat.com> writes:

> This change implements the set_channel() ethtool operation,
> preserving the current defaults values and allowing up set
> the number of queues in the range set ad device creation
> time.
>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
>  drivers/net/veth.c | 62 +++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 58 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/veth.c b/drivers/net/veth.c
> index bdb7ce3cb054..10360228a06a 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -72,6 +72,8 @@ struct veth_priv {
>  	atomic64_t		dropped;
>  	struct bpf_prog		*_xdp_prog;
>  	struct veth_rq		*rq;
> +	unsigned int		num_tx_queues;
> +	unsigned int		num_rx_queues;

Why are these needed to be duplicated? Don't they just duplicate the
'real_num_tx_queues' members in struct net_device?

>  	unsigned int		requested_headroom;
>  };
>  
> @@ -224,10 +226,49 @@ static void veth_get_channels(struct net_device *dev,
>  {
>  	channels->tx_count = dev->real_num_tx_queues;
>  	channels->rx_count = dev->real_num_rx_queues;
> -	channels->max_tx = dev->real_num_tx_queues;
> -	channels->max_rx = dev->real_num_rx_queues;
> +	channels->max_tx = dev->num_tx_queues;
> +	channels->max_rx = dev->num_rx_queues;
>  	channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
> -	channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
> +	channels->max_combined = min(dev->num_rx_queues, dev->num_tx_queues);
> +}
> +
> +static int veth_close(struct net_device *dev);
> +static int veth_open(struct net_device *dev);
> +
> +static int veth_set_channels(struct net_device *dev,
> +			     struct ethtool_channels *ch)
> +{
> +	struct veth_priv *priv = netdev_priv(dev);
> +	struct veth_priv *peer_priv;
> +
> +	/* accept changes only on rx/tx */
> +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
> +		return -EINVAL;
> +
> +	/* respect contraint posed at device creation time */
> +	if (ch->rx_count > dev->num_rx_queues || ch->tx_count > dev->num_tx_queues)
> +		return -EINVAL;
> +
> +	if (!ch->rx_count || !ch->tx_count)
> +		return -EINVAL;
> +
> +	/* avoid braking XDP, if that is enabled */
> +	if (priv->_xdp_prog && ch->rx_count < priv->peer->real_num_tx_queues)
> +		return -EINVAL;
> +
> +	peer_priv = netdev_priv(priv->peer);
> +	if (peer_priv->_xdp_prog && ch->tx_count > priv->peer->real_num_rx_queues)
> +		return -EINVAL;

An XDP program could be loaded later, so I think it's better to enforce
this constraint unconditionally.

(What happens on the regular xmit path if the number of TX queues is out
of sync with the peer RX queues, BTW?)

> +	if (netif_running(dev))
> +		veth_close(dev);
> +
> +	priv->num_tx_queues = ch->tx_count;
> +	priv->num_rx_queues = ch->rx_count;

Why can't just you use netif_set_real_num_*_queues() here directly (and
get rid of the priv members as above)?

-Toke
Paolo Abeni July 9, 2021, 10:49 a.m. UTC | #2
Hello,

On Fri, 2021-07-09 at 12:15 +0200, Toke Høiland-Jørgensen wrote:
> > @@ -224,10 +226,49 @@ static void veth_get_channels(struct net_device *dev,
> >  {
> >  	channels->tx_count = dev->real_num_tx_queues;
> >  	channels->rx_count = dev->real_num_rx_queues;
> > -	channels->max_tx = dev->real_num_tx_queues;
> > -	channels->max_rx = dev->real_num_rx_queues;
> > +	channels->max_tx = dev->num_tx_queues;
> > +	channels->max_rx = dev->num_rx_queues;
> >  	channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
> > -	channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
> > +	channels->max_combined = min(dev->num_rx_queues, dev->num_tx_queues);
> > +}
> > +
> > +static int veth_close(struct net_device *dev);
> > +static int veth_open(struct net_device *dev);
> > +
> > +static int veth_set_channels(struct net_device *dev,
> > +			     struct ethtool_channels *ch)
> > +{
> > +	struct veth_priv *priv = netdev_priv(dev);
> > +	struct veth_priv *peer_priv;
> > +
> > +	/* accept changes only on rx/tx */
> > +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
> > +		return -EINVAL;
> > +
> > +	/* respect contraint posed at device creation time */
> > +	if (ch->rx_count > dev->num_rx_queues || ch->tx_count > dev->num_tx_queues)
> > +		return -EINVAL;
> > +
> > +	if (!ch->rx_count || !ch->tx_count)
> > +		return -EINVAL;
> > +
> > +	/* avoid braking XDP, if that is enabled */
> > +	if (priv->_xdp_prog && ch->rx_count < priv->peer->real_num_tx_queues)
> > +		return -EINVAL;
> > +
> > +	peer_priv = netdev_priv(priv->peer);
> > +	if (peer_priv->_xdp_prog && ch->tx_count > priv->peer->real_num_rx_queues)
> > +		return -EINVAL;
> 
> An XDP program could be loaded later, so I think it's better to enforce
> this constraint unconditionally.

The relevant check is already present in veth_xdp_set(), the load will
be rejected with an appropriate extack message.

I enforcing the above contraint uncontiditionally will make impossible
changing the number of real queues at runtime, as we must update dev
and peer in different moments.

> (What happens on the regular xmit path if the number of TX queues is out
> of sync with the peer RX queues, BTW?)

if tx < rx, the higly number of rx queue will not be used, if rx < tx,
XDP/gro can't take place: the normal veth path is used.

> > +	if (netif_running(dev))
> > +		veth_close(dev);
> > +
> > +	priv->num_tx_queues = ch->tx_count;
> > +	priv->num_rx_queues = ch->rx_count;
> 
> Why can't just you use netif_set_real_num_*_queues() here directly (and
> get rid of the priv members as above)?

Uhm... I haven't thought about it. Let me try ;)

Thanks!

/P
Toke Høiland-Jørgensen July 9, 2021, 11:36 a.m. UTC | #3
Paolo Abeni <pabeni@redhat.com> writes:

> Hello,
>
> On Fri, 2021-07-09 at 12:15 +0200, Toke Høiland-Jørgensen wrote:
>> > @@ -224,10 +226,49 @@ static void veth_get_channels(struct net_device *dev,
>> >  {
>> >  	channels->tx_count = dev->real_num_tx_queues;
>> >  	channels->rx_count = dev->real_num_rx_queues;
>> > -	channels->max_tx = dev->real_num_tx_queues;
>> > -	channels->max_rx = dev->real_num_rx_queues;
>> > +	channels->max_tx = dev->num_tx_queues;
>> > +	channels->max_rx = dev->num_rx_queues;
>> >  	channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
>> > -	channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
>> > +	channels->max_combined = min(dev->num_rx_queues, dev->num_tx_queues);
>> > +}
>> > +
>> > +static int veth_close(struct net_device *dev);
>> > +static int veth_open(struct net_device *dev);
>> > +
>> > +static int veth_set_channels(struct net_device *dev,
>> > +			     struct ethtool_channels *ch)
>> > +{
>> > +	struct veth_priv *priv = netdev_priv(dev);
>> > +	struct veth_priv *peer_priv;
>> > +
>> > +	/* accept changes only on rx/tx */
>> > +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
>> > +		return -EINVAL;
>> > +
>> > +	/* respect contraint posed at device creation time */
>> > +	if (ch->rx_count > dev->num_rx_queues || ch->tx_count > dev->num_tx_queues)
>> > +		return -EINVAL;
>> > +
>> > +	if (!ch->rx_count || !ch->tx_count)
>> > +		return -EINVAL;
>> > +
>> > +	/* avoid braking XDP, if that is enabled */
>> > +	if (priv->_xdp_prog && ch->rx_count < priv->peer->real_num_tx_queues)
>> > +		return -EINVAL;
>> > +
>> > +	peer_priv = netdev_priv(priv->peer);
>> > +	if (peer_priv->_xdp_prog && ch->tx_count > priv->peer->real_num_rx_queues)
>> > +		return -EINVAL;
>> 
>> An XDP program could be loaded later, so I think it's better to enforce
>> this constraint unconditionally.
>
> The relevant check is already present in veth_xdp_set(), the load will
> be rejected with an appropriate extack message.

Ah, right, of course; silly me, that's fine then... :)

> I enforcing the above contraint uncontiditionally will make impossible
> changing the number of real queues at runtime, as we must update dev
> and peer in different moments.
>
>> (What happens on the regular xmit path if the number of TX queues is out
>> of sync with the peer RX queues, BTW?)
>
> if tx < rx, the higly number of rx queue will not be used, if rx < tx,
> XDP/gro can't take place: the normal veth path is used.

Right, OK, that's probably also fine then :)

-Toke
Paolo Abeni July 9, 2021, 2:38 p.m. UTC | #4
On Fri, 2021-07-09 at 12:49 +0200, Paolo Abeni wrote:
> On Fri, 2021-07-09 at 12:15 +0200, Toke Høiland-Jørgensen wrote:
> > > +	if (netif_running(dev))
> > > +		veth_close(dev);
> > > +
> > > +	priv->num_tx_queues = ch->tx_count;
> > > +	priv->num_rx_queues = ch->rx_count;
> > 
> > Why can't just you use netif_set_real_num_*_queues() here directly (and
> > get rid of the priv members as above)?
> 
> Uhm... I haven't thought about it. Let me try ;)

Here there is a possible problem: if the latter
netif_set_real_num_*_queues() fails, we should not change the current
configuration, so we should revert the
first netif_set_real_num_*_queues() change.

Even that additional revert operation could fail. If/when that happen
set_channel() will leave the device in a different state from both the
old one and the new one, possibly with an XDP-incompatible number of
queues.

Keeping the  netif_set_real_num_*_queues() calls in veth_open() avoid
the above issue: if the queue creation is problematic, the device will
stay down.

I think the additional fields are worthy, WDYT? 

Cheers,
Paolo
Toke Høiland-Jørgensen July 9, 2021, 3:23 p.m. UTC | #5
Paolo Abeni <pabeni@redhat.com> writes:

> On Fri, 2021-07-09 at 12:49 +0200, Paolo Abeni wrote:
>> On Fri, 2021-07-09 at 12:15 +0200, Toke Høiland-Jørgensen wrote:
>> > > +	if (netif_running(dev))
>> > > +		veth_close(dev);
>> > > +
>> > > +	priv->num_tx_queues = ch->tx_count;
>> > > +	priv->num_rx_queues = ch->rx_count;
>> > 
>> > Why can't just you use netif_set_real_num_*_queues() here directly (and
>> > get rid of the priv members as above)?
>> 
>> Uhm... I haven't thought about it. Let me try ;)
>
> Here there is a possible problem: if the latter
> netif_set_real_num_*_queues() fails, we should not change the current
> configuration, so we should revert the
> first netif_set_real_num_*_queues() change.
>
> Even that additional revert operation could fail. If/when that happen
> set_channel() will leave the device in a different state from both the
> old one and the new one, possibly with an XDP-incompatible number of
> queues.
>
> Keeping the  netif_set_real_num_*_queues() calls in veth_open() avoid
> the above issue: if the queue creation is problematic, the device will
> stay down.
>
> I think the additional fields are worthy, WDYT?

Hmm, wouldn't the right thing to do be to back out the change and return
an error to userspace? Something like:

+	if (netif_running(dev))
+		veth_close(dev);
+
+	old_rx_queues = dev->real_num_rx_queues;
+	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
+	if (err)
+		return err;
+
+	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
+	if (err) {
+		netif_set_real_num_rx_queues(dev, old_rx_queues);
+		return err;
+	}
+
+	if (netif_running(dev))
+		veth_open(dev);
+	return 0;


(also, shouldn't the result of veth_open() be returned? bit weird if you
don't get an error but the device stays down...)

-Toke
Paolo Abeni July 9, 2021, 4:35 p.m. UTC | #6
On Fri, 2021-07-09 at 17:23 +0200, Toke Høiland-Jørgensen wrote:
> Paolo Abeni <pabeni@redhat.com> writes:
> > On Fri, 2021-07-09 at 12:49 +0200, Paolo Abeni wrote:
> > > On Fri, 2021-07-09 at 12:15 +0200, Toke Høiland-Jørgensen wrote:
> > > > > +	if (netif_running(dev))
> > > > > +		veth_close(dev);
> > > > > +
> > > > > +	priv->num_tx_queues = ch->tx_count;
> > > > > +	priv->num_rx_queues = ch->rx_count;
> > > > 
> > > > Why can't just you use netif_set_real_num_*_queues() here directly (and
> > > > get rid of the priv members as above)?
> > > 
> > > Uhm... I haven't thought about it. Let me try ;)
> > 
> > Here there is a possible problem: if the latter
> > netif_set_real_num_*_queues() fails, we should not change the current
> > configuration, so we should revert the
> > first netif_set_real_num_*_queues() change.
> > 
> > Even that additional revert operation could fail. If/when that happen
> > set_channel() will leave the device in a different state from both the
> > old one and the new one, possibly with an XDP-incompatible number of
> > queues.
> > 
> > Keeping the  netif_set_real_num_*_queues() calls in veth_open() avoid
> > the above issue: if the queue creation is problematic, the device will
> > stay down.
> > 
> > I think the additional fields are worthy, WDYT?
> 
> Hmm, wouldn't the right thing to do be to back out the change and return
> an error to userspace? Something like:
> 
> +	if (netif_running(dev))
> +		veth_close(dev);
> +
> +	old_rx_queues = dev->real_num_rx_queues;
> +	err = netif_set_real_num_rx_queues(dev, ch->rx_count);
> +	if (err)
> +		return err;
> +
> +	err = netif_set_real_num_tx_queues(dev, ch->tx_count);
> +	if (err) {
> +		netif_set_real_num_rx_queues(dev, old_rx_queues);

I'm sorry, I was not clear enough. I mean: even the
above netif_set_real_num_rx_queues() can fail. When that happen we will
leave the device in an inconsistent state, possibly even with an
"unsupported" queue setting.

> +		return err;
> +	}
> +
> +	if (netif_running(dev))
> +		veth_open(dev);
> +	return 0;
> 
> 
> (also, shouldn't the result of veth_open() be returned? bit weird if you
> don't get an error but the device stays down...)

Agreed.

Thanks!

Paolo
Jakub Kicinski July 9, 2021, 7:54 p.m. UTC | #7
On Fri,  9 Jul 2021 11:39:48 +0200 Paolo Abeni wrote:
> +	/* accept changes only on rx/tx */
> +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
> +		return -EINVAL;

Ah damn, I must have missed the get_channels being added. I believe the
correct interpretation of the params is rx means NAPI with just Rx
queue(s), tx NAPI with just Tx queue(s) and combined has both.
IOW combined != min(rx, tx).
Instead real_rx = combined + rx; real_tx = combined + tx.
Can we still change this?

> +	/* respect contraint posed at device creation time */
> +	if (ch->rx_count > dev->num_rx_queues || ch->tx_count > dev->num_tx_queues)
> +		return -EINVAL;

Could you lift this check into ethtool core?

> +	if (!ch->rx_count || !ch->tx_count)
> +		return -EINVAL;

You wouldn't need this with the right interpretation of combined :(

> +	/* avoid braking XDP, if that is enabled */
> +	if (priv->_xdp_prog && ch->rx_count < priv->peer->real_num_tx_queues)
> +		return -EINVAL;
> +
> +	peer_priv = netdev_priv(priv->peer);
> +	if (peer_priv->_xdp_prog && ch->tx_count > priv->peer->real_num_rx_queues)
> +		return -EINVAL;
> +
> +	if (netif_running(dev))
> +		veth_close(dev);
> +
> +	priv->num_tx_queues = ch->tx_count;
> +	priv->num_rx_queues = ch->rx_count;
> +
> +	if (netif_running(dev))
> +		veth_open(dev);
Jakub Kicinski July 9, 2021, 7:56 p.m. UTC | #8
On Fri, 09 Jul 2021 18:35:59 +0200 Paolo Abeni wrote:
> > +	if (netif_running(dev))
> > +		veth_open(dev);
> > +	return 0;
> > 
> > 
> > (also, shouldn't the result of veth_open() be returned? bit weird if you
> > don't get an error but the device stays down...)  
> 
> Agreed.

We've been fighting hard to make sure ethtool -L/-G/etc don't leave
devices half-broken. Maybe it's not as important for software devices,
but we should set a good example. We shouldn't take the device down
unless we are sure we'll be able to bring it up. So if veth_open()
"can't fail" it should be a WARN_ON(veth_open()), not return; and if 
it may fail due to memory allocations etc we should do a prepare/commit.
David Ahern July 12, 2021, 1:44 a.m. UTC | #9
On 7/9/21 1:54 PM, Jakub Kicinski wrote:
> On Fri,  9 Jul 2021 11:39:48 +0200 Paolo Abeni wrote:
>> +	/* accept changes only on rx/tx */
>> +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
>> +		return -EINVAL;
> 
> Ah damn, I must have missed the get_channels being added. I believe the
> correct interpretation of the params is rx means NAPI with just Rx
> queue(s), tx NAPI with just Tx queue(s) and combined has both.
> IOW combined != min(rx, tx).
> Instead real_rx = combined + rx; real_tx = combined + tx.
> Can we still change this?

Is it not an 'either' / 'or' situation? ie., you can either control the
number of Rx and Tx queues or you control the combined value but not
both. That is what I recall from nics (e.g., ConnectX).
Paolo Abeni July 12, 2021, 10:45 a.m. UTC | #10
Hello,

On Sun, 2021-07-11 at 19:44 -0600, David Ahern wrote:
> On 7/9/21 1:54 PM, Jakub Kicinski wrote:
> > On Fri,  9 Jul 2021 11:39:48 +0200 Paolo Abeni wrote:
> > > +	/* accept changes only on rx/tx */
> > > +	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
> > > +		return -EINVAL;
> > 
> > Ah damn, I must have missed the get_channels being added. I believe the
> > correct interpretation of the params is rx means NAPI with just Rx
> > queue(s), tx NAPI with just Tx queue(s) and combined has both.
> > IOW combined != min(rx, tx).
> > Instead real_rx = combined + rx; real_tx = combined + tx.
> > Can we still change this?
> 
> Is it not an 'either' / 'or' situation? ie., you can either control the
> number of Rx and Tx queues or you control the combined value but not
> both. That is what I recall from nics (e.g., ConnectX).

Thanks for the feedback. My understanding was quite alike what David
stated - and indeed that is what ConnectX enforces AFAICS. Anyhow the
core ethtool code allows for what Jackub said, so I guess I need to
deal with that.

@Jakub: if we are still on time about changing the veth_get_channel()
exposed behaviour, what about just showing nr combined == 0 and
enforcing comined_max == 0? that would both describe more closely the
veth architecture and will make the code simpler - beyond fixing the
current uncorrect nr channels report.

Thanks!

Paolo
Jakub Kicinski July 12, 2021, 3:23 p.m. UTC | #11
On Mon, 12 Jul 2021 12:45:13 +0200 Paolo Abeni wrote:
> On Sun, 2021-07-11 at 19:44 -0600, David Ahern wrote:
> > On 7/9/21 1:54 PM, Jakub Kicinski wrote:  
> > > Ah damn, I must have missed the get_channels being added. I believe the
> > > correct interpretation of the params is rx means NAPI with just Rx
> > > queue(s), tx NAPI with just Tx queue(s) and combined has both.
> > > IOW combined != min(rx, tx).
> > > Instead real_rx = combined + rx; real_tx = combined + tx.
> > > Can we still change this?  
> > 
> > Is it not an 'either' / 'or' situation? ie., you can either control the
> > number of Rx and Tx queues or you control the combined value but not
> > both. That is what I recall from nics (e.g., ConnectX).  
> 
> Thanks for the feedback. My understanding was quite alike what David
> stated - and indeed that is what ConnectX enforces AFAICS. Anyhow the
> core ethtool code allows for what Jackub said, so I guess I need to
> deal with that.

I thought Mellanox was doing something funny to reuse the rx as the
number of AF_XDP queues. Normal rings are not reported twice, they're
only reported as combined.

ethtool man page is relatively clear, unfortunately the kernel code 
is not and few read the man page. A channel is approximately an IRQ, 
not a queue, and IRQ can't be dedicated and combined simultaneously:

 "A channel is an IRQ and the set of queues that can trigger that IRQ."

 " rx N   Changes the number of channels with only receive queues."

> @Jakub: if we are still on time about changing the veth_get_channel()
> exposed behaviour, what about just showing nr combined == 0 and
> enforcing comined_max == 0? that would both describe more closely the
> veth architecture and will make the code simpler - beyond fixing the
> current uncorrect nr channels report.

SGTM.
diff mbox series

Patch

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index bdb7ce3cb054..10360228a06a 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -72,6 +72,8 @@  struct veth_priv {
 	atomic64_t		dropped;
 	struct bpf_prog		*_xdp_prog;
 	struct veth_rq		*rq;
+	unsigned int		num_tx_queues;
+	unsigned int		num_rx_queues;
 	unsigned int		requested_headroom;
 };
 
@@ -224,10 +226,49 @@  static void veth_get_channels(struct net_device *dev,
 {
 	channels->tx_count = dev->real_num_tx_queues;
 	channels->rx_count = dev->real_num_rx_queues;
-	channels->max_tx = dev->real_num_tx_queues;
-	channels->max_rx = dev->real_num_rx_queues;
+	channels->max_tx = dev->num_tx_queues;
+	channels->max_rx = dev->num_rx_queues;
 	channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
-	channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
+	channels->max_combined = min(dev->num_rx_queues, dev->num_tx_queues);
+}
+
+static int veth_close(struct net_device *dev);
+static int veth_open(struct net_device *dev);
+
+static int veth_set_channels(struct net_device *dev,
+			     struct ethtool_channels *ch)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct veth_priv *peer_priv;
+
+	/* accept changes only on rx/tx */
+	if (ch->combined_count != min(dev->real_num_rx_queues, dev->real_num_tx_queues))
+		return -EINVAL;
+
+	/* respect contraint posed at device creation time */
+	if (ch->rx_count > dev->num_rx_queues || ch->tx_count > dev->num_tx_queues)
+		return -EINVAL;
+
+	if (!ch->rx_count || !ch->tx_count)
+		return -EINVAL;
+
+	/* avoid braking XDP, if that is enabled */
+	if (priv->_xdp_prog && ch->rx_count < priv->peer->real_num_tx_queues)
+		return -EINVAL;
+
+	peer_priv = netdev_priv(priv->peer);
+	if (peer_priv->_xdp_prog && ch->tx_count > priv->peer->real_num_rx_queues)
+		return -EINVAL;
+
+	if (netif_running(dev))
+		veth_close(dev);
+
+	priv->num_tx_queues = ch->tx_count;
+	priv->num_rx_queues = ch->rx_count;
+
+	if (netif_running(dev))
+		veth_open(dev);
+	return 0;
 }
 
 static const struct ethtool_ops veth_ethtool_ops = {
@@ -239,6 +280,7 @@  static const struct ethtool_ops veth_ethtool_ops = {
 	.get_link_ksettings	= veth_get_link_ksettings,
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.get_channels		= veth_get_channels,
+	.set_channels		= veth_set_channels,
 };
 
 /* general routines */
@@ -1104,6 +1146,14 @@  static int veth_open(struct net_device *dev)
 	if (!peer)
 		return -ENOTCONN;
 
+	err = netif_set_real_num_rx_queues(dev, priv->num_rx_queues);
+	if (err)
+		return err;
+
+	err = netif_set_real_num_tx_queues(dev, priv->num_tx_queues);
+	if (err)
+		return err;
+
 	if (priv->_xdp_prog) {
 		err = veth_enable_xdp(dev);
 		if (err)
@@ -1551,14 +1601,18 @@  static int veth_newlink(struct net *src_net, struct net_device *dev,
 	netif_carrier_off(dev);
 
 	/*
-	 * tie the deviced together
+	 * tie the deviced together and init the default queue nr
 	 */
 
 	priv = netdev_priv(dev);
 	rcu_assign_pointer(priv->peer, peer);
+	priv->num_tx_queues = dev->num_tx_queues;
+	priv->num_rx_queues = dev->num_rx_queues;
 
 	priv = netdev_priv(peer);
 	rcu_assign_pointer(priv->peer, dev);
+	priv->num_tx_queues = peer->num_tx_queues;
+	priv->num_rx_queues = peer->num_rx_queues;
 
 	veth_disable_gro(dev);
 	return 0;