Message ID | 20220718091102.498774-1-alvaro.karsz@solid-run.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net-next,v4] net: virtio_net: notifications coalescing support | expand |
在 2022/7/18 17:11, Alvaro Karsz 写道: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > --- > v2: > - Fix type assignments warnings found with sparse. > - Fix a few typos. > > v3: > - Change the coalescing parameters in a dedicated function. > - Return -EBUSY from the set coalescing function when the device's > link is up, even if the notifications coalescing feature is negotiated. > > v4: > - If link is up and we need to update NAPI weight, return -EBUSY before > sending the coalescing commands to the device > --- > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > include/uapi/linux/virtio_net.h | 34 +++++++++- > 2 files changed, 129 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 356cf8dd416..4fde66bd511 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -261,6 +261,12 @@ struct virtnet_info { > u8 duplex; > u32 speed; > > + /* Interrupt coalescing settings */ > + u32 tx_usecs; > + u32 rx_usecs; > + u32 tx_max_packets; > + u32 rx_max_packets; > + > unsigned long guest_offloads; > unsigned long guest_offloads_capable; > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > + struct ethtool_coalesce *ec) > +{ > + struct scatterlist sgs_tx, sgs_rx; > + struct virtio_net_ctrl_coal_tx coal_tx; > + struct virtio_net_ctrl_coal_rx coal_rx; > + > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > + &sgs_tx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->tx_usecs = ec->tx_coalesce_usecs; > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > + > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > + &sgs_rx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->rx_usecs = ec->rx_coalesce_usecs; > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > + > + return 0; > +} > + > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > +{ > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > + * feature is negotiated. > + */ > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > + return -EOPNOTSUPP; This seems an independent fix? I wonder what if we just leave it as before. Other looks good to me. Thanks > + > + if (ec->tx_max_coalesced_frames > 1 || > + ec->rx_max_coalesced_frames != 1) > + return -EINVAL; > + > + return 0; > +} > + > static int virtnet_set_coalesce(struct net_device *dev, > struct ethtool_coalesce *ec, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > struct virtnet_info *vi = netdev_priv(dev); > - int i, napi_weight; > - > - if (ec->tx_max_coalesced_frames > 1 || > - ec->rx_max_coalesced_frames != 1) > - return -EINVAL; > + int ret, i, napi_weight; > + bool update_napi = false; > > + /* Can't change NAPI weight if the link is up */ > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; > if (napi_weight ^ vi->sq[0].napi.weight) { > if (dev->flags & IFF_UP) > return -EBUSY; > + else > + update_napi = true; > + } > + > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > + ret = virtnet_send_notf_coal_cmds(vi, ec); > + else > + ret = virtnet_coal_params_supported(ec); > + > + if (ret) > + return ret; > + > + if (update_napi) { > for (i = 0; i < vi->max_queue_pairs; i++) > vi->sq[i].napi.weight = napi_weight; > } > > - return 0; > + return ret; > } > > static int virtnet_get_coalesce(struct net_device *dev, > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > - struct ethtool_coalesce ec_default = { > - .cmd = ETHTOOL_GCOALESCE, > - .rx_max_coalesced_frames = 1, > - }; > struct virtnet_info *vi = netdev_priv(dev); > > - memcpy(ec, &ec_default, sizeof(ec_default)); > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + ec->rx_coalesce_usecs = vi->rx_usecs; > + ec->tx_coalesce_usecs = vi->tx_usecs; > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > + } else { > + ec->rx_max_coalesced_frames = 1; > > - if (vi->sq[0].napi.weight) > - ec->tx_max_coalesced_frames = 1; > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + } > > return 0; > } > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > } > > static const struct ethtool_ops virtnet_ethtool_ops = { > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > + ETHTOOL_COALESCE_USECS, > .get_drvinfo = virtnet_get_drvinfo, > .get_link = ethtool_op_get_link, > .get_ringparam = virtnet_get_ringparam, > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > "VIRTIO_NET_F_CTRL_VQ") || > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > + "VIRTIO_NET_F_CTRL_VQ") || > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > "VIRTIO_NET_F_CTRL_VQ"))) { > return false; > } > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > vi->mergeable_rx_bufs = true; > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + vi->rx_usecs = 0; > + vi->tx_usecs = 0; > + vi->tx_max_packets = 0; > + vi->rx_max_packets = 0; > + } > + > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > vi->has_rss_hash_report = true; > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f1..29ced55514d 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -56,7 +56,7 @@ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > - > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > +/* > + * Control notifications coalescing. > + * > + * Request the device to change the notifications coalescing parameters. > + * > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > + */ > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > +/* > + * Set the tx-usecs/tx-max-packets patameters. > + * tx-usecs - Maximum number of usecs to delay a TX notification. > + * tx-max-packets - Maximum number of packets to send before a TX notification. > + */ > +struct virtio_net_ctrl_coal_tx { > + __le32 tx_max_packets; > + __le32 tx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > + > +/* > + * Set the rx-usecs/rx-max-packets patameters. > + * rx-usecs - Maximum number of usecs to delay a RX notification. > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > + */ > +struct virtio_net_ctrl_coal_rx { > + __le32 rx_max_packets; > + __le32 rx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > + > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > -- > 2.32.0 >
Hi Jason,
> This seems an independent fix? I wonder what if we just leave it as before.
Before this patch, only ETHTOOL_COALESCE_MAX_FRAMES was supported, now
we support ETHTOOL_COALESCE_USECS as well.
ETHTOOL_COALESCE_USECS is meaningless if VIRTIO_NET_F_NOTF_COAL
feature is not negotiated, so I added the mentioned part.
On Tue, Jul 19, 2022 at 3:09 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote: > > Hi Jason, > > > This seems an independent fix? I wonder what if we just leave it as before. > > > Before this patch, only ETHTOOL_COALESCE_MAX_FRAMES was supported, now > we support ETHTOOL_COALESCE_USECS as well. > > ETHTOOL_COALESCE_USECS is meaningless if VIRTIO_NET_F_NOTF_COAL > feature is not negotiated, so I added the mentioned part. Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be better to have an independent patch to fix. Thanks >
> Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be > better to have an independent patch to fix. Maybe I'm wrong, but I don't think that you could receive tx_coalesce_usecs/rx_coalesce_usecs without this patch, since the ethtool_ops struct without this patch is: static const struct ethtool_ops virtnet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, ....... And with this patch is: static const struct ethtool_ops virtnet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | ETHTOOL_COALESCE_USECS,
On Tue, Jul 19, 2022 at 3:19 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote: > > > Yes but this "issue" exists before VIRTIO_NET_F_NOTF_COAL. It might be > > better to have an independent patch to fix. > > > Maybe I'm wrong, but I don't think that you could receive > tx_coalesce_usecs/rx_coalesce_usecs without this patch, since the > ethtool_ops struct without this patch is: > > static const struct ethtool_ops virtnet_ethtool_ops = { > .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > ....... > > And with this patch is: > static const struct ethtool_ops virtnet_ethtool_ops = { > .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > ETHTOOL_COALESCE_USECS, > You're right. So: Acked-by: Jason Wang <jasowang@redhat.com> Thanks
On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> Waiting a bit longer for Michael's ack, so in case other netdev maintainer takes this: Reviewed-by: Jakub Kicinski <kuba@kernel.org>
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > --- > v2: > - Fix type assignments warnings found with sparse. > - Fix a few typos. > > v3: > - Change the coalescing parameters in a dedicated function. > - Return -EBUSY from the set coalescing function when the device's > link is up, even if the notifications coalescing feature is negotiated. > > v4: > - If link is up and we need to update NAPI weight, return -EBUSY before > sending the coalescing commands to the device Thanks! some comments below > --- > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > include/uapi/linux/virtio_net.h | 34 +++++++++- > 2 files changed, 129 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 356cf8dd416..4fde66bd511 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -261,6 +261,12 @@ struct virtnet_info { > u8 duplex; > u32 speed; > > + /* Interrupt coalescing settings */ > + u32 tx_usecs; > + u32 rx_usecs; > + u32 tx_max_packets; > + u32 rx_max_packets; > + > unsigned long guest_offloads; > unsigned long guest_offloads_capable; > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > + struct ethtool_coalesce *ec) > +{ > + struct scatterlist sgs_tx, sgs_rx; > + struct virtio_net_ctrl_coal_tx coal_tx; > + struct virtio_net_ctrl_coal_rx coal_rx; > + > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > + &sgs_tx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->tx_usecs = ec->tx_coalesce_usecs; > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > + > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > + &sgs_rx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->rx_usecs = ec->rx_coalesce_usecs; > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > + > + return 0; > +} > + > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > +{ > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > + * feature is negotiated. > + */ > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > + return -EOPNOTSUPP; > + > + if (ec->tx_max_coalesced_frames > 1 || > + ec->rx_max_coalesced_frames != 1) > + return -EINVAL; > + > + return 0; > +} > + > static int virtnet_set_coalesce(struct net_device *dev, > struct ethtool_coalesce *ec, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > struct virtnet_info *vi = netdev_priv(dev); > - int i, napi_weight; > - > - if (ec->tx_max_coalesced_frames > 1 || > - ec->rx_max_coalesced_frames != 1) > - return -EINVAL; > + int ret, i, napi_weight; > + bool update_napi = false; > > + /* Can't change NAPI weight if the link is up */ > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx napi on/off. However I am not sure we should treat any value != 1 as napi on. I don't really have good ideas - I think abusing coalescing might have been a mistake. But now that we are there, I feel we need a way for userspace to at least be able to figure out whether setting coalescing to 0 will have nasty side effects. For example, here's a problem: - according to spec, all values are reset to 0 - userspace reads coalescing values and gets 0 Does this mean napi is off? And now that we support colescing, I wonder how is user going to control napi. It's also a bit of a spec defect that it does not document corner cases like what do 0 values do, are they different from 1? or what are max values. Not too late to fix? > if (napi_weight ^ vi->sq[0].napi.weight) { > if (dev->flags & IFF_UP) > return -EBUSY; > + else > + update_napi = true; > + } > + > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > + ret = virtnet_send_notf_coal_cmds(vi, ec); > + else > + ret = virtnet_coal_params_supported(ec); > + > + if (ret) > + return ret; > + > + if (update_napi) { > for (i = 0; i < vi->max_queue_pairs; i++) > vi->sq[i].napi.weight = napi_weight; > } > > - return 0; > + return ret; > } > > static int virtnet_get_coalesce(struct net_device *dev, > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > - struct ethtool_coalesce ec_default = { > - .cmd = ETHTOOL_GCOALESCE, > - .rx_max_coalesced_frames = 1, > - }; > struct virtnet_info *vi = netdev_priv(dev); > > - memcpy(ec, &ec_default, sizeof(ec_default)); > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + ec->rx_coalesce_usecs = vi->rx_usecs; > + ec->tx_coalesce_usecs = vi->tx_usecs; > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > + } else { > + ec->rx_max_coalesced_frames = 1; > > - if (vi->sq[0].napi.weight) > - ec->tx_max_coalesced_frames = 1; > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + } > > return 0; > } > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > } > > static const struct ethtool_ops virtnet_ethtool_ops = { > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > + ETHTOOL_COALESCE_USECS, > .get_drvinfo = virtnet_get_drvinfo, > .get_link = ethtool_op_get_link, > .get_ringparam = virtnet_get_ringparam, > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > "VIRTIO_NET_F_CTRL_VQ") || > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > + "VIRTIO_NET_F_CTRL_VQ") || > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > "VIRTIO_NET_F_CTRL_VQ"))) { > return false; > } > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > vi->mergeable_rx_bufs = true; > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + vi->rx_usecs = 0; > + vi->tx_usecs = 0; > + vi->tx_max_packets = 0; > + vi->rx_max_packets = 0; > + } > + > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > vi->has_rss_hash_report = true; > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f1..29ced55514d 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -56,7 +56,7 @@ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > - > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ So the spec says Device supports notifications coalescing. which makes more sense - there's not a lot guest needs to do here. > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > +/* > + * Control notifications coalescing. > + * > + * Request the device to change the notifications coalescing parameters. > + * > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > + */ > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > +/* > + * Set the tx-usecs/tx-max-packets patameters. parameters? > + * tx-usecs - Maximum number of usecs to delay a TX notification. > + * tx-max-packets - Maximum number of packets to send before a TX notification. why with dash here? And why not just put the comments near the fields themselves? > + */ > +struct virtio_net_ctrl_coal_tx { > + __le32 tx_max_packets; > + __le32 tx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > + > +/* > + * Set the rx-usecs/rx-max-packets patameters. > + * rx-usecs - Maximum number of usecs to delay a RX notification. > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > + */ > +struct virtio_net_ctrl_coal_rx { > + __le32 rx_max_packets; > + __le32 rx_usecs; > +}; same comments > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > + > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > -- > 2.32.0
On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote: > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > Control a Virtio network device notifications coalescing parameters > > using the control virtqueue. > > > > A device that supports this fetature can receive > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > Ask the network device to change the following parameters: > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > - tx_max_packets: Maximum number of packets to send before a > > TX notification. > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > Ask the network device to change the following parameters: > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > - rx_max_packets: Maximum number of packets to receive before a > > RX notification. > > > > VirtIO spec. patch: > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > Waiting a bit longer for Michael's ack, so in case other netdev > maintainer takes this: > > Reviewed-by: Jakub Kicinski <kuba@kernel.org> Yea was going to ack this but looking at the UAPI again we have a problem because we abused tax max frames values 0 and 1 to control napi in the past. technically does not affect legacy cards but userspace can't easily tell the difference, can it?
On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote: > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > > > Control a Virtio network device notifications coalescing parameters > > > using the control virtqueue. > > > > > > A device that supports this fetature can receive > > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > > Ask the network device to change the following parameters: > > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > > - tx_max_packets: Maximum number of packets to send before a > > > TX notification. > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > > Ask the network device to change the following parameters: > > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > > - rx_max_packets: Maximum number of packets to receive before a > > > RX notification. > > > > > > VirtIO spec. patch: > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > > > Waiting a bit longer for Michael's ack, so in case other netdev > > maintainer takes this: > > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org> > > Yea was going to ack this but looking at the UAPI again we have a > problem because we abused tax max frames values 0 and 1 to control napi > in the past. technically does not affect legacy cards but userspace > can't easily tell the difference, can it? The "abuse" only works for iproute2. For uAPI we know it should follow the spec? (anyhow NAPI is something out of the spec) Thanks > > -- > MST >
On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote: > On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote: > > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > > > > > Control a Virtio network device notifications coalescing parameters > > > > using the control virtqueue. > > > > > > > > A device that supports this fetature can receive > > > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > > > Ask the network device to change the following parameters: > > > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > > > - tx_max_packets: Maximum number of packets to send before a > > > > TX notification. > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > > > Ask the network device to change the following parameters: > > > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > > > - rx_max_packets: Maximum number of packets to receive before a > > > > RX notification. > > > > > > > > VirtIO spec. patch: > > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > > > > > Waiting a bit longer for Michael's ack, so in case other netdev > > > maintainer takes this: > > > > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org> > > > > Yea was going to ack this but looking at the UAPI again we have a > > problem because we abused tax max frames values 0 and 1 to control napi > > in the past. technically does not affect legacy cards but userspace > > can't easily tell the difference, can it? > > The "abuse" only works for iproute2. That's kernel/userspace API. That's what this patch affects, right? > For uAPI we know it should follow > the spec? (anyhow NAPI is something out of the spec) > > Thanks When you say uAPI here you mean the virtio header. I am not worried about that just yet (maybe I should be). > > > > -- > > MST > >
On Wed, Jul 20, 2022 at 3:05 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote: > > On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote: > > > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > > > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > > > > > > > Control a Virtio network device notifications coalescing parameters > > > > > using the control virtqueue. > > > > > > > > > > A device that supports this fetature can receive > > > > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > > > > Ask the network device to change the following parameters: > > > > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > > > > - tx_max_packets: Maximum number of packets to send before a > > > > > TX notification. > > > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > > > > Ask the network device to change the following parameters: > > > > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > > > > - rx_max_packets: Maximum number of packets to receive before a > > > > > RX notification. > > > > > > > > > > VirtIO spec. patch: > > > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > > > > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > > > > > > > Waiting a bit longer for Michael's ack, so in case other netdev > > > > maintainer takes this: > > > > > > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org> > > > > > > Yea was going to ack this but looking at the UAPI again we have a > > > problem because we abused tax max frames values 0 and 1 to control napi > > > in the past. technically does not affect legacy cards but userspace > > > can't easily tell the difference, can it? > > > > The "abuse" only works for iproute2. > > That's kernel/userspace API. That's what this patch affects, right? I'm not sure I get this. The 1-to-enable-napi is only used between iproute2 and kernel via ETHTOOL_A_COALESCE_TX_MAX_FRAMES not the uAPI introduced here. So I don't see how it can conflict with the virito uAPI extension here. Thanks > > > For uAPI we know it should follow > > the spec? (anyhow NAPI is something out of the spec) > > > > Thanks > > When you say uAPI here you mean the virtio header. I am not > worried about that just yet (maybe I should be). > > > > > > > -- > > > MST > > > >
On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote: > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx > napi on/off. > > However I am not sure we should treat any value != 1 as napi on. > > > > I don't really have good ideas - I think abusing coalescing might > > have been a mistake. But now that we are there, I feel we need > > a way for userspace to at least be able to figure out whether > > setting coalescing to 0 will have nasty side effects. > > > So, how can I proceed from here? > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub > suggested in prev. versions)? > It makes sense, since the number of TX notifications can be reduced by setting > tx_usecs/tx_max_packets with ethtool. Hmm Jason had some ideas about extensions in mind when he coded the current UAPI, let's see if he has ideas. I'll ruminate on compatibility a bit too. > > It's also a bit of a spec defect that it does not document corner cases > > like what do 0 values do, are they different from 1? or what are max values. > > Not too late to fix? > > > I think that some of the corner cases can be understood from the coalescing > values. > For example: > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a > notification immediately. > But if rx_usecs=1 we should wait for 1 usec. > The case with max_packets is a little bit unclear for the values 0/1, and it > seems that in both cases we should send a notification immediately after > receiving/sending a packet. > > > > So the spec says > > Device supports notifications coalescing. > > > > which makes more sense - there's not a lot guest needs to do here. > > > Noted. > > > parameters? > > > I'll change it to "settings". > > > why with dash here? And why not just put the comments near the fields > > themselves? > > > Noted.
On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote: > > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx > > napi on/off. > > > However I am not sure we should treat any value != 1 as napi on. > > > > > > I don't really have good ideas - I think abusing coalescing might > > > have been a mistake. But now that we are there, I feel we need > > > a way for userspace to at least be able to figure out whether > > > setting coalescing to 0 will have nasty side effects. > > > > > > So, how can I proceed from here? > > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub > > suggested in prev. versions)? > > It makes sense, since the number of TX notifications can be reduced by setting > > tx_usecs/tx_max_packets with ethtool. > > > Hmm Jason had some ideas about extensions in mind when he > coded the current UAPI, let's see if he has ideas. > I'll ruminate on compatibility a bit too. I might be wrong, but using 1 to enable tx napi is a way to try to be compatible with the interrupt coalescing. That is, without notification coalescing, if 1 is set, we enable tx notifications (and NAPI) for each packet. This works as if tx-max-coalesced-frames is set to 1 when notification coalescing is negotiated. Thanks > > > > It's also a bit of a spec defect that it does not document corner cases > > > like what do 0 values do, are they different from 1? or what are max values. > > > Not too late to fix? > > > > > > I think that some of the corner cases can be understood from the coalescing > > values. > > For example: > > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a > > notification immediately. > > But if rx_usecs=1 we should wait for 1 usec. > > The case with max_packets is a little bit unclear for the values 0/1, and it > > seems that in both cases we should send a notification immediately after > > receiving/sending a packet. > > > > > > > So the spec says > > > Device supports notifications coalescing. > > > > > > which makes more sense - there's not a lot guest needs to do here. > > > > > > Noted. > > > > > parameters? > > > > > > I'll change it to "settings". > > > > > why with dash here? And why not just put the comments near the fields > > > themselves? > > > > > > Noted. >
On Wed, Jul 20, 2022 at 3:42 PM Jason Wang <jasowang@redhat.com> wrote: > > On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote: > > > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx > > > napi on/off. > > > > However I am not sure we should treat any value != 1 as napi on. > > > > > > > > I don't really have good ideas - I think abusing coalescing might > > > > have been a mistake. But now that we are there, I feel we need > > > > a way for userspace to at least be able to figure out whether > > > > setting coalescing to 0 will have nasty side effects. > > > > > > > > > So, how can I proceed from here? > > > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub > > > suggested in prev. versions)? > > > It makes sense, since the number of TX notifications can be reduced by setting > > > tx_usecs/tx_max_packets with ethtool. > > > > > > Hmm Jason had some ideas about extensions in mind when he > > coded the current UAPI, let's see if he has ideas. > > I'll ruminate on compatibility a bit too. > > I might be wrong, but using 1 to enable tx napi is a way to try to be > compatible with the interrupt coalescing. > > That is, without notification coalescing, if 1 is set, we enable tx > notifications (and NAPI) for each packet. This works as if > tx-max-coalesced-frames is set to 1 when notification coalescing is > negotiated. > > Thanks > > > > > > > It's also a bit of a spec defect that it does not document corner cases > > > > like what do 0 values do, are they different from 1? or what are max values. > > > > Not too late to fix? > > > > > > > > > I think that some of the corner cases can be understood from the coalescing > > > values. > > > For example: > > > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a > > > notification immediately. > > > But if rx_usecs=1 we should wait for 1 usec. > > > The case with max_packets is a little bit unclear for the values 0/1, and it > > > seems that in both cases we should send a notification immediately after > > > receiving/sending a packet. Then we probably need to document this in the spec. And we need an upper limit for those values, this helps for e.g migration compatibility. Thanks > > > > > > > > > > So the spec says > > > > Device supports notifications coalescing. > > > > > > > > which makes more sense - there's not a lot guest needs to do here. > > > > > > > > > Noted. > > > > > > > parameters? > > > > > > > > > I'll change it to "settings". > > > > > > > why with dash here? And why not just put the comments near the fields > > > > themselves? > > > > > > > > > Noted. > >
> And we need an upper limit for those values, this helps for e.g > migration compatibility. Why not let the device decide the upper limit, making it device specific? As written in the spec., a device can answer to the coalescing command with VIRTIO_NET_ERR, If it was not able to set the requested settings. If a physical device uses virtio datapath, and can for example coalesce notifications up to 500us, why should we limit it with a lower number?
On Wed, Jul 20, 2022 at 3:57 PM Alvaro Karsz <alvaro.karsz@solid-run.com> wrote: > > > And we need an upper limit for those values, this helps for e.g > > migration compatibility. > > Why not let the device decide the upper limit, making it device specific? For example we may want to migrate VM from src to dst. In src, we set tx-max-coalesced-frames to 512 but the dst only supports 256. > As written in the spec., a device can answer to the coalescing command > with VIRTIO_NET_ERR, > If it was not able to set the requested settings. > > If a physical device uses virtio datapath, and can for example > coalesce notifications up to 500us, why should we limit it with a > lower number? > See above, for migration compatibility. Thanks
On Wed, Jul 20, 2022 at 2:28 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote: > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > Control a Virtio network device notifications coalescing parameters > > using the control virtqueue. > > > > A device that supports this fetature can receive > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > Ask the network device to change the following parameters: > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > - tx_max_packets: Maximum number of packets to send before a > > TX notification. > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > Ask the network device to change the following parameters: > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > - rx_max_packets: Maximum number of packets to receive before a > > RX notification. > > > > VirtIO spec. patch: > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > --- > > v2: > > - Fix type assignments warnings found with sparse. > > - Fix a few typos. > > > > v3: > > - Change the coalescing parameters in a dedicated function. > > - Return -EBUSY from the set coalescing function when the device's > > link is up, even if the notifications coalescing feature is negotiated. > > > > v4: > > - If link is up and we need to update NAPI weight, return -EBUSY before > > sending the coalescing commands to the device > > Thanks! some comments below > > > --- > > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > > include/uapi/linux/virtio_net.h | 34 +++++++++- > > 2 files changed, 129 insertions(+), 16 deletions(-) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index 356cf8dd416..4fde66bd511 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -261,6 +261,12 @@ struct virtnet_info { > > u8 duplex; > > u32 speed; > > > > + /* Interrupt coalescing settings */ > > + u32 tx_usecs; > > + u32 rx_usecs; > > + u32 tx_max_packets; > > + u32 rx_max_packets; > > + > > unsigned long guest_offloads; > > unsigned long guest_offloads_capable; > > > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > > return 0; > > } > > > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > > + struct ethtool_coalesce *ec) > > +{ > > + struct scatterlist sgs_tx, sgs_rx; > > + struct virtio_net_ctrl_coal_tx coal_tx; > > + struct virtio_net_ctrl_coal_rx coal_rx; > > + > > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > > + > > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > > + &sgs_tx)) > > + return -EINVAL; > > + > > + /* Save parameters */ > > + vi->tx_usecs = ec->tx_coalesce_usecs; > > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > > + > > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > > + > > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > > + &sgs_rx)) > > + return -EINVAL; > > + > > + /* Save parameters */ > > + vi->rx_usecs = ec->rx_coalesce_usecs; > > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > > + > > + return 0; > > +} > > + > > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > > +{ > > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > > + * feature is negotiated. > > + */ > > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > > + return -EOPNOTSUPP; > > + > > + if (ec->tx_max_coalesced_frames > 1 || > > + ec->rx_max_coalesced_frames != 1) > > + return -EINVAL; > > + > > + return 0; > > +} > > + > > static int virtnet_set_coalesce(struct net_device *dev, > > struct ethtool_coalesce *ec, > > struct kernel_ethtool_coalesce *kernel_coal, > > struct netlink_ext_ack *extack) > > { > > struct virtnet_info *vi = netdev_priv(dev); > > - int i, napi_weight; > > - > > - if (ec->tx_max_coalesced_frames > 1 || > > - ec->rx_max_coalesced_frames != 1) > > - return -EINVAL; > > + int ret, i, napi_weight; > > + bool update_napi = false; > > > > + /* Can't change NAPI weight if the link is up */ > > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx napi on/off. > However I am not sure we should treat any value != 1 as napi on. > > I don't really have good ideas - I think abusing coalescing might > have been a mistake. But now that we are there, I feel we need > a way for userspace to at least be able to figure out whether > setting coalescing to 0 will have nasty side effects. > > For example, here's a problem: > > - according to spec, all values are reset to 0 > - userspace reads coalescing values and gets 0 > > Does this mean napi is off? > > And now that we support colescing, I wonder how is user going to control napi. I think we probably don't want to let users know about NAPI. And actually, tx NAPI is a byproduct of the tx notification. Historically, we don't use tx interrupts but skb_orphan() unless the queue is about to be full. But it tends to cause a lot of side effects. Then we try to enable tx interrupt with tx NAPI, but in order to not have performance regression, we enable it via ethtool (tx-max-coalesce-frames). Setting 1 means the driver wants to be notified for each sent packet, tx NAPI is a must in this case. Note that the tx notification (NAPI) has been enabled by default for years. If you have concern, I wonder if it's the time to drop skb_orphan() completely from virtio-net driver. Then we will be fine? Thanks > > It's also a bit of a spec defect that it does not document corner cases > like what do 0 values do, are they different from 1? or what are max values. > Not too late to fix? > > > > if (napi_weight ^ vi->sq[0].napi.weight) { > > if (dev->flags & IFF_UP) > > return -EBUSY; > > + else > > + update_napi = true; > > + } > > + > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > > + ret = virtnet_send_notf_coal_cmds(vi, ec); > > + else > > + ret = virtnet_coal_params_supported(ec); > > + > > + if (ret) > > + return ret; > > + > > + if (update_napi) { > > for (i = 0; i < vi->max_queue_pairs; i++) > > vi->sq[i].napi.weight = napi_weight; > > } > > > > - return 0; > > + return ret; > > > > } > > > > static int virtnet_get_coalesce(struct net_device *dev, > > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > > struct kernel_ethtool_coalesce *kernel_coal, > > struct netlink_ext_ack *extack) > > { > > - struct ethtool_coalesce ec_default = { > > - .cmd = ETHTOOL_GCOALESCE, > > - .rx_max_coalesced_frames = 1, > > - }; > > struct virtnet_info *vi = netdev_priv(dev); > > > > - memcpy(ec, &ec_default, sizeof(ec_default)); > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > > + ec->rx_coalesce_usecs = vi->rx_usecs; > > + ec->tx_coalesce_usecs = vi->tx_usecs; > > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > > + } else { > > + ec->rx_max_coalesced_frames = 1; > > > > - if (vi->sq[0].napi.weight) > > - ec->tx_max_coalesced_frames = 1; > > + if (vi->sq[0].napi.weight) > > + ec->tx_max_coalesced_frames = 1; > > + } > > > > return 0; > > } > > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > > } > > > > static const struct ethtool_ops virtnet_ethtool_ops = { > > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > > + ETHTOOL_COALESCE_USECS, > > .get_drvinfo = virtnet_get_drvinfo, > > .get_link = ethtool_op_get_link, > > .get_ringparam = virtnet_get_ringparam, > > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > > "VIRTIO_NET_F_CTRL_VQ") || > > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > > + "VIRTIO_NET_F_CTRL_VQ") || > > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > > "VIRTIO_NET_F_CTRL_VQ"))) { > > return false; > > } > > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > > vi->mergeable_rx_bufs = true; > > > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > > + vi->rx_usecs = 0; > > + vi->tx_usecs = 0; > > + vi->tx_max_packets = 0; > > + vi->rx_max_packets = 0; > > + } > > + > > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > > vi->has_rss_hash_report = true; > > > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > > > static unsigned int features[] = { > > VIRTNET_FEATURES, > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > index 3f55a4215f1..29ced55514d 100644 > > --- a/include/uapi/linux/virtio_net.h > > +++ b/include/uapi/linux/virtio_net.h > > @@ -56,7 +56,7 @@ > > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > > * Steering */ > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > - > > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ > > So the spec says > Device supports notifications coalescing. > > which makes more sense - there's not a lot guest needs to do here. > > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > > > +/* > > + * Control notifications coalescing. > > + * > > + * Request the device to change the notifications coalescing parameters. > > + * > > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > > + */ > > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > > +/* > > + * Set the tx-usecs/tx-max-packets patameters. > > parameters? > > > + * tx-usecs - Maximum number of usecs to delay a TX notification. > > + * tx-max-packets - Maximum number of packets to send before a TX notification. > > why with dash here? And why not just put the comments near the fields > themselves? > > > + */ > > +struct virtio_net_ctrl_coal_tx { > > + __le32 tx_max_packets; > > + __le32 tx_usecs; > > +}; > > + > > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > > + > > +/* > > + * Set the rx-usecs/rx-max-packets patameters. > > + * rx-usecs - Maximum number of usecs to delay a RX notification. > > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > > + */ > > +struct virtio_net_ctrl_coal_rx { > > + __le32 rx_max_packets; > > + __le32 rx_usecs; > > +}; > > same comments > > > + > > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > > + > > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > > -- > > 2.32.0 >
On Wed, Jul 20, 2022 at 03:15:08PM +0800, Jason Wang wrote: > On Wed, Jul 20, 2022 at 3:05 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Wed, Jul 20, 2022 at 03:02:04PM +0800, Jason Wang wrote: > > > On Wed, Jul 20, 2022 at 2:45 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > > > On Tue, Jul 19, 2022 at 05:26:52PM -0700, Jakub Kicinski wrote: > > > > > On Mon, 18 Jul 2022 12:11:02 +0300 Alvaro Karsz wrote: > > > > > > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > > > > > > > > > > > Control a Virtio network device notifications coalescing parameters > > > > > > using the control virtqueue. > > > > > > > > > > > > A device that supports this fetature can receive > > > > > > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > > > > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > > > > > > Ask the network device to change the following parameters: > > > > > > - tx_usecs: Maximum number of usecs to delay a TX notification. > > > > > > - tx_max_packets: Maximum number of packets to send before a > > > > > > TX notification. > > > > > > > > > > > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > > > > > > Ask the network device to change the following parameters: > > > > > > - rx_usecs: Maximum number of usecs to delay a RX notification. > > > > > > - rx_max_packets: Maximum number of packets to receive before a > > > > > > RX notification. > > > > > > > > > > > > VirtIO spec. patch: > > > > > > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > > > > > > > > > > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > > > > > > > > > > Waiting a bit longer for Michael's ack, so in case other netdev > > > > > maintainer takes this: > > > > > > > > > > Reviewed-by: Jakub Kicinski <kuba@kernel.org> > > > > > > > > Yea was going to ack this but looking at the UAPI again we have a > > > > problem because we abused tax max frames values 0 and 1 to control napi > > > > in the past. technically does not affect legacy cards but userspace > > > > can't easily tell the difference, can it? > > > > > > The "abuse" only works for iproute2. > > > > That's kernel/userspace API. That's what this patch affects, right? > > I'm not sure I get this. > > The 1-to-enable-napi is only used between iproute2 and kernel via > ETHTOOL_A_COALESCE_TX_MAX_FRAMES not the uAPI introduced here. > So I don't see how it can conflict with the virito uAPI extension here. > > Thanks As far as I can see ETHTOOL_A_COALESCE_TX_MAX_FRAMES invokes the ops->get_coalesce and ops->set_coalesce callbacks. This patch changes their behaviour when the card has VIRTIO_NET_F_NOTF_COAL. Userspace making assumptions about what this option does will thinkably might get unexpected behaviour. So: Minimally we need a way for userspace to find out what are the semantics of the command now, so one can implement portable userspace going forward. Preferably, analysis of existing userspace, what it does and how does the change affect it should be included. Ideally, a work-around that does not affect existing userspace would be found. > > > > > > For uAPI we know it should follow > > > the spec? (anyhow NAPI is something out of the spec) > > > > > > Thanks > > > > When you say uAPI here you mean the virtio header. I am not > > worried about that just yet (maybe I should be). > > > > > > > > > > -- > > > > MST > > > > > >
On Wed, Jul 20, 2022 at 03:45:59PM +0800, Jason Wang wrote: > On Wed, Jul 20, 2022 at 3:42 PM Jason Wang <jasowang@redhat.com> wrote: > > > > On Wed, Jul 20, 2022 at 3:15 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Wed, Jul 20, 2022 at 10:07:11AM +0300, Alvaro Karsz wrote: > > > > > Hmm. we currently (ab)use tx_max_coalesced_frames values 0 and 1 to mean tx > > > > napi on/off. > > > > > However I am not sure we should treat any value != 1 as napi on. > > > > > > > > > > I don't really have good ideas - I think abusing coalescing might > > > > > have been a mistake. But now that we are there, I feel we need > > > > > a way for userspace to at least be able to figure out whether > > > > > setting coalescing to 0 will have nasty side effects. > > > > > > > > > > > > So, how can I proceed from here? > > > > Maybe we don't need to use tx napi when this feature is negotiated (like Jakub > > > > suggested in prev. versions)? > > > > It makes sense, since the number of TX notifications can be reduced by setting > > > > tx_usecs/tx_max_packets with ethtool. > > > > > > > > > Hmm Jason had some ideas about extensions in mind when he > > > coded the current UAPI, let's see if he has ideas. > > > I'll ruminate on compatibility a bit too. > > > > I might be wrong, but using 1 to enable tx napi is a way to try to be > > compatible with the interrupt coalescing. > > > > That is, without notification coalescing, if 1 is set, we enable tx > > notifications (and NAPI) for each packet. This works as if > > tx-max-coalesced-frames is set to 1 when notification coalescing is > > negotiated. > > > > Thanks > > > > > > > > > > It's also a bit of a spec defect that it does not document corner cases > > > > > like what do 0 values do, are they different from 1? or what are max values. > > > > > Not too late to fix? > > > > > > > > > > > > I think that some of the corner cases can be understood from the coalescing > > > > values. > > > > For example: > > > > if rx_usecs=0 we should wait for 0 usecs, meaning that we should send a > > > > notification immediately. > > > > But if rx_usecs=1 we should wait for 1 usec. > > > > The case with max_packets is a little bit unclear for the values 0/1, and it > > > > seems that in both cases we should send a notification immediately after > > > > receiving/sending a packet. > > Then we probably need to document this in the spec. > > And we need an upper limit for those values, this helps for e.g > migration compatibility. > > Thanks Not a bad idea generally but I suspect this is better done as part of the admin queue/migration work then. > > > > > > > > > > > > > So the spec says > > > > > Device supports notifications coalescing. > > > > > > > > > > which makes more sense - there's not a lot guest needs to do here. > > > > > > > > > > > > Noted. > > > > > > > > > parameters? > > > > > > > > > > > > I'll change it to "settings". > > > > > > > > > why with dash here? And why not just put the comments near the fields > > > > > themselves? > > > > > > > > > > > > Noted. > > >
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > --- > v2: > - Fix type assignments warnings found with sparse. > - Fix a few typos. > > v3: > - Change the coalescing parameters in a dedicated function. > - Return -EBUSY from the set coalescing function when the device's > link is up, even if the notifications coalescing feature is negotiated. > > v4: > - If link is up and we need to update NAPI weight, return -EBUSY before > sending the coalescing commands to the device > --- > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > include/uapi/linux/virtio_net.h | 34 +++++++++- > 2 files changed, 129 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 356cf8dd416..4fde66bd511 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -261,6 +261,12 @@ struct virtnet_info { > u8 duplex; > u32 speed; > > + /* Interrupt coalescing settings */ > + u32 tx_usecs; > + u32 rx_usecs; > + u32 tx_max_packets; > + u32 rx_max_packets; > + > unsigned long guest_offloads; > unsigned long guest_offloads_capable; > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > + struct ethtool_coalesce *ec) > +{ > + struct scatterlist sgs_tx, sgs_rx; > + struct virtio_net_ctrl_coal_tx coal_tx; > + struct virtio_net_ctrl_coal_rx coal_rx; > + > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > + &sgs_tx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->tx_usecs = ec->tx_coalesce_usecs; > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > + > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > + &sgs_rx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->rx_usecs = ec->rx_coalesce_usecs; > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > + > + return 0; > +} > + > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > +{ > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > + * feature is negotiated. > + */ > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > + return -EOPNOTSUPP; > + > + if (ec->tx_max_coalesced_frames > 1 || > + ec->rx_max_coalesced_frames != 1) > + return -EINVAL; > + > + return 0; > +} > + > static int virtnet_set_coalesce(struct net_device *dev, > struct ethtool_coalesce *ec, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > struct virtnet_info *vi = netdev_priv(dev); > - int i, napi_weight; > - > - if (ec->tx_max_coalesced_frames > 1 || > - ec->rx_max_coalesced_frames != 1) > - return -EINVAL; > + int ret, i, napi_weight; > + bool update_napi = false; > > + /* Can't change NAPI weight if the link is up */ > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; > if (napi_weight ^ vi->sq[0].napi.weight) { > if (dev->flags & IFF_UP) > return -EBUSY; > + else > + update_napi = true; > + } > + > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > + ret = virtnet_send_notf_coal_cmds(vi, ec); > + else > + ret = virtnet_coal_params_supported(ec); > + > + if (ret) > + return ret; > + > + if (update_napi) { > for (i = 0; i < vi->max_queue_pairs; i++) > vi->sq[i].napi.weight = napi_weight; > } > > - return 0; > + return ret; > } > > static int virtnet_get_coalesce(struct net_device *dev, OK so I thought hard about this. At the moment the only way I see is this: for tx max coalesced, interpret tx_max_coalesced_frames == 0 as napi disable. tx_max_coalesced_frames = 0 as napi disable, tx_max_coalesced_frames != 0 as napi enable and if the feature has been negotiated additionally forward it to hardware. If not force tx_max_coalesced_frames <= 1 as we do now. Without hardware support force rx_max_coalesced_frames == 1 as we do now otherwise do not force and forward to hardware. > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > - struct ethtool_coalesce ec_default = { > - .cmd = ETHTOOL_GCOALESCE, > - .rx_max_coalesced_frames = 1, > - }; > struct virtnet_info *vi = netdev_priv(dev); > > - memcpy(ec, &ec_default, sizeof(ec_default)); > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + ec->rx_coalesce_usecs = vi->rx_usecs; > + ec->tx_coalesce_usecs = vi->tx_usecs; > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > + } else { > + ec->rx_max_coalesced_frames = 1; > > - if (vi->sq[0].napi.weight) > - ec->tx_max_coalesced_frames = 1; > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + } > > return 0; > } > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > } > > static const struct ethtool_ops virtnet_ethtool_ops = { > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > + ETHTOOL_COALESCE_USECS, > .get_drvinfo = virtnet_get_drvinfo, > .get_link = ethtool_op_get_link, > .get_ringparam = virtnet_get_ringparam, > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > "VIRTIO_NET_F_CTRL_VQ") || > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > + "VIRTIO_NET_F_CTRL_VQ") || > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > "VIRTIO_NET_F_CTRL_VQ"))) { > return false; > } > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > vi->mergeable_rx_bufs = true; > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + vi->rx_usecs = 0; > + vi->tx_usecs = 0; > + vi->tx_max_packets = 0; > + vi->rx_max_packets = 0; > + } > + > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > vi->has_rss_hash_report = true; > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f1..29ced55514d 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -56,7 +56,7 @@ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > - > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > +/* > + * Control notifications coalescing. > + * > + * Request the device to change the notifications coalescing parameters. > + * > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > + */ > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > +/* > + * Set the tx-usecs/tx-max-packets patameters. > + * tx-usecs - Maximum number of usecs to delay a TX notification. > + * tx-max-packets - Maximum number of packets to send before a TX notification. > + */ > +struct virtio_net_ctrl_coal_tx { > + __le32 tx_max_packets; > + __le32 tx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > + > +/* > + * Set the rx-usecs/rx-max-packets patameters. > + * rx-usecs - Maximum number of usecs to delay a RX notification. > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > + */ > +struct virtio_net_ctrl_coal_rx { > + __le32 rx_max_packets; > + __le32 rx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > + > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > -- > 2.32.0
On Mon, Jul 18, 2022 at 12:11:02PM +0300, Alvaro Karsz wrote: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> > --- > v2: > - Fix type assignments warnings found with sparse. > - Fix a few typos. > > v3: > - Change the coalescing parameters in a dedicated function. > - Return -EBUSY from the set coalescing function when the device's > link is up, even if the notifications coalescing feature is negotiated. > > v4: > - If link is up and we need to update NAPI weight, return -EBUSY before > sending the coalescing commands to the device > --- So given this got a bunch of acks I picked this up as is, but let's address Jason's comments for wording in comments as a follow up patch please. > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > include/uapi/linux/virtio_net.h | 34 +++++++++- > 2 files changed, 129 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 356cf8dd416..4fde66bd511 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -261,6 +261,12 @@ struct virtnet_info { > u8 duplex; > u32 speed; > > + /* Interrupt coalescing settings */ > + u32 tx_usecs; > + u32 rx_usecs; > + u32 tx_max_packets; > + u32 rx_max_packets; > + > unsigned long guest_offloads; > unsigned long guest_offloads_capable; > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > + struct ethtool_coalesce *ec) > +{ > + struct scatterlist sgs_tx, sgs_rx; > + struct virtio_net_ctrl_coal_tx coal_tx; > + struct virtio_net_ctrl_coal_rx coal_rx; > + > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > + &sgs_tx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->tx_usecs = ec->tx_coalesce_usecs; > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > + > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > + &sgs_rx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->rx_usecs = ec->rx_coalesce_usecs; > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > + > + return 0; > +} > + > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > +{ > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > + * feature is negotiated. > + */ > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > + return -EOPNOTSUPP; > + > + if (ec->tx_max_coalesced_frames > 1 || > + ec->rx_max_coalesced_frames != 1) > + return -EINVAL; > + > + return 0; > +} > + > static int virtnet_set_coalesce(struct net_device *dev, > struct ethtool_coalesce *ec, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > struct virtnet_info *vi = netdev_priv(dev); > - int i, napi_weight; > - > - if (ec->tx_max_coalesced_frames > 1 || > - ec->rx_max_coalesced_frames != 1) > - return -EINVAL; > + int ret, i, napi_weight; > + bool update_napi = false; > > + /* Can't change NAPI weight if the link is up */ > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; > if (napi_weight ^ vi->sq[0].napi.weight) { > if (dev->flags & IFF_UP) > return -EBUSY; > + else > + update_napi = true; > + } > + > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > + ret = virtnet_send_notf_coal_cmds(vi, ec); > + else > + ret = virtnet_coal_params_supported(ec); > + > + if (ret) > + return ret; > + > + if (update_napi) { > for (i = 0; i < vi->max_queue_pairs; i++) > vi->sq[i].napi.weight = napi_weight; > } > > - return 0; > + return ret; > } > > static int virtnet_get_coalesce(struct net_device *dev, > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > - struct ethtool_coalesce ec_default = { > - .cmd = ETHTOOL_GCOALESCE, > - .rx_max_coalesced_frames = 1, > - }; > struct virtnet_info *vi = netdev_priv(dev); > > - memcpy(ec, &ec_default, sizeof(ec_default)); > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + ec->rx_coalesce_usecs = vi->rx_usecs; > + ec->tx_coalesce_usecs = vi->tx_usecs; > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > + } else { > + ec->rx_max_coalesced_frames = 1; > > - if (vi->sq[0].napi.weight) > - ec->tx_max_coalesced_frames = 1; > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + } > > return 0; > } > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > } > > static const struct ethtool_ops virtnet_ethtool_ops = { > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > + ETHTOOL_COALESCE_USECS, > .get_drvinfo = virtnet_get_drvinfo, > .get_link = ethtool_op_get_link, > .get_ringparam = virtnet_get_ringparam, > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > "VIRTIO_NET_F_CTRL_VQ") || > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > + "VIRTIO_NET_F_CTRL_VQ") || > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > "VIRTIO_NET_F_CTRL_VQ"))) { > return false; > } > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > vi->mergeable_rx_bufs = true; > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + vi->rx_usecs = 0; > + vi->tx_usecs = 0; > + vi->tx_max_packets = 0; > + vi->rx_max_packets = 0; > + } > + > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > vi->has_rss_hash_report = true; > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f1..29ced55514d 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -56,7 +56,7 @@ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > - > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > +/* > + * Control notifications coalescing. > + * > + * Request the device to change the notifications coalescing parameters. > + * > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > + */ > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > +/* > + * Set the tx-usecs/tx-max-packets patameters. > + * tx-usecs - Maximum number of usecs to delay a TX notification. > + * tx-max-packets - Maximum number of packets to send before a TX notification. > + */ > +struct virtio_net_ctrl_coal_tx { > + __le32 tx_max_packets; > + __le32 tx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > + > +/* > + * Set the rx-usecs/rx-max-packets patameters. > + * rx-usecs - Maximum number of usecs to delay a RX notification. > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > + */ > +struct virtio_net_ctrl_coal_rx { > + __le32 rx_max_packets; > + __le32 rx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > + > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > -- > 2.32.0
On Mon, 18 Jul 2022 12:11:02 +0300, Alvaro Karsz <alvaro.karsz@solid-run.com> wrote: > New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. > > Control a Virtio network device notifications coalescing parameters > using the control virtqueue. > > A device that supports this fetature can receive > VIRTIO_NET_CTRL_NOTF_COAL control commands. > > - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: > Ask the network device to change the following parameters: > - tx_usecs: Maximum number of usecs to delay a TX notification. > - tx_max_packets: Maximum number of packets to send before a > TX notification. > > - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: > Ask the network device to change the following parameters: > - rx_usecs: Maximum number of usecs to delay a RX notification. > - rx_max_packets: Maximum number of packets to receive before a > RX notification. > > VirtIO spec. patch: > https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html > > Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> We are very interested in this, and I would like to know what other plans you have in the future? Such as qemu, vhost-uer, vhost-net. And further development work in the kernel. Thanks. > --- > v2: > - Fix type assignments warnings found with sparse. > - Fix a few typos. > > v3: > - Change the coalescing parameters in a dedicated function. > - Return -EBUSY from the set coalescing function when the device's > link is up, even if the notifications coalescing feature is negotiated. > > v4: > - If link is up and we need to update NAPI weight, return -EBUSY before > sending the coalescing commands to the device > --- > drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- > include/uapi/linux/virtio_net.h | 34 +++++++++- > 2 files changed, 129 insertions(+), 16 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 356cf8dd416..4fde66bd511 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -261,6 +261,12 @@ struct virtnet_info { > u8 duplex; > u32 speed; > > + /* Interrupt coalescing settings */ > + u32 tx_usecs; > + u32 rx_usecs; > + u32 tx_max_packets; > + u32 rx_max_packets; > + > unsigned long guest_offloads; > unsigned long guest_offloads_capable; > > @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, > return 0; > } > > +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, > + struct ethtool_coalesce *ec) > +{ > + struct scatterlist sgs_tx, sgs_rx; > + struct virtio_net_ctrl_coal_tx coal_tx; > + struct virtio_net_ctrl_coal_rx coal_rx; > + > + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); > + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); > + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, > + &sgs_tx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->tx_usecs = ec->tx_coalesce_usecs; > + vi->tx_max_packets = ec->tx_max_coalesced_frames; > + > + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); > + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); > + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); > + > + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, > + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, > + &sgs_rx)) > + return -EINVAL; > + > + /* Save parameters */ > + vi->rx_usecs = ec->rx_coalesce_usecs; > + vi->rx_max_packets = ec->rx_max_coalesced_frames; > + > + return 0; > +} > + > +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) > +{ > + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL > + * feature is negotiated. > + */ > + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) > + return -EOPNOTSUPP; > + > + if (ec->tx_max_coalesced_frames > 1 || > + ec->rx_max_coalesced_frames != 1) > + return -EINVAL; > + > + return 0; > +} > + > static int virtnet_set_coalesce(struct net_device *dev, > struct ethtool_coalesce *ec, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > struct virtnet_info *vi = netdev_priv(dev); > - int i, napi_weight; > - > - if (ec->tx_max_coalesced_frames > 1 || > - ec->rx_max_coalesced_frames != 1) > - return -EINVAL; > + int ret, i, napi_weight; > + bool update_napi = false; > > + /* Can't change NAPI weight if the link is up */ > napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; > if (napi_weight ^ vi->sq[0].napi.weight) { > if (dev->flags & IFF_UP) > return -EBUSY; > + else > + update_napi = true; > + } > + > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) > + ret = virtnet_send_notf_coal_cmds(vi, ec); > + else > + ret = virtnet_coal_params_supported(ec); > + > + if (ret) > + return ret; > + > + if (update_napi) { > for (i = 0; i < vi->max_queue_pairs; i++) > vi->sq[i].napi.weight = napi_weight; > } > > - return 0; > + return ret; > } > > static int virtnet_get_coalesce(struct net_device *dev, > @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, > struct kernel_ethtool_coalesce *kernel_coal, > struct netlink_ext_ack *extack) > { > - struct ethtool_coalesce ec_default = { > - .cmd = ETHTOOL_GCOALESCE, > - .rx_max_coalesced_frames = 1, > - }; > struct virtnet_info *vi = netdev_priv(dev); > > - memcpy(ec, &ec_default, sizeof(ec_default)); > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + ec->rx_coalesce_usecs = vi->rx_usecs; > + ec->tx_coalesce_usecs = vi->tx_usecs; > + ec->tx_max_coalesced_frames = vi->tx_max_packets; > + ec->rx_max_coalesced_frames = vi->rx_max_packets; > + } else { > + ec->rx_max_coalesced_frames = 1; > > - if (vi->sq[0].napi.weight) > - ec->tx_max_coalesced_frames = 1; > + if (vi->sq[0].napi.weight) > + ec->tx_max_coalesced_frames = 1; > + } > > return 0; > } > @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) > } > > static const struct ethtool_ops virtnet_ethtool_ops = { > - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, > + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | > + ETHTOOL_COALESCE_USECS, > .get_drvinfo = virtnet_get_drvinfo, > .get_link = ethtool_op_get_link, > .get_ringparam = virtnet_get_ringparam, > @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, > "VIRTIO_NET_F_CTRL_VQ") || > VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, > + "VIRTIO_NET_F_CTRL_VQ") || > + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, > "VIRTIO_NET_F_CTRL_VQ"))) { > return false; > } > @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) > if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) > vi->mergeable_rx_bufs = true; > > + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { > + vi->rx_usecs = 0; > + vi->tx_usecs = 0; > + vi->tx_max_packets = 0; > + vi->rx_max_packets = 0; > + } > + > if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) > vi->has_rss_hash_report = true; > > @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT > + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index 3f55a4215f1..29ced55514d 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -56,7 +56,7 @@ > #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > - > +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ > @@ -355,4 +355,36 @@ struct virtio_net_hash_config { > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 > #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > +/* > + * Control notifications coalescing. > + * > + * Request the device to change the notifications coalescing parameters. > + * > + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. > + */ > +#define VIRTIO_NET_CTRL_NOTF_COAL 6 > +/* > + * Set the tx-usecs/tx-max-packets patameters. > + * tx-usecs - Maximum number of usecs to delay a TX notification. > + * tx-max-packets - Maximum number of packets to send before a TX notification. > + */ > +struct virtio_net_ctrl_coal_tx { > + __le32 tx_max_packets; > + __le32 tx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 > + > +/* > + * Set the rx-usecs/rx-max-packets patameters. > + * rx-usecs - Maximum number of usecs to delay a RX notification. > + * rx-max-frames - Maximum number of packets to receive before a RX notification. > + */ > +struct virtio_net_ctrl_coal_rx { > + __le32 rx_max_packets; > + __le32 rx_usecs; > +}; > + > +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 > + > #endif /* _UAPI_LINUX_VIRTIO_NET_H */ > -- > 2.32.0
> We are very interested in this, and I would like to know what other plans you > have in the future? Such as qemu, vhost-uer, vhost-net. And further development > work in the kernel. Hi Xuan, I'm actually working on a VirtIO compatible DPU, no virtualization at all, so I wasn't planning on implementing it in qemu or vhost. I have some more development plans for the VirtIO spec and for the linux kernel in virtio-net (and maybe virtio-blk). Adding a timeout to the control vq is one example (needed with physical VirtIO devices). I would of course help to implement the notifications coalescing feature in qemu/vhost if you need. Alvaro
On Mon, 29 Aug 2022 12:14:54 +0300, Alvaro Karsz <alvaro.karsz@solid-run.com> wrote: > > We are very interested in this, and I would like to know what other plans you > > have in the future? Such as qemu, vhost-uer, vhost-net. And further development > > work in the kernel. > > Hi Xuan, > I'm actually working on a VirtIO compatible DPU, no virtualization at > all, so I wasn't planning on implementing it in qemu or vhost. > > I have some more development plans for the VirtIO spec and for the > linux kernel in virtio-net (and maybe virtio-blk). > Adding a timeout to the control vq is one example (needed with > physical VirtIO devices). > > I would of course help to implement the notifications coalescing > feature in qemu/vhost if you need. We'll do some implementation of net dim based on your previous work. In the meantime, if you don't implement coalescing on backends like qemu, we might do some work on that. Thanks. > > Alvaro
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 356cf8dd416..4fde66bd511 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -261,6 +261,12 @@ struct virtnet_info { u8 duplex; u32 speed; + /* Interrupt coalescing settings */ + u32 tx_usecs; + u32 rx_usecs; + u32 tx_max_packets; + u32 rx_max_packets; + unsigned long guest_offloads; unsigned long guest_offloads_capable; @@ -2587,27 +2593,89 @@ static int virtnet_get_link_ksettings(struct net_device *dev, return 0; } +static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi, + struct ethtool_coalesce *ec) +{ + struct scatterlist sgs_tx, sgs_rx; + struct virtio_net_ctrl_coal_tx coal_tx; + struct virtio_net_ctrl_coal_rx coal_rx; + + coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs); + coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames); + sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_TX_SET, + &sgs_tx)) + return -EINVAL; + + /* Save parameters */ + vi->tx_usecs = ec->tx_coalesce_usecs; + vi->tx_max_packets = ec->tx_max_coalesced_frames; + + coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs); + coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames); + sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL, + VIRTIO_NET_CTRL_NOTF_COAL_RX_SET, + &sgs_rx)) + return -EINVAL; + + /* Save parameters */ + vi->rx_usecs = ec->rx_coalesce_usecs; + vi->rx_max_packets = ec->rx_max_coalesced_frames; + + return 0; +} + +static int virtnet_coal_params_supported(struct ethtool_coalesce *ec) +{ + /* usecs coalescing is supported only if VIRTIO_NET_F_NOTF_COAL + * feature is negotiated. + */ + if (ec->rx_coalesce_usecs || ec->tx_coalesce_usecs) + return -EOPNOTSUPP; + + if (ec->tx_max_coalesced_frames > 1 || + ec->rx_max_coalesced_frames != 1) + return -EINVAL; + + return 0; +} + static int virtnet_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { struct virtnet_info *vi = netdev_priv(dev); - int i, napi_weight; - - if (ec->tx_max_coalesced_frames > 1 || - ec->rx_max_coalesced_frames != 1) - return -EINVAL; + int ret, i, napi_weight; + bool update_napi = false; + /* Can't change NAPI weight if the link is up */ napi_weight = ec->tx_max_coalesced_frames ? NAPI_POLL_WEIGHT : 0; if (napi_weight ^ vi->sq[0].napi.weight) { if (dev->flags & IFF_UP) return -EBUSY; + else + update_napi = true; + } + + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) + ret = virtnet_send_notf_coal_cmds(vi, ec); + else + ret = virtnet_coal_params_supported(ec); + + if (ret) + return ret; + + if (update_napi) { for (i = 0; i < vi->max_queue_pairs; i++) vi->sq[i].napi.weight = napi_weight; } - return 0; + return ret; } static int virtnet_get_coalesce(struct net_device *dev, @@ -2615,16 +2683,19 @@ static int virtnet_get_coalesce(struct net_device *dev, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack) { - struct ethtool_coalesce ec_default = { - .cmd = ETHTOOL_GCOALESCE, - .rx_max_coalesced_frames = 1, - }; struct virtnet_info *vi = netdev_priv(dev); - memcpy(ec, &ec_default, sizeof(ec_default)); + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + ec->rx_coalesce_usecs = vi->rx_usecs; + ec->tx_coalesce_usecs = vi->tx_usecs; + ec->tx_max_coalesced_frames = vi->tx_max_packets; + ec->rx_max_coalesced_frames = vi->rx_max_packets; + } else { + ec->rx_max_coalesced_frames = 1; - if (vi->sq[0].napi.weight) - ec->tx_max_coalesced_frames = 1; + if (vi->sq[0].napi.weight) + ec->tx_max_coalesced_frames = 1; + } return 0; } @@ -2743,7 +2814,8 @@ static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) } static const struct ethtool_ops virtnet_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, + .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES | + ETHTOOL_COALESCE_USECS, .get_drvinfo = virtnet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = virtnet_get_ringparam, @@ -3411,6 +3483,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_NOTF_COAL, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3546,6 +3620,13 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_NOTF_COAL)) { + vi->rx_usecs = 0; + vi->tx_usecs = 0; + vi->tx_max_packets = 0; + vi->rx_max_packets = 0; + } + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) vi->has_rss_hash_report = true; @@ -3780,7 +3861,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 3f55a4215f1..29ced55514d 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -56,7 +56,7 @@ #define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ - +#define VIRTIO_NET_F_NOTF_COAL 53 /* Guest can handle notifications coalescing */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ #define VIRTIO_NET_F_RSC_EXT 61 /* extended coalescing info */ @@ -355,4 +355,36 @@ struct virtio_net_hash_config { #define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 #define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 +/* + * Control notifications coalescing. + * + * Request the device to change the notifications coalescing parameters. + * + * Available with the VIRTIO_NET_F_NOTF_COAL feature bit. + */ +#define VIRTIO_NET_CTRL_NOTF_COAL 6 +/* + * Set the tx-usecs/tx-max-packets patameters. + * tx-usecs - Maximum number of usecs to delay a TX notification. + * tx-max-packets - Maximum number of packets to send before a TX notification. + */ +struct virtio_net_ctrl_coal_tx { + __le32 tx_max_packets; + __le32 tx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_TX_SET 0 + +/* + * Set the rx-usecs/rx-max-packets patameters. + * rx-usecs - Maximum number of usecs to delay a RX notification. + * rx-max-frames - Maximum number of packets to receive before a RX notification. + */ +struct virtio_net_ctrl_coal_rx { + __le32 rx_max_packets; + __le32 rx_usecs; +}; + +#define VIRTIO_NET_CTRL_NOTF_COAL_RX_SET 1 + #endif /* _UAPI_LINUX_VIRTIO_NET_H */
New VirtIO network feature: VIRTIO_NET_F_NOTF_COAL. Control a Virtio network device notifications coalescing parameters using the control virtqueue. A device that supports this fetature can receive VIRTIO_NET_CTRL_NOTF_COAL control commands. - VIRTIO_NET_CTRL_NOTF_COAL_TX_SET: Ask the network device to change the following parameters: - tx_usecs: Maximum number of usecs to delay a TX notification. - tx_max_packets: Maximum number of packets to send before a TX notification. - VIRTIO_NET_CTRL_NOTF_COAL_RX_SET: Ask the network device to change the following parameters: - rx_usecs: Maximum number of usecs to delay a RX notification. - rx_max_packets: Maximum number of packets to receive before a RX notification. VirtIO spec. patch: https://lists.oasis-open.org/archives/virtio-comment/202206/msg00100.html Signed-off-by: Alvaro Karsz <alvaro.karsz@solid-run.com> --- v2: - Fix type assignments warnings found with sparse. - Fix a few typos. v3: - Change the coalescing parameters in a dedicated function. - Return -EBUSY from the set coalescing function when the device's link is up, even if the notifications coalescing feature is negotiated. v4: - If link is up and we need to update NAPI weight, return -EBUSY before sending the coalescing commands to the device --- drivers/net/virtio_net.c | 111 +++++++++++++++++++++++++++----- include/uapi/linux/virtio_net.h | 34 +++++++++- 2 files changed, 129 insertions(+), 16 deletions(-) -- 2.32.0