Message ID | 20221110212212.96825-2-nbd@nbd.name (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | mtk_eth_soc rx vlan offload improvement + dsa hardware untag support | expand |
On Thu, Nov 10, 2022 at 10:22:08PM +0100, Felix Fietkau wrote: > If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu > port netdev, assume that it carries the port number and that there is no DSA > tag present in the skb data. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- Doesn't look bad to me, but... > net/core/flow_dissector.c | 4 +++- > net/dsa/dsa.c | 19 ++++++++++++++++++- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c > index 25cd35f5922e..1f476abc25e1 100644 > --- a/net/core/flow_dissector.c > +++ b/net/core/flow_dissector.c > @@ -972,11 +972,13 @@ bool __skb_flow_dissect(const struct net *net, > if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && > proto == htons(ETH_P_XDSA))) { > const struct dsa_device_ops *ops; > + struct metadata_dst *md_dst = skb_metadata_dst(skb); > int offset = 0; > > ops = skb->dev->dsa_ptr->tag_ops; > /* Only DSA header taggers break flow dissection */ > - if (ops->needed_headroom) { > + if (ops->needed_headroom && > + (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) { > if (ops->flow_dissect) > ops->flow_dissect(skb, &proto, &offset); > else > diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c > index 64b14f655b23..0b67622cf905 100644 > --- a/net/dsa/dsa.c > +++ b/net/dsa/dsa.c > @@ -11,6 +11,7 @@ > #include <linux/netdevice.h> > #include <linux/sysfs.h> > #include <linux/ptp_classify.h> > +#include <net/dst_metadata.h> > > #include "dsa_priv.h" > > @@ -216,6 +217,7 @@ static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, > static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, > struct packet_type *pt, struct net_device *unused) > { > + struct metadata_dst *md_dst = skb_metadata_dst(skb); > struct dsa_port *cpu_dp = dev->dsa_ptr; > struct sk_buff *nskb = NULL; > struct dsa_slave_priv *p; > @@ -229,7 +231,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, > if (!skb) > return 0; > > - nskb = cpu_dp->rcv(skb, dev); > + if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { > + unsigned int port = md_dst->u.port_info.port_id; > + > + skb_dst_set(skb, NULL); If you insist on not using the refcounting feature and free your metadata_dst in the master's remove() function, that's going to invalidate absolutely any point I'm trying to make. Normally I'd leave you alone, however I really don't like that this is also forcing DSA to not use the refcount, and therefore, that it's forcing any other driver to do the same as mtk_eth_soc. Not sure how that's gonna scale in the hypothetical future when there will be a DSA master which can offload RX DSA tags, *and* the switch can change tagging protocols dynamically (which will force the master to allocate/free its metadata dst's at runtime too). I guess that will be for me to figure out, which I don't like. Jakub, what do you think? Refcounting or no refcounting? > + if (!skb_has_extensions(skb)) > + skb->slow_gro = 0; > + > + skb->dev = dsa_master_find_slave(dev, 0, port); > + if (likely(skb->dev)) { > + dsa_default_offload_fwd_mark(skb); > + nskb = skb; > + } > + } else { > + nskb = cpu_dp->rcv(skb, dev); > + } > + > if (!nskb) { > kfree_skb(skb); > return 0; > -- > 2.38.1 >
On 12.11.22 00:37, Vladimir Oltean wrote: > On Thu, Nov 10, 2022 at 10:22:08PM +0100, Felix Fietkau wrote: >> If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu >> port netdev, assume that it carries the port number and that there is no DSA >> tag present in the skb data. >> >> Signed-off-by: Felix Fietkau <nbd@nbd.name> >> else >> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c >> index 64b14f655b23..0b67622cf905 100644 >> --- a/net/dsa/dsa.c >> +++ b/net/dsa/dsa.c >> @@ -11,6 +11,7 @@ >> #include <linux/netdevice.h> >> #include <linux/sysfs.h> >> #include <linux/ptp_classify.h> >> +#include <net/dst_metadata.h> >> >> #include "dsa_priv.h" >> >> @@ -216,6 +217,7 @@ static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, >> static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, >> struct packet_type *pt, struct net_device *unused) >> { >> + struct metadata_dst *md_dst = skb_metadata_dst(skb); >> struct dsa_port *cpu_dp = dev->dsa_ptr; >> struct sk_buff *nskb = NULL; >> struct dsa_slave_priv *p; >> @@ -229,7 +231,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, >> if (!skb) >> return 0; >> >> - nskb = cpu_dp->rcv(skb, dev); >> + if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { >> + unsigned int port = md_dst->u.port_info.port_id; >> + >> + skb_dst_set(skb, NULL); > > If you insist on not using the refcounting feature and free your > metadata_dst in the master's remove() function, that's going to > invalidate absolutely any point I'm trying to make. Normally I'd leave > you alone, however I really don't like that this is also forcing DSA to > not use the refcount, and therefore, that it's forcing any other driver > to do the same as mtk_eth_soc. Not sure how that's gonna scale in the > hypothetical future when there will be a DSA master which can offload > RX DSA tags, *and* the switch can change tagging protocols dynamically > (which will force the master to allocate/free its metadata dst's at > runtime too). I guess that will be for me to figure out, which I don't > like. > > Jakub, what do you think? Refcounting or no refcounting? If think that refcounting for the metadata dst is useful in this case, I can replace the skb_dst_set call with skb_dst_drop() in v4, so it would work for both cases. - Felix
On Sat, 12 Nov 2022 01:37:14 +0200 Vladimir Oltean wrote:
> Jakub, what do you think? Refcounting or no refcounting?
I would not trust my word over Felix's.. but since you asked I'd vote
for refcounted. There's probably a handful of low level redirects
(generic XDP, TC, NFT) that can happen and steal the packet, and
keep it alive for a while. I don't think they will all necessarily
clear the dst.
On 12.11.22 05:40, Jakub Kicinski wrote: > On Sat, 12 Nov 2022 01:37:14 +0200 Vladimir Oltean wrote: >> Jakub, what do you think? Refcounting or no refcounting? > > I would not trust my word over Felix's.. but since you asked I'd vote > for refcounted. There's probably a handful of low level redirects > (generic XDP, TC, NFT) that can happen and steal the packet, and > keep it alive for a while. I don't think they will all necessarily > clear the dst. I don't really see a valid use case in running generic XDP, TC and NFT on a DSA master dealing with packets before the tag receive function has been run. And after the tag has been processed, the metadata DST is cleared from the skb. How about this: I send a v4 which uses skb_dst_drop instead of skb_dst_set, so that other drivers can use refcounting if it makes sense for them. For mtk_eth_soc, I prefer to leave out refcounting for performance reasons. Is that acceptable to you guys? - Felix
On Sat, Nov 12, 2022 at 12:13:15PM +0100, Felix Fietkau wrote: > On 12.11.22 05:40, Jakub Kicinski wrote: > I don't really see a valid use case in running generic XDP, TC and NFT on a > DSA master dealing with packets before the tag receive function has been > run. And after the tag has been processed, the metadata DST is cleared from > the skb. Oh, there are potentially many use cases, the problem is that maybe there aren't as many actual implementations as ideas? At least XDP is, I think, expected to be able to deal with DSA tags if run on a DSA master (not sure how that applies when RX DSA tag is offloaded, but whatever). Marek Behun had a prototype with Marvell tags, not sure how far that went in the end: https://www.mail-archive.com/netdev@vger.kernel.org/msg381018.html In general, forwarding a packet to another switch port belonging to the same master via XDP_TX should be relatively efficient. > How about this: I send a v4 which uses skb_dst_drop instead of skb_dst_set, > so that other drivers can use refcounting if it makes sense for them. For > mtk_eth_soc, I prefer to leave out refcounting for performance reasons. > Is that acceptable to you guys? I don't think you can mix refcounting at consumer side with no-refcounting at producer side, no? I suppose that we could leave refcounting out for now, and bug you if someone comes with a real need later and complains. Right now it's a bit hard for me to imagine all the possibilities. How does that sound?
On 14.11.22 12:55, Vladimir Oltean wrote: > On Sat, Nov 12, 2022 at 12:13:15PM +0100, Felix Fietkau wrote: >> On 12.11.22 05:40, Jakub Kicinski wrote: >> I don't really see a valid use case in running generic XDP, TC and NFT on a >> DSA master dealing with packets before the tag receive function has been >> run. And after the tag has been processed, the metadata DST is cleared from >> the skb. > > Oh, there are potentially many use cases, the problem is that maybe > there aren't as many actual implementations as ideas? At least XDP is, > I think, expected to be able to deal with DSA tags if run on a DSA > master (not sure how that applies when RX DSA tag is offloaded, but > whatever). Marek Behun had a prototype with Marvell tags, not sure how > far that went in the end: > https://www.mail-archive.com/netdev@vger.kernel.org/msg381018.html > In general, forwarding a packet to another switch port belonging to the > same master via XDP_TX should be relatively efficient. In that case it likely makes sense to disable DSA tag offloading whenever driver XDP is being used. Generic XDP probably doesn't matter much. Last time I tried to use it and ran into performance issues, I was told that it's only usable for testing anyway and there was no interest in fixing the cases that I ran into. >> How about this: I send a v4 which uses skb_dst_drop instead of skb_dst_set, >> so that other drivers can use refcounting if it makes sense for them. For >> mtk_eth_soc, I prefer to leave out refcounting for performance reasons. >> Is that acceptable to you guys? > > I don't think you can mix refcounting at consumer side with no-refcounting > at producer side, no? skb_dst_drop checks if refcounting was used for the skb dst pointer. > I suppose that we could leave refcounting out for now, and bug you if > someone comes with a real need later and complains. Right now it's a bit > hard for me to imagine all the possibilities. How does that sound? Sounds good. I think I'll send v4 anyway to deal with the XDP case and to switch to skb_dst_drop. - Felix
On Thu, Nov 10, 2022 at 10:22:08PM +0100, Felix Fietkau wrote: > If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu > port netdev, assume that it carries the port number and that there is no DSA > tag present in the skb data. > > Signed-off-by: Felix Fietkau <nbd@nbd.name> > --- > net/core/flow_dissector.c | 4 +++- > net/dsa/dsa.c | 19 ++++++++++++++++++- > 2 files changed, 21 insertions(+), 2 deletions(-) > > diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c > index 25cd35f5922e..1f476abc25e1 100644 > --- a/net/core/flow_dissector.c > +++ b/net/core/flow_dissector.c > @@ -972,11 +972,13 @@ bool __skb_flow_dissect(const struct net *net, > if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && > proto == htons(ETH_P_XDSA))) { > const struct dsa_device_ops *ops; > + struct metadata_dst *md_dst = skb_metadata_dst(skb); Since you're resending, could you please preserve reverse Christmas tree variable ordering here? > int offset = 0; > > ops = skb->dev->dsa_ptr->tag_ops; > /* Only DSA header taggers break flow dissection */ > - if (ops->needed_headroom) { > + if (ops->needed_headroom && > + (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) { > if (ops->flow_dissect) > ops->flow_dissect(skb, &proto, &offset); > else > diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c > index 64b14f655b23..0b67622cf905 100644 > --- a/net/dsa/dsa.c > +++ b/net/dsa/dsa.c > @@ -11,6 +11,7 @@ > #include <linux/netdevice.h> > #include <linux/sysfs.h> > #include <linux/ptp_classify.h> > +#include <net/dst_metadata.h> > > #include "dsa_priv.h" > > @@ -216,6 +217,7 @@ static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, > static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, > struct packet_type *pt, struct net_device *unused) > { > + struct metadata_dst *md_dst = skb_metadata_dst(skb); > struct dsa_port *cpu_dp = dev->dsa_ptr; > struct sk_buff *nskb = NULL; > struct dsa_slave_priv *p; > @@ -229,7 +231,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, > if (!skb) > return 0; > > - nskb = cpu_dp->rcv(skb, dev); > + if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { > + unsigned int port = md_dst->u.port_info.port_id; > + > + skb_dst_set(skb, NULL); > + if (!skb_has_extensions(skb)) > + skb->slow_gro = 0; > + > + skb->dev = dsa_master_find_slave(dev, 0, port); > + if (likely(skb->dev)) { > + dsa_default_offload_fwd_mark(skb); > + nskb = skb; > + } > + } else { > + nskb = cpu_dp->rcv(skb, dev); > + } > + > if (!nskb) { > kfree_skb(skb); > return 0; > -- > 2.38.1 >
On Mon, Nov 14, 2022 at 01:06:42PM +0100, Felix Fietkau wrote: > In that case it likely makes sense to disable DSA tag offloading whenever > driver XDP is being used. > Generic XDP probably doesn't matter much. Last time I tried to use it and > ran into performance issues, I was told that it's only usable for testing > anyway and there was no interest in fixing the cases that I ran into. Don't know about generic XDP performance, sorry. But I think it's reasonable that XDP programs written for a DSA switch should also work if the DSA master has no driver support for XDP. At least until it gains driver level support. > > > How about this: I send a v4 which uses skb_dst_drop instead of skb_dst_set, > > > so that other drivers can use refcounting if it makes sense for them. For > > > mtk_eth_soc, I prefer to leave out refcounting for performance reasons. > > > Is that acceptable to you guys? > > > > I don't think you can mix refcounting at consumer side with no-refcounting > > at producer side, no? > skb_dst_drop checks if refcounting was used for the skb dst pointer. > > > I suppose that we could leave refcounting out for now, and bug you if > > someone comes with a real need later and complains. Right now it's a bit > > hard for me to imagine all the possibilities. How does that sound? > Sounds good. I think I'll send v4 anyway to deal with the XDP case and to > switch to skb_dst_drop. Sounds good.
On Mon, Nov 14, 2022 at 4:21 AM Felix Fietkau <nbd@nbd.name> wrote: > > On 14.11.22 12:55, Vladimir Oltean wrote: > > On Sat, Nov 12, 2022 at 12:13:15PM +0100, Felix Fietkau wrote: > >> On 12.11.22 05:40, Jakub Kicinski wrote: > >> I don't really see a valid use case in running generic XDP, TC and NFT on a > >> DSA master dealing with packets before the tag receive function has been > >> run. And after the tag has been processed, the metadata DST is cleared from > >> the skb. > > > > Oh, there are potentially many use cases, the problem is that maybe > > there aren't as many actual implementations as ideas? At least XDP is, > > I think, expected to be able to deal with DSA tags if run on a DSA > > master (not sure how that applies when RX DSA tag is offloaded, but > > whatever). Marek Behun had a prototype with Marvell tags, not sure how > > far that went in the end: > > https://www.mail-archive.com/netdev@vger.kernel.org/msg381018.html > > In general, forwarding a packet to another switch port belonging to the > > same master via XDP_TX should be relatively efficient. > In that case it likely makes sense to disable DSA tag offloading > whenever driver XDP is being used. > Generic XDP probably doesn't matter much. Last time I tried to use it > and ran into performance issues, I was told that it's only usable for > testing anyway and there was no interest in fixing the cases that I ran > into. XDP continues to evolve rapidly, as do its use cases. ( ex: ttps://github.com/thebracket/cpumap-pping#readme ) What cases did you run into? specifically planning to be hacking on the mvpp2 switch driver soon. > > >> How about this: I send a v4 which uses skb_dst_drop instead of skb_dst_set, > >> so that other drivers can use refcounting if it makes sense for them. For > >> mtk_eth_soc, I prefer to leave out refcounting for performance reasons. > >> Is that acceptable to you guys? > > > > I don't think you can mix refcounting at consumer side with no-refcounting > > at producer side, no? > skb_dst_drop checks if refcounting was used for the skb dst pointer. > > > I suppose that we could leave refcounting out for now, and bug you if > > someone comes with a real need later and complains. Right now it's a bit > > hard for me to imagine all the possibilities. How does that sound? > Sounds good. I think I'll send v4 anyway to deal with the XDP case and > to switch to skb_dst_drop. > > - Felix
On 14.11.22 14:18, Dave Taht wrote: > On Mon, Nov 14, 2022 at 4:21 AM Felix Fietkau <nbd@nbd.name> wrote: >> >> On 14.11.22 12:55, Vladimir Oltean wrote: >> > On Sat, Nov 12, 2022 at 12:13:15PM +0100, Felix Fietkau wrote: >> >> On 12.11.22 05:40, Jakub Kicinski wrote: >> >> I don't really see a valid use case in running generic XDP, TC and NFT on a >> >> DSA master dealing with packets before the tag receive function has been >> >> run. And after the tag has been processed, the metadata DST is cleared from >> >> the skb. >> > >> > Oh, there are potentially many use cases, the problem is that maybe >> > there aren't as many actual implementations as ideas? At least XDP is, >> > I think, expected to be able to deal with DSA tags if run on a DSA >> > master (not sure how that applies when RX DSA tag is offloaded, but >> > whatever). Marek Behun had a prototype with Marvell tags, not sure how >> > far that went in the end: >> > https://www.mail-archive.com/netdev@vger.kernel.org/msg381018.html >> > In general, forwarding a packet to another switch port belonging to the >> > same master via XDP_TX should be relatively efficient. >> In that case it likely makes sense to disable DSA tag offloading >> whenever driver XDP is being used. >> Generic XDP probably doesn't matter much. Last time I tried to use it >> and ran into performance issues, I was told that it's only usable for >> testing anyway and there was no interest in fixing the cases that I ran >> into. > > XDP continues to evolve rapidly, as do its use cases. ( ex: > ttps://github.com/thebracket/cpumap-pping#readme ) > > What cases did you run into? My issue was the fact that XDP in general (including generic XDP) assumes that packets have 256 bytes of headroom, which is usually only true for drivers that already have proper driver or hardware XDP support. That makes generic XDP pretty much useless for normal use, since on XDP capable drivers you should use driver/hardware XDP anyway, and on everything else you get the significant performance penalty of an extra pskb_expand_head call. I ended up abandoning XDP for my stuff and used tc instead. - Felix
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 25cd35f5922e..1f476abc25e1 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -972,11 +972,13 @@ bool __skb_flow_dissect(const struct net *net, if (unlikely(skb->dev && netdev_uses_dsa(skb->dev) && proto == htons(ETH_P_XDSA))) { const struct dsa_device_ops *ops; + struct metadata_dst *md_dst = skb_metadata_dst(skb); int offset = 0; ops = skb->dev->dsa_ptr->tag_ops; /* Only DSA header taggers break flow dissection */ - if (ops->needed_headroom) { + if (ops->needed_headroom && + (!md_dst || md_dst->type != METADATA_HW_PORT_MUX)) { if (ops->flow_dissect) ops->flow_dissect(skb, &proto, &offset); else diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 64b14f655b23..0b67622cf905 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/sysfs.h> #include <linux/ptp_classify.h> +#include <net/dst_metadata.h> #include "dsa_priv.h" @@ -216,6 +217,7 @@ static bool dsa_skb_defer_rx_timestamp(struct dsa_slave_priv *p, static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *unused) { + struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; struct dsa_slave_priv *p; @@ -229,7 +231,22 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = cpu_dp->rcv(skb, dev); + if (md_dst && md_dst->type == METADATA_HW_PORT_MUX) { + unsigned int port = md_dst->u.port_info.port_id; + + skb_dst_set(skb, NULL); + if (!skb_has_extensions(skb)) + skb->slow_gro = 0; + + skb->dev = dsa_master_find_slave(dev, 0, port); + if (likely(skb->dev)) { + dsa_default_offload_fwd_mark(skb); + nskb = skb; + } + } else { + nskb = cpu_dp->rcv(skb, dev); + } + if (!nskb) { kfree_skb(skb); return 0;
If a metadata dst is present with the type METADATA_HW_PORT_MUX on a dsa cpu port netdev, assume that it carries the port number and that there is no DSA tag present in the skb data. Signed-off-by: Felix Fietkau <nbd@nbd.name> --- net/core/flow_dissector.c | 4 +++- net/dsa/dsa.c | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-)