diff mbox series

[net-next,05/14] net_sched: sch_codel: implement lockless codel_dump()

Message ID 20240415132054.3822230-6-edumazet@google.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series net_sched: first series for RTNL-less qdisc dumps | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 926 this patch: 926
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 7 of 7 maintainers
netdev/build_clang success Errors and warnings before: 937 this patch: 937
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 937 this patch: 937
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 67 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-04-18--00-00 (tests: 961)

Commit Message

Eric Dumazet April 15, 2024, 1:20 p.m. UTC
Instead of relying on RTNL, codel_dump() can use READ_ONCE()
annotations, paired with WRITE_ONCE() ones in codel_change().

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

Comments

Simon Horman April 17, 2024, 3:59 p.m. UTC | #1
On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> annotations, paired with WRITE_ONCE() ones in codel_change().
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> ---
>  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
>  1 file changed, 18 insertions(+), 11 deletions(-)
> 
> diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> --- a/net/sched/sch_codel.c
> +++ b/net/sched/sch_codel.c
> @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
>  	if (tb[TCA_CODEL_TARGET]) {
>  		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
>  
> -		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.target,
> +			   ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_CE_THRESHOLD]) {
>  		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
>  
> -		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.ce_threshold,
> +			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_INTERVAL]) {
>  		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
>  
> -		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> +		WRITE_ONCE(q->params.interval,
> +			   ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
>  	}
>  
>  	if (tb[TCA_CODEL_LIMIT])
> -		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> +		WRITE_ONCE(sch->limit,
> +			   nla_get_u32(tb[TCA_CODEL_LIMIT]));
>  

Hi Eric,

Sorry to be so bothersome.

As a follow-up to our discussion of patch 4/14 (net_choke),
I'm wondering if reading sch->limit in codel_qdisc_enqueue()
should be updated to use READ_ONCE().

>  	if (tb[TCA_CODEL_ECN])
> -		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
> +		WRITE_ONCE(q->params.ecn,
> +			   !!nla_get_u32(tb[TCA_CODEL_ECN]));
>  
>  	qlen = sch->q.qlen;
>  	while (sch->q.qlen > sch->limit) {

...
Eric Dumazet April 17, 2024, 4:05 p.m. UTC | #2
On Wed, Apr 17, 2024 at 5:59 PM Simon Horman <horms@kernel.org> wrote:
>
> On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> > Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> > annotations, paired with WRITE_ONCE() ones in codel_change().
> >
> > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > ---
> >  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
> >  1 file changed, 18 insertions(+), 11 deletions(-)
> >
> > diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> > index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> > --- a/net/sched/sch_codel.c
> > +++ b/net/sched/sch_codel.c
> > @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
> >       if (tb[TCA_CODEL_TARGET]) {
> >               u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
> >
> > -             q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.target,
> > +                        ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_CE_THRESHOLD]) {
> >               u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
> >
> > -             q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.ce_threshold,
> > +                        (val * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_INTERVAL]) {
> >               u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
> >
> > -             q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> > +             WRITE_ONCE(q->params.interval,
> > +                        ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
> >       }
> >
> >       if (tb[TCA_CODEL_LIMIT])
> > -             sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> > +             WRITE_ONCE(sch->limit,
> > +                        nla_get_u32(tb[TCA_CODEL_LIMIT]));
> >
>
> Hi Eric,
>
> Sorry to be so bothersome.
>
> As a follow-up to our discussion of patch 4/14 (net_choke),
> I'm wondering if reading sch->limit in codel_qdisc_enqueue()
> should be updated to use READ_ONCE().

No worries !

A READ_ONCE() in codel_qdisc_enqueue() is not needed
because codel_change() writes all the fields under the protection of
the qdisc spinlock.

sch_tree_lock() / ... / sch_tree_unlock()

Note that I removed the READ_ONCE() in choke enqueue() in V2,
for the same reason.

>
> >       if (tb[TCA_CODEL_ECN])
> > -             q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
> > +             WRITE_ONCE(q->params.ecn,
> > +                        !!nla_get_u32(tb[TCA_CODEL_ECN]));
> >
> >       qlen = sch->q.qlen;
> >       while (sch->q.qlen > sch->limit) {
>
> ...
Simon Horman April 17, 2024, 4:21 p.m. UTC | #3
On Wed, Apr 17, 2024 at 06:05:46PM +0200, Eric Dumazet wrote:
> On Wed, Apr 17, 2024 at 5:59 PM Simon Horman <horms@kernel.org> wrote:
> >
> > On Mon, Apr 15, 2024 at 01:20:45PM +0000, Eric Dumazet wrote:
> > > Instead of relying on RTNL, codel_dump() can use READ_ONCE()
> > > annotations, paired with WRITE_ONCE() ones in codel_change().
> > >
> > > Signed-off-by: Eric Dumazet <edumazet@google.com>
> > > ---
> > >  net/sched/sch_codel.c | 29 ++++++++++++++++++-----------
> > >  1 file changed, 18 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
> > > index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
> > > --- a/net/sched/sch_codel.c
> > > +++ b/net/sched/sch_codel.c
> > > @@ -118,26 +118,31 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt,
> > >       if (tb[TCA_CODEL_TARGET]) {
> > >               u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
> > >
> > > -             q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.target,
> > > +                        ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_CE_THRESHOLD]) {
> > >               u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
> > >
> > > -             q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.ce_threshold,
> > > +                        (val * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_INTERVAL]) {
> > >               u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
> > >
> > > -             q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
> > > +             WRITE_ONCE(q->params.interval,
> > > +                        ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
> > >       }
> > >
> > >       if (tb[TCA_CODEL_LIMIT])
> > > -             sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
> > > +             WRITE_ONCE(sch->limit,
> > > +                        nla_get_u32(tb[TCA_CODEL_LIMIT]));
> > >
> >
> > Hi Eric,
> >
> > Sorry to be so bothersome.
> >
> > As a follow-up to our discussion of patch 4/14 (net_choke),
> > I'm wondering if reading sch->limit in codel_qdisc_enqueue()
> > should be updated to use READ_ONCE().
> 
> No worries !
> 
> A READ_ONCE() in codel_qdisc_enqueue() is not needed
> because codel_change() writes  all the fields  under the protection of
> qdisc spinlock.
> 
> sch_tree_lock() / ... / sch_tree_unlock()
> 
> Note that I removed the READ_ONCE() in choke enqueue() in V2,
> for the same reason.

Perfect, now I understand.

With that cleared up, I'm happy with (my understanding of) this patch.

Reviewed-by: Simon Horman <horms@kernel.org>
diff mbox series

Patch

diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index ecb3f164bb25b33bd662c8ee07dc1b5945fd882d..3e8d4fe4d91e3ef2b7715640f6675aa5e8e2a326 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -118,26 +118,31 @@  static int codel_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_CODEL_TARGET]) {
 		u32 target = nla_get_u32(tb[TCA_CODEL_TARGET]);
 
-		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.target,
+			   ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_CE_THRESHOLD]) {
 		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
 
-		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.ce_threshold,
+			   (val * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
-		q->params.interval = ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT;
+		WRITE_ONCE(q->params.interval,
+			   ((u64)interval * NSEC_PER_USEC) >> CODEL_SHIFT);
 	}
 
 	if (tb[TCA_CODEL_LIMIT])
-		sch->limit = nla_get_u32(tb[TCA_CODEL_LIMIT]);
+		WRITE_ONCE(sch->limit,
+			   nla_get_u32(tb[TCA_CODEL_LIMIT]));
 
 	if (tb[TCA_CODEL_ECN])
-		q->params.ecn = !!nla_get_u32(tb[TCA_CODEL_ECN]);
+		WRITE_ONCE(q->params.ecn,
+			   !!nla_get_u32(tb[TCA_CODEL_ECN]));
 
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > sch->limit) {
@@ -183,6 +188,7 @@  static int codel_init(struct Qdisc *sch, struct nlattr *opt,
 static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct codel_sched_data *q = qdisc_priv(sch);
+	codel_time_t ce_threshold;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -190,17 +196,18 @@  static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	if (nla_put_u32(skb, TCA_CODEL_TARGET,
-			codel_time_to_us(q->params.target)) ||
+			codel_time_to_us(READ_ONCE(q->params.target))) ||
 	    nla_put_u32(skb, TCA_CODEL_LIMIT,
-			sch->limit) ||
+			READ_ONCE(sch->limit)) ||
 	    nla_put_u32(skb, TCA_CODEL_INTERVAL,
-			codel_time_to_us(q->params.interval)) ||
+			codel_time_to_us(READ_ONCE(q->params.interval))) ||
 	    nla_put_u32(skb, TCA_CODEL_ECN,
-			q->params.ecn))
+			READ_ONCE(q->params.ecn)))
 		goto nla_put_failure;
-	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	ce_threshold = READ_ONCE(q->params.ce_threshold);
+	if (ce_threshold != CODEL_DISABLED_THRESHOLD &&
 	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
-			codel_time_to_us(q->params.ce_threshold)))
+			codel_time_to_us(ce_threshold)))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);