diff mbox series

[net-next,2/4] gro: don't dereference napi->gro_hash[x] multiple times in dev_gro_receive()

Message ID 20210312162127.239795-3-alobakin@pm.me (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series gro: micro-optimize dev_gro_receive() | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for net-next
netdev/subject_prefix success Link
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 10 this patch: 10
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: braces {} are not necessary for any arm of this statement
netdev/build_allmodconfig_warn success Errors and warnings before: 10 this patch: 10
netdev/header_inline success Link

Commit Message

Alexander Lobakin March 12, 2021, 4:21 p.m. UTC
The GRO bucket index doesn't change throughout the entire function.
Store a pointer to the corresponding bucket on the stack once and use
it later instead of dereferencing napi->gro_hash[bucket] again and again.

Signed-off-by: Alexander Lobakin <alobakin@pm.me>
---
 net/core/dev.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

--
2.30.2

Comments

Eric Dumazet March 12, 2021, 4:47 p.m. UTC | #1
On Fri, Mar 12, 2021 at 5:22 PM Alexander Lobakin <alobakin@pm.me> wrote:
>
> GRO bucket index doesn't change through the entire function.
> Store a pointer to the corresponding bucket on stack once and use
> it later instead of dereferencing again and again.
>
> Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> ---
>  net/core/dev.c | 9 +++++----
>  1 file changed, 5 insertions(+), 4 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index adc42ba7ffd8..ee124aecb8a2 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -5957,6 +5957,7 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
>  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
>  {
>         u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
> +       struct gro_list *gro_list = &napi->gro_hash[bucket];
>         struct list_head *head = &offload_base;
>         struct packet_offload *ptype;
>         __be16 type = skb->protocol;
> @@ -6024,7 +6025,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>         if (pp) {
>                 skb_list_del_init(pp);
>                 napi_gro_complete(napi, pp);
> -               napi->gro_hash[bucket].count--;
> +               gro_list->count--;
>         }
>
>         if (same_flow)
> @@ -6033,10 +6034,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>         if (NAPI_GRO_CB(skb)->flush)
>                 goto normal;
>
> -       if (unlikely(napi->gro_hash[bucket].count >= MAX_GRO_SKBS)) {
> +       if (unlikely(gro_list->count >= MAX_GRO_SKBS)) {
>                 gro_flush_oldest(napi, gro_head);
>         } else {
> -               napi->gro_hash[bucket].count++;
> +               gro_list->count++;
>         }
>         NAPI_GRO_CB(skb)->count = 1;
>         NAPI_GRO_CB(skb)->age = jiffies;
> @@ -6050,7 +6051,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
>         if (grow > 0)
>                 gro_pull_from_frag0(skb, grow);
>  ok:
> -       if (napi->gro_hash[bucket].count) {
> +       if (gro_list->count) {
>                 if (!test_bit(bucket, &napi->gro_bitmask))
>                         __set_bit(bucket, &napi->gro_bitmask);
>         } else if (test_bit(bucket, &napi->gro_bitmask)) {
> --
> 2.30.2
>
>

This adds more register pressure. Do you have precise measurements to
confirm that this change is a win?

Presumably the compiler should be able to optimize the code just fine,
since it can see that @bucket does not change.
Alexander Lobakin March 12, 2021, 6:36 p.m. UTC | #2
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 12 Mar 2021 17:47:04 +0100

> On Fri, Mar 12, 2021 at 5:22 PM Alexander Lobakin <alobakin@pm.me> wrote:
> >
> > GRO bucket index doesn't change through the entire function.
> > Store a pointer to the corresponding bucket on stack once and use
> > it later instead of dereferencing again and again.
> >
> > Signed-off-by: Alexander Lobakin <alobakin@pm.me>
> > ---
> >  net/core/dev.c | 9 +++++----
> >  1 file changed, 5 insertions(+), 4 deletions(-)
> >
> > diff --git a/net/core/dev.c b/net/core/dev.c
> > index adc42ba7ffd8..ee124aecb8a2 100644
> > --- a/net/core/dev.c
> > +++ b/net/core/dev.c
> > @@ -5957,6 +5957,7 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
> >  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
> >  {
> >         u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
> > +       struct gro_list *gro_list = &napi->gro_hash[bucket];
> >         struct list_head *head = &offload_base;
> >         struct packet_offload *ptype;
> >         __be16 type = skb->protocol;
> > @@ -6024,7 +6025,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
> >         if (pp) {
> >                 skb_list_del_init(pp);
> >                 napi_gro_complete(napi, pp);
> > -               napi->gro_hash[bucket].count--;
> > +               gro_list->count--;
> >         }
> >
> >         if (same_flow)
> > @@ -6033,10 +6034,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
> >         if (NAPI_GRO_CB(skb)->flush)
> >                 goto normal;
> >
> > -       if (unlikely(napi->gro_hash[bucket].count >= MAX_GRO_SKBS)) {
> > +       if (unlikely(gro_list->count >= MAX_GRO_SKBS)) {
> >                 gro_flush_oldest(napi, gro_head);
> >         } else {
> > -               napi->gro_hash[bucket].count++;
> > +               gro_list->count++;
> >         }
> >         NAPI_GRO_CB(skb)->count = 1;
> >         NAPI_GRO_CB(skb)->age = jiffies;
> > @@ -6050,7 +6051,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
> >         if (grow > 0)
> >                 gro_pull_from_frag0(skb, grow);
> >  ok:
> > -       if (napi->gro_hash[bucket].count) {
> > +       if (gro_list->count) {
> >                 if (!test_bit(bucket, &napi->gro_bitmask))
> >                         __set_bit(bucket, &napi->gro_bitmask);
> >         } else if (test_bit(bucket, &napi->gro_bitmask)) {
> > --
> > 2.30.2
> >
> >
>
> This adds more register pressure, do you have precise measures to
> confirm this change is a win ?
>
> Presumably the compiler should be able to optimize the code just fine,
> it can see @bucket does not change.

This is mostly (if not purely) cosmetic; I don't think it changes
anything at all for most sane compilers.

Regarding registers, since @gro_list and @gro_head are pretty much the
same, we could drop @gro_head in favour of @gro_list and just use
@gro_list->list instead.

Al
diff mbox series

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index adc42ba7ffd8..ee124aecb8a2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5957,6 +5957,7 @@  static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
 static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
+	struct gro_list *gro_list = &napi->gro_hash[bucket];
 	struct list_head *head = &offload_base;
 	struct packet_offload *ptype;
 	__be16 type = skb->protocol;
@@ -6024,7 +6025,7 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (pp) {
 		skb_list_del_init(pp);
 		napi_gro_complete(napi, pp);
-		napi->gro_hash[bucket].count--;
+		gro_list->count--;
 	}

 	if (same_flow)
@@ -6033,10 +6034,10 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (NAPI_GRO_CB(skb)->flush)
 		goto normal;

-	if (unlikely(napi->gro_hash[bucket].count >= MAX_GRO_SKBS)) {
+	if (unlikely(gro_list->count >= MAX_GRO_SKBS)) {
 		gro_flush_oldest(napi, gro_head);
 	} else {
-		napi->gro_hash[bucket].count++;
+		gro_list->count++;
 	}
 	NAPI_GRO_CB(skb)->count = 1;
 	NAPI_GRO_CB(skb)->age = jiffies;
@@ -6050,7 +6051,7 @@  static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
 	if (grow > 0)
 		gro_pull_from_frag0(skb, grow);
 ok:
-	if (napi->gro_hash[bucket].count) {
+	if (gro_list->count) {
 		if (!test_bit(bucket, &napi->gro_bitmask))
 			__set_bit(bucket, &napi->gro_bitmask);
 	} else if (test_bit(bucket, &napi->gro_bitmask)) {