Message ID | 20210418114200.5839-1-alobakin@pm.me (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | [net] gro: fix napi_gro_frags() Fast GRO breakage due to IP alignment check | expand |
Context | Check | Description |
---|---|---|
netdev/cover_letter | success | Link |
netdev/fixes_present | success | Link |
netdev/patch_count | success | Link |
netdev/tree_selection | success | Clearly marked for net |
netdev/subject_prefix | success | Link |
netdev/cc_maintainers | success | CCed 12 of 12 maintainers |
netdev/source_inline | success | Was 0 now: 0 |
netdev/verify_signedoff | success | Link |
netdev/module_param | success | Was 0 now: 0 |
netdev/build_32bit | success | Errors and warnings before: 10 this patch: 10 |
netdev/kdoc | success | Errors and warnings before: 0 this patch: 0 |
netdev/verify_fixes | success | Link |
netdev/checkpatch | success | total: 0 errors, 0 warnings, 0 checks, 32 lines checked |
netdev/build_allmodconfig_warn | success | Errors and warnings before: 10 this patch: 10 |
netdev/header_inline | success | Link |
On Sun, Apr 18, 2021 at 1:43 PM Alexander Lobakin <alobakin@pm.me> wrote: > > Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") > did the right thing, but missed the fact that napi_gro_frags() logics > calls for skb_gro_reset_offset() *before* pulling Ethernet header > to the skb linear space. > That said, the introduced check for frag0 address being aligned to 4 > always fails for it as Ethernet header is obviously 14 bytes long, > and in case with NET_IP_ALIGN its start is not aligned to 4. > > Fix this by adding @nhoff argument to skb_gro_reset_offset() which > tells if an IP header is placed right at the start of frag0 or not. > This restores Fast GRO for napi_gro_frags() that became very slow > after the mentioned commit, and preserves the introduced check to > avoid silent unaligned accesses. > > Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") > Signed-off-by: Alexander Lobakin <alobakin@pm.me> > --- > net/core/dev.c | 8 ++++---- > 1 file changed, 4 insertions(+), 4 deletions(-) > > diff --git a/net/core/dev.c b/net/core/dev.c > index 1f79b9aa9a3f..965d5f9b6fee 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi, > return head; > } > > -static void skb_gro_reset_offset(struct sk_buff *skb) > +static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) > { > const struct skb_shared_info *pinfo = skb_shinfo(skb); > const skb_frag_t *frag0 = &pinfo->frags[0]; > @@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) > > if (!skb_headlen(skb) && pinfo->nr_frags && > !PageHighMem(skb_frag_page(frag0)) && > - (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { > + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { > NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); > NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, > skb_frag_size(frag0), > @@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct 
napi_struct *napi, struct sk_buff *skb) > skb_mark_napi_id(skb, napi); > trace_napi_gro_receive_entry(skb); > > - skb_gro_reset_offset(skb); > + skb_gro_reset_offset(skb, 0); > > ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); > trace_napi_gro_receive_exit(ret); > @@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) > napi->skb = NULL; > > skb_reset_mac_header(skb); > - skb_gro_reset_offset(skb); > + skb_gro_reset_offset(skb, hlen); > > if (unlikely(skb_gro_header_hard(skb, hlen))) { > eth = skb_gro_header_slow(skb, hlen, 0); > -- > 2.31.1 > > Good catch, thanks. This has the unfortunate effect of increasing code length on x86, maybe we should have an exception to normal rules so that skb_gro_reset_offset() is inlined. Reviewed-by: Eric Dumazet <edumazet@google.com>
From: Eric Dumazet <edumazet@google.com> Date: Mon, 19 Apr 2021 13:05:16 +0200 > On Sun, Apr 18, 2021 at 1:43 PM Alexander Lobakin <alobakin@pm.me> wrote: > > > > Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") > > did the right thing, but missed the fact that napi_gro_frags() logics > > calls for skb_gro_reset_offset() *before* pulling Ethernet header > > to the skb linear space. > > That said, the introduced check for frag0 address being aligned to 4 > > always fails for it as Ethernet header is obviously 14 bytes long, > > and in case with NET_IP_ALIGN its start is not aligned to 4. > > > > Fix this by adding @nhoff argument to skb_gro_reset_offset() which > > tells if an IP header is placed right at the start of frag0 or not. > > This restores Fast GRO for napi_gro_frags() that became very slow > > after the mentioned commit, and preserves the introduced check to > > avoid silent unaligned accesses. > > > > Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") > > Signed-off-by: Alexander Lobakin <alobakin@pm.me> > > --- > > net/core/dev.c | 8 ++++---- > > 1 file changed, 4 insertions(+), 4 deletions(-) > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > index 1f79b9aa9a3f..965d5f9b6fee 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi, > > return head; > > } > > > > -static void skb_gro_reset_offset(struct sk_buff *skb) > > +static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) > > { > > const struct skb_shared_info *pinfo = skb_shinfo(skb); > > const skb_frag_t *frag0 = &pinfo->frags[0]; > > @@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) > > > > if (!skb_headlen(skb) && pinfo->nr_frags && > > !PageHighMem(skb_frag_page(frag0)) && > > - (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { > > + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { > > NAPI_GRO_CB(skb)->frag0 = 
skb_frag_address(frag0); > > NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, > > skb_frag_size(frag0), > > @@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) > > skb_mark_napi_id(skb, napi); > > trace_napi_gro_receive_entry(skb); > > > > - skb_gro_reset_offset(skb); > > + skb_gro_reset_offset(skb, 0); > > > > ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); > > trace_napi_gro_receive_exit(ret); > > @@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) > > napi->skb = NULL; > > > > skb_reset_mac_header(skb); > > - skb_gro_reset_offset(skb); > > + skb_gro_reset_offset(skb, hlen); > > > > if (unlikely(skb_gro_header_hard(skb, hlen))) { > > eth = skb_gro_header_slow(skb, hlen, 0); > > -- > > 2.31.1 > > > > > > Good catch, thanks. > > This has the unfortunate effect of increasing code length on x86, > maybe we should have an exception to > normal rules so that skb_gro_reset_offset() is inlined. Agree. To mitigate this codegrowth we either go with NET_IP_ALIGN ifdeffery or just inline skb_gro_reset_offset(). The function is tiny itself, I don't see a reason to not do this. Will drop v2 in a moment. > Reviewed-by: Eric Dumazet <edumazet@google.com> Thanks, Al
diff --git a/net/core/dev.c b/net/core/dev.c index 1f79b9aa9a3f..965d5f9b6fee 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5914,7 +5914,7 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi, return head; } -static void skb_gro_reset_offset(struct sk_buff *skb) +static void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff) { const struct skb_shared_info *pinfo = skb_shinfo(skb); const skb_frag_t *frag0 = &pinfo->frags[0]; @@ -5925,7 +5925,7 @@ static void skb_gro_reset_offset(struct sk_buff *skb) if (!skb_headlen(skb) && pinfo->nr_frags && !PageHighMem(skb_frag_page(frag0)) && - (!NET_IP_ALIGN || !(skb_frag_off(frag0) & 3))) { + (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0); NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int, skb_frag_size(frag0), @@ -6143,7 +6143,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb_mark_napi_id(skb, napi); trace_napi_gro_receive_entry(skb); - skb_gro_reset_offset(skb); + skb_gro_reset_offset(skb, 0); ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb)); trace_napi_gro_receive_exit(ret); @@ -6232,7 +6232,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi) napi->skb = NULL; skb_reset_mac_header(skb); - skb_gro_reset_offset(skb); + skb_gro_reset_offset(skb, hlen); if (unlikely(skb_gro_header_hard(skb, hlen))) { eth = skb_gro_header_slow(skb, hlen, 0);
Commit 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") did the right thing, but missed the fact that the napi_gro_frags() logic calls skb_gro_reset_offset() *before* pulling the Ethernet header to the skb linear space. That said, the introduced check for the frag0 address being aligned to 4 always fails for it, as the Ethernet header is obviously 14 bytes long, and in the case of NET_IP_ALIGN its start is not aligned to 4. Fix this by adding an @nhoff argument to skb_gro_reset_offset() which tells if an IP header is placed right at the start of frag0 or not. This restores Fast GRO for napi_gro_frags(), which became very slow after the mentioned commit, and preserves the introduced check to avoid silent unaligned accesses. Fixes: 38ec4944b593 ("gro: ensure frag0 meets IP header alignment") Signed-off-by: Alexander Lobakin <alobakin@pm.me> --- net/core/dev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) -- 2.31.1