[net] net: Fix memory leaks of napi->rx_list

Message ID 1667361274-2621-1-git-send-email-wangyufen@huawei.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers

Checks

Context                         Check    Description
netdev/tree_selection           success  Clearly marked for net
netdev/fixes_present            success  Fixes tag present in non-next series
netdev/subject_prefix           success  Link
netdev/cover_letter             success  Single patches do not need cover letters
netdev/patch_count              success  Link
netdev/header_inline            success  No static functions without inline keyword in header files
netdev/build_32bit              success  Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers           warning  1 maintainers not CCed: petrm@nvidia.com
netdev/build_clang              success  Errors and warnings before: 5 this patch: 5
netdev/module_param             success  Was 0 now: 0
netdev/verify_signedoff         success  Signed-off-by tag matches author and committer
netdev/check_selftest           success  No net selftest shell script
netdev/verify_fixes             success  Fixes tag looks correct
netdev/build_allmodconfig_warn  success  Errors and warnings before: 8 this patch: 8
netdev/checkpatch               success  total: 0 errors, 0 warnings, 0 checks, 23 lines checked
netdev/kdoc                     success  Errors and warnings before: 0 this patch: 0
netdev/source_inline            success  Was 0 now: 0

Commit Message

wangyufen Nov. 2, 2022, 3:54 a.m. UTC
kmemleak reports after running test_progs:

unreferenced object 0xffff8881b1672dc0 (size 232):
  comm "test_progs", pid 394388, jiffies 4354712116 (age 841.975s)
  hex dump (first 32 bytes):
    e0 84 d7 a8 81 88 ff ff 80 2c 67 b1 81 88 ff ff  .........,g.....
    00 40 c5 9b 81 88 ff ff 00 00 00 00 00 00 00 00  .@..............
  backtrace:
    [<00000000c8f01748>] napi_skb_cache_get+0xd4/0x150
    [<0000000041c7fc09>] __napi_build_skb+0x15/0x50
    [<00000000431c7079>] __napi_alloc_skb+0x26e/0x540
    [<000000003ecfa30e>] napi_get_frags+0x59/0x140
    [<0000000099b2199e>] tun_get_user+0x183d/0x3bb0 [tun]
    [<000000008a5adef0>] tun_chr_write_iter+0xc0/0x1b1 [tun]
    [<0000000049993ff4>] do_iter_readv_writev+0x19f/0x320
    [<000000008f338ea2>] do_iter_write+0x135/0x630
    [<000000008a3377a4>] vfs_writev+0x12e/0x440
    [<00000000a6b5639a>] do_writev+0x104/0x280
    [<00000000ccf065d8>] do_syscall_64+0x3b/0x90
    [<00000000d776e329>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

The issue occurs in the following scenario:
tun_get_user()
  napi_gro_frags()
    napi_frags_finish()
      case GRO_NORMAL:
        gro_normal_one()
          list_add_tail(&skb->list, &napi->rx_list);
          <-- While napi->rx_count < READ_ONCE(gro_normal_batch),
          <-- gro_normal_list() is not called and skbs stay on napi->rx_list
...
netif_napi_del()
  __netif_napi_del()
  <-- napi->rx_list is not empty, so the queued skbs are leaked
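For reference, the batching logic described above (a simplified sketch of
gro_normal_one(); exact details vary across kernel versions):

/* Queue the skb for delivery and only flush once a batch threshold
 * is reached, so napi->rx_list can legitimately hold skbs between
 * calls; the leak happens when the NAPI instance is then torn down
 * without a flush.
 */
static inline void gro_normal_one(struct napi_struct *napi,
				  struct sk_buff *skb, int segs)
{
	list_add_tail(&skb->list, &napi->rx_list);
	napi->rx_count += segs;
	if (napi->rx_count >= READ_ONCE(gro_normal_batch))
		gro_normal_list(napi);
}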

To fix this, add flush_rx_list() to free any skbs left on napi->rx_list.

Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs")
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
---
 net/core/dev.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

Comments

Eric Dumazet Nov. 2, 2022, 4:27 a.m. UTC | #1
On Tue, Nov 1, 2022 at 8:34 PM Wang Yufen <wangyufen@huawei.com> wrote:
>
> <snip>
>
> Fixes: 323ebb61e32b ("net: use listified RX for handling GRO_NORMAL skbs")

I do not think the bug is there.

Most likely the tun driver is buggy.

It does not follow the correct NAPI protocol.

It feeds packets to GRO, but never asks to complete the work.

More sanity work is needed in tun, not in the GRO layer.
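
For context, a minimal sketch of the contract Eric refers to (illustrative
driver code, not tun; example_poll() and example_rx() are hypothetical names):

/* A driver's poll routine is expected to end each round with
 * napi_complete_done(), which flushes napi->rx_list via
 * gro_normal_list(), so skbs batched by gro_normal_one() are
 * not stranded on the list.
 */
static int example_poll(struct napi_struct *napi, int budget)
{
	/* example_rx() stands for the driver's receive loop, which
	 * feeds skbs to GRO via napi_gro_receive()/napi_gro_frags().
	 */
	int work_done = example_rx(napi, budget);

	if (work_done < budget)
		napi_complete_done(napi, work_done);

	return work_done;
}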


> <snip>
wangyufen Nov. 2, 2022, 7:58 a.m. UTC | #2
On 2022/11/2 12:27, Eric Dumazet wrote:
> On Tue, Nov 1, 2022 at 8:34 PM Wang Yufen <wangyufen@huawei.com> wrote:
>> <snip>
> I do not think the bug is there.
>
> Most likely the tun driver is buggy.
>
> It does not follow the correct NAPI protocol.
>
> It feeds packets to GRO, but never asks to complete the work.
>
> More sanity work is needed in tun, not in the GRO layer.

OK, I will check the tun driver.

Thanks.

>
>> <snip>
Edward Cree Nov. 2, 2022, 3:28 p.m. UTC | #3
On 02/11/2022 04:27, Eric Dumazet wrote:
<snip>
> I do not think the bug is there.
> 
> Most likely the tun driver is buggy.
<snip>
>> @@ -6471,6 +6481,7 @@ void __netif_napi_del(struct napi_struct *napi)
>>         list_del_rcu(&napi->dev_list);
>>         napi_free_frags(napi);
>>
>> +       flush_rx_list(napi);

But maybe it makes sense to put a WARN_ON_ONCE(!list_empty(&napi->rx_list))
here, to catch such buggy drivers sooner.  WDYT?

-ed
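
As an illustration of that suggestion, a hypothetical variant of
flush_rx_list() from the patch below, with the warning folded in:

/* Hypothetical sketch: warn once when a driver deletes a NAPI
 * instance with skbs still queued on napi->rx_list, then free
 * them anyway so they are not leaked.
 */
static void flush_rx_list(struct napi_struct *napi)
{
	struct sk_buff *skb, *next;

	if (!WARN_ON_ONCE(!list_empty(&napi->rx_list)))
		return;

	list_for_each_entry_safe(skb, next, &napi->rx_list, list) {
		skb_list_del_init(skb);
		kfree_skb(skb);
	}
}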
Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index 3be2560..de3bc9c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6461,6 +6461,16 @@ static void flush_gro_hash(struct napi_struct *napi)
 	}
 }
 
+static void flush_rx_list(struct napi_struct *napi)
+{
+	struct sk_buff *skb, *next;
+
+	list_for_each_entry_safe(skb, next, &napi->rx_list, list) {
+		skb_list_del_init(skb);
+		kfree_skb(skb);
+	}
+}
+
 /* Must be called in process context */
 void __netif_napi_del(struct napi_struct *napi)
 {
@@ -6471,6 +6481,7 @@ void __netif_napi_del(struct napi_struct *napi)
 	list_del_rcu(&napi->dev_list);
 	napi_free_frags(napi);
 
+	flush_rx_list(napi);
 	flush_gro_hash(napi);
 	napi->gro_bitmask = 0;