diff mbox series

[v3] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file

Message ID 20240119103703.2004155-1-shaozhengchao@huawei.com (mailing list archive)
State Not Applicable
Headers show
Series [v3] ipc/mqueue: fix potential sleeping issue in mqueue_flush_file | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

shaozhengchao Jan. 19, 2024, 10:37 a.m. UTC
I analyzed a potential sleep-in-atomic-context issue that arises from the following interleaving of two threads:
Thread A                                Thread B
...                                     netlink_create  //ref = 1
do_mq_notify                            ...
  sock = netlink_getsockbyfilp          ...     //ref = 2
  info->notify_sock = sock;             ...
...                                     netlink_sendmsg
...                                       skb = netlink_alloc_large_skb  //skb->head is vmalloced
...                                       netlink_unicast
...                                         sk = netlink_getsockbyportid //ref = 3
...                                         netlink_sendskb
...                                           __netlink_sendskb
...                                             skb_queue_tail //put skb to sk_receive_queue
...                                         sock_put //ref = 2
...                                     ...
...                                     netlink_release
...                                       deferred_put_nlk_sk //ref = 1
mqueue_flush_file
  spin_lock
  remove_notification
    netlink_sendskb
      sock_put  //ref = 0
        sk_free
          ...
          __sk_destruct
            netlink_sock_destruct
              skb_queue_purge  //get skb from sk_receive_queue
                ...
                __skb_queue_purge_reason
                  kfree_skb_reason
                    __kfree_skb
                    ...
                    skb_release_all
                      skb_release_head_state
                        netlink_skb_destructor
                          vfree(skb->head)  //sleeping while holding spinlock

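In netlink_sendmsg, if the memory pointed to by skb->head is allocated by
vmalloc and the skb is put on sk_receive_queue without being freed, then
when mqueue_flush_file drops the last reference to the sock, the queued
skb is purged and vfree() is called while the spinlock is still held,
which is a sleeping bug. Fix this by putting the sock after releasing the
spinlock.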

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
---
v3: Put sock after releasing the spinlock.
v2: CCed some networking maintainer & netdev list
---
 ipc/mqueue.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

Comments

Eric Dumazet Jan. 19, 2024, 1:09 p.m. UTC | #1
On Fri, Jan 19, 2024 at 11:27 AM Zhengchao Shao
<shaozhengchao@huawei.com> wrote:
>
> I analyze the potential sleeping issue of the following processes:
> Thread A                                Thread B
> ...                                     netlink_create  //ref = 1
> do_mq_notify                            ...
>   sock = netlink_getsockbyfilp          ...     //ref = 2
>   info->notify_sock = sock;             ...
> ...                                     netlink_sendmsg
> ...                                       skb = netlink_alloc_large_skb  //skb->head is vmalloced
> ...                                       netlink_unicast
> ...                                         sk = netlink_getsockbyportid //ref = 3
> ...                                         netlink_sendskb
> ...                                           __netlink_sendskb
> ...                                             skb_queue_tail //put skb to sk_receive_queue
> ...                                         sock_put //ref = 2
> ...                                     ...
> ...                                     netlink_release
> ...                                       deferred_put_nlk_sk //ref = 1
> mqueue_flush_file
>   spin_lock
>   remove_notification
>     netlink_sendskb
>       sock_put  //ref = 0
>         sk_free
>           ...
>           __sk_destruct
>             netlink_sock_destruct
>               skb_queue_purge  //get skb from sk_receive_queue
>                 ...
>                 __skb_queue_purge_reason
>                   kfree_skb_reason
>                     __kfree_skb
>                     ...
>                     skb_release_all
>                       skb_release_head_state
>                         netlink_skb_destructor
>                           vfree(skb->head)  //sleeping while holding spinlock
>
> In netlink_sendmsg, if the memory pointed to by skb->head is allocated by
> vmalloc, and is put to sk_receive_queue queue, also the skb is not freed.
> When the mqueue executes flush, the sleeping bug will occur. Put sock
> after releasing the spinlock.
>
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")

I think netlink started to use vmalloc() from commit c05cdb1b864f
("netlink: allow large data transfers from user-space")

> Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
> ---
> v3: Put sock after releasing the spinlock.
> v2: CCed some networking maintainer & netdev list
> ---
>  ipc/mqueue.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
> index 5eea4dc0509e..4832343b7049 100644
> --- a/ipc/mqueue.c
> +++ b/ipc/mqueue.c
> @@ -664,12 +664,23 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
>  static int mqueue_flush_file(struct file *filp, fl_owner_t id)
>  {
>         struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
> +       struct sock *sk = NULL;
>
>         spin_lock(&info->lock);
> -       if (task_tgid(current) == info->notify_owner)
> -               remove_notification(info);
> +       if (task_tgid(current) == info->notify_owner) {
> +               if (info->notify_owner != NULL &&
> +                   info->notify.sigev_notify == SIGEV_THREAD) {
> +                       sk = info->notify_sock;
> +                       sock_hold(sk);
> +               }
>
> +               remove_notification(info);
> +       }
>         spin_unlock(&info->lock);
> +
> +       if (sk)
> +               sock_put(sk);
> +
>         return 0;
>  }
>


Note that we could instead call vfree_atomic() from netlink_skb_destructor()

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ed8ffd58ff375f3fa9f262e6f3b4d1a1aaf2731..9c962347cf859f16fc76e4d8a2fd22cdb3d142d6 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
        if (is_vmalloc_addr(skb->head)) {
                if (!skb->cloned ||
                    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
-                       vfree(skb->head);
+                       vfree_atomic(skb->head);

                skb->head = NULL;
        }

These big skbs are quite rare IMO, and we could also attempt high-order
allocations in netlink_alloc_large_skb(), using kvmalloc() instead of
vmalloc() (next week, when net-next opens).
shaozhengchao Jan. 20, 2024, 6:54 a.m. UTC | #2
On 2024/1/19 21:09, Eric Dumazet wrote:
> On Fri, Jan 19, 2024 at 11:27 AM Zhengchao Shao
> <shaozhengchao@huawei.com> wrote:
>>
>> I analyze the potential sleeping issue of the following processes:
>> Thread A                                Thread B
>> ...                                     netlink_create  //ref = 1
>> do_mq_notify                            ...
>>    sock = netlink_getsockbyfilp          ...     //ref = 2
>>    info->notify_sock = sock;             ...
>> ...                                     netlink_sendmsg
>> ...                                       skb = netlink_alloc_large_skb  //skb->head is vmalloced
>> ...                                       netlink_unicast
>> ...                                         sk = netlink_getsockbyportid //ref = 3
>> ...                                         netlink_sendskb
>> ...                                           __netlink_sendskb
>> ...                                             skb_queue_tail //put skb to sk_receive_queue
>> ...                                         sock_put //ref = 2
>> ...                                     ...
>> ...                                     netlink_release
>> ...                                       deferred_put_nlk_sk //ref = 1
>> mqueue_flush_file
>>    spin_lock
>>    remove_notification
>>      netlink_sendskb
>>        sock_put  //ref = 0
>>          sk_free
>>            ...
>>            __sk_destruct
>>              netlink_sock_destruct
>>                skb_queue_purge  //get skb from sk_receive_queue
>>                  ...
>>                  __skb_queue_purge_reason
>>                    kfree_skb_reason
>>                      __kfree_skb
>>                      ...
>>                      skb_release_all
>>                        skb_release_head_state
>>                          netlink_skb_destructor
>>                            vfree(skb->head)  //sleeping while holding spinlock
>>
>> In netlink_sendmsg, if the memory pointed to by skb->head is allocated by
>> vmalloc, and is put to sk_receive_queue queue, also the skb is not freed.
>> When the mqueue executes flush, the sleeping bug will occur. Put sock
>> after releasing the spinlock.
>>
>> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> 
Hi Eric:
> I think netlink started to use vmalloc() from commit c05cdb1b864f
> ("netlink: allow large data transfers from user-space")
> 
   Thank you for your review. Yes, you are right. Sorry for my mistake.
>> Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
>> ---
>> v3: Put sock after releasing the spinlock.
>> v2: CCed some networking maintainer & netdev list
>> ---
>>   ipc/mqueue.c | 15 +++++++++++++--
>>   1 file changed, 13 insertions(+), 2 deletions(-)
>>
>> diff --git a/ipc/mqueue.c b/ipc/mqueue.c
>> index 5eea4dc0509e..4832343b7049 100644
>> --- a/ipc/mqueue.c
>> +++ b/ipc/mqueue.c
>> @@ -664,12 +664,23 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
>>   static int mqueue_flush_file(struct file *filp, fl_owner_t id)
>>   {
>>          struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
>> +       struct sock *sk = NULL;
>>
>>          spin_lock(&info->lock);
>> -       if (task_tgid(current) == info->notify_owner)
>> -               remove_notification(info);
>> +       if (task_tgid(current) == info->notify_owner) {
>> +               if (info->notify_owner != NULL &&
>> +                   info->notify.sigev_notify == SIGEV_THREAD) {
>> +                       sk = info->notify_sock;
>> +                       sock_hold(sk);
>> +               }
>>
>> +               remove_notification(info);
>> +       }
>>          spin_unlock(&info->lock);
>> +
>> +       if (sk)
>> +               sock_put(sk);
>> +
>>          return 0;
>>   }
>>
> 
> 
> Note that we could instead call vfree_atomic() from netlink_skb_destructor()
> 
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 4ed8ffd58ff375f3fa9f262e6f3b4d1a1aaf2731..9c962347cf859f16fc76e4d8a2fd22cdb3d142d6 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
>          if (is_vmalloc_addr(skb->head)) {
>                  if (!skb->cloned ||
>                      !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
> -                       vfree(skb->head);
> +                       vfree_atomic(skb->head);
> 
>                  skb->head = NULL;
>          }
> 
OK, I will send v4 after verification.
> These big skbs are quite rare IMO, and we also could attempt
> high-order allocations
> in netlink_alloc_large_skb(), using kvmalloc() instead of vmalloc()
> (next week when net-next opens)
> 
It looks good to me. I would like to do it if you want...
Thank you.

Zhengchao Shao
diff mbox series

Patch

diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 5eea4dc0509e..4832343b7049 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -664,12 +664,23 @@  static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
 static int mqueue_flush_file(struct file *filp, fl_owner_t id)
 {
 	struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
+	struct sock *sk = NULL;
 
 	spin_lock(&info->lock);
-	if (task_tgid(current) == info->notify_owner)
-		remove_notification(info);
+	if (task_tgid(current) == info->notify_owner) {
+		if (info->notify_owner != NULL &&
+		    info->notify.sigev_notify == SIGEV_THREAD) {
+			sk = info->notify_sock;
+			sock_hold(sk);
+		}
 
+		remove_notification(info);
+	}
 	spin_unlock(&info->lock);
+
+	if (sk)
+		sock_put(sk);
+
 	return 0;
 }