Message ID | 20181101201558.11461-1-stefanha@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Series | [v2] vhost/vsock: fix use-after-free in network stack callers |
On 2018/11/2 4:15 AM, Stefan Hajnoczi wrote:
> If the network stack calls .send_pkt()/.cancel_pkt() during .release(),
> a struct vhost_vsock use-after-free is possible. This occurs because
> .release() does not wait for other CPUs to stop using struct
> vhost_vsock.
>
> Introduce a refcount for network stack callers in struct vhost_vsock and
> wake up .release() when the refcount reaches zero.
>
> Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com
> Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com
> Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> Here is a version that avoids unnecessary wake_up() calls and passes
> syzbot. I'm happy with this fix now.
>
>  drivers/vhost/vsock.c | 33 +++++++++++++++++++++++++++++++--
>  1 file changed, 31 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> index 34bc3ab40c6d..8daffbc4013a 100644
> --- a/drivers/vhost/vsock.c
> +++ b/drivers/vhost/vsock.c
> @@ -9,6 +9,7 @@
>   */
>  #include <linux/miscdevice.h>
>  #include <linux/atomic.h>
> +#include <linux/refcount.h>
>  #include <linux/module.h>
>  #include <linux/mutex.h>
>  #include <linux/vmalloc.h>
> @@ -42,6 +43,12 @@ struct vhost_vsock {
>
>  	atomic_t queued_replies;
>
> +	/* For staying alive while there are network stack
> +	 * .send_pkt()/.cancel_pkt() callers.
> +	 */
> +	refcount_t net_users;
> +	wait_queue_head_t net_users_wq;
> +
>  	u32 guest_cid;
>  };
>
> @@ -75,6 +82,8 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
>
>  	spin_lock_bh(&vhost_vsock_lock);
>  	vsock = __vhost_vsock_get(guest_cid);
> +	if (vsock)
> +		refcount_inc(&vsock->net_users);
>  	spin_unlock_bh(&vhost_vsock_lock);

One more atomic operation compared to my patch. And the global spinlock is
still taken on the datapath. I think it's better to use hlist + RCU instead
of a refcount, so we can avoid both the spinlock and the refcount on the
datapath (a rough sketch of that idea follows the quoted patch below).

Thanks

>
>  	return vsock;
> @@ -225,6 +234,10 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
>  	spin_unlock_bh(&vsock->send_pkt_list_lock);
>
>  	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
> +
> +	if (refcount_dec_and_test(&vsock->net_users))
> +		wake_up(&vsock->net_users_wq);
> +
>  	return len;
>  }
>
> @@ -265,6 +278,9 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
>  		vhost_poll_queue(&tx_vq->poll);
>  	}
>
> +	if (refcount_dec_and_test(&vsock->net_users))
> +		wake_up(&vsock->net_users_wq);
> +
>  	return 0;
>  }
>
> @@ -521,6 +537,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
>  	vsock->guest_cid = 0; /* no CID assigned yet */
>
>  	atomic_set(&vsock->queued_replies, 0);
> +	refcount_set(&vsock->net_users, 1);
> +	init_waitqueue_head(&vsock->net_users_wq);
>
>  	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
>  	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
> @@ -557,13 +575,17 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock)
>  static void vhost_vsock_reset_orphans(struct sock *sk)
>  {
>  	struct vsock_sock *vsk = vsock_sk(sk);
> +	bool orphan;
> +
> +	spin_lock_bh(&vhost_vsock_lock);
> +	orphan = __vhost_vsock_get(vsk->remote_addr.svm_cid) == NULL;
> +	spin_unlock_bh(&vhost_vsock_lock);
>
>  	/* vmci_transport.c doesn't take sk_lock here either. At least we're
>  	 * under vsock_table_lock so the sock cannot disappear while we're
>  	 * executing.
>  	 */
> -
> -	if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
> +	if (orphan) {
>  		sock_set_flag(sk, SOCK_DONE);
>  		vsk->peer_shutdown = SHUTDOWN_MASK;
>  		sk->sk_state = SS_UNCONNECTED;
> @@ -580,6 +602,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
>  	list_del(&vsock->list);
>  	spin_unlock_bh(&vhost_vsock_lock);
>
> +	/* Now that the vsock instance is no longer visible, wait for other
> +	 * CPUs to drop their references.
> +	 */
> +	if (!refcount_dec_and_test(&vsock->net_users))
> +		wait_event(vsock->net_users_wq,
> +			   refcount_read(&vsock->net_users) == 0);
> +
>  	/* Iterating over all connections for all CIDs to find orphans is
>  	 * inefficient. Room for improvement here. */
>  	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
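For reference, here is a rough, untested sketch of the hlist + RCU approach suggested in the reply above. It is not the actual vhost/vsock code: the list head, the `hash` field, and the helper names are illustrative assumptions. The point is only that the lookup on the datapath becomes lockless and refcount-free, the spinlock is confined to the rare add/remove path, and `synchronize_rcu()` in `.release()` takes over the role of the refcount wait.

```c
/* Illustrative sketch only -- names are assumptions, not vhost/vsock code. */
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/types.h>

static DEFINE_SPINLOCK(vhost_vsock_lock);	/* writers (add/remove) only */
static HLIST_HEAD(vhost_vsock_list);

struct vhost_vsock {
	struct hlist_node hash;
	u32 guest_cid;
	/* ... */
};

/* Datapath lookup: no global spinlock, no refcount.  Callers must stay in
 * an RCU read-side critical section for as long as they use the result.
 */
static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
{
	struct vhost_vsock *vsock;

	hlist_for_each_entry_rcu(vsock, &vhost_vsock_list, hash) {
		if (vsock->guest_cid == guest_cid)
			return vsock;
	}
	return NULL;
}

/* .release(): unpublish the instance under the lock, then wait for all
 * in-flight .send_pkt()/.cancel_pkt() callers to leave their RCU read
 * sections before tearing the device down.
 */
static void vhost_vsock_unpublish(struct vhost_vsock *vsock)
{
	spin_lock_bh(&vhost_vsock_lock);
	hlist_del_init_rcu(&vsock->hash);
	spin_unlock_bh(&vhost_vsock_lock);

	synchronize_rcu();
}
```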
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 34bc3ab40c6d..8daffbc4013a 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -9,6 +9,7 @@
  */
 #include <linux/miscdevice.h>
 #include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/vmalloc.h>
@@ -42,6 +43,12 @@ struct vhost_vsock {

 	atomic_t queued_replies;

+	/* For staying alive while there are network stack
+	 * .send_pkt()/.cancel_pkt() callers.
+	 */
+	refcount_t net_users;
+	wait_queue_head_t net_users_wq;
+
 	u32 guest_cid;
 };

@@ -75,6 +82,8 @@ static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)

 	spin_lock_bh(&vhost_vsock_lock);
 	vsock = __vhost_vsock_get(guest_cid);
+	if (vsock)
+		refcount_inc(&vsock->net_users);
 	spin_unlock_bh(&vhost_vsock_lock);

 	return vsock;
@@ -225,6 +234,10 @@ vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
 	spin_unlock_bh(&vsock->send_pkt_list_lock);

 	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);
+
+	if (refcount_dec_and_test(&vsock->net_users))
+		wake_up(&vsock->net_users_wq);
+
 	return len;
 }

@@ -265,6 +278,9 @@ vhost_transport_cancel_pkt(struct vsock_sock *vsk)
 		vhost_poll_queue(&tx_vq->poll);
 	}

+	if (refcount_dec_and_test(&vsock->net_users))
+		wake_up(&vsock->net_users_wq);
+
 	return 0;
 }

@@ -521,6 +537,8 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
 	vsock->guest_cid = 0; /* no CID assigned yet */

 	atomic_set(&vsock->queued_replies, 0);
+	refcount_set(&vsock->net_users, 1);
+	init_waitqueue_head(&vsock->net_users_wq);

 	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
 	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
@@ -557,13 +575,17 @@ static void vhost_vsock_flush(struct vhost_vsock *vsock)
 static void vhost_vsock_reset_orphans(struct sock *sk)
 {
 	struct vsock_sock *vsk = vsock_sk(sk);
+	bool orphan;
+
+	spin_lock_bh(&vhost_vsock_lock);
+	orphan = __vhost_vsock_get(vsk->remote_addr.svm_cid) == NULL;
+	spin_unlock_bh(&vhost_vsock_lock);

 	/* vmci_transport.c doesn't take sk_lock here either. At least we're
 	 * under vsock_table_lock so the sock cannot disappear while we're
 	 * executing.
 	 */
-
-	if (!vhost_vsock_get(vsk->remote_addr.svm_cid)) {
+	if (orphan) {
 		sock_set_flag(sk, SOCK_DONE);
 		vsk->peer_shutdown = SHUTDOWN_MASK;
 		sk->sk_state = SS_UNCONNECTED;
@@ -580,6 +602,13 @@ static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
 	list_del(&vsock->list);
 	spin_unlock_bh(&vhost_vsock_lock);

+	/* Now that the vsock instance is no longer visible, wait for other
+	 * CPUs to drop their references.
+	 */
+	if (!refcount_dec_and_test(&vsock->net_users))
+		wait_event(vsock->net_users_wq,
+			   refcount_read(&vsock->net_users) == 0);
+
 	/* Iterating over all connections for all CIDs to find orphans is
 	 * inefficient. Room for improvement here. */
 	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);
If the network stack calls .send_pkt()/.cancel_pkt() during .release(),
a struct vhost_vsock use-after-free is possible. This occurs because
.release() does not wait for other CPUs to stop using struct
vhost_vsock.

Introduce a refcount for network stack callers in struct vhost_vsock and
wake up .release() when the refcount reaches zero.

Reported-and-tested-by: syzbot+bd391451452fb0b93039@syzkaller.appspotmail.com
Reported-by: syzbot+e3e074963495f92a89ed@syzkaller.appspotmail.com
Reported-by: syzbot+d5a0a170c5069658b141@syzkaller.appspotmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
Here is a version that avoids unnecessary wake_up() calls and passes
syzbot. I'm happy with this fix now.

 drivers/vhost/vsock.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)
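To illustrate the mechanism the commit message describes, the sketch below is a minimal userspace analogue of the refcount-plus-wait pattern, written with C11 atomics and pthreads. All names (struct obj, net_users, put_ref, send_pkt) are made up for the example and are not the kernel code: open() leaves the count at 1, each in-flight network-stack caller holds one extra reference, and release() drops the initial reference and sleeps until the count reaches zero before the object may be freed.

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct obj {
	atomic_int net_users;		/* starts at 1, owned by "open" */
	pthread_mutex_t lock;
	pthread_cond_t zero;
};

/* Drop one reference; wake the releaser when the count reaches zero. */
static void put_ref(struct obj *o)
{
	if (atomic_fetch_sub(&o->net_users, 1) == 1) {
		pthread_mutex_lock(&o->lock);
		pthread_cond_signal(&o->zero);
		pthread_mutex_unlock(&o->lock);
	}
}

/* Stand-in for a .send_pkt()/.cancel_pkt() caller that already holds a
 * reference taken at lookup time.
 */
static void *send_pkt(void *arg)
{
	struct obj *o = arg;

	/* ... safe to touch *o here, release() cannot free it yet ... */
	put_ref(o);
	return NULL;
}

int main(void)
{
	struct obj o = {
		.net_users = 2,		/* 1 for "open" + 1 in-flight caller */
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.zero = PTHREAD_COND_INITIALIZER,
	};
	pthread_t t;

	pthread_create(&t, NULL, send_pkt, &o);

	/* release(): drop the initial reference, then wait for callers. */
	pthread_mutex_lock(&o.lock);
	if (atomic_fetch_sub(&o.net_users, 1) != 1)
		while (atomic_load(&o.net_users) != 0)
			pthread_cond_wait(&o.zero, &o.lock);
	pthread_mutex_unlock(&o.lock);

	pthread_join(t, NULL);
	printf("all callers drained; safe to free the object\n");
	return 0;
}
```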