@@ -135,6 +135,10 @@ struct vsock_transport {
bool (*stream_is_active)(struct vsock_sock *);
bool (*stream_allow)(u32 cid, u32 port);
+ int (*zerocopy_dequeue)(struct vsock_sock *vsk,
+ struct vm_area_struct *vma,
+ unsigned long addr);
+
/* SEQ_PACKET. */
ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
int flags);
@@ -83,6 +83,8 @@
#define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8
+#define SO_VM_SOCKETS_ZEROCOPY 9 /* Used with getsockopt(): optval holds an
+				  * unsigned long address inside an area
+				  * obtained by mmap() on the socket; the
+				  * request is passed to the transport's
+				  * zerocopy dequeue callback.
+				  */
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
#define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD
@@ -1652,6 +1652,42 @@ static int vsock_connectible_setsockopt(struct socket *sock,
return err;
}
+static const struct vm_operations_struct afvsock_vm_ops = { /*
+	 * Deliberately empty: no callbacks are provided. afvsock_mmap()
+	 * installs this table on the vma and vsock_recv_zerocopy() compares
+	 * vma->vm_ops against its address to recognise such mappings.
+	 */
+};
+
+/**
+ * vsock_recv_zerocopy - hand a zerocopy receive request to the transport
+ * @sock: socket to receive from
+ * @address: user address used to look up the target vm area
+ *
+ * Looks up the vma containing @address in the caller's mm, verifies that
+ * its vm_ops is &afvsock_vm_ops (i.e. it was set up by afvsock_mmap()) and
+ * calls the transport's zerocopy_dequeue() callback with the socket lock
+ * and the mmap write lock held.
+ *
+ * Return: the callback's result, -ENOTCONN if no transport is assigned to
+ * the socket, -EOPNOTSUPP if the transport has no zerocopy_dequeue()
+ * callback, or -EINVAL if @address is not inside a vsock mapping.
+ */
+static int vsock_recv_zerocopy(struct socket *sock,
+			       unsigned long address)
+{
+	struct sock *sk = sock->sk;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	const struct vsock_transport *transport;
+	struct vm_area_struct *vma;
+	int res;
+
+	lock_sock(sk);
+
+	/* Sample the transport only with the socket lock held, and do not
+	 * assume it is set: a socket that has not been assigned a transport
+	 * yet is expected to have vsk->transport == NULL, which must not be
+	 * dereferenced.
+	 */
+	transport = vsk->transport;
+	if (!transport) {
+		res = -ENOTCONN;
+		goto out_release;
+	}
+
+	if (!transport->zerocopy_dequeue) {
+		res = -EOPNOTSUPP;
+		goto out_release;
+	}
+
+	mmap_write_lock(current->mm);
+
+	/* Only accept areas that were mapped through the vsock socket. */
+	vma = vma_lookup(current->mm, address);
+	if (!vma || vma->vm_ops != &afvsock_vm_ops) {
+		res = -EINVAL;
+		goto out_unlock_mm;
+	}
+
+	res = transport->zerocopy_dequeue(vsk, vma, address);
+
+out_unlock_mm:
+	mmap_write_unlock(current->mm);
+out_release:
+	release_sock(sk);
+
+	return res;
+}
+
static int vsock_connectible_getsockopt(struct socket *sock,
int level, int optname,
char __user *optval,
@@ -1696,6 +1732,17 @@ static int vsock_connectible_getsockopt(struct socket *sock,
lv = sock_get_timeout(vsk->connect_timeout, &v,
optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
break;
+ case SO_VM_SOCKETS_ZEROCOPY: {
+ unsigned long vma_addr;
+
+ if (len < sizeof(vma_addr))
+ return -EINVAL;
+
+ if (copy_from_user(&vma_addr, optval, sizeof(vma_addr)))
+ return -EFAULT;
+
+ return vsock_recv_zerocopy(sock, vma_addr);
+ }
default:
return -ENOPROTOOPT;
@@ -2124,6 +2171,19 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
return err;
}
+/* mmap() handler for vsock stream sockets.
+ *
+ * Rejects mappings requested with write or execute permission (-EPERM).
+ * Otherwise it drops the VM_MAYWRITE/VM_MAYEXEC bits, marks the area
+ * VM_MIXEDMAP and tags it with afvsock_vm_ops. Returns 0 on success.
+ * @file and @sock are not used here.
+ */
+static int afvsock_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	const unsigned long rw_exec = VM_WRITE | VM_EXEC;
+	const unsigned long may_rw_exec = VM_MAYWRITE | VM_MAYEXEC;
+
+	/* The area must be requested read-only and non-executable. */
+	if (vma->vm_flags & rw_exec)
+		return -EPERM;
+
+	vma->vm_flags &= ~may_rw_exec;
+
+	/* NOTE(review): VM_MIXEDMAP is presumably needed so the transport
+	 * can later insert pages into this area - confirm.
+	 */
+	vma->vm_flags |= VM_MIXEDMAP;
+
+	/* Tag the area so it can be recognised by its vm_ops pointer. */
+	vma->vm_ops = &afvsock_vm_ops;
+
+	return 0;
+}
+
static const struct proto_ops vsock_stream_ops = {
.family = PF_VSOCK,
.owner = THIS_MODULE,
@@ -2143,6 +2203,7 @@ static const struct proto_ops vsock_stream_ops = {
.recvmsg = vsock_connectible_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
+ .mmap = afvsock_mmap,
};
static const struct proto_ops vsock_seqpacket_ops = {
This patch: 1) Adds a callback for the 'mmap()' call on a socket. It checks the vm area flags and sets the vm area ops. 2) Adds a special 'getsockopt()' case which calls the transport's zerocopy callback. The input argument is the vm area address. Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru> --- include/net/af_vsock.h | 4 +++ include/uapi/linux/vm_sockets.h | 2 ++ net/vmw_vsock/af_vsock.c | 61 +++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+)