diff mbox series

[RFC,v1,3/8] af_vsock: add zerocopy receive logic

Message ID 44d2404f-dc4f-f42c-1235-2ad7f537a030@sberdevices.ru (mailing list archive)
State RFC
Headers show
Series virtio/vsock: experimental zerocopy receive | expand

Checks

Context Check Description
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 5 this patch: 7
netdev/cc_maintainers warning 1 maintainers not CCed: edumazet@google.com
netdev/build_clang fail Errors and warnings before: 6 this patch: 8
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 5 this patch: 7
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 103 lines checked
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/tree_selection success Guessing tree name failed - patch did not apply, async

Commit Message

Arseniy Krasnov May 12, 2022, 5:12 a.m. UTC
This:
1) Adds callback for 'mmap()' call on socket. It
   checks vm area flags and sets vm area ops.
2) Adds special 'getsockopt()' case which calls
   transport zerocopy callback. Input argument is
   vm area address.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
---
 include/net/af_vsock.h          |  4 +++
 include/uapi/linux/vm_sockets.h |  2 ++
 net/vmw_vsock/af_vsock.c        | 61 +++++++++++++++++++++++++++++++++
 3 files changed, 67 insertions(+)
diff mbox series

Patch

diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index ab207677e0a8..d0aefb9ee4cf 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -135,6 +135,10 @@  struct vsock_transport {
 	bool (*stream_is_active)(struct vsock_sock *);
 	bool (*stream_allow)(u32 cid, u32 port);
 
+	int (*zerocopy_dequeue)(struct vsock_sock *vsk,
+				struct vm_area_struct *vma,
+				unsigned long addr);
+
 	/* SEQ_PACKET. */
 	ssize_t (*seqpacket_dequeue)(struct vsock_sock *vsk, struct msghdr *msg,
 				     int flags);
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index c60ca33eac59..62aec51a2bc3 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -83,6 +83,8 @@ 
 
 #define SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW 8
 
+#define SO_VM_SOCKETS_ZEROCOPY 9
+
 #if !defined(__KERNEL__)
 #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
 #define SO_VM_SOCKETS_CONNECT_TIMEOUT SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 38baeb189d4e..3f98477ea546 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1652,6 +1652,42 @@  static int vsock_connectible_setsockopt(struct socket *sock,
 	return err;
 }
 
+static const struct vm_operations_struct afvsock_vm_ops = {
+};
+
+static int vsock_recv_zerocopy(struct socket *sock,
+			       unsigned long address)
+{
+	struct sock *sk = sock->sk;
+	struct vsock_sock *vsk = vsock_sk(sk);
+	struct vm_area_struct *vma;
+	const struct vsock_transport *transport;
+	int res;
+
+	transport = vsk->transport;
+
+	if (!transport->zerocopy_dequeue)
+		return -EOPNOTSUPP;
+
+	lock_sock(sk);
+	mmap_write_lock(current->mm);
+
+	vma = vma_lookup(current->mm, address);
+
+	if (!vma || vma->vm_ops != &afvsock_vm_ops) {
+		mmap_write_unlock(current->mm);
+		release_sock(sk);
+		return -EINVAL;
+	}
+
+	res = transport->zerocopy_dequeue(vsk, vma, address);
+
+	mmap_write_unlock(current->mm);
+	release_sock(sk);
+
+	return res;
+}
+
 static int vsock_connectible_getsockopt(struct socket *sock,
 					int level, int optname,
 					char __user *optval,
@@ -1696,6 +1732,17 @@  static int vsock_connectible_getsockopt(struct socket *sock,
 		lv = sock_get_timeout(vsk->connect_timeout, &v,
 				      optname == SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD);
 		break;
+	case SO_VM_SOCKETS_ZEROCOPY: {
+		unsigned long vma_addr;
+
+		if (len < sizeof(vma_addr))
+			return -EINVAL;
+
+		if (copy_from_user(&vma_addr, optval, sizeof(vma_addr)))
+			return -EFAULT;
+
+		return vsock_recv_zerocopy(sock, vma_addr);
+	}
 
 	default:
 		return -ENOPROTOOPT;
@@ -2124,6 +2171,19 @@  vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	return err;
 }
 
+static int afvsock_mmap(struct file *file, struct socket *sock,
+			struct vm_area_struct *vma)
+{
+	if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+		return -EPERM;
+
+	vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+	vma->vm_flags |= (VM_MIXEDMAP);
+	vma->vm_ops = &afvsock_vm_ops;
+
+	return 0;
+}
+
 static const struct proto_ops vsock_stream_ops = {
 	.family = PF_VSOCK,
 	.owner = THIS_MODULE,
@@ -2143,6 +2203,7 @@  static const struct proto_ops vsock_stream_ops = {
 	.recvmsg = vsock_connectible_recvmsg,
 	.mmap = sock_no_mmap,
 	.sendpage = sock_no_sendpage,
+	.mmap = afvsock_mmap,
 };
 
 static const struct proto_ops vsock_seqpacket_ops = {