Message ID | 20231208005250.2910004-15-almasrymina@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Device Memory TCP | expand |
On Thu, Dec 07, 2023 at 04:52:45PM -0800, Mina Almasry wrote: > Add an interface for the user to notify the kernel that it is done > reading the devmem dmabuf frags returned as cmsg. The kernel will > drop the reference on the frags to make them available for re-use. > > Signed-off-by: Willem de Bruijn <willemb@google.com> > Signed-off-by: Kaiyuan Zhang <kaiyuanz@google.com> > Signed-off-by: Mina Almasry <almasrymina@google.com> ... > diff --git a/net/core/sock.c b/net/core/sock.c > index fef349dd72fa..521bdc4ff260 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -1051,6 +1051,41 @@ static int sock_reserve_memory(struct sock *sk, int bytes) > return 0; > } > > +static noinline_for_stack int > +sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) > +{ > + struct dmabuf_token tokens[128]; Hi Mina, I am guessing it is mostly due to the line above, but on x86 32bit builds I see: warning: the frame size of 1048 bytes is larger than 1024 bytes [-Wframe-larger-than > + unsigned int num_tokens, i, j; > + int ret; > + > + if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP) > + return -EBADF; > + > + if (optlen % sizeof(struct dmabuf_token) || optlen > sizeof(tokens)) > + return -EINVAL; > + > + num_tokens = optlen / sizeof(struct dmabuf_token); > + if (copy_from_sockptr(tokens, optval, optlen)) > + return -EFAULT; > + > + ret = 0; > + for (i = 0; i < num_tokens; i++) { > + for (j = 0; j < tokens[i].token_count; j++) { > + struct page *page = xa_erase(&sk->sk_user_pages, > + tokens[i].token_start + j); > + > + if (page) { > + if (WARN_ON_ONCE(!napi_pp_put_page(page, > + false))) > + page_pool_page_put_many(page, 1); > + ret++; > + } > + } > + } > + > + return ret; > +} > + > void sockopt_lock_sock(struct sock *sk) > { > /* When current->bpf_ctx is set, the setsockopt is called from ...
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index 25a2f5255f52..1acb77780f10 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -135,6 +135,7 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SO_DEVMEM_DONTNEED 97 #define SO_DEVMEM_LINEAR 98 #define SCM_DEVMEM_LINEAR SO_DEVMEM_LINEAR #define SO_DEVMEM_DMABUF 99 diff --git a/include/uapi/linux/uio.h b/include/uapi/linux/uio.h index ad92e37699da..65f33178a601 100644 --- a/include/uapi/linux/uio.h +++ b/include/uapi/linux/uio.h @@ -30,6 +30,10 @@ struct dmabuf_cmsg { __u32 dmabuf_id; /* dmabuf id this frag belongs to. */ }; +struct dmabuf_token { + __u32 token_start; + __u32 token_count; +}; /* * UIO_MAXIOV shall be at least 16 1003.1g (5.4.1.1) */ diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..521bdc4ff260 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1051,6 +1051,41 @@ static int sock_reserve_memory(struct sock *sk, int bytes) return 0; } +static noinline_for_stack int +sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen) +{ + struct dmabuf_token tokens[128]; + unsigned int num_tokens, i, j; + int ret; + + if (sk->sk_type != SOCK_STREAM || sk->sk_protocol != IPPROTO_TCP) + return -EBADF; + + if (optlen % sizeof(struct dmabuf_token) || optlen > sizeof(tokens)) + return -EINVAL; + + num_tokens = optlen / sizeof(struct dmabuf_token); + if (copy_from_sockptr(tokens, optval, optlen)) + return -EFAULT; + + ret = 0; + for (i = 0; i < num_tokens; i++) { + for (j = 0; j < tokens[i].token_count; j++) { + struct page *page = xa_erase(&sk->sk_user_pages, + tokens[i].token_start + j); + + if (page) { + if (WARN_ON_ONCE(!napi_pp_put_page(page, + false))) + page_pool_page_put_many(page, 1); + ret++; + } + } + } + + return ret; +} + void sockopt_lock_sock(struct sock *sk) { /* When current->bpf_ctx is set, the setsockopt is called from @@ -1538,6 +1573,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname, break; } + case SO_DEVMEM_DONTNEED: + ret = sock_devmem_dontneed(sk, optval, optlen); + break; default: ret = -ENOPROTOOPT; break;