Message ID | 20240626193403.3854451-3-zijianzhang@bytedance.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | net: A lightweight zero-copy notification | expand |
On Wed, 26 Jun 2024 19:34:01 +0000 zijianzhang@bytedance.com wrote: > From: Zijian Zhang <zijianzhang@bytedance.com> > > Since ____sys_sendmsg creates a kernel copy of msg_control and passes > that to the callees, put_cmsg will write into this kernel buffer. If > people want to piggyback some information like timestamps upon returning > of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf, > which is not supported. As a result, users typically have to call recvmsg > on the ERRMSG_QUEUE of the socket, incurring extra system call overhead. > > This commit supports copying cmsg to userspace in TX path by introducing > a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic > upon returning of ___sys_sendmsg. sparse complains about the annotations: net/socket.c:2635:30: warning: incorrect type in assignment (different address spaces) net/socket.c:2635:30: expected void *msg_control net/socket.c:2635:30: got void [noderef] __user *[noderef] __user msg_control net/socket.c:2629:49: warning: dereference of noderef expression
Hi, kernel test robot noticed the following build warnings: [auto build test WARNING on net-next/main] url: https://github.com/intel-lab-lkp/linux/commits/zijianzhang-bytedance-com/selftests-fix-OOM-problem-in-msg_zerocopy-selftest/20240627-150801 base: net-next/main patch link: https://lore.kernel.org/r/20240626193403.3854451-3-zijianzhang%40bytedance.com patch subject: [PATCH net-next v6 2/4] sock: support copy cmsg to userspace in TX path config: i386-randconfig-062-20240628 (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/config) compiler: gcc-13 (Ubuntu 13.2.0-4ubuntu3) 13.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240628/202406281051.NAiS477l-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202406281051.NAiS477l-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> net/socket.c:2635:30: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected void *msg_control @@ got void [noderef] __user *[noderef] __user msg_control @@ net/socket.c:2635:30: sparse: expected void *msg_control net/socket.c:2635:30: sparse: got void [noderef] __user *[noderef] __user msg_control >> net/socket.c:2629:49: sparse: sparse: dereference of noderef expression vim +2635 net/socket.c 2623 2624 static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, 2625 struct user_msghdr __user *umsg) 2626 { 2627 struct compat_msghdr __user *umsg_compat = 2628 (struct compat_msghdr __user *)umsg; > 2629 unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; 2630 unsigned int flags = msg_sys->msg_flags; 2631 struct msghdr msg_user = *msg_sys; 2632 struct cmsghdr *cmsg; 2633 int err; 2634 > 2635 msg_user.msg_control = umsg->msg_control; 2636 msg_user.msg_control_is_user = true; 2637 for_each_cmsghdr(cmsg, msg_sys) { 2638 if (!CMSG_OK(msg_sys, cmsg)) 2639 break; 2640 if (cmsg_copy_to_user(cmsg)) 2641 put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, 2642 cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); 2643 } 2644 2645 err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); 2646 if (err) 2647 return err; 2648 if (MSG_CMSG_COMPAT & flags) 2649 err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, 2650 &umsg_compat->msg_controllen); 2651 else 2652 err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, 2653 &umsg->msg_controllen); 2654 return err; 2655 } 2656
zijianzhang@ wrote: > From: Zijian Zhang <zijianzhang@bytedance.com> > > Since ____sys_sendmsg creates a kernel copy of msg_control and passes > that to the callees, put_cmsg will write into this kernel buffer. If > people want to piggyback some information like timestamps upon returning > of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf, > which is not supported. As a result, users typically have to call recvmsg > on the ERRMSG_QUEUE of the socket, incurring extra system call overhead. > > This commit supports copying cmsg to userspace in TX path by introducing > a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic > upon returning of ___sys_sendmsg. > > Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com> > Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com> > --- > include/linux/socket.h | 6 ++++++ > net/core/sock.c | 2 ++ > net/ipv4/ip_sockglue.c | 2 ++ > net/ipv6/datagram.c | 3 +++ > net/socket.c | 45 ++++++++++++++++++++++++++++++++++++++++++ > 5 files changed, 58 insertions(+) > > diff --git a/include/linux/socket.h b/include/linux/socket.h > index 89d16b90370b..35adc30c9db6 100644 > --- a/include/linux/socket.h > +++ b/include/linux/socket.h > @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr > return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); > } > > +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg) > +{ > + return 0; > +} > + > static inline size_t msg_data_left(struct msghdr *msg) > { > return iov_iter_count(&msg->msg_iter); > @@ -329,6 +334,7 @@ struct ucred { > > #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ > #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ > +#define MSG_CMSG_COPY_TO_USER 0x10000000 /* Copy cmsg to user space */ Careful that userspace must not be able to set this bit. See also MSG_INTERNAL_SENDMSG_FLAGS. Perhaps better to define a bit like msg_control_is_user. > #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ > #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file > descriptor received through > diff --git a/net/core/sock.c b/net/core/sock.c > index 9abc4fe25953..4a766a91ff5c 100644 > --- a/net/core/sock.c > +++ b/net/core/sock.c > @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, > for_each_cmsghdr(cmsg, msg) { > if (!CMSG_OK(msg, cmsg)) > return -EINVAL; > + if (cmsg_copy_to_user(cmsg)) > + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; Probably better to pass msg to __sock_cmsg_send and only set this field in the specific cmsg handler that uses it. > if (cmsg->cmsg_level != SOL_SOCKET) > continue; > ret = __sock_cmsg_send(sk, cmsg, sockc); > diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c > index cf377377b52d..464d08b27fa8 100644 > --- a/net/ipv4/ip_sockglue.c > +++ b/net/ipv4/ip_sockglue.c > @@ -249,6 +249,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, > for_each_cmsghdr(cmsg, msg) { > if (!CMSG_OK(msg, cmsg)) > return -EINVAL; > + if (cmsg_copy_to_user(cmsg)) > + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; > #if IS_ENABLED(CONFIG_IPV6) > if (allow_ipv6 && > cmsg->cmsg_level == SOL_IPV6 && > diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c > index fff78496803d..b0341faf7f83 100644 > --- a/net/ipv6/datagram.c > +++ b/net/ipv6/datagram.c > @@ -776,6 +776,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, > goto exit_f; > } > > + if (cmsg_copy_to_user(cmsg)) > + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; > + > if (cmsg->cmsg_level == SOL_SOCKET) { > err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc); > if (err) > diff --git a/net/socket.c b/net/socket.c > index e416920e9399..6523cf5a7f32 100644 > --- a/net/socket.c > +++ b/net/socket.c > @@ -2621,6 +2621,39 @@ static int sendmsg_copy_msghdr(struct msghdr *msg, > return 0; > } > > +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, > + struct user_msghdr __user *umsg) > +{ > + struct compat_msghdr __user *umsg_compat = > + (struct compat_msghdr __user *)umsg; > + unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; > + unsigned int flags = msg_sys->msg_flags; > + struct msghdr msg_user = *msg_sys; > + struct cmsghdr *cmsg; > + int err; > + > + msg_user.msg_control = umsg->msg_control; > + msg_user.msg_control_is_user = true; > + for_each_cmsghdr(cmsg, msg_sys) { > + if (!CMSG_OK(msg_sys, cmsg)) > + break; > + if (cmsg_copy_to_user(cmsg)) > + put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, > + cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); > + } Alternatively just copy the entire msg_control if any cmsg wants to be copied back. The others will be unmodified. No need to iterate then. > + > + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); > + if (err) > + return err; Does this value need to be written? > + if (MSG_CMSG_COMPAT & flags) > + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, > + &umsg_compat->msg_controllen); > + else > + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, > + &umsg->msg_controllen); > + return err; > +} > + > static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, > struct msghdr *msg_sys, unsigned int flags, > struct used_address *used_address, > @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, > > err = ____sys_sendmsg(sock, msg_sys, flags, used_address, > allowed_msghdr_flags); > + if (err < 0) > + goto out; > + > + if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) { > + ssize_t len = err; > + > + err = sendmsg_copy_cmsg_to_user(msg_sys, msg); > + if (err) > + goto out; > + err = len; > + } > +out: > kfree(iov); > return err; > } > -- > 2.20.1 >
On 6/30/24 7:43 AM, Willem de Bruijn wrote: > zijianzhang@ wrote: >> From: Zijian Zhang <zijianzhang@bytedance.com> >> >> Since ____sys_sendmsg creates a kernel copy of msg_control and passes >> that to the callees, put_cmsg will write into this kernel buffer. If >> people want to piggyback some information like timestamps upon returning >> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf, >> which is not supported. As a result, users typically have to call recvmsg >> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead. >> >> This commit supports copying cmsg to userspace in TX path by introducing >> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic >> upon returning of ___sys_sendmsg. >> >> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com> >> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com> >> --- >> include/linux/socket.h | 6 ++++++ >> net/core/sock.c | 2 ++ >> net/ipv4/ip_sockglue.c | 2 ++ >> net/ipv6/datagram.c | 3 +++ >> net/socket.c | 45 ++++++++++++++++++++++++++++++++++++++++++ >> 5 files changed, 58 insertions(+) >> >> diff --git a/include/linux/socket.h b/include/linux/socket.h >> index 89d16b90370b..35adc30c9db6 100644 >> --- a/include/linux/socket.h >> +++ b/include/linux/socket.h >> @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr >> return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); >> } >> >> +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg) >> +{ >> + return 0; >> +} >> + >> static inline size_t msg_data_left(struct msghdr *msg) >> { >> return iov_iter_count(&msg->msg_iter); >> @@ -329,6 +334,7 @@ struct ucred { >> >> #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ >> #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ >> +#define MSG_CMSG_COPY_TO_USER 0x10000000 /* Copy cmsg to user space */ > > Careful that userspace must not be able to set this bit. See also > MSG_INTERNAL_SENDMSG_FLAGS. > > Perhaps better to define a bit like msg_control_is_user. > >> #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ >> #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file >> descriptor received through >> diff --git a/net/core/sock.c b/net/core/sock.c >> index 9abc4fe25953..4a766a91ff5c 100644 >> --- a/net/core/sock.c >> +++ b/net/core/sock.c >> @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, >> for_each_cmsghdr(cmsg, msg) { >> if (!CMSG_OK(msg, cmsg)) >> return -EINVAL; >> + if (cmsg_copy_to_user(cmsg)) >> + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; > > Probably better to pass msg to __sock_cmsg_send and only set this > field in the specific cmsg handler that uses it. > Thanks for the above suggestions! >> if (cmsg->cmsg_level != SOL_SOCKET) >> continue; >> ret = __sock_cmsg_send(sk, cmsg, sockc); ... >> +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, >> + struct user_msghdr __user *umsg) >> +{ >> + struct compat_msghdr __user *umsg_compat = >> + (struct compat_msghdr __user *)umsg; >> + unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; >> + unsigned int flags = msg_sys->msg_flags; >> + struct msghdr msg_user = *msg_sys; >> + struct cmsghdr *cmsg; >> + int err; >> + >> + msg_user.msg_control = umsg->msg_control; >> + msg_user.msg_control_is_user = true; >> + for_each_cmsghdr(cmsg, msg_sys) { >> + if (!CMSG_OK(msg_sys, cmsg)) >> + break; >> + if (cmsg_copy_to_user(cmsg)) >> + put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, >> + cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); >> + } > > Alternatively just copy the entire msg_control if any cmsg wants to > be copied back. The others will be unmodified. No need to iterate > then. > Copy the entire msg_control via copy_to_user does not take MSG_CMSG_COMPAT into account. I may have to use put_cmsg to deal with the compat version, and thus have to keep the for loop? If so, I may keep the function cmsg_copy_to_user to avoid extra copy? >> + >> + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); >> + if (err) >> + return err; > > Does this value need to be written? > I did this according to ____sys_recvmsg, maybe it's useful to export flag like MSG_CTRUNC to users? >> + if (MSG_CMSG_COMPAT & flags) >> + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, >> + &umsg_compat->msg_controllen); >> + else >> + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, >> + &umsg->msg_controllen); >> + return err; >> +} >> + >> static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, >> struct msghdr *msg_sys, unsigned int flags, >> struct used_address *used_address, >> @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, >> >> err = ____sys_sendmsg(sock, msg_sys, flags, used_address, >> allowed_msghdr_flags); >> + if (err < 0) >> + goto out; >> + >> + if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) { >> + ssize_t len = err; >> + >> + err = sendmsg_copy_cmsg_to_user(msg_sys, msg); >> + if (err) >> + goto out; >> + err = len; >> + } >> +out: >> kfree(iov); >> return err; >> } >> -- >> 2.20.1 >> > >
Zijian Zhang wrote: > > > On 6/30/24 7:43 AM, Willem de Bruijn wrote: > > zijianzhang@ wrote: > >> From: Zijian Zhang <zijianzhang@bytedance.com> > >> > >> Since ____sys_sendmsg creates a kernel copy of msg_control and passes > >> that to the callees, put_cmsg will write into this kernel buffer. If > >> people want to piggyback some information like timestamps upon returning > >> of sendmsg. ____sys_sendmsg will have to copy_to_user to the original buf, > >> which is not supported. As a result, users typically have to call recvmsg > >> on the ERRMSG_QUEUE of the socket, incurring extra system call overhead. > >> > >> This commit supports copying cmsg to userspace in TX path by introducing > >> a flag MSG_CMSG_COPY_TO_USER in struct msghdr to guide the copy logic > >> upon returning of ___sys_sendmsg. > >> > >> Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com> > >> Signed-off-by: Xiaochun Lu <xiaochun.lu@bytedance.com> > >> if (cmsg->cmsg_level != SOL_SOCKET) > >> continue; > >> ret = __sock_cmsg_send(sk, cmsg, sockc); > ... > >> +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, > >> + struct user_msghdr __user *umsg) > >> +{ > >> + struct compat_msghdr __user *umsg_compat = > >> + (struct compat_msghdr __user *)umsg; > >> + unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; > >> + unsigned int flags = msg_sys->msg_flags; > >> + struct msghdr msg_user = *msg_sys; > >> + struct cmsghdr *cmsg; > >> + int err; > >> + > >> + msg_user.msg_control = umsg->msg_control; > >> + msg_user.msg_control_is_user = true; > >> + for_each_cmsghdr(cmsg, msg_sys) { > >> + if (!CMSG_OK(msg_sys, cmsg)) > >> + break; > >> + if (cmsg_copy_to_user(cmsg)) > >> + put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, > >> + cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); > >> + } > > > > Alternatively just copy the entire msg_control if any cmsg wants to > > be copied back. The others will be unmodified. No need to iterate > > then. > > > > Copy the entire msg_control via copy_to_user does not take > MSG_CMSG_COMPAT into account. I may have to use put_cmsg to deal > with the compat version, and thus have to keep the for loop? Good point. Okay, then this is pretty clean. Only returning the cmsg that have been written to is actually quite nice. > If so, I may keep the function cmsg_copy_to_user to avoid extra copy? > > >> + > >> + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); > >> + if (err) > >> + return err; > > > > Does this value need to be written? > > > > I did this according to ____sys_recvmsg, maybe it's useful to export > flag like MSG_CTRUNC to users? Good point.
diff --git a/include/linux/socket.h b/include/linux/socket.h index 89d16b90370b..35adc30c9db6 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -168,6 +168,11 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } +static inline bool cmsg_copy_to_user(struct cmsghdr *__cmsg) +{ + return 0; +} + static inline size_t msg_data_left(struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); @@ -329,6 +334,7 @@ struct ucred { #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ +#define MSG_CMSG_COPY_TO_USER 0x10000000 /* Copy cmsg to user space */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ #define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file descriptor received through diff --git a/net/core/sock.c b/net/core/sock.c index 9abc4fe25953..4a766a91ff5c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2879,6 +2879,8 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; if (cmsg->cmsg_level != SOL_SOCKET) continue; ret = __sock_cmsg_send(sk, cmsg, sockc); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cf377377b52d..464d08b27fa8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -249,6 +249,8 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; #if IS_ENABLED(CONFIG_IPV6) if (allow_ipv6 && cmsg->cmsg_level == SOL_IPV6 && diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index fff78496803d..b0341faf7f83 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -776,6 +776,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, goto exit_f; } + if (cmsg_copy_to_user(cmsg)) + msg->msg_flags |= MSG_CMSG_COPY_TO_USER; + if (cmsg->cmsg_level == SOL_SOCKET) { err = __sock_cmsg_send(sk, cmsg, &ipc6->sockc); if (err) diff --git a/net/socket.c b/net/socket.c index e416920e9399..6523cf5a7f32 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2621,6 +2621,39 @@ static int sendmsg_copy_msghdr(struct msghdr *msg, return 0; } +static int sendmsg_copy_cmsg_to_user(struct msghdr *msg_sys, + struct user_msghdr __user *umsg) +{ + struct compat_msghdr __user *umsg_compat = + (struct compat_msghdr __user *)umsg; + unsigned long cmsg_ptr = (unsigned long)umsg->msg_control; + unsigned int flags = msg_sys->msg_flags; + struct msghdr msg_user = *msg_sys; + struct cmsghdr *cmsg; + int err; + + msg_user.msg_control = umsg->msg_control; + msg_user.msg_control_is_user = true; + for_each_cmsghdr(cmsg, msg_sys) { + if (!CMSG_OK(msg_sys, cmsg)) + break; + if (cmsg_copy_to_user(cmsg)) + put_cmsg(&msg_user, cmsg->cmsg_level, cmsg->cmsg_type, + cmsg->cmsg_len - sizeof(*cmsg), CMSG_DATA(cmsg)); + } + + err = __put_user((msg_sys->msg_flags & ~MSG_CMSG_COMPAT), COMPAT_FLAGS(umsg)); + if (err) + return err; + if (MSG_CMSG_COMPAT & flags) + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, + &umsg_compat->msg_controllen); + else + err = __put_user((unsigned long)msg_user.msg_control - cmsg_ptr, + &umsg->msg_controllen); + return err; +} + static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address, @@ -2638,6 +2671,18 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg, err = ____sys_sendmsg(sock, msg_sys, flags, used_address, allowed_msghdr_flags); + if (err < 0) + goto out; + + if (msg_sys->msg_flags & MSG_CMSG_COPY_TO_USER) { + ssize_t len = err; + + err = sendmsg_copy_cmsg_to_user(msg_sys, msg); + if (err) + goto out; + err = len; + } +out: kfree(iov); return err; }