Message ID | 8bfd9a57bf42cfc10ee7195969058d6da277deed.1579649589.git.asml.silence@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | splice(2) support for io_uring | expand |
On 1/21/20 5:05 PM, Pavel Begunkov wrote: > @@ -373,6 +374,15 @@ struct io_rw { > u64 len; > }; > > +struct io_splice { > + struct file *file_in; > + struct file *file_out; > + loff_t __user *off_in; > + loff_t __user *off_out; > + u64 len; > + unsigned int flags; > +}; > + > struct io_connect { > struct file *file; > struct sockaddr __user *addr; Probably just make that len u32 as per previous email. > @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { > .needs_file = 1, > .fd_non_neg = 1, > }, > + [IORING_OP_SPLICE] = { > + .needs_file = 1, > + .hash_reg_file = 1, > + .unbound_nonreg_file = 1, > + } > }; > > static void io_wq_submit_work(struct io_wq_work **workptr); I probably want to queue up a reservation for the EPOLL_CTL that I haven't included yet, but which has been tested. But that's easily manageable, so no biggy on my end. > +static bool io_splice_punt(struct file *file) > +{ > + if (get_pipe_info(file)) > + return false; > + if (!io_file_supports_async(file)) > + return true; > + return !(file->f_mode & O_NONBLOCK); > +} > + > +static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt, > + bool force_nonblock) > +{ > + struct io_splice* sp = &req->splice; > + struct file *in = sp->file_in; > + struct file *out = sp->file_out; > + unsigned int flags = sp->flags; > + long ret; > + > + if (force_nonblock) { > + if (io_splice_punt(in) || io_splice_punt(out)) { > + req->flags |= REQ_F_MUST_PUNT; > + return -EAGAIN; > + } > + flags |= SPLICE_F_NONBLOCK; > + } > + > + ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags); > + if (force_nonblock && ret == -EAGAIN) > + return -EAGAIN; > + > + io_put_file(req->ctx, out, (flags & IOSQE_SPLICE_FIXED_OUT)); > + io_cqring_add_event(req, ret); > + if (ret != sp->len) > + req_set_fail_links(req); > + io_put_req_find_next(req, nxt); > + return 0; > +} This looks good. And this is why the put_file() needs to take separate arguments... > diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h > index 57d05cc5e271..f234b13e7ed3 100644 > --- a/include/uapi/linux/io_uring.h > +++ b/include/uapi/linux/io_uring.h > @@ -23,8 +23,14 @@ struct io_uring_sqe { > __u64 off; /* offset into file */ > __u64 addr2; > }; > - __u64 addr; /* pointer to buffer or iovecs */ > - __u32 len; /* buffer size or number of iovecs */ > + union { > + __u64 addr; /* pointer to buffer or iovecs */ > + __u64 off_out; > + }; > + union { > + __u32 len; /* buffer size or number of iovecs */ > + __s32 fd_out; > + }; > union { > __kernel_rwf_t rw_flags; > __u32 fsync_flags; > @@ -37,10 +43,12 @@ struct io_uring_sqe { > __u32 open_flags; > __u32 statx_flags; > __u32 fadvise_advice; > + __u32 splice_flags; > }; > __u64 user_data; /* data to be passed back at completion time */ > union { > __u16 buf_index; /* index into fixed buffers, if used */ > + __u64 splice_len; > __u64 __pad2[3]; > }; > }; Not a huge fan of this, also mean splice can't ever used fixed buffers. Hmm... > @@ -67,6 +75,9 @@ enum { > /* always go async */ > #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) > > +/* op custom flags */ > +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) > + I don't think it's unreasonable to say that if you specify IOSQE_FIXED_FILE, then both are fixed. If not, then none of them are. What do you think?
On 22/01/2020 05:03, Jens Axboe wrote: > On 1/21/20 5:05 PM, Pavel Begunkov wrote: >> @@ -373,6 +374,15 @@ struct io_rw { >> u64 len; >> }; >> >> +struct io_splice { >> + struct file *file_in; >> + struct file *file_out; >> + loff_t __user *off_in; >> + loff_t __user *off_out; >> + u64 len; >> + unsigned int flags; >> +}; >> + >> struct io_connect { >> struct file *file; >> struct sockaddr __user *addr; > > Probably just make that len u32 as per previous email. Right, I don't want to have multiple types and names for it myself. > >> @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { >> .needs_file = 1, >> .fd_non_neg = 1, >> }, >> + [IORING_OP_SPLICE] = { >> + .needs_file = 1, >> + .hash_reg_file = 1, >> + .unbound_nonreg_file = 1, >> + } >> }; >> >> static void io_wq_submit_work(struct io_wq_work **workptr); > > I probably want to queue up a reservation for the EPOLL_CTL that I > haven't included yet, but which has been tested. But that's easily > manageable, so no biggy on my end. I didn't quite get it. Do you mean collision of opcode numbers? > >> +static bool io_splice_punt(struct file *file) >> +{ >> + if (get_pipe_info(file)) >> + return false; >> + if (!io_file_supports_async(file)) >> + return true; >> + return !(file->f_mode & O_NONBLOCK); >> +} >> + >> +static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt, >> + bool force_nonblock) >> +{ >> + struct io_splice* sp = &req->splice; >> + struct file *in = sp->file_in; >> + struct file *out = sp->file_out; >> + unsigned int flags = sp->flags; >> + long ret; >> + >> + if (force_nonblock) { >> + if (io_splice_punt(in) || io_splice_punt(out)) { >> + req->flags |= REQ_F_MUST_PUNT; >> + return -EAGAIN; >> + } >> + flags |= SPLICE_F_NONBLOCK; >> + } >> + >> + ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags); >> + if (force_nonblock && ret == -EAGAIN) >> + return -EAGAIN; >> + >> + io_put_file(req->ctx, out, (flags & IOSQE_SPLICE_FIXED_OUT)); >> + io_cqring_add_event(req, ret); >> + if (ret != sp->len) >> + req_set_fail_links(req); >> + io_put_req_find_next(req, nxt); >> + return 0; >> +} > > This looks good. And this is why the put_file() needs to take separate > arguments... > >> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >> index 57d05cc5e271..f234b13e7ed3 100644 >> --- a/include/uapi/linux/io_uring.h >> +++ b/include/uapi/linux/io_uring.h >> @@ -23,8 +23,14 @@ struct io_uring_sqe { >> __u64 off; /* offset into file */ >> __u64 addr2; >> }; >> - __u64 addr; /* pointer to buffer or iovecs */ >> - __u32 len; /* buffer size or number of iovecs */ >> + union { >> + __u64 addr; /* pointer to buffer or iovecs */ >> + __u64 off_out; >> + }; >> + union { >> + __u32 len; /* buffer size or number of iovecs */ >> + __s32 fd_out; >> + }; >> union { >> __kernel_rwf_t rw_flags; >> __u32 fsync_flags; >> @@ -37,10 +43,12 @@ struct io_uring_sqe { >> __u32 open_flags; >> __u32 statx_flags; >> __u32 fadvise_advice; >> + __u32 splice_flags; >> }; >> __u64 user_data; /* data to be passed back at completion time */ >> union { >> __u16 buf_index; /* index into fixed buffers, if used */ >> + __u64 splice_len; >> __u64 __pad2[3]; >> }; >> }; > > Not a huge fan of this, also mean splice can't ever used fixed buffers. > Hmm... But it's not like splice() ever uses user buffers. Isn't it? vmsplice does, but that's another opcode. > >> @@ -67,6 +75,9 @@ enum { >> /* always go async */ >> #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) >> >> +/* op custom flags */ >> +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) >> + > > I don't think it's unreasonable to say that if you specify > IOSQE_FIXED_FILE, then both are fixed. If not, then none of them are. > What do you think? > It's plausible to register only one end for splicing, e.g. splice from short-lived sockets to pre-registered buffers-pipes. And it's clearer do it now.
On 1/21/20 7:40 PM, Pavel Begunkov wrote: >>> @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { >>> .needs_file = 1, >>> .fd_non_neg = 1, >>> }, >>> + [IORING_OP_SPLICE] = { >>> + .needs_file = 1, >>> + .hash_reg_file = 1, >>> + .unbound_nonreg_file = 1, >>> + } >>> }; >>> >>> static void io_wq_submit_work(struct io_wq_work **workptr); >> >> I probably want to queue up a reservation for the EPOLL_CTL that I >> haven't included yet, but which has been tested. But that's easily >> manageable, so no biggy on my end. > > I didn't quite get it. Do you mean collision of opcode numbers? Yeah that's all I meant, sorry wasn't too clear. But you can disregard, I'll just pop a reservation in front if/when this is ready to go in if it's before EPOLL_CTL op. >>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>> index 57d05cc5e271..f234b13e7ed3 100644 >>> --- a/include/uapi/linux/io_uring.h >>> +++ b/include/uapi/linux/io_uring.h >>> @@ -23,8 +23,14 @@ struct io_uring_sqe { >>> __u64 off; /* offset into file */ >>> __u64 addr2; >>> }; >>> - __u64 addr; /* pointer to buffer or iovecs */ >>> - __u32 len; /* buffer size or number of iovecs */ >>> + union { >>> + __u64 addr; /* pointer to buffer or iovecs */ >>> + __u64 off_out; >>> + }; >>> + union { >>> + __u32 len; /* buffer size or number of iovecs */ >>> + __s32 fd_out; >>> + }; >>> union { >>> __kernel_rwf_t rw_flags; >>> __u32 fsync_flags; >>> @@ -37,10 +43,12 @@ struct io_uring_sqe { >>> __u32 open_flags; >>> __u32 statx_flags; >>> __u32 fadvise_advice; >>> + __u32 splice_flags; >>> }; >>> __u64 user_data; /* data to be passed back at completion time */ >>> union { >>> __u16 buf_index; /* index into fixed buffers, if used */ >>> + __u64 splice_len; >>> __u64 __pad2[3]; >>> }; >>> }; >> >> Not a huge fan of this, also mean splice can't ever used fixed buffers. >> Hmm... > > But it's not like splice() ever uses user buffers. Isn't it? vmsplice > does, but that's another opcode. I guess that's true, I had vmsplice on my mind for this as well. But won't be a problem there, since it doesn't take 6 arguments like splice does. Another option is to do an indirect for splice, stuff the arguments in a struct that's passed in as a pointer in ->addr. A bit slower, but probably not a huge deal. >>> @@ -67,6 +75,9 @@ enum { >>> /* always go async */ >>> #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) >>> >>> +/* op custom flags */ >>> +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) >>> + >> >> I don't think it's unreasonable to say that if you specify >> IOSQE_FIXED_FILE, then both are fixed. If not, then none of them are. >> What do you think? >> > > It's plausible to register only one end for splicing, e.g. splice from > short-lived sockets to pre-registered buffers-pipes. And it's clearer > do it now. You're probably right, though it's a bit nasty to add an unrelated flag in the splice flag space... We should probably reserve it in splice instead, and just not have it available from the regular system call.
On 22/01/2020 05:47, Jens Axboe wrote: > On 1/21/20 7:40 PM, Pavel Begunkov wrote: >>>> @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { >>>> .needs_file = 1, >>>> .fd_non_neg = 1, >>>> }, >>>> + [IORING_OP_SPLICE] = { >>>> + .needs_file = 1, >>>> + .hash_reg_file = 1, >>>> + .unbound_nonreg_file = 1, >>>> + } >>>> }; >>>> >>>> static void io_wq_submit_work(struct io_wq_work **workptr); >>> >>> I probably want to queue up a reservation for the EPOLL_CTL that I >>> haven't included yet, but which has been tested. But that's easily >>> manageable, so no biggy on my end. >> >> I didn't quite get it. Do you mean collision of opcode numbers? > > Yeah that's all I meant, sorry wasn't too clear. But you can disregard, > I'll just pop a reservation in front if/when this is ready to go in if > it's before EPOLL_CTL op. > >>>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>>> index 57d05cc5e271..f234b13e7ed3 100644 >>>> --- a/include/uapi/linux/io_uring.h >>>> +++ b/include/uapi/linux/io_uring.h >>>> @@ -23,8 +23,14 @@ struct io_uring_sqe { >>>> __u64 off; /* offset into file */ >>>> __u64 addr2; >>>> }; >>>> - __u64 addr; /* pointer to buffer or iovecs */ >>>> - __u32 len; /* buffer size or number of iovecs */ >>>> + union { >>>> + __u64 addr; /* pointer to buffer or iovecs */ >>>> + __u64 off_out; >>>> + }; >>>> + union { >>>> + __u32 len; /* buffer size or number of iovecs */ >>>> + __s32 fd_out; >>>> + }; >>>> union { >>>> __kernel_rwf_t rw_flags; >>>> __u32 fsync_flags; >>>> @@ -37,10 +43,12 @@ struct io_uring_sqe { >>>> __u32 open_flags; >>>> __u32 statx_flags; >>>> __u32 fadvise_advice; >>>> + __u32 splice_flags; >>>> }; >>>> __u64 user_data; /* data to be passed back at completion time */ >>>> union { >>>> __u16 buf_index; /* index into fixed buffers, if used */ >>>> + __u64 splice_len; >>>> __u64 __pad2[3]; >>>> }; >>>> }; >>> >>> Not a huge fan of this, also mean splice can't ever used fixed buffers. >>> Hmm... >> >> But it's not like splice() ever uses user buffers. Isn't it? vmsplice >> does, but that's another opcode. > > I guess that's true, I had vmsplice on my mind for this as well. But > won't be a problem there, since it doesn't take 6 arguments like splice > does. > > Another option is to do an indirect for splice, stuff the arguments in a > struct that's passed in as a pointer in ->addr. A bit slower, but > probably not a huge deal. > >>>> @@ -67,6 +75,9 @@ enum { >>>> /* always go async */ >>>> #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) >>>> >>>> +/* op custom flags */ >>>> +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) >>>> + >>> >>> I don't think it's unreasonable to say that if you specify >>> IOSQE_FIXED_FILE, then both are fixed. If not, then none of them are. >>> What do you think? >>> >> >> It's plausible to register only one end for splicing, e.g. splice from >> short-lived sockets to pre-registered buffers-pipes. And it's clearer >> do it now. > > You're probably right, though it's a bit nasty to add an unrelated flag > in the splice flag space... We should probably reserve it in splice > instead, and just not have it available from the regular system call. > Agree, it looks bad. I don't want to add it into sqe->splice_flags to not clash with splice(2) in the future, but could have a separate field in @sqe... or can leave in in sqe->flags, as it's done in the patch, but that's like a portion of bits would be opcode specific and we would need to set rules for their use.
On 1/21/20 8:16 PM, Pavel Begunkov wrote: > On 22/01/2020 05:47, Jens Axboe wrote: >> On 1/21/20 7:40 PM, Pavel Begunkov wrote: >>>>> @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { >>>>> .needs_file = 1, >>>>> .fd_non_neg = 1, >>>>> }, >>>>> + [IORING_OP_SPLICE] = { >>>>> + .needs_file = 1, >>>>> + .hash_reg_file = 1, >>>>> + .unbound_nonreg_file = 1, >>>>> + } >>>>> }; >>>>> >>>>> static void io_wq_submit_work(struct io_wq_work **workptr); >>>> >>>> I probably want to queue up a reservation for the EPOLL_CTL that I >>>> haven't included yet, but which has been tested. But that's easily >>>> manageable, so no biggy on my end. >>> >>> I didn't quite get it. Do you mean collision of opcode numbers? >> >> Yeah that's all I meant, sorry wasn't too clear. But you can disregard, >> I'll just pop a reservation in front if/when this is ready to go in if >> it's before EPOLL_CTL op. >> >>>>> diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h >>>>> index 57d05cc5e271..f234b13e7ed3 100644 >>>>> --- a/include/uapi/linux/io_uring.h >>>>> +++ b/include/uapi/linux/io_uring.h >>>>> @@ -23,8 +23,14 @@ struct io_uring_sqe { >>>>> __u64 off; /* offset into file */ >>>>> __u64 addr2; >>>>> }; >>>>> - __u64 addr; /* pointer to buffer or iovecs */ >>>>> - __u32 len; /* buffer size or number of iovecs */ >>>>> + union { >>>>> + __u64 addr; /* pointer to buffer or iovecs */ >>>>> + __u64 off_out; >>>>> + }; >>>>> + union { >>>>> + __u32 len; /* buffer size or number of iovecs */ >>>>> + __s32 fd_out; >>>>> + }; >>>>> union { >>>>> __kernel_rwf_t rw_flags; >>>>> __u32 fsync_flags; >>>>> @@ -37,10 +43,12 @@ struct io_uring_sqe { >>>>> __u32 open_flags; >>>>> __u32 statx_flags; >>>>> __u32 fadvise_advice; >>>>> + __u32 splice_flags; >>>>> }; >>>>> __u64 user_data; /* data to be passed back at completion time */ >>>>> union { >>>>> __u16 buf_index; /* index into fixed buffers, if used */ >>>>> + __u64 splice_len; >>>>> __u64 __pad2[3]; >>>>> }; >>>>> }; >>>> >>>> Not a huge fan of this, also mean splice can't ever used fixed buffers. >>>> Hmm... >>> >>> But it's not like splice() ever uses user buffers. Isn't it? vmsplice >>> does, but that's another opcode. >> >> I guess that's true, I had vmsplice on my mind for this as well. But >> won't be a problem there, since it doesn't take 6 arguments like splice >> does. >> >> Another option is to do an indirect for splice, stuff the arguments in a >> struct that's passed in as a pointer in ->addr. A bit slower, but >> probably not a huge deal. >> >>>>> @@ -67,6 +75,9 @@ enum { >>>>> /* always go async */ >>>>> #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) >>>>> >>>>> +/* op custom flags */ >>>>> +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) >>>>> + >>>> >>>> I don't think it's unreasonable to say that if you specify >>>> IOSQE_FIXED_FILE, then both are fixed. If not, then none of them are. >>>> What do you think? >>>> >>> >>> It's plausible to register only one end for splicing, e.g. splice from >>> short-lived sockets to pre-registered buffers-pipes. And it's clearer >>> do it now. >> >> You're probably right, though it's a bit nasty to add an unrelated flag >> in the splice flag space... We should probably reserve it in splice >> instead, and just not have it available from the regular system call. >> > Agree, it looks bad. I don't want to add it into sqe->splice_flags to > not clash with splice(2) in the future, but could have a separate > field in @sqe... or can leave in in sqe->flags, as it's done in the > patch, but that's like a portion of bits would be opcode specific and > we would need to set rules for their use. It won't clash with splice(2), just make that flag illegal if done through splice(2) directly. Honestly I think that's (by FAR) the best way to do it, having a private io_uring flag that acts as a splice flag is really confusing and prone to breakage. Not that it's a huge issue with splice as the flags have been stable for years, so don't really see a high risk of collision. But we should still do it right, which means adding SPLICE_F_OUT_FIXED or whatever you want to call it. Do that as a prep patch, make do_splice() into __do_splice(), and have io_uring call __do_splice(). Currently splice(2) is permissive in terms of flags, so maybe just mask it in do_splice() to be on the safe side. Then we know only internal users will set SPLICE_F_OUT_FIXED, and we'll never run into the risk of having a collision as it's part of the flag space anyway. The sqe->flags space is very tight, so adding a splice specific opcode there would be bad.
Hi Pavel, Thank you for the patch! Yet something to improve: [auto build test ERROR on next-20200121] [cannot apply to linus/master v5.5-rc7 v5.5-rc6 v5.5-rc5 v5.5-rc7] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Pavel-Begunkov/splice-2-support-for-io_uring/20200124-114107 base: bc80e6ad8ee12b0ee6c7d05faf1ebd3f2fb8f1e5 config: powerpc64-defconfig (attached as .config) compiler: powerpc64-linux-gcc (GCC) 7.5.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=7.5.0 make.cross ARCH=powerpc64 If you fix the issue, kindly add following tag Reported-by: kbuild test robot <lkp@intel.com> All errors (new ones prefixed by >>): fs/io_uring.c: In function 'io_splice_punt': >> fs/io_uring.c:2364:6: error: too few arguments to function 'get_pipe_info' if (get_pipe_info(file)) ^~~~~~~~~~~~~ In file included from include/linux/splice.h:12:0, from include/linux/skbuff.h:36, from include/linux/if_ether.h:19, from include/uapi/linux/ethtool.h:19, from include/linux/ethtool.h:18, from include/linux/netdevice.h:37, from include/net/sock.h:46, from fs/io_uring.c:64: include/linux/pipe_fs_i.h:266:25: note: declared here struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); ^~~~~~~~~~~~~ vim +/get_pipe_info +2364 fs/io_uring.c 2361 2362 static bool io_splice_punt(struct file *file) 2363 { > 2364 if (get_pipe_info(file)) 2365 return false; 2366 if (!io_file_supports_async(file)) 2367 return true; 2368 return !(file->f_mode & O_NONBLOCK); 2369 } 2370 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
Hi Pavel, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on next-20200121] [cannot apply to linus/master v5.5-rc7 v5.5-rc6 v5.5-rc5 v5.5-rc7] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Pavel-Begunkov/splice-2-support-for-io_uring/20200124-114107 base: bc80e6ad8ee12b0ee6c7d05faf1ebd3f2fb8f1e5 config: x86_64-randconfig-a002-20200125 (attached as .config) compiler: gcc-7 (Debian 7.5.0-3) 7.5.0 reproduce: # save the attached .config to linux build tree make ARCH=x86_64 If you fix the issue, kindly add following tag Reported-by: kbuild test robot <lkp@intel.com> All warnings (new ones prefixed by >>): In file included from include/linux/export.h:43:0, from include/linux/linkage.h:7, from include/linux/kernel.h:8, from fs/io_uring.c:42: fs/io_uring.c: In function 'io_splice_punt': fs/io_uring.c:2364:6: error: too few arguments to function 'get_pipe_info' if (get_pipe_info(file)) ^ include/linux/compiler.h:58:52: note: in definition of macro '__trace_if_var' #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond)) ^~~~ >> fs/io_uring.c:2364:2: note: in expansion of macro 'if' if (get_pipe_info(file)) ^~ In file included from include/linux/splice.h:12:0, from include/linux/skbuff.h:36, from include/linux/if_ether.h:19, from include/uapi/linux/ethtool.h:19, from include/linux/ethtool.h:18, from include/linux/netdevice.h:37, from include/net/sock.h:46, from fs/io_uring.c:64: include/linux/pipe_fs_i.h:266:25: note: declared here struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); ^~~~~~~~~~~~~ In file included from include/linux/export.h:43:0, from include/linux/linkage.h:7, from include/linux/kernel.h:8, from fs/io_uring.c:42: fs/io_uring.c:2364:6: error: too few arguments to function 'get_pipe_info' if (get_pipe_info(file)) ^ include/linux/compiler.h:58:61: note: in definition of macro '__trace_if_var' #define __trace_if_var(cond) (__builtin_constant_p(cond) ? (cond) : __trace_if_value(cond)) ^~~~ >> fs/io_uring.c:2364:2: note: in expansion of macro 'if' if (get_pipe_info(file)) ^~ In file included from include/linux/splice.h:12:0, from include/linux/skbuff.h:36, from include/linux/if_ether.h:19, from include/uapi/linux/ethtool.h:19, from include/linux/ethtool.h:18, from include/linux/netdevice.h:37, from include/net/sock.h:46, from fs/io_uring.c:64: include/linux/pipe_fs_i.h:266:25: note: declared here struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); ^~~~~~~~~~~~~ In file included from include/linux/export.h:43:0, from include/linux/linkage.h:7, from include/linux/kernel.h:8, from fs/io_uring.c:42: fs/io_uring.c:2364:6: error: too few arguments to function 'get_pipe_info' if (get_pipe_info(file)) ^ include/linux/compiler.h:69:3: note: in definition of macro '__trace_if_value' (cond) ? \ ^~~~ include/linux/compiler.h:56:28: note: in expansion of macro '__trace_if_var' #define if(cond, ...) if ( __trace_if_var( !!(cond , ## __VA_ARGS__) ) ) ^~~~~~~~~~~~~~ >> fs/io_uring.c:2364:2: note: in expansion of macro 'if' if (get_pipe_info(file)) ^~ In file included from include/linux/splice.h:12:0, from include/linux/skbuff.h:36, from include/linux/if_ether.h:19, from include/uapi/linux/ethtool.h:19, from include/linux/ethtool.h:18, from include/linux/netdevice.h:37, from include/net/sock.h:46, from fs/io_uring.c:64: include/linux/pipe_fs_i.h:266:25: note: declared here struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); ^~~~~~~~~~~~~ vim +/if +2364 fs/io_uring.c 2361 2362 static bool io_splice_punt(struct file *file) 2363 { > 2364 if (get_pipe_info(file)) 2365 return false; 2366 if (!io_file_supports_async(file)) 2367 return true; 2368 return !(file->f_mode & O_NONBLOCK); 2369 } 2370 --- 0-DAY kernel test infrastructure Open Source Technology Center https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org Intel Corporation
diff --git a/fs/io_uring.c b/fs/io_uring.c index e9e4aee0fb99..44ec9c63c41d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -74,6 +74,7 @@ #include <linux/namei.h> #include <linux/fsnotify.h> #include <linux/fadvise.h> +#include <linux/splice.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -373,6 +374,15 @@ struct io_rw { u64 len; }; +struct io_splice { + struct file *file_in; + struct file *file_out; + loff_t __user *off_in; + loff_t __user *off_out; + u64 len; + unsigned int flags; +}; + struct io_connect { struct file *file; struct sockaddr __user *addr; @@ -534,6 +544,7 @@ struct io_kiocb { struct io_files_update files_update; struct io_fadvise fadvise; struct io_madvise madvise; + struct io_splice splice; }; struct io_async_ctx *io; @@ -719,6 +730,11 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, .fd_non_neg = 1, }, + [IORING_OP_SPLICE] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + } }; static void io_wq_submit_work(struct io_wq_work **workptr); @@ -730,6 +746,10 @@ static void io_queue_linked_timeout(struct io_kiocb *req); static int __io_sqe_files_update(struct io_ring_ctx *ctx, struct io_uring_files_update *ip, unsigned nr_args); +static int io_get_file(struct io_submit_state *state, + struct io_ring_ctx *ctx, + int fd, struct file **out_file, + bool fixed); static struct kmem_cache *req_cachep; @@ -2322,6 +2342,61 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt, return ret; } +static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_splice* sp = &req->splice; + + sp->file_out = NULL; + sp->off_in = u64_to_user_ptr(READ_ONCE(sqe->off)); + sp->off_out = u64_to_user_ptr(READ_ONCE(sqe->off_out)); + sp->len = READ_ONCE(sqe->splice_len); + sp->flags = READ_ONCE(sqe->splice_flags); + + if (unlikely(READ_ONCE(sqe->ioprio) || (sp->flags & ~SPLICE_F_ALL))) + return -EINVAL; + + return io_get_file(NULL, req->ctx, READ_ONCE(sqe->fd_out), + &sp->file_out, (sp->flags & IOSQE_SPLICE_FIXED_OUT)); +} + +static bool io_splice_punt(struct file *file) +{ + if (get_pipe_info(file)) + return false; + if (!io_file_supports_async(file)) + return true; + return !(file->f_mode & O_NONBLOCK); +} + +static int io_splice(struct io_kiocb *req, struct io_kiocb **nxt, + bool force_nonblock) +{ + struct io_splice* sp = &req->splice; + struct file *in = sp->file_in; + struct file *out = sp->file_out; + unsigned int flags = sp->flags; + long ret; + + if (force_nonblock) { + if (io_splice_punt(in) || io_splice_punt(out)) { + req->flags |= REQ_F_MUST_PUNT; + return -EAGAIN; + } + flags |= SPLICE_F_NONBLOCK; + } + + ret = do_splice(in, sp->off_in, out, sp->off_out, sp->len, flags); + if (force_nonblock && ret == -EAGAIN) + return -EAGAIN; + + io_put_file(req->ctx, out, (flags & IOSQE_SPLICE_FIXED_OUT)); + io_cqring_add_event(req, ret); + if (ret != sp->len) + req_set_fail_links(req); + io_put_req_find_next(req, nxt); + return 0; +} + /* * IORING_OP_NOP just posts a completion event, nothing else. */ @@ -4044,6 +4119,9 @@ static int io_req_defer_prep(struct io_kiocb *req, case IORING_OP_OPENAT2: ret = io_openat2_prep(req, sqe); break; + case IORING_OP_SPLICE: + ret = io_splice_prep(req, sqe); + break; default: printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", req->opcode); @@ -4272,6 +4350,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, } ret = io_openat2(req, nxt, force_nonblock); break; + case IORING_OP_SPLICE: + if (sqe) { + ret = io_splice_prep(req, sqe); + if (ret < 0) + break; + } + ret = io_splice(req, nxt, force_nonblock); + break; default: ret = -EINVAL; break; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 57d05cc5e271..f234b13e7ed3 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -23,8 +23,14 @@ struct io_uring_sqe { __u64 off; /* offset into file */ __u64 addr2; }; - __u64 addr; /* pointer to buffer or iovecs */ - __u32 len; /* buffer size or number of iovecs */ + union { + __u64 addr; /* pointer to buffer or iovecs */ + __u64 off_out; + }; + union { + __u32 len; /* buffer size or number of iovecs */ + __s32 fd_out; + }; union { __kernel_rwf_t rw_flags; __u32 fsync_flags; @@ -37,10 +43,12 @@ struct io_uring_sqe { __u32 open_flags; __u32 statx_flags; __u32 fadvise_advice; + __u32 splice_flags; }; __u64 user_data; /* data to be passed back at completion time */ union { __u16 buf_index; /* index into fixed buffers, if used */ + __u64 splice_len; __u64 __pad2[3]; }; }; @@ -67,6 +75,9 @@ enum { /* always go async */ #define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT) +/* op custom flags */ +#define IOSQE_SPLICE_FIXED_OUT (1U << 16) + /* * io_uring_setup() flags */ @@ -106,6 +117,7 @@ enum { IORING_OP_SEND, IORING_OP_RECV, IORING_OP_OPENAT2, + IORING_OP_SPLICE, /* this goes last, obviously */ IORING_OP_LAST,
Add support for splice(2). Nothing new, just reuse do_splice(). Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> --- fs/io_uring.c | 86 +++++++++++++++++++++++++++++++++++ include/uapi/linux/io_uring.h | 16 ++++++- 2 files changed, 100 insertions(+), 2 deletions(-)