Message ID | 20200827145831.95189-4-sgarzare@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | io_uring: add restrictions to support untrusted applications and guests | expand |
Hi Jens, On Thu, Aug 27, 2020 at 04:58:31PM +0200, Stefano Garzarella wrote: > This patch adds a new IORING_SETUP_R_DISABLED flag to start the > rings disabled, allowing the user to register restrictions, > buffers, files, before to start processing SQEs. > > When IORING_SETUP_R_DISABLED is set, SQE are not processed and > SQPOLL kthread is not started. > > The restrictions registration are allowed only when the rings > are disable to prevent concurrency issue while processing SQEs. > > The rings can be enabled using IORING_REGISTER_ENABLE_RINGS > opcode with io_uring_register(2). > > Suggested-by: Jens Axboe <axboe@kernel.dk> > Reviewed-by: Kees Cook <keescook@chromium.org> > Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > --- > v4: > - fixed io_uring_enter() exit path when ring is disabled > > v3: > - enabled restrictions only when the rings start > > RFC v2: > - removed return value of io_sq_offload_start() > --- > fs/io_uring.c | 52 ++++++++++++++++++++++++++++++----- > include/uapi/linux/io_uring.h | 2 ++ > 2 files changed, 47 insertions(+), 7 deletions(-) > > diff --git a/fs/io_uring.c b/fs/io_uring.c > index 5f62997c147b..b036f3373fbe 100644 > --- a/fs/io_uring.c > +++ b/fs/io_uring.c > @@ -226,6 +226,7 @@ struct io_restriction { > DECLARE_BITMAP(sqe_op, IORING_OP_LAST); > u8 sqe_flags_allowed; > u8 sqe_flags_required; > + bool registered; > }; > > struct io_ring_ctx { > @@ -7497,8 +7498,8 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, > return ret; > } > > -static int io_sq_offload_start(struct io_ring_ctx *ctx, > - struct io_uring_params *p) > +static int io_sq_offload_create(struct io_ring_ctx *ctx, > + struct io_uring_params *p) > { > int ret; > > @@ -7532,7 +7533,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, > ctx->sqo_thread = NULL; > goto err; > } > - wake_up_process(ctx->sqo_thread); > } else if (p->flags & IORING_SETUP_SQ_AFF) { > /* Can't have SQ_AFF without SQPOLL */ > ret = -EINVAL; > @@ -7549,6 +7549,12 @@ 
static int io_sq_offload_start(struct io_ring_ctx *ctx, > return ret; > } > > +static void io_sq_offload_start(struct io_ring_ctx *ctx) > +{ > + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread) > + wake_up_process(ctx->sqo_thread); > +} > + > static inline void __io_unaccount_mem(struct user_struct *user, > unsigned long nr_pages) > { > @@ -8295,6 +8301,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, > if (!percpu_ref_tryget(&ctx->refs)) > goto out_fput; > > + if (ctx->flags & IORING_SETUP_R_DISABLED) > + goto out_fput; > + While writing the man page paragraph, I discovered that if the rings are disabled, io_uring_enter() returns an ENXIO error, coming from the previous check. I'm not sure it is the best choice; maybe I should return EBADFD or another error. What do you suggest? I'll add a test for this case. Thanks, Stefano
On 9/8/20 7:44 AM, Stefano Garzarella wrote: > Hi Jens, > > On Thu, Aug 27, 2020 at 04:58:31PM +0200, Stefano Garzarella wrote: >> This patch adds a new IORING_SETUP_R_DISABLED flag to start the >> rings disabled, allowing the user to register restrictions, >> buffers, files, before to start processing SQEs. >> >> When IORING_SETUP_R_DISABLED is set, SQE are not processed and >> SQPOLL kthread is not started. >> >> The restrictions registration are allowed only when the rings >> are disable to prevent concurrency issue while processing SQEs. >> >> The rings can be enabled using IORING_REGISTER_ENABLE_RINGS >> opcode with io_uring_register(2). >> >> Suggested-by: Jens Axboe <axboe@kernel.dk> >> Reviewed-by: Kees Cook <keescook@chromium.org> >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >> --- >> v4: >> - fixed io_uring_enter() exit path when ring is disabled >> >> v3: >> - enabled restrictions only when the rings start >> >> RFC v2: >> - removed return value of io_sq_offload_start() >> --- >> fs/io_uring.c | 52 ++++++++++++++++++++++++++++++----- >> include/uapi/linux/io_uring.h | 2 ++ >> 2 files changed, 47 insertions(+), 7 deletions(-) >> >> diff --git a/fs/io_uring.c b/fs/io_uring.c >> index 5f62997c147b..b036f3373fbe 100644 >> --- a/fs/io_uring.c >> +++ b/fs/io_uring.c >> @@ -226,6 +226,7 @@ struct io_restriction { >> DECLARE_BITMAP(sqe_op, IORING_OP_LAST); >> u8 sqe_flags_allowed; >> u8 sqe_flags_required; >> + bool registered; >> }; >> >> struct io_ring_ctx { >> @@ -7497,8 +7498,8 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, >> return ret; >> } >> >> -static int io_sq_offload_start(struct io_ring_ctx *ctx, >> - struct io_uring_params *p) >> +static int io_sq_offload_create(struct io_ring_ctx *ctx, >> + struct io_uring_params *p) >> { >> int ret; >> >> @@ -7532,7 +7533,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, >> ctx->sqo_thread = NULL; >> goto err; >> } >> - wake_up_process(ctx->sqo_thread); >> } else if (p->flags & 
IORING_SETUP_SQ_AFF) { >> /* Can't have SQ_AFF without SQPOLL */ >> ret = -EINVAL; >> @@ -7549,6 +7549,12 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, >> return ret; >> } >> >> +static void io_sq_offload_start(struct io_ring_ctx *ctx) >> +{ >> + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread) >> + wake_up_process(ctx->sqo_thread); >> +} >> + >> static inline void __io_unaccount_mem(struct user_struct *user, >> unsigned long nr_pages) >> { >> @@ -8295,6 +8301,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, >> if (!percpu_ref_tryget(&ctx->refs)) >> goto out_fput; >> >> + if (ctx->flags & IORING_SETUP_R_DISABLED) >> + goto out_fput; >> + > > While writing the man page paragraph, I discovered that if the rings are > disabled I returned ENXIO error in io_uring_enter(), coming from the previous > check. > > I'm not sure it is the best one, maybe I can return EBADFD or another > error. > > What do you suggest? EBADFD seems indeed the most appropriate - the fd is valid, but not in the right state to do this. > I'll add a test for this case. Thanks!
On Tue, Sep 08, 2020 at 07:57:08AM -0600, Jens Axboe wrote: > On 9/8/20 7:44 AM, Stefano Garzarella wrote: > > Hi Jens, > > > > On Thu, Aug 27, 2020 at 04:58:31PM +0200, Stefano Garzarella wrote: > >> This patch adds a new IORING_SETUP_R_DISABLED flag to start the > >> rings disabled, allowing the user to register restrictions, > >> buffers, files, before to start processing SQEs. > >> > >> When IORING_SETUP_R_DISABLED is set, SQE are not processed and > >> SQPOLL kthread is not started. > >> > >> The restrictions registration are allowed only when the rings > >> are disable to prevent concurrency issue while processing SQEs. > >> > >> The rings can be enabled using IORING_REGISTER_ENABLE_RINGS > >> opcode with io_uring_register(2). > >> > >> Suggested-by: Jens Axboe <axboe@kernel.dk> > >> Reviewed-by: Kees Cook <keescook@chromium.org> > >> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> > >> --- > >> v4: > >> - fixed io_uring_enter() exit path when ring is disabled > >> > >> v3: > >> - enabled restrictions only when the rings start > >> > >> RFC v2: > >> - removed return value of io_sq_offload_start() > >> --- > >> fs/io_uring.c | 52 ++++++++++++++++++++++++++++++----- > >> include/uapi/linux/io_uring.h | 2 ++ > >> 2 files changed, 47 insertions(+), 7 deletions(-) > >> > >> diff --git a/fs/io_uring.c b/fs/io_uring.c > >> index 5f62997c147b..b036f3373fbe 100644 > >> --- a/fs/io_uring.c > >> +++ b/fs/io_uring.c > >> @@ -226,6 +226,7 @@ struct io_restriction { > >> DECLARE_BITMAP(sqe_op, IORING_OP_LAST); > >> u8 sqe_flags_allowed; > >> u8 sqe_flags_required; > >> + bool registered; > >> }; > >> > >> struct io_ring_ctx { > >> @@ -7497,8 +7498,8 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, > >> return ret; > >> } > >> > >> -static int io_sq_offload_start(struct io_ring_ctx *ctx, > >> - struct io_uring_params *p) > >> +static int io_sq_offload_create(struct io_ring_ctx *ctx, > >> + struct io_uring_params *p) > >> { > >> int ret; > >> > >> @@ -7532,7 
+7533,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, > >> ctx->sqo_thread = NULL; > >> goto err; > >> } > >> - wake_up_process(ctx->sqo_thread); > >> } else if (p->flags & IORING_SETUP_SQ_AFF) { > >> /* Can't have SQ_AFF without SQPOLL */ > >> ret = -EINVAL; > >> @@ -7549,6 +7549,12 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, > >> return ret; > >> } > >> > >> +static void io_sq_offload_start(struct io_ring_ctx *ctx) > >> +{ > >> + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread) > >> + wake_up_process(ctx->sqo_thread); > >> +} > >> + > >> static inline void __io_unaccount_mem(struct user_struct *user, > >> unsigned long nr_pages) > >> { > >> @@ -8295,6 +8301,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, > >> if (!percpu_ref_tryget(&ctx->refs)) > >> goto out_fput; > >> > >> + if (ctx->flags & IORING_SETUP_R_DISABLED) > >> + goto out_fput; > >> + > > > > While writing the man page paragraph, I discovered that if the rings are > > disabled I returned ENXIO error in io_uring_enter(), coming from the previous > > check. > > > > I'm not sure it is the best one, maybe I can return EBADFD or another > > error. > > > > What do you suggest? > > EBADFD seems indeed the most appropriate - the fd is valid, but not in the > right state to do this. Yeah, the same interpretation as mine! Also, in io_uring_register() I'm returning EINVAL if the rings are not disabled and the user wants to register restrictions. Maybe also in this case I can return EBADFD. I'll send a patch with the fixes. Thanks, Stefano
On 9/8/20 8:10 AM, Stefano Garzarella wrote: > On Tue, Sep 08, 2020 at 07:57:08AM -0600, Jens Axboe wrote: >> On 9/8/20 7:44 AM, Stefano Garzarella wrote: >>> Hi Jens, >>> >>> On Thu, Aug 27, 2020 at 04:58:31PM +0200, Stefano Garzarella wrote: >>>> This patch adds a new IORING_SETUP_R_DISABLED flag to start the >>>> rings disabled, allowing the user to register restrictions, >>>> buffers, files, before to start processing SQEs. >>>> >>>> When IORING_SETUP_R_DISABLED is set, SQE are not processed and >>>> SQPOLL kthread is not started. >>>> >>>> The restrictions registration are allowed only when the rings >>>> are disable to prevent concurrency issue while processing SQEs. >>>> >>>> The rings can be enabled using IORING_REGISTER_ENABLE_RINGS >>>> opcode with io_uring_register(2). >>>> >>>> Suggested-by: Jens Axboe <axboe@kernel.dk> >>>> Reviewed-by: Kees Cook <keescook@chromium.org> >>>> Signed-off-by: Stefano Garzarella <sgarzare@redhat.com> >>>> --- >>>> v4: >>>> - fixed io_uring_enter() exit path when ring is disabled >>>> >>>> v3: >>>> - enabled restrictions only when the rings start >>>> >>>> RFC v2: >>>> - removed return value of io_sq_offload_start() >>>> --- >>>> fs/io_uring.c | 52 ++++++++++++++++++++++++++++++----- >>>> include/uapi/linux/io_uring.h | 2 ++ >>>> 2 files changed, 47 insertions(+), 7 deletions(-) >>>> >>>> diff --git a/fs/io_uring.c b/fs/io_uring.c >>>> index 5f62997c147b..b036f3373fbe 100644 >>>> --- a/fs/io_uring.c >>>> +++ b/fs/io_uring.c >>>> @@ -226,6 +226,7 @@ struct io_restriction { >>>> DECLARE_BITMAP(sqe_op, IORING_OP_LAST); >>>> u8 sqe_flags_allowed; >>>> u8 sqe_flags_required; >>>> + bool registered; >>>> }; >>>> >>>> struct io_ring_ctx { >>>> @@ -7497,8 +7498,8 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, >>>> return ret; >>>> } >>>> >>>> -static int io_sq_offload_start(struct io_ring_ctx *ctx, >>>> - struct io_uring_params *p) >>>> +static int io_sq_offload_create(struct io_ring_ctx *ctx, >>>> + struct 
io_uring_params *p) >>>> { >>>> int ret; >>>> >>>> @@ -7532,7 +7533,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, >>>> ctx->sqo_thread = NULL; >>>> goto err; >>>> } >>>> - wake_up_process(ctx->sqo_thread); >>>> } else if (p->flags & IORING_SETUP_SQ_AFF) { >>>> /* Can't have SQ_AFF without SQPOLL */ >>>> ret = -EINVAL; >>>> @@ -7549,6 +7549,12 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, >>>> return ret; >>>> } >>>> >>>> +static void io_sq_offload_start(struct io_ring_ctx *ctx) >>>> +{ >>>> + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread) >>>> + wake_up_process(ctx->sqo_thread); >>>> +} >>>> + >>>> static inline void __io_unaccount_mem(struct user_struct *user, >>>> unsigned long nr_pages) >>>> { >>>> @@ -8295,6 +8301,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, >>>> if (!percpu_ref_tryget(&ctx->refs)) >>>> goto out_fput; >>>> >>>> + if (ctx->flags & IORING_SETUP_R_DISABLED) >>>> + goto out_fput; >>>> + >>> >>> While writing the man page paragraph, I discovered that if the rings are >>> disabled I returned ENXIO error in io_uring_enter(), coming from the previous >>> check. >>> >>> I'm not sure it is the best one, maybe I can return EBADFD or another >>> error. >>> >>> What do you suggest? >> >> EBADFD seems indeed the most appropriate - the fd is valid, but not in the >> right state to do this. > > Yeah, the same interpretation as mine! > > Also, in io_uring_register() I'm returning EINVAL if the rings are not > disabled and the user wants to register restrictions. > Maybe also in this case I can return EBADFD. Yes let's do that, EINVAL is always way too overloaded, and it makes sense to use EBADFD consistently for any operation related to that.
diff --git a/fs/io_uring.c b/fs/io_uring.c index 5f62997c147b..b036f3373fbe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -226,6 +226,7 @@ struct io_restriction { DECLARE_BITMAP(sqe_op, IORING_OP_LAST); u8 sqe_flags_allowed; u8 sqe_flags_required; + bool registered; }; struct io_ring_ctx { @@ -7497,8 +7498,8 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx, return ret; } -static int io_sq_offload_start(struct io_ring_ctx *ctx, - struct io_uring_params *p) +static int io_sq_offload_create(struct io_ring_ctx *ctx, + struct io_uring_params *p) { int ret; @@ -7532,7 +7533,6 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, ctx->sqo_thread = NULL; goto err; } - wake_up_process(ctx->sqo_thread); } else if (p->flags & IORING_SETUP_SQ_AFF) { /* Can't have SQ_AFF without SQPOLL */ ret = -EINVAL; @@ -7549,6 +7549,12 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, return ret; } +static void io_sq_offload_start(struct io_ring_ctx *ctx) +{ + if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sqo_thread) + wake_up_process(ctx->sqo_thread); +} + static inline void __io_unaccount_mem(struct user_struct *user, unsigned long nr_pages) { @@ -8295,6 +8301,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, if (!percpu_ref_tryget(&ctx->refs)) goto out_fput; + if (ctx->flags & IORING_SETUP_R_DISABLED) + goto out_fput; + /* * For SQ polling, the thread will do all submissions and completions. 
* Just return the requested submit count, and wake the thread if @@ -8612,10 +8621,13 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p, if (ret) goto err; - ret = io_sq_offload_start(ctx, p); + ret = io_sq_offload_create(ctx, p); if (ret) goto err; + if (!(p->flags & IORING_SETUP_R_DISABLED)) + io_sq_offload_start(ctx); + memset(&p->sq_off, 0, sizeof(p->sq_off)); p->sq_off.head = offsetof(struct io_rings, sq.head); p->sq_off.tail = offsetof(struct io_rings, sq.tail); @@ -8678,7 +8690,8 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | - IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) + IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | + IORING_SETUP_R_DISABLED)) return -EINVAL; return io_uring_create(entries, &p, params); @@ -8761,8 +8774,12 @@ static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, size_t size; int i, ret; + /* Restrictions allowed only if rings started disabled */ + if (!(ctx->flags & IORING_SETUP_R_DISABLED)) + return -EINVAL; + /* We allow only a single restrictions registration */ - if (ctx->restricted) + if (ctx->restrictions.registered) return -EBUSY; if (!arg || nr_args > IORING_MAX_RESTRICTIONS) @@ -8814,12 +8831,27 @@ static int io_register_restrictions(struct io_ring_ctx *ctx, void __user *arg, if (ret != 0) memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); else - ctx->restricted = 1; + ctx->restrictions.registered = true; kfree(res); return ret; } +static int io_register_enable_rings(struct io_ring_ctx *ctx) +{ + if (!(ctx->flags & IORING_SETUP_R_DISABLED)) + return -EINVAL; + + if (ctx->restrictions.registered) + ctx->restricted = 1; + + ctx->flags &= ~IORING_SETUP_R_DISABLED; + + io_sq_offload_start(ctx); + + return 0; +} + static bool io_register_op_must_quiesce(int op) { switch (op) { @@ -8941,6 +8973,12 @@ static int __io_uring_register(struct 
io_ring_ctx *ctx, unsigned opcode, break; ret = io_unregister_personality(ctx, nr_args); break; + case IORING_REGISTER_ENABLE_RINGS: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_register_enable_rings(ctx); + break; case IORING_REGISTER_RESTRICTIONS: ret = io_register_restrictions(ctx, arg, nr_args); break; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 6e7f2e5e917b..a0c85e0e9016 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -95,6 +95,7 @@ enum { #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ #define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */ +#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */ enum { IORING_OP_NOP, @@ -268,6 +269,7 @@ enum { IORING_REGISTER_PERSONALITY = 9, IORING_UNREGISTER_PERSONALITY = 10, IORING_REGISTER_RESTRICTIONS = 11, + IORING_REGISTER_ENABLE_RINGS = 12, /* this goes last */ IORING_REGISTER_LAST