Message ID | 83c72471f9f79fa982508bd4db472686a67b8320.1601478774.git.yifeifz2@illinois.edu (mailing list archive) |
---|---|
State | Not Applicable |
Headers | show |
Series | seccomp: Add bitmap cache of constant allow filter results | expand |
On Wed, Sep 30, 2020 at 10:19:14AM -0500, YiFei Zhu wrote: > From: YiFei Zhu <yifeifz2@illinois.edu> > > The fast (common) path for seccomp should be that the filter permits > the syscall to pass through, and failing seccomp is expected to be > an exceptional case; it is not expected for userspace to call a > denylisted syscall over and over. > > This first finds the current allow bitmask by iterating through > syscall_arches[] array and comparing it to the one in struct > seccomp_data; this loop is expected to be unrolled. It then > does a test_bit against the bitmask. If the bit is set, then > there is no need to run the full filter; it returns > SECCOMP_RET_ALLOW immediately. > > Co-developed-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > Signed-off-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu> I'd like the content/ordering of this and the emulator patch to be reorganized a bit. I'd like to see the infrastructure of the cache added first (along with the "always allow" test logic in this patch), with the emulator missing: i.e. the patch is a logical no-op: no behavior changes because nothing ever changes the cache bits, but all the operational logic, structure changes, etc, is in place. Then the next patch would be replacing the no-op with the emulator. > --- > kernel/seccomp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 52 insertions(+) > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > index f09c9e74ae05..bed3b2a7f6c8 100644 > --- a/kernel/seccomp.c > +++ b/kernel/seccomp.c > @@ -172,6 +172,12 @@ struct seccomp_cache_filter_data { }; > static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) > { > } > + > +static inline bool seccomp_cache_check(const struct seccomp_filter *sfilter, bikeshedding: "cache check" doesn't tell me anything about what it's actually checking for. How about calling this seccomp_is_constant_allow() or something that reflects both the "bool" return ("is") and what that bool means ("should always be allowed"). > + const struct seccomp_data *sd) > +{ > + return false; > +} > #endif /* CONFIG_SECCOMP_CACHE_NR_ONLY */ > > /** > @@ -331,6 +337,49 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) > return 0; > } > > +#ifdef CONFIG_SECCOMP_CACHE_NR_ONLY > +static bool seccomp_cache_check_bitmap(const void *bitmap, size_t bitmap_size, Please also mark as "inline". > + int syscall_nr) > +{ > + if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size)) > + return false; > + syscall_nr = array_index_nospec(syscall_nr, bitmap_size); > + > + return test_bit(syscall_nr, bitmap); > +} > + > +/** > + * seccomp_cache_check - lookup seccomp cache > + * @sfilter: The seccomp filter > + * @sd: The seccomp data to lookup the cache with > + * > + * Returns true if the seccomp_data is cached and allowed. > + */ > +static bool seccomp_cache_check(const struct seccomp_filter *sfilter, inline too. > + const struct seccomp_data *sd) > +{ > + int syscall_nr = sd->nr; > + const struct seccomp_cache_filter_data *cache = &sfilter->cache; > + > +#ifdef SECCOMP_ARCH_DEFAULT > + if (likely(sd->arch == SECCOMP_ARCH_DEFAULT)) > + return seccomp_cache_check_bitmap(cache->syscall_allow_default, > + SECCOMP_ARCH_DEFAULT_NR, > + syscall_nr); > +#endif /* SECCOMP_ARCH_DEFAULT */ > + > +#ifdef SECCOMP_ARCH_COMPAT > + if (likely(sd->arch == SECCOMP_ARCH_COMPAT)) > + return seccomp_cache_check_bitmap(cache->syscall_allow_compat, > + SECCOMP_ARCH_COMPAT_NR, > + syscall_nr); > +#endif /* SECCOMP_ARCH_COMPAT */ > + > + WARN_ON_ONCE(true); > + return false; > +} > +#endif /* CONFIG_SECCOMP_CACHE_NR_ONLY */ > + > /** > * seccomp_run_filters - evaluates all seccomp filters against @sd > * @sd: optional seccomp data to be passed to filters > @@ -353,6 +402,9 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, > if (WARN_ON(f == NULL)) > return SECCOMP_RET_KILL_PROCESS; > > + if (seccomp_cache_check(f, sd)) > + return SECCOMP_RET_ALLOW; > + > /* > * All filters in the list are evaluated and the lowest BPF return > * value always takes priority (ignoring the DATA). > -- > 2.28.0 > Otherwise, yup, looks good.
On Wed, Sep 30, 2020 at 4:32 PM Kees Cook <keescook@chromium.org> wrote: > > On Wed, Sep 30, 2020 at 10:19:14AM -0500, YiFei Zhu wrote: > > From: YiFei Zhu <yifeifz2@illinois.edu> > > > > The fast (common) path for seccomp should be that the filter permits > > the syscall to pass through, and failing seccomp is expected to be > > an exceptional case; it is not expected for userspace to call a > > denylisted syscall over and over. > > > > This first finds the current allow bitmask by iterating through > > syscall_arches[] array and comparing it to the one in struct > > seccomp_data; this loop is expected to be unrolled. It then > > does a test_bit against the bitmask. If the bit is set, then > > there is no need to run the full filter; it returns > > SECCOMP_RET_ALLOW immediately. > > > > Co-developed-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > > Signed-off-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > > Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu> > > I'd like the content/ordering of this and the emulator patch to be reorganized a bit. > I'd like to see the infrastructure of the cache added first (along with > the "always allow" test logic in this patch), with the emulator missing: > i.e. the patch is a logical no-op: no behavior changes because nothing > ever changes the cache bits, but all the operational logic, structure > changes, etc, is in place. Then the next patch would be replacing the > no-op with the emulator. > > > --- > > kernel/seccomp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ > > 1 file changed, 52 insertions(+) > > > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > > index f09c9e74ae05..bed3b2a7f6c8 100644 > > --- a/kernel/seccomp.c > > +++ b/kernel/seccomp.c > > @@ -172,6 +172,12 @@ struct seccomp_cache_filter_data { }; > > static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) > > { > > } > > + > > +static inline bool seccomp_cache_check(const struct seccomp_filter *sfilter, > > bikeshedding: "cache check" doesn't tell me anything about what it's > actually checking for. How about calling this seccomp_is_constant_allow() or > something that reflects both the "bool" return ("is") and what that bool > means ("should always be allowed"). We have a naming conflict here. I'm about to rename seccomp_emu_is_const_allow to seccomp_is_const_allow. Adding another seccomp_is_constant_allow is confusing. Suggestions? I think I would prefer to change seccomp_cache_check to seccomp_cache_check_allow. While in this patch set seccomp_cache_check does imply the filter is "constant" allow, argument-processing cache may change this, and specifying an "allow" in the name specifies the 'what that bool means ("should always be allowed")'. YiFei Zhu
On Thu, Oct 08, 2020 at 07:17:39PM -0500, YiFei Zhu wrote: > On Wed, Sep 30, 2020 at 4:32 PM Kees Cook <keescook@chromium.org> wrote: > > > > On Wed, Sep 30, 2020 at 10:19:14AM -0500, YiFei Zhu wrote: > > > From: YiFei Zhu <yifeifz2@illinois.edu> > > > > > > The fast (common) path for seccomp should be that the filter permits > > > the syscall to pass through, and failing seccomp is expected to be > > > an exceptional case; it is not expected for userspace to call a > > > denylisted syscall over and over. > > > > > > This first finds the current allow bitmask by iterating through > > > syscall_arches[] array and comparing it to the one in struct > > > seccomp_data; this loop is expected to be unrolled. It then > > > does a test_bit against the bitmask. If the bit is set, then > > > there is no need to run the full filter; it returns > > > SECCOMP_RET_ALLOW immediately. > > > > > > Co-developed-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > > > Signed-off-by: Dimitrios Skarlatos <dskarlat@cs.cmu.edu> > > > Signed-off-by: YiFei Zhu <yifeifz2@illinois.edu> > > > > I'd like the content/ordering of this and the emulator patch to be reorganized a bit. > > I'd like to see the infrastructure of the cache added first (along with > > the "always allow" test logic in this patch), with the emulator missing: > > i.e. the patch is a logical no-op: no behavior changes because nothing > > ever changes the cache bits, but all the operational logic, structure > > changes, etc, is in place. Then the next patch would be replacing the > > no-op with the emulator. > > > > > --- > > > kernel/seccomp.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ > > > 1 file changed, 52 insertions(+) > > > > > > diff --git a/kernel/seccomp.c b/kernel/seccomp.c > > > index f09c9e74ae05..bed3b2a7f6c8 100644 > > > --- a/kernel/seccomp.c > > > +++ b/kernel/seccomp.c > > > @@ -172,6 +172,12 @@ struct seccomp_cache_filter_data { }; > > > static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) > > > { > > > } > > > + > > > +static inline bool seccomp_cache_check(const struct seccomp_filter *sfilter, > > > > bikeshedding: "cache check" doesn't tell me anything about what it's > > actually checking for. How about calling this seccomp_is_constant_allow() or > > something that reflects both the "bool" return ("is") and what that bool > > means ("should always be allowed"). > > We have a naming conflict here. I'm about to rename > seccomp_emu_is_const_allow to seccomp_is_const_allow. Adding another > seccomp_is_constant_allow is confusing. Suggestions? > > I think I would prefer to change seccomp_cache_check to > seccomp_cache_check_allow. While in this patch set seccomp_cache_check > does imply the filter is "constant" allow, argument-processing cache > may change this, and specifying an "allow" in the name specifies the > 'what that bool means ("should always be allowed")'. Yeah, that seems good.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c index f09c9e74ae05..bed3b2a7f6c8 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -172,6 +172,12 @@ struct seccomp_cache_filter_data { }; static inline void seccomp_cache_prepare(struct seccomp_filter *sfilter) { } + +static inline bool seccomp_cache_check(const struct seccomp_filter *sfilter, + const struct seccomp_data *sd) +{ + return false; +} #endif /* CONFIG_SECCOMP_CACHE_NR_ONLY */ /** @@ -331,6 +337,49 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) return 0; } +#ifdef CONFIG_SECCOMP_CACHE_NR_ONLY +static bool seccomp_cache_check_bitmap(const void *bitmap, size_t bitmap_size, + int syscall_nr) +{ + if (unlikely(syscall_nr < 0 || syscall_nr >= bitmap_size)) + return false; + syscall_nr = array_index_nospec(syscall_nr, bitmap_size); + + return test_bit(syscall_nr, bitmap); +} + +/** + * seccomp_cache_check - lookup seccomp cache + * @sfilter: The seccomp filter + * @sd: The seccomp data to lookup the cache with + * + * Returns true if the seccomp_data is cached and allowed. + */ +static bool seccomp_cache_check(const struct seccomp_filter *sfilter, + const struct seccomp_data *sd) +{ + int syscall_nr = sd->nr; + const struct seccomp_cache_filter_data *cache = &sfilter->cache; + +#ifdef SECCOMP_ARCH_DEFAULT + if (likely(sd->arch == SECCOMP_ARCH_DEFAULT)) + return seccomp_cache_check_bitmap(cache->syscall_allow_default, + SECCOMP_ARCH_DEFAULT_NR, + syscall_nr); +#endif /* SECCOMP_ARCH_DEFAULT */ + +#ifdef SECCOMP_ARCH_COMPAT + if (likely(sd->arch == SECCOMP_ARCH_COMPAT)) + return seccomp_cache_check_bitmap(cache->syscall_allow_compat, + SECCOMP_ARCH_COMPAT_NR, + syscall_nr); +#endif /* SECCOMP_ARCH_COMPAT */ + + WARN_ON_ONCE(true); + return false; +} +#endif /* CONFIG_SECCOMP_CACHE_NR_ONLY */ + /** * seccomp_run_filters - evaluates all seccomp filters against @sd * @sd: optional seccomp data to be passed to filters @@ -353,6 +402,9 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd, if (WARN_ON(f == NULL)) return SECCOMP_RET_KILL_PROCESS; + if (seccomp_cache_check(f, sd)) + return SECCOMP_RET_ALLOW; + /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA).