Message ID: 20210517203343.3941777-2-arnd@kernel.org
State:      New, archived
Series:     compat: remove compat_alloc_user_space callers
Arnd Bergmann <arnd@kernel.org> writes:

> From: Arnd Bergmann <arnd@arndb.de>
>
> The compat version of sys_kexec_load() uses compat_alloc_user_space to
> convert the user-provided arguments into the native format.
>
> Move the conversion into the regular implementation with
> an in_compat_syscall() check to simplify it and avoid the
> compat_alloc_user_space() call.
>
> compat_sys_kexec_load() now behaves the same as sys_kexec_load().

Is it possible to do this without in_compat_syscall(), and casting
pointers to a wrong type?  We open ourselves up to bugs whenever we lie
to the type system.

Skimming through the code it looks like it should be possible to not
need the in_compat_syscall and the casts to the wrong type by changing
the order of the code a little bit.

Eric

> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  include/linux/kexec.h |  2 -
>  kernel/kexec.c        | 95 +++++++++++++++++++------------------------
>  2 files changed, 42 insertions(+), 55 deletions(-)
>
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 0c994ae37729..f61e310d7a85 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -88,14 +88,12 @@ struct kexec_segment {
>  	size_t memsz;
>  };
>
> -#ifdef CONFIG_COMPAT
>  struct compat_kexec_segment {
>  	compat_uptr_t buf;
>  	compat_size_t bufsz;
>  	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
>  	compat_size_t memsz;
>  };
> -#endif
>
>  #ifdef CONFIG_KEXEC_FILE
>  struct purgatory_info {
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index c82c6c06f051..6618b1d9f00b 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -19,21 +19,46 @@
>
>  #include "kexec_internal.h"
>
> +static int copy_user_compat_segment_list(struct kimage *image,
> +					 unsigned long nr_segments,
> +					 void __user *segments)
> +{
> +	struct compat_kexec_segment __user *cs = segments;
> +	struct compat_kexec_segment segment;
> +	int i;
> +
> +	for (i = 0; i < nr_segments; i++) {
> +		if (copy_from_user(&segment, &cs[i], sizeof(segment)))
> +			return -EFAULT;
> +
> +		image->segment[i] = (struct kexec_segment) {
> +			.buf = compat_ptr(segment.buf),
> +			.bufsz = segment.bufsz,
> +			.mem = segment.mem,
> +			.memsz = segment.memsz,
> +		};
> +	}
> +
> +	return 0;
> +}
> +
> +
>  static int copy_user_segment_list(struct kimage *image,
>  				  unsigned long nr_segments,
>  				  struct kexec_segment __user *segments)
>  {
> -	int ret;
>  	size_t segment_bytes;
>
>  	/* Read in the segments */
>  	image->nr_segments = nr_segments;
>  	segment_bytes = nr_segments * sizeof(*segments);
> -	ret = copy_from_user(image->segment, segments, segment_bytes);
> -	if (ret)
> -		ret = -EFAULT;
> +	if (in_compat_syscall())
> +		return copy_user_compat_segment_list(image, nr_segments, segments);
>
> -	return ret;
> +	if (copy_from_user(image->segment, segments, segment_bytes))
> +		return -EFAULT;
> +
> +	return 0;
>  }
>
>  static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
> @@ -233,8 +258,9 @@ static inline int kexec_load_check(unsigned long nr_segments,
>  	return 0;
>  }
>
> -SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> -		struct kexec_segment __user *, segments, unsigned long, flags)
> +static int kernel_kexec_load(unsigned long entry, unsigned long nr_segments,
> +			     struct kexec_segment __user * segments,
> +			     unsigned long flags)
>  {
>  	int result;
>
> @@ -265,57 +291,20 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
>  	return result;
>  }
>
> +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> +		struct kexec_segment __user *, segments, unsigned long, flags)
> +{
> +	return kernel_kexec_load(entry, nr_segments, segments, flags);
> +}
> +
>  #ifdef CONFIG_COMPAT
>  COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
>  		       compat_ulong_t, nr_segments,
>  		       struct compat_kexec_segment __user *, segments,
>  		       compat_ulong_t, flags)
>  {
> -	struct compat_kexec_segment in;
> -	struct kexec_segment out, __user *ksegments;
> -	unsigned long i, result;
> -
> -	result = kexec_load_check(nr_segments, flags);
> -	if (result)
> -		return result;
> -
> -	/* Don't allow clients that don't understand the native
> -	 * architecture to do anything.
> -	 */
> -	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
> -		return -EINVAL;
> -
> -	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
> -	for (i = 0; i < nr_segments; i++) {
> -		result = copy_from_user(&in, &segments[i], sizeof(in));
> -		if (result)
> -			return -EFAULT;
> -
> -		out.buf = compat_ptr(in.buf);
> -		out.bufsz = in.bufsz;
> -		out.mem = in.mem;
> -		out.memsz = in.memsz;
> -
> -		result = copy_to_user(&ksegments[i], &out, sizeof(out));
> -		if (result)
> -			return -EFAULT;
> -	}
> -
> -	/* Because we write directly to the reserved memory
> -	 * region when loading crash kernels we need a mutex here to
> -	 * prevent multiple crash kernels from attempting to load
> -	 * simultaneously, and to prevent a crash kernel from loading
> -	 * over the top of a in use crash kernel.
> -	 *
> -	 * KISS: always take the mutex.
> -	 */
> -	if (!mutex_trylock(&kexec_mutex))
> -		return -EBUSY;
> -
> -	result = do_kexec_load(entry, nr_segments, ksegments, flags);
> -
> -	mutex_unlock(&kexec_mutex);
> -
> -	return result;
> +	return kernel_kexec_load(entry, nr_segments,
> +				 (struct kexec_segment __user *)segments,
> +				 flags);
>  }
>  #endif
> +	if (in_compat_syscall())
> +		return copy_user_compat_segment_list(image, nr_segments, segments);

Annoying overly long line here.  Otherwise:

Reviewed-by: Christoph Hellwig <hch@lst.de>
On Mon, May 17, 2021 at 10:57:24PM -0500, Eric W. Biederman wrote:
> We open ourselves up to bugs whenever we lie to the type system.
>
> Skimming through the code it looks like it should be possible
> to not need the in_compat_syscall and the casts to the wrong
> type by changing the order of the code a little bit.

What kind of bug do you expect?  We must only copy from user addresses
once anyway.  I've never seen bugs due to the use of in_compat_syscall,
but plenty due to cruft code trying to avoid it.
On Tue, May 18, 2021 at 8:40 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> On Mon, May 17, 2021 at 10:57:24PM -0500, Eric W. Biederman wrote:
> > We open ourselves up to bugs whenever we lie to the type system.
> >
> > Skimming through the code it looks like it should be possible
> > to not need the in_compat_syscall and the casts to the wrong
> > type by changing the order of the code a little bit.

There are obviously other ways of doing the same. The reason for doing
it this specific way is so I can eliminate the compat entry point
entirely in patch 4/4.

> What kind of bug do you expect?  We must only copy from user addresses
> once anyway.  I've never seen bugs due to the use of in_compat_syscall,
> but plenty due to cruft code trying to avoid it.

Right, I've used the same approach of passing a native-typed __user
pointer and converting it in a copy_from_user/copy_to_user wrapper in a
number of other places, as this tends to produce the most readable
version by concentrating the tricky logic in the one place that already
has to be careful.

Most of the bugs I've seen with compat code are from duplicated code
paths that diverge over time when a bugfix for the native version is
applied incorrectly or not at all to the compat version.

        Arnd
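[Illustrative sketch: the wrapper pattern Arnd describes above boils down
to something like the following, condensed from the patch at the top of
this thread. The helper name copy_one_segment is hypothetical;
in_compat_syscall(), copy_from_user() and compat_ptr() are the real
kernel interfaces. The pointer reinterpretation in the compat branch is
exactly the cast Eric objects to.]

/*
 * Sketch only, not kernel code as merged: one native-typed __user
 * pointer is passed down, and the single copy-in helper decides how
 * to interpret the user buffer.
 */
static int copy_one_segment(struct kexec_segment *out,
			    struct kexec_segment __user *segments,
			    unsigned long i)
{
	if (in_compat_syscall()) {
		/* Reinterpret the same user pointer as the 32-bit layout. */
		struct compat_kexec_segment __user *cs =
			(struct compat_kexec_segment __user *)segments;
		struct compat_kexec_segment in;

		if (copy_from_user(&in, &cs[i], sizeof(in)))
			return -EFAULT;

		out->buf = compat_ptr(in.buf);
		out->bufsz = in.bufsz;
		out->mem = in.mem;
		out->memsz = in.memsz;
		return 0;
	}

	/* Native caller: the user layout already matches struct kexec_segment. */
	if (copy_from_user(out, &segments[i], sizeof(*out)))
		return -EFAULT;

	return 0;
}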
On Tue, May 18, 2021 at 8:38 AM Christoph Hellwig <hch@infradead.org> wrote:
>
> > +	if (in_compat_syscall())
> > +		return copy_user_compat_segment_list(image, nr_segments, segments);
>
> Annoying overly long line here.

Oops, I was sure I had fixed all of these when you pointed this out
before. I probably rebased a slightly older branch that did not have
the fixes.

> Otherwise:
>
> Reviewed-by: Christoph Hellwig <hch@lst.de>

Thanks,

        Arnd
Arnd Bergmann <arnd@kernel.org> writes:

> From: Arnd Bergmann <arnd@arndb.de>
>
> The compat version of sys_kexec_load() uses compat_alloc_user_space to
> convert the user-provided arguments into the native format.
>
> Move the conversion into the regular implementation with
> an in_compat_syscall() check to simplify it and avoid the
> compat_alloc_user_space() call.
>
> compat_sys_kexec_load() now behaves the same as sys_kexec_load().

Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>

The patch is wrong.

The logic between the compat entry point and the ordinary entry point
is by necessity different.  This unifies the logic and breaks the compat
entry point.

The fundamental necessity is that the code being loaded needs to know
which mode the kernel is running in so it can safely transition to the
new kernel.

Given that the two entry points fundamentally need different logic,
and that difference was not preserved and the goal of this patchset
was to unify that which fundamentally needs to be different, I don't
think this patch series makes any sense for kexec.

Eric

>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  include/linux/kexec.h |  2 -
>  kernel/kexec.c        | 95 +++++++++++++++++++------------------------
>  2 files changed, 42 insertions(+), 55 deletions(-)
>
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 0c994ae37729..f61e310d7a85 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -88,14 +88,12 @@ struct kexec_segment {
>  	size_t memsz;
>  };
>
> -#ifdef CONFIG_COMPAT
>  struct compat_kexec_segment {
>  	compat_uptr_t buf;
>  	compat_size_t bufsz;
>  	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
>  	compat_size_t memsz;
>  };
> -#endif
>
>  #ifdef CONFIG_KEXEC_FILE
>  struct purgatory_info {
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index c82c6c06f051..6618b1d9f00b 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -19,21 +19,46 @@
>
>  #include "kexec_internal.h"
>
> +static int copy_user_compat_segment_list(struct kimage *image,
> +					 unsigned long nr_segments,
> +					 void __user *segments)
> +{
> +	struct compat_kexec_segment __user *cs = segments;
> +	struct compat_kexec_segment segment;
> +	int i;
> +
> +	for (i = 0; i < nr_segments; i++) {
> +		if (copy_from_user(&segment, &cs[i], sizeof(segment)))
> +			return -EFAULT;
> +
> +		image->segment[i] = (struct kexec_segment) {
> +			.buf = compat_ptr(segment.buf),
> +			.bufsz = segment.bufsz,
> +			.mem = segment.mem,
> +			.memsz = segment.memsz,
> +		};
> +	}
> +
> +	return 0;
> +}
> +
> +
>  static int copy_user_segment_list(struct kimage *image,
>  				  unsigned long nr_segments,
>  				  struct kexec_segment __user *segments)
>  {
> -	int ret;
>  	size_t segment_bytes;
>
>  	/* Read in the segments */
>  	image->nr_segments = nr_segments;
>  	segment_bytes = nr_segments * sizeof(*segments);
> -	ret = copy_from_user(image->segment, segments, segment_bytes);
> -	if (ret)
> -		ret = -EFAULT;
> +	if (in_compat_syscall())
> +		return copy_user_compat_segment_list(image, nr_segments, segments);
>
> -	return ret;
> +	if (copy_from_user(image->segment, segments, segment_bytes))
> +		return -EFAULT;
> +
> +	return 0;
>  }
>
>  static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
> @@ -233,8 +258,9 @@ static inline int kexec_load_check(unsigned long nr_segments,
>  	return 0;
>  }
>
> -SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> -		struct kexec_segment __user *, segments, unsigned long, flags)
> +static int kernel_kexec_load(unsigned long entry, unsigned long nr_segments,
> +			     struct kexec_segment __user * segments,
> +			     unsigned long flags)
>  {
>  	int result;
>
> @@ -265,57 +291,20 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
>  	return result;
>  }
>
> +SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
> +		struct kexec_segment __user *, segments, unsigned long, flags)
> +{
> +	return kernel_kexec_load(entry, nr_segments, segments, flags);
> +}
> +
>  #ifdef CONFIG_COMPAT
>  COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
>  		       compat_ulong_t, nr_segments,
>  		       struct compat_kexec_segment __user *, segments,
>  		       compat_ulong_t, flags)
>  {
> -	struct compat_kexec_segment in;
> -	struct kexec_segment out, __user *ksegments;
> -	unsigned long i, result;
> -
> -	result = kexec_load_check(nr_segments, flags);
> -	if (result)
> -		return result;
> -
> -	/* Don't allow clients that don't understand the native
> -	 * architecture to do anything.
> -	 */
> -	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
> -		return -EINVAL;
> -
> -	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
> -	for (i = 0; i < nr_segments; i++) {
> -		result = copy_from_user(&in, &segments[i], sizeof(in));
> -		if (result)
> -			return -EFAULT;
> -
> -		out.buf = compat_ptr(in.buf);
> -		out.bufsz = in.bufsz;
> -		out.mem = in.mem;
> -		out.memsz = in.memsz;
> -
> -		result = copy_to_user(&ksegments[i], &out, sizeof(out));
> -		if (result)
> -			return -EFAULT;
> -	}
> -
> -	/* Because we write directly to the reserved memory
> -	 * region when loading crash kernels we need a mutex here to
> -	 * prevent multiple crash kernels from attempting to load
> -	 * simultaneously, and to prevent a crash kernel from loading
> -	 * over the top of a in use crash kernel.
> -	 *
> -	 * KISS: always take the mutex.
> -	 */
> -	if (!mutex_trylock(&kexec_mutex))
> -		return -EBUSY;
> -
> -	result = do_kexec_load(entry, nr_segments, ksegments, flags);
> -
> -	mutex_unlock(&kexec_mutex);
> -
> -	return result;
> +	return kernel_kexec_load(entry, nr_segments,
> +				 (struct kexec_segment __user *)segments,
> +				 flags);
>  }
>  #endif
On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> Arnd Bergmann <arnd@kernel.org> writes:
>
> > From: Arnd Bergmann <arnd@arndb.de>
> >
> > The compat version of sys_kexec_load() uses compat_alloc_user_space to
> > convert the user-provided arguments into the native format.
> >
> > Move the conversion into the regular implementation with
> > an in_compat_syscall() check to simplify it and avoid the
> > compat_alloc_user_space() call.
> >
> > compat_sys_kexec_load() now behaves the same as sys_kexec_load().
>
> Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>
> The patch is wrong.
>
> The logic between the compat entry point and the ordinary entry point
> is by necessity different.  This unifies the logic and breaks the compat
> entry point.
>
> The fundamental necessity is that the code being loaded needs to know
> which mode the kernel is running in so it can safely transition to the
> new kernel.
>
> Given that the two entry points fundamentally need different logic,
> and that difference was not preserved and the goal of this patchset
> was to unify that which fundamentally needs to be different, I don't
> think this patch series makes any sense for kexec.

Sorry, I'm not following that explanation. Can you clarify what different
modes of the kernel you are referring to here, and how my patch
changes this?

The only difference I can see between the native and compat entry
points is the layout of the kexec_segment structure, and that is
obviously preserved by my patch.

        Arnd
On Tue, May 18, 2021 at 4:05 PM Arnd Bergmann <arnd@kernel.org> wrote:
>
> On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
> >
> > Arnd Bergmann <arnd@kernel.org> writes:
> >
> > > From: Arnd Bergmann <arnd@arndb.de>
> > >
> > > The compat version of sys_kexec_load() uses compat_alloc_user_space to
> > > convert the user-provided arguments into the native format.
> > >
> > > Move the conversion into the regular implementation with
> > > an in_compat_syscall() check to simplify it and avoid the
> > > compat_alloc_user_space() call.
> > >
> > > compat_sys_kexec_load() now behaves the same as sys_kexec_load().
> >
> > Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
> >
> > The patch is wrong.
> >
> > The logic between the compat entry point and the ordinary entry point
> > is by necessity different.  This unifies the logic and breaks the compat
> > entry point.
> >
> > The fundamental necessity is that the code being loaded needs to know
> > which mode the kernel is running in so it can safely transition to the
> > new kernel.
> >
> > Given that the two entry points fundamentally need different logic,
> > and that difference was not preserved and the goal of this patchset
> > was to unify that which fundamentally needs to be different, I don't
> > think this patch series makes any sense for kexec.
>
> Sorry, I'm not following that explanation. Can you clarify what different
> modes of the kernel you are referring to here, and how my patch
> changes this?

I think I figured it out now myself after comparing the two functions:

--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -269,7 +269,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 
 	/* Verify we are on the appropriate architecture */
 	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
-	    ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
+	    (in_compat_syscall() ||
+	     ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)))
 		return -EINVAL;
 
 	/* Because we write directly to the reserved memory

Not sure if that's the best way of doing it, but it looks like folding this
in restores the current behavior.

        Arnd
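[Illustrative sketch: spelled out, the folded check above has these
semantics. KEXEC_ARCH_DEFAULT asks for the kernel's native architecture,
which is only trustworthy when the caller itself is native, so a compat
caller has to name the target architecture explicitly. The helper below
is hypothetical and not part of any posted patch; KEXEC_ARCH,
KEXEC_ARCH_MASK, KEXEC_ARCH_DEFAULT and in_compat_syscall() are the real
kernel symbols.]

/* Equivalent predicate for the folded architecture check above. */
static bool kexec_arch_permitted(unsigned long flags)
{
	unsigned long arch = flags & KEXEC_ARCH_MASK;

	if (arch == KEXEC_ARCH)		/* target architecture named explicitly */
		return true;

	/* KEXEC_ARCH_DEFAULT is only accepted from the native entry point. */
	return arch == KEXEC_ARCH_DEFAULT && !in_compat_syscall();
}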
Arnd Bergmann <arnd@kernel.org> writes:

> On Tue, May 18, 2021 at 4:05 PM Arnd Bergmann <arnd@kernel.org> wrote:
>>
>> On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>> >
>> > Arnd Bergmann <arnd@kernel.org> writes:
>> >
>> > > From: Arnd Bergmann <arnd@arndb.de>
>> > >
>> > > The compat version of sys_kexec_load() uses compat_alloc_user_space to
>> > > convert the user-provided arguments into the native format.
>> > >
>> > > Move the conversion into the regular implementation with
>> > > an in_compat_syscall() check to simplify it and avoid the
>> > > compat_alloc_user_space() call.
>> > >
>> > > compat_sys_kexec_load() now behaves the same as sys_kexec_load().
>> >
>> > Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> >
>> > The patch is wrong.
>> >
>> > The logic between the compat entry point and the ordinary entry point
>> > is by necessity different.  This unifies the logic and breaks the compat
>> > entry point.
>> >
>> > The fundamental necessity is that the code being loaded needs to know
>> > which mode the kernel is running in so it can safely transition to the
>> > new kernel.
>> >
>> > Given that the two entry points fundamentally need different logic,
>> > and that difference was not preserved and the goal of this patchset
>> > was to unify that which fundamentally needs to be different, I don't
>> > think this patch series makes any sense for kexec.
>>
>> Sorry, I'm not following that explanation. Can you clarify what different
>> modes of the kernel you are referring to here, and how my patch
>> changes this?
>
> I think I figured it out now myself after comparing the two functions:
>
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -269,7 +269,8 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry,
> unsigned long, nr_segments,
>
>  	/* Verify we are on the appropriate architecture */
>  	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
> -	    ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
> +	    (in_compat_syscall() ||
> +	     ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT)))
>  		return -EINVAL;
>
>  	/* Because we write directly to the reserved memory
>
> Not sure if that's the best way of doing it, but it looks like folding this
> in restores the current behavior.

Yes.  That is pretty much all there is.

I personally can't stand the sight of in_compat_syscall(), doubly so
when you have to lie to the type system with casts.  The cognitive
dissonance I experience is extreme.

I will be happy to help you find another way to get rid of
compat_alloc_user, but not that way.

There is a whole mess in there that was introduced when someone added
do_kexec_load while I was napping in 2017 that makes the system calls
an absolute mess.  It all needs to be cleaned up.

Eric
From: Arnd Bergmann
> Sent: 17 May 2021 21:34
>
> The compat version of sys_kexec_load() uses compat_alloc_user_space to
> convert the user-provided arguments into the native format.
>
> Move the conversion into the regular implementation with
> an in_compat_syscall() check to simplify it and avoid the
> compat_alloc_user_space() call.
>
> compat_sys_kexec_load() now behaves the same as sys_kexec_load().
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  include/linux/kexec.h |  2 -
>  kernel/kexec.c        | 95 +++++++++++++++++++------------------------
>  2 files changed, 42 insertions(+), 55 deletions(-)
>
> diff --git a/include/linux/kexec.h b/include/linux/kexec.h
> index 0c994ae37729..f61e310d7a85 100644
> --- a/include/linux/kexec.h
> +++ b/include/linux/kexec.h
> @@ -88,14 +88,12 @@ struct kexec_segment {
>  	size_t memsz;
>  };
>
> -#ifdef CONFIG_COMPAT
>  struct compat_kexec_segment {
>  	compat_uptr_t buf;
>  	compat_size_t bufsz;
>  	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
>  	compat_size_t memsz;
>  };
> -#endif
>
>  #ifdef CONFIG_KEXEC_FILE
>  struct purgatory_info {
> diff --git a/kernel/kexec.c b/kernel/kexec.c
> index c82c6c06f051..6618b1d9f00b 100644
> --- a/kernel/kexec.c
> +++ b/kernel/kexec.c
> @@ -19,21 +19,46 @@
>
>  #include "kexec_internal.h"
>
> +static int copy_user_compat_segment_list(struct kimage *image,
> +					 unsigned long nr_segments,
> +					 void __user *segments)
> +{
> +	struct compat_kexec_segment __user *cs = segments;
> +	struct compat_kexec_segment segment;
> +	int i;
> +
> +	for (i = 0; i < nr_segments; i++) {
> +		if (copy_from_user(&segment, &cs[i], sizeof(segment)))
> +			return -EFAULT;

How many segments are there?
The multiple copy_from_user() will be slow.

> +
> +		image->segment[i] = (struct kexec_segment) {
> +			.buf = compat_ptr(segment.buf),
> +			.bufsz = segment.bufsz,
> +			.mem = segment.mem,
> +			.memsz = segment.memsz,
> +		};
> +	}
> +
> +	return 0;
> +}
> +
> +
>  static int copy_user_segment_list(struct kimage *image,
>  				  unsigned long nr_segments,
>  				  struct kexec_segment __user *segments)
>  {
> -	int ret;
>  	size_t segment_bytes;
>
>  	/* Read in the segments */
>  	image->nr_segments = nr_segments;
>  	segment_bytes = nr_segments * sizeof(*segments);

Should there be a bound check on nr_segments?
I can't see one in the code in this patch.

> -	ret = copy_from_user(image->segment, segments, segment_bytes);
> -	if (ret)
> -		ret = -EFAULT;
> +	if (in_compat_syscall())
> +		return copy_user_compat_segment_list(image, nr_segments, segments);
>
> -	return ret;
> +	if (copy_from_user(image->segment, segments, segment_bytes))
> +		return -EFAULT;
> +
> +	return 0;

An alternate sequence (which Eric will like even less!) is to do a
single copy_from_user() for the entire compat size array into the
'normal' buffer and then do a reverse order conversion of each array
entry from 'compat' to '64 bit'.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
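[Illustrative sketch of the sequence David suggests; the helper name and
exact form are hypothetical and do not appear in any posted patch. The
packed compat array is read into the front of the natively sized buffer
with a single copy_from_user(), then widened in place. Walking backwards
is what makes the in-place widening safe: native entry i only overlaps
compat entries at index i and above, which have already been read by the
time it is written. The aliasing cast is, again, the kind of thing Eric
dislikes.]

/*
 * 'segs' must have room for nr_segments native struct kexec_segment
 * entries; the smaller packed compat array is copied into its start.
 */
static int copy_compat_segments_inplace(struct kexec_segment *segs,
					unsigned long nr_segments,
					void __user *usegments)
{
	struct compat_kexec_segment *cseg = (struct compat_kexec_segment *)segs;
	unsigned long i;

	/* One bulk copy of the 32-bit layout. */
	if (copy_from_user(segs, usegments, nr_segments * sizeof(*cseg)))
		return -EFAULT;

	/* Expand each entry in place, last to first. */
	for (i = nr_segments; i-- > 0; ) {
		struct compat_kexec_segment in = cseg[i];

		segs[i] = (struct kexec_segment) {
			.buf = compat_ptr(in.buf),
			.bufsz = in.bufsz,
			.mem = in.mem,
			.memsz = in.memsz,
		};
	}

	return 0;
}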
Arnd Bergmann <arnd@kernel.org> writes:

> On Tue, May 18, 2021 at 4:05 PM Arnd Bergmann <arnd@kernel.org> wrote:
>>
>> On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>> >
>> > Arnd Bergmann <arnd@kernel.org> writes:
>> >
>> > > From: Arnd Bergmann <arnd@arndb.de>
>> > >
>> > > The compat version of sys_kexec_load() uses compat_alloc_user_space to
>> > > convert the user-provided arguments into the native format.
>> > >
>> > > Move the conversion into the regular implementation with
>> > > an in_compat_syscall() check to simplify it and avoid the
>> > > compat_alloc_user_space() call.
>> > >
>> > > compat_sys_kexec_load() now behaves the same as sys_kexec_load().
>> >
>> > Nacked-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> >
>> > The patch is wrong.
>> >
>> > The logic between the compat entry point and the ordinary entry point
>> > is by necessity different.  This unifies the logic and breaks the compat
>> > entry point.
>> >
>> > The fundamental necessity is that the code being loaded needs to know
>> > which mode the kernel is running in so it can safely transition to the
>> > new kernel.
>> >
>> > Given that the two entry points fundamentally need different logic,
>> > and that difference was not preserved and the goal of this patchset
>> > was to unify that which fundamentally needs to be different, I don't
>> > think this patch series makes any sense for kexec.
>>
>> Sorry, I'm not following that explanation. Can you clarify what different
>> modes of the kernel you are referring to here, and how my patch
>> changes this?

I think something like the untested diff below is enough to get rid of
compat_alloc_user cleanly.

Certainly it should be enough to give any idea what I am thinking.

diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..ce69a5d68023 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -19,26 +19,21 @@
 
 #include "kexec_internal.h"
 
-static int copy_user_segment_list(struct kimage *image,
+static void copy_user_segment_list(struct kimage *image,
 				  unsigned long nr_segments,
-				  struct kexec_segment __user *segments)
+				  struct kexec_segment *segments)
 {
-	int ret;
 	size_t segment_bytes;
 
 	/* Read in the segments */
 	image->nr_segments = nr_segments;
 	segment_bytes = nr_segments * sizeof(*segments);
-	ret = copy_from_user(image->segment, segments, segment_bytes);
-	if (ret)
-		ret = -EFAULT;
-
-	return ret;
+	memcpy(image->segment, segments, segment_bytes);
 }
 
 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
 			     unsigned long nr_segments,
-			     struct kexec_segment __user *segments,
+			     struct kexec_segment *segments,
 			     unsigned long flags)
 {
 	int ret;
@@ -59,9 +54,7 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
 
 	image->start = entry;
 
-	ret = copy_user_segment_list(image, nr_segments, segments);
-	if (ret)
-		goto out_free_image;
+	copy_user_segment_list(image, nr_segments, segments);
 
 	if (kexec_on_panic) {
 		/* Enable special crash kernel control page alloc policy. */
@@ -103,8 +96,8 @@ static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
 	return ret;
 }
 
-static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
-		struct kexec_segment __user *segments, unsigned long flags)
+static int do_kexec_load_locked(unsigned long entry, unsigned long nr_segments,
+		struct kexec_segment *segments, unsigned long flags)
 {
 	struct kimage **dest_image, *image;
 	unsigned long i;
@@ -174,6 +167,27 @@ static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
 	return ret;
 }
 
+static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
+		struct kexec_segment *segments, unsigned long flags)
+{
+	int result;
+
+	/* Because we write directly to the reserved memory
+	 * region when loading crash kernels we need a mutex here to
+	 * prevent multiple crash kernels from attempting to load
+	 * simultaneously, and to prevent a crash kernel from loading
+	 * over the top of a in use crash kernel.
+	 *
+	 * KISS: always take the mutex.
+	 */
+	if (!mutex_trylock(&kexec_mutex))
+		return -EBUSY;
+
+	result = do_kexec_load_locked(entry, nr_segments, segments, flags);
+	mutex_unlock(&kexec_mutex);
+	return result;
+}
+
 /*
  * Exec Kernel system call: for obvious reasons only root may call it.
  *
@@ -224,6 +238,11 @@ static inline int kexec_load_check(unsigned long nr_segments,
 	if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
 		return -EINVAL;
 
+	/* Verify we are on the appropriate architecture */
+	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
+		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
+		return -EINVAL;
+
 	/* Put an artificial cap on the number
 	 * of segments passed to kexec_load.
 	 */
@@ -236,33 +255,29 @@ static inline int kexec_load_check(unsigned long nr_segments,
 SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 		struct kexec_segment __user *, segments, unsigned long, flags)
 {
-	int result;
+	struct kexec_segment *ksegments;
+	unsigned long bytes, result;
 
 	result = kexec_load_check(nr_segments, flags);
 	if (result)
 		return result;
 
-	/* Verify we are on the appropriate architecture */
-	if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
-		((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
-		return -EINVAL;
-
-	/* Because we write directly to the reserved memory
-	 * region when loading crash kernels we need a mutex here to
-	 * prevent multiple crash kernels from attempting to load
-	 * simultaneously, and to prevent a crash kernel from loading
-	 * over the top of a in use crash kernel.
-	 *
-	 * KISS: always take the mutex.
-	 */
-	if (!mutex_trylock(&kexec_mutex))
-		return -EBUSY;
+	bytes = nr_segments * sizeof(ksegments[0]);
+	ksegments = kmalloc(bytes, GFP_KERNEL);
+	if (!ksegments)
+		return -ENOMEM;
+	result = copy_from_user(ksegments, segments, bytes);
+	if (result)
+		goto fail;
+
 	result = do_kexec_load(entry, nr_segments, segments, flags);
+	kfree(ksegments);
 
-	mutex_unlock(&kexec_mutex);
-
+fail:
+	kfree(ksegments);
 	return result;
+
 }
 
 #ifdef CONFIG_COMPAT
@@ -272,9 +287,9 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 		       compat_ulong_t, flags)
 {
 	struct compat_kexec_segment in;
-	struct kexec_segment out, __user *ksegments;
-	unsigned long i, result;
-
+	struct kexec_segment *ksegments;
+	unsigned long bytes, i, result;
+
 	result = kexec_load_check(nr_segments, flags);
 	if (result)
 		return result;
@@ -285,37 +300,26 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
 		return -EINVAL;
 
-	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
+	bytes = nr_segments * sizeof(ksegments[0]);
+	ksegments = kmalloc(bytes, GFP_KERNEL);
+	if (!ksegments)
+		return -ENOMEM;
+
 	for (i = 0; i < nr_segments; i++) {
 		result = copy_from_user(&in, &segments[i], sizeof(in));
 		if (result)
-			return -EFAULT;
-
-		out.buf = compat_ptr(in.buf);
-		out.bufsz = in.bufsz;
-		out.mem = in.mem;
-		out.memsz = in.memsz;
+			goto fail;
 
-		result = copy_to_user(&ksegments[i], &out, sizeof(out));
-		if (result)
-			return -EFAULT;
+		ksegments[i].buf = compat_ptr(in.buf);
+		ksegments[i].bufsz = in.bufsz;
+		ksegments[i].mem = in.mem;
+		ksegments[i].memsz = in.memsz;
 	}
 
-	/* Because we write directly to the reserved memory
-	 * region when loading crash kernels we need a mutex here to
-	 * prevent multiple crash kernels from attempting to load
-	 * simultaneously, and to prevent a crash kernel from loading
-	 * over the top of a in use crash kernel.
-	 *
-	 * KISS: always take the mutex.
-	 */
-	if (!mutex_trylock(&kexec_mutex))
-		return -EBUSY;
-
 	result = do_kexec_load(entry, nr_segments, ksegments, flags);
 
-	mutex_unlock(&kexec_mutex);
-
+fail:
+	kfree(ksegments);
 	return result;
 }
 #endif
On Wed, May 19, 2021 at 12:45 AM Eric W. Biederman <ebiederm@xmission.com> wrote:
> Arnd Bergmann <arnd@kernel.org> writes:
> > On Tue, May 18, 2021 at 4:05 PM Arnd Bergmann <arnd@kernel.org> wrote:
> >> On Tue, May 18, 2021 at 3:41 PM Eric W. Biederman <ebiederm@xmission.com> wrote:
>
> I think something like the untested diff below is enough to get rid of
> compat_alloc_user cleanly.
>
> Certainly it should be enough to give any idea what I am thinking.

Yes, that looks sufficient to me. I had started a slightly different
approach by trying to move the kimage_alloc_init() into the top-level
entry points to avoid the extra kmalloc, but that got rather
complicated, and your patch is simpler overall.

The allocation could still be combined with kexec_load_check() into a
new function to reduce the number of duplicate lines, but if you think
the current version is ok, then I'll leave this part as it is.

I've fixed a duplicate kfree() and some whitespace damage, and rebased
the rest of my series on top of this to give it a spin on the build
test boxes. I'll send a v4 series once I have made sure there are no
build-time regressions.

Can I add your Signed-off-by for the patch? Is there a set of tests I
should run on it?

        Arnd
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0c994ae37729..f61e310d7a85 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -88,14 +88,12 @@ struct kexec_segment {
 	size_t memsz;
 };
 
-#ifdef CONFIG_COMPAT
 struct compat_kexec_segment {
 	compat_uptr_t buf;
 	compat_size_t bufsz;
 	compat_ulong_t mem;	/* User space sees this as a (void *) ... */
 	compat_size_t memsz;
 };
-#endif
 
 #ifdef CONFIG_KEXEC_FILE
 struct purgatory_info {
diff --git a/kernel/kexec.c b/kernel/kexec.c
index c82c6c06f051..6618b1d9f00b 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -19,21 +19,46 @@
 
 #include "kexec_internal.h"
 
+static int copy_user_compat_segment_list(struct kimage *image,
+					 unsigned long nr_segments,
+					 void __user *segments)
+{
+	struct compat_kexec_segment __user *cs = segments;
+	struct compat_kexec_segment segment;
+	int i;
+
+	for (i = 0; i < nr_segments; i++) {
+		if (copy_from_user(&segment, &cs[i], sizeof(segment)))
+			return -EFAULT;
+
+		image->segment[i] = (struct kexec_segment) {
+			.buf = compat_ptr(segment.buf),
+			.bufsz = segment.bufsz,
+			.mem = segment.mem,
+			.memsz = segment.memsz,
+		};
+	}
+
+	return 0;
+}
+
+
 static int copy_user_segment_list(struct kimage *image,
 				  unsigned long nr_segments,
 				  struct kexec_segment __user *segments)
 {
-	int ret;
 	size_t segment_bytes;
 
 	/* Read in the segments */
 	image->nr_segments = nr_segments;
 	segment_bytes = nr_segments * sizeof(*segments);
-	ret = copy_from_user(image->segment, segments, segment_bytes);
-	if (ret)
-		ret = -EFAULT;
+	if (in_compat_syscall())
+		return copy_user_compat_segment_list(image, nr_segments, segments);
 
-	return ret;
+	if (copy_from_user(image->segment, segments, segment_bytes))
+		return -EFAULT;
+
+	return 0;
 }
 
 static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
@@ -233,8 +258,9 @@ static inline int kexec_load_check(unsigned long nr_segments,
 	return 0;
 }
 
-SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
-		struct kexec_segment __user *, segments, unsigned long, flags)
+static int kernel_kexec_load(unsigned long entry, unsigned long nr_segments,
+			     struct kexec_segment __user * segments,
+			     unsigned long flags)
 {
 	int result;
 
@@ -265,57 +291,20 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
 	return result;
 }
 
+SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
+		struct kexec_segment __user *, segments, unsigned long, flags)
+{
+	return kernel_kexec_load(entry, nr_segments, segments, flags);
+}
+
 #ifdef CONFIG_COMPAT
 COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
 		       compat_ulong_t, nr_segments,
 		       struct compat_kexec_segment __user *, segments,
 		       compat_ulong_t, flags)
 {
-	struct compat_kexec_segment in;
-	struct kexec_segment out, __user *ksegments;
-	unsigned long i, result;
-
-	result = kexec_load_check(nr_segments, flags);
-	if (result)
-		return result;
-
-	/* Don't allow clients that don't understand the native
-	 * architecture to do anything.
-	 */
-	if ((flags & KEXEC_ARCH_MASK) == KEXEC_ARCH_DEFAULT)
-		return -EINVAL;
-
-	ksegments = compat_alloc_user_space(nr_segments * sizeof(out));
-	for (i = 0; i < nr_segments; i++) {
-		result = copy_from_user(&in, &segments[i], sizeof(in));
-		if (result)
-			return -EFAULT;
-
-		out.buf = compat_ptr(in.buf);
-		out.bufsz = in.bufsz;
-		out.mem = in.mem;
-		out.memsz = in.memsz;
-
-		result = copy_to_user(&ksegments[i], &out, sizeof(out));
-		if (result)
-			return -EFAULT;
-	}
-
-	/* Because we write directly to the reserved memory
-	 * region when loading crash kernels we need a mutex here to
-	 * prevent multiple crash kernels from attempting to load
-	 * simultaneously, and to prevent a crash kernel from loading
-	 * over the top of a in use crash kernel.
-	 *
-	 * KISS: always take the mutex.
-	 */
-	if (!mutex_trylock(&kexec_mutex))
-		return -EBUSY;
-
-	result = do_kexec_load(entry, nr_segments, ksegments, flags);
-
-	mutex_unlock(&kexec_mutex);
-
-	return result;
+	return kernel_kexec_load(entry, nr_segments,
+				 (struct kexec_segment __user *)segments,
+				 flags);
 }
 #endif