Message ID | 1448455781-26660-1-git-send-email-cov@codeaurora.org (mailing list archive)
---|---
State | New, archived
Hi Christopher,

On Wed, Nov 25, 2015 at 07:49:29AM -0500, Christopher Covington wrote:
> Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
> unmap the VDSO. Add support for this to the AArch64 kernel by copying in
> the PowerPC code with slight modifications.
>
> Signed-off-by: Christopher Covington <cov@codeaurora.org>
> ---
>  arch/arm64/include/asm/Kbuild          |  1 -
>  arch/arm64/include/asm/mm-arch-hooks.h | 28 ++++++++++++++++++++++++++++
>  arch/arm64/include/asm/mmu_context.h   | 23 ++++++++++++++++++++++-
>  3 files changed, 50 insertions(+), 2 deletions(-)
>  create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h

[...]

This is virtually identical to the PowerPC implementation, afaict. Any
chance we could move this to core code and define some generic vdso
accessors for the mm?

Will
On 12/02/2015 07:19 AM, Will Deacon wrote:
> Hi Christopher,
>
> On Wed, Nov 25, 2015 at 07:49:29AM -0500, Christopher Covington wrote:
>> Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
>> unmap the VDSO. Add support for this to the AArch64 kernel by copying in
>> the PowerPC code with slight modifications.
>>
>> Signed-off-by: Christopher Covington <cov@codeaurora.org>

[...]

> This is virtually identical to the PowerPC implementation, afaict. Any
> chance we could move this to core code and define some generic vdso
> accessors for the mm?

I think the same criticism applies to the VDSO code itself, and I'm not
sure how easy it is to provide common VDSO remap/unmap without common
VDSO code. I guess I'll do some investigating when I have the chance.

Thanks,
Christopher Covington
Please take a look at the following prototype of sharing the PowerPC
VDSO unmap and remap code with other architectures. I've only hooked
up arm64 to begin with. If folks think this is a reasonable approach I
can work on 32-bit ARM as well. Not having heard back from an earlier
request for guidance [1], I simply dove in and started hacking.
Laurent's test case [2][3] clearly demonstrates whether VDSO remap
works on a given architecture.

1. https://lkml.org/lkml/2016/3/2/225
2. https://lists.openvz.org/pipermail/criu/2015-March/019161.html
3. http://lists.openvz.org/pipermail/criu/attachments/20150318/f02ed9ea/attachment.bin

Thanks,
Christopher Covington
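For reference, here is a minimal sketch (not Laurent's actual test binary) of
the kind of check the referenced test case performs: locate the vDSO through
the auxiliary vector, force a move with mremap(), then confirm that signal
return still works. The single-page size and the chosen destination are
assumptions made for brevity; the real test derives the mapping from
/proc/self/maps.

#define _GNU_SOURCE
#include <elf.h>
#include <signal.h>
#include <stdio.h>
#include <sys/auxv.h>
#include <sys/mman.h>
#include <unistd.h>

static volatile sig_atomic_t got_signal;

static void handler(int sig)
{
	(void)sig;
	got_signal = 1;
}

int main(void)
{
	unsigned long vdso = getauxval(AT_SYSINFO_EHDR);
	size_t size = getpagesize();	/* assumed single-page vDSO; the real test reads /proc/self/maps */
	void *dst, *moved;

	if (!vdso)
		return 1;

	/* Reserve a destination, then force the move with MREMAP_FIXED. */
	dst = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED)
		return 1;

	moved = mremap((void *)vdso, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	if (moved == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	/*
	 * Signal return goes through the vDSO trampoline, which the kernel
	 * looks up via its per-mm vDSO pointer.  If the kernel did not track
	 * the move (the arch_remap() hook in this series), the process dies
	 * on sigreturn here instead of printing the result.
	 */
	signal(SIGUSR1, handler);
	raise(SIGUSR1);

	if (got_signal) {
		printf("vDSO moved from %#lx to %p, signal return still works\n",
		       vdso, moved);
		return 0;
	}
	return 1;
}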
On 04/28/2016 08:18 AM, Christopher Covington wrote:
> Please take a look at the following prototype of sharing the PowerPC
> VDSO unmap and remap code with other architectures. I've only hooked
> up arm64 to begin with. If folks think this is a reasonable approach I
> can work on 32-bit ARM as well. Not having heard back from an earlier
> request for guidance [1], I simply dove in and started hacking.
> Laurent's test case [2][3] clearly demonstrates whether VDSO remap
> works on a given architecture.

I think there's a much nicer way:

https://lkml.kernel.org/r/1461584223-9418-1-git-send-email-dsafonov@virtuozzo.com

Could arm64 and ppc use this approach? These arch_xyz hooks are gross.

Also, at some point, possibly quite soon, x86 will want a way for
user code to ask the kernel to map a specific vdso variant at a specific
address. Could we perhaps add a new pair of syscalls:

struct vdso_info {
	unsigned long space_needed_before;
	unsigned long space_needed_after;
	unsigned long alignment;
};

long vdso_get_info(unsigned int vdso_type, struct vdso_info *info);

long vdso_remap(unsigned int vdso_type, unsigned long addr, unsigned int flags);

#define VDSO_X86_I386 0
#define VDSO_X86_64 1
#define VDSO_X86_X32 2
// etc.

vdso_remap will map the vdso of the chosen type such that AT_SYSINFO_EHDR
lines up with addr. It will use up to space_needed_before bytes before
that address and space_needed_after bytes after that address. It will
also unmap the old vdso (or maybe only do that if some flag is set).

On x86, mremap is *not* sufficient for everything that's needed,
because some programs will need to change the vdso type.

--Andy
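To make the proposed calling convention concrete, here is a purely
hypothetical usage sketch. Neither syscall exists in any kernel;
__NR_vdso_get_info and __NR_vdso_remap are placeholder numbers invented
only for illustration of the interface described above.

#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

/* From the proposal above; nothing here is implemented anywhere. */
struct vdso_info {
	unsigned long space_needed_before;
	unsigned long space_needed_after;
	unsigned long alignment;
};

#define VDSO_X86_64		1
#define __NR_vdso_get_info	1000	/* placeholder syscall number */
#define __NR_vdso_remap		1001	/* placeholder syscall number */

int main(void)
{
	struct vdso_info info;
	unsigned long addr = 0x700000000000UL;	/* example target for AT_SYSINFO_EHDR */

	/* Ask how much room the chosen vDSO variant needs around addr. */
	if (syscall(__NR_vdso_get_info, VDSO_X86_64, &info) < 0) {
		perror("vdso_get_info");
		return 1;
	}

	/* Map the chosen vDSO so its ELF header lands at addr, replacing the old one. */
	if (syscall(__NR_vdso_remap, VDSO_X86_64, addr, 0) < 0) {
		perror("vdso_remap");
		return 1;
	}

	printf("vDSO at %#lx (needs %lu bytes before, %lu after, alignment %lu)\n",
	       addr, info.space_needed_before, info.space_needed_after,
	       info.alignment);
	return 0;
}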
Hi Andy,

On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
> On 04/28/2016 08:18 AM, Christopher Covington wrote:
>> Please take a look at the following prototype of sharing the PowerPC
>> VDSO unmap and remap code with other architectures.

[...]

> I think there's a much nicer way:
>
> https://lkml.kernel.org/r/1461584223-9418-1-git-send-email-dsafonov@virtuozzo.com
>
> Could arm64 and ppc use this approach? These arch_xyz hooks are gross.

Thanks for the pointer. Any thoughts on how to keep essentially identical
definitions of vdso_mremap from proliferating into every architecture and
variant?

[...]

> On x86, mremap is *not* sufficient for everything that's needed,
> because some programs will need to change the vdso type.

I don't think I understand. Why can't people just exec() the ELF type that
corresponds to the VDSO they want?

Thanks,
Cov
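For illustration, a rough sketch of the direction the linked series points
to: the vDSO is installed as a special mapping whose mremap callback keeps
the per-mm vDSO pointer in sync, instead of relying on arch_remap()/
arch_unmap() hooks. The callback signature and the vdso_spec name are
assumptions based on that series, not part of the patch under review.

static int vdso_mremap(const struct vm_special_mapping *sm,
		       struct vm_area_struct *new_vma)
{
	/* Track the new location so e.g. the signal trampoline lookup keeps working. */
	current->mm->context.vdso = (void *)new_vma->vm_start;
	return 0;
}

static struct vm_special_mapping vdso_spec = {
	.name	= "[vdso]",
	.mremap	= vdso_mremap,
};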
On 04/29/2016 04:22 PM, Christopher Covington wrote:
> On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
>> Also, at some point, possibly quite soon, x86 will want a way for
>> user code to ask the kernel to map a specific vdso variant at a specific
>> address.

[...]

>> On x86, mremap is *not* sufficient for everything that's needed,
>> because some programs will need to change the vdso type.
>
> I don't think I understand. Why can't people just exec() the ELF type that
> corresponds to the VDSO they want?

I can speak to my own need for it: not losing all the existing state in
the application. Imagine you're restoring a container with 64-bit and
32-bit (compat mode) applications, so the restorer somehow needs to switch
the vdso type for a 32-bit application. Yes, you could exec(), but then
all the application properties that have already been restored would be
lost. You would need to transfer information about the mappings and define
a protocol between the restorer binary and the main criu application;
you would end up with a much more complicated architecture than the
current one, and it would be slower.

It's also pretty logical: if a task can switch between modes, why can't
it change the vdso mapping to match the mode it switched to?

(Note: if the work on removing the thread compat flags is done on x86,
there will not even be such a thing as an application mode - just a
difference in which syscalls it uses: compat or native.)

Thanks,
Dmitry Safonov
On 04/29/2016 09:55 AM, Dmitry Safonov wrote:
> On 04/29/2016 04:22 PM, Christopher Covington wrote:
>> On 04/28/2016 02:53 PM, Andy Lutomirski wrote:
>>> On x86, mremap is *not* sufficient for everything that's needed,
>>> because some programs will need to change the vdso type.
>> I don't think I understand. Why can't people just exec() the ELF type that
>> corresponds to the VDSO they want?
>
> I can speak to my own need for it: not losing all the existing state in
> the application. Imagine you're restoring a container with 64-bit and
> 32-bit (compat mode) applications, so the restorer somehow needs to switch
> the vdso type for a 32-bit application. Yes, you could exec(), but then
> all the application properties that have already been restored would be
> lost. You would need to transfer information about the mappings and define
> a protocol between the restorer binary and the main criu application;
> you would end up with a much more complicated architecture than the
> current one, and it would be slower.

Perhaps a more modest exec()-based strategy would be for x86_64 criu to
handle all of the x86_64 restores as usual, but to exec i386 and/or x32
criu service daemons and use them for restoring applications that need
those ABIs.

Regards,
Christopher Covington
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 70fd9ff..b112a39 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
-generic-y += mm-arch-hooks.h
 generic-y += mman.h
 generic-y += msgbuf.h
 generic-y += msi.h
diff --git a/arch/arm64/include/asm/mm-arch-hooks.h b/arch/arm64/include/asm/mm-arch-hooks.h
new file mode 100644
index 0000000..e6923fb
--- /dev/null
+++ b/arch/arm64/include/asm/mm-arch-hooks.h
@@ -0,0 +1,28 @@
+/*
+ * Architecture specific mm hooks
+ *
+ * Copyright (C) 2015, IBM Corporation
+ * Author: Laurent Dufour <ldufour@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_MM_ARCH_HOOKS_H
+#define __ASM_MM_ARCH_HOOKS_H
+
+static inline void arch_remap(struct mm_struct *mm,
+			      unsigned long old_start, unsigned long old_end,
+			      unsigned long new_start, unsigned long new_end)
+{
+	/*
+	 * mremap() doesn't allow moving multiple vmas so we can limit the
+	 * check to old_start == vdso_base.
+	 */
+	if ((void *)old_start == mm->context.vdso)
+		mm->context.vdso = (void *)new_start;
+}
+#define arch_remap arch_remap
+
+#endif /* __ASM_MM_ARCH_HOOKS_H */
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 2416578..9fe0773 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -24,7 +24,6 @@
 
 #include <asm/cacheflush.h>
 #include <asm/proc-fns.h>
-#include <asm-generic/mm_hooks.h>
 #include <asm/cputype.h>
 #include <asm/pgtable.h>
 
@@ -147,4 +146,26 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 #define deactivate_mm(tsk,mm)	do { } while (0)
 #define activate_mm(prev,next)	switch_mm(prev, next, NULL)
 
+static inline void arch_dup_mmap(struct mm_struct *oldmm,
+				 struct mm_struct *mm)
+{
+}
+
+static inline void arch_exit_mmap(struct mm_struct *mm)
+{
+}
+
+static inline void arch_unmap(struct mm_struct *mm,
+			      struct vm_area_struct *vma,
+			      unsigned long start, unsigned long end)
+{
+	if ((void *)start <= mm->context.vdso && mm->context.vdso < (void *)end)
+		mm->context.vdso = NULL;
+}
+
+static inline void arch_bprm_mm_init(struct mm_struct *mm,
+				     struct vm_area_struct *vma)
+{
+}
+
 #endif
Some applications such as Checkpoint/Restore In Userspace (CRIU) remap and
unmap the VDSO. Add support for this to the AArch64 kernel by copying in
the PowerPC code with slight modifications.

Signed-off-by: Christopher Covington <cov@codeaurora.org>
---
 arch/arm64/include/asm/Kbuild          |  1 -
 arch/arm64/include/asm/mm-arch-hooks.h | 28 ++++++++++++++++++++++++++++
 arch/arm64/include/asm/mmu_context.h   | 23 ++++++++++++++++++++++-
 3 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/include/asm/mm-arch-hooks.h