Message ID | 1384212412-21236-4-git-send-email-lauraa@codeaurora.org (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Hi Laura, On Tue, Nov 12, 2013 at 8:26 AM, Laura Abbott <lauraa@codeaurora.org> wrote: > vmalloc is currently assumed to be a completely separate address space > from the lowmem region. While this may be true in the general case, > there are some instances where lowmem and virtual space intermixing > provides gains. One example is needing to steal a large chunk of physical > lowmem for another purpose outside the systems usage. Rather than > waste the precious lowmem space on a 32-bit system, we can allow the > virtual holes created by the physical holes to be used by vmalloc > for virtual addressing. Track lowmem allocations in vmalloc to > allow mixing of lowmem and vmalloc. > > Signed-off-by: Laura Abbott <lauraa@codeaurora.org> > Signed-off-by: Neeti Desai <neetid@codeaurora.org> > --- > include/linux/mm.h | 6 ++++++ > include/linux/vmalloc.h | 1 + > mm/Kconfig | 11 +++++++++++ > mm/vmalloc.c | 26 ++++++++++++++++++++++++++ > 4 files changed, 44 insertions(+), 0 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index f022460..76df50d 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -308,6 +308,10 @@ unsigned long vmalloc_to_pfn(const void *addr); > * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there > * is no special casing required. > */ > + > +#ifdef CONFIG_VMALLOC_SAVING mismatch below Kconfig. CONFIG_ENABLE_VMALLOC_SAVING? 
> +extern int is_vmalloc_addr(const void *x) > +#else > static inline int is_vmalloc_addr(const void *x) > { > #ifdef CONFIG_MMU > @@ -318,6 +322,8 @@ static inline int is_vmalloc_addr(const void *x) > return 0; > #endif > } > +#endif > + > #ifdef CONFIG_MMU > extern int is_vmalloc_or_module_addr(const void *x); > #else > diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h > index 4b8a891..e0c8c49 100644 > --- a/include/linux/vmalloc.h > +++ b/include/linux/vmalloc.h > @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ > #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ > #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ > #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ > +#define VM_LOWMEM 0x00000040 /* Tracking of direct mapped lowmem */ > /* bits [20..32] reserved for arch specific ioremap internals */ > > /* > diff --git a/mm/Kconfig b/mm/Kconfig > index 8028dcc..b3c459d 100644 > --- a/mm/Kconfig > +++ b/mm/Kconfig > @@ -519,3 +519,14 @@ config MEM_SOFT_DIRTY > it can be cleared by hands. > > See Documentation/vm/soft-dirty.txt for more details. > + > +config ENABLE_VMALLOC_SAVING > + bool "Intermix lowmem and vmalloc virtual space" > + depends on ARCH_TRACKS_VMALLOC > + help > + Some memory layouts on embedded systems steal large amounts > + of lowmem physical memory for purposes outside of the kernel. > + Rather than waste the physical and virtual space, allow the > + kernel to use the virtual space as vmalloc space. > + > + If unsure, say N. 
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c > index 13a5495..c7b138b 100644 > --- a/mm/vmalloc.c > +++ b/mm/vmalloc.c > @@ -204,6 +204,29 @@ static int vmap_page_range(unsigned long start, unsigned long end, > return ret; > } > > +#ifdef ENABLE_VMALLOC_SAVING missing "CONFIG_" Thank you, Kyungmin Park > +int is_vmalloc_addr(const void *x) > +{ > + struct rb_node *n; > + struct vmap_area *va; > + int ret = 0; > + > + spin_lock(&vmap_area_lock); > + > + for (n = rb_first(vmap_area_root); n; rb_next(n)) { > + va = rb_entry(n, struct vmap_area, rb_node); > + if (x >= va->va_start && x < va->va_end) { > + ret = 1; > + break; > + } > + } > + > + spin_unlock(&vmap_area_lock); > + return ret; > +} > +EXPORT_SYMBOL(is_vmalloc_addr); > +#endif > + > int is_vmalloc_or_module_addr(const void *x) > { > /* > @@ -2628,6 +2651,9 @@ static int s_show(struct seq_file *m, void *p) > if (v->flags & VM_VPAGES) > seq_printf(m, " vpages"); > > + if (v->flags & VM_LOWMEM) > + seq_printf(m, " lowmem"); > + > show_numa_info(m, v); > seq_putc(m, '\n'); > return 0; > -- > The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum, > hosted by The Linux Foundation > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to majordomo@kvack.org. For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
On 11/11/2013 3:37 PM, Kyungmin Park wrote: > Hi Laura, > > On Tue, Nov 12, 2013 at 8:26 AM, Laura Abbott <lauraa@codeaurora.org> wrote: >> vmalloc is currently assumed to be a completely separate address space >> from the lowmem region. While this may be true in the general case, >> there are some instances where lowmem and virtual space intermixing >> provides gains. One example is needing to steal a large chunk of physical >> lowmem for another purpose outside the systems usage. Rather than >> waste the precious lowmem space on a 32-bit system, we can allow the >> virtual holes created by the physical holes to be used by vmalloc >> for virtual addressing. Track lowmem allocations in vmalloc to >> allow mixing of lowmem and vmalloc. >> >> Signed-off-by: Laura Abbott <lauraa@codeaurora.org> >> Signed-off-by: Neeti Desai <neetid@codeaurora.org> >> --- >> include/linux/mm.h | 6 ++++++ >> include/linux/vmalloc.h | 1 + >> mm/Kconfig | 11 +++++++++++ >> mm/vmalloc.c | 26 ++++++++++++++++++++++++++ >> 4 files changed, 44 insertions(+), 0 deletions(-) >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index f022460..76df50d 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -308,6 +308,10 @@ unsigned long vmalloc_to_pfn(const void *addr); >> * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there >> * is no special casing required. >> */ >> + >> +#ifdef CONFIG_VMALLOC_SAVING > mismatch below Kconfig. CONFIG_ENABLE_VMALLOC_SAVING? Argh, I folded in a wrong patch when integrating. I'll fix it. 
>> +extern int is_vmalloc_addr(const void *x) >> +#else >> static inline int is_vmalloc_addr(const void *x) >> { >> #ifdef CONFIG_MMU >> @@ -318,6 +322,8 @@ static inline int is_vmalloc_addr(const void *x) >> return 0; >> #endif >> } >> +#endif >> + >> #ifdef CONFIG_MMU >> extern int is_vmalloc_or_module_addr(const void *x); >> #else >> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h >> index 4b8a891..e0c8c49 100644 >> --- a/include/linux/vmalloc.h >> +++ b/include/linux/vmalloc.h >> @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ >> #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ >> #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ >> #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ >> +#define VM_LOWMEM 0x00000040 /* Tracking of direct mapped lowmem */ >> /* bits [20..32] reserved for arch specific ioremap internals */ >> >> /* >> diff --git a/mm/Kconfig b/mm/Kconfig >> index 8028dcc..b3c459d 100644 >> --- a/mm/Kconfig >> +++ b/mm/Kconfig >> @@ -519,3 +519,14 @@ config MEM_SOFT_DIRTY >> it can be cleared by hands. >> >> See Documentation/vm/soft-dirty.txt for more details. >> + >> +config ENABLE_VMALLOC_SAVING >> + bool "Intermix lowmem and vmalloc virtual space" >> + depends on ARCH_TRACKS_VMALLOC >> + help >> + Some memory layouts on embedded systems steal large amounts >> + of lowmem physical memory for purposes outside of the kernel. >> + Rather than waste the physical and virtual space, allow the >> + kernel to use the virtual space as vmalloc space. >> + >> + If unsure, say N. >> diff --git a/mm/vmalloc.c b/mm/vmalloc.c >> index 13a5495..c7b138b 100644 >> --- a/mm/vmalloc.c >> +++ b/mm/vmalloc.c >> @@ -204,6 +204,29 @@ static int vmap_page_range(unsigned long start, unsigned long end, >> return ret; >> } >> >> +#ifdef ENABLE_VMALLOC_SAVING > missing "CONFIG_" > Yes, this is a mess and needs to be cleaned up. 
> Thank you, > Kyungmin Park Thanks, Laura
On 11/11/2013 03:26 PM, Laura Abbott wrote: > +config ENABLE_VMALLOC_SAVING > + bool "Intermix lowmem and vmalloc virtual space" > + depends on ARCH_TRACKS_VMALLOC > + help > + Some memory layouts on embedded systems steal large amounts > + of lowmem physical memory for purposes outside of the kernel. > + Rather than waste the physical and virtual space, allow the > + kernel to use the virtual space as vmalloc space. I really don't think this needs to be exposed with help text and so forth. How about just defining a 'def_bool n' with some comments and let the architecture 'select' it? > +#ifdef ENABLE_VMALLOC_SAVING > +int is_vmalloc_addr(const void *x) > +{ > + struct rb_node *n; > + struct vmap_area *va; > + int ret = 0; > + > + spin_lock(&vmap_area_lock); > + > + for (n = rb_first(vmap_area_root); n; rb_next(n)) { > + va = rb_entry(n, struct vmap_area, rb_node); > + if (x >= va->va_start && x < va->va_end) { > + ret = 1; > + break; > + } > + } > + > + spin_unlock(&vmap_area_lock); > + return ret; > +} > +EXPORT_SYMBOL(is_vmalloc_addr); > +#endif It's probably worth noting that this makes is_vmalloc_addr() a *LOT* more expensive than it was before. There are a couple dozen of these in the tree in kinda weird places (ext4, netlink, tcp). You didn't mention it here, but you probably want to at least make sure you're not adding a spinlock and a tree walk in some critical path.
On 11/14/2013 9:45 AM, Dave Hansen wrote: > On 11/11/2013 03:26 PM, Laura Abbott wrote: >> +config ENABLE_VMALLOC_SAVING >> + bool "Intermix lowmem and vmalloc virtual space" >> + depends on ARCH_TRACKS_VMALLOC >> + help >> + Some memory layouts on embedded systems steal large amounts >> + of lowmem physical memory for purposes outside of the kernel. >> + Rather than waste the physical and virtual space, allow the >> + kernel to use the virtual space as vmalloc space. > > I really don't think this needs to be exposed with help text and so > forth. How about just defining a 'def_bool n' with some comments and > let the architecture 'select' it? > >> +#ifdef ENABLE_VMALLOC_SAVING >> +int is_vmalloc_addr(const void *x) >> +{ >> + struct rb_node *n; >> + struct vmap_area *va; >> + int ret = 0; >> + >> + spin_lock(&vmap_area_lock); >> + >> + for (n = rb_first(vmap_area_root); n; rb_next(n)) { >> + va = rb_entry(n, struct vmap_area, rb_node); >> + if (x >= va->va_start && x < va->va_end) { >> + ret = 1; >> + break; >> + } >> + } >> + >> + spin_unlock(&vmap_area_lock); >> + return ret; >> +} >> +EXPORT_SYMBOL(is_vmalloc_addr); >> +#endif > > It's probably worth noting that this makes is_vmalloc_addr() a *LOT* > more expensive than it was before. There are a couple dozen of these in > the tree in kinda weird places (ext4, netlink, tcp). You didn't > mention it here, but you probably want to at least make sure you're not > adding a spinlock and a tree walk in some critical path. > Yes, that was a concern I had as well. If is_vmalloc_addr returned true the spinlock/tree walk would happen anyway so essentially this is getting rid of the fast path. This is typically used in the idiom alloc(size) { if (size > some metric) vmalloc else kmalloc } free (ptr) { if (is_vmalloc_addr(ptr) vfree else kfree } so my hypothesis would be that any path would have to be willing to take the penalty of vmalloc anyway. The actual cost would depend on the vmalloc / kmalloc ratio. 
I haven't had a chance to get profiling data yet to see the performance difference. Thanks, Laura
On 11/14/2013 08:52 PM, Laura Abbott wrote: > free (ptr) { > if (is_vmalloc_addr(ptr) > vfree > else > kfree > } > > so my hypothesis would be that any path would have to be willing to take > the penalty of vmalloc anyway. The actual cost would depend on the > vmalloc / kmalloc ratio. I haven't had a chance to get profiling data > yet to see the performance difference. Well, either that, or these kinds of things where it is a fallback: > hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); > if (hc == NULL) > hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL);
On Thu, 14 Nov 2013 20:52:38 -0800 Laura Abbott <lauraa@codeaurora.org> wrote: > If is_vmalloc_addr returned true > the spinlock/tree walk would happen anyway so essentially this is > getting rid of the fast path. This is typically used in the idiom > > alloc(size) { > if (size > some metric) > vmalloc > else > kmalloc > } A better form is if (kmalloc(..., GFP_NOWARN) == NULL) vmalloc > free (ptr) { > if (is_vmalloc_addr(ptr) > vfree > else > kfree > } > > so my hypothesis would be that any path would have to be willing to take > the penalty of vmalloc anyway. The actual cost would depend on the > vmalloc / kmalloc ratio. I haven't had a chance to get profiling data > yet to see the performance difference. I've resisted adding the above helper functions simply to discourage the use of vmalloc() - it *is* slow, and one day we might hit vmalloc-arena fragmentation issues. That being said, I might one day give up, because adding such helpers would be a significant cleanup. And once they are added, their use will proliferate and is_vmalloc_addr() will take quite a beating. So yes, it would be prudent to be worried about is_vmalloc_addr() performance at the outset. Couldn't is_vmalloc_addr() just be done with a plain old bitmap? It would consume 128kbytes to manage a 4G address space, and 1/8th of a meg isn't much.
On 11/26/2013 2:45 PM, Andrew Morton wrote: > So yes, it would be prudent to be worried about is_vmalloc_addr() > performance at the outset. > > Couldn't is_vmalloc_addr() just be done with a plain old bitmap? It > would consume 128kbytes to manage a 4G address space, and 1/8th of a meg > isn't much. > Yes, I came to the same conclusion after realizing I needed something similar to fix up proc/kcore.c . I plan to go with the bitmap for the next patch version. Laura
diff --git a/include/linux/mm.h b/include/linux/mm.h index f022460..76df50d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -308,6 +308,10 @@ unsigned long vmalloc_to_pfn(const void *addr); * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there * is no special casing required. */ + +#ifdef CONFIG_VMALLOC_SAVING +extern int is_vmalloc_addr(const void *x) +#else static inline int is_vmalloc_addr(const void *x) { #ifdef CONFIG_MMU @@ -318,6 +322,8 @@ static inline int is_vmalloc_addr(const void *x) return 0; #endif } +#endif + #ifdef CONFIG_MMU extern int is_vmalloc_or_module_addr(const void *x); #else diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 4b8a891..e0c8c49 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -16,6 +16,7 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_VPAGES 0x00000010 /* buffer for pages was vmalloc'ed */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_LOWMEM 0x00000040 /* Tracking of direct mapped lowmem */ /* bits [20..32] reserved for arch specific ioremap internals */ /* diff --git a/mm/Kconfig b/mm/Kconfig index 8028dcc..b3c459d 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -519,3 +519,14 @@ config MEM_SOFT_DIRTY it can be cleared by hands. See Documentation/vm/soft-dirty.txt for more details. + +config ENABLE_VMALLOC_SAVING + bool "Intermix lowmem and vmalloc virtual space" + depends on ARCH_TRACKS_VMALLOC + help + Some memory layouts on embedded systems steal large amounts + of lowmem physical memory for purposes outside of the kernel. + Rather than waste the physical and virtual space, allow the + kernel to use the virtual space as vmalloc space. + + If unsure, say N. 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 13a5495..c7b138b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -204,6 +204,29 @@ static int vmap_page_range(unsigned long start, unsigned long end, return ret; } +#ifdef ENABLE_VMALLOC_SAVING +int is_vmalloc_addr(const void *x) +{ + struct rb_node *n; + struct vmap_area *va; + int ret = 0; + + spin_lock(&vmap_area_lock); + + for (n = rb_first(vmap_area_root); n; rb_next(n)) { + va = rb_entry(n, struct vmap_area, rb_node); + if (x >= va->va_start && x < va->va_end) { + ret = 1; + break; + } + } + + spin_unlock(&vmap_area_lock); + return ret; +} +EXPORT_SYMBOL(is_vmalloc_addr); +#endif + int is_vmalloc_or_module_addr(const void *x) { /* @@ -2628,6 +2651,9 @@ static int s_show(struct seq_file *m, void *p) if (v->flags & VM_VPAGES) seq_printf(m, " vpages"); + if (v->flags & VM_LOWMEM) + seq_printf(m, " lowmem"); + show_numa_info(m, v); seq_putc(m, '\n'); return 0;