Message ID | 20240531163217.1584450-4-Liam.Howlett@oracle.com
---|---
State | New
Series | Avoid MAP_FIXED gap exposure
On Fri, May 31, 2024 at 9:33 AM Liam R. Howlett <Liam.Howlett@oracle.com> wrote:
>
> Use a structure to pass along all the necessary information and counters
> involved in removing vmas from the mm_struct.
>
> Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>

Reviewed-by: Suren Baghdasaryan <surenb@google.com>

> ---
>  mm/internal.h |  16 ++++++
>  mm/mmap.c     | 133 +++++++++++++++++++++++++++++---------------------
>  2 files changed, 94 insertions(+), 55 deletions(-)
>
> diff --git a/mm/internal.h b/mm/internal.h
> index b2c75b12014e..6ebf77853d68 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1428,6 +1428,22 @@ struct vma_prepare {
>          struct vm_area_struct *remove2;
>  };
>
> +/*
> + * vma munmap operation
> + */
> +struct vma_munmap_struct {
> +        struct vma_iterator *vmi;
> +        struct mm_struct *mm;
> +        struct vm_area_struct *vma;     /* The first vma to munmap */
> +        struct list_head *uf;           /* Userfaultfd list_head */
> +        unsigned long start;            /* Aligned start addr */
> +        unsigned long end;              /* Aligned end addr */
> +        int vma_count;                  /* Number of vmas that will be removed */
> +        unsigned long nr_pages;         /* Number of pages being removed */
> +        unsigned long locked_vm;        /* Number of locked pages */
> +        bool unlock;                    /* Unlock after the munmap */
> +};
> +
>  void __meminit __init_single_page(struct page *page, unsigned long pfn,
>                                  unsigned long zone, int nid);
>
> diff --git a/mm/mmap.c b/mm/mmap.c
> index fad40d604c64..57f2383245ea 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -459,6 +459,31 @@ static inline void init_vma_prep(struct vma_prepare *vp,
>          init_multi_vma_prep(vp, vma, NULL, NULL, NULL);
>  }
>
> +/*
> + * init_vma_munmap() - Initializer wrapper for vma_munmap_struct
> + * @vms: The vma munmap struct
> + * @vmi: The vma iterator
> + * @vma: The first vm_area_struct to munmap
> + * @start: The aligned start address to munmap
> + * @end: The aligned end address to munmap
> + * @uf: The userfaultfd list_head
> + * @unlock: Unlock after the operation. Only unlocked on success
> + */
> +static inline void init_vma_munmap(struct vma_munmap_struct *vms,
> +                struct vma_iterator *vmi, struct vm_area_struct *vma,
> +                unsigned long start, unsigned long end, struct list_head *uf,
> +                bool unlock)
> +{
> +        vms->vmi = vmi;
> +        vms->vma = vma;
> +        vms->mm = vma->vm_mm;
> +        vms->start = start;
> +        vms->end = end;
> +        vms->unlock = unlock;
> +        vms->uf = uf;
> +        vms->vma_count = 0;
> +        vms->nr_pages = vms->locked_vm = 0;
> +}
>
>  /*
>   * vma_prepare() - Helper function for handling locking VMAs prior to altering
> @@ -2340,7 +2365,6 @@ static inline void remove_mt(struct mm_struct *mm, struct ma_state *mas)
>
>                  if (vma->vm_flags & VM_ACCOUNT)
>                          nr_accounted += nrpages;
> -
>                  vm_stat_account(mm, vma->vm_flags, -nrpages);
>                  remove_vma(vma, false);
>          }
> @@ -2562,29 +2586,20 @@ static inline void abort_munmap_vmas(struct ma_state *mas_detach)
>  }
>
>  /*
> - * vmi_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
> + * vms_gather_munmap_vmas() - Put all VMAs within a range into a maple tree
>   * for removal at a later date. Handles splitting first and last if necessary
>   * and marking the vmas as isolated.
>   *
> - * @vmi: The vma iterator
> - * @vma: The starting vm_area_struct
> - * @mm: The mm_struct
> - * @start: The aligned start address to munmap.
> - * @end: The aligned end address to munmap.
> - * @uf: The userfaultfd list_head
> + * @vms: The vma munmap struct
>   * @mas_detach: The maple state tracking the detached tree
>   *
>   * Return: 0 on success
>   */
> -static int
> -vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
> -                struct mm_struct *mm, unsigned long start,
> -                unsigned long end, struct list_head *uf,
> -                struct ma_state *mas_detach, unsigned long *locked_vm)
> +static int vms_gather_munmap_vmas(struct vma_munmap_struct *vms,
> +                struct ma_state *mas_detach)
>  {
>          struct vm_area_struct *next = NULL;
>          int error = -ENOMEM;
> -        int count = 0;
>
>          /*
>           * If we need to split any vma, do it now to save pain later.
> @@ -2595,17 +2610,18 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>           */
>
>          /* Does it split the first one? */
> -        if (start > vma->vm_start) {
> +        if (vms->start > vms->vma->vm_start) {
>
>                  /*
>                   * Make sure that map_count on return from munmap() will
>                   * not exceed its limit; but let map_count go just above
>                   * its limit temporarily, to help free resources as expected.
>                   */
> -                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
> +                if (vms->end < vms->vma->vm_end &&
> +                    vms->mm->map_count >= sysctl_max_map_count)
>                          goto map_count_exceeded;
>
> -                error = __split_vma(vmi, vma, start, 1);
> +                error = __split_vma(vms->vmi, vms->vma, vms->start, 1);
>                  if (error)
>                          goto start_split_failed;
>          }
> @@ -2614,24 +2630,24 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>           * Detach a range of VMAs from the mm. Using next as a temp variable as
>           * it is always overwritten.
>           */
> -        next = vma;
> +        next = vms->vma;
>          do {
>                  /* Does it split the end? */
> -                if (next->vm_end > end) {
> -                        error = __split_vma(vmi, next, end, 0);
> +                if (next->vm_end > vms->end) {
> +                        error = __split_vma(vms->vmi, next, vms->end, 0);
>                          if (error)
>                                  goto end_split_failed;
>                  }
>                  vma_start_write(next);
> -                mas_set(mas_detach, count++);
> +                mas_set(mas_detach, vms->vma_count++);
>                  if (next->vm_flags & VM_LOCKED)
> -                        *locked_vm += vma_pages(next);
> +                        vms->locked_vm += vma_pages(next);
>
>                  error = mas_store_gfp(mas_detach, next, GFP_KERNEL);
>                  if (error)
>                          goto munmap_gather_failed;
>                  vma_mark_detached(next, true);
> -                if (unlikely(uf)) {
> +                if (unlikely(vms->uf)) {
>                          /*
>                           * If userfaultfd_unmap_prep returns an error the vmas
>                           * will remain split, but userland will get a
> @@ -2641,16 +2657,17 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>                           * split, despite we could. This is unlikely enough
>                           * failure that it's not worth optimizing it for.
>                           */
> -                        error = userfaultfd_unmap_prep(next, start, end, uf);
> +                        error = userfaultfd_unmap_prep(next, vms->start,
> +                                        vms->end, vms->uf);
>
>                          if (error)
>                                  goto userfaultfd_error;
>                  }
>  #ifdef CONFIG_DEBUG_VM_MAPLE_TREE
> -                BUG_ON(next->vm_start < start);
> -                BUG_ON(next->vm_start > end);
> +                BUG_ON(next->vm_start < vms->start);
> +                BUG_ON(next->vm_start > vms->end);
>  #endif
> -        } for_each_vma_range(*vmi, next, end);
> +        } for_each_vma_range(*(vms->vmi), next, vms->end);
>
>  #if defined(CONFIG_DEBUG_VM_MAPLE_TREE)
>          /* Make sure no VMAs are about to be lost. */
> @@ -2659,21 +2676,21 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>                  struct vm_area_struct *vma_mas, *vma_test;
>                  int test_count = 0;
>
> -                vma_iter_set(vmi, start);
> +                vma_iter_set(vms->vmi, vms->start);
>                  rcu_read_lock();
> -                vma_test = mas_find(&test, count - 1);
> -                for_each_vma_range(*vmi, vma_mas, end) {
> +                vma_test = mas_find(&test, vms->vma_count - 1);
> +                for_each_vma_range(*(vms->vmi), vma_mas, vms->end) {
>                          BUG_ON(vma_mas != vma_test);
>                          test_count++;
> -                        vma_test = mas_next(&test, count - 1);
> +                        vma_test = mas_next(&test, vms->vma_count - 1);
>                  }
>                  rcu_read_unlock();
> -                BUG_ON(count != test_count);
> +                BUG_ON(vms->vma_count != test_count);
>          }
>  #endif
>
> -        while (vma_iter_addr(vmi) > start)
> -                vma_iter_prev_range(vmi);
> +        while (vma_iter_addr(vms->vmi) > vms->start)
> +                vma_iter_prev_range(vms->vmi);
>
>          return 0;
>
> @@ -2686,38 +2703,44 @@ vmi_gather_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
>          return error;
>  }
>
> -static void
> -vmi_complete_munmap_vmas(struct vma_iterator *vmi, struct vm_area_struct *vma,
> -                struct mm_struct *mm, unsigned long start,
> -                unsigned long end, bool unlock, struct ma_state *mas_detach,
> -                unsigned long locked_vm)
> +/*
> + * vmi_complete_munmap_vmas() - Update mm counters, unlock if directed, and free
> + * all VMA resources.
> + *
> + * do_vmi_align_munmap() - munmap the aligned region from @start to @end.
> + * @vms: The vma munmap struct
> + * @mas_detach: The maple state of the detached vmas
> + *
> + */
> +static void vms_complete_munmap_vmas(struct vma_munmap_struct *vms,
> +                struct ma_state *mas_detach)
>  {
>          struct vm_area_struct *prev, *next;
> -        int count;
> +        struct mm_struct *mm;
>
> -        count = mas_detach->index + 1;
> -        mm->map_count -= count;
> -        mm->locked_vm -= locked_vm;
> -        if (unlock)
> +        mm = vms->mm;
> +        mm->map_count -= vms->vma_count;
> +        mm->locked_vm -= vms->locked_vm;
> +        if (vms->unlock)
>                  mmap_write_downgrade(mm);
>
> -        prev = vma_iter_prev_range(vmi);
> -        next = vma_next(vmi);
> +        prev = vma_iter_prev_range(vms->vmi);
> +        next = vma_next(vms->vmi);
>          if (next)
> -                vma_iter_prev_range(vmi);
> +                vma_iter_prev_range(vms->vmi);
>
>          /*
>           * We can free page tables without write-locking mmap_lock because VMAs
>           * were isolated before we downgraded mmap_lock.
>           */
>          mas_set(mas_detach, 1);
> -        unmap_region(mm, mas_detach, vma, prev, next, start, end, count,
> -                     !unlock);
> +        unmap_region(mm, mas_detach, vms->vma, prev, next, vms->start, vms->end,
> +                     vms->vma_count, !vms->unlock);
>          /* Statistics and freeing VMAs */
>          mas_set(mas_detach, 0);
>          remove_mt(mm, mas_detach);
>          validate_mm(mm);
> -        if (unlock)
> +        if (vms->unlock)
>                  mmap_read_unlock(mm);
>
>          __mt_destroy(mas_detach->tree);
> @@ -2746,11 +2769,12 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>          MA_STATE(mas_detach, &mt_detach, 0, 0);
>          mt_init_flags(&mt_detach, vmi->mas.tree->ma_flags & MT_FLAGS_LOCK_MASK);
>          mt_on_stack(mt_detach);
> +        struct vma_munmap_struct vms;
>          int error;
> -        unsigned long locked_vm = 0;
>
> -        error = vmi_gather_munmap_vmas(vmi, vma, mm, start, end, uf,
> -                                       &mas_detach, &locked_vm);
> +        init_vma_munmap(&vms, vmi, vma, start, end, uf, unlock);
> +
> +        error = vms_gather_munmap_vmas(&vms, &mas_detach);
>          if (error)
>                  goto gather_failed;
>
> @@ -2758,8 +2782,7 @@ do_vmi_align_munmap(struct vma_iterator *vmi, struct vm_area_struct *vma,
>          if (error)
>                  goto clear_area_failed;
>
> -        vmi_complete_munmap_vmas(vmi, vma, mm, start, end, unlock, &mas_detach,
> -                                 locked_vm);
> +        vms_complete_munmap_vmas(&vms, &mas_detach);
>          return 0;
>
>  clear_area_failed:
> --
> 2.43.0
>
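For readers less familiar with the pattern being reviewed here, below is a minimal, self-contained C sketch of the refactor the patch performs: the long argument lists of the gather/complete helpers are replaced by a single state struct that is initialized once and also accumulates the counters. The names in the sketch (munmap_state, init_state(), gather(), complete()) are illustrative stand-ins, not the kernel's definitions, and the page arithmetic is a placeholder rather than real VMA accounting.

```c
/*
 * Standalone sketch (not kernel code): bundle the parameters and counters
 * of a multi-step operation into one struct, mirroring how the patch
 * introduces struct vma_munmap_struct for the munmap gather/complete path.
 */
#include <stdbool.h>
#include <stdio.h>

struct munmap_state {            /* analogous to struct vma_munmap_struct */
	unsigned long start;     /* aligned start address */
	unsigned long end;       /* aligned end address */
	int vma_count;           /* number of regions gathered */
	unsigned long locked;    /* pages counted as locked */
	bool unlock;             /* drop the lock after completion */
};

/* analogous to init_vma_munmap(): one place sets up all fields */
static void init_state(struct munmap_state *st, unsigned long start,
		       unsigned long end, bool unlock)
{
	st->start = start;
	st->end = end;
	st->unlock = unlock;
	st->vma_count = 0;
	st->locked = 0;
}

/* analogous to vms_gather_munmap_vmas(): counters accumulate on the
 * struct instead of being returned through out-parameters. */
static int gather(struct munmap_state *st)
{
	st->vma_count++;
	st->locked += (st->end - st->start) / 4096;  /* placeholder math */
	return 0;
}

/* analogous to vms_complete_munmap_vmas(): reads everything it needs
 * from the same struct the gather step filled in. */
static void complete(const struct munmap_state *st)
{
	printf("removed %d region(s), %lu locked page(s), unlock=%d\n",
	       st->vma_count, st->locked, st->unlock);
}

int main(void)
{
	struct munmap_state st;

	init_state(&st, 0x1000, 0x5000, true);
	if (gather(&st) == 0)
		complete(&st);
	return 0;
}
```

The design point the patch makes is visible even in this toy: adding a new counter later means adding one struct field rather than growing every helper's signature and call site.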