Message ID | 20220907144521.3115321-2-zokeefe@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | mm: add file/shmem support to MADV_COLLAPSE | expand |
On Wed, Sep 7, 2022 at 7:45 AM Zach O'Keefe <zokeefe@google.com> wrote: > > Extend 'mm/thp: add flag to enforce sysfs THP in > hugepage_vma_check()' to shmem, allowing callers to ignore > /sys/kernel/transparent_hugepage/shmem_enabled and tmpfs huge= mount. > > This is intended to be used by MADV_COLLAPSE, and the rationale is > analogous to the anon/file case: MADV_COLLAPSE is not coupled to > directives that advise the kernel's decisions on when THPs should be > considered eligible. shmem/tmpfs always claims large folio support, > regardless of sysfs or mount options. > > Signed-off-by: Zach O'Keefe <zokeefe@google.com> Reviewed-by: Yang Shi <shy828301@gmail.com> A nit below... > --- > include/linux/shmem_fs.h | 10 ++++++---- > mm/huge_memory.c | 2 +- > mm/shmem.c | 18 +++++++++--------- > 3 files changed, 16 insertions(+), 14 deletions(-) > > diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h > index f24071e3c826..d500ea967dc7 100644 > --- a/include/linux/shmem_fs.h > +++ b/include/linux/shmem_fs.h > @@ -92,11 +92,13 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, > extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); > int shmem_unuse(unsigned int type); > > -extern bool shmem_is_huge(struct vm_area_struct *vma, > - struct inode *inode, pgoff_t index); > -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) > +extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > + pgoff_t index, bool shmem_huge_force); > +static inline bool shmem_huge_enabled(struct vm_area_struct *vma, > + bool shmem_huge_force) > { > - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); > + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, > + shmem_huge_force); > } > extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); > extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, > diff --git a/mm/huge_memory.c 
b/mm/huge_memory.c > index 7fa74b9749a6..53d170dac332 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -119,7 +119,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, > * own flags. > */ > if (!in_pf && shmem_file(vma->vm_file)) > - return shmem_huge_enabled(vma); > + return shmem_huge_enabled(vma, !enforce_sysfs); > > /* Enforce sysfs THP requirements as necessary */ > if (enforce_sysfs && > diff --git a/mm/shmem.c b/mm/shmem.c > index 99b7341bd0bf..47c42c566fd1 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -461,20 +461,20 @@ static bool shmem_confirm_swap(struct address_space *mapping, > > static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; > > -bool shmem_is_huge(struct vm_area_struct *vma, > - struct inode *inode, pgoff_t index) > +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > + pgoff_t index, bool shmem_huge_force) > { > loff_t i_size; > > if (!S_ISREG(inode->i_mode)) > return false; > - if (shmem_huge == SHMEM_HUGE_DENY) > - return false; > if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) || > test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) > return false; > - if (shmem_huge == SHMEM_HUGE_FORCE) > + if (shmem_huge == SHMEM_HUGE_FORCE || shmem_huge_force) shmem_huge_force means ignore all sysfs and mount options, so it seems better to have it test explicitly IMHO, like: if (shmem_huge_force) return true; if (shmem_huge == SHMEM_HUGE_FORCE) return true; > return true; > + if (shmem_huge == SHMEM_HUGE_DENY) > + return false; > > switch (SHMEM_SB(inode->i_sb)->huge) { > case SHMEM_HUGE_ALWAYS: > @@ -669,8 +669,8 @@ static long shmem_unused_huge_count(struct super_block *sb, > > #define shmem_huge SHMEM_HUGE_DENY > > -bool shmem_is_huge(struct vm_area_struct *vma, > - struct inode *inode, pgoff_t index) > +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > + pgoff_t index, bool shmem_huge_force) > { > return false; > } > @@ -1056,7 +1056,7 @@ static int shmem_getattr(struct 
user_namespace *mnt_userns, > STATX_ATTR_NODUMP); > generic_fillattr(&init_user_ns, inode, stat); > > - if (shmem_is_huge(NULL, inode, 0)) > + if (shmem_is_huge(NULL, inode, 0, false)) > stat->blksize = HPAGE_PMD_SIZE; > > if (request_mask & STATX_BTIME) { > @@ -1888,7 +1888,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, > return 0; > } > > - if (!shmem_is_huge(vma, inode, index)) > + if (!shmem_is_huge(vma, inode, index, false)) > goto alloc_nohuge; > > huge_gfp = vma_thp_gfp_mask(vma); > -- > 2.37.2.789.g6183377224-goog >
On Sep 16 10:46, Yang Shi wrote: > On Wed, Sep 7, 2022 at 7:45 AM Zach O'Keefe <zokeefe@google.com> wrote: > > > > Extend 'mm/thp: add flag to enforce sysfs THP in > > hugepage_vma_check()' to shmem, allowing callers to ignore > > /sys/kernel/transparent_hugepage/shmem_enabled and tmpfs huge= mount. > > > > This is intended to be used by MADV_COLLAPSE, and the rationale is > > analogous to the anon/file case: MADV_COLLAPSE is not coupled to > > directives that advise the kernel's decisions on when THPs should be > > considered eligible. shmem/tmpfs always claims large folio support, > > regardless of sysfs or mount options. > > > > Signed-off-by: Zach O'Keefe <zokeefe@google.com> > > Reviewed-by: Yang Shi <shy828301@gmail.com> > > A nit below... > Hey Yang, Thanks for taking the time as always :) > > --- > > include/linux/shmem_fs.h | 10 ++++++---- > > mm/huge_memory.c | 2 +- > > mm/shmem.c | 18 +++++++++--------- > > 3 files changed, 16 insertions(+), 14 deletions(-) > > > > diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h > > index f24071e3c826..d500ea967dc7 100644 > > --- a/include/linux/shmem_fs.h > > +++ b/include/linux/shmem_fs.h > > @@ -92,11 +92,13 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, > > extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); > > int shmem_unuse(unsigned int type); > > > > -extern bool shmem_is_huge(struct vm_area_struct *vma, > > - struct inode *inode, pgoff_t index); > > -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) > > +extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > > + pgoff_t index, bool shmem_huge_force); > > +static inline bool shmem_huge_enabled(struct vm_area_struct *vma, > > + bool shmem_huge_force) > > { > > - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); > > + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, > > + shmem_huge_force); > > } > > extern 
unsigned long shmem_swap_usage(struct vm_area_struct *vma); > > extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > > index 7fa74b9749a6..53d170dac332 100644 > > --- a/mm/huge_memory.c > > +++ b/mm/huge_memory.c > > @@ -119,7 +119,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, > > * own flags. > > */ > > if (!in_pf && shmem_file(vma->vm_file)) > > - return shmem_huge_enabled(vma); > > + return shmem_huge_enabled(vma, !enforce_sysfs); > > > > /* Enforce sysfs THP requirements as necessary */ > > if (enforce_sysfs && > > diff --git a/mm/shmem.c b/mm/shmem.c > > index 99b7341bd0bf..47c42c566fd1 100644 > > --- a/mm/shmem.c > > +++ b/mm/shmem.c > > @@ -461,20 +461,20 @@ static bool shmem_confirm_swap(struct address_space *mapping, > > > > static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; > > > > -bool shmem_is_huge(struct vm_area_struct *vma, > > - struct inode *inode, pgoff_t index) > > +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > > + pgoff_t index, bool shmem_huge_force) > > { > > loff_t i_size; > > > > if (!S_ISREG(inode->i_mode)) > > return false; > > - if (shmem_huge == SHMEM_HUGE_DENY) > > - return false; > > if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) || > > test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) > > return false; > > - if (shmem_huge == SHMEM_HUGE_FORCE) > > + if (shmem_huge == SHMEM_HUGE_FORCE || shmem_huge_force) > > shmem_huge_force means ignore all sysfs and mount options, so it seems > better to have it test explicitly IMHO, like: > > if (shmem_huge_force) > return true; > > if (shmem_huge == SHMEM_HUGE_FORCE) > return true; > > This makes sense to me - a little bit cleaner / more direct. Thanks for the suggestion. 
Thank you again, Zach > > return true; > > + if (shmem_huge == SHMEM_HUGE_DENY) > > + return false; > > > > switch (SHMEM_SB(inode->i_sb)->huge) { > > case SHMEM_HUGE_ALWAYS: > > @@ -669,8 +669,8 @@ static long shmem_unused_huge_count(struct super_block *sb, > > > > #define shmem_huge SHMEM_HUGE_DENY > > > > -bool shmem_is_huge(struct vm_area_struct *vma, > > - struct inode *inode, pgoff_t index) > > +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, > > + pgoff_t index, bool shmem_huge_force) > > { > > return false; > > } > > @@ -1056,7 +1056,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns, > > STATX_ATTR_NODUMP); > > generic_fillattr(&init_user_ns, inode, stat); > > > > - if (shmem_is_huge(NULL, inode, 0)) > > + if (shmem_is_huge(NULL, inode, 0, false)) > > stat->blksize = HPAGE_PMD_SIZE; > > > > if (request_mask & STATX_BTIME) { > > @@ -1888,7 +1888,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, > > return 0; > > } > > > > - if (!shmem_is_huge(vma, inode, index)) > > + if (!shmem_is_huge(vma, inode, index, false)) > > goto alloc_nohuge; > > > > huge_gfp = vma_thp_gfp_mask(vma); > > -- > > 2.37.2.789.g6183377224-goog > >
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index f24071e3c826..d500ea967dc7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -92,11 +92,13 @@ extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); -extern bool shmem_is_huge(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index); -static inline bool shmem_huge_enabled(struct vm_area_struct *vma) +extern bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, + pgoff_t index, bool shmem_huge_force); +static inline bool shmem_huge_enabled(struct vm_area_struct *vma, + bool shmem_huge_force) { - return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff); + return shmem_is_huge(vma, file_inode(vma->vm_file), vma->vm_pgoff, + shmem_huge_force); } extern unsigned long shmem_swap_usage(struct vm_area_struct *vma); extern unsigned long shmem_partial_swap_usage(struct address_space *mapping, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7fa74b9749a6..53d170dac332 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -119,7 +119,7 @@ bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags, * own flags. 
*/ if (!in_pf && shmem_file(vma->vm_file)) - return shmem_huge_enabled(vma); + return shmem_huge_enabled(vma, !enforce_sysfs); /* Enforce sysfs THP requirements as necessary */ if (enforce_sysfs && diff --git a/mm/shmem.c b/mm/shmem.c index 99b7341bd0bf..47c42c566fd1 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -461,20 +461,20 @@ static bool shmem_confirm_swap(struct address_space *mapping, static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER; -bool shmem_is_huge(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index) +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, + pgoff_t index, bool shmem_huge_force) { loff_t i_size; if (!S_ISREG(inode->i_mode)) return false; - if (shmem_huge == SHMEM_HUGE_DENY) - return false; if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))) return false; - if (shmem_huge == SHMEM_HUGE_FORCE) + if (shmem_huge == SHMEM_HUGE_FORCE || shmem_huge_force) return true; + if (shmem_huge == SHMEM_HUGE_DENY) + return false; switch (SHMEM_SB(inode->i_sb)->huge) { case SHMEM_HUGE_ALWAYS: @@ -669,8 +669,8 @@ static long shmem_unused_huge_count(struct super_block *sb, #define shmem_huge SHMEM_HUGE_DENY -bool shmem_is_huge(struct vm_area_struct *vma, - struct inode *inode, pgoff_t index) +bool shmem_is_huge(struct vm_area_struct *vma, struct inode *inode, + pgoff_t index, bool shmem_huge_force) { return false; } @@ -1056,7 +1056,7 @@ static int shmem_getattr(struct user_namespace *mnt_userns, STATX_ATTR_NODUMP); generic_fillattr(&init_user_ns, inode, stat); - if (shmem_is_huge(NULL, inode, 0)) + if (shmem_is_huge(NULL, inode, 0, false)) stat->blksize = HPAGE_PMD_SIZE; if (request_mask & STATX_BTIME) { @@ -1888,7 +1888,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index, return 0; } - if (!shmem_is_huge(vma, inode, index)) + if (!shmem_is_huge(vma, inode, index, false)) goto alloc_nohuge; huge_gfp = vma_thp_gfp_mask(vma);
Extend 'mm/thp: add flag to enforce sysfs THP in hugepage_vma_check()' to shmem, allowing callers to ignore /sys/kernel/transparent_hugepage/shmem_enabled and the tmpfs huge= mount option. This is intended to be used by MADV_COLLAPSE, and the rationale is analogous to the anon/file case: MADV_COLLAPSE is not coupled to directives that advise the kernel's decisions on when THPs should be considered eligible. shmem/tmpfs always claims large folio support, regardless of sysfs or mount options. Signed-off-by: Zach O'Keefe <zokeefe@google.com> --- include/linux/shmem_fs.h | 10 ++++++---- mm/huge_memory.c | 2 +- mm/shmem.c | 18 +++++++++--------- 3 files changed, 16 insertions(+), 14 deletions(-)