[v3,4/6] mm: shmem: add mTHP support for anonymous shmem

Message ID ec35a23026dd016705d211e85163cabe07681516.1717033868.git.baolin.wang@linux.alibaba.com (mailing list archive)
State New
Series: add mTHP support for anonymous shmem

Commit Message

Baolin Wang May 30, 2024, 2:04 a.m. UTC
Commit 19eaf44954df added multi-size THP (mTHP) for anonymous pages, which
allows THP to be configured through the sysfs interface located at
'/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/enabled'.

However, anonymous shmem pages ignore the anonymous mTHP rule configured
through the sysfs interface and can only use PMD-mapped THP, which is not
reasonable. Users expect the mTHP rule to apply to all anonymous pages,
including anonymous shmem pages, so that they can enjoy the benefits of
mTHP: for example, lower allocation latency than PMD-mapped THP, less
memory bloat than PMD-mapped THP, and contiguous PTEs on ARM architectures
to reduce TLB misses.

The primary strategy is similar to the one used for anonymous mTHP.
Introduce a new interface,
'/sys/kernel/mm/transparent_hugepage/hugepage-XXkb/shmem_enabled', which
accepts all the same values as the top-level
'/sys/kernel/mm/transparent_hugepage/shmem_enabled', plus an additional
"inherit" option. By default all sizes are set to "never" except the PMD
size, which is set to "inherit". This ensures backward compatibility with
the top-level anonymous shmem enabled setting, while also allowing
independent control of anonymous shmem enabled for each mTHP size.
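
For illustration only (not part of this patch), a minimal userspace C sketch
of toggling one of the new per-size knobs; the 'hugepage-64kB' directory name
and the "always" value are assumptions derived from the 'hugepage-XXkb'
pattern and the value set described above:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/*
	 * Assumed path following the per-size 'hugepage-XXkb' naming above;
	 * requires root and a kernel with this series applied.
	 */
	const char *knob =
		"/sys/kernel/mm/transparent_hugepage/hugepage-64kB/shmem_enabled";
	const char *value = "always\n";
	FILE *f = fopen(knob, "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	/* Accepted values mirror the top-level shmem_enabled, plus "inherit". */
	if (fwrite(value, 1, strlen(value), f) != strlen(value)) {
		perror("fwrite");
		fclose(f);
		return 1;
	}
	fclose(f);
	return 0;
}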

Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
---
 include/linux/huge_mm.h |  10 +++
 mm/shmem.c              | 187 +++++++++++++++++++++++++++++++++-------
 2 files changed, 167 insertions(+), 30 deletions(-)

Comments

kernel test robot May 30, 2024, 6:36 a.m. UTC | #1
Hi Baolin,

kernel test robot noticed the following build warnings:

[auto build test WARNING on akpm-mm/mm-everything]
[also build test WARNING on next-20240529]
[cannot apply to linus/master v6.10-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Baolin-Wang/mm-memory-extend-finish_fault-to-support-large-folio/20240530-100805
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/ec35a23026dd016705d211e85163cabe07681516.1717033868.git.baolin.wang%40linux.alibaba.com
patch subject: [PATCH v3 4/6] mm: shmem: add mTHP support for anonymous shmem
config: riscv-defconfig (https://download.01.org/0day-ci/archive/20240530/202405301430.0NRLTOWU-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project bafda89a0944d947fc4b3b5663185e07a397ac30)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240530/202405301430.0NRLTOWU-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202405301430.0NRLTOWU-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from mm/shmem.c:28:
   In file included from include/linux/ramfs.h:5:
   In file included from include/linux/fs_parser.h:11:
   In file included from include/linux/fs_context.h:14:
   In file included from include/linux/security.h:33:
   In file included from include/linux/mm.h:2245:
   include/linux/vmstat.h:514:36: warning: arithmetic between different enumeration types ('enum node_stat_item' and 'enum lru_list') [-Wenum-enum-conversion]
     514 |         return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_"
         |                               ~~~~~~~~~~~ ^ ~~~
>> mm/shmem.c:1748:14: warning: variable 'suitable_orders' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
    1748 |                 } else if (orders & BIT(HPAGE_PMD_ORDER)) {
         |                            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/shmem.c:1766:25: note: uninitialized use occurs here
    1766 |                 order = highest_order(suitable_orders);
         |                                       ^~~~~~~~~~~~~~~
   mm/shmem.c:1748:10: note: remove the 'if' if its condition is always true
    1748 |                 } else if (orders & BIT(HPAGE_PMD_ORDER)) {
         |                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   mm/shmem.c:1736:31: note: initialize the variable 'suitable_orders' to silence this warning
    1736 |         unsigned long suitable_orders;
         |                                      ^
         |                                       = 0
   2 warnings generated.


vim +1748 mm/shmem.c

  1728	
  1729	static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
  1730			gfp_t gfp, struct inode *inode, pgoff_t index,
  1731			struct mm_struct *fault_mm, unsigned long orders)
  1732	{
  1733		struct address_space *mapping = inode->i_mapping;
  1734		struct shmem_inode_info *info = SHMEM_I(inode);
  1735		struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
  1736		unsigned long suitable_orders;
  1737		struct folio *folio = NULL;
  1738		long pages;
  1739		int error, order;
  1740	
  1741		if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
  1742			orders = 0;
  1743	
  1744		if (orders > 0) {
  1745			if (vma && vma_is_anon_shmem(vma)) {
  1746				suitable_orders = anon_shmem_suitable_orders(inode, vmf,
  1747								mapping, index, orders);
> 1748			} else if (orders & BIT(HPAGE_PMD_ORDER)) {
  1749				pages = HPAGE_PMD_NR;
  1750				suitable_orders = BIT(HPAGE_PMD_ORDER);
  1751				index = round_down(index, HPAGE_PMD_NR);
  1752	
  1753				/*
  1754				 * Check for conflict before waiting on a huge allocation.
  1755				 * Conflict might be that a huge page has just been allocated
  1756				 * and added to page cache by a racing thread, or that there
  1757				 * is already at least one small page in the huge extent.
  1758				 * Be careful to retry when appropriate, but not forever!
  1759				 * Elsewhere -EEXIST would be the right code, but not here.
  1760				 */
  1761				if (xa_find(&mapping->i_pages, &index,
  1762					    index + HPAGE_PMD_NR - 1, XA_PRESENT))
  1763					return ERR_PTR(-E2BIG);
  1764			}
  1765	
  1766			order = highest_order(suitable_orders);
  1767			while (suitable_orders) {
  1768				pages = 1UL << order;
  1769				index = round_down(index, pages);
  1770				folio = shmem_alloc_folio(gfp, order, info, index);
  1771				if (folio)
  1772					goto allocated;
  1773	
  1774				if (pages == HPAGE_PMD_NR)
  1775					count_vm_event(THP_FILE_FALLBACK);
  1776				order = next_order(&suitable_orders, order);
  1777			}
  1778		} else {
  1779			pages = 1;
  1780			folio = shmem_alloc_folio(gfp, 0, info, index);
  1781		}
  1782		if (!folio)
  1783			return ERR_PTR(-ENOMEM);
  1784	
  1785	allocated:
  1786		__folio_set_locked(folio);
  1787		__folio_set_swapbacked(folio);
  1788	
  1789		gfp &= GFP_RECLAIM_MASK;
  1790		error = mem_cgroup_charge(folio, fault_mm, gfp);
  1791		if (error) {
  1792			if (xa_find(&mapping->i_pages, &index,
  1793					index + pages - 1, XA_PRESENT)) {
  1794				error = -EEXIST;
  1795			} else if (pages == HPAGE_PMD_NR) {
  1796				count_vm_event(THP_FILE_FALLBACK);
  1797				count_vm_event(THP_FILE_FALLBACK_CHARGE);
  1798			}
  1799			goto unlock;
  1800		}
  1801	
  1802		error = shmem_add_to_page_cache(folio, mapping, index, NULL, gfp);
  1803		if (error)
  1804			goto unlock;
  1805	
  1806		error = shmem_inode_acct_blocks(inode, pages);
  1807		if (error) {
  1808			struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  1809			long freed;
  1810			/*
  1811			 * Try to reclaim some space by splitting a few
  1812			 * large folios beyond i_size on the filesystem.
  1813			 */
  1814			shmem_unused_huge_shrink(sbinfo, NULL, 2);
  1815			/*
  1816			 * And do a shmem_recalc_inode() to account for freed pages:
  1817			 * except our folio is there in cache, so not quite balanced.
  1818			 */
  1819			spin_lock(&info->lock);
  1820			freed = pages + info->alloced - info->swapped -
  1821				READ_ONCE(mapping->nrpages);
  1822			if (freed > 0)
  1823				info->alloced -= freed;
  1824			spin_unlock(&info->lock);
  1825			if (freed > 0)
  1826				shmem_inode_unacct_blocks(inode, freed);
  1827			error = shmem_inode_acct_blocks(inode, pages);
  1828			if (error) {
  1829				filemap_remove_folio(folio);
  1830				goto unlock;
  1831			}
  1832		}
  1833	
  1834		shmem_recalc_inode(inode, pages, 0);
  1835		folio_add_lru(folio);
  1836		return folio;
  1837	
  1838	unlock:
  1839		folio_unlock(folio);
  1840		folio_put(folio);
  1841		return ERR_PTR(error);
  1842	}
  1843
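
For reference, the loop at lines 1766-1777 above walks the suitable_orders
bitmask from the highest enabled order downwards, falling back to smaller
orders when allocation fails. Below is a standalone userspace C sketch of
that pattern, assuming highest_order()/next_order() behave as simple
highest-set-bit helpers (this patch adds only their
!CONFIG_TRANSPARENT_HUGEPAGE stubs in include/linux/huge_mm.h):

#include <stdio.h>

/* Index of the highest set bit, standing in for the kernel's highest_order(). */
static int highest_order(unsigned long orders)
{
	return 8 * (int)sizeof(orders) - 1 - __builtin_clzl(orders);
}

/* Clear the previous order and return the next highest, like next_order(). */
static int next_order(unsigned long *orders, int prev)
{
	*orders &= ~(1UL << prev);
	return *orders ? highest_order(*orders) : 0;
}

int main(void)
{
	/* Hypothetical allowed orders: 9 (PMD order on x86_64), 4 and 2. */
	unsigned long suitable_orders = (1UL << 9) | (1UL << 4) | (1UL << 2);
	int order = highest_order(suitable_orders);

	/* Try the largest enabled order first, then fall back, as above. */
	while (suitable_orders) {
		unsigned long pages = 1UL << order;

		printf("would try order %d (%lu pages)\n", order, pages);
		/* A real caller stops at the first successful allocation. */
		order = next_order(&suitable_orders, order);
	}
	return 0;
}
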
Baolin Wang June 2, 2024, 4:16 a.m. UTC | #2
On 2024/5/30 14:36, kernel test robot wrote:
> Hi Baolin,
> 
> kernel test robot noticed the following build warnings:
> 
> [...]
> 
>>> mm/shmem.c:1748:14: warning: variable 'suitable_orders' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized]
>      1748 |                 } else if (orders & BIT(HPAGE_PMD_ORDER)) {
>           |                            ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>     mm/shmem.c:1766:25: note: uninitialized use occurs here
>      1766 |                 order = highest_order(suitable_orders);
>           |                                       ^~~~~~~~~~~~~~~
> [...]
>     2 warnings generated.

Thanks for reporting. Will fix the warning in next version.
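
For reference, a minimal sketch of the kind of fix the clang note suggests,
expressed as a hypothetical follow-up diff on top of this patch (the actual
v4 change may differ):

--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1733,7 +1733,7 @@ static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
-	unsigned long suitable_orders;
+	unsigned long suitable_orders = 0;
 	struct folio *folio = NULL;
 	long pages;
 	int error, order;

With suitable_orders initialized to 0, the fallback loop is simply skipped
when neither branch applies and the existing '!folio' check returns -ENOMEM
(in practice that path should not be hit, since non-anon-shmem callers only
pass BIT(HPAGE_PMD_ORDER)).
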
Dan Carpenter June 4, 2024, 9:23 a.m. UTC | #3
Hi Baolin,

kernel test robot noticed the following build warnings:

[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Baolin-Wang/mm-memory-extend-finish_fault-to-support-large-folio/20240530-100805
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/ec35a23026dd016705d211e85163cabe07681516.1717033868.git.baolin.wang%40linux.alibaba.com
patch subject: [PATCH v3 4/6] mm: shmem: add mTHP support for anonymous shmem
config: powerpc64-randconfig-r071-20240531 (https://download.01.org/0day-ci/archive/20240602/202406020203.14sT311e-lkp@intel.com/config)
compiler: clang version 19.0.0git (https://github.com/llvm/llvm-project bafda89a0944d947fc4b3b5663185e07a397ac30)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202406020203.14sT311e-lkp@intel.com/

smatch warnings:
mm/shmem.c:1766 shmem_alloc_and_add_folio() error: uninitialized symbol 'suitable_orders'.

vim +/suitable_orders +1766 mm/shmem.c

ededbc2c2f28a1 Baolin Wang             2024-05-30  1729  static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
ededbc2c2f28a1 Baolin Wang             2024-05-30  1730  		gfp_t gfp, struct inode *inode, pgoff_t index,
ededbc2c2f28a1 Baolin Wang             2024-05-30  1731  		struct mm_struct *fault_mm, unsigned long orders)
800d8c63b2e989 Kirill A. Shutemov      2016-07-26  1732  {
3022fd7af9604d Hugh Dickins            2023-09-29  1733  	struct address_space *mapping = inode->i_mapping;
0f0796945614b7 Mike Rapoport           2017-09-06  1734  	struct shmem_inode_info *info = SHMEM_I(inode);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1735  	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1736  	unsigned long suitable_orders;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1737  	struct folio *folio = NULL;
3022fd7af9604d Hugh Dickins            2023-09-29  1738  	long pages;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1739  	int error, order;
800d8c63b2e989 Kirill A. Shutemov      2016-07-26  1740  
396bcc5299c281 Matthew Wilcox (Oracle  2020-04-06  1741) 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
ededbc2c2f28a1 Baolin Wang             2024-05-30  1742  		orders = 0;
800d8c63b2e989 Kirill A. Shutemov      2016-07-26  1743  
ededbc2c2f28a1 Baolin Wang             2024-05-30  1744  	if (orders > 0) {
ededbc2c2f28a1 Baolin Wang             2024-05-30  1745  		if (vma && vma_is_anon_shmem(vma)) {
ededbc2c2f28a1 Baolin Wang             2024-05-30  1746  			suitable_orders = anon_shmem_suitable_orders(inode, vmf,
ededbc2c2f28a1 Baolin Wang             2024-05-30  1747  							mapping, index, orders);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1748  		} else if (orders & BIT(HPAGE_PMD_ORDER)) {
3022fd7af9604d Hugh Dickins            2023-09-29  1749  			pages = HPAGE_PMD_NR;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1750  			suitable_orders = BIT(HPAGE_PMD_ORDER);
3022fd7af9604d Hugh Dickins            2023-09-29  1751  			index = round_down(index, HPAGE_PMD_NR);
3022fd7af9604d Hugh Dickins            2023-09-29  1752  
3022fd7af9604d Hugh Dickins            2023-09-29  1753  			/*
3022fd7af9604d Hugh Dickins            2023-09-29  1754  			 * Check for conflict before waiting on a huge allocation.
3022fd7af9604d Hugh Dickins            2023-09-29  1755  			 * Conflict might be that a huge page has just been allocated
3022fd7af9604d Hugh Dickins            2023-09-29  1756  			 * and added to page cache by a racing thread, or that there
3022fd7af9604d Hugh Dickins            2023-09-29  1757  			 * is already at least one small page in the huge extent.
3022fd7af9604d Hugh Dickins            2023-09-29  1758  			 * Be careful to retry when appropriate, but not forever!
3022fd7af9604d Hugh Dickins            2023-09-29  1759  			 * Elsewhere -EEXIST would be the right code, but not here.
3022fd7af9604d Hugh Dickins            2023-09-29  1760  			 */
3022fd7af9604d Hugh Dickins            2023-09-29  1761  			if (xa_find(&mapping->i_pages, &index,
3022fd7af9604d Hugh Dickins            2023-09-29  1762  				    index + HPAGE_PMD_NR - 1, XA_PRESENT))
3022fd7af9604d Hugh Dickins            2023-09-29  1763  				return ERR_PTR(-E2BIG);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1764  		}

suitable_orders uninitialized on else path.

52cd3b074050dd Lee Schermerhorn        2008-04-28  1765  
ededbc2c2f28a1 Baolin Wang             2024-05-30 @1766  		order = highest_order(suitable_orders);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1767  		while (suitable_orders) {
ededbc2c2f28a1 Baolin Wang             2024-05-30  1768  			pages = 1UL << order;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1769  			index = round_down(index, pages);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1770  			folio = shmem_alloc_folio(gfp, order, info, index);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1771  			if (folio)
ededbc2c2f28a1 Baolin Wang             2024-05-30  1772  				goto allocated;
ededbc2c2f28a1 Baolin Wang             2024-05-30  1773  
ededbc2c2f28a1 Baolin Wang             2024-05-30  1774  			if (pages == HPAGE_PMD_NR)
3022fd7af9604d Hugh Dickins            2023-09-29  1775  				count_vm_event(THP_FILE_FALLBACK);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1776  			order = next_order(&suitable_orders, order);
ededbc2c2f28a1 Baolin Wang             2024-05-30  1777  		}
3022fd7af9604d Hugh Dickins            2023-09-29  1778  	} else {
3022fd7af9604d Hugh Dickins            2023-09-29  1779  		pages = 1;
Baolin Wang June 4, 2024, 9:46 a.m. UTC | #4
On 2024/6/4 17:23, Dan Carpenter wrote:
> Hi Baolin,
> 
> kernel test robot noticed the following build warnings:
> 
> [...]
> 
> smatch warnings:
> mm/shmem.c:1766 shmem_alloc_and_add_folio() error: uninitialized symbol 'suitable_orders'.

Thanks Dan. LKP also reported this warning [1]. Will fix it.

[1] https://lore.kernel.org/all/202405301430.0NRLTOWU-lkp@intel.com/

Patch

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index fac21548c5de..909cfc67521d 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -575,6 +575,16 @@  static inline bool thp_migration_supported(void)
 {
 	return false;
 }
+
+static inline int highest_order(unsigned long orders)
+{
+	return 0;
+}
+
+static inline int next_order(unsigned long *orders, int prev)
+{
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline int split_folio_to_list_to_order(struct folio *folio,
diff --git a/mm/shmem.c b/mm/shmem.c
index d5ab5e211100..493873d7246c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1611,6 +1611,107 @@  static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
 	return result;
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
+				struct vm_area_struct *vma, pgoff_t index,
+				bool global_huge)
+{
+	unsigned long mask = READ_ONCE(huge_anon_shmem_orders_always);
+	unsigned long within_size_orders = READ_ONCE(huge_anon_shmem_orders_within_size);
+	unsigned long vm_flags = vma->vm_flags;
+	/*
+	 * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that
+	 * are enabled for this vma.
+	 */
+	unsigned long orders = BIT(PMD_ORDER + 1) - 1;
+	loff_t i_size;
+	int order;
+
+	if ((vm_flags & VM_NOHUGEPAGE) ||
+	    test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
+		return 0;
+
+	/* If the hardware/firmware marked hugepage support disabled. */
+	if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_UNSUPPORTED))
+		return 0;
+
+	/*
+	 * Following the 'deny' semantics of the top level, force the huge
+	 * option off from all mounts.
+	 */
+	if (shmem_huge == SHMEM_HUGE_DENY)
+		return 0;
+
+	/*
+	 * Only allow inherit orders if the top-level value is 'force', which
+	 * means non-PMD sized THP can not override 'huge' mount option now.
+	 */
+	if (shmem_huge == SHMEM_HUGE_FORCE)
+		return READ_ONCE(huge_anon_shmem_orders_inherit);
+
+	/* Allow mTHP that will be fully within i_size. */
+	order = highest_order(within_size_orders);
+	while (within_size_orders) {
+		index = round_up(index + 1, order);
+		i_size = round_up(i_size_read(inode), PAGE_SIZE);
+		if (i_size >> PAGE_SHIFT >= index) {
+			mask |= within_size_orders;
+			break;
+		}
+
+		order = next_order(&within_size_orders, order);
+	}
+
+	if (vm_flags & VM_HUGEPAGE)
+		mask |= READ_ONCE(huge_anon_shmem_orders_madvise);
+
+	if (global_huge)
+		mask |= READ_ONCE(huge_anon_shmem_orders_inherit);
+
+	return orders & mask;
+}
+
+static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+					struct address_space *mapping, pgoff_t index,
+					unsigned long orders)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	unsigned long pages;
+	int order;
+
+	orders = thp_vma_suitable_orders(vma, vmf->address, orders);
+	if (!orders)
+		return 0;
+
+	/* Find the highest order that can add into the page cache */
+	order = highest_order(orders);
+	while (orders) {
+		pages = 1UL << order;
+		index = round_down(index, pages);
+		if (!xa_find(&mapping->i_pages, &index,
+			     index + pages - 1, XA_PRESENT))
+			break;
+		order = next_order(&orders, order);
+	}
+
+	return orders;
+}
+#else
+static unsigned long anon_shmem_allowable_huge_orders(struct inode *inode,
+				struct vm_area_struct *vma, pgoff_t index,
+				bool global_huge)
+{
+	return 0;
+}
+
+static unsigned long anon_shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf,
+					struct address_space *mapping, pgoff_t index,
+					unsigned long orders)
+{
+	return 0;
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
 static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
 		struct shmem_inode_info *info, pgoff_t index)
 {
@@ -1625,38 +1726,55 @@  static struct folio *shmem_alloc_folio(gfp_t gfp, int order,
 	return folio;
 }
 
-static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
-		struct inode *inode, pgoff_t index,
-		struct mm_struct *fault_mm, bool huge)
+static struct folio *shmem_alloc_and_add_folio(struct vm_fault *vmf,
+		gfp_t gfp, struct inode *inode, pgoff_t index,
+		struct mm_struct *fault_mm, unsigned long orders)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct shmem_inode_info *info = SHMEM_I(inode);
-	struct folio *folio;
+	struct vm_area_struct *vma = vmf ? vmf->vma : NULL;
+	unsigned long suitable_orders;
+	struct folio *folio = NULL;
 	long pages;
-	int error;
+	int error, order;
 
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-		huge = false;
+		orders = 0;
 
-	if (huge) {
-		pages = HPAGE_PMD_NR;
-		index = round_down(index, HPAGE_PMD_NR);
+	if (orders > 0) {
+		if (vma && vma_is_anon_shmem(vma)) {
+			suitable_orders = anon_shmem_suitable_orders(inode, vmf,
+							mapping, index, orders);
+		} else if (orders & BIT(HPAGE_PMD_ORDER)) {
+			pages = HPAGE_PMD_NR;
+			suitable_orders = BIT(HPAGE_PMD_ORDER);
+			index = round_down(index, HPAGE_PMD_NR);
 
-		/*
-		 * Check for conflict before waiting on a huge allocation.
-		 * Conflict might be that a huge page has just been allocated
-		 * and added to page cache by a racing thread, or that there
-		 * is already at least one small page in the huge extent.
-		 * Be careful to retry when appropriate, but not forever!
-		 * Elsewhere -EEXIST would be the right code, but not here.
-		 */
-		if (xa_find(&mapping->i_pages, &index,
-				index + HPAGE_PMD_NR - 1, XA_PRESENT))
-			return ERR_PTR(-E2BIG);
+			/*
+			 * Check for conflict before waiting on a huge allocation.
+			 * Conflict might be that a huge page has just been allocated
+			 * and added to page cache by a racing thread, or that there
+			 * is already at least one small page in the huge extent.
+			 * Be careful to retry when appropriate, but not forever!
+			 * Elsewhere -EEXIST would be the right code, but not here.
+			 */
+			if (xa_find(&mapping->i_pages, &index,
+				    index + HPAGE_PMD_NR - 1, XA_PRESENT))
+				return ERR_PTR(-E2BIG);
+		}
 
-		folio = shmem_alloc_folio(gfp, HPAGE_PMD_ORDER, info, index);
-		if (!folio && pages == HPAGE_PMD_NR)
-			count_vm_event(THP_FILE_FALLBACK);
+		order = highest_order(suitable_orders);
+		while (suitable_orders) {
+			pages = 1UL << order;
+			index = round_down(index, pages);
+			folio = shmem_alloc_folio(gfp, order, info, index);
+			if (folio)
+				goto allocated;
+
+			if (pages == HPAGE_PMD_NR)
+				count_vm_event(THP_FILE_FALLBACK);
+			order = next_order(&suitable_orders, order);
+		}
 	} else {
 		pages = 1;
 		folio = shmem_alloc_folio(gfp, 0, info, index);
@@ -1664,6 +1782,7 @@  static struct folio *shmem_alloc_and_add_folio(gfp_t gfp,
 	if (!folio)
 		return ERR_PTR(-ENOMEM);
 
+allocated:
 	__folio_set_locked(folio);
 	__folio_set_swapbacked(folio);
 
@@ -1958,7 +2077,8 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	struct mm_struct *fault_mm;
 	struct folio *folio;
 	int error;
-	bool alloced;
+	bool alloced, huge;
+	unsigned long orders = 0;
 
 	if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping)))
 		return -EINVAL;
@@ -2030,14 +2150,21 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 		return 0;
 	}
 
-	if (shmem_is_huge(inode, index, false, fault_mm,
-			  vma ? vma->vm_flags : 0)) {
+	huge = shmem_is_huge(inode, index, false, fault_mm,
+			     vma ? vma->vm_flags : 0);
+	/* Find hugepage orders that are allowed for anonymous shmem. */
+	if (vma && vma_is_anon_shmem(vma))
+		orders = anon_shmem_allowable_huge_orders(inode, vma, index, huge);
+	else if (huge)
+		orders = BIT(HPAGE_PMD_ORDER);
+
+	if (orders > 0) {
 		gfp_t huge_gfp;
 
 		huge_gfp = vma_thp_gfp_mask(vma);
 		huge_gfp = limit_gfp_mask(huge_gfp, gfp);
-		folio = shmem_alloc_and_add_folio(huge_gfp,
-				inode, index, fault_mm, true);
+		folio = shmem_alloc_and_add_folio(vmf, huge_gfp,
+				inode, index, fault_mm, orders);
 		if (!IS_ERR(folio)) {
 			if (folio_test_pmd_mappable(folio))
 				count_vm_event(THP_FILE_ALLOC);
@@ -2047,7 +2174,7 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 			goto repeat;
 	}
 
-	folio = shmem_alloc_and_add_folio(gfp, inode, index, fault_mm, false);
+	folio = shmem_alloc_and_add_folio(vmf, gfp, inode, index, fault_mm, 0);
 	if (IS_ERR(folio)) {
 		error = PTR_ERR(folio);
 		if (error == -EEXIST)
@@ -2058,7 +2185,7 @@  static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 
 alloced:
 	alloced = true;
-	if (folio_test_pmd_mappable(folio) &&
+	if (folio_test_large(folio) &&
 	    DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE) <
 					folio_next_index(folio) - 1) {
 		struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);