diff mbox series

[RFC,v1,06/57] mm: Remove PAGE_SIZE compile-time constant assumption

Message ID 20241014105912.3207374-6-ryan.roberts@arm.com (mailing list archive)
State New, archived
Headers show
Series Boot-time page size selection for arm64 | expand

Commit Message

Ryan Roberts Oct. 14, 2024, 10:58 a.m. UTC
To prepare for supporting boot-time page size selection, refactor code
to remove assumptions about PAGE_SIZE being compile-time constant. Code
intended to be equivalent when compile-time page size is active.

Refactor "struct vmap_block" to use a flexible array for used_mmap since
VMAP_BBMAP_BITS is not a compile time constant for the boot-time page
size case.

Update various BUILD_BUG_ON() instances to check against appropriate
page size limit.

Re-define "union swap_header" so that it's no longer exactly page-sized.
Instead define a flexible "magic" array with a define which tells the
offset to where the magic signature begins.

Consider page size limit in some CPP condditionals.

Wrap global variables that are initialized with PAGE_SIZE derived values
using DEFINE_GLOBAL_PAGE_SIZE_VAR() so their initialization can be
deferred for boot-time page size builds.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---

***NOTE***
Any confused maintainers may want to read the cover note here for context:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/

 drivers/mtd/mtdswap.c         |  4 ++--
 include/linux/mm.h            |  2 +-
 include/linux/mm_types_task.h |  2 +-
 include/linux/mmzone.h        |  3 ++-
 include/linux/slab.h          |  7 ++++---
 include/linux/swap.h          | 17 ++++++++++++-----
 include/linux/swapops.h       |  6 +++++-
 mm/memcontrol.c               |  2 +-
 mm/memory.c                   |  4 ++--
 mm/mmap.c                     |  2 +-
 mm/page-writeback.c           |  2 +-
 mm/slub.c                     |  2 +-
 mm/sparse.c                   |  2 +-
 mm/swapfile.c                 |  2 +-
 mm/vmalloc.c                  |  7 ++++---
 15 files changed, 39 insertions(+), 25 deletions(-)

Comments

Ryan Roberts Oct. 16, 2024, 2:37 p.m. UTC | #1
+ Matthew Wilcox

This was a rather tricky series to get the recipients correct for and my script
did not realize that "supporter" was a pseudonym for "maintainer" so you were
missed off the original post. Appologies!

More context in cover letter:
https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/


On 14/10/2024 11:58, Ryan Roberts wrote:
> To prepare for supporting boot-time page size selection, refactor code
> to remove assumptions about PAGE_SIZE being compile-time constant. Code
> intended to be equivalent when compile-time page size is active.
> 
> Refactor "struct vmap_block" to use a flexible array for used_mmap since
> VMAP_BBMAP_BITS is not a compile time constant for the boot-time page
> size case.
> 
> Update various BUILD_BUG_ON() instances to check against appropriate
> page size limit.
> 
> Re-define "union swap_header" so that it's no longer exactly page-sized.
> Instead define a flexible "magic" array with a define which tells the
> offset to where the magic signature begins.
> 
> Consider page size limit in some CPP condditionals.
> 
> Wrap global variables that are initialized with PAGE_SIZE derived values
> using DEFINE_GLOBAL_PAGE_SIZE_VAR() so their initialization can be
> deferred for boot-time page size builds.
> 
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
> 
> ***NOTE***
> Any confused maintainers may want to read the cover note here for context:
> https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/
> 
>  drivers/mtd/mtdswap.c         |  4 ++--
>  include/linux/mm.h            |  2 +-
>  include/linux/mm_types_task.h |  2 +-
>  include/linux/mmzone.h        |  3 ++-
>  include/linux/slab.h          |  7 ++++---
>  include/linux/swap.h          | 17 ++++++++++++-----
>  include/linux/swapops.h       |  6 +++++-
>  mm/memcontrol.c               |  2 +-
>  mm/memory.c                   |  4 ++--
>  mm/mmap.c                     |  2 +-
>  mm/page-writeback.c           |  2 +-
>  mm/slub.c                     |  2 +-
>  mm/sparse.c                   |  2 +-
>  mm/swapfile.c                 |  2 +-
>  mm/vmalloc.c                  |  7 ++++---
>  15 files changed, 39 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
> index 680366616da24..7412a32708114 100644
> --- a/drivers/mtd/mtdswap.c
> +++ b/drivers/mtd/mtdswap.c
> @@ -1062,13 +1062,13 @@ static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
>  {
>  	union swap_header *hd = (union swap_header *)(buf);
>  
> -	memset(buf, 0, PAGE_SIZE - 10);
> +	memset(buf, 0, SWAP_HEADER_MAGIC);
>  
>  	hd->info.version = 1;
>  	hd->info.last_page = d->mbd_dev->size - 1;
>  	hd->info.nr_badpages = 0;
>  
> -	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
> +	memcpy(buf + SWAP_HEADER_MAGIC, "SWAPSPACE2", 10);
>  
>  	return 0;
>  }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 09a840517c23a..49c2078354e6e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2927,7 +2927,7 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
>  static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
>  {
>  	BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
> -	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
> +	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE_MAX);
>  	return ptlock_ptr(virt_to_ptdesc(pte));
>  }
>  
> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
> index a2f6179b672b8..c356897d5f41c 100644
> --- a/include/linux/mm_types_task.h
> +++ b/include/linux/mm_types_task.h
> @@ -37,7 +37,7 @@ struct page;
>  
>  struct page_frag {
>  	struct page *page;
> -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
> +#if (BITS_PER_LONG > 32) || (PAGE_SIZE_MAX >= 65536)
>  	__u32 offset;
>  	__u32 size;
>  #else
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 1dc6248feb832..cd58034b82c81 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -1744,6 +1744,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes)
>   */
>  #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
>  #define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)
> +#define PFN_SECTION_SHIFT_MIN	(SECTION_SIZE_BITS - PAGE_SHIFT_MAX)
>  
>  #define NR_MEM_SECTIONS		(1UL << SECTIONS_SHIFT)
>  
> @@ -1753,7 +1754,7 @@ static inline bool movable_only_nodes(nodemask_t *nodes)
>  #define SECTION_BLOCKFLAGS_BITS \
>  	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
>  
> -#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS
> +#if (MAX_PAGE_ORDER + PAGE_SHIFT_MAX) > SECTION_SIZE_BITS
>  #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE
>  #endif
>  
> diff --git a/include/linux/slab.h b/include/linux/slab.h
> index eb2bf46291576..11c6ff3a12579 100644
> --- a/include/linux/slab.h
> +++ b/include/linux/slab.h
> @@ -347,7 +347,7 @@ static inline unsigned int arch_slab_minalign(void)
>   */
>  #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
>  #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
> -#define __assume_page_alignment __assume_aligned(PAGE_SIZE)
> +#define __assume_page_alignment __assume_aligned(PAGE_SIZE_MIN)
>  
>  /*
>   * Kmalloc array related definitions
> @@ -358,6 +358,7 @@ static inline unsigned int arch_slab_minalign(void)
>   * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
>   */
>  #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
> +#define KMALLOC_SHIFT_HIGH_MAX	(PAGE_SHIFT_MAX + 1)
>  #define KMALLOC_SHIFT_MAX	(MAX_PAGE_ORDER + PAGE_SHIFT)
>  #ifndef KMALLOC_SHIFT_LOW
>  #define KMALLOC_SHIFT_LOW	3
> @@ -426,7 +427,7 @@ enum kmalloc_cache_type {
>  	NR_KMALLOC_TYPES
>  };
>  
> -typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
> +typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH_MAX + 1];
>  
>  extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
>  
> @@ -524,7 +525,7 @@ static __always_inline unsigned int __kmalloc_index(size_t size,
>  	/* Will never be reached. Needed because the compiler may complain */
>  	return -1;
>  }
> -static_assert(PAGE_SHIFT <= 20);
> +static_assert(PAGE_SHIFT_MAX <= 20);
>  #define kmalloc_index(s) __kmalloc_index(s, true)
>  
>  #include <linux/alloc_tag.h>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index ba7ea95d1c57a..e85df0332979f 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -132,10 +132,17 @@ static inline int current_is_kswapd(void)
>   * bootbits...
>   */
>  union swap_header {
> -	struct {
> -		char reserved[PAGE_SIZE - 10];
> -		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
> -	} magic;
> +	/*
> +	 * Exists conceptually, but since PAGE_SIZE may not be known at compile
> +	 * time, we must access through pointer arithmetic at run time.
> +	 *
> +	 * struct {
> +	 * 	char reserved[PAGE_SIZE - 10];
> +	 * 	char magic[10];			   SWAP-SPACE or SWAPSPACE2
> +	 * } magic;
> +	 */
> +#define SWAP_HEADER_MAGIC	(PAGE_SIZE - 10)
> +	char magic[1];
>  	struct {
>  		char		bootbits[1024];	/* Space for disklabel etc. */
>  		__u32		version;
> @@ -201,7 +208,7 @@ struct swap_extent {
>   * Max bad pages in the new format..
>   */
>  #define MAX_SWAP_BADPAGES \
> -	((offsetof(union swap_header, magic.magic) - \
> +	((SWAP_HEADER_MAGIC - \
>  	  offsetof(union swap_header, info.badpages)) / sizeof(int))
>  
>  enum {
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index cb468e418ea11..890fe6a3e6702 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -34,10 +34,14 @@
>   */
>  #ifdef MAX_PHYSMEM_BITS
>  #define SWP_PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
> +#define SWP_PFN_BITS_MAX	(MAX_PHYSMEM_BITS - PAGE_SHIFT_MIN)
>  #else  /* MAX_PHYSMEM_BITS */
>  #define SWP_PFN_BITS		min_t(int, \
>  				      sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \
>  				      SWP_TYPE_SHIFT)
> +#define SWP_PFN_BITS_MAX	min_t(int, \
> +				      sizeof(phys_addr_t) * 8 - PAGE_SHIFT_MIN, \
> +				      SWP_TYPE_SHIFT)
>  #endif	/* MAX_PHYSMEM_BITS */
>  #define SWP_PFN_MASK		(BIT(SWP_PFN_BITS) - 1)
>  
> @@ -519,7 +523,7 @@ static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
>  static inline bool is_pfn_swap_entry(swp_entry_t entry)
>  {
>  	/* Make sure the swp offset can always store the needed fields */
> -	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
> +	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS_MAX);
>  
>  	return is_migration_entry(entry) || is_device_private_entry(entry) ||
>  	       is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c5f9195f76c65..4b17bec566fbd 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -4881,7 +4881,7 @@ static int __init mem_cgroup_init(void)
>  	 * to work fine, we should make sure that the overfill threshold can't
>  	 * exceed S32_MAX / PAGE_SIZE.
>  	 */
> -	BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE);
> +	BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE_MIN);
>  
>  	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
>  				  memcg_hotplug_cpu_dead);
> diff --git a/mm/memory.c b/mm/memory.c
> index ebfc9768f801a..14b5ef6870486 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4949,8 +4949,8 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
>  	return ret;
>  }
>  
> -static unsigned long fault_around_pages __read_mostly =
> -	65536 >> PAGE_SHIFT;
> +static __DEFINE_GLOBAL_PAGE_SIZE_VAR(unsigned long, fault_around_pages,
> +				     __read_mostly, 65536 >> PAGE_SHIFT);
>  
>  #ifdef CONFIG_DEBUG_FS
>  static int fault_around_bytes_get(void *data, u64 *val)
> diff --git a/mm/mmap.c b/mm/mmap.c
> index d0dfc85b209bb..d9642aba07ac4 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -2279,7 +2279,7 @@ int expand_downwards(struct vm_area_struct *vma, unsigned long address)
>  }
>  
>  /* enforced gap between the expanding stack and other mappings. */
> -unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
> +DEFINE_GLOBAL_PAGE_SIZE_VAR(unsigned long, stack_guard_gap, 256UL<<PAGE_SHIFT);
>  
>  static int __init cmdline_parse_stack_guard_gap(char *p)
>  {
> diff --git a/mm/page-writeback.c b/mm/page-writeback.c
> index 4430ac68e4c41..8fc9ac50749bd 100644
> --- a/mm/page-writeback.c
> +++ b/mm/page-writeback.c
> @@ -2292,7 +2292,7 @@ static int page_writeback_cpu_online(unsigned int cpu)
>  #ifdef CONFIG_SYSCTL
>  
>  /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
> -static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
> +static DEFINE_GLOBAL_PAGE_SIZE_VAR_CONST(unsigned long, dirty_bytes_min, 2 * PAGE_SIZE);
>  
>  static struct ctl_table vm_page_writeback_sysctls[] = {
>  	{
> diff --git a/mm/slub.c b/mm/slub.c
> index a77f354f83251..82f6e98cf25bb 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -5001,7 +5001,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
>  static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
>  {
>  	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
> -			NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
> +			NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH_MAX *
>  			sizeof(struct kmem_cache_cpu));
>  
>  	/*
> diff --git a/mm/sparse.c b/mm/sparse.c
> index dc38539f85603..2491425930c4d 100644
> --- a/mm/sparse.c
> +++ b/mm/sparse.c
> @@ -277,7 +277,7 @@ static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
>  {
>  	unsigned long coded_mem_map =
>  		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
> -	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
> +	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT_MIN);
>  	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
>  	return coded_mem_map;
>  }
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index 38bdc439651ac..6311a1cc7e46b 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2931,7 +2931,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
>  	unsigned long swapfilepages;
>  	unsigned long last_page;
>  
> -	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
> +	if (memcmp("SWAPSPACE2", &swap_header->magic[SWAP_HEADER_MAGIC], 10)) {
>  		pr_err("Unable to find swap-space signature\n");
>  		return 0;
>  	}
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index a0df1e2e155a8..b4fbba204603c 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -2497,12 +2497,12 @@ struct vmap_block {
>  	spinlock_t lock;
>  	struct vmap_area *va;
>  	unsigned long free, dirty;
> -	DECLARE_BITMAP(used_map, VMAP_BBMAP_BITS);
>  	unsigned long dirty_min, dirty_max; /*< dirty range */
>  	struct list_head free_list;
>  	struct rcu_head rcu_head;
>  	struct list_head purge;
>  	unsigned int cpu;
> +	unsigned long used_map[];
>  };
>  
>  /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
> @@ -2600,11 +2600,12 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
>  	unsigned long vb_idx;
>  	int node, err;
>  	void *vaddr;
> +	size_t size;
>  
>  	node = numa_node_id();
>  
> -	vb = kmalloc_node(sizeof(struct vmap_block),
> -			gfp_mask & GFP_RECLAIM_MASK, node);
> +	size = struct_size(vb, used_map, BITS_TO_LONGS(VMAP_BBMAP_BITS));
> +	vb = kmalloc_node(size, gfp_mask & GFP_RECLAIM_MASK, node);
>  	if (unlikely(!vb))
>  		return ERR_PTR(-ENOMEM);
>
Vlastimil Babka Nov. 14, 2024, 10:17 a.m. UTC | #2
On 10/14/24 12:58, Ryan Roberts wrote:
> To prepare for supporting boot-time page size selection, refactor code
> to remove assumptions about PAGE_SIZE being compile-time constant. Code
> intended to be equivalent when compile-time page size is active.
> 
> Refactor "struct vmap_block" to use a flexible array for used_mmap since
> VMAP_BBMAP_BITS is not a compile time constant for the boot-time page
> size case.
> 
> Update various BUILD_BUG_ON() instances to check against appropriate
> page size limit.
> 
> Re-define "union swap_header" so that it's no longer exactly page-sized.
> Instead define a flexible "magic" array with a define which tells the
> offset to where the magic signature begins.
> 
> Consider page size limit in some CPP condditionals.
> 
> Wrap global variables that are initialized with PAGE_SIZE derived values
> using DEFINE_GLOBAL_PAGE_SIZE_VAR() so their initialization can be
> deferred for boot-time page size builds.
> 
> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
> 
> ***NOTE***
> Any confused maintainers may want to read the cover note here for context:
> https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/
> 
>  drivers/mtd/mtdswap.c         |  4 ++--
>  include/linux/mm.h            |  2 +-
>  include/linux/mm_types_task.h |  2 +-
>  include/linux/mmzone.h        |  3 ++-
>  include/linux/slab.h          |  7 ++++---
>  include/linux/swap.h          | 17 ++++++++++++-----
>  include/linux/swapops.h       |  6 +++++-
>  mm/memcontrol.c               |  2 +-
>  mm/memory.c                   |  4 ++--
>  mm/mmap.c                     |  2 +-
>  mm/page-writeback.c           |  2 +-
>  mm/slub.c                     |  2 +-
>  mm/sparse.c                   |  2 +-
>  mm/swapfile.c                 |  2 +-
>  mm/vmalloc.c                  |  7 ++++---
>  15 files changed, 39 insertions(+), 25 deletions(-)
> 

> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -132,10 +132,17 @@ static inline int current_is_kswapd(void)
>   * bootbits...
>   */
>  union swap_header {
> -	struct {
> -		char reserved[PAGE_SIZE - 10];
> -		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
> -	} magic;
> +	/*
> +	 * Exists conceptually, but since PAGE_SIZE may not be known at compile
> +	 * time, we must access through pointer arithmetic at run time.
> +	 *
> +	 * struct {
> +	 * 	char reserved[PAGE_SIZE - 10];
> +	 * 	char magic[10];			   SWAP-SPACE or SWAPSPACE2
> +	 * } magic;
> +	 */
> +#define SWAP_HEADER_MAGIC	(PAGE_SIZE - 10)
> +	char magic[1];

I wonder if it makes sense to even keep this magic field anymore.

>  	struct {
>  		char		bootbits[1024];	/* Space for disklabel etc. */
>  		__u32		version;
> @@ -201,7 +208,7 @@ struct swap_extent {
>   * Max bad pages in the new format..
>   */
>  #define MAX_SWAP_BADPAGES \
> -	((offsetof(union swap_header, magic.magic) - \
> +	((SWAP_HEADER_MAGIC - \
>  	  offsetof(union swap_header, info.badpages)) / sizeof(int))
>  
>  enum {

<snip>

> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -2931,7 +2931,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
>  	unsigned long swapfilepages;
>  	unsigned long last_page;
>  
> -	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
> +	if (memcmp("SWAPSPACE2", &swap_header->magic[SWAP_HEADER_MAGIC], 10)) {

I'd expect static checkers to scream here because we overflow the magic[1]
both due to copying 10 bytes into 1 byte array and also with the insane
offset. Hence my suggestion to drop the field and use purely pointer arithmetic.

>  		pr_err("Unable to find swap-space signature\n");
>  		return 0;
>  	}
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index a0df1e2e155a8..b4fbba204603c 100644

Hm I'm actually looking at yourwip branch which also has:

--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -969,7 +969,7 @@ static inline int get_order_from_str(const char *size_str)
        return -EINVAL;
 }

-static char str_dup[PAGE_SIZE] __initdata;
+static char str_dup[PAGE_SIZE_MAX] __initdata;
 static int __init setup_thp_anon(char *str)
 {
        char *token, *range, *policy, *subtoken;

Why PAGE_SIZE_MAX? Isn't this the same case as "mm/memcontrol: Fix seq_buf
size to save memory when PAGE_SIZE is large"
Ryan Roberts Nov. 26, 2024, 10:08 a.m. UTC | #3
Hi Vlastimil,

Sorry about the slow response to your review of this series - I'm just getting
to it now. Comment's below...

On 14/11/2024 10:17, Vlastimil Babka wrote:
> On 10/14/24 12:58, Ryan Roberts wrote:
>> To prepare for supporting boot-time page size selection, refactor code
>> to remove assumptions about PAGE_SIZE being compile-time constant. Code
>> intended to be equivalent when compile-time page size is active.
>>
>> Refactor "struct vmap_block" to use a flexible array for used_mmap since
>> VMAP_BBMAP_BITS is not a compile time constant for the boot-time page
>> size case.
>>
>> Update various BUILD_BUG_ON() instances to check against appropriate
>> page size limit.
>>
>> Re-define "union swap_header" so that it's no longer exactly page-sized.
>> Instead define a flexible "magic" array with a define which tells the
>> offset to where the magic signature begins.
>>
>> Consider page size limit in some CPP condditionals.
>>
>> Wrap global variables that are initialized with PAGE_SIZE derived values
>> using DEFINE_GLOBAL_PAGE_SIZE_VAR() so their initialization can be
>> deferred for boot-time page size builds.
>>
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>
>> ***NOTE***
>> Any confused maintainers may want to read the cover note here for context:
>> https://lore.kernel.org/all/20241014105514.3206191-1-ryan.roberts@arm.com/
>>
>>  drivers/mtd/mtdswap.c         |  4 ++--
>>  include/linux/mm.h            |  2 +-
>>  include/linux/mm_types_task.h |  2 +-
>>  include/linux/mmzone.h        |  3 ++-
>>  include/linux/slab.h          |  7 ++++---
>>  include/linux/swap.h          | 17 ++++++++++++-----
>>  include/linux/swapops.h       |  6 +++++-
>>  mm/memcontrol.c               |  2 +-
>>  mm/memory.c                   |  4 ++--
>>  mm/mmap.c                     |  2 +-
>>  mm/page-writeback.c           |  2 +-
>>  mm/slub.c                     |  2 +-
>>  mm/sparse.c                   |  2 +-
>>  mm/swapfile.c                 |  2 +-
>>  mm/vmalloc.c                  |  7 ++++---
>>  15 files changed, 39 insertions(+), 25 deletions(-)
>>
> 
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -132,10 +132,17 @@ static inline int current_is_kswapd(void)
>>   * bootbits...
>>   */
>>  union swap_header {
>> -	struct {
>> -		char reserved[PAGE_SIZE - 10];
>> -		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
>> -	} magic;
>> +	/*
>> +	 * Exists conceptually, but since PAGE_SIZE may not be known at compile
>> +	 * time, we must access through pointer arithmetic at run time.
>> +	 *
>> +	 * struct {
>> +	 * 	char reserved[PAGE_SIZE - 10];
>> +	 * 	char magic[10];			   SWAP-SPACE or SWAPSPACE2
>> +	 * } magic;
>> +	 */
>> +#define SWAP_HEADER_MAGIC	(PAGE_SIZE - 10)
>> +	char magic[1];
> 
> I wonder if it makes sense to even keep this magic field anymore.
> 
>>  	struct {
>>  		char		bootbits[1024];	/* Space for disklabel etc. */
>>  		__u32		version;
>> @@ -201,7 +208,7 @@ struct swap_extent {
>>   * Max bad pages in the new format..
>>   */
>>  #define MAX_SWAP_BADPAGES \
>> -	((offsetof(union swap_header, magic.magic) - \
>> +	((SWAP_HEADER_MAGIC - \
>>  	  offsetof(union swap_header, info.badpages)) / sizeof(int))
>>  
>>  enum {
> 
> <snip>
> 
>> --- a/mm/swapfile.c
>> +++ b/mm/swapfile.c
>> @@ -2931,7 +2931,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
>>  	unsigned long swapfilepages;
>>  	unsigned long last_page;
>>  
>> -	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
>> +	if (memcmp("SWAPSPACE2", &swap_header->magic[SWAP_HEADER_MAGIC], 10)) {
> 
> I'd expect static checkers to scream here because we overflow the magic[1]
> both due to copying 10 bytes into 1 byte array and also with the insane
> offset. Hence my suggestion to drop the field and use purely pointer arithmetic.

Yeah, good point. I'll remove magic[] and use pointer arithmetic.

> 
>>  		pr_err("Unable to find swap-space signature\n");
>>  		return 0;
>>  	}
>> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
>> index a0df1e2e155a8..b4fbba204603c 100644
> 
> Hm I'm actually looking at yourwip branch which also has:
> 
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -969,7 +969,7 @@ static inline int get_order_from_str(const char *size_str)
>         return -EINVAL;
>  }
> 
> -static char str_dup[PAGE_SIZE] __initdata;
> +static char str_dup[PAGE_SIZE_MAX] __initdata;
>  static int __init setup_thp_anon(char *str)
>  {
>         char *token, *range, *policy, *subtoken;
> 
> Why PAGE_SIZE_MAX? Isn't this the same case as "mm/memcontrol: Fix seq_buf
> size to save memory when PAGE_SIZE is large"

Hmm, you're probably right. I had a vague notion that "str", as passed into the
function, was guarranteed to be no bigger than PAGE_SIZE (perhaps I'm wrong). So
assumed that's where the original definition of str_dup[PAGE_SIZE] was coming from.

But I think your real question is "should the max size of str be a function of
PAGE_SIZE?". I think it could; there are more page orders that can legitimately
be described when the page size is bigger (at least for arm64). But in practice,
I'd expect any sane string for any page size to be easily within 4K.

So on that basis, I'll take your advice; changing this buffer to be 4K always.

Thanks,
Ryan
diff mbox series

Patch

diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c
index 680366616da24..7412a32708114 100644
--- a/drivers/mtd/mtdswap.c
+++ b/drivers/mtd/mtdswap.c
@@ -1062,13 +1062,13 @@  static int mtdswap_auto_header(struct mtdswap_dev *d, char *buf)
 {
 	union swap_header *hd = (union swap_header *)(buf);
 
-	memset(buf, 0, PAGE_SIZE - 10);
+	memset(buf, 0, SWAP_HEADER_MAGIC);
 
 	hd->info.version = 1;
 	hd->info.last_page = d->mbd_dev->size - 1;
 	hd->info.nr_badpages = 0;
 
-	memcpy(buf + PAGE_SIZE - 10, "SWAPSPACE2", 10);
+	memcpy(buf + SWAP_HEADER_MAGIC, "SWAPSPACE2", 10);
 
 	return 0;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 09a840517c23a..49c2078354e6e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2927,7 +2927,7 @@  static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd)
 static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte)
 {
 	BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE));
-	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE);
+	BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE_MAX);
 	return ptlock_ptr(virt_to_ptdesc(pte));
 }
 
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index a2f6179b672b8..c356897d5f41c 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -37,7 +37,7 @@  struct page;
 
 struct page_frag {
 	struct page *page;
-#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE_MAX >= 65536)
 	__u32 offset;
 	__u32 size;
 #else
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1dc6248feb832..cd58034b82c81 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1744,6 +1744,7 @@  static inline bool movable_only_nodes(nodemask_t *nodes)
  */
 #define PA_SECTION_SHIFT	(SECTION_SIZE_BITS)
 #define PFN_SECTION_SHIFT	(SECTION_SIZE_BITS - PAGE_SHIFT)
+#define PFN_SECTION_SHIFT_MIN	(SECTION_SIZE_BITS - PAGE_SHIFT_MAX)
 
 #define NR_MEM_SECTIONS		(1UL << SECTIONS_SHIFT)
 
@@ -1753,7 +1754,7 @@  static inline bool movable_only_nodes(nodemask_t *nodes)
 #define SECTION_BLOCKFLAGS_BITS \
 	((1UL << (PFN_SECTION_SHIFT - pageblock_order)) * NR_PAGEBLOCK_BITS)
 
-#if (MAX_PAGE_ORDER + PAGE_SHIFT) > SECTION_SIZE_BITS
+#if (MAX_PAGE_ORDER + PAGE_SHIFT_MAX) > SECTION_SIZE_BITS
 #error Allocator MAX_PAGE_ORDER exceeds SECTION_SIZE
 #endif
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index eb2bf46291576..11c6ff3a12579 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -347,7 +347,7 @@  static inline unsigned int arch_slab_minalign(void)
  */
 #define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN)
 #define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN)
-#define __assume_page_alignment __assume_aligned(PAGE_SIZE)
+#define __assume_page_alignment __assume_aligned(PAGE_SIZE_MIN)
 
 /*
  * Kmalloc array related definitions
@@ -358,6 +358,7 @@  static inline unsigned int arch_slab_minalign(void)
  * (PAGE_SIZE*2).  Larger requests are passed to the page allocator.
  */
 #define KMALLOC_SHIFT_HIGH	(PAGE_SHIFT + 1)
+#define KMALLOC_SHIFT_HIGH_MAX	(PAGE_SHIFT_MAX + 1)
 #define KMALLOC_SHIFT_MAX	(MAX_PAGE_ORDER + PAGE_SHIFT)
 #ifndef KMALLOC_SHIFT_LOW
 #define KMALLOC_SHIFT_LOW	3
@@ -426,7 +427,7 @@  enum kmalloc_cache_type {
 	NR_KMALLOC_TYPES
 };
 
-typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH + 1];
+typedef struct kmem_cache * kmem_buckets[KMALLOC_SHIFT_HIGH_MAX + 1];
 
 extern kmem_buckets kmalloc_caches[NR_KMALLOC_TYPES];
 
@@ -524,7 +525,7 @@  static __always_inline unsigned int __kmalloc_index(size_t size,
 	/* Will never be reached. Needed because the compiler may complain */
 	return -1;
 }
-static_assert(PAGE_SHIFT <= 20);
+static_assert(PAGE_SHIFT_MAX <= 20);
 #define kmalloc_index(s) __kmalloc_index(s, true)
 
 #include <linux/alloc_tag.h>
diff --git a/include/linux/swap.h b/include/linux/swap.h
index ba7ea95d1c57a..e85df0332979f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -132,10 +132,17 @@  static inline int current_is_kswapd(void)
  * bootbits...
  */
 union swap_header {
-	struct {
-		char reserved[PAGE_SIZE - 10];
-		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
-	} magic;
+	/*
+	 * Exists conceptually, but since PAGE_SIZE may not be known at compile
+	 * time, we must access through pointer arithmetic at run time.
+	 *
+	 * struct {
+	 * 	char reserved[PAGE_SIZE - 10];
+	 * 	char magic[10];			   SWAP-SPACE or SWAPSPACE2
+	 * } magic;
+	 */
+#define SWAP_HEADER_MAGIC	(PAGE_SIZE - 10)
+	char magic[1];
 	struct {
 		char		bootbits[1024];	/* Space for disklabel etc. */
 		__u32		version;
@@ -201,7 +208,7 @@  struct swap_extent {
  * Max bad pages in the new format..
  */
 #define MAX_SWAP_BADPAGES \
-	((offsetof(union swap_header, magic.magic) - \
+	((SWAP_HEADER_MAGIC - \
 	  offsetof(union swap_header, info.badpages)) / sizeof(int))
 
 enum {
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index cb468e418ea11..890fe6a3e6702 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -34,10 +34,14 @@ 
  */
 #ifdef MAX_PHYSMEM_BITS
 #define SWP_PFN_BITS		(MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#define SWP_PFN_BITS_MAX	(MAX_PHYSMEM_BITS - PAGE_SHIFT_MIN)
 #else  /* MAX_PHYSMEM_BITS */
 #define SWP_PFN_BITS		min_t(int, \
 				      sizeof(phys_addr_t) * 8 - PAGE_SHIFT, \
 				      SWP_TYPE_SHIFT)
+#define SWP_PFN_BITS_MAX	min_t(int, \
+				      sizeof(phys_addr_t) * 8 - PAGE_SHIFT_MIN, \
+				      SWP_TYPE_SHIFT)
 #endif	/* MAX_PHYSMEM_BITS */
 #define SWP_PFN_MASK		(BIT(SWP_PFN_BITS) - 1)
 
@@ -519,7 +523,7 @@  static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry)
 static inline bool is_pfn_swap_entry(swp_entry_t entry)
 {
 	/* Make sure the swp offset can always store the needed fields */
-	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS);
+	BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS_MAX);
 
 	return is_migration_entry(entry) || is_device_private_entry(entry) ||
 	       is_device_exclusive_entry(entry) || is_hwpoison_entry(entry);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c5f9195f76c65..4b17bec566fbd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4881,7 +4881,7 @@  static int __init mem_cgroup_init(void)
 	 * to work fine, we should make sure that the overfill threshold can't
 	 * exceed S32_MAX / PAGE_SIZE.
 	 */
-	BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE);
+	BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE_MIN);
 
 	cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
 				  memcg_hotplug_cpu_dead);
diff --git a/mm/memory.c b/mm/memory.c
index ebfc9768f801a..14b5ef6870486 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4949,8 +4949,8 @@  vm_fault_t finish_fault(struct vm_fault *vmf)
 	return ret;
 }
 
-static unsigned long fault_around_pages __read_mostly =
-	65536 >> PAGE_SHIFT;
+static __DEFINE_GLOBAL_PAGE_SIZE_VAR(unsigned long, fault_around_pages,
+				     __read_mostly, 65536 >> PAGE_SHIFT);
 
 #ifdef CONFIG_DEBUG_FS
 static int fault_around_bytes_get(void *data, u64 *val)
diff --git a/mm/mmap.c b/mm/mmap.c
index d0dfc85b209bb..d9642aba07ac4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2279,7 +2279,7 @@  int expand_downwards(struct vm_area_struct *vma, unsigned long address)
 }
 
 /* enforced gap between the expanding stack and other mappings. */
-unsigned long stack_guard_gap = 256UL<<PAGE_SHIFT;
+DEFINE_GLOBAL_PAGE_SIZE_VAR(unsigned long, stack_guard_gap, 256UL<<PAGE_SHIFT);
 
 static int __init cmdline_parse_stack_guard_gap(char *p)
 {
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 4430ac68e4c41..8fc9ac50749bd 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2292,7 +2292,7 @@  static int page_writeback_cpu_online(unsigned int cpu)
 #ifdef CONFIG_SYSCTL
 
 /* this is needed for the proc_doulongvec_minmax of vm_dirty_bytes */
-static const unsigned long dirty_bytes_min = 2 * PAGE_SIZE;
+static DEFINE_GLOBAL_PAGE_SIZE_VAR_CONST(unsigned long, dirty_bytes_min, 2 * PAGE_SIZE);
 
 static struct ctl_table vm_page_writeback_sysctls[] = {
 	{
diff --git a/mm/slub.c b/mm/slub.c
index a77f354f83251..82f6e98cf25bb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -5001,7 +5001,7 @@  init_kmem_cache_node(struct kmem_cache_node *n)
 static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
 {
 	BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
-			NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH *
+			NR_KMALLOC_TYPES * KMALLOC_SHIFT_HIGH_MAX *
 			sizeof(struct kmem_cache_cpu));
 
 	/*
diff --git a/mm/sparse.c b/mm/sparse.c
index dc38539f85603..2491425930c4d 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -277,7 +277,7 @@  static unsigned long sparse_encode_mem_map(struct page *mem_map, unsigned long p
 {
 	unsigned long coded_mem_map =
 		(unsigned long)(mem_map - (section_nr_to_pfn(pnum)));
-	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT);
+	BUILD_BUG_ON(SECTION_MAP_LAST_BIT > PFN_SECTION_SHIFT_MIN);
 	BUG_ON(coded_mem_map & ~SECTION_MAP_MASK);
 	return coded_mem_map;
 }
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 38bdc439651ac..6311a1cc7e46b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2931,7 +2931,7 @@  static unsigned long read_swap_header(struct swap_info_struct *p,
 	unsigned long swapfilepages;
 	unsigned long last_page;
 
-	if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
+	if (memcmp("SWAPSPACE2", &swap_header->magic[SWAP_HEADER_MAGIC], 10)) {
 		pr_err("Unable to find swap-space signature\n");
 		return 0;
 	}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a0df1e2e155a8..b4fbba204603c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2497,12 +2497,12 @@  struct vmap_block {
 	spinlock_t lock;
 	struct vmap_area *va;
 	unsigned long free, dirty;
-	DECLARE_BITMAP(used_map, VMAP_BBMAP_BITS);
 	unsigned long dirty_min, dirty_max; /*< dirty range */
 	struct list_head free_list;
 	struct rcu_head rcu_head;
 	struct list_head purge;
 	unsigned int cpu;
+	unsigned long used_map[];
 };
 
 /* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -2600,11 +2600,12 @@  static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
 	unsigned long vb_idx;
 	int node, err;
 	void *vaddr;
+	size_t size;
 
 	node = numa_node_id();
 
-	vb = kmalloc_node(sizeof(struct vmap_block),
-			gfp_mask & GFP_RECLAIM_MASK, node);
+	size = struct_size(vb, used_map, BITS_TO_LONGS(VMAP_BBMAP_BITS));
+	vb = kmalloc_node(size, gfp_mask & GFP_RECLAIM_MASK, node);
 	if (unlikely(!vb))
 		return ERR_PTR(-ENOMEM);