
[v2,1/3] x86/vmemmap: Drop handling of 4K unaligned vmemmap range

Message ID 20210203104750.23405-2-osalvador@suse.de (mailing list archive)
State New, archived
Series Cleanup and fixups for vmemmap handling

Commit Message

Oscar Salvador Feb. 3, 2021, 10:47 a.m. UTC
remove_pte_table() is prepared to handle the case where either the
start or the end of the range is not PAGE aligned.
This cannot actually happen:

__populate_section_memmap enforces the range to be PMD aligned,
so as long as the size of struct page remains a multiple of 8,
the vmemmap range will be aligned to PAGE_SIZE.

Drop the dead code and place VM_BUG_ONs in vmemmap_{populate,free}
to catch nasty cases.

Signed-off-by: Oscar Salvador <osalvador@suse.de>
Suggested-by: David Hildenbrand <david@redhat.com>
---
 arch/x86/mm/init_64.c | 48 ++++++++++++-------------------------------
 1 file changed, 13 insertions(+), 35 deletions(-)
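
To make the alignment argument above concrete, here is a minimal userspace
sketch (not kernel code) of the arithmetic; the constants assume x86_64
defaults of 4K pages and memmap populated/freed in subsection-aligned chunks
(PAGES_PER_SUBSECTION = 512):

/*
 * Userspace sketch of the alignment argument -- not kernel code.
 * Assumed x86_64 defaults: PAGE_SIZE = 4K, subsection = 512 pfns.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define PAGES_PER_SUBSECTION	512UL

int main(void)
{
	unsigned long sz, pfn;

	/* Any multiple of 8 stands in for sizeof(struct page). */
	for (sz = 8; sz <= 128; sz += 8) {
		for (pfn = 0; pfn < 16 * PAGES_PER_SUBSECTION;
		     pfn += PAGES_PER_SUBSECTION) {
			/* Byte offset of this pfn's struct page in the vmemmap. */
			unsigned long offset = pfn * sz;

			/* 512 * (a multiple of 8) is a multiple of 4096. */
			assert(offset % PAGE_SIZE == 0);
		}
	}
	printf("subsection boundaries in the memmap are PAGE_SIZE aligned\n");
	return 0;
}

Every start/end that remove_pte_table() can actually see therefore lands on a
PAGE_SIZE boundary, which is why the partially-filled-page handling removed
below is dead code.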

Comments

David Hildenbrand Feb. 3, 2021, 1:29 p.m. UTC | #1
On 03.02.21 11:47, Oscar Salvador wrote:
> remove_pte_table() is prepared to handle the case where either the
> start or the end of the range is not PAGE aligned.
> This cannot actually happen:
> 
> __populate_section_memmap enforces the range to be PMD aligned,
> so as long as the size of the struct page remains multiple of 8,
> the vmemmap range will be aligned to PAGE_SIZE.
> 
> Drop the dead code and place a VM_BUG_ON in vmemmap_{populate,free}
> to catch nasty cases.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Suggested-by: David Hildenbrand <david@redhat.com>
> ---
>   arch/x86/mm/init_64.c | 48 ++++++++++++-------------------------------
>   1 file changed, 13 insertions(+), 35 deletions(-)
> 
> diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
> index b5a3fa4033d3..b0e1d215c83e 100644
> --- a/arch/x86/mm/init_64.c
> +++ b/arch/x86/mm/init_64.c
> @@ -962,7 +962,6 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
>   {
>   	unsigned long next, pages = 0;
>   	pte_t *pte;
> -	void *page_addr;
>   	phys_addr_t phys_addr;
>   
>   	pte = pte_start + pte_index(addr);
> @@ -983,42 +982,15 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
>   		if (phys_addr < (phys_addr_t)0x40000000)
>   			return;
>   
> -		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
> -			/*
> -			 * Do not free direct mapping pages since they were
> -			 * freed when offlining, or simplely not in use.
> -			 */
> -			if (!direct)
> -				free_pagetable(pte_page(*pte), 0);
> -
> -			spin_lock(&init_mm.page_table_lock);
> -			pte_clear(&init_mm, addr, pte);
> -			spin_unlock(&init_mm.page_table_lock);
> +		if (!direct)
> +			free_pagetable(pte_page(*pte), 0);
>   
> -			/* For non-direct mapping, pages means nothing. */
> -			pages++;
> -		} else {
> -			/*
> -			 * If we are here, we are freeing vmemmap pages since
> -			 * direct mapped memory ranges to be freed are aligned.
> -			 *
> -			 * If we are not removing the whole page, it means
> -			 * other page structs in this page are being used and
> -			 * we canot remove them. So fill the unused page_structs
> -			 * with 0xFD, and remove the page when it is wholly
> -			 * filled with 0xFD.
> -			 */
> -			memset((void *)addr, PAGE_INUSE, next - addr);
> -
> -			page_addr = page_address(pte_page(*pte));
> -			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
> -				free_pagetable(pte_page(*pte), 0);
> +		spin_lock(&init_mm.page_table_lock);
> +		pte_clear(&init_mm, addr, pte);
> +		spin_unlock(&init_mm.page_table_lock);
>   
> -				spin_lock(&init_mm.page_table_lock);
> -				pte_clear(&init_mm, addr, pte);
> -				spin_unlock(&init_mm.page_table_lock);
> -			}
> -		}
> +		/* For non-direct mapping, pages means nothing. */
> +		pages++;
>   	}
>   
>   	/* Call free_pte_table() in remove_pmd_table(). */
> @@ -1197,6 +1169,9 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct,
>   void __ref vmemmap_free(unsigned long start, unsigned long end,
>   		struct vmem_altmap *altmap)
>   {
> +	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
> +	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
> +
>   	remove_pagetable(start, end, false, altmap);
>   }
>   
> @@ -1556,6 +1531,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
>   {
>   	int err;
>   
> +	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
> +	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
> +
>   	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
>   		err = vmemmap_populate_basepages(start, end, node, NULL);
>   	else if (boot_cpu_has(X86_FEATURE_PSE))
> 

Reviewed-by: David Hildenbrand <david@redhat.com>
Oscar Salvador Feb. 11, 2021, 9:25 p.m. UTC | #2
On Wed, Feb 03, 2021 at 11:47:48AM +0100, Oscar Salvador wrote:
> remove_pte_table() is prepared to handle the case where either the
> start or the end of the range is not PAGE aligned.
> This cannot actually happen:
> 
> __populate_section_memmap enforces the range to be PMD aligned,
> so as long as the size of the struct page remains multiple of 8,
> the vmemmap range will be aligned to PAGE_SIZE.
> 
> Drop the dead code and place a VM_BUG_ON in vmemmap_{populate,free}
> to catch nasty cases.
> 
> Signed-off-by: Oscar Salvador <osalvador@suse.de>
> Suggested-by: David Hildenbrand <david@redhat.com>

Hi Andrew, 

the cover letter got lost somehow, but are this patchset, [1], and [2]
on your radar?

[1] https://lore.kernel.org/linux-mm/20210204134325.7237-3-osalvador@suse.de/
[2] https://lore.kernel.org/linux-mm/20210204134325.7237-4-osalvador@suse.de/

thanks

Patch

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index b5a3fa4033d3..b0e1d215c83e 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -962,7 +962,6 @@  remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 {
 	unsigned long next, pages = 0;
 	pte_t *pte;
-	void *page_addr;
 	phys_addr_t phys_addr;
 
 	pte = pte_start + pte_index(addr);
@@ -983,42 +982,15 @@  remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
 		if (phys_addr < (phys_addr_t)0x40000000)
 			return;
 
-		if (PAGE_ALIGNED(addr) && PAGE_ALIGNED(next)) {
-			/*
-			 * Do not free direct mapping pages since they were
-			 * freed when offlining, or simplely not in use.
-			 */
-			if (!direct)
-				free_pagetable(pte_page(*pte), 0);
-
-			spin_lock(&init_mm.page_table_lock);
-			pte_clear(&init_mm, addr, pte);
-			spin_unlock(&init_mm.page_table_lock);
+		if (!direct)
+			free_pagetable(pte_page(*pte), 0);
 
-			/* For non-direct mapping, pages means nothing. */
-			pages++;
-		} else {
-			/*
-			 * If we are here, we are freeing vmemmap pages since
-			 * direct mapped memory ranges to be freed are aligned.
-			 *
-			 * If we are not removing the whole page, it means
-			 * other page structs in this page are being used and
-			 * we canot remove them. So fill the unused page_structs
-			 * with 0xFD, and remove the page when it is wholly
-			 * filled with 0xFD.
-			 */
-			memset((void *)addr, PAGE_INUSE, next - addr);
-
-			page_addr = page_address(pte_page(*pte));
-			if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
-				free_pagetable(pte_page(*pte), 0);
+		spin_lock(&init_mm.page_table_lock);
+		pte_clear(&init_mm, addr, pte);
+		spin_unlock(&init_mm.page_table_lock);
 
-				spin_lock(&init_mm.page_table_lock);
-				pte_clear(&init_mm, addr, pte);
-				spin_unlock(&init_mm.page_table_lock);
-			}
-		}
+		/* For non-direct mapping, pages means nothing. */
+		pages++;
 	}
 
 	/* Call free_pte_table() in remove_pmd_table(). */
@@ -1197,6 +1169,9 @@  remove_pagetable(unsigned long start, unsigned long end, bool direct,
 void __ref vmemmap_free(unsigned long start, unsigned long end,
 		struct vmem_altmap *altmap)
 {
+	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
+	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
+
 	remove_pagetable(start, end, false, altmap);
 }
 
@@ -1556,6 +1531,9 @@  int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 {
 	int err;
 
+	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
+	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));
+
 	if (end - start < PAGES_PER_SECTION * sizeof(struct page))
 		err = vmemmap_populate_basepages(start, end, node, NULL);
 	else if (boot_cpu_has(X86_FEATURE_PSE))
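
For reference, the added checks use IS_ALIGNED(), a power-of-two mask test,
and VM_BUG_ON(), which is only an active assertion when CONFIG_DEBUG_VM is
set and otherwise compiles away. A minimal userspace sketch of the check, with
stand-in macros rather than the kernel definitions and an example 64-bit
vmemmap-style address:

#include <assert.h>

#define PAGE_SIZE	4096UL

/* Power-of-two alignment test, same idea as the kernel's IS_ALIGNED(). */
#define IS_ALIGNED(x, a)	(((x) & ((a) - 1)) == 0)

/*
 * Stand-in for VM_BUG_ON(): only a fatal assertion when debug checking
 * (CONFIG_DEBUG_VM in the kernel) is compiled in.
 */
#ifdef DEBUG_VM
#define VM_BUG_ON(cond)		assert(!(cond))
#else
#define VM_BUG_ON(cond)		((void)(cond))
#endif

int main(void)
{
	/* Example addresses (assumes a 64-bit build); both PAGE_SIZE aligned. */
	unsigned long start = 0xffffea0000000000UL;
	unsigned long end   = start + 32 * PAGE_SIZE;

	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
	VM_BUG_ON(!IS_ALIGNED(end, PAGE_SIZE));

	return 0;
}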