
[PATCHv4,7/7] arm64: add better page protections to arm64

Message ID 1414440752-9411-8-git-send-email-lauraa@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Laura Abbott Oct. 27, 2014, 8:12 p.m. UTC
Add page protections for arm64 similar to those in arm.
This is for security reasons to prevent certain classes
of exploits. The current method:

- Map all memory as either RWX or RW. We round to the nearest
  section to avoid creating page tables before everything is mapped
- Once everything is mapped, if either end of the RWX section should
  not be X, we split the PMD and remap as necessary
- When initmem is to be freed, we change the permissions back to
  RW (using stop machine if necessary to flush the TLB)
- If CONFIG_DEBUG_RODATA is set, the read only sections are set
  read only.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
---
v4: Combined the Kconfig options
---
 arch/arm64/Kconfig.debug            |  23 +++
 arch/arm64/include/asm/cacheflush.h |   4 +
 arch/arm64/kernel/vmlinux.lds.S     |  17 ++
 arch/arm64/mm/init.c                |   1 +
 arch/arm64/mm/mm.h                  |   2 +
 arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
 6 files changed, 314 insertions(+), 36 deletions(-)
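
For intuition, the boundary handling described above reduces to roughly
the following sketch, built from the helpers the patch itself adds
(illustrative only, not an extra hunk):

	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);

	/* Pass 1: map [kernel_x_start, kernel_x_end) with RWX sections,
	 * everything else RW. Pass 2, once everything is mapped: drop X
	 * from the unaligned edges by remapping them as pages. */
	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE))
		create_mapping(kernel_x_start, __phys_to_virt(kernel_x_start),
				__pa(_stext) - kernel_x_start,
				PROT_SECT_NORMAL, PAGE_KERNEL);
	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE))
		create_mapping(__pa(__init_end), (unsigned long)__init_end,
				kernel_x_end - __pa(__init_end),
				PROT_SECT_NORMAL, PAGE_KERNEL);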

Comments

Steve Capper Oct. 28, 2014, 11:29 a.m. UTC | #1
On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
> Add page protections for arm64 similar to those in arm.
> This is for security reasons to prevent certain classes
> of exploits. The current method:
> 
> - Map all memory as either RWX or RW. We round to the nearest
>   section to avoid creating page tables before everything is mapped
> - Once everything is mapped, if either end of the RWX section should
>   not be X, we split the PMD and remap as necessary
> - When initmem is to be freed, we change the permissions back to
>   RW (using stop machine if necessary to flush the TLB)
> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>   read only.
> 
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>

Hi Laura,
I have some comments below.

> ---
> v4: Combined the Kconfig options
> ---
>  arch/arm64/Kconfig.debug            |  23 +++
>  arch/arm64/include/asm/cacheflush.h |   4 +
>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>  arch/arm64/mm/init.c                |   1 +
>  arch/arm64/mm/mm.h                  |   2 +
>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>  6 files changed, 314 insertions(+), 36 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
> index 0a12933..1b25015 100644
> --- a/arch/arm64/Kconfig.debug
> +++ b/arch/arm64/Kconfig.debug
> @@ -54,4 +54,27 @@ config DEBUG_SET_MODULE_RONX
>            against certain classes of kernel exploits.
>            If in doubt, say "N".
>  
> +config DEBUG_RODATA
> +	bool "Make kernel text and rodata read-only"
> +	help
> +	  If this is set, kernel text and rodata will be made read-only. This
> +	  is to help catch accidental or malicious attempts to change the
> +	  kernel's executable code. Additionally splits rodata from kernel
> +	  text so it can be made explicitly non-executable.
> +
> +	  If in doubt, say Y
> +
> +config DEBUG_ALIGN_RODATA
> +	depends on DEBUG_RODATA
> +	bool "Align linker sections up to SECTION_SIZE"
> +	help
> +	  If this option is enabled, sections that may potentially be marked as
> +	  read only or non-executable will be aligned up to the section size of
> +	  the kernel. This prevents sections from being split into pages and
> +	  avoids a potential TLB penalty. The downside is an increase in
> +	  alignment and potentially wasted space. Turn on this option if
> +	  performance is more important than memory pressure.
> +
> +	  If in doubt, say N
> +
>  endmenu
> diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
> index 689b637..0014207 100644
> --- a/arch/arm64/include/asm/cacheflush.h
> +++ b/arch/arm64/include/asm/cacheflush.h
> @@ -152,4 +152,8 @@ int set_memory_ro(unsigned long addr, int numpages);
>  int set_memory_rw(unsigned long addr, int numpages);
>  int set_memory_x(unsigned long addr, int numpages);
>  int set_memory_nx(unsigned long addr, int numpages);
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void);
> +#endif
>  #endif
> diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
> index edf8715..83424ad 100644
> --- a/arch/arm64/kernel/vmlinux.lds.S
> +++ b/arch/arm64/kernel/vmlinux.lds.S
> @@ -8,6 +8,7 @@
>  #include <asm/thread_info.h>
>  #include <asm/memory.h>
>  #include <asm/page.h>
> +#include <asm/pgtable.h>
>  
>  #include "image.h"
>  
> @@ -54,6 +55,9 @@ SECTIONS
>  		_text = .;
>  		HEAD_TEXT
>  	}
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
>  	.text : {			/* Real text segment		*/
>  		_stext = .;		/* Text and read-only data	*/
>  			__exception_text_start = .;
> @@ -70,19 +74,32 @@ SECTIONS
>  		*(.got)			/* Global offset table		*/
>  	}
>  
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#endif
>  	RO_DATA(PAGE_SIZE)
>  	EXCEPTION_TABLE(8)
>  	NOTES
>  	_etext = .;			/* End of text and rodata section */
>  
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +#else
>  	. = ALIGN(PAGE_SIZE);
> +#endif
>  	__init_begin = .;
>  
>  	INIT_TEXT_SECTION(8)
>  	.exit.text : {
>  		ARM_EXIT_KEEP(EXIT_TEXT)
>  	}
> +
> +#ifdef DEBUG_ALIGN_RODATA
> +	. = ALIGN(1<<SECTION_SHIFT);
> +	__init_data_begin = .;
> +#else
>  	. = ALIGN(16);
> +#endif
>  	.init.data : {
>  		INIT_DATA
>  		INIT_SETUP(16)
> diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
> index 494297c..61f44c7 100644
> --- a/arch/arm64/mm/init.c
> +++ b/arch/arm64/mm/init.c
> @@ -324,6 +324,7 @@ void __init mem_init(void)
>  
>  void free_initmem(void)
>  {
> +	fixup_init();
>  	free_initmem_default(0);
>  }
>  
> diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
> index d519f4f..82347d8 100644
> --- a/arch/arm64/mm/mm.h
> +++ b/arch/arm64/mm/mm.h
> @@ -1,2 +1,4 @@
>  extern void __init bootmem_init(void);
>  extern void __init arm64_swiotlb_init(void);
> +
> +void fixup_init(void);
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index e92f633..0428294 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -26,6 +26,7 @@
>  #include <linux/memblock.h>
>  #include <linux/fs.h>
>  #include <linux/io.h>
> +#include <linux/stop_machine.h>
>  
>  #include <asm/cputype.h>
>  #include <asm/fixmap.h>
> @@ -137,17 +138,55 @@ static void __init *early_alloc(unsigned long sz)
>  	return ptr;
>  }
>  
> -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
> +/*
> + * remap a PMD into pages
> + */
> +static noinline void __ref split_pmd(pmd_t *pmd, bool early)
> +{
> +	pte_t *pte, *start_pte;
> +	unsigned long pfn;
> +	int i = 0;
> +
> +	if (early)
> +		start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
> +	else
> +		start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
> +
> +	BUG_ON(!pte);
> +
> +	pfn = pmd_pfn(*pmd);
> +
> +	do {
> +		/*
> +		 * Need to have the least restrictive permissions available;
> +		 * permissions will be fixed up later.
> +		 */
> +		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
> +		pfn++;
> +	} while (pte++, i++, i < PTRS_PER_PTE);
> +
> +
> +	__pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
> +	flush_tlb_all();
> +}
> +
> +static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  				  unsigned long end, unsigned long pfn,
> -				  pgprot_t prot)
> +				  pgprot_t prot, bool early)
>  {
>  	pte_t *pte;
>  
>  	if (pmd_none(*pmd)) {
> -		pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
> +		if (early)
> +			pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
> +		else
> +			pte = (pte_t *)__get_free_page(PGALLOC_GFP);
> +		BUG_ON(!pte);
>  		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
>  	}
> -	BUG_ON(pmd_bad(*pmd));
> +
> +	if (pmd_bad(*pmd))
> +		split_pmd(pmd, early);
>  
>  	pte = pte_offset_kernel(pmd, addr);
>  	do {
> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>  	} while (pte++, addr += PAGE_SIZE, addr != end);
>  }
>  
> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
> +{
> +	unsigned long pfn = pud_pfn(*old_pud);
> +	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
> +			      PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
> +			      PMD_SECT_VALID;
> +	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);

This looks a little fragile, if I've understood things correctly then
all we want to do is create a set of pmds with the same pgprot_t as the
original pud?

In that case it would probably be easier to simply mask out the pfn
from our pud to create the pgprot rather than mask in a set of
attributes (that may change in future)?
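
In code, masking the pfn out might look something like this (an
untested sketch of the suggestion, not part of the patch):

	unsigned long pfn = pud_pfn(*old_pud);
	unsigned long addr = pfn << PAGE_SHIFT;
	/* XOR the address bits back out, leaving only the attributes */
	pgprot_t prot = __pgprot(pud_val(*old_pud) ^ addr);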

> +	int i = 0;
> +
> +	do {
> +		set_pmd(pmd, pfn_pmd(pfn, prot));
> +		pfn++;
> +	} while (pmd++, i++, i < PTRS_PER_PMD);
> +}
> +
> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>  				  unsigned long end, phys_addr_t phys,
> -				  int map_io)
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  bool early, int map_io)
>  {
>  	pmd_t *pmd;
>  	unsigned long next;
> -	pmdval_t prot_sect;
> -	pgprot_t prot_pte;
>  
>  	if (map_io) {
> -		prot_sect = PROT_SECT_DEVICE_nGnRE;
> -		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
> -	} else {
> -		prot_sect = PROT_SECT_NORMAL_EXEC;
> -		prot_pte = PAGE_KERNEL_EXEC;
> +		sect_prot = PROT_SECT_DEVICE_nGnRE;
> +		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>  	}

I think we should wipe out map_io from all these functions as it's
causing too much complexity. It's enough to have just sect_prot and
pte_prot. I posted some similar feedback to Ard:
http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
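
Roughly, the walkers would then shrink to something of this shape
(assumed signature, untested):

	static void alloc_init_pmd(pud_t *pud, unsigned long addr,
				   unsigned long end, phys_addr_t phys,
				   pgprot_t sect_prot, pgprot_t pte_prot,
				   bool early);

with the few callers that set map_io passing PROT_SECT_DEVICE_nGnRE and
__pgprot(PROT_DEVICE_nGnRE) themselves.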

>  
>  	/*
>  	 * Check for initial section mappings in the pgd/pud and remove them.
>  	 */
>  	if (pud_none(*pud) || pud_bad(*pud)) {
> -		pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
> +		if (early)
> +			pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
> +		else
> +			pmd = pmd_alloc_one(&init_mm, addr);
> +		BUG_ON(!pmd);
> +		if (pud_sect(*pud)) {
> +			/*
> +			 * need to have the 1G of mappings continue to be
> +			 * present
> +			 */
> +			split_pud(pud, pmd);
> +		}
>  		pud_populate(&init_mm, pud, pmd);
> +		flush_tlb_all();
>  	}
>  
>  	pmd = pmd_offset(pud, addr);
> @@ -186,8 +248,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>  		next = pmd_addr_end(addr, end);
>  		/* try section mapping first */
>  		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
> -			pmd_t old_pmd =*pmd;
> -			set_pmd(pmd, __pmd(phys | prot_sect));
> +			pmd_t old_pmd = *pmd;
> +			set_pmd(pmd, __pmd(phys | sect_prot));
>  			/*
>  			 * Check for previous table entries created during
>  			 * boot (__create_page_tables) and flush them.
> @@ -196,15 +258,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>  				flush_tlb_all();
>  		} else {
>  			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
> -				       prot_pte);
> +					pte_prot, early);
>  		}
>  		phys += next - addr;
>  	} while (pmd++, addr = next, addr != end);
>  }
>  
> -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
> +static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  				  unsigned long end, unsigned long phys,
> -				  int map_io)
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  bool early, int map_io)
>  {
>  	pud_t *pud;
>  	unsigned long next;
> @@ -222,10 +285,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  		/*
>  		 * For 4K granule only, attempt to put down a 1GB block
>  		 */
> -		if (!map_io && (PAGE_SHIFT == 12) &&
> +		if (!map_io && early && (PAGE_SHIFT == 12) &&
>  		    ((addr | next | phys) & ~PUD_MASK) == 0) {
>  			pud_t old_pud = *pud;
> -			set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
> +			set_pud(pud, __pud(phys | sect_prot));
>  
>  			/*
>  			 * If we have an old value for a pud, it will
> @@ -240,7 +303,8 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>  				flush_tlb_all();
>  			}
>  		} else {
> -			alloc_init_pmd(pud, addr, next, phys, map_io);
> +			alloc_init_pmd(pud, addr, next, phys, sect_prot,
> +					pte_prot, early, map_io);
>  		}
>  		phys += next - addr;
>  	} while (pud++, addr = next, addr != end);
> @@ -250,9 +314,11 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>   * Create the page directory entries and any necessary page tables for the
>   * mapping specified by 'md'.
>   */
> -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
> -				    unsigned long virt, phys_addr_t size,
> -				    int map_io)
> +static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
> +				  unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot,
> +				  int map_io, bool early)
>  {
>  	unsigned long addr, length, end, next;
>  
> @@ -262,31 +328,140 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>  	end = addr + length;
>  	do {
>  		next = pgd_addr_end(addr, end);
> -		alloc_init_pud(pgd, addr, next, phys, map_io);
> +		alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
> +				early, map_io);
>  		phys += next - addr;
>  	} while (pgd++, addr = next, addr != end);
>  }
>  
> -static void __init create_mapping(phys_addr_t phys, unsigned long virt,
> -				  phys_addr_t size)
> +void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
> +{
> +	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
> +		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
> +		return;
> +	}
> +	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
> +			 addr, addr, size, PROT_SECT_NORMAL_EXEC,
> +			 PAGE_KERNEL_EXEC, map_io, false);
> +}
> +
> +static inline pmd_t *pmd_off_k(unsigned long virt)
> +{
> +	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
> +}
> +
> +#ifdef CONFIG_EARLY_PRINTK
> +/*
> + * Create an early I/O mapping using the pgd/pmd entries already populated
> + * in head.S as this function is called too early to allocate any memory. The
> + * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
> + */
> +void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
> +{
> +	unsigned long size, mask;
> +	bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
> +	pgd_t *pgd;
> +	pud_t *pud;
> +	pmd_t *pmd;
> +	pte_t *pte;
> +
> +	/*
> +	 * No early pte entries with !ARM64_64K_PAGES configuration, so using
> +	 * sections (pmd).
> +	 */
> +	size = page64k ? PAGE_SIZE : SECTION_SIZE;
> +	mask = ~(size - 1);
> +
> +	pgd = pgd_offset_k(virt);
> +	pud = pud_offset(pgd, virt);
> +	if (pud_none(*pud))
> +		return NULL;
> +	pmd = pmd_offset(pud, virt);
> +
> +	if (page64k) {
> +		if (pmd_none(*pmd))
> +			return NULL;
> +		pte = pte_offset_kernel(pmd, virt);
> +		set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
> +	} else {
> +		set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
> +	}
> +
> +	return (void __iomem *)((virt & mask) + (phys & ~mask));
> +}
> +#endif
> +
> +static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot)
>  {
>  	if (virt < VMALLOC_START) {
>  		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>  			&phys, virt);
>  		return;
>  	}
> -	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
> +
> +	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
> +				size, sect_prot, pte_prot, 0, true);
>  }
>  
> -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
> +static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
> +				  phys_addr_t size,
> +				  pgprot_t sect_prot, pgprot_t pte_prot)
>  {
> -	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
> -		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
> +	if (virt < VMALLOC_START) {
> +		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
> +			&phys, virt);
>  		return;
>  	}
> -	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
> -			 addr, addr, size, map_io);
> +
> +	return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
> +				size, sect_prot, pte_prot, 0, false);
> +}
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
> +{
> +	/*
> +	 * Set up the executable regions using the existing section mappings
> +	 * for now. This will get more fine grained later once all memory
> +	 * is mapped
> +	 */
> +	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
> +	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
> +
> +	if (end < kernel_x_start) {
> +		create_mapping(start, __phys_to_virt(start),
> +			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
> +	} else if (start >= kernel_x_end) {
> +		create_mapping(start, __phys_to_virt(start),
> +			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
> +	} else {
> +		if (start < kernel_x_start)
> +			create_mapping(start, __phys_to_virt(start),
> +				kernel_x_start - start,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +		create_mapping(kernel_x_start,
> +				__phys_to_virt(kernel_x_start),
> +				kernel_x_end - kernel_x_start,
> +				PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
> +		if (kernel_x_end < end)
> +			create_mapping(kernel_x_end,
> +				__phys_to_virt(kernel_x_end),
> +				end - kernel_x_end,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +
> +}
> +#else
> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
> +{
> +	create_mapping(start, __phys_to_virt(start), end - start,
> +			PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>  }
> +#endif
>  
>  static void __init map_mem(void)
>  {
> @@ -328,14 +503,69 @@ static void __init map_mem(void)
>  			memblock_set_current_limit(limit);
>  		}
>  #endif
> -
> -		create_mapping(start, __phys_to_virt(start), end - start);
> +		__map_memblock(start, end);
>  	}
>  
>  	/* Limit no longer required. */
>  	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
>  }
>  
> +void __init fixup_executable(void)
> +{
> +#ifdef CONFIG_DEBUG_RODATA
> +	/* now that we are actually fully mapped, make the start/end more fine grained */
> +	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
> +		unsigned long aligned_start = round_down(__pa(_stext),
> +							SECTION_SIZE);
> +
> +		create_mapping(aligned_start, __phys_to_virt(aligned_start),
> +				__pa(_stext) - aligned_start,
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +
> +	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
> +		unsigned long aligned_end = round_up(__pa(__init_end),
> +							SECTION_SIZE);
> +		create_mapping(__pa(__init_end), (unsigned long)__init_end,
> +				aligned_end - __pa(__init_end),
> +				PROT_SECT_NORMAL,
> +				PAGE_KERNEL);
> +	}
> +#endif
> +}
> +
> +#ifdef CONFIG_DEBUG_RODATA
> +void mark_rodata_ro(void)
> +{
> +	create_mapping_late(__pa(_stext), (unsigned long)_stext,
> +				(unsigned long)_etext - (unsigned long)_stext,
> +				PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
> +				PAGE_KERNEL_EXEC | PTE_RDONLY);
> +
> +}
> +#endif
> +
> +static int __flush_mappings(void *unused)
> +{
> +	flush_tlb_kernel_range((unsigned long)__init_begin,
> +				(unsigned long)__init_end);
> +	return 0;
> +}
> +
> +void __ref fixup_init(void)
> +{
> +	phys_addr_t start = __pa(__init_begin);
> +	phys_addr_t end = __pa(__init_end);
> +
> +	create_mapping_late(start, (unsigned long)__init_begin,
> +			end - start,
> +			PROT_SECT_NORMAL,
> +			PAGE_KERNEL);
> +	if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
> +		stop_machine(__flush_mappings, NULL, NULL);
> +}
> +
>  /*
>   * paging_init() sets up the page tables, initialises the zone memory
>   * maps and sets up the zero page.
> @@ -345,6 +575,7 @@ void __init paging_init(void)
>  	void *zero_page;
>  
>  	map_mem();
> +	fixup_executable();
>  
>  	/*
>  	 * Finally flush the caches and tlb to ensure that we're in a
> -- 
> Qualcomm Innovation Center, Inc.
> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>
Ard Biesheuvel Oct. 28, 2014, 11:44 a.m. UTC | #2
On 28 October 2014 12:29, Steve Capper <steve.capper@linaro.org> wrote:
> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>   section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>   not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>   RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>   read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>
> Hi Laura,
> I have some comments below.
>
>> ---
>> v4: Combined the Kconfig options
>> ---
>>  arch/arm64/Kconfig.debug            |  23 +++
>>  arch/arm64/include/asm/cacheflush.h |   4 +
>>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>>  arch/arm64/mm/init.c                |   1 +
>>  arch/arm64/mm/mm.h                  |   2 +
>>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>>  6 files changed, 314 insertions(+), 36 deletions(-)
>>
[...]
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index e92f633..0428294 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -26,6 +26,7 @@
>>  #include <linux/memblock.h>
>>  #include <linux/fs.h>
>>  #include <linux/io.h>
>> +#include <linux/stop_machine.h>
>>
>>  #include <asm/cputype.h>
>>  #include <asm/fixmap.h>
>> @@ -137,17 +138,55 @@ static void __init *early_alloc(unsigned long sz)
>>       return ptr;
>>  }
>>
>> -static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>> +/*
>> + * remap a PMD into pages
>> + */
>> +static noinline void __ref split_pmd(pmd_t *pmd, bool early)
>> +{
>> +     pte_t *pte, *start_pte;
>> +     unsigned long pfn;
>> +     int i = 0;
>> +
>> +     if (early)
>> +             start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
>> +     else
>> +             start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
>> +
>> +     BUG_ON(!pte);
>> +
>> +     pfn = pmd_pfn(*pmd);
>> +
>> +     do {
>> +             /*
>> +              * Need to have the least restrictive permissions available;
>> +              * permissions will be fixed up later.
>> +              */
>> +             set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
>> +             pfn++;
>> +     } while (pte++, i++, i < PTRS_PER_PTE);
>> +
>> +
>> +     __pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
>> +     flush_tlb_all();
>> +}
>> +
>> +static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>                                 unsigned long end, unsigned long pfn,
>> -                               pgprot_t prot)
>> +                               pgprot_t prot, bool early)
>>  {
>>       pte_t *pte;
>>
>>       if (pmd_none(*pmd)) {
>> -             pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
>> +             if (early)
>> +                     pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
>> +             else
>> +                     pte = (pte_t *)__get_free_page(PGALLOC_GFP);
>> +             BUG_ON(!pte);
>>               __pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
>>       }
>> -     BUG_ON(pmd_bad(*pmd));
>> +
>> +     if (pmd_bad(*pmd))
>> +             split_pmd(pmd, early);
>>
>>       pte = pte_offset_kernel(pmd, addr);
>>       do {
>> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>       } while (pte++, addr += PAGE_SIZE, addr != end);
>>  }
>>
>> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
>> +{
>> +     unsigned long pfn = pud_pfn(*old_pud);
>> +        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
>> +                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
>> +                              PMD_SECT_VALID;
>> +     pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
>
> This looks a little fragile, if I've understood things correctly then
> all we want to do is create a set of pmds with the same pgprot_t as the
> original pud?
>
> In that case it would probably be easier to simply mask out the pfn
> from our pud to create the pgprot rather than mask in a set of
> attributes (that may change in future)?
>
>> +     int i = 0;
>> +
>> +     do {
>> +             set_pmd(pmd, pfn_pmd(pfn, prot));
>> +             pfn++;
>> +     } while (pmd++, i++, i < PTRS_PER_PMD);
>> +}
>> +
>> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>>                                 unsigned long end, phys_addr_t phys,
>> -                               int map_io)
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               bool early, int map_io)
>>  {
>>       pmd_t *pmd;
>>       unsigned long next;
>> -     pmdval_t prot_sect;
>> -     pgprot_t prot_pte;
>>
>>       if (map_io) {
>> -             prot_sect = PROT_SECT_DEVICE_nGnRE;
>> -             prot_pte = __pgprot(PROT_DEVICE_nGnRE);
>> -     } else {
>> -             prot_sect = PROT_SECT_NORMAL_EXEC;
>> -             prot_pte = PAGE_KERNEL_EXEC;
>> +             sect_prot = PROT_SECT_DEVICE_nGnRE;
>> +             pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>>       }
>
> I think we should wipe out map_io from all these functions as it's
> causing too much complexity. It's enough to have just sect_prot and
> pte_prot. I posted some similar feedback to Ard:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>

I do agree. But could we have a single enum that maps onto {sect_prot,
pte_prot} tuples rather than having to pass both values everywhere we
call any of these functions?
I.e., { MMU_PROT_DEFAULT, MMU_PROT_READONLY, MMU_PROT_READWRITE,
MMU_PROT_READEXEC, MMU_PROT_DEVICE }, with the mapping local to mmu.c?
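
Something like the following, perhaps (shape only; the enum names are
from the suggestion above, the initializers are assumptions):

	enum mmu_map_prot {
		MMU_PROT_DEFAULT,
		MMU_PROT_READONLY,
		MMU_PROT_READWRITE,
		MMU_PROT_READEXEC,
		MMU_PROT_DEVICE,
	};

	static const struct {
		pmdval_t sect;
		pgprot_t pte;
	} prot_map[] = {
		[MMU_PROT_DEFAULT] = { PROT_SECT_NORMAL_EXEC,
				       PAGE_KERNEL_EXEC },
		[MMU_PROT_DEVICE]  = { PROT_SECT_DEVICE_nGnRE,
				       __pgprot(PROT_DEVICE_nGnRE) },
		/* ... remaining entries elided ... */
	};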

>>
>>       /*
>>        * Check for initial section mappings in the pgd/pud and remove them.
>>        */
>>       if (pud_none(*pud) || pud_bad(*pud)) {
>> -             pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
>> +             if (early)
>> +                     pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
>> +             else
>> +                     pmd = pmd_alloc_one(&init_mm, addr);
>> +             BUG_ON(!pmd);
>> +             if (pud_sect(*pud)) {
>> +                     /*
>> +                      * need to have the 1G of mappings continue to be
>> +                      * present
>> +                      */
>> +                     split_pud(pud, pmd);
>> +             }
>>               pud_populate(&init_mm, pud, pmd);
>> +             flush_tlb_all();
>>       }
>>
>>       pmd = pmd_offset(pud, addr);
>> @@ -186,8 +248,8 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>>               next = pmd_addr_end(addr, end);
>>               /* try section mapping first */
>>               if (((addr | next | phys) & ~SECTION_MASK) == 0) {
>> -                     pmd_t old_pmd =*pmd;
>> -                     set_pmd(pmd, __pmd(phys | prot_sect));
>> +                     pmd_t old_pmd = *pmd;
>> +                     set_pmd(pmd, __pmd(phys | sect_prot));
>>                       /*
>>                        * Check for previous table entries created during
>>                        * boot (__create_page_tables) and flush them.
>> @@ -196,15 +258,16 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>>                               flush_tlb_all();
>>               } else {
>>                       alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
>> -                                    prot_pte);
>> +                                     pte_prot, early);
>>               }
>>               phys += next - addr;
>>       } while (pmd++, addr = next, addr != end);
>>  }
>>
>> -static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>> +static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>                                 unsigned long end, unsigned long phys,
>> -                               int map_io)
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               bool early, int map_io)
>>  {
>>       pud_t *pud;
>>       unsigned long next;
>> @@ -222,10 +285,10 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>               /*
>>                * For 4K granule only, attempt to put down a 1GB block
>>                */
>> -             if (!map_io && (PAGE_SHIFT == 12) &&
>> +             if (!map_io && early && (PAGE_SHIFT == 12) &&
>>                   ((addr | next | phys) & ~PUD_MASK) == 0) {
>>                       pud_t old_pud = *pud;
>> -                     set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
>> +                     set_pud(pud, __pud(phys | sect_prot));
>>
>>                       /*
>>                        * If we have an old value for a pud, it will
>> @@ -240,7 +303,8 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>                               flush_tlb_all();
>>                       }
>>               } else {
>> -                     alloc_init_pmd(pud, addr, next, phys, map_io);
>> +                     alloc_init_pmd(pud, addr, next, phys, sect_prot,
>> +                                     pte_prot, early, map_io);
>>               }
>>               phys += next - addr;
>>       } while (pud++, addr = next, addr != end);
>> @@ -250,9 +314,11 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
>>   * Create the page directory entries and any necessary page tables for the
>>   * mapping specified by 'md'.
>>   */
>> -static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>> -                                 unsigned long virt, phys_addr_t size,
>> -                                 int map_io)
>> +static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
>> +                               unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot,
>> +                               int map_io, bool early)
>>  {
>>       unsigned long addr, length, end, next;
>>
>> @@ -262,31 +328,140 @@ static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
>>       end = addr + length;
>>       do {
>>               next = pgd_addr_end(addr, end);
>> -             alloc_init_pud(pgd, addr, next, phys, map_io);
>> +             alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
>> +                             early, map_io);
>>               phys += next - addr;
>>       } while (pgd++, addr = next, addr != end);
>>  }
>>
>> -static void __init create_mapping(phys_addr_t phys, unsigned long virt,
>> -                               phys_addr_t size)
>> +void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
>> +{
>> +     if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
>> +             pr_warn("BUG: not creating id mapping for %pa\n", &addr);
>> +             return;
>> +     }
>> +     __create_mapping(&idmap_pg_dir[pgd_index(addr)],
>> +                      addr, addr, size, PROT_SECT_NORMAL_EXEC,
>> +                      PAGE_KERNEL_EXEC, map_io, false);
>> +}
>> +
>> +static inline pmd_t *pmd_off_k(unsigned long virt)
>> +{
>> +     return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
>> +}
>> +
>> +#ifdef CONFIG_EARLY_PRINTK
>> +/*
>> + * Create an early I/O mapping using the pgd/pmd entries already populated
>> + * in head.S as this function is called too early to allocate any memory. The
>> + * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
>> + */
>> +void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
>> +{
>> +     unsigned long size, mask;
>> +     bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
>> +     pgd_t *pgd;
>> +     pud_t *pud;
>> +     pmd_t *pmd;
>> +     pte_t *pte;
>> +
>> +     /*
>> +      * No early pte entries with !ARM64_64K_PAGES configuration, so using
>> +      * sections (pmd).
>> +      */
>> +     size = page64k ? PAGE_SIZE : SECTION_SIZE;
>> +     mask = ~(size - 1);
>> +
>> +     pgd = pgd_offset_k(virt);
>> +     pud = pud_offset(pgd, virt);
>> +     if (pud_none(*pud))
>> +             return NULL;
>> +     pmd = pmd_offset(pud, virt);
>> +
>> +     if (page64k) {
>> +             if (pmd_none(*pmd))
>> +                     return NULL;
>> +             pte = pte_offset_kernel(pmd, virt);
>> +             set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
>> +     } else {
>> +             set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
>> +     }
>> +
>> +     return (void __iomem *)((virt & mask) + (phys & ~mask));
>> +}
>> +#endif
>> +
>> +static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot)
>>  {
>>       if (virt < VMALLOC_START) {
>>               pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>>                       &phys, virt);
>>               return;
>>       }
>> -     __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
>> +
>> +     return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
>> +                             size, sect_prot, pte_prot, 0, true);
>>  }
>>
>> -void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
>> +static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
>> +                               phys_addr_t size,
>> +                               pgprot_t sect_prot, pgprot_t pte_prot)
>>  {
>> -     if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
>> -             pr_warn("BUG: not creating id mapping for %pa\n", &addr);
>> +     if (virt < VMALLOC_START) {
>> +             pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
>> +                     &phys, virt);
>>               return;
>>       }
>> -     __create_mapping(&idmap_pg_dir[pgd_index(addr)],
>> -                      addr, addr, size, map_io);
>> +
>> +     return __create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
>> +                             size, sect_prot, pte_prot, 0, false);
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_RODATA
>> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
>> +{
>> +     /*
>> +      * Set up the executable regions using the existing section mappings
>> +      * for now. This will get more fine grained later once all memory
>> +      * is mapped
>> +      */
>> +     unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
>> +     unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
>> +
>> +     if (end < kernel_x_start) {
>> +             create_mapping(start, __phys_to_virt(start),
>> +                     end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
>> +     } else if (start >= kernel_x_end) {
>> +             create_mapping(start, __phys_to_virt(start),
>> +                     end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
>> +     } else {
>> +             if (start < kernel_x_start)
>> +                     create_mapping(start, __phys_to_virt(start),
>> +                             kernel_x_start - start,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +             create_mapping(kernel_x_start,
>> +                             __phys_to_virt(kernel_x_start),
>> +                             kernel_x_end - kernel_x_start,
>> +                             PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>> +             if (kernel_x_end < end)
>> +                     create_mapping(kernel_x_end,
>> +                             __phys_to_virt(kernel_x_end),
>> +                             end - kernel_x_end,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +
>> +}
>> +#else
>> +static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
>> +{
>> +     create_mapping(start, __phys_to_virt(start), end - start,
>> +                     PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
>>  }
>> +#endif
>>
>>  static void __init map_mem(void)
>>  {
>> @@ -328,14 +503,69 @@ static void __init map_mem(void)
>>                       memblock_set_current_limit(limit);
>>               }
>>  #endif
>> -
>> -             create_mapping(start, __phys_to_virt(start), end - start);
>> +             __map_memblock(start, end);
>>       }
>>
>>       /* Limit no longer required. */
>>       memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
>>  }
>>
>> +void __init fixup_executable(void)
>> +{
>> +#ifdef CONFIG_DEBUG_RODATA
>> +     /* now that we are actually fully mapped, make the start/end more fine grained */
>> +     if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
>> +             unsigned long aligned_start = round_down(__pa(_stext),
>> +                                                     SECTION_SIZE);
>> +
>> +             create_mapping(aligned_start, __phys_to_virt(aligned_start),
>> +                             __pa(_stext) - aligned_start,
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +
>> +     if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
>> +             unsigned long aligned_end = round_up(__pa(__init_end),
>> +                                                     SECTION_SIZE);
>> +             create_mapping(__pa(__init_end), (unsigned long)__init_end,
>> +                             aligned_end - __pa(__init_end),
>> +                             PROT_SECT_NORMAL,
>> +                             PAGE_KERNEL);
>> +     }
>> +#endif
>> +}
>> +
>> +#ifdef CONFIG_DEBUG_RODATA
>> +void mark_rodata_ro(void)
>> +{
>> +     create_mapping_late(__pa(_stext), (unsigned long)_stext,
>> +                             (unsigned long)_etext - (unsigned long)_stext,
>> +                             PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
>> +                             PAGE_KERNEL_EXEC | PTE_RDONLY);
>> +
>> +}
>> +#endif
>> +
>> +static int __flush_mappings(void *unused)
>> +{
>> +     flush_tlb_kernel_range((unsigned long)__init_begin,
>> +                             (unsigned long)__init_end);
>> +     return 0;
>> +}
>> +
>> +void __ref fixup_init(void)
>> +{
>> +     phys_addr_t start = __pa(__init_begin);
>> +     phys_addr_t end = __pa(__init_end);
>> +
>> +     create_mapping_late(start, (unsigned long)__init_begin,
>> +                     end - start,
>> +                     PROT_SECT_NORMAL,
>> +                     PAGE_KERNEL);
>> +     if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
>> +             stop_machine(__flush_mappings, NULL, NULL);
>> +}
>> +
>>  /*
>>   * paging_init() sets up the page tables, initialises the zone memory
>>   * maps and sets up the zero page.
>> @@ -345,6 +575,7 @@ void __init paging_init(void)
>>       void *zero_page;
>>
>>       map_mem();
>> +     fixup_executable();
>>
>>       /*
>>        * Finally flush the caches and tlb to ensure that we're in a
>> --
>> Qualcomm Innovation Center, Inc.
>> Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux Foundation Collaborative Project
>>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Steve Capper Oct. 28, 2014, 1:40 p.m. UTC | #3
On 28 October 2014 11:44, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 28 October 2014 12:29, Steve Capper <steve.capper@linaro.org> wrote:
>> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:

[...]

>>
>> I think we should wipe out map_io from all these functions as it's
>> causing too much complexity. It's enough to have just sect_prot and
>> pte_prot. I posted some similar feedback to Ard:
>> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>>
>
> I do agree. But could we have a single enum that maps onto {sect_prot,
> pte_prot} tuples rather than having to pass both values everywhere we
> call any of these functions?
> I.e., { MMU_PROT_DEFAULT, MMU_PROT_READONLY, MMU_PROT_READWRITE,
> MMU_PROT_READEXEC, MMU_PROT_DEVICE }, with the mapping local to mmu.c?
>

arch/arm has a mem_types array; I'm not sure how applicable that would
be for arm64.
One could also be (slightly) filthy and promote a pte_prot into a
sect_prot via manipulation of the lower 2 bits (see for example
pte_mkhuge).
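
Something like this, perhaps (rough and untested; assumes the
descriptor type lives in bits [1:0] as PTE_TYPE_MASK/PMD_TYPE_SECT
describe today):

	static inline pmdval_t pte_prot_to_sect_prot(pgprot_t prot)
	{
		/* swap the page type for the section/block type, in
		 * the spirit of pte_mkhuge() */
		return (pgprot_val(prot) & ~PTE_TYPE_MASK) | PMD_TYPE_SECT;
	}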

Basically as long as the mapping logic doesn't make decisions about
which pgprots to use, then I'm happy :-).

Cheers,
--
Steve
Mark Rutland Oct. 28, 2014, 2:20 p.m. UTC | #4
[resending due to previous header destruction]

Hi Laura,

On Mon, Oct 27, 2014 at 08:12:32PM +0000, Laura Abbott wrote:
> Add page protections for arm64 similar to those in arm.
> This is for security reasons to prevent certain classes
> of exploits. The current method:
>
> - Map all memory as either RWX or RW. We round to the nearest
>   section to avoid creating page tables before everything is mapped
> - Once everything is mapped, if either end of the RWX section should
>   not be X, we split the PMD and remap as necessary
> - When initmem is to be freed, we change the permissions back to
>   RW (using stop machine if necessary to flush the TLB)
> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>   read only.
>
> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
> ---
> v4: Combined the Kconfig options
> ---
>  arch/arm64/Kconfig.debug            |  23 +++
>  arch/arm64/include/asm/cacheflush.h |   4 +
>  arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>  arch/arm64/mm/init.c                |   1 +
>  arch/arm64/mm/mm.h                  |   2 +
>  arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>  6 files changed, 314 insertions(+), 36 deletions(-)

With this patch applied to v3.18-rc2, my board blows up at boot when
using UEFI (without DEBUG_RODATA selected):

---->8----
EFI stub: Booting Linux Kernel...
Initializing cgroup subsys cpu
Linux version 3.18.0-rc2+ (mark@leverpostej) (gcc version 4.9.2 20140904 (prerelease) (crosstool-NG linaro-1.13.1-4.9-2014.09 - Linaro GCC 4.9-2014.09) ) #112 SMP PREEMPT Tue Oct 28 13:58:41 GMT 2014
CPU: AArch64 Processor [410fd030] revision 0
Detected VIPT I-cache on CPU0
bootconsole [uart0] enabled
efi: Getting EFI parameters from FDT:
EFI v2.40 by ARM Juno EFI Oct  7 2014 15:05:42
efi:  ACPI=0xfebdc000  ACPI 2.0=0xfebdc014
cma: Reserved 16 MiB at fd800000
BUG: failure at arch/arm64/mm/mmu.c:234/alloc_init_pmd()!
Kernel panic - not syncing: BUG!
CPU: 0 PID: 0 Comm: swapper Not tainted 3.18.0-rc2+ #112
Call trace:
[<ffffffc000087ec8>] dump_backtrace+0x0/0x124
[<ffffffc000087ffc>] show_stack+0x10/0x1c
[<ffffffc0004ebd58>] dump_stack+0x74/0xb8
[<ffffffc0004eb018>] panic+0xe0/0x220
[<ffffffc0004e8e08>] alloc_init_pmd+0x1cc/0x1dc
[<ffffffc0004e8e3c>] alloc_init_pud+0x24/0x6c
[<ffffffc0004e8f54>] __create_mapping+0xd0/0xf0
[<ffffffc00069a0a0>] create_id_mapping+0x5c/0x68
[<ffffffc00069964c>] efi_idmap_init+0x54/0xd8
[<ffffffc0006978a8>] setup_arch+0x408/0x5c0
[<ffffffc00069566c>] start_kernel+0x94/0x3a0
---[ end Kernel panic - not syncing: BUG!
---->8----

I've not yet figured out precisely why. I haven't tried a !EFI boot
because of the way my board is set up at the moment.

Mark.
Laura Abbott Oct. 30, 2014, 5:12 p.m. UTC | #5
On 10/28/2014 4:29 AM, Steve Capper wrote:
> On Mon, Oct 27, 2014 at 01:12:32PM -0700, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>    section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>    not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>    RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>    read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>
> Hi Laura,
> I have some comments below.
>
...
>> @@ -156,29 +195,52 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
>>   	} while (pte++, addr += PAGE_SIZE, addr != end);
>>   }
>>
>> -static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
>> +void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
>> +{
>> +	unsigned long pfn = pud_pfn(*old_pud);
>> +        const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
>> +                              PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
>> +                              PMD_SECT_VALID;
>> +	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
>
> This looks a little fragile, if I've understood things correctly then
> all we want to do is create a set of pmds with the same pgprot_t as the
> original pud?
>
> In that case it would probably be easier to simply mask out the pfn
> from our pud to create the pgprot rather than mask in a set of
> attributes (that may change in future)?
>

Good suggestion. I'll check that in the next version.

>> +	int i = 0;
>> +
>> +	do {
>> +		set_pmd(pmd, pfn_pmd(pfn, prot));
>> +		pfn++;
>> +	} while (pmd++, i++, i < PTRS_PER_PMD);
>> +}
>> +
>> +static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
>>   				  unsigned long end, phys_addr_t phys,
>> -				  int map_io)
>> +				  pgprot_t sect_prot, pgprot_t pte_prot,
>> +				  bool early, int map_io)
>>   {
>>   	pmd_t *pmd;
>>   	unsigned long next;
>> -	pmdval_t prot_sect;
>> -	pgprot_t prot_pte;
>>
>>   	if (map_io) {
>> -		prot_sect = PROT_SECT_DEVICE_nGnRE;
>> -		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
>> -	} else {
>> -		prot_sect = PROT_SECT_NORMAL_EXEC;
>> -		prot_pte = PAGE_KERNEL_EXEC;
>> +		sect_prot = PROT_SECT_DEVICE_nGnRE;
>> +		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
>>   	}
>
> I think we should wipe out map_io from all these functions as it's
> causing too much complexity. It's enough to have just sect_prot and
> pte_prot. I posted some similar feedback to Ard:
> http://lists.infradead.org/pipermail/linux-arm-kernel/2014-October/296918.html
>

Sounds fine. I think my series would conflict with Ard's so I should
give that series a review and see if it makes sense to base this
series off of that series. Part of this work might also be relevent
for adding DEBUG_PAGEALLOC for arm64 so I might split that out into
a separate patch as well.

Thanks,
Laura
Laura Abbott Oct. 30, 2014, 5:17 p.m. UTC | #6
On 10/28/2014 7:20 AM, Mark Rutland wrote:
> [resending due to previous header destruction]
>
> Hi Laura,
>
> On Mon, Oct 27, 2014 at 08:12:32PM +0000, Laura Abbott wrote:
>> Add page protections for arm64 similar to those in arm.
>> This is for security reasons to prevent certain classes
>> of exploits. The current method:
>>
>> - Map all memory as either RWX or RW. We round to the nearest
>>    section to avoid creating page tables before everything is mapped
>> - Once everything is mapped, if either end of the RWX section should
>>    not be X, we split the PMD and remap as necessary
>> - When initmem is to be freed, we change the permissions back to
>>    RW (using stop machine if necessary to flush the TLB)
>> - If CONFIG_DEBUG_RODATA is set, the read only sections are set
>>    read only.
>>
>> Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
>> ---
>> v4: Combined the Kconfig options
>> ---
>>   arch/arm64/Kconfig.debug            |  23 +++
>>   arch/arm64/include/asm/cacheflush.h |   4 +
>>   arch/arm64/kernel/vmlinux.lds.S     |  17 ++
>>   arch/arm64/mm/init.c                |   1 +
>>   arch/arm64/mm/mm.h                  |   2 +
>>   arch/arm64/mm/mmu.c                 | 303 +++++++++++++++++++++++++++++++-----
>>   6 files changed, 314 insertions(+), 36 deletions(-)
>
> With this patch applied to v3.18-rc2, my board blows up at boot when
> using UEFI (without DEBUG_RODATA selected):
>
> ---->8----
> EFI stub: Booting Linux Kernel...
> Initializing cgroup subsys cpu
> Linux version 3.18.0-rc2+ (mark@leverpostej) (gcc version 4.9.2 20140904 (prerelease) (crosstool-NG linaro-1.13.1-4.9-2014.09 - Linaro GCC 4.9-2014.09) ) #112 SMP PREEMPT Tue Oct 28 13:58:41 GMT 2014
> CPU: AArch64 Processor [410fd030] revision 0
> Detected VIPT I-cache on CPU0
> bootconsole [uart0] enabled
> efi: Getting EFI parameters from FDT:
> EFI v2.40 by ARM Juno EFI Oct  7 2014 15:05:42
> efi:  ACPI=0xfebdc000  ACPI 2.0=0xfebdc014
> cma: Reserved 16 MiB at fd800000
> BUG: failure at arch/arm64/mm/mmu.c:234/alloc_init_pmd()!
> Kernel panic - not syncing: BUG!
> CPU: 0 PID: 0 Comm: swapper Not tainted 3.18.0-rc2+ #112
> Call trace:
> [<ffffffc000087ec8>] dump_backtrace+0x0/0x124
> [<ffffffc000087ffc>] show_stack+0x10/0x1c
> [<ffffffc0004ebd58>] dump_stack+0x74/0xb8
> [<ffffffc0004eb018>] panic+0xe0/0x220
> [<ffffffc0004e8e08>] alloc_init_pmd+0x1cc/0x1dc
> [<ffffffc0004e8e3c>] alloc_init_pud+0x24/0x6c
> [<ffffffc0004e8f54>] __create_mapping+0xd0/0xf0
> [<ffffffc00069a0a0>] create_id_mapping+0x5c/0x68
> [<ffffffc00069964c>] efi_idmap_init+0x54/0xd8
> [<ffffffc0006978a8>] setup_arch+0x408/0x5c0
> [<ffffffc00069566c>] start_kernel+0x94/0x3a0
> ---[ end Kernel panic - not syncing: BUG!
> ---->8----
>
> I've not yet figured out precisely why. I haven't tried a !EFI boot
> because of the way my board is set up at the moment.
>

I think it's because the idmap is now being created earlier than
expected, so it's trying to get memory from the normal allocator
and dying. I'll have to see if I can get my hands on an EFI
board and test it out.
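
That reading fits the code: create_id_mapping() as reworked here always
passes early = false, so an EFI-time call reaches
pmd_alloc_one()/__get_free_page() before the page allocator is up and
trips the BUG_ON. From the patch (my interpretation of the trace, not a
confirmed fix):

	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
			 addr, addr, size, PROT_SECT_NORMAL_EXEC,
			 PAGE_KERNEL_EXEC, map_io, false);
			 /* ^ the final "early" argument would need to
			  * be true for a call this early in boot */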

Thanks,
Laura

Patch

diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index 0a12933..1b25015 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -54,4 +54,27 @@  config DEBUG_SET_MODULE_RONX
           against certain classes of kernel exploits.
           If in doubt, say "N".
 
+config DEBUG_RODATA
+	bool "Make kernel text and rodata read-only"
+	help
+	  If this is set, kernel text and rodata will be made read-only. This
+	  is to help catch accidental or malicious attempts to change the
+	  kernel's executable code. Additionally splits rodata from kernel
+	  text so it can be made explicitly non-executable.
+
+	  If in doubt, say Y
+
+config DEBUG_ALIGN_RODATA
+	depends on DEBUG_RODATA
+	bool "Align linker sections up to SECTION_SIZE"
+	help
+	  If this option is enabled, sections that may potentially be marked as
+	  read only or non-executable will be aligned up to the section size of
+	  the kernel. This prevents sections from being split into pages and
+	  avoids a potential TLB penalty. The downside is an increase in
+	  alignment and potentially wasted space. Turn on this option if
+	  performance is more important than memory pressure.
+
+	  If in doubt, say N
+
 endmenu
diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index 689b637..0014207 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -152,4 +152,8 @@  int set_memory_ro(unsigned long addr, int numpages);
 int set_memory_rw(unsigned long addr, int numpages);
 int set_memory_x(unsigned long addr, int numpages);
 int set_memory_nx(unsigned long addr, int numpages);
+
+#ifdef CONFIG_DEBUG_RODATA
+void mark_rodata_ro(void);
+#endif
 #endif
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index edf8715..83424ad 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -8,6 +8,7 @@ 
 #include <asm/thread_info.h>
 #include <asm/memory.h>
 #include <asm/page.h>
+#include <asm/pgtable.h>
 
 #include "image.h"
 
@@ -54,6 +55,9 @@  SECTIONS
 		_text = .;
 		HEAD_TEXT
 	}
+#ifdef DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	.text : {			/* Real text segment		*/
 		_stext = .;		/* Text and read-only data	*/
 			__exception_text_start = .;
@@ -70,19 +74,32 @@  SECTIONS
 		*(.got)			/* Global offset table		*/
 	}
 
+#ifdef DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#endif
 	RO_DATA(PAGE_SIZE)
 	EXCEPTION_TABLE(8)
 	NOTES
 	_etext = .;			/* End of text and rodata section */
 
+#ifdef DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+#else
 	. = ALIGN(PAGE_SIZE);
+#endif
 	__init_begin = .;
 
 	INIT_TEXT_SECTION(8)
 	.exit.text : {
 		ARM_EXIT_KEEP(EXIT_TEXT)
 	}
+
+#ifdef DEBUG_ALIGN_RODATA
+	. = ALIGN(1<<SECTION_SHIFT);
+	__init_data_begin = .;
+#else
 	. = ALIGN(16);
+#endif
 	.init.data : {
 		INIT_DATA
 		INIT_SETUP(16)
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 494297c..61f44c7 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -324,6 +324,7 @@  void __init mem_init(void)
 
 void free_initmem(void)
 {
+	fixup_init();
 	free_initmem_default(0);
 }
 
diff --git a/arch/arm64/mm/mm.h b/arch/arm64/mm/mm.h
index d519f4f..82347d8 100644
--- a/arch/arm64/mm/mm.h
+++ b/arch/arm64/mm/mm.h
@@ -1,2 +1,4 @@ 
 extern void __init bootmem_init(void);
 extern void __init arm64_swiotlb_init(void);
+
+void fixup_init(void);
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e92f633..0428294 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -26,6 +26,7 @@ 
 #include <linux/memblock.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/stop_machine.h>
 
 #include <asm/cputype.h>
 #include <asm/fixmap.h>
@@ -137,17 +138,55 @@  static void __init *early_alloc(unsigned long sz)
 	return ptr;
 }
 
-static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
+/*
+ * remap a PMD into pages
+ */
+static noinline void __ref split_pmd(pmd_t *pmd, bool early)
+{
+	pte_t *pte, *start_pte;
+	unsigned long pfn;
+	int i = 0;
+
+	if (early)
+		start_pte = pte = early_alloc(PTRS_PER_PTE*sizeof(pte_t));
+	else
+		start_pte = pte = (pte_t *)__get_free_page(PGALLOC_GFP);
+
+	BUG_ON(!pte);
+
+	pfn = pmd_pfn(*pmd);
+
+	do {
+		/*
+		 * Need to have the least restrictive permissions available;
+		 * permissions will be fixed up later.
+		 */
+		set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
+		pfn++;
+	} while (pte++, i++, i < PTRS_PER_PTE);
+
+
+	__pmd_populate(pmd, __pa(start_pte), PMD_TYPE_TABLE);
+	flush_tlb_all();
+}
+
+static void __ref alloc_init_pte(pmd_t *pmd, unsigned long addr,
 				  unsigned long end, unsigned long pfn,
-				  pgprot_t prot)
+				  pgprot_t prot, bool early)
 {
 	pte_t *pte;
 
 	if (pmd_none(*pmd)) {
-		pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+		if (early)
+			pte = early_alloc(PTRS_PER_PTE * sizeof(pte_t));
+		else
+			pte = (pte_t *)__get_free_page(PGALLOC_GFP);
+		BUG_ON(!pte);
 		__pmd_populate(pmd, __pa(pte), PMD_TYPE_TABLE);
 	}
-	BUG_ON(pmd_bad(*pmd));
+
+	if (pmd_bad(*pmd))
+		split_pmd(pmd, early);
 
 	pte = pte_offset_kernel(pmd, addr);
 	do {
@@ -156,29 +195,52 @@  static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 }
 
-static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
+void __ref split_pud(pud_t *old_pud, pmd_t *pmd)
+{
+	unsigned long pfn = pud_pfn(*old_pud);
+	const pmdval_t mask = PMD_SECT_USER | PMD_SECT_PXN | PMD_SECT_UXN |
+			      PMD_SECT_RDONLY | PMD_SECT_PROT_NONE |
+			      PMD_SECT_VALID;
+	pgprot_t prot = __pgprot(pud_val(*old_pud) & mask);
+	int i = 0;
+
+	do {
+		set_pmd(pmd, pfn_pmd(pfn, prot));
+		pfn++;
+	} while (pmd++, i++, i < PTRS_PER_PMD);
+}
+
+static void __ref alloc_init_pmd(pud_t *pud, unsigned long addr,
 				  unsigned long end, phys_addr_t phys,
-				  int map_io)
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  bool early, int map_io)
 {
 	pmd_t *pmd;
 	unsigned long next;
-	pmdval_t prot_sect;
-	pgprot_t prot_pte;
 
 	if (map_io) {
-		prot_sect = PROT_SECT_DEVICE_nGnRE;
-		prot_pte = __pgprot(PROT_DEVICE_nGnRE);
-	} else {
-		prot_sect = PROT_SECT_NORMAL_EXEC;
-		prot_pte = PAGE_KERNEL_EXEC;
+		sect_prot = PROT_SECT_DEVICE_nGnRE;
+		pte_prot = __pgprot(PROT_DEVICE_nGnRE);
 	}
 
 	/*
 	 * Check for initial section mappings in the pgd/pud and remove them.
 	 */
 	if (pud_none(*pud) || pud_bad(*pud)) {
-		pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		if (early)
+			pmd = early_alloc(PTRS_PER_PMD * sizeof(pmd_t));
+		else
+			pmd = pmd_alloc_one(&init_mm, addr);
+		BUG_ON(!pmd);
+		if (pud_sect(*pud)) {
+			/*
+			 * The existing 1GB section mapping must remain
+			 * live while it is being split into 2MB blocks.
+			 */
+			split_pud(pud, pmd);
+		}
 		pud_populate(&init_mm, pud, pmd);
+		flush_tlb_all();
 	}
 
 	pmd = pmd_offset(pud, addr);
@@ -186,8 +248,8 @@  static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 		next = pmd_addr_end(addr, end);
 		/* try section mapping first */
 		if (((addr | next | phys) & ~SECTION_MASK) == 0) {
-			pmd_t old_pmd =*pmd;
-			set_pmd(pmd, __pmd(phys | prot_sect));
+			pmd_t old_pmd = *pmd;
+			set_pmd(pmd, __pmd(phys | sect_prot));
 			/*
 			 * Check for previous table entries created during
 			 * boot (__create_page_tables) and flush them.
@@ -196,15 +258,16 @@  static void __init alloc_init_pmd(pud_t *pud, unsigned long addr,
 				flush_tlb_all();
 		} else {
 			alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys),
-				       prot_pte);
+					pte_prot, early);
 		}
 		phys += next - addr;
 	} while (pmd++, addr = next, addr != end);
 }
 
-static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
+static void __ref alloc_init_pud(pgd_t *pgd, unsigned long addr,
 				  unsigned long end, unsigned long phys,
-				  int map_io)
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  bool early, int map_io)
 {
 	pud_t *pud;
 	unsigned long next;
@@ -222,10 +285,10 @@  static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 		/*
 		 * For 4K granule only, attempt to put down a 1GB block
 		 */
-		if (!map_io && (PAGE_SHIFT == 12) &&
+		if (!map_io && early && (PAGE_SHIFT == 12) &&
 		    ((addr | next | phys) & ~PUD_MASK) == 0) {
 			pud_t old_pud = *pud;
-			set_pud(pud, __pud(phys | PROT_SECT_NORMAL_EXEC));
+			set_pud(pud, __pud(phys | sect_prot));
 
 			/*
 			 * If we have an old value for a pud, it will
@@ -240,7 +303,8 @@  static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
 				flush_tlb_all();
 			}
 		} else {
-			alloc_init_pmd(pud, addr, next, phys, map_io);
+			alloc_init_pmd(pud, addr, next, phys, sect_prot,
+					pte_prot, early, map_io);
 		}
 		phys += next - addr;
 	} while (pud++, addr = next, addr != end);
@@ -250,9 +314,11 @@  static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr,
  * Create the page directory entries and any necessary page tables for the
  * mapping specified by 'md'.
  */
-static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
-				    unsigned long virt, phys_addr_t size,
-				    int map_io)
+static void __ref __create_mapping(pgd_t *pgd, phys_addr_t phys,
+				  unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot,
+				  int map_io, bool early)
 {
 	unsigned long addr, length, end, next;
 
@@ -262,31 +328,140 @@  static void __init __create_mapping(pgd_t *pgd, phys_addr_t phys,
 	end = addr + length;
 	do {
 		next = pgd_addr_end(addr, end);
-		alloc_init_pud(pgd, addr, next, phys, map_io);
+		alloc_init_pud(pgd, addr, next, phys, sect_prot, pte_prot,
+				early, map_io);
 		phys += next - addr;
 	} while (pgd++, addr = next, addr != end);
 }
 
-static void __init create_mapping(phys_addr_t phys, unsigned long virt,
-				  phys_addr_t size)
+void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
+{
+	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
+		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+		return;
+	}
+	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
+			 addr, addr, size, PROT_SECT_NORMAL_EXEC,
+			 PAGE_KERNEL_EXEC, map_io, false);
+}
+
+static inline pmd_t *pmd_off_k(unsigned long virt)
+{
+	return pmd_offset(pud_offset(pgd_offset_k(virt), virt), virt);
+}
+
+#ifdef CONFIG_EARLY_PRINTK
+/*
+ * Create an early I/O mapping using the pgd/pmd entries already populated
+ * in head.S, as this function is called too early to allocate any memory. The
+ * mapping size is 2MB with 4KB pages or 64KB with 64KB pages.
+ */
+void __iomem * __init early_io_map(phys_addr_t phys, unsigned long virt)
+{
+	unsigned long size, mask;
+	bool page64k = IS_ENABLED(CONFIG_ARM64_64K_PAGES);
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/*
+	 * No early pte entries with !ARM64_64K_PAGES configuration, so using
+	 * sections (pmd).
+	 */
+	size = page64k ? PAGE_SIZE : SECTION_SIZE;
+	mask = ~(size - 1);
+
+	pgd = pgd_offset_k(virt);
+	pud = pud_offset(pgd, virt);
+	if (pud_none(*pud))
+		return NULL;
+	pmd = pmd_offset(pud, virt);
+
+	if (page64k) {
+		if (pmd_none(*pmd))
+			return NULL;
+		pte = pte_offset_kernel(pmd, virt);
+		set_pte(pte, __pte((phys & mask) | PROT_DEVICE_nGnRE));
+	} else {
+		set_pmd(pmd, __pmd((phys & mask) | PROT_SECT_DEVICE_nGnRE));
+	}
+
+	return (void __iomem *)((virt & mask) + (phys & ~mask));
+}
+#endif
+
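+/*
+ * Create a kernel mapping during early boot; page table pages are
+ * allocated from memblock.
+ */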
+static void __ref create_mapping(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot)
 {
 	if (virt < VMALLOC_START) {
 		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
 			&phys, virt);
 		return;
 	}
-	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt, size, 0);
+
+	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
+			 size, sect_prot, pte_prot, 0, true);
 }
 
-void __init create_id_mapping(phys_addr_t addr, phys_addr_t size, int map_io)
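+/*
+ * Modify an existing kernel mapping after boot; page table pages are
+ * allocated from the buddy allocator.
+ */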
+static void __ref create_mapping_late(phys_addr_t phys, unsigned long virt,
+				  phys_addr_t size,
+				  pgprot_t sect_prot, pgprot_t pte_prot)
 {
-	if ((addr >> PGDIR_SHIFT) >= ARRAY_SIZE(idmap_pg_dir)) {
-		pr_warn("BUG: not creating id mapping for %pa\n", &addr);
+	if (virt < VMALLOC_START) {
+		pr_warn("BUG: not creating mapping for %pa at 0x%016lx - outside kernel range\n",
+			&phys, virt);
 		return;
 	}
-	__create_mapping(&idmap_pg_dir[pgd_index(addr)],
-			 addr, addr, size, map_io);
+
+	__create_mapping(pgd_offset_k(virt & PAGE_MASK), phys, virt,
+			 size, sect_prot, pte_prot, 0, false);
+}
+
+#ifdef CONFIG_DEBUG_RODATA
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	/*
+	 * Set up the executable regions using the existing section mappings
+	 * for now. This will get more fine-grained later once all memory
+	 * is mapped.
+	 */
+	unsigned long kernel_x_start = round_down(__pa(_stext), SECTION_SIZE);
+	unsigned long kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
+
+	if (end < kernel_x_start || start >= kernel_x_end) {
+		create_mapping(start, __phys_to_virt(start),
+			end - start, PROT_SECT_NORMAL, PAGE_KERNEL);
+	} else {
+		if (start < kernel_x_start)
+			create_mapping(start, __phys_to_virt(start),
+				kernel_x_start - start,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+		create_mapping(kernel_x_start,
+				__phys_to_virt(kernel_x_start),
+				kernel_x_end - kernel_x_start,
+				PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
+		if (kernel_x_end < end)
+			create_mapping(kernel_x_end,
+				__phys_to_virt(kernel_x_end),
+				end - kernel_x_end,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+}
+#else
+static void __init __map_memblock(phys_addr_t start, phys_addr_t end)
+{
+	create_mapping(start, __phys_to_virt(start), end - start,
+			PROT_SECT_NORMAL_EXEC, PAGE_KERNEL_EXEC);
 }
+#endif
 
 static void __init map_mem(void)
 {
@@ -328,14 +503,69 @@  static void __init map_mem(void)
 			memblock_set_current_limit(limit);
 		}
 #endif
-
-		create_mapping(start, __phys_to_virt(start), end - start);
+		__map_memblock(start, end);
 	}
 
 	/* Limit no longer required. */
 	memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
 }
 
+static void __init fixup_executable(void)
+{
+#ifdef CONFIG_DEBUG_RODATA
+	/*
+	 * Now that everything is actually mapped, make the start/end
+	 * permissions more fine-grained.
+	 */
+	if (!IS_ALIGNED((unsigned long)_stext, SECTION_SIZE)) {
+		unsigned long aligned_start = round_down(__pa(_stext),
+							SECTION_SIZE);
+
+		create_mapping(aligned_start, __phys_to_virt(aligned_start),
+				__pa(_stext) - aligned_start,
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+
+	if (!IS_ALIGNED((unsigned long)__init_end, SECTION_SIZE)) {
+		unsigned long aligned_end = round_up(__pa(__init_end),
+							SECTION_SIZE);
+		create_mapping(__pa(__init_end), (unsigned long)__init_end,
+				aligned_end - __pa(__init_end),
+				PROT_SECT_NORMAL,
+				PAGE_KERNEL);
+	}
+#endif
+}
+
+#ifdef CONFIG_DEBUG_RODATA
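+/* called late in boot to make the kernel text/rodata region read-only */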
+void mark_rodata_ro(void)
+{
+	create_mapping_late(__pa(_stext), (unsigned long)_stext,
+				(unsigned long)_etext - (unsigned long)_stext,
+				PROT_SECT_NORMAL_EXEC | PMD_SECT_RDONLY,
+				PAGE_KERNEL_EXEC | PTE_RDONLY);
+}
+#endif
+
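+/* stop_machine() callback: flush the init section from the TLBs */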
+static int __flush_mappings(void *unused)
+{
+	flush_tlb_kernel_range((unsigned long)__init_begin,
+				(unsigned long)__init_end);
+	return 0;
+}
+
+void __ref fixup_init(void)
+{
+	phys_addr_t start = __pa(__init_begin);
+	phys_addr_t end = __pa(__init_end);
+
+	create_mapping_late(start, (unsigned long)__init_begin,
+			end - start,
+			PROT_SECT_NORMAL,
+			PAGE_KERNEL);
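+
+	/* page-granular boundaries need an explicit flush on all cpus */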
+	if (!IS_ALIGNED(start, SECTION_SIZE) || !IS_ALIGNED(end, SECTION_SIZE))
+		stop_machine(__flush_mappings, NULL, NULL);
+}
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps and sets up the zero page.
@@ -345,6 +575,7 @@  void __init paging_init(void)
 	void *zero_page;
 
 	map_mem();
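+	/* tighten permissions around the kernel's executable region */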
+	fixup_executable();
 
 	/*
 	 * Finally flush the caches and tlb to ensure that we're in a