[RFC,08/16] mm: Introduce kernel_pgtables_set_pkey()

Message ID 20241206101110.1646108-9-kevin.brodsky@arm.com (mailing list archive)
State New
Series pkeys-based page table hardening

Commit Message

Kevin Brodsky Dec. 6, 2024, 10:11 a.m. UTC
kernel_pgtables_set_pkey() allows setting the pkey of all page table
pages in swapper_pg_dir, recursively. This will be needed by
kpkeys_hardened_pgtables, as it relies on all PTPs being mapped with
a non-default pkey. Those initial kernel page tables cannot
practically be assigned a non-default pkey right when they are
allocated, so mutating them during (early) boot is required.

Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
---

It feels that some sort of locking is called for in
kernel_pgtables_set_pkey(), but I couldn't figure out what would be
appropriate.

---
 include/linux/mm.h |   2 +
 mm/memory.c        | 130 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)

Comments

Peter Zijlstra Dec. 9, 2024, 10:03 a.m. UTC | #1
On Fri, Dec 06, 2024 at 10:11:02AM +0000, Kevin Brodsky wrote:
> kernel_pgtables_set_pkey() allows setting the pkey of all page table
> pages in swapper_pg_dir, recursively. This will be needed by
> kpkeys_hardened_pgtables, as it relies on all PTPs being mapped with
> a non-default pkey. Those initial kernel page tables cannot
> practically be assigned a non-default pkey right when they are
> allocated, so mutating them during (early) boot is required.
> 
> Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
> ---
> 
> It feels that some sort of locking is called for in
> kernel_pgtables_set_pkey(), but I couldn't figure out what would be
> appropriate.

init_mm.page_table_lock is typically the one used to serialize kernel
page tables IIRC.

Kevin Brodsky Dec. 10, 2024, 9:27 a.m. UTC | #2
On 09/12/2024 11:03, Peter Zijlstra wrote:
> On Fri, Dec 06, 2024 at 10:11:02AM +0000, Kevin Brodsky wrote:
>> kernel_pgtables_set_pkey() allows setting the pkey of all page table
>> pages in swapper_pg_dir, recursively. This will be needed by
>> kpkeys_hardened_pgtables, as it relies on all PTPs being mapped with
>> a non-default pkey. Those initial kernel page tables cannot
>> practically be assigned a non-default pkey right when they are
>> allocated, so mutating them during (early) boot is required.
>>
>> Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
>> ---
>>
>> It feels that some sort of locking is called for in
>> kernel_pgtables_set_pkey(), but I couldn't figure out what would be
>> appropriate.
> init_mm.page_table_lock is typically the one used to serialize kernel
> page tables IIRC.

That does seem to be the case, thanks! Hopefully holding that spinlock
for the entire duration of the loop in kernel_pgtables_set_pkey() won't
be an issue.

- Kevin
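
For illustration, a minimal sketch of what serializing the walk with
init_mm.page_table_lock could look like. This is only a sketch, not part of
the posted patch: it reuses the helpers introduced in the patch below and
assumes it is acceptable to hold the spinlock across the whole top-level
loop, as discussed above.

/*
 * Sketch only (not part of this patch): take init_mm.page_table_lock for
 * the entire walk. set_pkey_p4d() and arch_kpkeys_enabled() are the
 * helpers/hooks used by the patch below.
 */
int kernel_pgtables_set_pkey(int pkey)
{
	pgd_t *pgd = swapper_pg_dir;
	int i, err = 0;

	if (!arch_kpkeys_enabled())
		return 0;

	spin_lock(&init_mm.page_table_lock);

	if (mm_p4d_folded(&init_mm)) {
		err = set_pkey_p4d(pgd, pkey);
		goto out;
	}

	for (i = 0; i < PTRS_PER_PGD; i++) {
		if (pgd_none(pgd[i]) || pgd_bad(pgd[i]) || pgd_leaf(pgd[i]))
			continue;
		err = set_pkey_p4d(&pgd[i], pkey);
		if (err)
			break;
	}

out:
	spin_unlock(&init_mm.page_table_lock);
	return err;
}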

Patch

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c39c4945946c..683e883dae77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -4179,4 +4179,6 @@  int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st
 int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
 int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
 
+int kernel_pgtables_set_pkey(int pkey);
+
 #endif /* _LINUX_MM_H */
diff --git a/mm/memory.c b/mm/memory.c
index 75c2dfd04f72..278ddf9f6249 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -76,6 +76,7 @@ 
 #include <linux/ptrace.h>
 #include <linux/vmalloc.h>
 #include <linux/sched/sysctl.h>
+#include <linux/kpkeys.h>
 
 #include <trace/events/kmem.h>
 
@@ -6974,3 +6975,132 @@  void vma_pgtable_walk_end(struct vm_area_struct *vma)
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_vma_unlock_read(vma);
 }
+
+static int set_page_pkey(void *p, int pkey)
+{
+	unsigned long addr = (unsigned long)p;
+
+	/*
+	 * swapper_pg_dir itself will be made read-only by mark_rodata_ro()
+	 * so there is no point in changing its pkey.
+	 */
+	if (p == swapper_pg_dir)
+		return 0;
+
+	return set_memory_pkey(addr, 1, pkey);
+}
+
+static int set_pkey_pte(pmd_t *pmd, int pkey)
+{
+	pte_t *pte;
+	int err;
+
+	pte = pte_offset_kernel(pmd, 0);
+	err = set_page_pkey(pte, pkey);
+
+	return err;
+}
+
+static int set_pkey_pmd(pud_t *pud, int pkey)
+{
+	pmd_t *pmd;
+	int i, err = 0;
+
+	pmd = pmd_offset(pud, 0);
+
+	err = set_page_pkey(pmd, pkey);
+	if (err)
+		return err;
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (pmd_none(pmd[i]) || pmd_bad(pmd[i]) || pmd_leaf(pmd[i]))
+			continue;
+		err = set_pkey_pte(&pmd[i], pkey);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int set_pkey_pud(p4d_t *p4d, int pkey)
+{
+	pud_t *pud;
+	int i, err = 0;
+
+	if (mm_pmd_folded(&init_mm))
+		return set_pkey_pmd((pud_t *)p4d, pkey);
+
+	pud = pud_offset(p4d, 0);
+
+	err = set_page_pkey(pud, pkey);
+	if (err)
+		return err;
+
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		if (pud_none(pud[i]) || pud_bad(pud[i]) || pud_leaf(pud[i]))
+			continue;
+		err = set_pkey_pmd(&pud[i], pkey);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+static int set_pkey_p4d(pgd_t *pgd, int pkey)
+{
+	p4d_t *p4d;
+	int i, err = 0;
+
+	if (mm_pud_folded(&init_mm))
+		return set_pkey_pud((p4d_t *)pgd, pkey);
+
+	p4d = p4d_offset(pgd, 0);
+
+	err = set_page_pkey(p4d, pkey);
+	if (err)
+		return err;
+
+	for (i = 0; i < PTRS_PER_P4D; i++) {
+		if (p4d_none(p4d[i]) || p4d_bad(p4d[i]) || p4d_leaf(p4d[i]))
+			continue;
+		err = set_pkey_pud(&p4d[i], pkey);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * kernel_pgtables_set_pkey - set pkey for all kernel page table pages
+ * @pkey: pkey to set the page table pages to
+ *
+ * Walks swapper_pg_dir setting the protection key of every page table page (at
+ * all levels) to @pkey. swapper_pg_dir itself is left untouched as it is
+ * expected to be mapped read-only by mark_rodata_ro().
+ *
+ * No-op if the architecture does not support kpkeys.
+ */
+int kernel_pgtables_set_pkey(int pkey)
+{
+	pgd_t *pgd = swapper_pg_dir;
+	int i, err = 0;
+
+	if (!arch_kpkeys_enabled())
+		return 0;
+
+	if (mm_p4d_folded(&init_mm))
+		return set_pkey_p4d(pgd, pkey);
+
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		if (pgd_none(pgd[i]) || pgd_bad(pgd[i]) || pgd_leaf(pgd[i]))
+			continue;
+		err = set_pkey_p4d(&pgd[i], pkey);
+		if (err)
+			break;
+	}
+
+	return err;
+}
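
For context, a hypothetical caller showing how kpkeys_hardened_pgtables (or
an architecture) might invoke this helper once the initial kernel page
tables are in place. The pkey constant and the choice of initcall level are
placeholders for illustration, not something defined by this patch:

/*
 * Hypothetical call site, for illustration only: KPKEYS_PKEY_HYPOTHETICAL
 * and the use of early_initcall() are assumptions, not part of this series.
 */
static int __init hardened_pgtables_set_pkey(void)
{
	int err;

	/* Retroactively move all existing kernel PTPs to the chosen pkey. */
	err = kernel_pgtables_set_pkey(KPKEYS_PKEY_HYPOTHETICAL);
	if (err)
		pr_warn("failed to set pkey on kernel page tables (%d)\n", err);

	return err;
}
early_initcall(hardened_pgtables_set_pkey);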