@@ -133,6 +133,7 @@ struct asi {
struct mm_struct *mm;
int64_t ref_count;
enum asi_class_id class_id;
+ spinlock_t pgd_lock;
};
DECLARE_PER_CPU_ALIGNED(struct asi *, curr_asi);
@@ -147,6 +148,7 @@ const char *asi_class_name(enum asi_class_id class_id);
int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_asi);
void asi_destroy(struct asi *asi);
+void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp);
/* Enter an ASI domain (restricted address space) and begin the critical section. */
void asi_enter(struct asi *asi);
@@ -286,6 +288,15 @@ static __always_inline bool asi_in_critical_section(void)
void asi_handle_switch_mm(void);
+/*
+ * Returns true if userspace addresses should be mapped into the restricted
+ * address space for the given ASI class.
+ */
+static inline bool asi_maps_user_addr(enum asi_class_id class_id)
+{
+ return false;
+}
+
#endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
#endif
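
For illustration only (not part of this patch; CONFIG_ASI_KVM_MAP_USER is a hypothetical Kconfig symbol), a class that opts in to mapping userspace could return true here along these lines:

    static inline bool asi_maps_user_addr(enum asi_class_id class_id)
    {
        /* Hypothetical: gate a per-class opt-in on a Kconfig symbol. */
        return class_id == ASI_CLASS_KVM &&
               IS_ENABLED(CONFIG_ASI_KVM_MAP_USER);
    }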
@@ -114,12 +114,16 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
set_p4d(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
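+ /*
+  * With 4-level paging the p4d level is folded into the pgd, so this
+  * write actually lands in a PGD entry and must be mirrored into the
+  * restricted (ASI) page tables.
+  */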
+ if (!pgtable_l5_enabled())
+ asi_clone_user_pgtbl(mm, (pgd_t *)p4d);
}
static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
{
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
+ if (!pgtable_l5_enabled())
+ asi_clone_user_pgtbl(mm, (pgd_t *)p4d);
}
extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
@@ -137,6 +141,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+ asi_clone_user_pgtbl(mm, pgd);
}
static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
@@ -145,6 +150,7 @@ static inline void pgd_populate_safe(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd_safe(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
+ asi_clone_user_pgtbl(mm, pgd);
}
static inline p4d_t *p4d_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -157,6 +157,8 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
static inline void native_p4d_clear(p4d_t *p4d)
{
native_set_p4d(p4d, native_make_p4d(0));
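+ /*
+  * No mm is available here; asi_clone_user_pgtbl() will look up the
+  * owning mm from the pgd page.
+  */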
+ if (!pgtable_l5_enabled())
+ asi_clone_user_pgtbl(NULL, (pgd_t *)p4d);
}
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
@@ -167,6 +169,8 @@ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
static inline void native_pgd_clear(pgd_t *pgd)
{
native_set_pgd(pgd, native_make_pgd(0));
+ if (pgtable_l5_enabled())
+ asi_clone_user_pgtbl(NULL, pgd);
}
/*
@@ -9712,11 +9712,15 @@ static inline int kvm_x86_init_asi_class(void)
/*
* And the same for data left behind by code in the userspace domain
* (i.e. the VMM itself, plus kernel code serving its syscalls etc).
- * This should eventually be configurable: users whose VMMs contain
- * no secrets can disable it to avoid paying a mitigation cost on
- * transition between their guest and userspace.
+ *
+ * If userspace is mapped into the guest's restricted address space, we
+ * skip this taint: in that case we assume either that no bugs allow the
+ * guest to leak that data, or that the user doesn't care about that
+ * security boundary.
*/
- policy.protect_data |= ASI_TAINT_USER_DATA;
+ if (!asi_maps_user_addr(ASI_CLASS_KVM))
+ policy.protect_data |= ASI_TAINT_USER_DATA;
return asi_init_class(ASI_CLASS_KVM, &policy);
}
@@ -14,6 +14,7 @@
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
#include <asm/traps.h>
+#include <asm/pgtable.h>
#include "mm_internal.h"
#include "../../../mm/internal.h"
@@ -351,6 +352,33 @@ static void __asi_destroy(struct asi *asi)
memset(asi, 0, sizeof(struct asi));
}
+static void __asi_init_user_pgds(struct mm_struct *mm, struct asi *asi)
+{
+ int i;
+
+ if (!asi_maps_user_addr(asi->class_id))
+ return;
+
+ /*
+  * The code below must run only after the given asi is visible in
+  * mm->asi[], so that at least one of this function and
+  * asi_clone_user_pgtbl() copies each entry from the unrestricted
+  * pgd into the restricted pgd.
+  */
+ if (WARN_ON_ONCE(&mm->asi[asi->class_id] != asi))
+ return;
+
+ /*
+  * See the comment in asi_clone_user_pgtbl() for why we hold the
+  * lock here.
+  */
+ spin_lock(&asi->pgd_lock);
+
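+ /* Mirror the userspace PGD entries that already exist in mm->pgd. */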
+ for (i = 0; i < KERNEL_PGD_BOUNDARY; i++)
+ set_pgd(asi->pgd + i, READ_ONCE(*(mm->pgd + i)));
+
+ spin_unlock(&asi->pgd_lock);
+}
+
int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_asi)
{
struct asi *asi;
@@ -388,6 +416,7 @@ int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_
asi->mm = mm;
asi->class_id = class_id;
+ spin_lock_init(&asi->pgd_lock);
for (i = KERNEL_PGD_BOUNDARY; i < PTRS_PER_PGD; i++)
set_pgd(asi->pgd + i, asi_global_nonsensitive_pgd[i]);
@@ -398,6 +427,7 @@ int asi_init(struct mm_struct *mm, enum asi_class_id class_id, struct asi **out_
else
*out_asi = asi;
+ __asi_init_user_pgds(mm, asi);
mutex_unlock(&mm->asi_init_lock);
return err;
@@ -891,3 +921,65 @@ void asi_unmap(struct asi *asi, void *addr, size_t len)
asi_flush_tlb_range(asi, addr, len);
}
+
+/*
+ * Copy the given unrestricted pgd entry for a userspace address into the
+ * corresponding entry of every restricted pgd that maps userspace. The
+ * unrestricted entry must already have been updated when this is called.
+ *
+ * Userspace is mapped into the restricted address spaces by mirroring
+ * unrestricted pgd entries into the restricted page tables, so the lower
+ * level page tables are shared and their consistency doesn't need to be
+ * maintained separately between the unrestricted and restricted trees.
+ */
+void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp)
+{
+ unsigned long pgd_idx;
+ struct asi *asi;
+ int i;
+
+ if (!static_asi_enabled())
+ return;
+
+ /* We don't need to take care of non-userspace mappings. */
+ if (!pgdp_maps_userspace(pgdp))
+ return;
+
+ /*
+  * mm is NULL when called from p{4,g}d_clear(); in that case, look
+  * up the owning mm from the pgd page. The pgd page has a valid
+  * pt_mm only when SHARED_KERNEL_PMD == 0.
+  */
+ BUILD_BUG_ON(SHARED_KERNEL_PMD);
+ if (!mm) {
+ mm = pgd_page_get_mm(virt_to_page(pgdp));
+ if (WARN_ON_ONCE(!mm))
+ return;
+ }
+
+ /*
+  * Compute the index of the given entry within its PGD page. The
+  * same index selects the ASI PGD entry to update.
+  */
+ pgd_idx = pgdp - PTR_ALIGN_DOWN(pgdp, PAGE_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(mm->asi); i++) {
+ asi = mm->asi + i;
+
+ if (!asi_pgd(asi) || !asi_maps_user_addr(asi->class_id))
+ continue;
+
+ /*
+  * Concurrent callers of asi_clone_user_pgtbl() must be synchronized
+  * among themselves as well as against __asi_init_user_pgds(). The
+  * lock makes sure that reading the unrestricted pgd and updating
+  * the corresponding ASI pgd are not interleaved by concurrent calls.
+  * We cannot rely on mm->page_table_lock here because it is not
+  * always held when pgd/p4d_clear_bad() is called.
+  */
+ spin_lock(&asi->pgd_lock);
+ set_pgd(asi_pgd(asi) + pgd_idx, READ_ONCE(*pgdp));
+ spin_unlock(&asi->pgd_lock);
+ }
+}
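
As a note on the index computation above: PTR_ALIGN_DOWN() gives the start of the PGD page, and pgd_t pointer subtraction scales by sizeof(pgd_t), so the result is the entry index directly. A minimal userspace sketch of the same arithmetic (simplified types; 4 KiB pages and 8-byte entries assumed):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t val; } pgd_entry;   /* stand-in for pgd_t */
    #define PAGE_SZ 4096UL

    /* One page of top-level entries, page-aligned like a real PGD. */
    static pgd_entry pgd_page[PAGE_SZ / sizeof(pgd_entry)]
            __attribute__((aligned(4096)));

    /* Align a pointer down to the start of its page, as PTR_ALIGN_DOWN() does. */
    static pgd_entry *page_start(pgd_entry *p)
    {
        return (pgd_entry *)((uintptr_t)p & ~(PAGE_SZ - 1));
    }

    int main(void)
    {
        pgd_entry *pgdp = &pgd_page[3];   /* the entry being written */

        /* Pointer subtraction scales by sizeof(pgd_entry), yielding the index. */
        unsigned long idx = pgdp - page_start(pgdp);

        assert(idx == 3);
        printf("pgd index = %lu\n", idx);
        return 0;
    }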
@@ -95,6 +95,10 @@ void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len) { }
static inline void asi_check_boottime_disable(void) { }
+static inline void asi_clone_user_pgtbl(struct mm_struct *mm, pgd_t *pgdp) { }
+
+static inline bool asi_maps_user_addr(enum asi_class_id class_id) { return false; }
+
#endif /* !CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
#endif /* !_ASSEMBLY_ */