@@ -135,6 +135,11 @@ void asi_relax(void);
/* Immediately exit the restricted address space if in it */
void asi_exit(void);
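+
+/*
+ * Map and unmap kernel memory in an ASI restricted address space, and flush
+ * the TLB for a range of it. See the comments above the definitions of
+ * asi_map_gfp() and asi_unmap() for the calling requirements.
+ */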
+int __must_check asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags);
+int __must_check asi_map(struct asi *asi, void *addr, size_t len);
+void asi_unmap(struct asi *asi, void *addr, size_t len);
+void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len);
+
static inline void asi_init_thread_state(struct thread_struct *thread)
{
thread->asi_state.intr_nest_depth = 0;
@@ -9,6 +9,9 @@
#include <asm/cmdline.h>
#include <asm/pgalloc.h>
#include <asm/mmu_context.h>
+#include <asm/traps.h>
+
+#include "../../../mm/internal.h"
static struct asi_class asi_class[ASI_MAX_NUM];
static DEFINE_SPINLOCK(asi_class_lock);
@@ -98,7 +101,6 @@ EXPORT_SYMBOL_GPL(asi_unregister_class);
*/
static_assert(!IS_ENABLED(CONFIG_PARAVIRT));
#define DEFINE_ASI_PGTBL_ALLOC(base, level) \
-__maybe_unused \
static level##_t * asi_##level##_alloc(struct asi *asi, \
base##_t *base, ulong addr, \
gfp_t flags) \
@@ -338,3 +340,237 @@ void asi_init_mm_state(struct mm_struct *mm)
memset(mm->asi, 0, sizeof(mm->asi));
mutex_init(&mm->asi_init_lock);
}
+
+static bool is_page_within_range(unsigned long addr, unsigned long page_size,
+ unsigned long range_start, unsigned long range_end)
+{
+ unsigned long page_start = ALIGN_DOWN(addr, page_size);
+ unsigned long page_end = page_start + page_size;
+
+ return page_start >= range_start && page_end <= range_end;
+}
+
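+/*
+ * Walk the given page tables (normally the unrestricted tables) and return the
+ * physical address, leaf page size and protection flags that map @virt.
+ * Returns false if @virt is not mapped.
+ */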
+static bool follow_physaddr(pgd_t *pgd_table, unsigned long virt,
+			    phys_addr_t *phys, unsigned long *page_size,
+			    ulong *flags)
+{
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+	/* Could use lookup_address_in_*() instead; see kcl/675039. */
+
+ *page_size = PGDIR_SIZE;
+ pgd = pgd_offset_pgd(pgd_table, virt);
+ if (!pgd_present(*pgd))
+ return false;
+ if (pgd_leaf(*pgd)) {
+ *phys = PFN_PHYS(pgd_pfn(*pgd)) | (virt & ~PGDIR_MASK);
+ *flags = pgd_flags(*pgd);
+ return true;
+ }
+
+ *page_size = P4D_SIZE;
+ p4d = p4d_offset(pgd, virt);
+ if (!p4d_present(*p4d))
+ return false;
+ if (p4d_leaf(*p4d)) {
+ *phys = PFN_PHYS(p4d_pfn(*p4d)) | (virt & ~P4D_MASK);
+ *flags = p4d_flags(*p4d);
+ return true;
+ }
+
+ *page_size = PUD_SIZE;
+ pud = pud_offset(p4d, virt);
+ if (!pud_present(*pud))
+ return false;
+ if (pud_leaf(*pud)) {
+ *phys = PFN_PHYS(pud_pfn(*pud)) | (virt & ~PUD_MASK);
+ *flags = pud_flags(*pud);
+ return true;
+ }
+
+ *page_size = PMD_SIZE;
+ pmd = pmd_offset(pud, virt);
+ if (!pmd_present(*pmd))
+ return false;
+ if (pmd_leaf(*pmd)) {
+ *phys = PFN_PHYS(pmd_pfn(*pmd)) | (virt & ~PMD_MASK);
+ *flags = pmd_flags(*pmd);
+ return true;
+ }
+
+ *page_size = PAGE_SIZE;
+ pte = pte_offset_map(pmd, virt);
+ if (!pte)
+ return false;
+
+ if (!pte_present(*pte)) {
+ pte_unmap(pte);
+ return false;
+ }
+
+ *phys = PFN_PHYS(pte_pfn(*pte)) | (virt & ~PAGE_MASK);
+ *flags = pte_flags(*pte);
+
+ pte_unmap(pte);
+ return true;
+}
+
+/*
+ * Map the given range into the ASI page tables. The source of the mapping is
+ * the regular unrestricted page tables. Can be used to map any kernel memory.
+ *
+ * The caller MUST ensure that the source mapping will not change during this
+ * function. For dynamic kernel memory, this is generally ensured by mapping the
+ * memory within the allocator.
+ *
+ * If this fails, it may leave partial mappings behind. The caller must clean
+ * those up with asi_unmap(), bearing in mind asi_unmap()'s requirements on the
+ * calling context. Cleanup is left to the caller partly because we don't want
+ * to unexpectedly undo mappings that weren't created by the present caller.
+ *
+ * If the source mapping is a large page and the range being mapped spans the
+ * entire large page, then it will be mapped as a large page in the ASI page
+ * tables too. If the range does not span the entire huge page, then it will be
+ * mapped as smaller pages. In that case the implementation is slightly
+ * inefficient, as it walks the source page tables again for each small
+ * destination page, but that should be OK for now since such ranges usually
+ * consist of a small number of pages.
+ *
+ * Note that upstream
+ * (https://lore.kernel.org/all/20210317155843.c15e71f966f1e4da508dea04@linux-foundation.org/)
+ * vmap_p4d_range supports huge mappings. It is probably possible to use that
+ * logic instead of custom mapping duplication logic in later versions of ASI.
+ */
+int __must_check asi_map_gfp(struct asi *asi, void *addr, size_t len, gfp_t gfp_flags)
+{
+ unsigned long virt;
+	unsigned long start = (unsigned long)addr;
+ unsigned long end = start + len;
+ unsigned long page_size;
+
+ if (!static_asi_enabled())
+ return 0;
+
+ VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
+ VM_BUG_ON(!IS_ALIGNED(len, PAGE_SIZE));
+	/* fault_in_kernel_space() is misnamed; it just checks the address. */
+	VM_BUG_ON(!fault_in_kernel_space(start));
+
+ gfp_flags &= GFP_RECLAIM_MASK;
+
+ if (asi->mm != &init_mm)
+ gfp_flags |= __GFP_ACCOUNT;
+
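+	/*
+	 * Walk the source mapping one (possibly huge) page at a time. page_size
+	 * is set by follow_physaddr() and may be lowered by MAP_AT_LEVEL below
+	 * when only part of a huge source page falls within [start, end).
+	 */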
+ for (virt = start; virt < end; virt = ALIGN(virt + 1, page_size)) {
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ phys_addr_t phys;
+ ulong flags;
+
+ if (!follow_physaddr(asi->mm->pgd, virt, &phys, &page_size, &flags))
+ continue;
+
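+/*
+ * At each level: if the destination entry is already a leaf, just verify that
+ * it maps the expected physical address. Otherwise allocate the next-level
+ * table and, if a whole LEVEL-sized page fits within [start, end) and can be
+ * mapped at this level, install (or verify) a leaf entry here and move on to
+ * the next page. Failing that, fall through to the next, smaller level.
+ */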
+#define MAP_AT_LEVEL(base, BASE, level, LEVEL) { \
+ if (base##_leaf(*base)) { \
+ if (WARN_ON_ONCE(PHYS_PFN(phys & BASE##_MASK) !=\
+ base##_pfn(*base))) \
+ return -EBUSY; \
+ continue; \
+ } \
+ \
+ level = asi_##level##_alloc(asi, base, virt, gfp_flags);\
+ if (!level) \
+ return -ENOMEM; \
+ \
+ if (page_size >= LEVEL##_SIZE && \
+ (level##_none(*level) || level##_leaf(*level)) && \
+ is_page_within_range(virt, LEVEL##_SIZE, \
+ start, end)) { \
+ page_size = LEVEL##_SIZE; \
+ phys &= LEVEL##_MASK; \
+ \
+ if (!level##_none(*level)) { \
+ if (WARN_ON_ONCE(level##_pfn(*level) != \
+ PHYS_PFN(phys))) { \
+ return -EBUSY; \
+ } \
+ } else { \
+ set_##level(level, \
+ __##level(phys | flags)); \
+ } \
+ continue; \
+ } \
+ }
+
+ pgd = pgd_offset_pgd(asi->pgd, virt);
+
+ MAP_AT_LEVEL(pgd, PGDIR, p4d, P4D);
+ MAP_AT_LEVEL(p4d, P4D, pud, PUD);
+ MAP_AT_LEVEL(pud, PUD, pmd, PMD);
+ /*
+ * If a large page is going to be partially mapped
+ * in 4k pages, convert the PSE/PAT bits.
+ */
+ if (page_size >= PMD_SIZE)
+ flags = protval_large_2_4k(flags);
+ MAP_AT_LEVEL(pmd, PMD, pte, PAGE);
+
+ VM_BUG_ON(true); /* Should never reach here. */
+ }
+
+ return 0;
+#undef MAP_AT_LEVEL
+}
+
+int __must_check asi_map(struct asi *asi, void *addr, size_t len)
+{
+ return asi_map_gfp(asi, addr, len, GFP_KERNEL);
+}
+
+/*
+ * Unmap a kernel address range previously mapped into the ASI page tables.
+ *
+ * The area being unmapped must consist of whole previously mapped regions;
+ * unmapping only part of a previously mapped region is not supported. It will
+ * work, but may end up unmapping more than was asked for if the mapping
+ * contained huge pages. A later patch will remove this limitation by splitting
+ * the huge mapping in the ASI page table in such a case. For now,
+ * vunmap_pgd_range() will just emit a warning if this situation is detected.
+ *
+ * This might sleep, and cannot be called with interrupts disabled.
+ */
+void asi_unmap(struct asi *asi, void *addr, size_t len)
+{
+	unsigned long start = (unsigned long)addr;
+	unsigned long end = start + len;
+	pgtbl_mod_mask mask = 0;
+
+	if (!static_asi_enabled() || !len)
+		return;
+
+	VM_BUG_ON(!IS_ALIGNED(start, PAGE_SIZE));
+	VM_BUG_ON(!IS_ALIGNED(len, PAGE_SIZE));
+	/* fault_in_kernel_space() is misnamed; it just checks the address. */
+	VM_BUG_ON(!fault_in_kernel_space(start));
+
+ vunmap_pgd_range(asi->pgd, start, end, &mask);
+
+ /* We don't support partial unmappings - b/270310049 */
+ if (mask & PGTBL_P4D_MODIFIED) {
+ VM_WARN_ON(!IS_ALIGNED((ulong)addr, P4D_SIZE));
+ VM_WARN_ON(!IS_ALIGNED((ulong)len, P4D_SIZE));
+ } else if (mask & PGTBL_PUD_MODIFIED) {
+ VM_WARN_ON(!IS_ALIGNED((ulong)addr, PUD_SIZE));
+ VM_WARN_ON(!IS_ALIGNED((ulong)len, PUD_SIZE));
+ } else if (mask & PGTBL_PMD_MODIFIED) {
+ VM_WARN_ON(!IS_ALIGNED((ulong)addr, PMD_SIZE));
+ VM_WARN_ON(!IS_ALIGNED((ulong)len, PMD_SIZE));
+ }
+
+ asi_flush_tlb_range(asi, addr, len);
+}
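For illustration, here is a minimal sketch of how a caller might pair these
primitives. It is not part of the patch: `my_asi`, `buf` and `buf_size` are
hypothetical names, and the buffer is assumed to be page-aligned with a
page-multiple length, as asi_map() requires.

	/* Hypothetical caller, for illustration only. */
	static int expose_buf(struct asi *my_asi, void *buf, size_t buf_size)
	{
		int err = asi_map(my_asi, buf, buf_size);

		if (err) {
			/*
			 * A failed asi_map() may leave partial mappings
			 * behind; the caller has to clean them up.
			 */
			asi_unmap(my_asi, buf, buf_size);
			return err;
		}
		return 0;
	}

Tearing the mapping down is simply asi_unmap(my_asi, buf, buf_size), covering
the whole previously mapped region, from a context that is allowed to sleep.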
@@ -1026,6 +1026,11 @@ inline_or_noinstr u16 asi_pcid(struct asi *asi, u16 asid)
return kern_pcid(asid) | ((asi->index + 1) << ASI_PCID_BITS_SHIFT);
}
+void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len)
+{
+ flush_tlb_kernel_range((ulong)addr, (ulong)addr + len);
+}
+
#else /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
u16 asi_pcid(struct asi *asi, u16 asid) { return kern_pcid(asid); }
@@ -2,6 +2,8 @@
#ifndef __ASM_GENERIC_ASI_H
#define __ASM_GENERIC_ASI_H
+#include <linux/types.h>
+
#ifndef CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION
#define ASI_MAX_NUM_ORDER 0
@@ -58,6 +60,17 @@ static inline int asi_intr_nest_depth(void) { return 0; }
static inline void asi_intr_exit(void) { }
+static inline int asi_map(struct asi *asi, void *addr, size_t len)
+{
+ return 0;
+}
+
+static inline
+void asi_unmap(struct asi *asi, void *addr, size_t len) { }
+
+static inline
+void asi_flush_tlb_range(struct asi *asi, void *addr, size_t len) { }
+
#define static_asi_enabled() false
static inline void asi_check_boottime_disable(void) { }
@@ -1788,6 +1788,9 @@ typedef unsigned int pgtbl_mod_mask;
#ifndef pmd_leaf
#define pmd_leaf(x) false
#endif
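+/* A PTE always maps a single page, so it is by definition a leaf entry. */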
+#ifndef pte_leaf
+#define pte_leaf(x) true
+#endif
#ifndef pgd_leaf_size
#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
@@ -217,6 +217,8 @@ void unmap_page_range(struct mmu_gather *tlb,
void page_cache_ra_order(struct readahead_control *, struct file_ra_state *,
unsigned int order);
void force_page_cache_ra(struct readahead_control *, unsigned long nr);
+void vunmap_pgd_range(pgd_t *pgd_table, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask);
static inline void force_page_cache_readahead(struct address_space *mapping,
struct file *file, pgoff_t index, unsigned long nr_to_read)
{
@@ -419,6 +419,24 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
} while (p4d++, addr = next, addr != end);
}
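+
+/*
+ * Clear the leaf mappings for [addr, end) in the page table rooted at
+ * @pgd_table, recording which levels were modified in @mask. Does not flush
+ * the TLB.
+ */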
+void vunmap_pgd_range(pgd_t *pgd_table, unsigned long addr, unsigned long end,
+ pgtbl_mod_mask *mask)
+{
+ unsigned long next;
+ pgd_t *pgd = pgd_offset_pgd(pgd_table, addr);
+
+ BUG_ON(addr >= end);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_bad(*pgd))
+ *mask |= PGTBL_PGD_MODIFIED;
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ vunmap_p4d_range(pgd, addr, next, mask);
+ } while (pgd++, addr = next, addr != end);
+}
+
/*
* vunmap_range_noflush is similar to vunmap_range, but does not
* flush caches or TLBs.
@@ -433,21 +451,9 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
*/
void __vunmap_range_noflush(unsigned long start, unsigned long end)
{
- unsigned long next;
- pgd_t *pgd;
- unsigned long addr = start;
pgtbl_mod_mask mask = 0;
- BUG_ON(addr >= end);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, end);
- if (pgd_bad(*pgd))
- mask |= PGTBL_PGD_MODIFIED;
- if (pgd_none_or_clear_bad(pgd))
- continue;
- vunmap_p4d_range(pgd, addr, next, &mask);
- } while (pgd++, addr = next, addr != end);
+ vunmap_pgd_range(init_mm.pgd, start, end, &mask);
if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
arch_sync_kernel_mappings(start, end);