@@ -47,6 +47,11 @@ config ARCH_MAP_DOMAIN_PAGE
config GENERIC_BUG_FRAME
bool
+config GENERIC_PT
+ bool
+ help
+ Introduces common functions to work with page tables when the MMU is enabled.
+
config HAS_ALTERNATIVE
bool
@@ -2,6 +2,7 @@ obj-$(CONFIG_ARGO) += argo.o
obj-y += bitmap.o
obj-bin-$(CONFIG_SELF_TESTS) += bitops.init.o
obj-$(CONFIG_GENERIC_BUG_FRAME) += bug.o
+obj-$(CONFIG_GENERIC_PT) += mmu/pt.o
obj-$(CONFIG_HYPFS_CONFIG) += config_data.o
obj-$(CONFIG_CORE_PARKING) += core_parking.o
obj-y += cpu.o
new file mode 100644
@@ -0,0 +1,441 @@
+#include <xen/bug.h>
+#include <xen/domain_page.h>
+#include <xen/errno.h>
+#include <xen/mm.h>
+#include <xen/mm-frame.h>
+#include <xen/pmap.h>
+#include <xen/spinlock.h>
+
+/* Sanity check of the entry */
+static bool xen_pt_check_entry(pte_t entry, mfn_t mfn, unsigned int level,
+ unsigned int flags)
+{
+ /* Sanity check when modifying an entry. */
+ if ( (flags & _PAGE_PRESENT) && mfn_eq(mfn, INVALID_MFN) )
+ {
+ /* We don't allow modifying an invalid entry. */
+ if ( !pte_is_valid(entry) )
+ {
+ printk("Modifying invalid entry is not allowed.\n");
+ return false;
+ }
+
+ /* We don't allow modifying a table entry */
+ if ( !pte_is_mapping(entry, level) )
+ {
+ printk("Modifying a table entry is not allowed.\n");
+ return false;
+ }
+
+ if ( !sanity_arch_specific_pte_checks(entry) )
+ {
+ printk("sanity check failed\n");
+ return false;
+ }
+ }
+ /* Sanity check when inserting a mapping */
+ else if ( flags & _PAGE_PRESENT )
+ {
+ /* We should be here with a valid MFN. */
+ ASSERT(!mfn_eq(mfn, INVALID_MFN));
+
+ /*
+ * We don't allow replacing any valid entry.
+ *
+ * Note that the function xen_pt_update() relies on this
+ * assumption and will skip the TLB flush. The function will need
+ * to be updated if the check is relaxed.
+ */
+ if ( pte_is_valid(entry) )
+ {
+ if ( pte_is_mapping(entry, level) )
+ printk("Changing MFN for a valid entry is not allowed (%#"PRI_mfn" -> %#"PRI_mfn").\n",
+ mfn_x(pte_get_mfn(entry)), mfn_x(mfn));
+ else
+ printk("Trying to replace a table with a mapping.\n");
+ return false;
+ }
+ }
+ /* Sanity check when removing a mapping. */
+ else if ( (flags & (_PAGE_PRESENT|_PAGE_POPULATE)) == 0 )
+ {
+ /* We should be here with an invalid MFN. */
+ ASSERT(mfn_eq(mfn, INVALID_MFN));
+
+ /* We don't allow removing a table */
+ if ( pte_is_table(entry, level) )
+ {
+ printk("Removing a table is not allowed.\n");
+ return false;
+ }
+
+ if ( get_contig_bit(entry) )
+ {
+ printk("Removing entry with contiguous bit set is not allowed.\n");
+ return false;
+ }
+ }
+ /* Sanity check when populating the page-table. No check so far. */
+ else
+ {
+ ASSERT(flags & _PAGE_POPULATE);
+ /* We should be here with an invalid MFN */
+ ASSERT(mfn_eq(mfn, INVALID_MFN));
+ }
+
+ return true;
+}
+
+static pte_t *xen_map_table(mfn_t mfn)
+{
+ /*
+ * During early boot, map_domain_page() may be unusable. Use the
+ * PMAP to temporarily map a page-table.
+ */
+ if ( system_state == SYS_STATE_early_boot )
+ return pmap_map(mfn);
+
+ return map_domain_page(mfn);
+}
+
+static void xen_unmap_table(const pte_t *table)
+{
+ /*
+ * During early boot, xen_map_table() will not use map_domain_page()
+ * but the PMAP.
+ */
+ if ( system_state == SYS_STATE_early_boot )
+ pmap_unmap(table);
+ else
+ unmap_domain_page(table);
+}
+
+static int create_xen_table(pte_t *entry)
+{
+ mfn_t mfn;
+ void *p;
+ pte_t pte;
+
+ if ( system_state != SYS_STATE_early_boot )
+ {
+ struct page_info *pg = alloc_domheap_page(NULL, 0);
+
+ if ( pg == NULL )
+ return -ENOMEM;
+
+ mfn = page_to_mfn(pg);
+ }
+ else
+ mfn = alloc_boot_pages(1, 1);
+
+ p = xen_map_table(mfn);
+ clear_page(p);
+ xen_unmap_table(p);
+
+ pte = mfn_to_xen_entry(mfn, MT_NORMAL);
+
+ set_pte_table_bit(&pte, 1);
+
+ write_pte(entry, pte);
+
+ return 0;
+}
+
+#define XEN_TABLE_MAP_FAILED 0
+#define XEN_TABLE_SUPER_PAGE 1
+#define XEN_TABLE_NORMAL_PAGE 2
+
+/*
+ * Take the currently mapped table, find the corresponding entry,
+ * and map the next table, if available.
+ *
+ * The read_only parameter, when true, prevents intermediate tables
+ * from being allocated when they are not present.
+ *
+ * Return values:
+ * XEN_TABLE_MAP_FAILED: Either read_only was set and the entry
+ * was empty, or allocating a new page failed.
+ * XEN_TABLE_NORMAL_PAGE: The next level was mapped normally.
+ * XEN_TABLE_SUPER_PAGE: The next entry points to a superpage.
+ */
+static int xen_pt_next_level(bool read_only, unsigned int level,
+ pte_t **table, unsigned int offset)
+{
+ pte_t *entry;
+ int ret;
+ mfn_t mfn;
+
+ entry = *table + offset;
+
+ if ( !pte_is_valid(*entry) )
+ {
+ if ( read_only )
+ return XEN_TABLE_MAP_FAILED;
+
+ ret = create_xen_table(entry);
+ if ( ret )
+ return XEN_TABLE_MAP_FAILED;
+ }
+
+ if ( pte_is_mapping(*entry, level) )
+ return XEN_TABLE_SUPER_PAGE;
+
+ mfn = pte_get_mfn(*entry);
+
+ xen_unmap_table(*table);
+ *table = xen_map_table(mfn);
+
+ return XEN_TABLE_NORMAL_PAGE;
+}
+
+/* Update an entry at the level @arch_target. */
+static int xen_pt_update_entry(mfn_t root, unsigned long virt,
+ mfn_t mfn, unsigned int arch_target,
+ unsigned int flags)
+{
+ int rc;
+ unsigned int level = convert_level(HYP_PT_ROOT_LEVEL);
+ unsigned int arch_level = convert_level(level);
+ unsigned int target = convert_level(arch_target);
+ pte_t *table;
+ /*
+ * The intermediate page tables are read-only when the MFN is not valid
+ * and we are not populating page table.
+ * This means we either modify permissions or remove an entry.
+ */
+ bool read_only = mfn_eq(mfn, INVALID_MFN) && !(flags & _PAGE_POPULATE);
+ pte_t pte, *entry;
+
+ /* convenience aliases */
+ DECLARE_OFFSETS(offsets, (paddr_t)virt);
+
+ /* _PAGE_POPULATE and _PAGE_PRESENT should never be set together. */
+ ASSERT((flags & (_PAGE_POPULATE|_PAGE_PRESENT)) != (_PAGE_POPULATE|_PAGE_PRESENT));
+
+ table = xen_map_table(root);
+ for ( ; level < target; level++, arch_level = convert_level(level) )
+ {
+ rc = xen_pt_next_level(read_only, arch_level, &table, offsets[arch_level]);
+ if ( rc == XEN_TABLE_MAP_FAILED )
+ {
+ /*
+ * We are here because xen_pt_next_level has failed to map
+ * the intermediate page table (e.g. the table does not exist
+ * and the pt is read-only). It is a valid case when
+ * removing a mapping as it may not exist in the page table.
+ * In this case, just ignore it.
+ */
+ if ( flags & (_PAGE_PRESENT | _PAGE_POPULATE) )
+ {
+ printk("%s: Unable to map level %u\n", __func__, arch_level);
+ rc = -ENOENT;
+ goto out;
+ }
+ else
+ {
+ rc = 0;
+ goto out;
+ }
+ }
+ else if ( rc != XEN_TABLE_NORMAL_PAGE )
+ break;
+ }
+
+ if ( arch_level != arch_target )
+ {
+ printk("%s: Shattering superpage is not supported\n", __func__);
+ rc = -EOPNOTSUPP;
+ goto out;
+ }
+
+ entry = table + offsets[arch_level];
+
+ rc = -EINVAL;
+ if ( !xen_pt_check_entry(*entry, mfn, arch_level, flags) )
+ goto out;
+
+ /* If we are only populating the page-table, then we are done. */
+ rc = 0;
+ if ( flags & _PAGE_POPULATE )
+ goto out;
+
+ /* We are removing the page */
+ if ( !(flags & _PAGE_PRESENT) )
+ memset(&pte, 0x00, sizeof(pte));
+ else
+ {
+ /* We are inserting a mapping => Create new pte. */
+ if ( !mfn_eq(mfn, INVALID_MFN) )
+ {
+ pte = mfn_to_xen_entry(mfn, PAGE_AI_MASK(flags));
+
+ set_pte_table_bit(&pte, (arch_level == 3));
+ }
+ else /* We are updating the permission => Copy the current pte. */
+ pte = *entry;
+
+ set_pte_permissions(&pte, flags);
+ }
+
+ write_pte(entry, pte);
+
+ rc = 0;
+
+out:
+ xen_unmap_table(table);
+
+ return rc;
+}
+
+static DEFINE_SPINLOCK(xen_pt_lock);
+
+/* Return the level at which the mapping should be done. */
+int xen_pt_mapping_level(unsigned long vfn, mfn_t mfn, unsigned long nr,
+ unsigned int flags)
+{
+ unsigned int level = 0;
+ unsigned long mask;
+ unsigned int i = 0;
+
+ /*
+ * Don't take the MFN into account when removing a mapping (i.e.
+ * INVALID_MFN) to calculate the correct target order.
+ *
+ * The architecture requires both `vfn` and `mfn` to be superpage
+ * aligned. They are or-ed together and then checked against the
+ * size of each level.
+ *
+ * `nr` is not included in the check and is handled separately to
+ * allow a superpage mapping even if the total size is not properly
+ * aligned (the user may have asked to map 2MB + 4k).
+ */
+ mask = !mfn_eq(mfn, INVALID_MFN) ? mfn_x(mfn) : 0;
+ mask |= vfn;
+
+ /*
+ * Always use the smallest mapping size unless the caller requests
+ * a block mapping.
+ */
+ if ( likely(!(flags & _PAGE_BLOCK)) )
+ return level;
+
+ for ( i = 0; i < CONFIG_PAGING_LEVELS; i++ )
+ {
+ if ( !(mask & (BIT(XEN_PT_LEVEL_ORDER(convert_level(i)), UL) - 1)) &&
+ (nr >= BIT(XEN_PT_LEVEL_ORDER(convert_level(i)), UL)) )
+ {
+ level = convert_level(i);
+ break;
+ }
+ }
+
+ return level;
+}
+
+static int xen_pt_update(unsigned long virt,
+ mfn_t mfn,
+ /* const on purpose as it is used for TLB flush */
+ const unsigned long nr_mfns,
+ unsigned int flags)
+{
+ int rc = 0;
+ unsigned long vfn = virt >> PAGE_SHIFT;
+ unsigned long left = nr_mfns;
+
+ /*
+ * For arm32, page-tables are different on each CPU. Yet, they share
+ * some common mappings. It is assumed that only common mappings
+ * will be modified with this function.
+ *
+ * XXX: Add a check.
+ */
+ const mfn_t root = get_root_page();
+
+ /*
+ * The hardware was configured to forbid mappings that are both
+ * writeable and executable.
+ * When modifying/creating a mapping (i.e. _PAGE_PRESENT is set),
+ * prevent any update that would create one.
+ */
+ if ( (flags & _PAGE_PRESENT) && !PAGE_RO_MASK(flags) &&
+ !PAGE_XN_MASK(flags) )
+ {
+ printk("Mappings should not be both Writeable and Executable.\n");
+ return -EINVAL;
+ }
+
+ if ( flags & _PAGE_CONTIG )
+ {
+ printk("_PAGE_CONTIG is an internal only flag.\n");
+ return -EINVAL;
+ }
+
+ if ( !IS_ALIGNED(virt, PAGE_SIZE) )
+ {
+ printk("The virtual address is not aligned to the page-size.\n");
+ return -EINVAL;
+ }
+
+ spin_lock(&xen_pt_lock);
+
+ while ( left )
+ {
+ unsigned int order, level, nr_contig, new_flags;
+
+ level = xen_pt_mapping_level(vfn, mfn, left, flags);
+ order = XEN_PT_LEVEL_ORDER(level);
+
+ ASSERT(left >= BIT(order, UL));
+
+ /*
+ * Check if we can set the contiguous mapping and update the
+ * flags accordingly.
+ */
+ nr_contig = xen_pt_check_contig(vfn, mfn, level, left, flags);
+ new_flags = flags | ((nr_contig > 1) ? _PAGE_CONTIG : 0);
+
+ for ( ; nr_contig > 0; nr_contig-- )
+ {
+ rc = xen_pt_update_entry(root, vfn << PAGE_SHIFT, mfn, level,
+ new_flags);
+ if ( rc )
+ break;
+
+ vfn += 1U << order;
+ if ( !mfn_eq(mfn, INVALID_MFN) )
+ mfn = mfn_add(mfn, 1U << order);
+
+ left -= (1U << order);
+ }
+
+ if ( rc )
+ break;
+ }
+
+ /*
+ * The TLB flush can be safely skipped when a mapping is inserted
+ * as we don't allow mapping replacement (see xen_pt_check_entry()).
+ *
+ * For all the other cases, the TLBs will be flushed unconditionally
+ * even if the mapping has failed. This is because we may have
+ * partially modified the PT. This will prevent any unexpected
+ * behavior afterwards.
+ */
+ if ( !((flags & _PAGE_PRESENT) && !mfn_eq(mfn, INVALID_MFN)) )
+ flush_xen_tlb_range_va(virt, PAGE_SIZE * nr_mfns);
+
+ spin_unlock(&xen_pt_lock);
+
+ return rc;
+}
+
+int map_pages_to_xen(unsigned long virt,
+ mfn_t mfn,
+ unsigned long nr_mfns,
+ unsigned int flags)
+{
+ return xen_pt_update(virt, mfn, nr_mfns, flags);
+}
@@ -648,4 +648,28 @@ static inline void put_page_alloc_ref(struct page_info *page)
}
}
+#ifdef CONFIG_GENERIC_PT
+
+/* asm/page.h is needed for the definition of pte_t. */
+#include <asm/page.h>
+
+const mfn_t get_root_page(void);
+
+unsigned int xen_pt_check_contig(unsigned long vfn, mfn_t mfn,
+ unsigned int level, unsigned long left,
+ unsigned int flags);
+
+void set_pte_table_bit(pte_t *pte, unsigned int tbl_bit_val);
+
+bool sanity_arch_specific_pte_checks(pte_t entry);
+
+unsigned int get_contig_bit(pte_t entry);
+
+void set_pte_permissions(pte_t *pte, unsigned int flags);
+
+void flush_xen_tlb_range_va(vaddr_t va,
+ unsigned long size);
+
+#endif /* CONFIG_GENERIC_PT */
+
#endif /* __XEN_MM_H__ */
At least between Arm and RISC-V, most of the code related to Xen page
table handling is common. This GENERIC_PT code is based on Arm's
arm/mmu/pt.c, with some minor changes such as the introduction of the
following functions:
 * get_root_page()
 * xen_pt_check_contig()
 * set_pte_table_bit()
 * sanity_arch_specific_pte_checks()
 * get_contig_bit()
 * set_pte_permissions()
 * flush_xen_tlb_range_va()

These hooks were introduced because not every architecture has the
generic pte_flags, and the PTE bits can be located at different
positions within a PTE.

Signed-off-by: Oleksii Kurochko <oleksii.kurochko@gmail.com>
---
Changes in V2:
 - newly introduced patch
---
 xen/common/Kconfig   |   5 +
 xen/common/Makefile  |   1 +
 xen/common/mmu/pt.c  | 441 +++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/mm.h |  24 +++
 4 files changed, 471 insertions(+)
 create mode 100644 xen/common/mmu/pt.c
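
As an illustration of what the architecture side of these hooks might
look like, below is a minimal sketch (not part of this patch). It
assumes a pte_t that wraps a single 64-bit word in a field named "pte"
and made-up bit positions (ILLUSTRATIVE_PTE_TABLE_BIT,
ILLUSTRATIVE_PTE_CONTIG_BIT); every port would use its own PTE layout
and bit definitions instead.

    #include <xen/types.h>
    #include <xen/mm.h>

    /*
     * Illustrative assumptions only: the field name "pte" and the bit
     * positions below are placeholders, not defined by this patch.
     */
    #define ILLUSTRATIVE_PTE_TABLE_BIT  (1UL << 1)   /* assumed "table" bit */
    #define ILLUSTRATIVE_PTE_CONTIG_BIT (1UL << 52)  /* assumed "contiguous" bit */

    /* Mark (or unmark) the entry as pointing to a next-level table. */
    void set_pte_table_bit(pte_t *pte, unsigned int tbl_bit_val)
    {
        if ( tbl_bit_val )
            pte->pte |= ILLUSTRATIVE_PTE_TABLE_BIT;
        else
            pte->pte &= ~ILLUSTRATIVE_PTE_TABLE_BIT;
    }

    /* Return whether the contiguous hint bit is set in the entry. */
    unsigned int get_contig_bit(pte_t entry)
    {
        return !!(entry.pte & ILLUSTRATIVE_PTE_CONTIG_BIT);
    }

Because the common pt.c only ever uses these operations through the
prototypes added to xen/mm.h, each architecture is free to keep its PTE
field names and bit positions private to its own headers.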