diff mbox series

[RFC,v4,part-2,02/13] mm/dpt: Track buffers allocated for a decorated page-table

Message ID 20200504145810.11882-3-alexandre.chartre@oracle.com (mailing list archive)
State New, archived
Headers show
Series ASI - Part II (Decorated Page-Table) | expand

Commit Message

Alexandre Chartre May 4, 2020, 2:57 p.m. UTC
Add functions to track buffers allocated for a decorated page-table.
A page-table can have direct references to the kernel page table, at
different levels (PGD, P4D, PUD, PMD). When freeing a page-table, we
should make sure that we free parts actually allocated for the decorated
page-table, and not parts of the kernel page table referenced from the
page-table. To do so, we will keep track of buffers when building the
page-table.

Signed-off-by: Alexandre Chartre <alexandre.chartre@oracle.com>
---
 arch/x86/include/asm/dpt.h | 21 ++++++++++
 arch/x86/mm/dpt.c          | 82 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 103 insertions(+)
diff mbox series

Patch

diff --git a/arch/x86/include/asm/dpt.h b/arch/x86/include/asm/dpt.h
index 1da4d43d5e94..b9cba051ebf2 100644
--- a/arch/x86/include/asm/dpt.h
+++ b/arch/x86/include/asm/dpt.h
@@ -3,9 +3,18 @@ 
 #define ARCH_X86_MM_DPT_H
 
 #include <linux/spinlock.h>
+#include <linux/xarray.h>
 
 #include <asm/pgtable.h>
 
+enum page_table_level {	/* level a page-table page is used for */
+	PGT_LEVEL_PTE,	/* 0 */
+	PGT_LEVEL_PMD,	/* 1 */
+	PGT_LEVEL_PUD,	/* 2 */
+	PGT_LEVEL_P4D,	/* 3 */
+	PGT_LEVEL_PGD	/* 4 */
+};
+
 /*
  * A decorated page-table (dpt) encapsulates a native page-table (e.g.
  * a PGD) and maintain additional attributes related to this page-table.
@@ -15,6 +24,18 @@  struct dpt {
 	pgd_t			*pagetable;	/* the actual page-table */
 	unsigned int		alignment;	/* page-table alignment */
 
+	/*
+	 * A page-table can have direct references to another page-table,
+	 * at different levels (PGD, P4D, PUD, PMD). When freeing or
+	 * modifying a page-table, we should make sure that we free/modify
+	 * parts effectively allocated to the actual page-table, and not
+	 * parts of another page-table referenced from this page-table.
+	 *
+	 * To do so, the backend_pages XArray is used to keep track of pages
+	 * used for this page-table.
+	 */
+	struct xarray		backend_pages;		/* page-table pages */
+	unsigned long		backend_pages_count;	/* pages count */
 };
 
 extern struct dpt *dpt_create(unsigned int pgt_alignment);
diff --git a/arch/x86/mm/dpt.c b/arch/x86/mm/dpt.c
index 333e259c5b7f..6df2d4fde8ec 100644
--- a/arch/x86/mm/dpt.c
+++ b/arch/x86/mm/dpt.c
@@ -8,6 +8,80 @@ 
 
 #include <asm/dpt.h>
 
+/*
+ * Round a pointer to a page table directory entry down to the start of the
+ * page that holds it (the page table directory). The pointer type is kept.
+ */
+#define DPT_BACKEND_PAGE_ALIGN(entry)	\
+	((typeof(entry))(((unsigned long)(entry)) & PAGE_MASK))
+
+/*
+ * Pages used to build a page-table are stored in the backend_pages XArray.
+ * Each entry is the page address (PAGE_SIZE aligned, so its low bits are
+ * free) ORed with the level (0 to 4: PTE, PMD, PUD, P4D, PGD) the page is
+ * used for. The level is shifted left by DPT_BACKEND_LEVEL_SHIFT because
+ * the XArray reserves the two low bits of an entry to tell pointers from
+ * value and internal entries (an unshifted PUD level would look internal).
+ */
+#define DPT_BACKEND_LEVEL_SHIFT	2
+#define DPT_BACKEND_PAGE_ENTRY(addr, level)	\
+	((typeof(addr))(((unsigned long)(addr)) |	\
+	  (((unsigned long)(level)) << DPT_BACKEND_LEVEL_SHIFT)))
+#define DPT_BACKEND_PAGE_ADDR(entry)		\
+	((void *)(((unsigned long)(entry)) & PAGE_MASK))
+#define DPT_BACKEND_PAGE_LEVEL(entry)		\
+	((enum page_table_level)((((unsigned long)(entry)) & ~PAGE_MASK) >> \
+				 DPT_BACKEND_LEVEL_SHIFT))
+
+/* Record a page (and the level it is used at) as owned by this page-table. */
+static int dpt_add_backend_page(struct dpt *dpt, void *addr,
+				enum page_table_level level)
+{
+	int err;
+
+	/* The level is stored in the low bits, so addr must be page aligned. */
+	if (!addr || ((unsigned long)addr & ~PAGE_MASK))
+		return -EINVAL;
+
+	lockdep_assert_held(&dpt->lock);
+
+	/*
+	 * dpt->lock is a spinlock so the allocation must not sleep (GFP_ATOMIC).
+	 * xa_insert() returns -EBUSY without overwriting a slot already in use.
+	 */
+	err = xa_insert(&dpt->backend_pages, dpt->backend_pages_count,
+			DPT_BACKEND_PAGE_ENTRY(addr, level), GFP_ATOMIC);
+	if (err)
+		return err;
+
+	dpt->backend_pages_count++;
+	return 0;
+}
+
+/*
+ * Tell whether an offset in the page-table is valid, i.e. whether it lies
+ * in one of the pages recorded in backend_pages for this page-table.
+ * The caller must hold dpt->lock.
+ */
+static bool dpt_valid_offset(struct dpt *dpt, void *offset)
+{
+	void *page = DPT_BACKEND_PAGE_ALIGN(offset);
+	unsigned long idx;
+	void *tracked;
+
+	lockdep_assert_held(&dpt->lock);
+
+	/*
+	 * Entries are not indexed by address, so compare the page holding
+	 * the offset with every recorded page.
+	 */
+	xa_for_each(&dpt->backend_pages, idx, tracked) {
+		if (DPT_BACKEND_PAGE_ADDR(tracked) == page)
+			return true;
+	}
+	return false;
+}
+
 /*
  * dpt_create - allocate a page-table and create a corresponding
  * decorated page-table. The page-table is allocated and aligned
@@ -41,6 +115,7 @@  struct dpt *dpt_create(unsigned int pgt_alignment)
 	dpt->alignment = pgt_alignment;
 
 	spin_lock_init(&dpt->lock);
+	xa_init(&dpt->backend_pages);
 
 	return dpt;
 }
@@ -50,10 +125,17 @@  void dpt_destroy(struct dpt *dpt)
 {
 	unsigned int pgt_alignment;
 	unsigned int alloc_order;
+	unsigned long index;
+	void *entry;
 
 	if (!dpt)
 		return;
 
+	if (dpt->backend_pages_count) {
+		xa_for_each(&dpt->backend_pages, index, entry)
+			free_page((unsigned long)DPT_BACKEND_PAGE_ADDR(entry));
+	}
+
 	if (dpt->pagetable) {
 		pgt_alignment = dpt->alignment;
 		alloc_order = round_up(PAGE_SIZE + pgt_alignment,