@@ -90,6 +90,7 @@ struct dev_pagemap_ops {
 };
#define PGMAP_ALTMAP_VALID (1 << 0)
+#define PGMAP_PROT_ENABLED (1 << 1)	/* map device pages with extra (PKS) access protection */
/**
 * struct dev_pagemap - metadata for ZONE_DEVICE mappings
@@ -1123,6 +1123,39 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
		page->pgmap->type == MEMORY_DEVICE_PCI_P2PDMA;
}
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+DECLARE_STATIC_KEY_FALSE(dev_protection_static_key);
+
+/*
+ * We make page_is_access_protected() as quick as possible.
+ * 1) If no mappings have been enabled with extra protection we skip this
+ *    entirely
+ * 2) Skip pages which are not ZONE_DEVICE
+ * 3) Only then check if this particular page was mapped with extra
+ *    protections.
+ */
+static inline bool page_is_access_protected(const struct page *page)
+{
+	if (!static_branch_unlikely(&dev_protection_static_key))
+		return false;
+	if (!is_zone_device_page(page))
+		return false;
+	if (page->pgmap->flags & PGMAP_PROT_ENABLED)
+		return true;
+	return false;
+}
+
+void dev_access_enable(void);
+void dev_access_disable(void);
+#else
+static inline bool page_is_access_protected(const struct page *page)
+{
+	return false;
+}
+static inline void dev_access_enable(void) { }
+static inline void dev_access_disable(void) { }
+#endif /* CONFIG_ZONE_DEVICE_ACCESS_PROTECTION */
+
/* 127: arbitrary random number, small enough to assemble well */
#define page_ref_zero_or_close_to_overflow(page) \
	((unsigned int) page_ref_count(page) + 127u <= 127u)
@@ -1313,6 +1313,9 @@ struct task_struct {
	struct callback_head mce_kill_me;
#endif
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+	unsigned int dev_page_access_ref;	/* nesting count of dev_access_enable(); 0 == access disabled */
+#endif
	/*
	 * New fields for task_struct should be added above here, so that
	 * they are included in the randomized portion of task_struct.
@@ -204,6 +204,9 @@ struct task_struct init_task
#ifdef CONFIG_SECURITY
	.security = NULL,
#endif
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+	.dev_page_access_ref = 0,	/* boot task starts with device-memory access disabled */
+#endif
};
EXPORT_SYMBOL(init_task);
@@ -957,6 +957,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
#ifdef CONFIG_MEMCG
	tsk->active_memcg = NULL;
+#endif
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+	tsk->dev_page_access_ref = 0;	/* child must not inherit the parent's enabled-access count */
+#endif
#endif
	return tsk;
@@ -798,6 +798,18 @@ config ZONE_DEVICE
	  If FS_DAX is enabled, then say Y.
+config ZONE_DEVICE_ACCESS_PROTECTION
+	bool "Device memory access protection"
+	depends on ZONE_DEVICE
+	depends on ARCH_HAS_SUPERVISOR_PKEYS
+	help
+	  Enable the option of having access protections on device memory
+	  areas. This protects against access to device memory which is not
+	  intended such as stray writes. This feature is particularly useful
+	  to protect against corruption of persistent memory.
+
+	  If in doubt, say 'Y'.
+
config DEV_PAGEMAP_OPS
	bool
@@ -6,12 +6,16 @@
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/pfn_t.h>
+#include <linux/pkeys.h>
#include <linux/swap.h>
#include <linux/mmzone.h>
#include <linux/swapops.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
#include <linux/xarray.h>
+#include <uapi/asm-generic/mman-common.h>
+
+#define PKEY_INVALID (INT_MIN)	/* sentinel: no PKS key was allocated; protection stays disabled */
static DEFINE_XARRAY(pgmap_array);
@@ -70,6 +74,110 @@ static void devmap_managed_enable_put(void)
}
#endif /* CONFIG_DEV_PAGEMAP_OPS */
+#ifdef CONFIG_ZONE_DEVICE_ACCESS_PROTECTION
+/*
+ * Note all devices which have asked for protections share the same key. The
+ * key may, or may not, have been provided by the core. If not, protection
+ * will remain disabled. The key acquisition is attempted at init time and
+ * never again. So we don't have to worry about dev_page_pkey changing.
+ */
+static int dev_page_pkey = PKEY_INVALID;
+DEFINE_STATIC_KEY_FALSE(dev_protection_static_key);
+EXPORT_SYMBOL(dev_protection_static_key);
+static DEFINE_MUTEX(dev_prot_enable_lock);
+static int dev_protection_enable;
+
+static pgprot_t dev_protection_enable_get(struct dev_pagemap *pgmap, pgprot_t prot)
+{
+	if ((pgmap->flags & PGMAP_PROT_ENABLED) && dev_page_pkey != PKEY_INVALID) {
+		pgprotval_t val = pgprot_val(prot);
+
+		mutex_lock(&dev_prot_enable_lock);
+		dev_protection_enable++;
+		/* Only enable the static branch 1 time */
+		if (dev_protection_enable == 1)
+			static_branch_enable(&dev_protection_static_key);
+		mutex_unlock(&dev_prot_enable_lock);
+
+		prot = __pgprot(val | _PAGE_PKEY(dev_page_pkey));
+	}
+	return prot;
+}
+
+static void dev_protection_enable_put(struct dev_pagemap *pgmap)
+{
+	if ((pgmap->flags & PGMAP_PROT_ENABLED) && dev_page_pkey != PKEY_INVALID) {
+		mutex_lock(&dev_prot_enable_lock);
+		dev_protection_enable--;
+		if (dev_protection_enable == 0)
+			static_branch_disable(&dev_protection_static_key);
+		mutex_unlock(&dev_prot_enable_lock);
+	}
+}
+
+void dev_access_disable(void)
+{
+	unsigned long flags;
+
+	if (!static_branch_unlikely(&dev_protection_static_key))
+		return;
+
+	local_irq_save(flags);
+	if (!WARN_ON(!current->dev_page_access_ref) &&
+	    --current->dev_page_access_ref == 0)
+		pks_update_protection(dev_page_pkey, PKEY_DISABLE_ACCESS);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(dev_access_disable);
+
+void dev_access_enable(void)
+{
+	unsigned long flags;
+
+	if (!static_branch_unlikely(&dev_protection_static_key))
+		return;
+
+	local_irq_save(flags);
+	/* 0 clears the PKEY_DISABLE_ACCESS bit, allowing access */
+	if (current->dev_page_access_ref == 0)
+		pks_update_protection(dev_page_pkey, 0);
+	current->dev_page_access_ref++;
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(dev_access_enable);
+
+/**
+ * __dev_access_protection_init() - Configure a PKS key domain for device pages
+ *
+ * The domain defaults to the protected state. Device page mappings should set
+ * the PGMAP_PROT_ENABLED flag when mapping pages.
+ *
+ * Note the pkey is never freed. This is run at init time and we either get
+ * the key or we do not. We need to do this to maintain a constant key (or
+ * not) as device memory is added or removed.
+ */
+static int __init __dev_access_protection_init(void)
+{
+	int pkey = pks_key_alloc("Device Memory");
+
+	if (pkey < 0)
+		return 0;
+
+	dev_page_pkey = pkey;
+
+	return 0;
+}
+subsys_initcall(__dev_access_protection_init);
+#else
+static pgprot_t dev_protection_enable_get(struct dev_pagemap *pgmap, pgprot_t prot)
+{
+	return prot;
+}
+static void dev_protection_enable_put(struct dev_pagemap *pgmap)
+{
+}
+#endif /* CONFIG_ZONE_DEVICE_ACCESS_PROTECTION */
+
static void pgmap_array_delete(struct resource *res)
{
	xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
@@ -159,6 +267,7 @@ void memunmap_pages(struct dev_pagemap *pgmap)
	pgmap_array_delete(res);
	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
	devmap_managed_enable_put();
+	dev_protection_enable_put(pgmap);	/* pairs with dev_protection_enable_get() in memremap_pages() */
}
EXPORT_SYMBOL_GPL(memunmap_pages);
@@ -194,6 +303,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
	int error, is_ram;
	bool need_devmap_managed = true;
+	/* NOTE(review): ref taken before validation — confirm every error path calls dev_protection_enable_put() */
+	params.pgprot = dev_protection_enable_get(pgmap, params.pgprot);
	switch (pgmap->type) {
	case MEMORY_DEVICE_PRIVATE:
		if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {