@@ -717,6 +717,24 @@ static inline enum zone_type page_zonenum(const struct page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
}
+/**
+ * struct dev_pagemap - metadata for ZONE_DEVICE mappings
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+ /* TODO: vmem_altmap and percpu_ref count */
+ struct device *dev;
+};
+
+#ifdef CONFIG_ZONE_DEVICE
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys);
+#else
+static inline struct dev_pagemap *__get_dev_pagemap(resource_size_t phys)
+{
+	return NULL;
+}
+#endif
+
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define SECTION_IN_PAGE_FLAGS
#endif
@@ -10,6 +10,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
+#include <linux/rculist.h>
#include <linux/device.h>
#include <linux/types.h>
#include <linux/io.h>
@@ -138,18 +139,52 @@ void devm_memunmap(struct device *dev, void *addr)
EXPORT_SYMBOL(devm_memunmap);
#ifdef CONFIG_ZONE_DEVICE
+static LIST_HEAD(ranges);
+static DEFINE_SPINLOCK(range_lock);
+
struct page_map {
struct resource res;
+ struct dev_pagemap pgmap;
+ struct list_head list;
};
+static void add_page_map(struct page_map *page_map)
+{
+ spin_lock(&range_lock);
+ list_add_rcu(&page_map->list, &ranges);
+ spin_unlock(&range_lock);
+}
+
+static void del_page_map(struct page_map *page_map)
+{
+ spin_lock(&range_lock);
+ list_del_rcu(&page_map->list);
+ spin_unlock(&range_lock);
+}
+
static void devm_memremap_pages_release(struct device *dev, void *res)
{
struct page_map *page_map = res;
+ del_page_map(page_map);
+
/* pages are dead and unused, undo the arch mapping */
arch_remove_memory(page_map->res.start, resource_size(&page_map->res));
}
+/* assumes rcu_read_lock() held at entry */
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys)
+{
+ struct page_map *page_map;
+
+ WARN_ON_ONCE(!rcu_read_lock_held());
+
+ list_for_each_entry_rcu(page_map, &ranges, list)
+ if (phys >= page_map->res.start && phys <= page_map->res.end)
+ return &page_map->pgmap;
+ return NULL;
+}
+
void *devm_memremap_pages(struct device *dev, struct resource *res)
{
int is_ram = region_intersects(res->start, resource_size(res),
@@ -173,12 +208,17 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
memcpy(&page_map->res, res, sizeof(*res));
+ page_map->pgmap.dev = dev;
+ INIT_LIST_HEAD(&page_map->list);
+ add_page_map(page_map);
+
nid = dev_to_node(dev);
if (nid < 0)
nid = numa_mem_id();
error = arch_add_memory(nid, res->start, resource_size(res), true);
if (error) {
+ del_page_map(page_map);
devres_free(page_map);
return ERR_PTR(error);
}
There are several scenarios where we need to retrieve and update
metadata associated with a given devm_memremap_pages() mapping, and the
only lookup key available is a pfn in the range:

1/ We want to augment vmemmap_populate() (called via arch_add_memory())
   to allocate memmap storage from pre-allocated pages reserved by the
   device driver.  At vmemmap_alloc_block_buf() time it grabs device
   pages rather than page allocator pages.  This is in support of
   devm_memremap_pages() mappings where the memmap is too large to fit
   in main memory (i.e. large persistent memory devices).

2/ Taking a reference against the mapping when inserting device pages
   into the address_space radix of a given inode.  This facilitates
   unmap_mapping_range() and truncate_inode_pages() operations when the
   driver is tearing down the mapping.

3/ get_user_pages() operations on ZONE_DEVICE memory require taking a
   reference against the mapping so that the driver teardown path can
   revoke and drain usage of device pages.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/mm.h |   18 ++++++++++++++++++
 kernel/memremap.c  |   40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)
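[editor's note] To make the intended usage concrete, here is a minimal
sketch (not part of this patch) of how a follow-on caller might consult
the new lookup.  The helper name pfn_is_device_memory() is hypothetical;
it only assumes the __get_dev_pagemap() declaration from the
include/linux/mm.h hunk above and that the lookup runs under
rcu_read_lock(), since __get_dev_pagemap() walks the RCU-protected
'ranges' list:

#include <linux/device.h>
#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/rcupdate.h>

/*
 * Hypothetical caller: report whether @pfn falls inside a
 * devm_memremap_pages() range.  The dev_pagemap (and its ->dev) is only
 * guaranteed to stay live inside the rcu_read_lock() section.
 */
static bool pfn_is_device_memory(unsigned long pfn)
{
	struct dev_pagemap *pgmap;
	bool ret = false;

	rcu_read_lock();
	pgmap = __get_dev_pagemap(PFN_PHYS(pfn));
	if (pgmap) {
		dev_dbg(pgmap->dev, "pfn %#lx is ZONE_DEVICE memory\n", pfn);
		ret = true;
	}
	rcu_read_unlock();

	return ret;
}

Note that nothing in this sketch pins the mapping once rcu_read_unlock()
is called; that gap is what the "TODO: vmem_altmap and percpu_ref count"
in struct dev_pagemap refers to, and it is why scenarios 2/ and 3/ above
need to take a reference against the mapping.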