With memmap on memory, some architectures need more details about the
altmap, such as base_pfn, end_pfn, etc., to unmap vmemmap memory. Embed
the vmem_altmap data structure in struct memory_block and use that
instead of nr_vmemmap_pages. On memory unplug, if the kernel finds any
memory block in the range using a vmem_altmap, it refuses to unplug the
memory unless the request covers exactly one memory block.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
---
 drivers/base/memory.c    | 28 ++++++++++++++++++---------
 include/linux/memory.h   | 25 ++++++++++++++++++------
 include/linux/memremap.h | 18 +----------------
 mm/memory_hotplug.c      | 42 +++++++++++++++++-----------------------
 4 files changed, 57 insertions(+), 56 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -106,6 +106,7 @@ static void memory_block_release(struct device *dev)
{
struct memory_block *mem = to_memory_block(dev);
+ WARN(mem->altmap.alloc, "Altmap not fully unmapped");
kfree(mem);
}
@@ -183,7 +184,7 @@ static int memory_block_online(struct memory_block *mem)
{
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
- unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+ unsigned long nr_vmemmap_pages = 0;
struct zone *zone;
int ret;
@@ -200,6 +201,9 @@ static int memory_block_online(struct memory_block *mem)
* stage helps to keep accounting easier to follow - e.g vmemmaps
* belong to the same zone as the memory they backed.
*/
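+ /* Pages taken from the block itself: memmap (alloc) plus driver reserve. */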
+ if (mem->altmap.alloc)
+ nr_vmemmap_pages = mem->altmap.alloc + mem->altmap.reserve;
+
if (nr_vmemmap_pages) {
ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
if (ret)
@@ -230,7 +234,7 @@ static int memory_block_offline(struct memory_block *mem)
{
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
- unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+ unsigned long nr_vmemmap_pages = 0;
int ret;
if (!mem->zone)
@@ -240,6 +244,9 @@ static int memory_block_offline(struct memory_block *mem)
* Unaccount before offlining, such that unpopulated zone and kthreads
* can properly be torn down in offline_pages().
*/
+ if (mem->altmap.alloc)
+ nr_vmemmap_pages = mem->altmap.alloc + mem->altmap.reserve;
+
if (nr_vmemmap_pages)
adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
-nr_vmemmap_pages);
@@ -726,7 +733,7 @@ void memory_block_add_nid(struct memory_block *mem, int nid,
#endif
static int add_memory_block(unsigned long block_id, unsigned long state,
- unsigned long nr_vmemmap_pages,
+ struct vmem_altmap *altmap,
struct memory_group *group)
{
struct memory_block *mem;
@@ -744,7 +751,10 @@ static int add_memory_block(unsigned long block_id, unsigned long state,
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
mem->nid = NUMA_NO_NODE;
- mem->nr_vmemmap_pages = nr_vmemmap_pages;
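+ /* memcpy(), not assignment: vmem_altmap has const members. */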
+ if (altmap)
+ memcpy(&mem->altmap, altmap, sizeof(*altmap));
+ else
+ mem->altmap.alloc = 0;
INIT_LIST_HEAD(&mem->group_next);
#ifndef CONFIG_NUMA
@@ -783,14 +793,14 @@ static int __init add_boot_memory_block(unsigned long base_section_nr)
if (section_count == 0)
return 0;
return add_memory_block(memory_block_id(base_section_nr),
- MEM_ONLINE, 0, NULL);
+ MEM_ONLINE, NULL, NULL);
}
static int add_hotplug_memory_block(unsigned long block_id,
- unsigned long nr_vmemmap_pages,
+ struct vmem_altmap *altmap,
struct memory_group *group)
{
- return add_memory_block(block_id, MEM_OFFLINE, nr_vmemmap_pages, group);
+ return add_memory_block(block_id, MEM_OFFLINE, altmap, group);
}
static void remove_memory_block(struct memory_block *memory)
@@ -818,7 +828,7 @@ static void remove_memory_block(struct memory_block *memory)
* Called under device_hotplug_lock.
*/
int create_memory_block_devices(unsigned long start, unsigned long size,
- unsigned long vmemmap_pages,
+ struct vmem_altmap *altmap,
struct memory_group *group)
{
const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
@@ -832,7 +842,7 @@ int create_memory_block_devices(unsigned long start, unsigned long size,
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = add_hotplug_memory_block(block_id, vmemmap_pages, group);
+ ret = add_hotplug_memory_block(block_id, altmap, group);
if (ret)
break;
}
diff --git a/include/linux/memory.h b/include/linux/memory.h
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -64,6 +64,23 @@ struct memory_group {
};
};
+/**
+ * struct vmem_altmap - pre-allocated storage for vmemmap_populate
+ * @base_pfn: base of the entire dev_pagemap mapping
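+ * @end_pfn: first pfn past the end of the mapping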
+ * @reserve: pages mapped, but reserved for driver use (relative to @base)
+ * @free: free pages set aside in the mapping for memmap storage
+ * @align: pages reserved to meet allocation alignments
+ * @alloc: track pages consumed, private to vmemmap_populate()
+ */
+struct vmem_altmap {
+ unsigned long base_pfn;
+ const unsigned long end_pfn;
+ const unsigned long reserve;
+ unsigned long free;
+ unsigned long align;
+ unsigned long alloc;
+};
+
struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
@@ -77,11 +94,7 @@ struct memory_block {
*/
struct zone *zone;
struct device dev;
- /*
- * Number of vmemmap pages. These pages
- * lay at the beginning of the memory block.
- */
- unsigned long nr_vmemmap_pages;
+ struct vmem_altmap altmap;
struct memory_group *group; /* group (if any) for this block */
struct list_head group_next; /* next block inside memory group */
#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
@@ -147,7 +160,7 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
- unsigned long vmemmap_pages,
+ struct vmem_altmap *altmap,
struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_MEMREMAP_H_
#define _LINUX_MEMREMAP_H_
+#include <linux/memory.h>
#include <linux/mmzone.h>
#include <linux/range.h>
#include <linux/ioport.h>
@@ -10,23 +11,6 @@
struct resource;
struct device;
-/**
- * struct vmem_altmap - pre-allocated storage for vmemmap_populate
- * @base_pfn: base of the entire dev_pagemap mapping
- * @reserve: pages mapped, but reserved for driver use (relative to @base)
- * @free: free pages set aside in the mapping for memmap storage
- * @align: pages reserved to meet allocation alignments
- * @alloc: track pages consumed, private to vmemmap_populate()
- */
-struct vmem_altmap {
- unsigned long base_pfn;
- const unsigned long end_pfn;
- const unsigned long reserve;
- unsigned long free;
- unsigned long align;
- unsigned long alloc;
-};
-
/*
* Specialize ZONE_DEVICE memory into multiple types each has a different
* usage.
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1391,7 +1391,7 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
goto error;
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+ ret = create_memory_block_devices(start, size, &mhp_altmap,
group);
if (ret) {
arch_remove_memory(start, size, NULL);
@@ -1993,12 +1993,18 @@ static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
return 0;
}
-static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+static int get_vmemmap_altmap_cb(struct memory_block *mem, void *arg)
{
+ struct vmem_altmap **altmap = (struct vmem_altmap **)arg;
/*
- * If not set, continue with the next block.
+ * If any pages were allocated from the altmap, return the
+ * altmap details and stop the walk.
*/
- return mem->nr_vmemmap_pages;
+ if (mem->altmap.alloc) {
+ *altmap = &mem->altmap;
+ return 1;
+ }
+ return 0;
}
static int check_cpu_on_node(int nid)
@@ -2073,9 +2079,8 @@ EXPORT_SYMBOL(try_offline_node);
static int __ref try_remove_memory(u64 start, u64 size)
{
- struct vmem_altmap mhp_altmap = {};
+ int ret;
struct vmem_altmap *altmap = NULL;
- unsigned long nr_vmemmap_pages;
int rc = 0, nid = NUMA_NO_NODE;
BUG_ON(check_hotplug_memory_range(start, size));
@@ -2097,24 +2102,13 @@ static int __ref try_remove_memory(u64 start, u64 size)
* We only support removing memory added with MHP_MEMMAP_ON_MEMORY in
* the same granularity it was added - a single memory block.
*/
- if (mhp_memmap_on_memory()) {
- nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
- get_nr_vmemmap_pages_cb);
- if (nr_vmemmap_pages) {
- if (size != memory_block_size_bytes()) {
- pr_warn("Refuse to remove %#llx - %#llx,"
- "wrong granularity\n",
- start, start + size);
- return -EINVAL;
- }
-
- /*
- * Let remove_pmd_table->free_hugepage_table do the
- * right thing if we used vmem_altmap when hot-adding
- * the range.
- */
- mhp_altmap.alloc = nr_vmemmap_pages;
- altmap = &mhp_altmap;
+ ret = walk_memory_blocks(start, size, &altmap, get_vmemmap_altmap_cb);
+ if (ret) {
+ if (size != memory_block_size_bytes()) {
+ pr_warn("Refuse to remove %#llx - %#llx, wrong granularity\n",
+ start, start + size);
+ return -EINVAL;
}
}
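
For reviewers, the removal path above leans on the walk_memory_blocks()
contract: the first callback that returns non-zero stops the walk and
that value is propagated, which is how get_vmemmap_altmap_cb() both
reports the altmap through the void *arg pointer and ends the scan. A
condensed sketch of the walker (the real one lives in
drivers/base/memory.c; abridged here from memory):

int walk_memory_blocks(unsigned long start, unsigned long size,
                       void *arg, walk_memory_blocks_func_t func)
{
        const unsigned long start_block_id = phys_to_block_id(start);
        const unsigned long end_block_id = phys_to_block_id(start + size - 1);
        struct memory_block *mem;
        unsigned long block_id;
        int ret = 0;

        for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
                mem = find_memory_block_by_id(block_id);
                if (!mem)
                        continue;

                /* First non-zero return stops the walk and is returned. */
                ret = func(mem, arg);
                put_device(&mem->dev);
                if (ret)
                        break;
        }
        return ret;
}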
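
And a minimal sketch (not part of the patch) of how a hot-add under
MHP_MEMMAP_ON_MEMORY would now feed an altmap into the block devices.
example_hot_add_block() is a made-up name, and setting end_pfn here is
an assumption; it is shown because boundary checks on architectures
such as powerpc are exactly the "more details" the changelog refers to:

static int example_hot_add_block(unsigned long start, unsigned long size,
                                 struct memory_group *group)
{
        /* Carve the memmap out of the hot-added range itself. */
        struct vmem_altmap mhp_altmap = {
                .base_pfn = PHYS_PFN(start),
                .end_pfn = PHYS_PFN(start + size),      /* assumed */
                .free = PHYS_PFN(size),
        };

        /*
         * arch_add_memory() would run here with params.altmap set to
         * &mhp_altmap; vmemmap_populate() bumps mhp_altmap.alloc as it
         * consumes pages from the start of the block.
         */

        /*
         * Each new memory_block gets a copy of the final altmap state,
         * so online/offline can recompute the vmemmap page count as
         * alloc + reserve, and try_remove_memory() can recover the
         * whole altmap for the architecture's unmap path.
         */
        return create_memory_block_devices(start, size, &mhp_altmap, group);
}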