@@ -85,6 +85,7 @@ config VIRTIO_MEM
depends on VIRTIO
depends on MEMORY_HOTPLUG_SPARSE
depends on MEMORY_HOTREMOVE
+ select CONTIG_ALLOC
help
This driver provides access to virtio-mem paravirtualized memory
devices, allowing to hotplug and hotunplug memory.
@@ -23,6 +23,10 @@
#include <acpi/acpi_numa.h>
+static bool unplug_online = true;
+module_param(unplug_online, bool, 0644);
+MODULE_PARM_DESC(unplug_online, "Try to unplug online memory");
+
enum virtio_mem_mb_state {
/* Unplugged, not added to Linux. Can be reused later. */
VIRTIO_MEM_MB_STATE_UNUSED = 0,
@@ -654,23 +658,35 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
}
/*
- * Set a range of pages PG_offline.
+ * Set a range of pages PG_offline. Remember pages that were never onlined
+ * (via generic_online_page()) using PageDirty().
*/
static void virtio_mem_set_fake_offline(unsigned long pfn,
- unsigned int nr_pages)
+ unsigned int nr_pages, bool onlined)
{
- for (; nr_pages--; pfn++)
- __SetPageOffline(pfn_to_page(pfn));
+ for (; nr_pages--; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ __SetPageOffline(page);
+ if (!onlined)
+ SetPageDirty(page);
+ }
}
/*
- * Clear PG_offline from a range of pages.
+ * Clear PG_offline from a range of pages. If the pages were never onlined
+ * (via generic_online_page()), clear PageDirty().
*/
static void virtio_mem_clear_fake_offline(unsigned long pfn,
- unsigned int nr_pages)
+ unsigned int nr_pages, bool onlined)
{
- for (; nr_pages--; pfn++)
- __ClearPageOffline(pfn_to_page(pfn));
+ for (; nr_pages--; pfn++) {
+ struct page *page = pfn_to_page(pfn);
+
+ __ClearPageOffline(page);
+ if (!onlined)
+ ClearPageDirty(page);
+ }
}
/*
@@ -686,10 +702,26 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages)
* We are always called with subblock granularity, which is at least
* aligned to MAX_ORDER - 1.
*/
- virtio_mem_clear_fake_offline(pfn, nr_pages);
+ for (i = 0; i < nr_pages; i += 1 << order) {
+ struct page *page = pfn_to_page(pfn + i);
- for (i = 0; i < nr_pages; i += 1 << order)
- generic_online_page(pfn_to_page(pfn + i), order);
+ /*
+ * If the page is PageDirty(), it was kept fake-offline when
+ * onlining the memory block. Otherwise, it was allocated
+ * using alloc_contig_range(). All pages in a subblock are
+ * alike.
+ */
+ if (PageDirty(page)) {
+ virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+ false);
+ generic_online_page(page, order);
+ } else {
+ virtio_mem_clear_fake_offline(pfn + i, 1 << order,
+ true);
+ free_contig_range(pfn + i, 1 << order);
+ adjust_managed_page_count(page, 1 << order);
+ }
+ }
}
static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
@@ -718,7 +750,8 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order)
if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
generic_online_page(page, order);
else
- virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order);
+ virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
+ false);
rcu_read_unlock();
return;
}
@@ -1186,6 +1219,72 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm,
return 0;
}
+/*
+ * Unplug the desired number of plugged subblocks of an online memory block.
+ * Will skip subblocks that are busy.
+ *
+ * Will modify the state of the memory block.
+ *
+ * Note: Can fail after some subblocks were successfully unplugged. Can
+ * return 0 even if subblocks were busy and could not get unplugged.
+ */
+static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm,
+ unsigned long mb_id,
+ uint64_t *nb_sb)
+{
+ const unsigned long nr_pages = PFN_DOWN(vm->subblock_size);
+ unsigned long start_pfn;
+ int rc, sb_id;
+
+ /*
+ * TODO: To increase the performance we want to try bigger, consecutive
+ * subblocks first before falling back to single subblocks. Also,
+ * we should sense via something like is_mem_section_removable()
+ * first if it makes sense to go ahead and try to allocate.
+ */
+ for (sb_id = 0; sb_id < vm->nb_sb_per_mb && *nb_sb; sb_id++) {
+ /* Find the next candidate subblock */
+ while (sb_id < vm->nb_sb_per_mb &&
+ !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1))
+ sb_id++;
+ if (sb_id >= vm->nb_sb_per_mb)
+ break;
+
+ start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
+ sb_id * vm->subblock_size);
+ rc = alloc_contig_range(start_pfn, start_pfn + nr_pages,
+ MIGRATE_MOVABLE, GFP_KERNEL);
+ if (rc == -ENOMEM)
+ /* whoops, out of memory */
+ return rc;
+ if (rc)
+ /* memory busy, we can't unplug this chunk */
+ continue;
+
+ /* Mark it as fake-offline before unplugging it */
+ virtio_mem_set_fake_offline(start_pfn, nr_pages, true);
+ adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
+
+ /* Try to unplug the allocated memory */
+ rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, 1);
+ if (rc) {
+ /* Return the memory to the buddy. */
+ virtio_mem_fake_online(start_pfn, nr_pages);
+ return rc;
+ }
+
+ virtio_mem_mb_set_state(vm, mb_id,
+ VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
+ *nb_sb -= 1;
+ }
+
+ /*
+ * TODO: Once all subblocks of a memory block were unplugged, we want
+ * to offline the memory block and remove it.
+ */
+ return 0;
+}
+
/*
* Try to unplug the requested amount of memory.
*/
@@ -1225,8 +1324,37 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
cond_resched();
}
+ if (!unplug_online) {
+ mutex_unlock(&vm->hotplug_mutex);
+ return 0;
+ }
+
+ /* Try to unplug subblocks of partially plugged online blocks. */
+ virtio_mem_for_each_mb_state_rev(vm, mb_id,
+ VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) {
+ rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
+ &nb_sb);
+ if (rc || !nb_sb)
+ goto out_unlock;
+ mutex_unlock(&vm->hotplug_mutex);
+ cond_resched();
+ mutex_lock(&vm->hotplug_mutex);
+ }
+
+ /* Try to unplug subblocks of plugged online blocks. */
+ virtio_mem_for_each_mb_state_rev(vm, mb_id,
+ VIRTIO_MEM_MB_STATE_ONLINE) {
+ rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id,
+ &nb_sb);
+ if (rc || !nb_sb)
+ goto out_unlock;
+ mutex_unlock(&vm->hotplug_mutex);
+ cond_resched();
+ mutex_lock(&vm->hotplug_mutex);
+ }
+
mutex_unlock(&vm->hotplug_mutex);
- return 0;
+ return nb_sb ? -EBUSY : 0;
out_unlock:
mutex_unlock(&vm->hotplug_mutex);
return rc;
@@ -1332,7 +1460,8 @@ static void virtio_mem_run_wq(struct work_struct *work)
case -EBUSY:
/*
* The hypervisor cannot process our request right now
- * (e.g., out of memory, migrating).
+ * (e.g., out of memory, migrating) or we cannot free up
+ * any memory to unplug it (all plugged memory is busy).
*/
case -ENOMEM:
/* Out of memory, try again later. */
@@ -8594,6 +8594,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
pfn_max_align_up(end), migratetype);
return ret;
}
+EXPORT_SYMBOL(alloc_contig_range);
static int __alloc_contig_pages(unsigned long start_pfn,
unsigned long nr_pages, gfp_t gfp_mask)
@@ -8709,6 +8710,7 @@ void free_contig_range(unsigned long pfn, unsigned int nr_pages)
}
WARN(count != 0, "%d pages are still in use!\n", count);
}
+EXPORT_SYMBOL(free_contig_range);
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu