@@ -67,6 +67,8 @@ extern int migrate_page(struct address_space *mapping,
enum migrate_mode mode);
extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free,
unsigned long private, enum migrate_mode mode, int reason);
+extern int migrate_pages_concur(struct list_head *l, new_page_t new, free_page_t free,
+ unsigned long private, enum migrate_mode mode, int reason);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
@@ -87,6 +89,10 @@ static inline int migrate_pages(struct list_head *l, new_page_t new,
free_page_t free, unsigned long private, enum migrate_mode mode,
int reason)
{ return -ENOSYS; }
+static inline int migrate_pages_concur(struct list_head *l, new_page_t new,
+ free_page_t free, unsigned long private, enum migrate_mode mode,
+ int reason)
+ { return -ENOSYS; }
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
@@ -24,6 +24,7 @@ enum migrate_mode {
MIGRATE_SINGLETHREAD = 0,
MIGRATE_MT = 1<<4,
MIGRATE_DMA = 1<<5,
+ MIGRATE_CONCUR = 1<<6,
};
#endif /* MIGRATE_MODE_H_INCLUDED */
@@ -50,6 +50,7 @@ enum {
#define MPOL_MF_MOVE_DMA (1<<5) /* Use DMA page copy routine */
#define MPOL_MF_MOVE_MT (1<<6) /* Use multi-threaded page copy routine */
+#define MPOL_MF_MOVE_CONCUR (1<<7) /* Move pages in a batch */
#define MPOL_MF_VALID (MPOL_MF_STRICT | \
MPOL_MF_MOVE | \
@@ -57,6 +57,15 @@
int accel_page_copy = 1;
+
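+/*
+ * Bookkeeping for one page in a concurrent migration batch: the source and
+ * destination pages plus the unmap state (anon_vma reference, whether the
+ * page was mapped) needed to finish or roll back the migration later.
+ */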
+struct page_migration_work_item {
+ struct list_head list;
+ struct page *old_page;
+ struct page *new_page;
+ struct anon_vma *anon_vma;
+ int page_was_mapped;
+};
+
/*
* migrate_prep() needs to be called before we start compiling a list of pages
* to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
@@ -1396,6 +1405,509 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
return rc;
}
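+/*
+ * Lock and unmap a single page in preparation for a batched copy. This
+ * mirrors the unmap half of __unmap_and_move(): it takes the page locks,
+ * grabs the anon_vma and installs migration PTEs, but stops short of
+ * copying data or transferring page state, which is done later for the
+ * whole batch.
+ */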
+static int __unmap_page_concur(struct page *page, struct page *newpage,
+ struct anon_vma **anon_vma,
+ int *page_was_mapped,
+ int force, enum migrate_mode mode)
+{
+ int rc = -EAGAIN;
+ bool is_lru = !__PageMovable(page);
+
+ *anon_vma = NULL;
+ *page_was_mapped = 0;
+
+ if (!trylock_page(page)) {
+ if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
+ goto out;
+
+ /*
+ * It's not safe for direct compaction to call lock_page.
+ * For example, during page readahead pages are added locked
+ * to the LRU. Later, when the IO completes the pages are
+ * marked uptodate and unlocked. However, the queueing
+ * could be merging multiple pages for one bio (e.g.
+ * mpage_readpages). If an allocation happens for the
+ * second or third page, the process can end up locking
+ * the same page twice and deadlocking. Rather than
+ * trying to be clever about what pages can be locked,
+ * avoid the use of lock_page for direct compaction
+ * altogether.
+ */
+ if (current->flags & PF_MEMALLOC)
+ goto out;
+
+ lock_page(page);
+ }
+
+	/*
+	 * Only pages with page_mapping(page) == NULL are handled here, so
+	 * writeback cannot be in progress.
+	 */
+ VM_BUG_ON_PAGE(PageWriteback(page), page);
+#if 0
+ if (PageWriteback(page)) {
+ /*
+ * Only in the case of a full synchronous migration is it
+ * necessary to wait for PageWriteback. In the async case,
+ * the retry loop is too short and in the sync-light case,
+ * the overhead of stalling is too much
+ */
+ if ((mode & MIGRATE_MODE_MASK) != MIGRATE_SYNC) {
+ rc = -EBUSY;
+ goto out_unlock;
+ }
+ if (!force)
+ goto out_unlock;
+ wait_on_page_writeback(page);
+ }
+#endif
+
+ /*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that anon_vma is freed while we migrate a page.
+	 * This get_anon_vma() delays freeing the anon_vma pointer until the
+	 * end of migration. File cache pages are no problem because of
+	 * page_lock(); file caches may use write_page() or lock_page() in
+	 * migration, so only anon pages need care here.
+ *
+ * Only page_get_anon_vma() understands the subtleties of
+ * getting a hold on an anon_vma from outside one of its mms.
+ * But if we cannot get anon_vma, then we won't need it anyway,
+ * because that implies that the anon page is no longer mapped
+ * (and cannot be remapped so long as we hold the page lock).
+ */
+ if (PageAnon(page) && !PageKsm(page))
+ *anon_vma = page_get_anon_vma(page);
+
+ /*
+ * Block others from accessing the new page when we get around to
+ * establishing additional references. We are usually the only one
+ * holding a reference to newpage at this point. We used to have a BUG
+ * here if trylock_page(newpage) fails, but would like to allow for
+ * cases where there might be a race with the previous use of newpage.
+ * This is much like races on refcount of oldpage: just don't BUG().
+ */
+ if (unlikely(!trylock_page(newpage)))
+ goto out_unlock;
+
+ if (unlikely(!is_lru)) {
+		/*
+		 * Non-LRU movable pages are not expected on the concurrent
+		 * path; migrate in place as a fallback.
+		 */
+		VM_BUG_ON(1);
+ rc = move_to_new_page(newpage, page, mode);
+ goto out_unlock_both;
+ }
+
+ /*
+ * Corner case handling:
+	 * 1. When a new swap-cache page is read in, it is added to the LRU
+	 *    and treated as swapcache but it has no rmap yet.
+ * Calling try_to_unmap() against a page->mapping==NULL page will
+ * trigger a BUG. So handle it here.
+ * 2. An orphaned page (see truncate_complete_page) might have
+ * fs-private metadata. The page can be picked up due to memory
+ * offlining. Everywhere else except page reclaim, the page is
+ * invisible to the vm, so the page can not be migrated. So try to
+ * free the metadata, so the page can be freed.
+ */
+ if (!page->mapping) {
+ VM_BUG_ON_PAGE(PageAnon(page), page);
+ if (page_has_private(page)) {
+ try_to_free_buffers(page);
+ goto out_unlock_both;
+ }
+ } else if (page_mapped(page)) {
+ /* Establish migration ptes */
+ VM_BUG_ON_PAGE(PageAnon(page) && !PageKsm(page) && !*anon_vma,
+ page);
+ try_to_unmap(page,
+ TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+ *page_was_mapped = 1;
+ }
+
+ return MIGRATEPAGE_SUCCESS;
+
+out_unlock_both:
+ unlock_page(newpage);
+out_unlock:
+ /* Drop an anon_vma reference if we took one */
+ if (*anon_vma)
+ put_anon_vma(*anon_vma);
+ unlock_page(page);
+out:
+ return rc;
+}
+
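+/*
+ * Allocate the destination page and unmap the source page for one work
+ * item. Pages that were freed under us or that fail permanently are taken
+ * off the migration list here; on -EAGAIN the source page stays on the
+ * list and the freshly allocated destination page is released.
+ */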
+static int unmap_pages_and_get_new_concur(new_page_t get_new_page,
+ free_page_t put_new_page, unsigned long private,
+ struct page_migration_work_item *item,
+ int force,
+ enum migrate_mode mode, enum migrate_reason reason)
+{
+ int rc = MIGRATEPAGE_SUCCESS;
+
+ if (!thp_migration_supported() && PageTransHuge(item->old_page))
+ return -ENOMEM;
+
+ item->new_page = get_new_page(item->old_page, private);
+ if (!item->new_page)
+ return -ENOMEM;
+
+ if (page_count(item->old_page) == 1) {
+ /* page was freed from under us. So we are done. */
+ ClearPageActive(item->old_page);
+ ClearPageUnevictable(item->old_page);
+ if (unlikely(__PageMovable(item->old_page))) {
+ lock_page(item->old_page);
+ if (!PageMovable(item->old_page))
+ __ClearPageIsolated(item->old_page);
+ unlock_page(item->old_page);
+ }
+ if (put_new_page)
+ put_new_page(item->new_page, private);
+ else
+ put_page(item->new_page);
+ item->new_page = NULL;
+ goto out;
+ }
+
+ rc = __unmap_page_concur(item->old_page, item->new_page, &item->anon_vma,
+ &item->page_was_mapped,
+ force, mode);
+ if (rc == MIGRATEPAGE_SUCCESS)
+ return rc;
+
+out:
+ if (rc != -EAGAIN) {
+ list_del(&item->old_page->lru);
+
+ if (likely(!__PageMovable(item->old_page)))
+ mod_node_page_state(page_pgdat(item->old_page), NR_ISOLATED_ANON +
+ page_is_file_cache(item->old_page),
+ -hpage_nr_pages(item->old_page));
+ }
+
+ if (rc == MIGRATEPAGE_SUCCESS) {
+ /* only for pages freed under us */
+ VM_BUG_ON(page_count(item->old_page) != 1);
+ put_page(item->old_page);
+ item->old_page = NULL;
+
+ } else {
+ if (rc != -EAGAIN) {
+ if (likely(!__PageMovable(item->old_page))) {
+ putback_lru_page(item->old_page);
+ goto put_new;
+ }
+
+ lock_page(item->old_page);
+ if (PageMovable(item->old_page))
+ putback_movable_page(item->old_page);
+ else
+ __ClearPageIsolated(item->old_page);
+ unlock_page(item->old_page);
+ put_page(item->old_page);
+ }
+
+ /*
+ * If migration was not successful and there's a freeing callback, use
+ * it. Otherwise, putback_lru_page() will drop the reference grabbed
+ * during isolation.
+ */
+put_new:
+ if (put_new_page)
+ put_new_page(item->new_page, private);
+ else
+ put_page(item->new_page);
+ item->new_page = NULL;
+
+ }
+
+ return rc;
+}
+
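+/*
+ * Transfer index, mapping and swap-backed state from each unmapped source
+ * page to its destination page. Pages that picked up extra references
+ * since being unmapped are rolled back (migration PTEs removed, locks and
+ * references dropped) and moved back onto the work-in-progress list.
+ */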
+static int move_mapping_concurr(struct list_head *unmapped_list_ptr,
+ struct list_head *wip_list_ptr,
+ free_page_t put_new_page, unsigned long private,
+ enum migrate_mode mode)
+{
+ struct page_migration_work_item *iterator, *iterator2;
+ struct address_space *mapping;
+
+ list_for_each_entry_safe(iterator, iterator2, unmapped_list_ptr, list) {
+ VM_BUG_ON_PAGE(!PageLocked(iterator->old_page), iterator->old_page);
+ VM_BUG_ON_PAGE(!PageLocked(iterator->new_page), iterator->new_page);
+
+ mapping = page_mapping(iterator->old_page);
+
+ VM_BUG_ON(mapping);
+
+ VM_BUG_ON(PageWriteback(iterator->old_page));
+
+ if (page_count(iterator->old_page) != 1) {
+ list_move(&iterator->list, wip_list_ptr);
+ if (iterator->page_was_mapped)
+ remove_migration_ptes(iterator->old_page,
+ iterator->old_page, false);
+ unlock_page(iterator->new_page);
+ if (iterator->anon_vma)
+ put_anon_vma(iterator->anon_vma);
+ unlock_page(iterator->old_page);
+
+ if (put_new_page)
+ put_new_page(iterator->new_page, private);
+ else
+ put_page(iterator->new_page);
+ iterator->new_page = NULL;
+ continue;
+ }
+
+ iterator->new_page->index = iterator->old_page->index;
+ iterator->new_page->mapping = iterator->old_page->mapping;
+ if (PageSwapBacked(iterator->old_page))
+ SetPageSwapBacked(iterator->new_page);
+ }
+
+ return 0;
+}
+
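+/*
+ * Copy page contents for the whole batch in one go, using the DMA engine
+ * or the multi-threaded copier depending on the migration mode, and fall
+ * back to copying each page on the CPU if the batched copy fails. Page
+ * state is then transferred per page via migrate_page_states().
+ */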
+static int copy_to_new_pages_concur(struct list_head *unmapped_list_ptr,
+ enum migrate_mode mode)
+{
+ struct page_migration_work_item *iterator;
+ int num_pages = 0, idx = 0;
+ struct page **src_page_list = NULL, **dst_page_list = NULL;
+ unsigned long size = 0;
+ int rc = -EFAULT;
+
+ if (list_empty(unmapped_list_ptr))
+ return 0;
+
+ list_for_each_entry(iterator, unmapped_list_ptr, list) {
+ ++num_pages;
+ size += PAGE_SIZE * hpage_nr_pages(iterator->old_page);
+ }
+
+	src_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	dst_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	if (!src_page_list || !dst_page_list)
+		goto copy_fallback;
+
+ list_for_each_entry(iterator, unmapped_list_ptr, list) {
+ src_page_list[idx] = iterator->old_page;
+ dst_page_list[idx] = iterator->new_page;
+ ++idx;
+ }
+
+ BUG_ON(idx != num_pages);
+
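+	/*
+	 * Hand the whole batch to the DMA engine or the multi-threaded
+	 * copier; rc stays non-zero if neither is requested or the batched
+	 * copy fails, triggering the per-page fallback below.
+	 */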
+ if (mode & MIGRATE_DMA)
+ rc = copy_page_lists_dma_always(dst_page_list, src_page_list,
+ num_pages);
+ else if (mode & MIGRATE_MT)
+ rc = copy_page_lists_mt(dst_page_list, src_page_list,
+ num_pages);
+
+copy_fallback:
+	if (rc) {
+ list_for_each_entry(iterator, unmapped_list_ptr, list) {
+ if (PageHuge(iterator->old_page) ||
+ PageTransHuge(iterator->old_page))
+ copy_huge_page(iterator->new_page, iterator->old_page, 0);
+ else
+ copy_highpage(iterator->new_page, iterator->old_page);
+ }
+ }
+
+ kfree(src_page_list);
+ kfree(dst_page_list);
+
+ list_for_each_entry(iterator, unmapped_list_ptr, list) {
+ migrate_page_states(iterator->new_page, iterator->old_page);
+ }
+
+ return 0;
+}
+
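+/*
+ * Finish the batch: restore PTEs to point at the new pages, unlock both
+ * pages, drop anon_vma references, update the isolated-page counters and
+ * release the old pages, putting the new pages back on the LRU.
+ */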
+static int remove_migration_ptes_concurr(struct list_head *unmapped_list_ptr)
+{
+ struct page_migration_work_item *iterator, *iterator2;
+
+ list_for_each_entry_safe(iterator, iterator2, unmapped_list_ptr, list) {
+ if (iterator->page_was_mapped)
+ remove_migration_ptes(iterator->old_page, iterator->new_page, false);
+
+ unlock_page(iterator->new_page);
+
+ if (iterator->anon_vma)
+ put_anon_vma(iterator->anon_vma);
+
+ unlock_page(iterator->old_page);
+
+ list_del(&iterator->old_page->lru);
+ mod_node_page_state(page_pgdat(iterator->old_page), NR_ISOLATED_ANON +
+ page_is_file_cache(iterator->old_page),
+ -hpage_nr_pages(iterator->old_page));
+
+ put_page(iterator->old_page);
+ iterator->old_page = NULL;
+
+ if (unlikely(__PageMovable(iterator->new_page)))
+ put_page(iterator->new_page);
+ else
+ putback_lru_page(iterator->new_page);
+ iterator->new_page = NULL;
+ }
+
+ return 0;
+}
+
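+/*
+ * migrate_pages_concur() - migrate a list of pages by unmapping them all
+ * first and then copying their contents as one batch (optionally with DMA
+ * or multiple threads), instead of migrating them one at a time. Pages
+ * that cannot be handled concurrently (huge pages, pages with a mapping)
+ * and any leftovers fall back to the regular migrate_pages() path.
+ */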
+int migrate_pages_concur(struct list_head *from, new_page_t get_new_page,
+ free_page_t put_new_page, unsigned long private,
+ enum migrate_mode mode, int reason)
+{
+ int retry = 1;
+ int nr_failed = 0;
+ int nr_succeeded = 0;
+ int pass = 0;
+ struct page *page;
+ int swapwrite = current->flags & PF_SWAPWRITE;
+ int rc;
+ int total_num_pages = 0, idx;
+ struct page_migration_work_item *item_list;
+ struct page_migration_work_item *iterator, *iterator2;
+ int item_list_order = 0;
+
+ LIST_HEAD(wip_list);
+ LIST_HEAD(unmapped_list);
+ LIST_HEAD(serialized_list);
+ LIST_HEAD(failed_list);
+
+ if (!swapwrite)
+ current->flags |= PF_SWAPWRITE;
+
+ list_for_each_entry(page, from, lru)
+ ++total_num_pages;
+
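+	/*
+	 * Work items for the whole batch are kept in one physically
+	 * contiguous array, one entry per page.
+	 */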
+ item_list_order = get_order(total_num_pages *
+ sizeof(struct page_migration_work_item));
+
+	if (item_list_order >= MAX_ORDER) {
+ item_list = alloc_pages_exact(total_num_pages *
+ sizeof(struct page_migration_work_item), GFP_ATOMIC);
+ memset(item_list, 0, total_num_pages *
+ sizeof(struct page_migration_work_item));
+ } else {
+ item_list = (struct page_migration_work_item *)__get_free_pages(GFP_ATOMIC,
+ item_list_order);
+ memset(item_list, 0, PAGE_SIZE<<item_list_order);
+ }
+
+ idx = 0;
+ list_for_each_entry(page, from, lru) {
+ item_list[idx].old_page = page;
+ item_list[idx].new_page = NULL;
+ INIT_LIST_HEAD(&item_list[idx].list);
+ list_add_tail(&item_list[idx].list, &wip_list);
+ idx += 1;
+ }
+
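+	/* A single pass for now: unmap what we can, batch-copy, then restore. */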
+	for (pass = 0; pass < 1 && retry; pass++) {
+ retry = 0;
+
+		/* Unmap each page and allocate its destination; only pages without a mapping are handled here */
+ list_for_each_entry_safe(iterator, iterator2, &wip_list, list) {
+ cond_resched();
+
+ if (iterator->new_page) {
+ pr_info("%s: iterator already has a new page?\n", __func__);
+ VM_BUG_ON_PAGE(1, iterator->old_page);
+ }
+
+			/*
+			 * Huge pages and pages with a mapping (file-backed or
+			 * swap cache) are not migrated concurrently; push them
+			 * to the serialized list instead.
+			 */
+			if (PageHuge(iterator->old_page) ||
+			    page_mapping(iterator->old_page))
+				rc = -ENODEV;
+			else
+				rc = unmap_pages_and_get_new_concur(get_new_page, put_new_page,
+						private, iterator, pass > 2, mode,
+						reason);
+
+			switch (rc) {
+ case -ENODEV:
+ list_move(&iterator->list, &serialized_list);
+ break;
+ case -ENOMEM:
+				if (PageTransHuge(iterator->old_page))
+ list_move(&iterator->list, &serialized_list);
+ else
+ goto out;
+ break;
+ case -EAGAIN:
+ retry++;
+ break;
+ case MIGRATEPAGE_SUCCESS:
+ if (iterator->old_page) {
+ list_move(&iterator->list, &unmapped_list);
+ nr_succeeded++;
+ } else { /* pages are freed under us */
+ list_del(&iterator->list);
+ }
+ break;
+ default:
+ /*
+ * Permanent failure (-EBUSY, -ENOSYS, etc.):
+ * unlike -EAGAIN case, the failed page is
+ * removed from migration page list and not
+ * retried in the next outer loop.
+ */
+ list_move(&iterator->list, &failed_list);
+ nr_failed++;
+ break;
+ }
+ }
+out:
+ if (list_empty(&unmapped_list))
+ continue;
+
+		/*
+		 * Transfer page->mapping and related state to the new pages;
+		 * pages that gained extra references are bailed back out to
+		 * the work-in-progress list.
+		 */
+ move_mapping_concurr(&unmapped_list, &wip_list, put_new_page, private, mode);
+ /* copy pages in unmapped_list */
+ copy_to_new_pages_concur(&unmapped_list, mode);
+		/*
+		 * Remove the migration PTEs (mapping the new pages), unlock
+		 * both pages, drop anon_vma references, and release the old
+		 * and new pages.
+		 */
+ remove_migration_ptes_concurr(&unmapped_list);
+ }
+ nr_failed += retry;
+ rc = nr_failed;
+
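+	/*
+	 * Anything still on the source list (serialized, retried or
+	 * bailed-out pages) is migrated through the regular
+	 * one-page-at-a-time path.
+	 */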
+ if (!list_empty(from))
+ rc = migrate_pages(from, get_new_page, put_new_page,
+ private, mode, reason);
+
+ if (nr_succeeded)
+ count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
+ if (nr_failed)
+ count_vm_events(PGMIGRATE_FAIL, nr_failed);
+ trace_mm_migrate_pages(nr_succeeded, nr_failed, mode, reason);
+
+ if (item_list_order >= MAX_ORDER) {
+ free_pages_exact(item_list, total_num_pages *
+ sizeof(struct page_migration_work_item));
+ } else {
+ free_pages((unsigned long)item_list, item_list_order);
+ }
+
+ if (!swapwrite)
+ current->flags &= ~PF_SWAPWRITE;
+
+ return rc;
+}
+
/*
* migrate_pages - migrate the pages specified in a list, to the free pages
* supplied as the target for the page migration
@@ -1521,17 +2033,25 @@ static int store_status(int __user *status, int start, int value, int nr)
static int do_move_pages_to_node(struct mm_struct *mm,
struct list_head *pagelist, int node,
- bool migrate_mt, bool migrate_dma)
+ bool migrate_mt, bool migrate_dma, bool migrate_concur)
{
int err;
if (list_empty(pagelist))
return 0;
- err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
- MIGRATE_SYNC | (migrate_mt ? MIGRATE_MT : MIGRATE_SINGLETHREAD) |
- (migrate_dma ? MIGRATE_DMA : MIGRATE_SINGLETHREAD),
- MR_SYSCALL);
+ if (migrate_concur) {
+ err = migrate_pages_concur(pagelist, alloc_new_node_page, NULL, node,
+ MIGRATE_SYNC | (migrate_mt ? MIGRATE_MT : MIGRATE_SINGLETHREAD) |
+ (migrate_dma ? MIGRATE_DMA : MIGRATE_SINGLETHREAD),
+ MR_SYSCALL);
+
+ } else {
+ err = migrate_pages(pagelist, alloc_new_node_page, NULL, node,
+ MIGRATE_SYNC | (migrate_mt ? MIGRATE_MT : MIGRATE_SINGLETHREAD) |
+ (migrate_dma ? MIGRATE_DMA : MIGRATE_SINGLETHREAD),
+ MR_SYSCALL);
+ }
if (err)
putback_movable_pages(pagelist);
return err;
@@ -1653,7 +2173,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
start = i;
} else if (node != current_node) {
err = do_move_pages_to_node(mm, &pagelist, current_node,
- flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA);
+ flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA,
+ flags & MPOL_MF_MOVE_CONCUR);
if (err)
goto out;
err = store_status(status, start, current_node, i - start);
@@ -1677,7 +2198,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
goto out_flush;
err = do_move_pages_to_node(mm, &pagelist, current_node,
- flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA);
+ flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA,
+ flags & MPOL_MF_MOVE_CONCUR);
if (err)
goto out;
if (i > start) {
@@ -1693,7 +2215,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
/* Make sure we do not overwrite the existing error */
err1 = do_move_pages_to_node(mm, &pagelist, current_node,
- flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA);
+ flags & MPOL_MF_MOVE_MT, flags & MPOL_MF_MOVE_DMA,
+ flags & MPOL_MF_MOVE_CONCUR);
if (!err1)
err1 = store_status(status, start, current_node, i - start);
if (!err)
@@ -1789,7 +2312,9 @@ static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
nodemask_t task_nodes;
/* Check flags */
- if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL|MPOL_MF_MOVE_MT|MPOL_MF_MOVE_DMA))
+ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL|
+ MPOL_MF_MOVE_DMA|MPOL_MF_MOVE_MT|
+ MPOL_MF_MOVE_CONCUR))
return -EINVAL;
if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))