@@ -46,6 +46,7 @@ extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
extern void migrate_prep(void);
+extern void migrate_finish(void);
extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
@@ -67,6 +68,7 @@ static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
static inline int migrate_prep(void) { return -ENOSYS; }
+static inline int migrate_finish(void) { return -ENOSYS; }
static inline int migrate_prep_local(void) { return -ENOSYS; }
static inline void migrate_page_states(struct page *newpage, struct page *page)
@@ -339,6 +339,20 @@ extern void lru_note_cost(struct lruvec *lruvec, bool file,
extern void lru_note_cost_page(struct page *);
extern void lru_cache_add(struct page *);
extern void mark_page_accessed(struct page *);
+
+extern atomic_t lru_disable_count;
+
+static inline bool lru_cache_disabled(void)
+{
+ return atomic_read(&lru_disable_count);
+}
+
+static inline void lru_cache_enable(void)
+{
+ atomic_dec(&lru_disable_count);
+}
+
+extern void lru_cache_disable(void);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
@@ -1611,6 +1611,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
* in a way that pages from isolated pageblock are left on pcplists.
*/
zone_pcp_disable(zone);
+ lru_cache_disable();
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
@@ -1642,7 +1643,6 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
}
cond_resched();
- lru_add_drain_all();
ret = scan_movable_pages(pfn, end_pfn, &pfn);
if (!ret) {
@@ -1687,6 +1687,7 @@ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
spin_unlock_irqrestore(&zone->lock, flags);
+ lru_cache_enable();
zone_pcp_enable(zone);
/* removal success */
@@ -1208,6 +1208,8 @@ int do_migrate_pages(struct mm_struct *mm, const nodemask_t *from,
break;
}
mmap_read_unlock(mm);
+
+ migrate_finish();
if (err < 0)
return err;
return busy;
@@ -1371,6 +1373,8 @@ static long do_mbind(unsigned long start, unsigned long len,
mmap_write_unlock(mm);
mpol_out:
mpol_put(new);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ migrate_finish();
return err;
}
@@ -66,11 +66,13 @@ void migrate_prep(void)
{
/*
* Clear the LRU lists so pages can be isolated.
- * Note that pages may be moved off the LRU after we have
- * drained them. Those pages will fail to migrate like other
- * pages that may be busy.
*/
- lru_add_drain_all();
+ lru_cache_disable();
+}
+
+void migrate_finish(void)
+{
+ lru_cache_enable();
}
/* Do the necessary work of migrate_prep but not if it involves other CPUs */
@@ -1838,6 +1840,7 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
if (err >= 0)
err = err1;
out:
+ migrate_finish();
return err;
}
@@ -8495,6 +8495,8 @@ static int __alloc_contig_migrate_range(struct compact_control *cc,
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
}
+
+ migrate_finish();
if (ret < 0) {
putback_movable_pages(&cc->migratepages);
return ret;
@@ -235,6 +235,18 @@ static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
}
}
+/* return true if pagevec needs to drain */
+static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page)
+{
+ bool ret = false;
+
+ if (!pagevec_add(pvec, page) || PageCompound(page) ||
+ lru_cache_disabled())
+ ret = true;
+
+ return ret;
+}
+
/*
* Writeback is about to end against a page which has been marked for immediate
* reclaim. If it still appears to be reclaimable, move it to the tail of the
@@ -252,7 +264,7 @@ void rotate_reclaimable_page(struct page *page)
get_page(page);
local_lock_irqsave(&lru_rotate.lock, flags);
pvec = this_cpu_ptr(&lru_rotate.pvec);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
local_unlock_irqrestore(&lru_rotate.lock, flags);
}
@@ -343,7 +355,7 @@ static void activate_page(struct page *page)
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.activate_page);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page);
local_unlock(&lru_pvecs.lock);
}
@@ -458,7 +470,7 @@ void lru_cache_add(struct page *page)
get_page(page);
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
__pagevec_lru_add(pvec);
local_unlock(&lru_pvecs.lock);
}
@@ -654,7 +666,7 @@ void deactivate_file_page(struct page *page)
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
local_unlock(&lru_pvecs.lock);
}
@@ -676,7 +688,7 @@ void deactivate_page(struct page *page)
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn);
local_unlock(&lru_pvecs.lock);
}
@@ -698,7 +710,7 @@ void mark_page_lazyfree(struct page *page)
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
local_unlock(&lru_pvecs.lock);
}
@@ -735,7 +747,7 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
* Calling this function with cpu hotplug locks held can actually lead
* to obscure indirect dependencies via WQ context.
*/
-void lru_add_drain_all(void)
+inline void __lru_add_drain_all(bool force_all_cpus)
{
/*
* lru_drain_gen - Global pages generation number
@@ -780,7 +792,7 @@ void lru_add_drain_all(void)
* (C) Exit the draining operation if a newer generation, from another
* lru_add_drain_all(), was already scheduled for draining. Check (A).
*/
- if (unlikely(this_gen != lru_drain_gen))
+ if (unlikely(this_gen != lru_drain_gen && !force_all_cpus))
goto done;
/*
@@ -810,7 +822,8 @@ void lru_add_drain_all(void)
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
- if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+ if (force_all_cpus ||
+ pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
@@ -828,6 +841,11 @@ void lru_add_drain_all(void)
done:
mutex_unlock(&lock);
}
+
+void lru_add_drain_all(void)
+{
+ __lru_add_drain_all(false);
+}
#else
void lru_add_drain_all(void)
{
@@ -835,6 +853,34 @@ void lru_add_drain_all(void)
}
#endif /* CONFIG_SMP */
+atomic_t lru_disable_count = ATOMIC_INIT(0);
+
+/*
+ * lru_cache_disable() needs to be called before we start compiling
+ * a list of pages to be migrated using isolate_lru_page().
+ * It drains pages on LRU cache and then disable on all cpus until
+ * lru_cache_enable is called.
+ *
+ * Must be paired with a call to lru_cache_enable().
+ */
+void lru_cache_disable(void)
+{
+ atomic_inc(&lru_disable_count);
+#ifdef CONFIG_SMP
+ /*
+ * lru_add_drain_all in the force mode will schedule draining on
+ * all online CPUs so any calls of lru_cache_disabled wrapped by
+ * local_lock or preemption disabled would be ordered by that.
+ * The atomic operation doesn't need to have stronger ordering
+ * requirements because that is enforeced by the scheduling
+ * guarantees.
+ */
+ __lru_add_drain_all(true);
+#else
+ lru_add_drain();
+#endif
+}
+
/**
* release_pages - batched put_page()
* @pages: array of pages to release