@@ -745,7 +745,9 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
*
- * This function relies on page->mem_cgroup being stable.
+ * The lruvec can be changed to its parent lruvec when the page is reparented.
+ * The caller needs to recheck if it cares about this change (just like
+ * lock_page_lruvec() does).
*/
static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
{
@@ -765,14 +767,6 @@ struct lruvec *lock_page_lruvec_irq(struct page *page);
struct lruvec *lock_page_lruvec_irqsave(struct page *page,
unsigned long *flags);
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page);
-#else
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
-{
-}
-#endif
-
static inline
struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
return css ? container_of(css, struct mem_cgroup, css) : NULL;
@@ -1212,10 +1206,6 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
return &pgdat->__lruvec;
}
-static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
-{
-}
-
static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{
return NULL;
@@ -517,6 +517,8 @@ static struct lruvec *compact_lock_page_irqsave(struct page *page,
{
struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
lruvec = mem_cgroup_page_lruvec(page);
/* Track if the lock is contended in async mode */
@@ -529,7 +531,13 @@ static struct lruvec *compact_lock_page_irqsave(struct page *page,
spin_lock_irqsave(&lruvec->lru_lock, *flags);
out:
- lruvec_memcg_debug(lruvec, page);
+ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
+ spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+ goto retry;
+ }
+
+ /* See the comments in lock_page_lruvec(). */
+ rcu_read_unlock();
return lruvec;
}
@@ -1178,23 +1178,6 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
return ret;
}
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
-{
- struct mem_cgroup *memcg;
-
- if (mem_cgroup_disabled())
- return;
-
- memcg = page_memcg(page);
-
- if (!memcg)
- VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != root_mem_cgroup, page);
- else
- VM_BUG_ON_PAGE(lruvec_memcg(lruvec) != memcg, page);
-}
-#endif
-
/**
* lock_page_lruvec - lock and return lruvec for a given page.
* @page: the page
@@ -1209,10 +1192,21 @@ struct lruvec *lock_page_lruvec(struct page *page)
{
struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
lruvec = mem_cgroup_page_lruvec(page);
spin_lock(&lruvec->lru_lock);
- lruvec_memcg_debug(lruvec, page);
+ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
+ spin_unlock(&lruvec->lru_lock);
+ goto retry;
+ }
+
+ /*
+ * Preemption is disabled while the lru_lock spinlock is held, so the
+ * locked region itself serves as an RCU read-side critical section.
+ */
+ rcu_read_unlock();
return lruvec;
}
@@ -1221,10 +1215,18 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
{
struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
lruvec = mem_cgroup_page_lruvec(page);
spin_lock_irq(&lruvec->lru_lock);
- lruvec_memcg_debug(lruvec, page);
+ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
+ spin_unlock_irq(&lruvec->lru_lock);
+ goto retry;
+ }
+
+ /* See the comments in lock_page_lruvec(). */
+ rcu_read_unlock();
return lruvec;
}
@@ -1233,10 +1235,18 @@ struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
{
struct lruvec *lruvec;
+ rcu_read_lock();
+retry:
lruvec = mem_cgroup_page_lruvec(page);
spin_lock_irqsave(&lruvec->lru_lock, *flags);
- lruvec_memcg_debug(lruvec, page);
+ if (unlikely(lruvec_memcg(lruvec) != page_memcg(page))) {
+ spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+ goto retry;
+ }
+
+ /* See the comments in lock_page_lruvec(). */
+ rcu_read_unlock();
return lruvec;
}
@@ -313,6 +313,11 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
void lru_note_cost_page(struct page *page)
{
+ /*
+ * The RCU read lock is held by the caller, so we do not need to
+ * worry about the lruvec returned by mem_cgroup_page_lruvec() being
+ * released.
+ */
lru_note_cost(mem_cgroup_page_lruvec(page),
page_is_file_lru(page), thp_nr_pages(page));
}