@@ -122,6 +122,7 @@ struct mem_cgroup_tree {
};
static struct mem_cgroup_tree soft_limit_tree __read_mostly;
+/*
+ * Per-node trees for the toptier soft limit, indexed by node id in the
+ * same way as soft_limit_tree.  Entries for nodes that are not in the
+ * N_TOPTIER state are set to NULL by mem_cgroup_init().
+ */
+static struct mem_cgroup_tree soft_limit_toptier_tree __read_mostly;
/* for OOM */
struct mem_cgroup_eventfd_list {
@@ -590,17 +591,27 @@ mem_cgroup_page_nodeinfo(struct mem_cgroup *memcg, struct page *page)
}
+/*
+ * soft_limit_tree_node - look up the per-node soft limit tree of a given type
+ * @nid: node id used to index the per-node tree array
+ * @type: N_MEMORY selects soft_limit_tree, N_TOPTIER selects
+ *        soft_limit_toptier_tree
+ *
+ * Returns the tree entry for @nid.  Returns NULL when @type is N_TOPTIER
+ * and @nid is not in the N_TOPTIER node state, or when @type is any other
+ * node state.  Callers are expected to check the result for NULL.
+ */
static struct mem_cgroup_tree_per_node *
-soft_limit_tree_node(int nid)
-{
- return soft_limit_tree.rb_tree_per_node[nid];
+soft_limit_tree_node(int nid, enum node_states type)
+{
+	switch (type) {
+	case N_MEMORY:
+		return soft_limit_tree.rb_tree_per_node[nid];
+	case N_TOPTIER:
+		/* Guard clause instead of if/else: no else after return. */
+		if (!node_state(nid, N_TOPTIER))
+			return NULL;
+		return soft_limit_toptier_tree.rb_tree_per_node[nid];
+	default:
+		return NULL;
+	}
}
+/*
+ * Return the soft limit tree of the requested @type for the node id
+ * reported by page_to_nid(@page).  The result, including a possible
+ * NULL, is exactly what soft_limit_tree_node() returns for that node.
+ */
static struct mem_cgroup_tree_per_node *
-soft_limit_tree_from_page(struct page *page)
+soft_limit_tree_from_page(struct page *page, enum node_states type)
{
- int nid = page_to_nid(page);
- return soft_limit_tree.rb_tree_per_node[nid];
+	return soft_limit_tree_node(page_to_nid(page), type);
}
static void __mem_cgroup_insert_exceeded(struct mem_cgroup_per_node *mz,
@@ -661,12 +672,24 @@ static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_node *mz,
spin_unlock_irqrestore(&mctz->lock, flags);
}
-static unsigned long soft_limit_excess(struct mem_cgroup *memcg)
+static unsigned long soft_limit_excess(struct mem_cgroup *memcg, enum node_states type)
{
- unsigned long nr_pages = page_counter_read(&memcg->memory);
- unsigned long soft_limit = READ_ONCE(memcg->soft_limit);
+ unsigned long nr_pages;
+ unsigned long soft_limit;
unsigned long excess = 0;
+ switch (type) {
+ case N_MEMORY:
+ nr_pages = page_counter_read(&memcg->memory);
+ soft_limit = READ_ONCE(memcg->soft_limit);
+ break;
+ case N_TOPTIER:
+ nr_pages = page_counter_read(&memcg->toptier);
+ soft_limit = READ_ONCE(memcg->toptier_soft_limit);
+ break;
+ default:
+ return 0;
+ }
if (nr_pages > soft_limit)
excess = nr_pages - soft_limit;
@@ -679,7 +702,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
struct mem_cgroup_per_node *mz;
struct mem_cgroup_tree_per_node *mctz;
- mctz = soft_limit_tree_from_page(page);
+ mctz = soft_limit_tree_from_page(page, N_MEMORY);
if (!mctz)
return;
/*
@@ -688,7 +711,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
*/
for (; memcg; memcg = parent_mem_cgroup(memcg)) {
mz = mem_cgroup_page_nodeinfo(memcg, page);
- excess = soft_limit_excess(memcg);
+ excess = soft_limit_excess(memcg, N_MEMORY);
/*
* We have to update the tree if mz is on RB-tree or
* mem is over its softlimit.
@@ -718,7 +741,7 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
for_each_node(nid) {
mz = mem_cgroup_nodeinfo(memcg, nid);
- mctz = soft_limit_tree_node(nid);
+ mctz = soft_limit_tree_node(nid, N_MEMORY);
if (mctz)
mem_cgroup_remove_exceeded(mz, mctz);
}
@@ -742,7 +765,7 @@ __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_node *mctz)
* position in the tree.
*/
__mem_cgroup_remove_exceeded(mz, mctz);
- if (!soft_limit_excess(mz->memcg) ||
+ if (!soft_limit_excess(mz->memcg, N_MEMORY) ||
!css_tryget(&mz->memcg->css))
goto retry;
done:
@@ -1805,7 +1828,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
.pgdat = pgdat,
};
- excess = soft_limit_excess(root_memcg);
+ excess = soft_limit_excess(root_memcg, N_MEMORY);
while (1) {
victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
@@ -1834,7 +1857,7 @@ static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
total += mem_cgroup_shrink_node(victim, gfp_mask, false,
pgdat, &nr_scanned);
*total_scanned += nr_scanned;
- if (!soft_limit_excess(root_memcg))
+ if (!soft_limit_excess(root_memcg, N_MEMORY))
break;
}
mem_cgroup_iter_break(root_memcg, victim);
@@ -3457,7 +3480,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
if (order > 0)
return 0;
- mctz = soft_limit_tree_node(pgdat->node_id);
+ mctz = soft_limit_tree_node(pgdat->node_id, N_MEMORY);
/*
* Do not even bother to check the largest node if the root
@@ -3513,7 +3536,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
if (!reclaimed)
next_mz = __mem_cgroup_largest_soft_limit_node(mctz);
- excess = soft_limit_excess(mz->memcg);
+ excess = soft_limit_excess(mz->memcg, N_MEMORY);
/*
* One school of thought says that we should not add
* back the node to the tree if reclaim returns 0.
@@ -7189,6 +7212,19 @@ static int __init mem_cgroup_init(void)
rtpn->rb_rightmost = NULL;
spin_lock_init(&rtpn->lock);
soft_limit_tree.rb_tree_per_node[node] = rtpn;
+
+ if (!node_state(node, N_TOPTIER)) {
+ soft_limit_toptier_tree.rb_tree_per_node[node] = NULL;
+ continue;
+ }
+
+ rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL,
+ node_online(node) ? node : NUMA_NO_NODE);
+
+ rtpn->rb_root = RB_ROOT;
+ rtpn->rb_rightmost = NULL;
+ spin_lock_init(&rtpn->lock);
+ soft_limit_toptier_tree.rb_tree_per_node[node] = rtpn;
}
return 0;
Define a per-node soft_limit_toptier_tree red-black tree that sorts and
tracks the cgroups by each group's excess over its toptier soft limit.
A cgroup is added to the tree if it has exceeded its toptier soft limit
and it has used pages on the node.

Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
---
 mm/memcontrol.c | 68 +++++++++++++++++++++++++++++++++++++------------
 1 file changed, 52 insertions(+), 16 deletions(-)