@@ -2177,22 +2177,56 @@ bool mempolicy_nodemask_intersects(struct task_struct *tsk,
return ret;
}
-/* Allocate a page in interleaved policy.
- Own path because it needs to do special accounting. */
-static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
- unsigned nid)
+/* Handle page allocation for all mempolicies, including interleave */
+static struct page *alloc_pages_policy(struct mempolicy *pol, gfp_t gfp,
+ unsigned int order, int preferred_nid)
{
struct page *page;
+ gfp_t gfp_mask = gfp;
- page = __alloc_pages(gfp, order, nid);
- /* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
- if (!static_branch_likely(&vm_numa_stat_key))
+ if (pol->mode == MPOL_INTERLEAVE) {
+ page = __alloc_pages(gfp, order, preferred_nid);
+ /* skip NUMA_INTERLEAVE_HIT counter update if numa stats is disabled */
+ if (!static_branch_likely(&vm_numa_stat_key))
+ return page;
+ if (page && page_to_nid(page) == preferred_nid) {
+ preempt_disable();
+ __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+ preempt_enable();
+ }
return page;
- if (page && page_to_nid(page) == nid) {
- preempt_disable();
- __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
- preempt_enable();
}
+
+ VM_BUG_ON(preferred_nid != NUMA_NO_NODE);
+
+ preferred_nid = numa_node_id();
+
+ /*
+ * A two-pass approach is used for MPOL_PREFERRED_MANY. In the first pass
+ * the preferred nodes are treated as if they were bound, but the allocation
+ * is allowed to fail; if it does, a second pass retries with no nodemask so
+ * any node may satisfy it. The table below shows how this is achieved.
+ *
+ * | Policy                        | preferred nid | nodemask   |
+ * |-------------------------------|---------------|------------|
+ * | MPOL_DEFAULT                  | local         | NULL       |
+ * | MPOL_PREFERRED                | best          | NULL       |
+ * | MPOL_INTERLEAVE               | ERR           | ERR        |
+ * | MPOL_BIND                     | local         | pol->nodes |
+ * | MPOL_PREFERRED_MANY           | best          | pol->nodes |
+ * | MPOL_PREFERRED_MANY (round 2) | local         | NULL       |
+ * +-------------------------------+---------------+------------+
+ */
+ if (pol->mode == MPOL_PREFERRED_MANY)
+ gfp_mask |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+
+ page = __alloc_pages_nodemask(gfp_mask, order,
+ policy_node(gfp, pol, preferred_nid),
+ policy_nodemask(gfp, pol));
+
+ if (unlikely(!page && pol->mode == MPOL_PREFERRED_MANY))
+ page = __alloc_pages_nodemask(gfp, order, preferred_nid, NULL);
+
return page;
}
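
The two-pass fallback described in the comment above can be illustrated outside the kernel. The following is a minimal stand-alone C sketch, not kernel code: alloc_from_nodes() and alloc_from_any_node() are invented stand-ins for the restricted and unrestricted allocation calls, and the first pass simply pretends the preferred nodes are exhausted so the fallback runs, mirroring the __GFP_RETRY_MAYFAIL | __GFP_NOWARN attempt followed by the unrestricted retry in the hunk above.

#include <stddef.h>
#include <stdio.h>

/* Invented stand-in: allocation restricted to the preferred nodes. */
static void *alloc_from_nodes(size_t size, const int *nodes, int nr_nodes)
{
	/* Pretend the preferred nodes are exhausted so the fallback runs. */
	(void)size; (void)nodes; (void)nr_nodes;
	return NULL;
}

/* Invented stand-in: allocation with no node restriction. */
static void *alloc_from_any_node(size_t size)
{
	static char backing[4096];

	return size <= sizeof(backing) ? backing : NULL;
}

/*
 * Two-pass shape of MPOL_PREFERRED_MANY: pass 1 is confined to the
 * preferred nodes but is allowed to fail quietly; pass 2 drops the
 * restriction entirely.
 */
static void *alloc_preferred_many(size_t size, const int *preferred, int nr)
{
	void *p = alloc_from_nodes(size, preferred, nr);	/* pass 1 */

	if (!p)
		p = alloc_from_any_node(size);			/* pass 2 */
	return p;
}

int main(void)
{
	int preferred[] = { 2, 3 };
	void *p = alloc_preferred_many(1024, preferred, 2);

	printf("allocation %s\n", p ? "satisfied by the fallback pass" : "failed");
	return 0;
}

In the patch itself the same shape appears as the __alloc_pages_nodemask() call with gfp_mask |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN restricted by policy_nodemask(), followed, only on failure, by __alloc_pages_nodemask(gfp, order, preferred_nid, NULL).
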
@@ -2234,8 +2268,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
unsigned nid;
nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order);
+ page = alloc_pages_policy(pol, gfp, order, nid);
mpol_cond_put(pol);
- page = alloc_page_interleave(gfp, order, nid);
goto out;
}
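
For the VMA path in the hunk above, the interleave node is derived from the faulting address via interleave_nid() before alloc_pages_policy() is called. Below is a rough stand-alone C sketch of that idea, assuming a simple offset-modulo mapping; pick_interleave_node() and its parameters are invented for illustration and do not reproduce the kernel's exact interleave_nid()/offset_il_node() arithmetic.

#include <stdio.h>

/*
 * Invented illustration of address-based interleaving: the page's
 * offset within the mapping, shifted by the allocation order, indexes
 * round-robin into the allowed nodes, so a given address always maps
 * to the same node. Not the kernel's interleave_nid() implementation.
 */
static int pick_interleave_node(unsigned long addr, unsigned long vma_start,
				unsigned int page_shift, unsigned int order,
				const int *nodes, int nr_nodes)
{
	unsigned long off = (addr - vma_start) >> (page_shift + order);

	return nodes[off % nr_nodes];
}

int main(void)
{
	int nodes[] = { 0, 1, 2 };
	unsigned long start = 0x10000000UL;

	for (unsigned long addr = start; addr < start + 8 * 4096; addr += 4096)
		printf("addr %#lx -> node %d\n", addr,
		       pick_interleave_node(addr, start, 12, 0, nodes, 3));
	return 0;
}

The point is determinism: a given address in the mapping always lands on the same node, which is what makes the page_to_nid(page) == preferred_nid comparison in the NUMA_INTERLEAVE_HIT accounting meaningful.
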
@@ -2319,7 +2353,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
* nor system default_policy
*/
if (pol->mode == MPOL_INTERLEAVE)
- page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
+ page = alloc_pages_policy(pol, gfp, order,
+ interleave_nodes(pol));
else
page = __alloc_pages_nodemask(gfp, order,
policy_node(gfp, pol, numa_node_id()),