@@ -57,6 +57,22 @@ struct mmap_state {
.state = VMA_MERGE_START, \
}
+/*
+ * If, at any point, the VMA had unCoW'd mappings from parents, it will maintain
+ * more than one anon_vma_chain connecting it to more than one anon_vma. A merge
+ * would mean a wider range of folios sharing the root anon_vma lock, and thus
+ * potential lock contention. We do not wish to encourage merging such that this
+ * scales to a problem.
+ */
+static bool vma_had_uncowed_parents(struct vm_area_struct *vma)
+{
+ /*
+ * The list_is_singular() test is to avoid merging VMAs cloned from
+ * parents. This avoids scalability issues caused by the anon_vma lock.
+ */
+ return vma && vma->anon_vma && !list_is_singular(&vma->anon_vma_chain);
+}
+
static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_next)
{
struct vm_area_struct *vma = merge_next ? vmg->next : vmg->prev;
@@ -82,24 +98,28 @@ static inline bool is_mergeable_vma(struct vma_merge_struct *vmg, bool merge_nex
return true;
}
-static inline bool is_mergeable_anon_vma(struct anon_vma *anon_vma1,
- struct anon_vma *anon_vma2, struct vm_area_struct *vma)
+static bool is_mergeable_anon_vma(struct vma_merge_struct *vmg, bool merge_next)
{
+ struct vm_area_struct *tgt = merge_next ? vmg->next : vmg->prev;
+ struct vm_area_struct *src = vmg->middle; /* existing merge case. */
+ struct anon_vma *tgt_anon = tgt->anon_vma;
+ struct anon_vma *src_anon = vmg->anon_vma;
+
/*
- * The list_is_singular() test is to avoid merging VMA cloned from
- * parents. This can improve scalability caused by anon_vma lock.
+ * We _can_ have !src, vmg->anon_vma via copy_vma(). In this instance we
+ * will remove the existing VMA's anon_vma's so there are no scalability
+ * concerns.
*/
- if ((!anon_vma1 || !anon_vma2) && (!vma ||
- list_is_singular(&vma->anon_vma_chain)))
- return true;
- return anon_vma1 == anon_vma2;
-}
+ VM_WARN_ON(src && src_anon != src->anon_vma);
-/* Are the anon_vma's belonging to each VMA compatible with one another? */
-static inline bool are_anon_vmas_compatible(struct vm_area_struct *vma1,
- struct vm_area_struct *vma2)
-{
- return is_mergeable_anon_vma(vma1->anon_vma, vma2->anon_vma, NULL);
+ /* Case 1 - we will dup_anon_vma() from src into tgt. */
+ if (!tgt_anon && src_anon)
+ return !vma_had_uncowed_parents(src);
+ /* Case 2 - we will simply use tgt's anon_vma. */
+ if (tgt_anon && !src_anon)
+ return !vma_had_uncowed_parents(tgt);
+ /* Case 3 - the anon_vma's are already shared. */
+ return src_anon == tgt_anon;
}
/*
@@ -164,7 +184,7 @@ static bool can_vma_merge_before(struct vma_merge_struct *vmg)
pgoff_t pglen = PHYS_PFN(vmg->end - vmg->start);
if (is_mergeable_vma(vmg, /* merge_next = */ true) &&
- is_mergeable_anon_vma(vmg->anon_vma, vmg->next->anon_vma, vmg->next)) {
+ is_mergeable_anon_vma(vmg, /* merge_next = */ true)) {
if (vmg->next->vm_pgoff == vmg->pgoff + pglen)
return true;
}
@@ -184,7 +204,7 @@ static bool can_vma_merge_before(struct vma_merge_struct *vmg)
static bool can_vma_merge_after(struct vma_merge_struct *vmg)
{
if (is_mergeable_vma(vmg, /* merge_next = */ false) &&
- is_mergeable_anon_vma(vmg->anon_vma, vmg->prev->anon_vma, vmg->prev)) {
+ is_mergeable_anon_vma(vmg, /* merge_next = */ false)) {
if (vmg->prev->vm_pgoff + vma_pages(vmg->prev) == vmg->pgoff)
return true;
}
@@ -400,8 +420,10 @@ static bool can_vma_merge_left(struct vma_merge_struct *vmg)
static bool can_vma_merge_right(struct vma_merge_struct *vmg,
bool can_merge_left)
{
- if (!vmg->next || vmg->end != vmg->next->vm_start ||
- !can_vma_merge_before(vmg))
+ struct vm_area_struct *next = vmg->next;
+ struct vm_area_struct *prev;
+
+ if (!next || vmg->end != next->vm_start || !can_vma_merge_before(vmg))
return false;
if (!can_merge_left)
@@ -414,7 +436,9 @@ static bool can_vma_merge_right(struct vma_merge_struct *vmg,
*
* We therefore check this in addition to mergeability to either side.
*/
- return are_anon_vmas_compatible(vmg->prev, vmg->next);
+ prev = vmg->prev;
+ return !prev->anon_vma || !next->anon_vma ||
+ prev->anon_vma == next->anon_vma;
}
/*
@@ -554,7 +578,9 @@ static int split_vma(struct vma_iterator *vmi, struct vm_area_struct *vma,
}
/*
- * dup_anon_vma() - Helper function to duplicate anon_vma
+ * dup_anon_vma() - Helper function to duplicate anon_vma on VMA merge in the
+ * instance that the destination VMA has no anon_vma but the source does.
+ *
* @dst: The destination VMA
* @src: The source VMA
* @dup: Pointer to the destination VMA when successful.
@@ -565,9 +591,18 @@ static int dup_anon_vma(struct vm_area_struct *dst,
struct vm_area_struct *src, struct vm_area_struct **dup)
{
/*
- * Easily overlooked: when mprotect shifts the boundary, make sure the
- * expanding vma has anon_vma set if the shrinking vma had, to cover any
- * anon pages imported.
+ * There are three cases to consider for correctly propagating
+ * anon_vma's on merge.
+ *
+ * The first is trivial - neither VMA has anon_vma, we need not do
+ * anything.
+ *
+ * The second where both have anon_vma is also a no-op, as they must
+ * then be the same, so there is simply nothing to copy.
+ *
+ * Here we cover the third - if the destination VMA has no anon_vma
+ * (that is, it is unfaulted), we need to ensure that the newly merged
+ * range is referenced by the anon_vma's of the source.
*/
if (src->anon_vma && !dst->anon_vma) {
int ret;
@@ -185,6 +185,15 @@ static void vmg_set_range(struct vma_merge_struct *vmg, unsigned long start,
vmg->__adjust_next_start = false;
}
+/* Wrapper around vmg_set_range() which additionally sets vmg->anon_vma. */
+static void vmg_set_range_anon_vma(struct vma_merge_struct *vmg, unsigned long start,
+ unsigned long end, pgoff_t pgoff, vm_flags_t flags,
+ struct anon_vma *anon_vma)
+{
+ vmg_set_range(vmg, start, end, pgoff, flags);
+ vmg->anon_vma = anon_vma;
+}
+
/*
* Helper function to try to merge a new VMA.
*
@@ -265,6 +274,22 @@ static void dummy_close(struct vm_area_struct *)
{
}
+static void __vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
+{
+ vma->anon_vma = anon_vma;
+ INIT_LIST_HEAD(&vma->anon_vma_chain); /* Reset to an empty chain... */
+ list_add(&avc->same_vma, &vma->anon_vma_chain); /* ...leaving avc as its only link. */
+ avc->anon_vma = vma->anon_vma;
+}
+
+static void vma_set_dummy_anon_vma(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc)
+{
+ __vma_set_dummy_anon_vma(vma, avc, &dummy_anon_vma); /* anon_vma defaults to dummy_anon_vma. */
+}
+
static bool test_simple_merge(void)
{
struct vm_area_struct *vma;
@@ -953,6 +978,7 @@ static bool test_merge_existing(void)
const struct vm_operations_struct vm_ops = {
.close = dummy_close,
};
+ struct anon_vma_chain avc = {};
/*
* Merge right case - partial span.
@@ -968,10 +994,10 @@ static bool test_merge_existing(void)
vma->vm_ops = &vm_ops; /* This should have no impact. */
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.middle = vma;
vmg.prev = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x3000);
@@ -1001,9 +1027,9 @@ static bool test_merge_existing(void)
vma = alloc_and_link_vma(&mm, 0x2000, 0x6000, 2, flags);
vma_next = alloc_and_link_vma(&mm, 0x6000, 0x9000, 6, flags);
vma_next->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x2000, 0x6000, 2, flags);
+ vmg_set_range_anon_vma(&vmg, 0x2000, 0x6000, 2, flags, &dummy_anon_vma);
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_next);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_next->vm_start, 0x2000);
@@ -1030,11 +1056,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma->vm_ops = &vm_ops; /* This should have no impact. */
- vmg_set_range(&vmg, 0x3000, 0x6000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x6000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
-
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1064,10 +1089,10 @@ static bool test_merge_existing(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1094,10 +1119,10 @@ static bool test_merge_existing(void)
vma_prev->vm_ops = &vm_ops; /* This should have no impact. */
vma = alloc_and_link_vma(&mm, 0x3000, 0x7000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &avc);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1180,12 +1205,9 @@ static bool test_anon_vma_non_mergeable(void)
.mm = &mm,
.vmi = &vmi,
};
- struct anon_vma_chain dummy_anon_vma_chain1 = {
- .anon_vma = &dummy_anon_vma,
- };
- struct anon_vma_chain dummy_anon_vma_chain2 = {
- .anon_vma = &dummy_anon_vma,
- };
+ struct anon_vma_chain dummy_anon_vma_chain_1 = {};
+ struct anon_vma_chain dummy_anon_vma_chain_2 = {};
+ struct anon_vma dummy_anon_vma_2;
/*
* In the case of modified VMA merge, merging both left and right VMAs
@@ -1209,24 +1231,11 @@ static bool test_anon_vma_non_mergeable(void)
*
* However, when prev is compared to next, the merge should fail.
*/
-
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_prev->anon_vma_chain));
- vma_prev->anon_vma = &dummy_anon_vma;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_prev->anon_vma, vma_prev));
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- ASSERT_TRUE(list_is_singular(&vma_next->anon_vma_chain));
- vma_next->anon_vma = (struct anon_vma *)2;
- ASSERT_TRUE(is_mergeable_anon_vma(NULL, vma_next->anon_vma, vma_next));
-
- ASSERT_FALSE(is_mergeable_anon_vma(vma_prev->anon_vma, vma_next->anon_vma, NULL));
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
ASSERT_EQ(merge_existing(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
@@ -1253,17 +1262,12 @@ static bool test_anon_vma_non_mergeable(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x7000, 0x9000, 7, flags);
- INIT_LIST_HEAD(&vma_prev->anon_vma_chain);
- list_add(&dummy_anon_vma_chain1.same_vma, &vma_prev->anon_vma_chain);
- vma_prev->anon_vma = (struct anon_vma *)1;
-
- INIT_LIST_HEAD(&vma_next->anon_vma_chain);
- list_add(&dummy_anon_vma_chain2.same_vma, &vma_next->anon_vma_chain);
- vma_next->anon_vma = (struct anon_vma *)2;
-
- vmg_set_range(&vmg, 0x3000, 0x7000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x7000, 3, flags, NULL);
vmg.prev = vma_prev;
+ vma_set_dummy_anon_vma(vma_prev, &dummy_anon_vma_chain_1);
+ __vma_set_dummy_anon_vma(vma_next, &dummy_anon_vma_chain_2, &dummy_anon_vma_2);
+ vmg.anon_vma = NULL;
ASSERT_EQ(merge_new(&vmg), vma_prev);
ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS);
ASSERT_EQ(vma_prev->vm_start, 0);
@@ -1363,8 +1367,8 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
-
- vma->anon_vma = &dummy_anon_vma;
+ vmg.anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1392,7 +1396,7 @@ static bool test_dup_anon_vma(void)
vma_prev = alloc_and_link_vma(&mm, 0, 0x3000, 0, flags);
vma = alloc_and_link_vma(&mm, 0x3000, 0x8000, 3, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma_prev;
vmg.middle = vma;
@@ -1420,7 +1424,7 @@ static bool test_dup_anon_vma(void)
vma = alloc_and_link_vma(&mm, 0, 0x5000, 0, flags);
vma_next = alloc_and_link_vma(&mm, 0x5000, 0x8000, 5, flags);
- vma->anon_vma = &dummy_anon_vma;
+ vma_set_dummy_anon_vma(vma, &dummy_anon_vma_chain);
vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
vmg.prev = vma;
vmg.middle = vma;
@@ -1447,6 +1451,7 @@ static bool test_vmi_prealloc_fail(void)
.mm = &mm,
.vmi = &vmi,
};
+ struct anon_vma_chain avc = {};
struct vm_area_struct *vma_prev, *vma;
/*
@@ -1459,9 +1464,10 @@ static bool test_vmi_prealloc_fail(void)
vma = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags);
vma->anon_vma = &dummy_anon_vma;
- vmg_set_range(&vmg, 0x3000, 0x5000, 3, flags);
+ vmg_set_range_anon_vma(&vmg, 0x3000, 0x5000, 3, flags, &dummy_anon_vma);
vmg.prev = vma_prev;
vmg.middle = vma;
+ vma_set_dummy_anon_vma(vma, &avc);
fail_prealloc = true;