diff mbox series

[v3] mm/ksm: fix ksm exec support for prctl

Message ID 20240318090441.179486-1-tujinjiang@huawei.com (mailing list archive)
State New
Headers show
Series [v3] mm/ksm: fix ksm exec support for prctl | expand

Commit Message

Jinjiang Tu March 18, 2024, 9:04 a.m. UTC
commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
create the mm_slot, so ksmd will not try to scan this task.

To fix it, allocate and add the mm_slot to ksm_mm_head in __bprm_mm_init()
when the mm has MMF_VM_MERGE_ANY flag.

Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
---
 fs/exec.c           |  4 ++++
 include/linux/ksm.h | 13 +++++++++++++
 2 files changed, 17 insertions(+)

Comments

Jinjiang Tu March 18, 2024, 8:19 a.m. UTC | #1
Sorry, the v3 tag is erroneous. This patch is the first version.

在 2024/3/18 17:04, Jinjiang Tu 写道:
> commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
> MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
> create the mm_slot, so ksmd will not try to scan this task.
>
> To fix it, allocate and add the mm_slot to ksm_mm_head in __bprm_mm_init()
> when the mm has MMF_VM_MERGE_ANY flag.
>
> Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
> Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
> ---
>   fs/exec.c           |  4 ++++
>   include/linux/ksm.h | 13 +++++++++++++
>   2 files changed, 17 insertions(+)
>
> diff --git a/fs/exec.c b/fs/exec.c
> index ff6f26671cfc..00f40163cc12 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -67,6 +67,7 @@
>   #include <linux/time_namespace.h>
>   #include <linux/user_events.h>
>   #include <linux/rseq.h>
> +#include <linux/ksm.h>
>   
>   #include <linux/uaccess.h>
>   #include <asm/mmu_context.h>
> @@ -267,6 +268,9 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
>   		goto err_free;
>   	}
>   
> +	if (ksm_execve(mm))
> +		goto err;
> +
>   	/*
>   	 * Place the stack at the largest stack address the architecture
>   	 * supports. Later, we'll move this to an appropriate place. We don't
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index 401348e9f92b..7e2b1de3996a 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -59,6 +59,14 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
>   	return 0;
>   }
>   
> +static inline int ksm_execve(struct mm_struct *mm)
> +{
> +	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
> +		return __ksm_enter(mm);
> +
> +	return 0;
> +}
> +
>   static inline void ksm_exit(struct mm_struct *mm)
>   {
>   	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
> @@ -107,6 +115,11 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
>   	return 0;
>   }
>   
> +static inline int ksm_execve(struct mm_struct *mm)
> +{
> +	return 0;
> +}
> +
>   static inline void ksm_exit(struct mm_struct *mm)
>   {
>   }
David Hildenbrand March 18, 2024, 9:36 a.m. UTC | #2
On 18.03.24 10:04, Jinjiang Tu wrote:
> commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
> MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
> create the mm_slot, so ksmd will not try to scan this task.
> 
> To fix it, allocate and add the mm_slot to ksm_mm_head in __bprm_mm_init()
> when the mm has MMF_VM_MERGE_ANY flag.

That would mean that 3c6f33b7273a is effectively ineffective for 
fork+exec and only works with fork?

> 
> Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
> Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
> ---
>   fs/exec.c           |  4 ++++
>   include/linux/ksm.h | 13 +++++++++++++
>   2 files changed, 17 insertions(+)
> 
> diff --git a/fs/exec.c b/fs/exec.c
> index ff6f26671cfc..00f40163cc12 100644
> --- a/fs/exec.c
> +++ b/fs/exec.c
> @@ -67,6 +67,7 @@
>   #include <linux/time_namespace.h>
>   #include <linux/user_events.h>
>   #include <linux/rseq.h>
> +#include <linux/ksm.h>
>   
>   #include <linux/uaccess.h>
>   #include <asm/mmu_context.h>
> @@ -267,6 +268,9 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
>   		goto err_free;
>   	}
>   
> +	if (ksm_execve(mm))
> +		goto err;
> +
>   	/*
>   	 * Place the stack at the largest stack address the architecture
>   	 * supports. Later, we'll move this to an appropriate place. We don't
> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
> index 401348e9f92b..7e2b1de3996a 100644
> --- a/include/linux/ksm.h
> +++ b/include/linux/ksm.h
> @@ -59,6 +59,14 @@ static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
>   	return 0;
>   }
>   
> +static inline int ksm_execve(struct mm_struct *mm)
> +{
> +	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
> +		return __ksm_enter(mm);

As soon as we did the __ksm_enter(), we have to set MMF_VM_MERGEABLE. I 
don't think it would be set right now, because:

mm_alloc()->mm_init() will initialize the flags using

	mm->flags = mmf_init_flags(current->mm->flags);

Whereby MMF_INIT_MASK contains only MMF_VM_MERGE_ANY_MASK.


So we likely need a set_bit(MMF_VM_MERGEABLE, &mm->flags) here as well. 
Otherwise ksm_exit() wouldn't clean up, and we might call __ksm_enter() 
twice.


And now I wonder, when would be the right point to call __ksm_enter().

__mmput() calls ksm_exit(). But for example, if __bprm_mm_init() fails 
after __ksm_enter(), we will only call mmdrop(), not cleaning up ... so 
that looks wrong.

We would have to make sure we call __ksm_enter() only once we know that 
callers will call mm_put(). that is the case once we return from 
bprm_mm_init() with success.

Maybe move the ksm_execve() to bprm_mm_init(), adding a comment that 
cleanup will only happen during mm_put(), so it's harder to mess up in 
the future?
David Hildenbrand March 18, 2024, 9:59 a.m. UTC | #3
On 18.03.24 10:04, Jinjiang Tu wrote:
> commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
> MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
> create the mm_slot, so ksmd will not try to scan this task.
> 
> To fix it, allocate and add the mm_slot to ksm_mm_head in __bprm_mm_init()
> when the mm has MMF_VM_MERGE_ANY flag.
> 
> Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
> Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
> ---

Can we also please extend the KSM selftest? 
tools/testing/selftests/mm/ksm_functional_tests.c

We do have test_prctl_fork_exec(), but in ksm_fork_exec_child() we only 
test if the flag is set, not if de-duplication using the ksm daemon 
actually works.
Jinjiang Tu March 18, 2024, 10:52 a.m. UTC | #4
在 2024/3/18 17:36, David Hildenbrand 写道:
> On 18.03.24 10:04, Jinjiang Tu wrote:
>> commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
>> MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
>> create the mm_slot, so ksmd will not try to scan this task.
>>
>> To fix it, allocate and add the mm_slot to ksm_mm_head in 
>> __bprm_mm_init()
>> when the mm has MMF_VM_MERGE_ANY flag.
>
> That would mean that 3c6f33b7273a is effectively ineffective for 
> fork+exec and only works with fork?
>
Yes. In my test case, parent process calls prctl with 
PR_SET_MEMORY_MERGE, and then fork, execeve a new
process. The new process allocates 3 anon pages with same content and 
loops infinitely. However, the 3 pages
are not merged.
>>
>> Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
>> Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
>> ---
>>   fs/exec.c           |  4 ++++
>>   include/linux/ksm.h | 13 +++++++++++++
>>   2 files changed, 17 insertions(+)
>>
>> diff --git a/fs/exec.c b/fs/exec.c
>> index ff6f26671cfc..00f40163cc12 100644
>> --- a/fs/exec.c
>> +++ b/fs/exec.c
>> @@ -67,6 +67,7 @@
>>   #include <linux/time_namespace.h>
>>   #include <linux/user_events.h>
>>   #include <linux/rseq.h>
>> +#include <linux/ksm.h>
>>     #include <linux/uaccess.h>
>>   #include <asm/mmu_context.h>
>> @@ -267,6 +268,9 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
>>           goto err_free;
>>       }
>>   +    if (ksm_execve(mm))
>> +        goto err;
>> +
>>       /*
>>        * Place the stack at the largest stack address the architecture
>>        * supports. Later, we'll move this to an appropriate place. We 
>> don't
>> diff --git a/include/linux/ksm.h b/include/linux/ksm.h
>> index 401348e9f92b..7e2b1de3996a 100644
>> --- a/include/linux/ksm.h
>> +++ b/include/linux/ksm.h
>> @@ -59,6 +59,14 @@ static inline int ksm_fork(struct mm_struct *mm, 
>> struct mm_struct *oldmm)
>>       return 0;
>>   }
>>   +static inline int ksm_execve(struct mm_struct *mm)
>> +{
>> +    if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
>> +        return __ksm_enter(mm);
>
> As soon as we did the __ksm_enter(), we have to set MMF_VM_MERGEABLE. 
> I don't think it would be set right now, because:
>
> mm_alloc()->mm_init() will initialize the flags using
>
>     mm->flags = mmf_init_flags(current->mm->flags);
>
> Whereby MMF_INIT_MASK contains only MMF_VM_MERGE_ANY_MASK.
>
>
> So we likely need a set_bit(MMF_VM_MERGEABLE, &mm->flags) here as 
> well. Otherwise ksm_exit() wouldn't clean up, and we might call 
> __ksm_enter() twice.

__ksm_enter() will set MMF_VM_MERGEABLE when it succeeds.

>
>
> And now I wonder, when would be the right point to call __ksm_enter().
>
> __mmput() calls ksm_exit(). But for example, if __bprm_mm_init() fails 
> after __ksm_enter(), we will only call mmdrop(), not cleaning up ... 
> so that looks wrong.
Yes, I forgot cleanup in error path. ksm_exit() should be called when 
__bprm_mm_init() fails after __ksm_enter().
>
> We would have to make sure we call __ksm_enter() only once we know 
> that callers will call mm_put(). that is the case once we return from 
> bprm_mm_init() with success.
>
> Maybe move the ksm_execve() to bprm_mm_init(), adding a comment that 
> cleanup will only happen during mm_put(), so it's harder to mess up in 
> the future?
>
__ksm_enter() should be called under mmap write lock to avoid race with 
ksmd. So we can't move ksm_execve() to bprm_mm_init().
David Hildenbrand March 18, 2024, 10:54 a.m. UTC | #5
>>>    +static inline int ksm_execve(struct mm_struct *mm)
>>> +{
>>> +    if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
>>> +        return __ksm_enter(mm);
>>
>> As soon as we did the __ksm_enter(), we have to set MMF_VM_MERGEABLE.
>> I don't think it would be set right now, because:
>>
>> mm_alloc()->mm_init() will initialize the flags using
>>
>>      mm->flags = mmf_init_flags(current->mm->flags);
>>
>> Whereby MMF_INIT_MASK contains only MMF_VM_MERGE_ANY_MASK.
>>
>>
>> So we likely need a set_bit(MMF_VM_MERGEABLE, &mm->flags) here as
>> well. Otherwise ksm_exit() wouldn't clean up, and we might call
>> __ksm_enter() twice.
> 
> __ksm_enter() will set MMF_VM_MERGEABLE when it succeeds.
> 

Ah, indeed!

>>
>>
>> And now I wonder, when would be the right point to call __ksm_enter().
>>
>> __mmput() calls ksm_exit(). But for example, if __bprm_mm_init() fails
>> after __ksm_enter(), we will only call mmdrop(), not cleaning up ...
>> so that looks wrong.
> Yes, I forgot cleanup in error path. ksm_exit() should be called when
> __bprm_mm_init() fails after __ksm_enter().
>>
>> We would have to make sure we call __ksm_enter() only once we know
>> that callers will call mm_put(). that is the case once we return from
>> bprm_mm_init() with success.
>>
>> Maybe move the ksm_execve() to bprm_mm_init(), adding a comment that
>> cleanup will only happen during mm_put(), so it's harder to mess up in
>> the future?
>>
> __ksm_enter() should be called under mmap write lock to avoid race with
> ksmd. So we can't move ksm_execve() to bprm_mm_init().

Makes sense. We have to be careful that this won't silently break in the 
future. Better add some comment as well.
Jinjiang Tu March 18, 2024, 10:54 a.m. UTC | #6
在 2024/3/18 17:59, David Hildenbrand 写道:
> On 18.03.24 10:04, Jinjiang Tu wrote:
>> commit 3c6f33b7273a ("mm/ksm: support fork/exec for prctl") inherits
>> MMF_VM_MERGE_ANY flag when a task calls execve(). Howerver, it doesn't
>> create the mm_slot, so ksmd will not try to scan this task.
>>
>> To fix it, allocate and add the mm_slot to ksm_mm_head in 
>> __bprm_mm_init()
>> when the mm has MMF_VM_MERGE_ANY flag.
>>
>> Fixes: 3c6f33b7273a ("mm/ksm: support fork/exec for prctl")
>> Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
>> ---
>
> Can we also please extend the KSM selftest? 
> tools/testing/selftests/mm/ksm_functional_tests.c
>
> We do have test_prctl_fork_exec(), but in ksm_fork_exec_child() we 
> only test if the flag is set, not if de-duplication using the ksm 
> daemon actually works.
>
OK, I will try to extend the selftest.
diff mbox series

Patch

diff --git a/fs/exec.c b/fs/exec.c
index ff6f26671cfc..00f40163cc12 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -67,6 +67,7 @@ 
 #include <linux/time_namespace.h>
 #include <linux/user_events.h>
 #include <linux/rseq.h>
+#include <linux/ksm.h>
 
 #include <linux/uaccess.h>
 #include <asm/mmu_context.h>
@@ -267,6 +268,9 @@  static int __bprm_mm_init(struct linux_binprm *bprm)
 		goto err_free;
 	}
 
+	if (ksm_execve(mm))
+		goto err;
+
 	/*
 	 * Place the stack at the largest stack address the architecture
 	 * supports. Later, we'll move this to an appropriate place. We don't
diff --git a/include/linux/ksm.h b/include/linux/ksm.h
index 401348e9f92b..7e2b1de3996a 100644
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -59,6 +59,14 @@  static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
+static inline int ksm_execve(struct mm_struct *mm)
+{
+	if (test_bit(MMF_VM_MERGE_ANY, &mm->flags))
+		return __ksm_enter(mm);
+
+	return 0;
+}
+
 static inline void ksm_exit(struct mm_struct *mm)
 {
 	if (test_bit(MMF_VM_MERGEABLE, &mm->flags))
@@ -107,6 +115,11 @@  static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 	return 0;
 }
 
+static inline int ksm_execve(struct mm_struct *mm)
+{
+	return 0;
+}
+
 static inline void ksm_exit(struct mm_struct *mm)
 {
 }