diff mbox series

[v13,4/5] arm64: support copy_mc_[user]_highpage()

Message ID 20241209024257.3618492-5-tongtiangen@huawei.com (mailing list archive)
State New, archived
Headers show
Series arm64: add ARCH_HAS_COPY_MC support | expand

Commit Message

Tong Tiangen Dec. 9, 2024, 2:42 a.m. UTC
Currently, many scenarios that can tolerate memory errors when copying page
have been supported in the kernel[1~5], all of which are implemented by
copy_mc_[user]_highpage(). arm64 should also support this mechanism.

Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
__HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.

Add new helper copy_mc_page() which provide a page copy implementation with
hardware memory error safe. The code logic of copy_mc_page() is the same as
copy_page(), the main difference is that the ldp insn of copy_mc_page()
contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
main logic is extracted to copy_page_template.S. In addition, the fixup of
MOPS insn is not considered at present.

[1] commit d302c2398ba2 ("mm, hwpoison: when copy-on-write hits poison, take page offline")
[2] commit 1cb9dc4b475c ("mm: hwpoison: support recovery from HugePage copy-on-write faults")
[3] commit 6b970599e807 ("mm: hwpoison: support recovery from ksm_might_need_to_copy()")
[4] commit 98c76c9f1ef7 ("mm/khugepaged: recover from poisoned anonymous memory")
[5] commit 12904d953364 ("mm/khugepaged: recover from poisoned file-backed memory")

Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
---
 arch/arm64/include/asm/mte.h        |  9 ++++
 arch/arm64/include/asm/page.h       | 10 ++++
 arch/arm64/lib/Makefile             |  2 +
 arch/arm64/lib/copy_mc_page.S       | 37 ++++++++++++++
 arch/arm64/lib/copy_page.S          | 62 ++----------------------
 arch/arm64/lib/copy_page_template.S | 70 +++++++++++++++++++++++++++
 arch/arm64/lib/mte.S                | 29 +++++++++++
 arch/arm64/mm/copypage.c            | 75 +++++++++++++++++++++++++++++
 include/linux/highmem.h             |  8 +++
 9 files changed, 245 insertions(+), 57 deletions(-)
 create mode 100644 arch/arm64/lib/copy_mc_page.S
 create mode 100644 arch/arm64/lib/copy_page_template.S

Comments

Catalin Marinas Feb. 12, 2025, 5:11 p.m. UTC | #1
On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
> Currently, many scenarios that can tolerate memory errors when copying page
> have been supported in the kernel[1~5], all of which are implemented by
> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
> 
> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
> 
> Add new helper copy_mc_page() which provide a page copy implementation with
> hardware memory error safe. The code logic of copy_mc_page() is the same as
> copy_page(), the main difference is that the ldp insn of copy_mc_page()
> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
> main logic is extracted to copy_page_template.S. In addition, the fixup of
> MOPS insn is not considered at present.

Could we not add the exception table entry permanently but ignore the
exception table entry if it's not on the do_sea() path? That would save
some code duplication.

> diff --git a/arch/arm64/lib/copy_mc_page.S b/arch/arm64/lib/copy_mc_page.S
> new file mode 100644
> index 000000000000..51564828c30c
> --- /dev/null
> +++ b/arch/arm64/lib/copy_mc_page.S
> @@ -0,0 +1,37 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +
> +#include <linux/linkage.h>
> +#include <linux/const.h>
> +#include <asm/assembler.h>
> +#include <asm/page.h>
> +#include <asm/cpufeature.h>
> +#include <asm/alternative.h>
> +#include <asm/asm-extable.h>
> +#include <asm/asm-uaccess.h>
> +
> +/*
> + * Copy a page from src to dest (both are page aligned) with memory error safe
> + *
> + * Parameters:
> + *	x0 - dest
> + *	x1 - src
> + * Returns:
> + * 	x0 - Return 0 if copy success, or -EFAULT if anything goes wrong
> + *	     while copying.
> + */
> +	.macro ldp1 reg1, reg2, ptr, val
> +	KERNEL_MEM_ERR(9998f, ldp \reg1, \reg2, [\ptr, \val])
> +	.endm
> +
> +SYM_FUNC_START(__pi_copy_mc_page)
> +#include "copy_page_template.S"
> +
> +	mov x0, #0
> +	ret
> +
> +9998:	mov x0, #-EFAULT
> +	ret
> +
> +SYM_FUNC_END(__pi_copy_mc_page)
> +SYM_FUNC_ALIAS(copy_mc_page, __pi_copy_mc_page)
> +EXPORT_SYMBOL(copy_mc_page)
[...]
> diff --git a/arch/arm64/lib/copy_page_template.S b/arch/arm64/lib/copy_page_template.S
> new file mode 100644
> index 000000000000..f96c7988c93d
> --- /dev/null
> +++ b/arch/arm64/lib/copy_page_template.S
> @@ -0,0 +1,70 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2012 ARM Ltd.
> + */
> +
> +/*
> + * Copy a page from src to dest (both are page aligned)
> + *
> + * Parameters:
> + *	x0 - dest
> + *	x1 - src
> + */
> +
> +#ifdef CONFIG_AS_HAS_MOPS
> +	.arch_extension mops
> +alternative_if_not ARM64_HAS_MOPS
> +	b	.Lno_mops
> +alternative_else_nop_endif
> +
> +	mov	x2, #PAGE_SIZE
> +	cpypwn	[x0]!, [x1]!, x2!
> +	cpymwn	[x0]!, [x1]!, x2!
> +	cpyewn	[x0]!, [x1]!, x2!
> +	ret
> +.Lno_mops:
> +#endif
[...]

So if we have FEAT_MOPS, the machine check won't work?

Kristina is going to post MOPS support for the uaccess routines soon.
You can see how they are wired up and do something similar here.

But I'd prefer if we had the same code, only the exception table entry
treated differently. Similarly for the MTE tag copying.
Tong Tiangen Feb. 14, 2025, 1:45 a.m. UTC | #2
在 2025/2/13 1:11, Catalin Marinas 写道:
> On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
>> Currently, many scenarios that can tolerate memory errors when copying page
>> have been supported in the kernel[1~5], all of which are implemented by
>> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
>>
>> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
>> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
>> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
>>
>> Add new helper copy_mc_page() which provide a page copy implementation with
>> hardware memory error safe. The code logic of copy_mc_page() is the same as
>> copy_page(), the main difference is that the ldp insn of copy_mc_page()
>> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
>> main logic is extracted to copy_page_template.S. In addition, the fixup of
>> MOPS insn is not considered at present.
> 
> Could we not add the exception table entry permanently but ignore the
> exception table entry if it's not on the do_sea() path? That would save
> some code duplication.

The location of the added exception table entry is likely to appear on
the a path, which should not be avoided. What we can do is merge
duplicate code as much as possible, and extract common code into common
files, as we did in this patch.


> 
>> diff --git a/arch/arm64/lib/copy_mc_page.S b/arch/arm64/lib/copy_mc_page.S
>> new file mode 100644
>> index 000000000000..51564828c30c
>> --- /dev/null
>> +++ b/arch/arm64/lib/copy_mc_page.S
>> @@ -0,0 +1,37 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +
>> +#include <linux/linkage.h>
>> +#include <linux/const.h>
>> +#include <asm/assembler.h>
>> +#include <asm/page.h>
>> +#include <asm/cpufeature.h>
>> +#include <asm/alternative.h>
>> +#include <asm/asm-extable.h>
>> +#include <asm/asm-uaccess.h>
>> +
>> +/*
>> + * Copy a page from src to dest (both are page aligned) with memory error safe
>> + *
>> + * Parameters:
>> + *	x0 - dest
>> + *	x1 - src
>> + * Returns:
>> + * 	x0 - Return 0 if copy success, or -EFAULT if anything goes wrong
>> + *	     while copying.
>> + */
>> +	.macro ldp1 reg1, reg2, ptr, val
>> +	KERNEL_MEM_ERR(9998f, ldp \reg1, \reg2, [\ptr, \val])
>> +	.endm
>> +
>> +SYM_FUNC_START(__pi_copy_mc_page)
>> +#include "copy_page_template.S"
>> +
>> +	mov x0, #0
>> +	ret
>> +
>> +9998:	mov x0, #-EFAULT
>> +	ret
>> +
>> +SYM_FUNC_END(__pi_copy_mc_page)
>> +SYM_FUNC_ALIAS(copy_mc_page, __pi_copy_mc_page)
>> +EXPORT_SYMBOL(copy_mc_page)
> [...]
>> diff --git a/arch/arm64/lib/copy_page_template.S b/arch/arm64/lib/copy_page_template.S
>> new file mode 100644
>> index 000000000000..f96c7988c93d
>> --- /dev/null
>> +++ b/arch/arm64/lib/copy_page_template.S
>> @@ -0,0 +1,70 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2012 ARM Ltd.
>> + */
>> +
>> +/*
>> + * Copy a page from src to dest (both are page aligned)
>> + *
>> + * Parameters:
>> + *	x0 - dest
>> + *	x1 - src
>> + */
>> +
>> +#ifdef CONFIG_AS_HAS_MOPS
>> +	.arch_extension mops
>> +alternative_if_not ARM64_HAS_MOPS
>> +	b	.Lno_mops
>> +alternative_else_nop_endif
>> +
>> +	mov	x2, #PAGE_SIZE
>> +	cpypwn	[x0]!, [x1]!, x2!
>> +	cpymwn	[x0]!, [x1]!, x2!
>> +	cpyewn	[x0]!, [x1]!, x2!
>> +	ret
>> +.Lno_mops:
>> +#endif
> [...]
> 
> So if we have FEAT_MOPS, the machine check won't work?
> 
> Kristina is going to post MOPS support for the uaccess routines soon.
> You can see how they are wired up and do something similar here.
> 
> But I'd prefer if we had the same code, only the exception table entry
> treated differently. Similarly for the MTE tag copying.

Does MOPS also support features similar to memory error safe? I'll see
how he handles it.

>
Tong Tiangen Feb. 14, 2025, 2:49 a.m. UTC | #3
在 2025/2/13 1:11, Catalin Marinas 写道:
> On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
>> Currently, many scenarios that can tolerate memory errors when copying page
>> have been supported in the kernel[1~5], all of which are implemented by
>> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
>>
>> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
>> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
>> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
>>
>> Add new helper copy_mc_page() which provide a page copy implementation with
>> hardware memory error safe. The code logic of copy_mc_page() is the same as
>> copy_page(), the main difference is that the ldp insn of copy_mc_page()
>> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
>> main logic is extracted to copy_page_template.S. In addition, the fixup of
>> MOPS insn is not considered at present.
> 
> Could we not add the exception table entry permanently but ignore the
> exception table entry if it's not on the do_sea() path? That would save
> some code duplication.

I'm sorry, I didn't catch your point, that the do_sea() and non do_sea()
paths use different exception tables? My understanding is that the
exception table entry problem is fine. After all, the search is
performed only after a fault trigger. Code duplication can be solved by
extracting repeated logic to a public file.

> 
>> diff --git a/arch/arm64/lib/copy_mc_page.S b/arch/arm64/lib/copy_mc_page.S
>> new file mode 100644
>> index 000000000000..51564828c30c
>> --- /dev/null
>> +++ b/arch/arm64/lib/copy_mc_page.S
>> @@ -0,0 +1,37 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +
>> +#include <linux/linkage.h>
>> +#include <linux/const.h>
>> +#include <asm/assembler.h>
>> +#include <asm/page.h>
>> +#include <asm/cpufeature.h>
>> +#include <asm/alternative.h>
>> +#include <asm/asm-extable.h>
>> +#include <asm/asm-uaccess.h>
>> +
>> +/*
>> + * Copy a page from src to dest (both are page aligned) with memory error safe
>> + *
>> + * Parameters:
>> + *	x0 - dest
>> + *	x1 - src
>> + * Returns:
>> + * 	x0 - Return 0 if copy success, or -EFAULT if anything goes wrong
>> + *	     while copying.
>> + */
>> +	.macro ldp1 reg1, reg2, ptr, val
>> +	KERNEL_MEM_ERR(9998f, ldp \reg1, \reg2, [\ptr, \val])
>> +	.endm
>> +
>> +SYM_FUNC_START(__pi_copy_mc_page)
>> +#include "copy_page_template.S"
>> +
>> +	mov x0, #0
>> +	ret
>> +
>> +9998:	mov x0, #-EFAULT
>> +	ret
>> +
>> +SYM_FUNC_END(__pi_copy_mc_page)
>> +SYM_FUNC_ALIAS(copy_mc_page, __pi_copy_mc_page)
>> +EXPORT_SYMBOL(copy_mc_page)
> [...]
>> diff --git a/arch/arm64/lib/copy_page_template.S b/arch/arm64/lib/copy_page_template.S
>> new file mode 100644
>> index 000000000000..f96c7988c93d
>> --- /dev/null
>> +++ b/arch/arm64/lib/copy_page_template.S
>> @@ -0,0 +1,70 @@
>> +/* SPDX-License-Identifier: GPL-2.0-only */
>> +/*
>> + * Copyright (C) 2012 ARM Ltd.
>> + */
>> +
>> +/*
>> + * Copy a page from src to dest (both are page aligned)
>> + *
>> + * Parameters:
>> + *	x0 - dest
>> + *	x1 - src
>> + */
>> +
>> +#ifdef CONFIG_AS_HAS_MOPS
>> +	.arch_extension mops
>> +alternative_if_not ARM64_HAS_MOPS
>> +	b	.Lno_mops
>> +alternative_else_nop_endif
>> +
>> +	mov	x2, #PAGE_SIZE
>> +	cpypwn	[x0]!, [x1]!, x2!
>> +	cpymwn	[x0]!, [x1]!, x2!
>> +	cpyewn	[x0]!, [x1]!, x2!
>> +	ret
>> +.Lno_mops:
>> +#endif
> [...]
> 
> So if we have FEAT_MOPS, the machine check won't work?
> 
> Kristina is going to post MOPS support for the uaccess routines soon.
> You can see how they are wired up and do something similar here.
> 
> But I'd prefer if we had the same code, only the exception table entry
> treated differently. Similarly for the MTE tag copying.
>
Catalin Marinas Feb. 14, 2025, 5:24 p.m. UTC | #4
On Fri, Feb 14, 2025 at 10:49:01AM +0800, Tong Tiangen wrote:
> 在 2025/2/13 1:11, Catalin Marinas 写道:
> > On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
> > > Currently, many scenarios that can tolerate memory errors when copying page
> > > have been supported in the kernel[1~5], all of which are implemented by
> > > copy_mc_[user]_highpage(). arm64 should also support this mechanism.
> > > 
> > > Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
> > > architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
> > > __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
> > > 
> > > Add new helper copy_mc_page() which provide a page copy implementation with
> > > hardware memory error safe. The code logic of copy_mc_page() is the same as
> > > copy_page(), the main difference is that the ldp insn of copy_mc_page()
> > > contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
> > > main logic is extracted to copy_page_template.S. In addition, the fixup of
> > > MOPS insn is not considered at present.
> > 
> > Could we not add the exception table entry permanently but ignore the
> > exception table entry if it's not on the do_sea() path? That would save
> > some code duplication.
> 
> I'm sorry, I didn't catch your point, that the do_sea() and non do_sea()
> paths use different exception tables?

No, they would have the same exception table, only that we'd interpret
it differently depending on whether it's a SEA error or not. Or rather
ignore the exception table altogether for non-SEA errors.

> My understanding is that the
> exception table entry problem is fine. After all, the search is
> performed only after a fault trigger. Code duplication can be solved by
> extracting repeated logic to a public file.

If the new exception table entries are only taken into account for SEA
errors, why do we need a duplicate copy_mc_page() function generated?
Isn't the copy_page() and copy_mc_page() code identical (except for the
additional labels to jump to for the exception)?
Tong Tiangen Feb. 17, 2025, 8:07 a.m. UTC | #5
在 2025/2/15 1:24, Catalin Marinas 写道:
> On Fri, Feb 14, 2025 at 10:49:01AM +0800, Tong Tiangen wrote:
>> 在 2025/2/13 1:11, Catalin Marinas 写道:
>>> On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
>>>> Currently, many scenarios that can tolerate memory errors when copying page
>>>> have been supported in the kernel[1~5], all of which are implemented by
>>>> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
>>>>
>>>> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
>>>> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
>>>> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
>>>>
>>>> Add new helper copy_mc_page() which provide a page copy implementation with
>>>> hardware memory error safe. The code logic of copy_mc_page() is the same as
>>>> copy_page(), the main difference is that the ldp insn of copy_mc_page()
>>>> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
>>>> main logic is extracted to copy_page_template.S. In addition, the fixup of
>>>> MOPS insn is not considered at present.
>>>
>>> Could we not add the exception table entry permanently but ignore the
>>> exception table entry if it's not on the do_sea() path? That would save
>>> some code duplication.
>>
>> I'm sorry, I didn't catch your point, that the do_sea() and non do_sea()
>> paths use different exception tables?
> 
> No, they would have the same exception table, only that we'd interpret
> it differently depending on whether it's a SEA error or not. Or rather
> ignore the exception table altogether for non-SEA errors.

You mean to use the same exception type (EX_TYPE_KACCESS_ERR_ZERO) and
then do different processing on SEA errors and non-SEA errors, right?

If so, some instructions of copy_page() did not add to the exception
table will be added to the exception table, and the original logic will
be affected.

For example, if an instruction is not added to the exception table, the
instruction will panic when it triggers a non-SEA error. If this
instruction is added to the exception table because of SEA processing,
and then a non-SEA error is triggered, should we fix it?

Thanks,
Tong.

> 
>> My understanding is that the
>> exception table entry problem is fine. After all, the search is
>> performed only after a fault trigger. Code duplication can be solved by
>> extracting repeated logic to a public file.
> 
> If the new exception table entries are only taken into account for SEA
> errors, why do we need a duplicate copy_mc_page() function generated?
> Isn't the copy_page() and copy_mc_page() code identical (except for the
> additional labels to jump to for the exception)?
>
Catalin Marinas Feb. 17, 2025, 2:55 p.m. UTC | #6
On Mon, Feb 17, 2025 at 04:07:49PM +0800, Tong Tiangen wrote:
> 在 2025/2/15 1:24, Catalin Marinas 写道:
> > On Fri, Feb 14, 2025 at 10:49:01AM +0800, Tong Tiangen wrote:
> > > 在 2025/2/13 1:11, Catalin Marinas 写道:
> > > > On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
> > > > > Currently, many scenarios that can tolerate memory errors when copying page
> > > > > have been supported in the kernel[1~5], all of which are implemented by
> > > > > copy_mc_[user]_highpage(). arm64 should also support this mechanism.
> > > > > 
> > > > > Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
> > > > > architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
> > > > > __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
> > > > > 
> > > > > Add new helper copy_mc_page() which provide a page copy implementation with
> > > > > hardware memory error safe. The code logic of copy_mc_page() is the same as
> > > > > copy_page(), the main difference is that the ldp insn of copy_mc_page()
> > > > > contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
> > > > > main logic is extracted to copy_page_template.S. In addition, the fixup of
> > > > > MOPS insn is not considered at present.
> > > > 
> > > > Could we not add the exception table entry permanently but ignore the
> > > > exception table entry if it's not on the do_sea() path? That would save
> > > > some code duplication.
> > > 
> > > I'm sorry, I didn't catch your point, that the do_sea() and non do_sea()
> > > paths use different exception tables?
> > 
> > No, they would have the same exception table, only that we'd interpret
> > it differently depending on whether it's a SEA error or not. Or rather
> > ignore the exception table altogether for non-SEA errors.
> 
> You mean to use the same exception type (EX_TYPE_KACCESS_ERR_ZERO) and
> then do different processing on SEA errors and non-SEA errors, right?

Right.

> If so, some instructions of copy_page() did not add to the exception
> table will be added to the exception table, and the original logic will
> be affected.
> 
> For example, if an instruction is not added to the exception table, the
> instruction will panic when it triggers a non-SEA error. If this
> instruction is added to the exception table because of SEA processing,
> and then a non-SEA error is triggered, should we fix it?

No, we shouldn't fix it. The exception table entries have a type
associated. For a non-SEA error, we preserve the original behaviour even
if we find a SEA-specific entry in the exception table. You already need
such logic even if you duplicate the code for configurations where you
have MC enabled.
Tong Tiangen Feb. 18, 2025, 11:51 a.m. UTC | #7
在 2025/2/17 22:55, Catalin Marinas 写道:
> On Mon, Feb 17, 2025 at 04:07:49PM +0800, Tong Tiangen wrote:
>> 在 2025/2/15 1:24, Catalin Marinas 写道:
>>> On Fri, Feb 14, 2025 at 10:49:01AM +0800, Tong Tiangen wrote:
>>>> 在 2025/2/13 1:11, Catalin Marinas 写道:
>>>>> On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
>>>>>> Currently, many scenarios that can tolerate memory errors when copying page
>>>>>> have been supported in the kernel[1~5], all of which are implemented by
>>>>>> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
>>>>>>
>>>>>> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
>>>>>> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
>>>>>> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
>>>>>>
>>>>>> Add new helper copy_mc_page() which provide a page copy implementation with
>>>>>> hardware memory error safe. The code logic of copy_mc_page() is the same as
>>>>>> copy_page(), the main difference is that the ldp insn of copy_mc_page()
>>>>>> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
>>>>>> main logic is extracted to copy_page_template.S. In addition, the fixup of
>>>>>> MOPS insn is not considered at present.
>>>>>
>>>>> Could we not add the exception table entry permanently but ignore the
>>>>> exception table entry if it's not on the do_sea() path? That would save
>>>>> some code duplication.
>>>>
>>>> I'm sorry, I didn't catch your point, that the do_sea() and non do_sea()
>>>> paths use different exception tables?
>>>
>>> No, they would have the same exception table, only that we'd interpret
>>> it differently depending on whether it's a SEA error or not. Or rather
>>> ignore the exception table altogether for non-SEA errors.
>>
>> You mean to use the same exception type (EX_TYPE_KACCESS_ERR_ZERO) and
>> then do different processing on SEA errors and non-SEA errors, right?
> 
> Right.

Ok, now we have the same understanding.

> 
>> If so, some instructions of copy_page() did not add to the exception
>> table will be added to the exception table, and the original logic will
>> be affected.
>>
>> For example, if an instruction is not added to the exception table, the
>> instruction will panic when it triggers a non-SEA error. If this
>> instruction is added to the exception table because of SEA processing,
>> and then a non-SEA error is triggered, should we fix it?
> 
> No, we shouldn't fix it. The exception table entries have a type
> associated. For a non-SEA error, we preserve the original behaviour even
> if we find a SEA-specific entry in the exception table. You already need
> such logic even if you duplicate the code for configurations where you
> have MC enabled.


So we need another way to distinguish the different processing of the
same exception type on SEA and non-SEA path.

For example, using strcut exception_table_entry.data, the disadvantage
is that it occupies the future expansion space of data.

I still think it's better to use methods like copy_from_user.S and
copy_to_user.S calling copy_template.S, and the duplicate code in
copy_template.S.

Thanks,
Tong.

>
Catalin Marinas Feb. 18, 2025, 7:42 p.m. UTC | #8
On Tue, Feb 18, 2025 at 07:51:10PM +0800, Tong Tiangen wrote:
> > > > > 在 2025/2/13 1:11, Catalin Marinas 写道:
> > > > > > On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
> > > > > > > Currently, many scenarios that can tolerate memory errors when copying page
> > > > > > > have been supported in the kernel[1~5], all of which are implemented by
> > > > > > > copy_mc_[user]_highpage(). arm64 should also support this mechanism.
> > > > > > > 
> > > > > > > Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
> > > > > > > architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
> > > > > > > __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
> > > > > > > 
> > > > > > > Add new helper copy_mc_page() which provide a page copy implementation with
> > > > > > > hardware memory error safe. The code logic of copy_mc_page() is the same as
> > > > > > > copy_page(), the main difference is that the ldp insn of copy_mc_page()
> > > > > > > contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
> > > > > > > main logic is extracted to copy_page_template.S. In addition, the fixup of
> > > > > > > MOPS insn is not considered at present.
> > > > > > 
> > > > > > Could we not add the exception table entry permanently but ignore the
> > > > > > exception table entry if it's not on the do_sea() path? That would save
> > > > > > some code duplication.
[...]
> So we need another way to distinguish the different processing of the
> same exception type on SEA and non-SEA path.

Distinguishing whether the fault is SEA or non-SEA is already done by
the exception handling you are adding. What we don't have though is
information about whether the caller invoked copy_highpage() or
copy_mc_highpage(). That's where the code duplication comes in handy.

It's a shame we need to duplicate identical functions just to have
different addresses to look up in the exception table. We are also short
of caller saved registers to track this information (e.g. an extra
argument to those functions that the exception handler interprets).

I need to think a bit more, we could in theory get the arm64 memcpy_mc()
to return an error code depending on what type of fault it got (e.g.
-EHWPOISON for SEA, -EFAULT for non-SEA). copy_mc_highpage() would
interpret this one and panic if -EFAULT. But we lose some fault details
we normally get on a faulty access like some of the registers.

Well, maybe the simples is still to keep the function duplication. I'll
have another look at the series tomorrow.
Tong Tiangen March 4, 2025, 2:10 p.m. UTC | #9
Hi,Catalin:

Kindly ping ...

Thanks.:)

在 2025/2/19 3:42, Catalin Marinas 写道:
> On Tue, Feb 18, 2025 at 07:51:10PM +0800, Tong Tiangen wrote:
>>>>>> 在 2025/2/13 1:11, Catalin Marinas 写道:
>>>>>>> On Mon, Dec 09, 2024 at 10:42:56AM +0800, Tong Tiangen wrote:
>>>>>>>> Currently, many scenarios that can tolerate memory errors when copying page
>>>>>>>> have been supported in the kernel[1~5], all of which are implemented by
>>>>>>>> copy_mc_[user]_highpage(). arm64 should also support this mechanism.
>>>>>>>>
>>>>>>>> Due to mte, arm64 needs to have its own copy_mc_[user]_highpage()
>>>>>>>> architecture implementation, macros __HAVE_ARCH_COPY_MC_HIGHPAGE and
>>>>>>>> __HAVE_ARCH_COPY_MC_USER_HIGHPAGE have been added to control it.
>>>>>>>>
>>>>>>>> Add new helper copy_mc_page() which provide a page copy implementation with
>>>>>>>> hardware memory error safe. The code logic of copy_mc_page() is the same as
>>>>>>>> copy_page(), the main difference is that the ldp insn of copy_mc_page()
>>>>>>>> contains the fixup type EX_TYPE_KACCESS_ERR_ZERO_MEM_ERR, therefore, the
>>>>>>>> main logic is extracted to copy_page_template.S. In addition, the fixup of
>>>>>>>> MOPS insn is not considered at present.
>>>>>>>
>>>>>>> Could we not add the exception table entry permanently but ignore the
>>>>>>> exception table entry if it's not on the do_sea() path? That would save
>>>>>>> some code duplication.
> [...]
>> So we need another way to distinguish the different processing of the
>> same exception type on SEA and non-SEA path.
> 
> Distinguishing whether the fault is SEA or non-SEA is already done by
> the exception handling you are adding. What we don't have though is
> information about whether the caller invoked copy_highpage() or
> copy_mc_highpage(). That's where the code duplication comes in handy.
> 
> It's a shame we need to duplicate identical functions just to have
> different addresses to look up in the exception table. We are also short
> of caller saved registers to track this information (e.g. an extra
> argument to those functions that the exception handler interprets).
> 
> I need to think a bit more, we could in theory get the arm64 memcpy_mc()
> to return an error code depending on what type of fault it got (e.g.
> -EHWPOISON for SEA, -EFAULT for non-SEA). copy_mc_highpage() would
> interpret this one and panic if -EFAULT. But we lose some fault details
> we normally get on a faulty access like some of the registers.
> 
> Well, maybe the simples is still to keep the function duplication. I'll
> have another look at the series tomorrow.
>
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 6567df8ec8ca..efcd850ea2f8 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -98,6 +98,11 @@  static inline bool try_page_mte_tagging(struct page *page)
 void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t pte, unsigned int nr_pages);
 void mte_copy_page_tags(void *kto, const void *kfrom);
+
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+int mte_copy_mc_page_tags(void *kto, const void *kfrom);
+#endif
+
 void mte_thread_init_user(void);
 void mte_thread_switch(struct task_struct *next);
 void mte_cpu_setup(void);
@@ -134,6 +139,10 @@  static inline void mte_sync_tags(pte_t pte, unsigned int nr_pages)
 static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 {
 }
+static inline int mte_copy_mc_page_tags(void *kto, const void *kfrom)
+{
+	return 0;
+}
 static inline void mte_thread_init_user(void)
 {
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 2312e6ee595f..304cc86b8a10 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -29,6 +29,16 @@  void copy_user_highpage(struct page *to, struct page *from,
 void copy_highpage(struct page *to, struct page *from);
 #define __HAVE_ARCH_COPY_HIGHPAGE
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+int copy_mc_page(void *to, const void *from);
+int copy_mc_highpage(struct page *to, struct page *from);
+#define __HAVE_ARCH_COPY_MC_HIGHPAGE
+
+int copy_mc_user_highpage(struct page *to, struct page *from,
+		unsigned long vaddr, struct vm_area_struct *vma);
+#define __HAVE_ARCH_COPY_MC_USER_HIGHPAGE
+#endif
+
 struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
 						unsigned long vaddr);
 #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 8e882f479d98..78b0e9904689 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -13,6 +13,8 @@  endif
 
 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
 
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc_page.o
+
 obj-$(CONFIG_CRC32) += crc32.o crc32-glue.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/arm64/lib/copy_mc_page.S b/arch/arm64/lib/copy_mc_page.S
new file mode 100644
index 000000000000..51564828c30c
--- /dev/null
+++ b/arch/arm64/lib/copy_mc_page.S
@@ -0,0 +1,37 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <linux/const.h>
+#include <asm/assembler.h>
+#include <asm/page.h>
+#include <asm/cpufeature.h>
+#include <asm/alternative.h>
+#include <asm/asm-extable.h>
+#include <asm/asm-uaccess.h>
+
+/*
+ * Copy a page from src to dest (both are page aligned) with memory error safe
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ * Returns:
+ * 	x0 - Return 0 if copy success, or -EFAULT if anything goes wrong
+ *	     while copying.
+ */
+	.macro ldp1 reg1, reg2, ptr, val
+	KERNEL_MEM_ERR(9998f, ldp \reg1, \reg2, [\ptr, \val])
+	.endm
+
+SYM_FUNC_START(__pi_copy_mc_page)
+#include "copy_page_template.S"
+
+	mov x0, #0
+	ret
+
+9998:	mov x0, #-EFAULT
+	ret
+
+SYM_FUNC_END(__pi_copy_mc_page)
+SYM_FUNC_ALIAS(copy_mc_page, __pi_copy_mc_page)
+EXPORT_SYMBOL(copy_mc_page)
diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index e6374e7e5511..d0186bbf99f1 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -17,65 +17,13 @@ 
  *	x0 - dest
  *	x1 - src
  */
-SYM_FUNC_START(__pi_copy_page)
-#ifdef CONFIG_AS_HAS_MOPS
-	.arch_extension mops
-alternative_if_not ARM64_HAS_MOPS
-	b	.Lno_mops
-alternative_else_nop_endif
-
-	mov	x2, #PAGE_SIZE
-	cpypwn	[x0]!, [x1]!, x2!
-	cpymwn	[x0]!, [x1]!, x2!
-	cpyewn	[x0]!, [x1]!, x2!
-	ret
-.Lno_mops:
-#endif
-	ldp	x2, x3, [x1]
-	ldp	x4, x5, [x1, #16]
-	ldp	x6, x7, [x1, #32]
-	ldp	x8, x9, [x1, #48]
-	ldp	x10, x11, [x1, #64]
-	ldp	x12, x13, [x1, #80]
-	ldp	x14, x15, [x1, #96]
-	ldp	x16, x17, [x1, #112]
-
-	add	x0, x0, #256
-	add	x1, x1, #128
-1:
-	tst	x0, #(PAGE_SIZE - 1)
 
-	stnp	x2, x3, [x0, #-256]
-	ldp	x2, x3, [x1]
-	stnp	x4, x5, [x0, #16 - 256]
-	ldp	x4, x5, [x1, #16]
-	stnp	x6, x7, [x0, #32 - 256]
-	ldp	x6, x7, [x1, #32]
-	stnp	x8, x9, [x0, #48 - 256]
-	ldp	x8, x9, [x1, #48]
-	stnp	x10, x11, [x0, #64 - 256]
-	ldp	x10, x11, [x1, #64]
-	stnp	x12, x13, [x0, #80 - 256]
-	ldp	x12, x13, [x1, #80]
-	stnp	x14, x15, [x0, #96 - 256]
-	ldp	x14, x15, [x1, #96]
-	stnp	x16, x17, [x0, #112 - 256]
-	ldp	x16, x17, [x1, #112]
-
-	add	x0, x0, #128
-	add	x1, x1, #128
-
-	b.ne	1b
-
-	stnp	x2, x3, [x0, #-256]
-	stnp	x4, x5, [x0, #16 - 256]
-	stnp	x6, x7, [x0, #32 - 256]
-	stnp	x8, x9, [x0, #48 - 256]
-	stnp	x10, x11, [x0, #64 - 256]
-	stnp	x12, x13, [x0, #80 - 256]
-	stnp	x14, x15, [x0, #96 - 256]
-	stnp	x16, x17, [x0, #112 - 256]
+	.macro ldp1 reg1, reg2, ptr, val
+	ldp \reg1, \reg2, [\ptr, \val]
+	.endm
 
+SYM_FUNC_START(__pi_copy_page)
+#include "copy_page_template.S"
 	ret
 SYM_FUNC_END(__pi_copy_page)
 SYM_FUNC_ALIAS(copy_page, __pi_copy_page)
diff --git a/arch/arm64/lib/copy_page_template.S b/arch/arm64/lib/copy_page_template.S
new file mode 100644
index 000000000000..f96c7988c93d
--- /dev/null
+++ b/arch/arm64/lib/copy_page_template.S
@@ -0,0 +1,70 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ */
+
+/*
+ * Copy a page from src to dest (both are page aligned)
+ *
+ * Parameters:
+ *	x0 - dest
+ *	x1 - src
+ */
+
+#ifdef CONFIG_AS_HAS_MOPS
+	.arch_extension mops
+alternative_if_not ARM64_HAS_MOPS
+	b	.Lno_mops
+alternative_else_nop_endif
+
+	mov	x2, #PAGE_SIZE
+	cpypwn	[x0]!, [x1]!, x2!
+	cpymwn	[x0]!, [x1]!, x2!
+	cpyewn	[x0]!, [x1]!, x2!
+	ret
+.Lno_mops:
+#endif
+	ldp1	x2, x3, x1, #0
+	ldp1	x4, x5, x1, #16
+	ldp1	x6, x7, x1, #32
+	ldp1	x8, x9, x1, #48
+	ldp1	x10, x11, x1, #64
+	ldp1	x12, x13, x1, #80
+	ldp1	x14, x15, x1, #96
+	ldp1	x16, x17, x1, #112
+
+	add	x0, x0, #256
+	add	x1, x1, #128
+1:
+	tst	x0, #(PAGE_SIZE - 1)
+
+	stnp	x2, x3, [x0, #-256]
+	ldp1	x2, x3, x1, #0
+	stnp	x4, x5, [x0, #16 - 256]
+	ldp1	x4, x5, x1, #16
+	stnp	x6, x7, [x0, #32 - 256]
+	ldp1	x6, x7, x1, #32
+	stnp	x8, x9, [x0, #48 - 256]
+	ldp1	x8, x9, x1, #48
+	stnp	x10, x11, [x0, #64 - 256]
+	ldp1	x10, x11, x1, #64
+	stnp	x12, x13, [x0, #80 - 256]
+	ldp1	x12, x13, x1, #80
+	stnp	x14, x15, [x0, #96 - 256]
+	ldp1	x14, x15, x1, #96
+	stnp	x16, x17, [x0, #112 - 256]
+	ldp1	x16, x17, x1, #112
+
+	add	x0, x0, #128
+	add	x1, x1, #128
+
+	b.ne	1b
+
+	stnp	x2, x3, [x0, #-256]
+	stnp	x4, x5, [x0, #16 - 256]
+	stnp	x6, x7, [x0, #32 - 256]
+	stnp	x8, x9, [x0, #48 - 256]
+	stnp	x10, x11, [x0, #64 - 256]
+	stnp	x12, x13, [x0, #80 - 256]
+	stnp	x14, x15, [x0, #96 - 256]
+	stnp	x16, x17, [x0, #112 - 256]
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 5018ac03b6bf..9d4eeb76a838 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -80,6 +80,35 @@  SYM_FUNC_START(mte_copy_page_tags)
 	ret
 SYM_FUNC_END(mte_copy_page_tags)
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+/*
+ * Copy the tags from the source page to the destination one with memory error safe
+ *   x0 - address of the destination page
+ *   x1 - address of the source page
+ * Returns:
+ *   x0 - Return 0 if copy success, or
+ *        -EFAULT if anything goes wrong while copying.
+ */
+SYM_FUNC_START(mte_copy_mc_page_tags)
+	mov	x2, x0
+	mov	x3, x1
+	multitag_transfer_size x5, x6
+1:
+KERNEL_MEM_ERR(2f, ldgm	x4, [x3])
+	stgm	x4, [x2]
+	add	x2, x2, x5
+	add	x3, x3, x5
+	tst	x2, #(PAGE_SIZE - 1)
+	b.ne	1b
+
+	mov x0, #0
+	ret
+
+2:	mov x0, #-EFAULT
+	ret
+SYM_FUNC_END(mte_copy_mc_page_tags)
+#endif
+
 /*
  * Read tags from a user buffer (one tag per byte) and set the corresponding
  * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index a86c897017df..1a369f325ebb 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -67,3 +67,78 @@  void copy_user_highpage(struct page *to, struct page *from,
 	flush_dcache_page(to);
 }
 EXPORT_SYMBOL_GPL(copy_user_highpage);
+
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+/*
+ * Return -EFAULT if anything goes wrong while copying page or mte.
+ */
+int copy_mc_highpage(struct page *to, struct page *from)
+{
+	void *kto = page_address(to);
+	void *kfrom = page_address(from);
+	struct folio *src = page_folio(from);
+	struct folio *dst = page_folio(to);
+	unsigned int i, nr_pages;
+	int ret;
+
+	ret = copy_mc_page(kto, kfrom);
+	if (ret)
+		return -EFAULT;
+
+	if (kasan_hw_tags_enabled())
+		page_kasan_tag_reset(to);
+
+	if (!system_supports_mte())
+		return 0;
+
+	if (folio_test_hugetlb(src)) {
+		if (!folio_test_hugetlb_mte_tagged(src) ||
+		    from != folio_page(src, 0))
+			return 0;
+
+		WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst));
+
+		/*
+		 * Populate tags for all subpages.
+		 *
+		 * Don't assume the first page is head page since
+		 * huge page copy may start from any subpage.
+		 */
+		nr_pages = folio_nr_pages(src);
+		for (i = 0; i < nr_pages; i++) {
+			kfrom = page_address(folio_page(src, i));
+			kto = page_address(folio_page(dst, i));
+			ret = mte_copy_mc_page_tags(kto, kfrom);
+			if (ret)
+				return -EFAULT;
+		}
+		folio_set_hugetlb_mte_tagged(dst);
+	} else if (page_mte_tagged(from)) {
+		/* It's a new page, shouldn't have been tagged yet */
+		WARN_ON_ONCE(!try_page_mte_tagging(to));
+
+		ret = mte_copy_mc_page_tags(kto, kfrom);
+		if (ret)
+			return -EFAULT;
+		set_page_mte_tagged(to);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(copy_mc_highpage);
+
+int copy_mc_user_highpage(struct page *to, struct page *from,
+			unsigned long vaddr, struct vm_area_struct *vma)
+{
+	int ret;
+
+	ret = copy_mc_highpage(to, from);
+	if (ret)
+		return ret;
+
+	flush_dcache_page(to);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_user_highpage);
+#endif
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 0eb4b9b06837..89a6e0fd0b31 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -326,6 +326,7 @@  static inline void copy_highpage(struct page *to, struct page *from)
 #endif
 
 #ifdef copy_mc_to_kernel
+#ifndef __HAVE_ARCH_COPY_MC_USER_HIGHPAGE
 /*
  * If architecture supports machine check exception handling, define the
  * #MC versions of copy_user_highpage and copy_highpage. They copy a memory
@@ -351,7 +352,9 @@  static inline int copy_mc_user_highpage(struct page *to, struct page *from,
 
 	return ret ? -EFAULT : 0;
 }
+#endif
 
+#ifndef __HAVE_ARCH_COPY_MC_HIGHPAGE
 static inline int copy_mc_highpage(struct page *to, struct page *from)
 {
 	unsigned long ret;
@@ -370,20 +373,25 @@  static inline int copy_mc_highpage(struct page *to, struct page *from)
 
 	return ret ? -EFAULT : 0;
 }
+#endif
 #else
+#ifndef __HAVE_ARCH_COPY_MC_USER_HIGHPAGE
 static inline int copy_mc_user_highpage(struct page *to, struct page *from,
 					unsigned long vaddr, struct vm_area_struct *vma)
 {
 	copy_user_highpage(to, from, vaddr, vma);
 	return 0;
 }
+#endif
 
+#ifndef __HAVE_ARCH_COPY_MC_HIGHPAGE
 static inline int copy_mc_highpage(struct page *to, struct page *from)
 {
 	copy_highpage(to, from);
 	return 0;
 }
 #endif
+#endif
 
 static inline void memcpy_page(struct page *dst_page, size_t dst_off,
 			       struct page *src_page, size_t src_off,