diff mbox series

[RFC] io_uring: Fix io_uring_mmu_get_unmapped_area() for IA-64

Message ID ZLhTuTPecx2fGuH1@p100 (mailing list archive)
State New
Headers show
Series [RFC] io_uring: Fix io_uring_mmu_get_unmapped_area() for IA-64 | expand

Commit Message

Helge Deller July 19, 2023, 9:20 p.m. UTC
The io_uring testcase is broken on IA-64 since commit d808459b2e31
("io_uring: Adjust mapping wrt architecture aliasing requirements").

The reason is, that this commit introduced an own architecture
independend get_unmapped_area() search algorithm which doesn't suite the
memory region requirements for IA-64.

To avoid similar problems in the future it's better to switch back to
the architecture-provided get_unmapped_area() function and adjust the
needed input parameters before the call.  Additionally
io_uring_mmu_get_unmapped_area() will now become easier to understand
and maintain.

This patch has been tested on physical IA-64 and PA-RISC machines,
without any failures in the io_uring testcases. On PA-RISC the
LTP mmmap testcases did not report any regressions either.

I don't expect issues for other architectures, but it would be nice if
this patch could be tested on other machines too.

Reported-by: matoro <matoro_mailinglist_kernel@matoro.tk>
Fixes: d808459b2e31 ("io_uring: Adjust mapping wrt architecture aliasing requirements")
Signed-off-by: Helge Deller <deller@gmx.de>

Comments

matoro July 20, 2023, 12:14 a.m. UTC | #1
On 2023-07-19 17:20, Helge Deller wrote:
> The io_uring testcase is broken on IA-64 since commit d808459b2e31
> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
> 
> The reason is, that this commit introduced an own architecture
> independend get_unmapped_area() search algorithm which doesn't suite 
> the
> memory region requirements for IA-64.
> 
> To avoid similar problems in the future it's better to switch back to
> the architecture-provided get_unmapped_area() function and adjust the
> needed input parameters before the call.  Additionally
> io_uring_mmu_get_unmapped_area() will now become easier to understand
> and maintain.
> 
> This patch has been tested on physical IA-64 and PA-RISC machines,
> without any failures in the io_uring testcases. On PA-RISC the
> LTP mmmap testcases did not report any regressions either.
> 
> I don't expect issues for other architectures, but it would be nice if
> this patch could be tested on other machines too.
> 
> Reported-by: matoro <matoro_mailinglist_kernel@matoro.tk>
> Fixes: d808459b2e31 ("io_uring: Adjust mapping wrt architecture 
> aliasing requirements")
> Signed-off-by: Helge Deller <deller@gmx.de>
> 
> diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
> index 6e948d015332..eb561cc93632 100644
> --- a/arch/ia64/kernel/sys_ia64.c
> +++ b/arch/ia64/kernel/sys_ia64.c
> @@ -63,7 +63,7 @@ arch_get_unmapped_area (struct file *filp, unsigned 
> long addr, unsigned long len
>  	info.low_limit = addr;
>  	info.high_limit = TASK_SIZE;
>  	info.align_mask = align_mask;
> -	info.align_offset = 0;
> +	info.align_offset = pgoff << PAGE_SHIFT;
>  	return vm_unmapped_area(&info);
>  }
> 
> diff --git a/arch/parisc/kernel/sys_parisc.c 
> b/arch/parisc/kernel/sys_parisc.c
> index 39acccabf2ed..465b7cb9d44f 100644
> --- a/arch/parisc/kernel/sys_parisc.c
> +++ b/arch/parisc/kernel/sys_parisc.c
> @@ -26,12 +26,17 @@
>  #include <linux/compat.h>
> 
>  /*
> - * Construct an artificial page offset for the mapping based on the 
> physical
> + * Construct an artificial page offset for the mapping based on the 
> virtual
>   * address of the kernel file mapping variable.
> + * If filp is zero the calculated pgoff value aliases the memory of 
> the given
> + * address. This is useful for io_uring where the mapping shall alias 
> a kernel
> + * address and a userspace adress where both the kernel and the 
> userspace
> + * access the same memory region.
>   */
> -#define GET_FILP_PGOFF(filp)		\
> -	(filp ? (((unsigned long) filp->f_mapping) >> 8)	\
> -		 & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)
> +#define GET_FILP_PGOFF(filp, addr)		\
> +	((filp ? (((unsigned long) filp->f_mapping) >> 8)	\
> +		 & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)	\
> +	  + (addr >> PAGE_SHIFT))
> 
>  static unsigned long shared_align_offset(unsigned long filp_pgoff,
>  					 unsigned long pgoff)
> @@ -111,7 +116,7 @@ static unsigned long 
> arch_get_unmapped_area_common(struct file *filp,
>  	do_color_align = 0;
>  	if (filp || (flags & MAP_SHARED))
>  		do_color_align = 1;
> -	filp_pgoff = GET_FILP_PGOFF(filp);
> +	filp_pgoff = GET_FILP_PGOFF(filp, addr);
> 
>  	if (flags & MAP_FIXED) {
>  		/* Even MAP_FIXED mappings must reside within TASK_SIZE */
> diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
> index f1b79959d1c1..70eb01faf15f 100644
> --- a/io_uring/io_uring.c
> +++ b/io_uring/io_uring.c
> @@ -3425,8 +3425,6 @@ static unsigned long 
> io_uring_mmu_get_unmapped_area(struct file *filp,
>  			unsigned long addr, unsigned long len,
>  			unsigned long pgoff, unsigned long flags)
>  {
> -	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
> -	struct vm_unmapped_area_info info;
>  	void *ptr;
> 
>  	/*
> @@ -3441,32 +3439,26 @@ static unsigned long 
> io_uring_mmu_get_unmapped_area(struct file *filp,
>  	if (IS_ERR(ptr))
>  		return -ENOMEM;
> 
> -	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
> -	info.length = len;
> -	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
> -	info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
> +	/*
> +	 * Some architectures have strong cache aliasing requirements.
> +	 * For such architectures we need a coherent mapping which aliases
> +	 * kernel memory *and* userspace memory. To achieve that:
> +	 * - use a NULL file pointer to reference physical memory, and
> +	 * - use the kernel virtual address of the shared io_uring context
> +	 *   (instead of the userspace-provided address, which has to be 0UL
> +	 *   anyway).
> +	 * For architectures without such aliasing requirements, the
> +	 * architecture will return any suitable mapping because addr is 0.
> +	 */
> +	filp = NULL;
> +	flags |= MAP_SHARED;
> +	pgoff = 0;	/* has been translated to ptr above */
>  #ifdef SHM_COLOUR
> -	info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
> +	addr = (uintptr_t) ptr;
>  #else
> -	info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
> +	addr = 0UL;
>  #endif
> -	info.align_offset = (unsigned long) ptr;
> -
> -	/*
> -	 * A failed mmap() very likely causes application failure,
> -	 * so fall back to the bottom-up function here. This scenario
> -	 * can happen with large stack limits and large mmap()
> -	 * allocations.
> -	 */
> -	addr = vm_unmapped_area(&info);
> -	if (offset_in_page(addr)) {
> -		info.flags = 0;
> -		info.low_limit = TASK_UNMAPPED_BASE;
> -		info.high_limit = mmap_end;
> -		addr = vm_unmapped_area(&info);
> -	}
> -
> -	return addr;
> +	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
>  }
> 
>  #else /* !CONFIG_MMU */

Tested-by: matoro <matoro_mailinglist_kernel@matoro.tk>

On 6.4.4.  The NFS thing I mentioned in our previous emails appears to 
be a separate regression not related to io_uring.
Jens Axboe July 20, 2023, 10:30 p.m. UTC | #2
On 7/19/23 3:20?PM, Helge Deller wrote:
> The io_uring testcase is broken on IA-64 since commit d808459b2e31
> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
> 
> The reason is, that this commit introduced an own architecture
> independend get_unmapped_area() search algorithm which doesn't suite the
> memory region requirements for IA-64.
> 
> To avoid similar problems in the future it's better to switch back to
> the architecture-provided get_unmapped_area() function and adjust the
> needed input parameters before the call.  Additionally
> io_uring_mmu_get_unmapped_area() will now become easier to understand
> and maintain.
> 
> This patch has been tested on physical IA-64 and PA-RISC machines,
> without any failures in the io_uring testcases. On PA-RISC the
> LTP mmmap testcases did not report any regressions either.
> 
> I don't expect issues for other architectures, but it would be nice if
> this patch could be tested on other machines too.

Any comments from the IA64 folks?

Helge, should this be split into three patches? One for hppa, one for
ia64, and then the io_uring one?
Helge Deller July 20, 2023, 10:38 p.m. UTC | #3
On 7/21/23 00:30, Jens Axboe wrote:
> On 7/19/23 3:20?PM, Helge Deller wrote:
>> The io_uring testcase is broken on IA-64 since commit d808459b2e31
>> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
>>
>> The reason is, that this commit introduced an own architecture
>> independend get_unmapped_area() search algorithm which doesn't suite the
>> memory region requirements for IA-64.
>>
>> To avoid similar problems in the future it's better to switch back to
>> the architecture-provided get_unmapped_area() function and adjust the
>> needed input parameters before the call.  Additionally
>> io_uring_mmu_get_unmapped_area() will now become easier to understand
>> and maintain.
>>
>> This patch has been tested on physical IA-64 and PA-RISC machines,
>> without any failures in the io_uring testcases. On PA-RISC the
>> LTP mmmap testcases did not report any regressions either.
>>
>> I don't expect issues for other architectures, but it would be nice if
>> this patch could be tested on other machines too.
>
> Any comments from the IA64 folks?

matoro tested it on ia64 at least...

> Helge, should this be split into three patches? One for hppa, one for
> ia64, and then the io_uring one?

If we split up, I would prefer to split it into 2 patches:
One for io_uring together with the hppa patch, since they should go in together.

The ia64 patch is probably unrelated, and can go seperately. But
there doesn't seem to be any ia64 maintainer...?

Do you have a chance to test this patch on the other io_uring platforms,
without applying it into your tree? I think some testing would be good.

Helge
matoro July 20, 2023, 10:39 p.m. UTC | #4
On 2023-07-20 18:38, Helge Deller wrote:
> On 7/21/23 00:30, Jens Axboe wrote:
>> On 7/19/23 3:20?PM, Helge Deller wrote:
>>> The io_uring testcase is broken on IA-64 since commit d808459b2e31
>>> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
>>> 
>>> The reason is, that this commit introduced an own architecture
>>> independend get_unmapped_area() search algorithm which doesn't suite 
>>> the
>>> memory region requirements for IA-64.
>>> 
>>> To avoid similar problems in the future it's better to switch back to
>>> the architecture-provided get_unmapped_area() function and adjust the
>>> needed input parameters before the call.  Additionally
>>> io_uring_mmu_get_unmapped_area() will now become easier to understand
>>> and maintain.
>>> 
>>> This patch has been tested on physical IA-64 and PA-RISC machines,
>>> without any failures in the io_uring testcases. On PA-RISC the
>>> LTP mmmap testcases did not report any regressions either.
>>> 
>>> I don't expect issues for other architectures, but it would be nice 
>>> if
>>> this patch could be tested on other machines too.
>> 
>> Any comments from the IA64 folks?
> 
> matoro tested it on ia64 at least...
> 
>> Helge, should this be split into three patches? One for hppa, one for
>> ia64, and then the io_uring one?
> 
> If we split up, I would prefer to split it into 2 patches:
> One for io_uring together with the hppa patch, since they should go in 
> together.
> 
> The ia64 patch is probably unrelated, and can go seperately. But
> there doesn't seem to be any ia64 maintainer...?
> 
> Do you have a chance to test this patch on the other io_uring 
> platforms,
> without applying it into your tree? I think some testing would be good.
> 
> Helge

I tested this on ppc64le last night just to be sure, still had 100% pass 
on test suite.
Jens Axboe July 20, 2023, 10:43 p.m. UTC | #5
On 7/20/23 4:38?PM, Helge Deller wrote:
> On 7/21/23 00:30, Jens Axboe wrote:
>> On 7/19/23 3:20?PM, Helge Deller wrote:
>>> The io_uring testcase is broken on IA-64 since commit d808459b2e31
>>> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
>>>
>>> The reason is, that this commit introduced an own architecture
>>> independend get_unmapped_area() search algorithm which doesn't suite the
>>> memory region requirements for IA-64.
>>>
>>> To avoid similar problems in the future it's better to switch back to
>>> the architecture-provided get_unmapped_area() function and adjust the
>>> needed input parameters before the call.  Additionally
>>> io_uring_mmu_get_unmapped_area() will now become easier to understand
>>> and maintain.
>>>
>>> This patch has been tested on physical IA-64 and PA-RISC machines,
>>> without any failures in the io_uring testcases. On PA-RISC the
>>> LTP mmmap testcases did not report any regressions either.
>>>
>>> I don't expect issues for other architectures, but it would be nice if
>>> this patch could be tested on other machines too.
>>
>> Any comments from the IA64 folks?
> 
> matoro tested it on ia64 at least...
> 
>> Helge, should this be split into three patches? One for hppa, one for
>> ia64, and then the io_uring one?
> 
> If we split up, I would prefer to split it into 2 patches: One for
> io_uring together with the hppa patch, since they should go in
> together.

OK, that makes sense. Want to spin a new version done like that?

> The ia64 patch is probably unrelated, and can go seperately. But there
> doesn't seem to be any ia64 maintainer...?

Yeah, not too worried about IA64, and matoro having tested it is great.

> Do you have a chance to test this patch on the other io_uring
> platforms, without applying it into your tree? I think some testing
> would be good.

Yep, I can run it on arm64 and x86-64 pretty easily. Will do so.
Jens Axboe July 20, 2023, 10:43 p.m. UTC | #6
On 7/20/23 4:39?PM, matoro wrote:
> On 2023-07-20 18:38, Helge Deller wrote:
>> On 7/21/23 00:30, Jens Axboe wrote:
>>> On 7/19/23 3:20?PM, Helge Deller wrote:
>>>> The io_uring testcase is broken on IA-64 since commit d808459b2e31
>>>> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
>>>>
>>>> The reason is, that this commit introduced an own architecture
>>>> independend get_unmapped_area() search algorithm which doesn't suite the
>>>> memory region requirements for IA-64.
>>>>
>>>> To avoid similar problems in the future it's better to switch back to
>>>> the architecture-provided get_unmapped_area() function and adjust the
>>>> needed input parameters before the call.  Additionally
>>>> io_uring_mmu_get_unmapped_area() will now become easier to understand
>>>> and maintain.
>>>>
>>>> This patch has been tested on physical IA-64 and PA-RISC machines,
>>>> without any failures in the io_uring testcases. On PA-RISC the
>>>> LTP mmmap testcases did not report any regressions either.
>>>>
>>>> I don't expect issues for other architectures, but it would be nice if
>>>> this patch could be tested on other machines too.
>>>
>>> Any comments from the IA64 folks?
>>
>> matoro tested it on ia64 at least...
>>
>>> Helge, should this be split into three patches? One for hppa, one for
>>> ia64, and then the io_uring one?
>>
>> If we split up, I would prefer to split it into 2 patches: One for
>> io_uring together with the hppa patch, since they should go in
>> together.
>>
>> The ia64 patch is probably unrelated, and can go seperately. But
>> there doesn't seem to be any ia64 maintainer...?
>>
>> Do you have a chance to test this patch on the other io_uring
>> platforms, without applying it into your tree? I think some testing
>> would be good.
>>
>> Helge
>
> 
> I tested this on ppc64le last night just to be sure, still had 100%
> pass on test suite.

Excellent, thanks.
Jens Axboe July 20, 2023, 10:57 p.m. UTC | #7
On 7/20/23 4:43?PM, Jens Axboe wrote:
>> Do you have a chance to test this patch on the other io_uring
>> platforms, without applying it into your tree? I think some testing
>> would be good.
> 
> Yep, I can run it on arm64 and x86-64 pretty easily. Will do so.

Works on both of those as well.
Helge Deller July 20, 2023, 11:01 p.m. UTC | #8
On 7/21/23 00:43, Jens Axboe wrote:
> On 7/20/23 4:38?PM, Helge Deller wrote:
>> On 7/21/23 00:30, Jens Axboe wrote:
>>> On 7/19/23 3:20?PM, Helge Deller wrote:
>>>> The io_uring testcase is broken on IA-64 since commit d808459b2e31
>>>> ("io_uring: Adjust mapping wrt architecture aliasing requirements").
>>>>
>>>> The reason is, that this commit introduced an own architecture
>>>> independend get_unmapped_area() search algorithm which doesn't suite the
>>>> memory region requirements for IA-64.
>>>>
>>>> To avoid similar problems in the future it's better to switch back to
>>>> the architecture-provided get_unmapped_area() function and adjust the
>>>> needed input parameters before the call.  Additionally
>>>> io_uring_mmu_get_unmapped_area() will now become easier to understand
>>>> and maintain.
>>>>
>>>> This patch has been tested on physical IA-64 and PA-RISC machines,
>>>> without any failures in the io_uring testcases. On PA-RISC the
>>>> LTP mmmap testcases did not report any regressions either.
>>>>
>>>> I don't expect issues for other architectures, but it would be nice if
>>>> this patch could be tested on other machines too.
>>>
>>> Any comments from the IA64 folks?
>>
>> matoro tested it on ia64 at least...
>>
>>> Helge, should this be split into three patches? One for hppa, one for
>>> ia64, and then the io_uring one?
>>
>> If we split up, I would prefer to split it into 2 patches: One for
>> io_uring together with the hppa patch, since they should go in
>> together.
>
> OK, that makes sense. Want to spin a new version done like that?

Yes.
I'll send it tomorrow.
(now that we know it works on the major platforms)

Helge
diff mbox series

Patch

diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 6e948d015332..eb561cc93632 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -63,7 +63,7 @@  arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
 	info.low_limit = addr;
 	info.high_limit = TASK_SIZE;
 	info.align_mask = align_mask;
-	info.align_offset = 0;
+	info.align_offset = pgoff << PAGE_SHIFT;
 	return vm_unmapped_area(&info);
 }

diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 39acccabf2ed..465b7cb9d44f 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -26,12 +26,17 @@ 
 #include <linux/compat.h>

 /*
- * Construct an artificial page offset for the mapping based on the physical
+ * Construct an artificial page offset for the mapping based on the virtual
  * address of the kernel file mapping variable.
+ * If filp is zero the calculated pgoff value aliases the memory of the given
+ * address. This is useful for io_uring where the mapping shall alias a kernel
+ * address and a userspace adress where both the kernel and the userspace
+ * access the same memory region.
  */
-#define GET_FILP_PGOFF(filp)		\
-	(filp ? (((unsigned long) filp->f_mapping) >> 8)	\
-		 & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)
+#define GET_FILP_PGOFF(filp, addr)		\
+	((filp ? (((unsigned long) filp->f_mapping) >> 8)	\
+		 & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL)	\
+	  + (addr >> PAGE_SHIFT))

 static unsigned long shared_align_offset(unsigned long filp_pgoff,
 					 unsigned long pgoff)
@@ -111,7 +116,7 @@  static unsigned long arch_get_unmapped_area_common(struct file *filp,
 	do_color_align = 0;
 	if (filp || (flags & MAP_SHARED))
 		do_color_align = 1;
-	filp_pgoff = GET_FILP_PGOFF(filp);
+	filp_pgoff = GET_FILP_PGOFF(filp, addr);

 	if (flags & MAP_FIXED) {
 		/* Even MAP_FIXED mappings must reside within TASK_SIZE */
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index f1b79959d1c1..70eb01faf15f 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -3425,8 +3425,6 @@  static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
 			unsigned long addr, unsigned long len,
 			unsigned long pgoff, unsigned long flags)
 {
-	const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
-	struct vm_unmapped_area_info info;
 	void *ptr;

 	/*
@@ -3441,32 +3439,26 @@  static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
 	if (IS_ERR(ptr))
 		return -ENOMEM;

-	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-	info.length = len;
-	info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-	info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+	/*
+	 * Some architectures have strong cache aliasing requirements.
+	 * For such architectures we need a coherent mapping which aliases
+	 * kernel memory *and* userspace memory. To achieve that:
+	 * - use a NULL file pointer to reference physical memory, and
+	 * - use the kernel virtual address of the shared io_uring context
+	 *   (instead of the userspace-provided address, which has to be 0UL
+	 *   anyway).
+	 * For architectures without such aliasing requirements, the
+	 * architecture will return any suitable mapping because addr is 0.
+	 */
+	filp = NULL;
+	flags |= MAP_SHARED;
+	pgoff = 0;	/* has been translated to ptr above */
 #ifdef SHM_COLOUR
-	info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+	addr = (uintptr_t) ptr;
 #else
-	info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+	addr = 0UL;
 #endif
-	info.align_offset = (unsigned long) ptr;
-
-	/*
-	 * A failed mmap() very likely causes application failure,
-	 * so fall back to the bottom-up function here. This scenario
-	 * can happen with large stack limits and large mmap()
-	 * allocations.
-	 */
-	addr = vm_unmapped_area(&info);
-	if (offset_in_page(addr)) {
-		info.flags = 0;
-		info.low_limit = TASK_UNMAPPED_BASE;
-		info.high_limit = mmap_end;
-		addr = vm_unmapped_area(&info);
-	}
-
-	return addr;
+	return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
 }

 #else /* !CONFIG_MMU */