diff mbox series

[PATCHv5,06/13] x86/mm: Provide ARCH_GET_UNTAG_MASK and ARCH_ENABLE_TAGGED_ADDR

Message ID 20220712231328.5294-7-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Headers show
Series Linear Address Masking enabling | expand

Commit Message

Kirill A. Shutemov July 12, 2022, 11:13 p.m. UTC
Add a couple of arch_prctl() handles:

 - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
   of tag bits. It is rounded up to the nearest LAM mode that can
   provide it. For now only LAM_U57 is supported, with 6 tag bits.

 - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
   bits located in the address.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 arch/x86/include/uapi/asm/prctl.h |  3 ++
 arch/x86/kernel/process_64.c      | 60 ++++++++++++++++++++++++++++++-
 2 files changed, 62 insertions(+), 1 deletion(-)

Comments

Alexander Potapenko July 18, 2022, 5:47 p.m. UTC | #1
On Wed, Jul 13, 2022 at 1:13 AM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Add a couple of arch_prctl() handles:
>
>  - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
>    of tag bits. It is rounded up to the nearest LAM mode that can
>    provide it. For now only LAM_U57 is supported, with 6 tag bits.
>
>  - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
>    bits located in the address.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> ---
>  arch/x86/include/uapi/asm/prctl.h |  3 ++
>  arch/x86/kernel/process_64.c      | 60 ++++++++++++++++++++++++++++++-
>  2 files changed, 62 insertions(+), 1 deletion(-)


> +
> +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> +{
> +       int ret = 0;
> +
> +       if (!cpu_feature_enabled(X86_FEATURE_LAM))
> +               return -ENODEV;

Hm, I used to think ENODEV is specific to devices, and -EINVAL is more
appropriate here.
On the other hand, e.g. prctl(PR_SET_SPECULATION_CTRL) can also return ENODEV...


>  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
>  {
>         int ret = 0;
> @@ -829,7 +883,11 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
>         case ARCH_MAP_VDSO_64:
>                 return prctl_map_vdso(&vdso_image_64, arg2);
>  #endif
> -
> +       case ARCH_GET_UNTAG_MASK:
> +               return put_user(task->mm->context.untag_mask,
> +                               (unsigned long __user *)arg2);

Can we have ARCH_GET_UNTAG_MASK return the same error value (ENODEV or
EINVAL) as ARCH_ENABLE_TAGGED_ADDR in the case the host doesn't
support LAM?
After all, the mask does not make much sense in this case.

> +       case ARCH_ENABLE_TAGGED_ADDR:
> +               return prctl_enable_tagged_addr(task->mm, arg2);
>         default:
>                 ret = -EINVAL;
>                 break;
> --
> 2.35.1
>
Kirill A. Shutemov July 20, 2022, 12:57 a.m. UTC | #2
On Mon, Jul 18, 2022 at 07:47:44PM +0200, Alexander Potapenko wrote:
> On Wed, Jul 13, 2022 at 1:13 AM Kirill A. Shutemov
> <kirill.shutemov@linux.intel.com> wrote:
> >
> > Add a couple of arch_prctl() handles:
> >
> >  - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
> >    of tag bits. It is rounded up to the nearest LAM mode that can
> >    provide it. For now only LAM_U57 is supported, with 6 tag bits.
> >
> >  - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
> >    bits located in the address.
> >
> > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > ---
> >  arch/x86/include/uapi/asm/prctl.h |  3 ++
> >  arch/x86/kernel/process_64.c      | 60 ++++++++++++++++++++++++++++++-
> >  2 files changed, 62 insertions(+), 1 deletion(-)
> 
> 
> > +
> > +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> > +{
> > +       int ret = 0;
> > +
> > +       if (!cpu_feature_enabled(X86_FEATURE_LAM))
> > +               return -ENODEV;
> 
> Hm, I used to think ENODEV is specific to devices, and -EINVAL is more
> appropriate here.
> On the other hand, e.g. prctl(PR_SET_SPECULATION_CTRL) can also return ENODEV...

I'm fine either way. Although there are way too many -EINVALs around, so
it does not communicate much to user.

> >  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> >  {
> >         int ret = 0;
> > @@ -829,7 +883,11 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> >         case ARCH_MAP_VDSO_64:
> >                 return prctl_map_vdso(&vdso_image_64, arg2);
> >  #endif
> > -
> > +       case ARCH_GET_UNTAG_MASK:
> > +               return put_user(task->mm->context.untag_mask,
> > +                               (unsigned long __user *)arg2);
> 
> Can we have ARCH_GET_UNTAG_MASK return the same error value (ENODEV or
> EINVAL) as ARCH_ENABLE_TAGGED_ADDR in the case the host doesn't
> support LAM?
> After all, the mask does not make much sense in this case.

I'm not sure about this.

As it is ARCH_GET_UNTAG_MASK returns -1UL mask if LAM is not present or
not enabled. Applying this mask will give correct result for both.

Why is -ENODEV better here? Looks like just more work for userspace.

> 
> > +       case ARCH_ENABLE_TAGGED_ADDR:
> > +               return prctl_enable_tagged_addr(task->mm, arg2);
> >         default:
> >                 ret = -EINVAL;
> >                 break;
> > --
> > 2.35.1
> >
> 
> 
> -- 
> Alexander Potapenko
> Software Engineer
> 
> Google Germany GmbH
> Erika-Mann-Straße, 33
> 80636 München
> 
> Geschäftsführer: Paul Manicle, Liana Sebastian
> Registergericht und -nummer: Hamburg, HRB 86891
> Sitz der Gesellschaft: Hamburg
Alexander Potapenko July 20, 2022, 8:19 a.m. UTC | #3
On Wed, Jul 20, 2022 at 2:57 AM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Mon, Jul 18, 2022 at 07:47:44PM +0200, Alexander Potapenko wrote:
> > On Wed, Jul 13, 2022 at 1:13 AM Kirill A. Shutemov
> > <kirill.shutemov@linux.intel.com> wrote:
> > >
> > > Add a couple of arch_prctl() handles:
> > >
> > >  - ARCH_ENABLE_TAGGED_ADDR enabled LAM. The argument is required number
> > >    of tag bits. It is rounded up to the nearest LAM mode that can
> > >    provide it. For now only LAM_U57 is supported, with 6 tag bits.
> > >
> > >  - ARCH_GET_UNTAG_MASK returns untag mask. It can indicates where tag
> > >    bits located in the address.
> > >
> > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
> > > ---
> > >  arch/x86/include/uapi/asm/prctl.h |  3 ++
> > >  arch/x86/kernel/process_64.c      | 60 ++++++++++++++++++++++++++++++-
> > >  2 files changed, 62 insertions(+), 1 deletion(-)
> >
> >
> > > +
> > > +static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
> > > +{
> > > +       int ret = 0;
> > > +
> > > +       if (!cpu_feature_enabled(X86_FEATURE_LAM))
> > > +               return -ENODEV;
> >
> > Hm, I used to think ENODEV is specific to devices, and -EINVAL is more
> > appropriate here.
> > On the other hand, e.g. prctl(PR_SET_SPECULATION_CTRL) can also return ENODEV...
>
> I'm fine either way. Although there are way too many -EINVALs around, so
> it does not communicate much to user.
>
> > >  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > >  {
> > >         int ret = 0;
> > > @@ -829,7 +883,11 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > >         case ARCH_MAP_VDSO_64:
> > >                 return prctl_map_vdso(&vdso_image_64, arg2);
> > >  #endif
> > > -
> > > +       case ARCH_GET_UNTAG_MASK:
> > > +               return put_user(task->mm->context.untag_mask,
> > > +                               (unsigned long __user *)arg2);
> >
> > Can we have ARCH_GET_UNTAG_MASK return the same error value (ENODEV or
> > EINVAL) as ARCH_ENABLE_TAGGED_ADDR in the case the host doesn't
> > support LAM?
> > After all, the mask does not make much sense in this case.
>
> I'm not sure about this.
>
> As it is ARCH_GET_UNTAG_MASK returns -1UL mask if LAM is not present or
> not enabled. Applying this mask will give correct result for both.

Is anyone going to use this mask if tagging is unsupported?
Tools like HWASan won't even try to proceed in that case.

> Why is -ENODEV better here? Looks like just more work for userspace.

This boils down to the question of detecting LAM support I raised previously.
It's nice to have a syscall without side effects to check whether LAM
can be enabled at all (e.g. one can do the check in the parent process
and conditionally enable LAM in certain, but not all, child processes)
CPUID won't help here, because the presence of the LAM bit in CPUID
doesn't guarantee its support in the kernel, and every other solution
is more complicated than just issuing a system call.

Note that TBI has PR_GET_TAGGED_ADDR_CTRL, which can be used to detect
the presence of memory tagging support.

>
> >
> > > +       case ARCH_ENABLE_TAGGED_ADDR:
> > > +               return prctl_enable_tagged_addr(task->mm, arg2);
> > >         default:
> > >                 ret = -EINVAL;
> > >                 break;
> > > --
> > > 2.35.1
> > >
> >
> >
> > --
> > Alexander Potapenko
> > Software Engineer
> >
> > Google Germany GmbH
> > Erika-Mann-Straße, 33
> > 80636 München
> >
> > Geschäftsführer: Paul Manicle, Liana Sebastian
> > Registergericht und -nummer: Hamburg, HRB 86891
> > Sitz der Gesellschaft: Hamburg
>
> --
>  Kirill A. Shutemov
Kirill A. Shutemov July 20, 2022, 12:47 p.m. UTC | #4
On Wed, Jul 20, 2022 at 10:19:36AM +0200, Alexander Potapenko wrote:
> > > >  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > > >  {
> > > >         int ret = 0;
> > > > @@ -829,7 +883,11 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > > >         case ARCH_MAP_VDSO_64:
> > > >                 return prctl_map_vdso(&vdso_image_64, arg2);
> > > >  #endif
> > > > -
> > > > +       case ARCH_GET_UNTAG_MASK:
> > > > +               return put_user(task->mm->context.untag_mask,
> > > > +                               (unsigned long __user *)arg2);
> > >
> > > Can we have ARCH_GET_UNTAG_MASK return the same error value (ENODEV or
> > > EINVAL) as ARCH_ENABLE_TAGGED_ADDR in the case the host doesn't
> > > support LAM?
> > > After all, the mask does not make much sense in this case.
> >
> > I'm not sure about this.
> >
> > As it is ARCH_GET_UNTAG_MASK returns -1UL mask if LAM is not present or
> > not enabled. Applying this mask will give correct result for both.
> 
> Is anyone going to use this mask if tagging is unsupported?
> Tools like HWASan won't even try to proceed in that case.

I can imagine the code that tries to be indifferent to whether a pointer
has tags. It gets mask from ARCH_GET_UNTAG_MASK and applies it to the
pointer without any conditions.

> > Why is -ENODEV better here? Looks like just more work for userspace.
> 
> This boils down to the question of detecting LAM support I raised previously.
> It's nice to have a syscall without side effects to check whether LAM
> can be enabled at all (e.g. one can do the check in the parent process
> and conditionally enable LAM in certain, but not all, child processes)
> CPUID won't help here, because the presence of the LAM bit in CPUID
> doesn't guarantee its support in the kernel, and every other solution
> is more complicated than just issuing a system call.
> 
> Note that TBI has PR_GET_TAGGED_ADDR_CTRL, which can be used to detect
> the presence of memory tagging support.

I would rather make enumeration explicit:

diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 38164a05c23c..a31e27b95b19 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -22,5 +22,6 @@

 #define ARCH_GET_UNTAG_MASK		0x4001
 #define ARCH_ENABLE_TAGGED_ADDR		0x4002
+#define ARCH_GET_MAX_TAG_BITS		0x4003

 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index cfa2e42a135a..2e4df63b775f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -911,6 +911,13 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 				(unsigned long __user *)arg2);
 	case ARCH_ENABLE_TAGGED_ADDR:
 		return prctl_enable_tagged_addr(task->mm, arg2);
+	case ARCH_GET_MAX_TAG_BITS:
+		if (!cpu_feature_enabled(X86_FEATURE_LAM))
+			return put_user(0, (unsigned long __user *)arg2);
+		else if (lam_u48_allowed())
+			return put_user(15, (unsigned long __user *)arg2);
+		else
+			return put_user(6, (unsigned long __user *)arg2);
 	default:
 		ret = -EINVAL;
 		break;
Alexander Potapenko July 20, 2022, 12:54 p.m. UTC | #5
On Wed, Jul 20, 2022 at 2:47 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> On Wed, Jul 20, 2022 at 10:19:36AM +0200, Alexander Potapenko wrote:
> > > > >  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > > > >  {
> > > > >         int ret = 0;
> > > > > @@ -829,7 +883,11 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
> > > > >         case ARCH_MAP_VDSO_64:
> > > > >                 return prctl_map_vdso(&vdso_image_64, arg2);
> > > > >  #endif
> > > > > -
> > > > > +       case ARCH_GET_UNTAG_MASK:
> > > > > +               return put_user(task->mm->context.untag_mask,
> > > > > +                               (unsigned long __user *)arg2);
> > > >
> > > > Can we have ARCH_GET_UNTAG_MASK return the same error value (ENODEV or
> > > > EINVAL) as ARCH_ENABLE_TAGGED_ADDR in the case the host doesn't
> > > > support LAM?
> > > > After all, the mask does not make much sense in this case.
> > >
> > > I'm not sure about this.
> > >
> > > As it is ARCH_GET_UNTAG_MASK returns -1UL mask if LAM is not present or
> > > not enabled. Applying this mask will give correct result for both.
> >
> > Is anyone going to use this mask if tagging is unsupported?
> > Tools like HWASan won't even try to proceed in that case.
>
> I can imagine the code that tries to be indifferent to whether a pointer
> has tags. It gets mask from ARCH_GET_UNTAG_MASK and applies it to the
> pointer without any conditions.

In that case there would still be just one call to ARCH_GET_UNTAG_MASK
to get the mask that will probably be applied many times.
So there's not a big difference with checking for -ENODEV and setting
that mask manually.
But your proposal with a special arch_prctl indeed looks cleaner.

> > > Why is -ENODEV better here? Looks like just more work for userspace.
> >
> > This boils down to the question of detecting LAM support I raised previously.
> > It's nice to have a syscall without side effects to check whether LAM
> > can be enabled at all (e.g. one can do the check in the parent process
> > and conditionally enable LAM in certain, but not all, child processes)
> > CPUID won't help here, because the presence of the LAM bit in CPUID
> > doesn't guarantee its support in the kernel, and every other solution
> > is more complicated than just issuing a system call.
> >
> > Note that TBI has PR_GET_TAGGED_ADDR_CTRL, which can be used to detect
> > the presence of memory tagging support.
>
> I would rather make enumeration explicit:

Ok, this would also work. Thanks!

> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 38164a05c23c..a31e27b95b19 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -22,5 +22,6 @@
>
>  #define ARCH_GET_UNTAG_MASK            0x4001
>  #define ARCH_ENABLE_TAGGED_ADDR                0x4002
> +#define ARCH_GET_MAX_TAG_BITS          0x4003
>
>  #endif /* _ASM_X86_PRCTL_H */
> diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
> index cfa2e42a135a..2e4df63b775f 100644
> --- a/arch/x86/kernel/process_64.c
> +++ b/arch/x86/kernel/process_64.c
> @@ -911,6 +911,13 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
>                                 (unsigned long __user *)arg2);
>         case ARCH_ENABLE_TAGGED_ADDR:
>                 return prctl_enable_tagged_addr(task->mm, arg2);
> +       case ARCH_GET_MAX_TAG_BITS:
> +               if (!cpu_feature_enabled(X86_FEATURE_LAM))
> +                       return put_user(0, (unsigned long __user *)arg2);
> +               else if (lam_u48_allowed())
> +                       return put_user(15, (unsigned long __user *)arg2);
> +               else
> +                       return put_user(6, (unsigned long __user *)arg2);
>         default:
>                 ret = -EINVAL;
>                 break;
> --
>  Kirill A. Shutemov
diff mbox series

Patch

diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 500b96e71f18..38164a05c23c 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -20,4 +20,7 @@ 
 #define ARCH_MAP_VDSO_32		0x2002
 #define ARCH_MAP_VDSO_64		0x2003
 
+#define ARCH_GET_UNTAG_MASK		0x4001
+#define ARCH_ENABLE_TAGGED_ADDR		0x4002
+
 #endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 1962008fe743..82a19168bfa4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -742,6 +742,60 @@  static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
 }
 #endif
 
+static void enable_lam_func(void *mm)
+{
+	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+	unsigned long lam_mask;
+	unsigned long cr3;
+
+	if (loaded_mm != mm)
+		return;
+
+	lam_mask = READ_ONCE(loaded_mm->context.lam_cr3_mask);
+
+	/* Update CR3 to get LAM active on the CPU */
+	cr3 = __read_cr3();
+	cr3 &= ~(X86_CR3_LAM_U48 | X86_CR3_LAM_U57);
+	cr3 |= lam_mask;
+	write_cr3(cr3);
+	set_tlbstate_cr3_lam_mask(lam_mask);
+}
+
+static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
+{
+	int ret = 0;
+
+	if (!cpu_feature_enabled(X86_FEATURE_LAM))
+		return -ENODEV;
+
+	mutex_lock(&mm->context.lock);
+
+	/* Already enabled? */
+	if (mm->context.lam_cr3_mask) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (!nr_bits) {
+		ret = -EINVAL;
+		goto out;
+	} else if (nr_bits <= 6) {
+		mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
+		mm->context.untag_mask =  ~GENMASK(62, 57);
+	} else {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Make lam_cr3_mask and untag_mask visible on other CPUs */
+	smp_mb();
+
+	on_each_cpu_mask(mm_cpumask(mm), enable_lam_func, mm, true);
+out:
+	mutex_unlock(&mm->context.lock);
+	return ret;
+}
+
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 {
 	int ret = 0;
@@ -829,7 +883,11 @@  long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
 	case ARCH_MAP_VDSO_64:
 		return prctl_map_vdso(&vdso_image_64, arg2);
 #endif
-
+	case ARCH_GET_UNTAG_MASK:
+		return put_user(task->mm->context.untag_mask,
+				(unsigned long __user *)arg2);
+	case ARCH_ENABLE_TAGGED_ADDR:
+		return prctl_enable_tagged_addr(task->mm, arg2);
 	default:
 		ret = -EINVAL;
 		break;