Message ID | 20230312112612.31869-12-kirill.shutemov@linux.intel.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Linear Address Masking enabling | expand |
On Sun, 12 Mar 2023 at 12:27, Kirill A. Shutemov <kirill.shutemov@linux.intel.com> wrote: > > IOMMU and SVA-capable devices know nothing about LAM and only expect > canonical addresses. An attempt to pass down tagged pointer will lead > to address translation failure. > > By default do not allow to enable both LAM and use SVA in the same > process. > > The new ARCH_FORCE_TAGGED_SVA arch_prctl() overrides the limitation. > By using the arch_prctl() userspace takes responsibility to never pass > tagged address to the device. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> > Reviewed-by: Ashok Raj <ashok.raj@intel.com> > Reviewed-by: Jacob Pan <jacob.jun.pan@linux.intel.com> > Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> > --- > arch/x86/include/asm/mmu.h | 2 ++ > arch/x86/include/asm/mmu_context.h | 6 ++++++ > arch/x86/include/uapi/asm/prctl.h | 1 + > arch/x86/kernel/process_64.c | 7 +++++++ > drivers/iommu/iommu-sva.c | 4 ++++ > include/linux/mmu_context.h | 7 +++++++ > 6 files changed, 27 insertions(+) > > diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h > index e80762e998ce..0da5c227f490 100644 > --- a/arch/x86/include/asm/mmu.h > +++ b/arch/x86/include/asm/mmu.h > @@ -14,6 +14,8 @@ > #define MM_CONTEXT_HAS_VSYSCALL 1 > /* Do not allow changing LAM mode */ > #define MM_CONTEXT_LOCK_LAM 2 > +/* Allow LAM and SVA coexisting */ > +#define MM_CONTEXT_FORCE_TAGGED_SVA 3 > > /* > * x86 has arch-specific MMU state beyond what lives in mm_struct. > diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h > index 06eaaf75d572..4c396e9a384f 100644 > --- a/arch/x86/include/asm/mmu_context.h > +++ b/arch/x86/include/asm/mmu_context.h > @@ -115,6 +115,12 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) > mm->context.untag_mask = -1UL; > } > > +#define arch_pgtable_dma_compat arch_pgtable_dma_compat > +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) > +{ > + return !mm_lam_cr3_mask(mm) || > + test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); > +} > #else > > static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) > diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h > index a31e27b95b19..eb290d89cb32 100644 > --- a/arch/x86/include/uapi/asm/prctl.h > +++ b/arch/x86/include/uapi/asm/prctl.h > @@ -23,5 +23,6 @@ > #define ARCH_GET_UNTAG_MASK 0x4001 > #define ARCH_ENABLE_TAGGED_ADDR 0x4002 > #define ARCH_GET_MAX_TAG_BITS 0x4003 > +#define ARCH_FORCE_TAGGED_SVA 0x4004 > > #endif /* _ASM_X86_PRCTL_H */ > diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c > index 88aae519c8f8..eda826a956df 100644 > --- a/arch/x86/kernel/process_64.c > +++ b/arch/x86/kernel/process_64.c > @@ -756,6 +756,10 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) > if (current->mm != mm) > return -EINVAL; > > + if (mm_valid_pasid(mm) && > + !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) > + return -EINTR; > + > if (mmap_write_lock_killable(mm)) > return -EINTR; > > @@ -878,6 +882,9 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) > (unsigned long __user *)arg2); > case ARCH_ENABLE_TAGGED_ADDR: > return prctl_enable_tagged_addr(task->mm, arg2); > + case ARCH_FORCE_TAGGED_SVA: > + set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); Hi Kirill, ARCH_ENABLE_TAGGED_ADDR checks that task->mm == current->mm, shouldn't ARCH_FORCE_TAGGED_SVA check that as well? Also it looks like currently to enable both LAM and SVA. LAM enabling checks for SVA, but SVA doesn't and both are not mutually exclusive. > + return 0; > case ARCH_GET_MAX_TAG_BITS: > if (!cpu_feature_enabled(X86_FEATURE_LAM)) > return put_user(0, (unsigned long __user *)arg2); > diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c > index 4ee2929f0d7a..dd76a1a09cf7 100644 > --- a/drivers/iommu/iommu-sva.c > +++ b/drivers/iommu/iommu-sva.c > @@ -2,6 +2,7 @@ > /* > * Helpers for IOMMU drivers implementing SVA > */ > +#include <linux/mmu_context.h> > #include <linux/mutex.h> > #include <linux/sched/mm.h> > #include <linux/iommu.h> > @@ -32,6 +33,9 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) > min == 0 || max < min) > return -EINVAL; > > + if (!arch_pgtable_dma_compat(mm)) > + return -EBUSY; > + > mutex_lock(&iommu_sva_lock); > /* Is a PASID already associated with this mm? */ > if (mm_valid_pasid(mm)) { > diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h > index 14b9c1fa05c4..f2b7a3f04099 100644 > --- a/include/linux/mmu_context.h > +++ b/include/linux/mmu_context.h > @@ -35,4 +35,11 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) > } > #endif > > +#ifndef arch_pgtable_dma_compat > +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) > +{ > + return true; > +} > +#endif > + > #endif > -- > 2.39.2 >
On Mon, Apr 03, 2023 at 08:18:57AM +0200, Dmitry Vyukov wrote: > Hi Kirill, > > ARCH_ENABLE_TAGGED_ADDR checks that task->mm == current->mm, > shouldn't ARCH_FORCE_TAGGED_SVA check that as well? Do you a particular race in mind? I cannot think of anything right away. I guess we can add the check for consistency. But if there's a bug it is a different story. > Also it looks like currently to enable both LAM and SVA. > LAM enabling checks for SVA, but SVA doesn't and both are not mutually > exclusive. For LAM we check SVM with mm_valid_pasid() && !test_bit() in prctl_enable_tagged_addr(). For SVM we check for LAM with !mm_lam_cr3_mask() || test_bit() in arch_pgtable_dma_compat() which called from iommu_sva_alloc_pasid(). Hm?
On Mon, 3 Apr 2023 at 11:44, Kirill A. Shutemov <kirill@shutemov.name> wrote: > > On Mon, Apr 03, 2023 at 08:18:57AM +0200, Dmitry Vyukov wrote: > > Hi Kirill, > > > > ARCH_ENABLE_TAGGED_ADDR checks that task->mm == current->mm, > > shouldn't ARCH_FORCE_TAGGED_SVA check that as well? > > Do you a particular race in mind? I cannot think of anything right away. > > I guess we can add the check for consistency. But if there's a bug it is a > different story. No, I don't have a particular race in mind. Was thinking solely about consistency and if these things should be set for other processes (relaxing the check is always possible in future, but adding new restrictions is generally not possible). > > Also it looks like currently to enable both LAM and SVA. > > LAM enabling checks for SVA, but SVA doesn't and both are not mutually > > exclusive. > > For LAM we check SVM with mm_valid_pasid() && !test_bit() in > prctl_enable_tagged_addr(). > > For SVM we check for LAM with !mm_lam_cr3_mask() || test_bit() in > arch_pgtable_dma_compat() which called from iommu_sva_alloc_pasid(). It seems that currently it's possible to both enable LAM and set SVA bit. Then arch_pgtable_dma_compat() will return true, but LAM is enabled.
On Mon, Apr 03, 2023 at 11:56:48AM +0200, Dmitry Vyukov wrote: > On Mon, 3 Apr 2023 at 11:44, Kirill A. Shutemov <kirill@shutemov.name> wrote: > > > > On Mon, Apr 03, 2023 at 08:18:57AM +0200, Dmitry Vyukov wrote: > > > Hi Kirill, > > > > > > ARCH_ENABLE_TAGGED_ADDR checks that task->mm == current->mm, > > > shouldn't ARCH_FORCE_TAGGED_SVA check that as well? > > > > Do you a particular race in mind? I cannot think of anything right away. > > > > I guess we can add the check for consistency. But if there's a bug it is a > > different story. > > No, I don't have a particular race in mind. Was thinking solely about > consistency and if these things should be set for other processes > (relaxing the check is always possible in future, but adding new > restrictions is generally not possible). Okay. Makes sense. It is only reachable with task != current from ptrace, which is rather obscure path. Anyway, I will prepare a proper patch with this fixup: diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index eda826a956df..4ffd8e67d273 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -883,6 +883,8 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) case ARCH_ENABLE_TAGGED_ADDR: return prctl_enable_tagged_addr(task->mm, arg2); case ARCH_FORCE_TAGGED_SVA: + if (current != task) + return -EINVAL; set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); return 0; case ARCH_GET_MAX_TAG_BITS: > > > Also it looks like currently to enable both LAM and SVA. > > > LAM enabling checks for SVA, but SVA doesn't and both are not mutually > > > exclusive. > > > > For LAM we check SVM with mm_valid_pasid() && !test_bit() in > > prctl_enable_tagged_addr(). > > > > For SVM we check for LAM with !mm_lam_cr3_mask() || test_bit() in > > arch_pgtable_dma_compat() which called from iommu_sva_alloc_pasid(). > > It seems that currently it's possible to both enable LAM and set SVA bit. > Then arch_pgtable_dma_compat() will return true, but LAM is enabled. Right. That's the point of the bit. It allows SVA and LAM to co-exist: The new ARCH_FORCE_TAGGED_SVA arch_prctl() overrides the limitation. By using the arch_prctl() userspace takes responsibility to never pass tagged address to the device. I'm confused.
On Mon, 3 Apr 2023 at 12:17, Kirill A. Shutemov <kirill@shutemov.name> wrote: > > On Mon, Apr 03, 2023 at 11:56:48AM +0200, Dmitry Vyukov wrote: > > On Mon, 3 Apr 2023 at 11:44, Kirill A. Shutemov <kirill@shutemov.name> wrote: > > > > > > On Mon, Apr 03, 2023 at 08:18:57AM +0200, Dmitry Vyukov wrote: > > > > Hi Kirill, > > > > > > > > ARCH_ENABLE_TAGGED_ADDR checks that task->mm == current->mm, > > > > shouldn't ARCH_FORCE_TAGGED_SVA check that as well? > > > > > > Do you a particular race in mind? I cannot think of anything right away. > > > > > > I guess we can add the check for consistency. But if there's a bug it is a > > > different story. > > > > No, I don't have a particular race in mind. Was thinking solely about > > consistency and if these things should be set for other processes > > (relaxing the check is always possible in future, but adding new > > restrictions is generally not possible). > > Okay. Makes sense. > > It is only reachable with task != current from ptrace, which is rather > obscure path. > > Anyway, I will prepare a proper patch with this fixup: > > diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c > index eda826a956df..4ffd8e67d273 100644 > --- a/arch/x86/kernel/process_64.c > +++ b/arch/x86/kernel/process_64.c > @@ -883,6 +883,8 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) > case ARCH_ENABLE_TAGGED_ADDR: > return prctl_enable_tagged_addr(task->mm, arg2); > case ARCH_FORCE_TAGGED_SVA: > + if (current != task) > + return -EINVAL; > set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); > return 0; > case ARCH_GET_MAX_TAG_BITS: > > > > > Also it looks like currently to enable both LAM and SVA. > > > > LAM enabling checks for SVA, but SVA doesn't and both are not mutually > > > > exclusive. > > > > > > For LAM we check SVM with mm_valid_pasid() && !test_bit() in > > > prctl_enable_tagged_addr(). > > > > > > For SVM we check for LAM with !mm_lam_cr3_mask() || test_bit() in > > > arch_pgtable_dma_compat() which called from iommu_sva_alloc_pasid(). > > > > It seems that currently it's possible to both enable LAM and set SVA bit. > > Then arch_pgtable_dma_compat() will return true, but LAM is enabled. > > Right. That's the point of the bit. It allows SVA and LAM to co-exist: > > The new ARCH_FORCE_TAGGED_SVA arch_prctl() overrides the limitation. > By using the arch_prctl() userspace takes responsibility to never pass > tagged address to the device. > > I'm confused. Then I misunderstood what it means. Ignore. While we are here: if (mm_valid_pasid(mm) && !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) return -EINTR; should be EINVAL?
On Mon, Apr 03, 2023 at 12:22:01PM +0200, Dmitry Vyukov wrote: > Then I misunderstood what it means. Ignore. > > While we are here: > > if (mm_valid_pasid(mm) && > !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) > return -EINTR; > > should be EINVAL? Yes. My bad. Will fix.
diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h index e80762e998ce..0da5c227f490 100644 --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -14,6 +14,8 @@ #define MM_CONTEXT_HAS_VSYSCALL 1 /* Do not allow changing LAM mode */ #define MM_CONTEXT_LOCK_LAM 2 +/* Allow LAM and SVA coexisting */ +#define MM_CONTEXT_FORCE_TAGGED_SVA 3 /* * x86 has arch-specific MMU state beyond what lives in mm_struct. diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 06eaaf75d572..4c396e9a384f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -115,6 +115,12 @@ static inline void mm_reset_untag_mask(struct mm_struct *mm) mm->context.untag_mask = -1UL; } +#define arch_pgtable_dma_compat arch_pgtable_dma_compat +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) +{ + return !mm_lam_cr3_mask(mm) || + test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags); +} #else static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm) diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h index a31e27b95b19..eb290d89cb32 100644 --- a/arch/x86/include/uapi/asm/prctl.h +++ b/arch/x86/include/uapi/asm/prctl.h @@ -23,5 +23,6 @@ #define ARCH_GET_UNTAG_MASK 0x4001 #define ARCH_ENABLE_TAGGED_ADDR 0x4002 #define ARCH_GET_MAX_TAG_BITS 0x4003 +#define ARCH_FORCE_TAGGED_SVA 0x4004 #endif /* _ASM_X86_PRCTL_H */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 88aae519c8f8..eda826a956df 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -756,6 +756,10 @@ static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits) if (current->mm != mm) return -EINVAL; + if (mm_valid_pasid(mm) && + !test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags)) + return -EINTR; + if (mmap_write_lock_killable(mm)) return -EINTR; @@ -878,6 +882,9 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) (unsigned long __user *)arg2); case ARCH_ENABLE_TAGGED_ADDR: return prctl_enable_tagged_addr(task->mm, arg2); + case ARCH_FORCE_TAGGED_SVA: + set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags); + return 0; case ARCH_GET_MAX_TAG_BITS: if (!cpu_feature_enabled(X86_FEATURE_LAM)) return put_user(0, (unsigned long __user *)arg2); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 4ee2929f0d7a..dd76a1a09cf7 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -2,6 +2,7 @@ /* * Helpers for IOMMU drivers implementing SVA */ +#include <linux/mmu_context.h> #include <linux/mutex.h> #include <linux/sched/mm.h> #include <linux/iommu.h> @@ -32,6 +33,9 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) min == 0 || max < min) return -EINVAL; + if (!arch_pgtable_dma_compat(mm)) + return -EBUSY; + mutex_lock(&iommu_sva_lock); /* Is a PASID already associated with this mm? */ if (mm_valid_pasid(mm)) { diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index 14b9c1fa05c4..f2b7a3f04099 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -35,4 +35,11 @@ static inline unsigned long mm_untag_mask(struct mm_struct *mm) } #endif +#ifndef arch_pgtable_dma_compat +static inline bool arch_pgtable_dma_compat(struct mm_struct *mm) +{ + return true; +} +#endif + #endif