diff mbox series

[PATCHv5.1,04/13] x86/mm: Handle LAM on context switch

Message ID 20220713150200.17080-1-kirill.shutemov@linux.intel.com (mailing list archive)
State New
Headers show
Series None | expand

Commit Message

Kirill A. Shutemov July 13, 2022, 3:02 p.m. UTC
Linear Address Masking mode for userspace pointers is encoded in CR3 bits.
The mode is selected per-thread. Add new thread features to indicate that
the thread has Linear Address Masking enabled.

switch_mm_irqs_off() now respects these flags and constructs CR3
accordingly.

The active LAM mode gets recorded in the tlb_state.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
---
 v5.1:
  - Fix build issue with CONFIG_MODULE=y
---
 arch/x86/include/asm/mmu.h         |  3 +++
 arch/x86/include/asm/mmu_context.h | 24 +++++++++++++++++
 arch/x86/include/asm/tlbflush.h    | 35 +++++++++++++++++++++++++
 arch/x86/mm/tlb.c                  | 42 +++++++++++++++++++-----------
 4 files changed, 89 insertions(+), 15 deletions(-)

Comments

Alexander Potapenko July 20, 2022, 8:57 a.m. UTC | #1
>         /*
> @@ -491,6 +493,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>  {
>         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
>         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
> +       unsigned long prev_lam = tlbstate_lam_cr3_mask();
Note: this variable is never used if CONFIG_DEBUG_VM is off.

>  #ifdef CONFIG_DEBUG_VM
> -       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
> +       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
>                 /*
>                  * If we were to BUG here, we'd be very likely to kill
>                  * the system so hard that we don't see the call trace.
Kirill A. Shutemov July 20, 2022, 12:38 p.m. UTC | #2
On Wed, Jul 20, 2022 at 10:57:01AM +0200, Alexander Potapenko wrote:
> >         /*
> > @@ -491,6 +493,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
> >  {
> >         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
> >         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
> > +       unsigned long prev_lam = tlbstate_lam_cr3_mask();
> Note: this variable is never used if CONFIG_DEBUG_VM is off.

Good point. I will add this:

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 4c93f87a8928..5e9ed9f55c36 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -558,6 +558,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	if (real_prev == next) {
 		VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
 			   next->context.ctx_id);
+		VM_WARN_ON(prev_lam != new_lam);

 		/*
 		 * Even in lazy TLB mode, the CPU should stay set in the
Alexander Potapenko July 21, 2022, 1:13 p.m. UTC | #3
On Wed, Jul 13, 2022 at 5:02 PM Kirill A. Shutemov
<kirill.shutemov@linux.intel.com> wrote:
>
> Linear Address Masking mode for userspace pointers encoded in CR3 bits.
> The mode is selected per-thread. Add new thread features indicate that the
> thread has Linear Address Masking enabled.
>
> switch_mm_irqs_off() now respects these flags and constructs CR3
> accordingly.
>
> The active LAM mode gets recorded in the tlb_state.
>
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tested-by: Alexander Potapenko <glider@google.com>

> ---
>  v5.1:
>   - Fix build issue with CONFIG_MODULE=y
> ---
>  arch/x86/include/asm/mmu.h         |  3 +++
>  arch/x86/include/asm/mmu_context.h | 24 +++++++++++++++++
>  arch/x86/include/asm/tlbflush.h    | 35 +++++++++++++++++++++++++
>  arch/x86/mm/tlb.c                  | 42 +++++++++++++++++++-----------
>  4 files changed, 89 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
> index 5d7494631ea9..002889ca8978 100644
> --- a/arch/x86/include/asm/mmu.h
> +++ b/arch/x86/include/asm/mmu.h
> @@ -40,6 +40,9 @@ typedef struct {
>
>  #ifdef CONFIG_X86_64
>         unsigned short flags;
> +
> +       /* Active LAM mode:  X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
> +       unsigned long lam_cr3_mask;
>  #endif
>
>         struct mutex lock;
> diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
> index b8d40ddeab00..69c943b2ae90 100644
> --- a/arch/x86/include/asm/mmu_context.h
> +++ b/arch/x86/include/asm/mmu_context.h
> @@ -91,6 +91,29 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
>  }
>  #endif
>
> +#ifdef CONFIG_X86_64
> +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
> +{
> +       return mm->context.lam_cr3_mask;
> +}
> +
> +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
> +{
> +       mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
> +}
> +
> +#else
> +
> +static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
> +{
> +       return 0;
> +}
> +
> +static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
> +{
> +}
> +#endif
> +
>  #define enter_lazy_tlb enter_lazy_tlb
>  extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
>
> @@ -168,6 +191,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
>  {
>         arch_dup_pkeys(oldmm, mm);
>         paravirt_arch_dup_mmap(oldmm, mm);
> +       dup_lam(oldmm, mm);
>         return ldt_dup_context(oldmm, mm);
>  }
>
> diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
> index 4af5579c7ef7..efe83d33327f 100644
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -100,6 +100,16 @@ struct tlb_state {
>          */
>         bool invalidate_other;
>
> +#ifdef CONFIG_X86_64
> +       /*
> +        * Active LAM mode.
> +        *
> +        * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
> +        * disabled.
> +        */
> +       u8 lam;
> +#endif
> +
>         /*
>          * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
>          * the corresponding user PCID needs a flush next time we
> @@ -356,6 +366,30 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
>  }
>  #define huge_pmd_needs_flush huge_pmd_needs_flush
>
> +#ifdef CONFIG_X86_64
> +static inline unsigned long tlbstate_lam_cr3_mask(void)
> +{
> +       unsigned long lam = this_cpu_read(cpu_tlbstate.lam);
> +
> +       return lam << X86_CR3_LAM_U57_BIT;
> +}
> +
> +static inline void set_tlbstate_cr3_lam_mask(unsigned long mask)
> +{
> +       this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
> +}
> +
> +#else
> +
> +static inline unsigned long tlbstate_lam_cr3_mask(void)
> +{
> +       return 0;
> +}
> +
> +static inline void set_tlbstate_cr3_lam_mask(u64 mask)
> +{
> +}
> +#endif
>  #endif /* !MODULE */
>
>  static inline void __native_tlb_flush_global(unsigned long cr4)
> @@ -363,4 +397,5 @@ static inline void __native_tlb_flush_global(unsigned long cr4)
>         native_write_cr4(cr4 ^ X86_CR4_PGE);
>         native_write_cr4(cr4);
>  }
> +
>  #endif /* _ASM_X86_TLBFLUSH_H */
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index d400b6d9d246..4c93f87a8928 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -154,17 +154,18 @@ static inline u16 user_pcid(u16 asid)
>         return ret;
>  }
>
> -static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
>  {
>         if (static_cpu_has(X86_FEATURE_PCID)) {
> -               return __sme_pa(pgd) | kern_pcid(asid);
> +               return __sme_pa(pgd) | kern_pcid(asid) | lam;
>         } else {
>                 VM_WARN_ON_ONCE(asid != 0);
> -               return __sme_pa(pgd);
> +               return __sme_pa(pgd) | lam;
>         }
>  }
>
> -static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
> +static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
> +                                             unsigned long lam)
>  {
>         VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
>         /*
> @@ -173,7 +174,7 @@ static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
>          * boot because all CPU's the have same capabilities:
>          */
>         VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
> -       return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
> +       return __sme_pa(pgd) | kern_pcid(asid) | lam | CR3_NOFLUSH;
>  }
>
>  /*
> @@ -274,15 +275,16 @@ static inline void invalidate_user_asid(u16 asid)
>                   (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
>  }
>
> -static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
> +static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
> +                           bool need_flush)
>  {
>         unsigned long new_mm_cr3;
>
>         if (need_flush) {
>                 invalidate_user_asid(new_asid);
> -               new_mm_cr3 = build_cr3(pgdir, new_asid);
> +               new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
>         } else {
> -               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
> +               new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
>         }
>
>         /*
> @@ -491,6 +493,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>  {
>         struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
>         u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
> +       unsigned long prev_lam = tlbstate_lam_cr3_mask();
> +       unsigned long new_lam = mm_lam_cr3_mask(next);
>         bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
>         unsigned cpu = smp_processor_id();
>         u64 next_tlb_gen;
> @@ -520,7 +524,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>          * isn't free.
>          */
>  #ifdef CONFIG_DEBUG_VM
> -       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
> +       if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
>                 /*
>                  * If we were to BUG here, we'd be very likely to kill
>                  * the system so hard that we don't see the call trace.
> @@ -622,15 +626,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
>                 barrier();
>         }
>
> +       set_tlbstate_cr3_lam_mask(new_lam);
>         if (need_flush) {
>                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
>                 this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
> -               load_new_mm_cr3(next->pgd, new_asid, true);
> +               load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
>
>                 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
>         } else {
>                 /* The new ASID is already up to date. */
> -               load_new_mm_cr3(next->pgd, new_asid, false);
> +               load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
>
>                 trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
>         }
> @@ -691,6 +696,10 @@ void initialize_tlbstate_and_flush(void)
>         /* Assert that CR3 already references the right mm. */
>         WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
>
> +       /* LAM expected to be disabled in CR3 and init_mm */
> +       WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
> +       WARN_ON(mm_lam_cr3_mask(&init_mm));
> +
>         /*
>          * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
>          * doesn't work like other CR4 bits because it can only be set from
> @@ -699,8 +708,8 @@ void initialize_tlbstate_and_flush(void)
>         WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
>                 !(cr4_read_shadow() & X86_CR4_PCIDE));
>
> -       /* Force ASID 0 and force a TLB flush. */
> -       write_cr3(build_cr3(mm->pgd, 0));
> +       /* Disable LAM, force ASID 0 and force a TLB flush. */
> +       write_cr3(build_cr3(mm->pgd, 0, 0));
>
>         /* Reinitialize tlbstate. */
>         this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
> @@ -708,6 +717,7 @@ void initialize_tlbstate_and_flush(void)
>         this_cpu_write(cpu_tlbstate.next_asid, 1);
>         this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
>         this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
> +       set_tlbstate_cr3_lam_mask(0);
>
>         for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
>                 this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
> @@ -1047,8 +1057,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>   */
>  unsigned long __get_current_cr3_fast(void)
>  {
> -       unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> -               this_cpu_read(cpu_tlbstate.loaded_mm_asid));
> +       unsigned long cr3 =
> +               build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
> +               this_cpu_read(cpu_tlbstate.loaded_mm_asid),
> +               tlbstate_lam_cr3_mask());
>
>         /* For now, be very restrictive about when this can be called. */
>         VM_WARN_ON(in_nmi() || preemptible());
> --
> 2.35.1
>
diff mbox series

Patch

diff --git a/arch/x86/include/asm/mmu.h b/arch/x86/include/asm/mmu.h
index 5d7494631ea9..002889ca8978 100644
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -40,6 +40,9 @@  typedef struct {
 
 #ifdef CONFIG_X86_64
 	unsigned short flags;
+
+	/* Active LAM mode:  X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
+	unsigned long lam_cr3_mask;
 #endif
 
 	struct mutex lock;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index b8d40ddeab00..69c943b2ae90 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -91,6 +91,29 @@  static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
 }
 #endif
 
+#ifdef CONFIG_X86_64
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
+	return mm->context.lam_cr3_mask;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+	mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
+}
+
+#else
+
+static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
+{
+	return 0;
+}
+
+static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
+{
+}
+#endif
+
 #define enter_lazy_tlb enter_lazy_tlb
 extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
 
@@ -168,6 +191,7 @@  static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 {
 	arch_dup_pkeys(oldmm, mm);
 	paravirt_arch_dup_mmap(oldmm, mm);
+	dup_lam(oldmm, mm);
 	return ldt_dup_context(oldmm, mm);
 }
 
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 4af5579c7ef7..efe83d33327f 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -100,6 +100,16 @@  struct tlb_state {
 	 */
 	bool invalidate_other;
 
+#ifdef CONFIG_X86_64
+	/*
+	 * Active LAM mode.
+	 *
+	 * X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
+	 * disabled.
+	 */
+	u8 lam;
+#endif
+
 	/*
 	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
 	 * the corresponding user PCID needs a flush next time we
@@ -356,6 +366,30 @@  static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
 }
 #define huge_pmd_needs_flush huge_pmd_needs_flush
 
+#ifdef CONFIG_X86_64
+static inline unsigned long tlbstate_lam_cr3_mask(void)
+{
+	unsigned long lam = this_cpu_read(cpu_tlbstate.lam);
+
+	return lam << X86_CR3_LAM_U57_BIT;
+}
+
+static inline void set_tlbstate_cr3_lam_mask(unsigned long mask)
+{
+	this_cpu_write(cpu_tlbstate.lam, mask >> X86_CR3_LAM_U57_BIT);
+}
+
+#else
+
+static inline unsigned long tlbstate_lam_cr3_mask(void)
+{
+	return 0;
+}
+
+static inline void set_tlbstate_cr3_lam_mask(u64 mask)
+{
+}
+#endif
 #endif /* !MODULE */
 
 static inline void __native_tlb_flush_global(unsigned long cr4)
@@ -363,4 +397,5 @@  static inline void __native_tlb_flush_global(unsigned long cr4)
 	native_write_cr4(cr4 ^ X86_CR4_PGE);
 	native_write_cr4(cr4);
 }
+
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index d400b6d9d246..4c93f87a8928 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -154,17 +154,18 @@  static inline u16 user_pcid(u16 asid)
 	return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 {
 	if (static_cpu_has(X86_FEATURE_PCID)) {
-		return __sme_pa(pgd) | kern_pcid(asid);
+		return __sme_pa(pgd) | kern_pcid(asid) | lam;
 	} else {
 		VM_WARN_ON_ONCE(asid != 0);
-		return __sme_pa(pgd);
+		return __sme_pa(pgd) | lam;
 	}
 }
 
-static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
+static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
+					      unsigned long lam)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 	/*
@@ -173,7 +174,7 @@  static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
 	 * boot because all CPU's the have same capabilities:
 	 */
 	VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
-	return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
+	return __sme_pa(pgd) | kern_pcid(asid) | lam | CR3_NOFLUSH;
 }
 
 /*
@@ -274,15 +275,16 @@  static inline void invalidate_user_asid(u16 asid)
 		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
 }
 
-static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
+			    bool need_flush)
 {
 	unsigned long new_mm_cr3;
 
 	if (need_flush) {
 		invalidate_user_asid(new_asid);
-		new_mm_cr3 = build_cr3(pgdir, new_asid);
+		new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
 	} else {
-		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+		new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
 	}
 
 	/*
@@ -491,6 +493,8 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 {
 	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+	unsigned long prev_lam = tlbstate_lam_cr3_mask();
+	unsigned long new_lam = mm_lam_cr3_mask(next);
 	bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
 	unsigned cpu = smp_processor_id();
 	u64 next_tlb_gen;
@@ -520,7 +524,7 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * isn't free.
 	 */
 #ifdef CONFIG_DEBUG_VM
-	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
+	if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid, prev_lam))) {
 		/*
 		 * If we were to BUG here, we'd be very likely to kill
 		 * the system so hard that we don't see the call trace.
@@ -622,15 +626,16 @@  void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		barrier();
 	}
 
+	set_tlbstate_cr3_lam_mask(new_lam);
 	if (need_flush) {
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
 		this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-		load_new_mm_cr3(next->pgd, new_asid, true);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
 	} else {
 		/* The new ASID is already up to date. */
-		load_new_mm_cr3(next->pgd, new_asid, false);
+		load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
 
 		trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
 	}
@@ -691,6 +696,10 @@  void initialize_tlbstate_and_flush(void)
 	/* Assert that CR3 already references the right mm. */
 	WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
 
+	/* LAM expected to be disabled in CR3 and init_mm */
+	WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
+	WARN_ON(mm_lam_cr3_mask(&init_mm));
+
 	/*
 	 * Assert that CR4.PCIDE is set if needed.  (CR4.PCIDE initialization
 	 * doesn't work like other CR4 bits because it can only be set from
@@ -699,8 +708,8 @@  void initialize_tlbstate_and_flush(void)
 	WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
 		!(cr4_read_shadow() & X86_CR4_PCIDE));
 
-	/* Force ASID 0 and force a TLB flush. */
-	write_cr3(build_cr3(mm->pgd, 0));
+	/* Disable LAM, force ASID 0 and force a TLB flush. */
+	write_cr3(build_cr3(mm->pgd, 0, 0));
 
 	/* Reinitialize tlbstate. */
 	this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
@@ -708,6 +717,7 @@  void initialize_tlbstate_and_flush(void)
 	this_cpu_write(cpu_tlbstate.next_asid, 1);
 	this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
 	this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
+	set_tlbstate_cr3_lam_mask(0);
 
 	for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
 		this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
@@ -1047,8 +1057,10 @@  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  */
 unsigned long __get_current_cr3_fast(void)
 {
-	unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
-		this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+	unsigned long cr3 =
+		build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
+		this_cpu_read(cpu_tlbstate.loaded_mm_asid),
+		tlbstate_lam_cr3_mask());
 
 	/* For now, be very restrictive about when this can be called. */
 	VM_WARN_ON(in_nmi() || preemptible());