diff mbox series

[RFC,v2,01/29] mm: asi: Make some utility functions noinstr compatible

Message ID 20250110-asi-rfc-v2-v2-1-8419288bc805@google.com (mailing list archive)
State Awaiting Upstream
Headers show
Series Address Space Isolation (ASI) | expand

Commit Message

Brendan Jackman Jan. 10, 2025, 6:40 p.m. UTC
Some existing utility functions would need to be called from a noinstr
context in the later patches. So mark these as either noinstr or
__always_inline.

An earlier version of this by Junaid had a macro that was intended to
tell the compiler "either inline this function, or call it in the
noinstr section", which basically boiled down to:

 #define inline_or_noinstr noinline __section(".noinstr.text")

Unfortunately Thomas pointed out this will prevent the function from
being inlined at call sites in .text.

So far I haven't been able[1] to find a formulation that lets us :
1. avoid calls from .noinstr.text -> .text,
2. while also letting the compiler freely decide what to inline.

1 is a functional requirement so here I'm just giving up on 2. Existing
callsites of this code are just forced inline. For the incoming code
that needs to call it from noinstr, they will be out-of-line calls.

[1] https://lore.kernel.org/lkml/CA+i-1C1z35M8wA_4AwMq7--c1OgjNoLGTkn4+Td5gKg7QQAzWw@mail.gmail.com/

Checkpatch-args: --ignore=COMMIT_LOG_LONG_LINE
Signed-off-by: Brendan Jackman <jackmanb@google.com>
---
 arch/x86/include/asm/processor.h     |  2 +-
 arch/x86/include/asm/special_insns.h |  8 ++++----
 arch/x86/include/asm/tlbflush.h      |  3 +++
 arch/x86/mm/tlb.c                    | 13 +++++++++----
 4 files changed, 17 insertions(+), 9 deletions(-)

Comments

Borislav Petkov Jan. 16, 2025, 12:18 a.m. UTC | #1
On Fri, Jan 10, 2025 at 06:40:27PM +0000, Brendan Jackman wrote:
> Subject: Re: [PATCH RFC v2 01/29] mm: asi: Make some utility functions noinstr compatible

The tip tree preferred format for patch subject prefixes is
'subsys/component:', e.g. 'x86/apic:', 'x86/mm/fault:', 'sched/fair:',
'genirq/core:'. Please do not use file names or complete file paths as
prefix. 'git log path/to/file' should give you a reasonable hint in most
cases.

So I guess "x86/asm:" or so.

> Some existing utility functions would need to be called from a noinstr
> context in the later patches. So mark these as either noinstr or
> __always_inline.
> 
> An earlier version of this by Junaid had a macro that was intended to
> tell the compiler "either inline this function, or call it in the
> noinstr section", which basically boiled down to:
> 
>  #define inline_or_noinstr noinline __section(".noinstr.text")
> 
> Unfortunately Thomas pointed out this will prevent the function from
> being inlined at call sites in .text.
> 
> So far I haven't been able[1] to find a formulation that lets us :
> 1. avoid calls from .noinstr.text -> .text,
> 2. while also letting the compiler freely decide what to inline.
> 
> 1 is a functional requirement so here I'm just giving up on 2. Existing
> callsites of this code are just forced inline. For the incoming code
> that needs to call it from noinstr, they will be out-of-line calls.

I'm not sure some of that belongs in the commit message - if you want to have
it in the submission, you should put it under the --- line below, right above
the diffstat.

> [1] https://lore.kernel.org/lkml/CA+i-1C1z35M8wA_4AwMq7--c1OgjNoLGTkn4+Td5gKg7QQAzWw@mail.gmail.com/
> 
> Checkpatch-args: --ignore=COMMIT_LOG_LONG_LINE

Yeah, you can drop those. People should not turn off brain, use checkpatch and
point at all the silly errors it spits anyway.

> Signed-off-by: Brendan Jackman <jackmanb@google.com>
> ---
>  arch/x86/include/asm/processor.h     |  2 +-
>  arch/x86/include/asm/special_insns.h |  8 ++++----
>  arch/x86/include/asm/tlbflush.h      |  3 +++
>  arch/x86/mm/tlb.c                    | 13 +++++++++----
>  4 files changed, 17 insertions(+), 9 deletions(-)

So I was just about to look at the below diff but then booting the patch in my
guest causes it to stop at:

[    1.110988] sr 2:0:0:0: Attached scsi generic sg1 type 5
[    1.114210] PM: Image not found (code -22)
[    1.114903] clk: Disabling unused clocks
[    1.119397] EXT4-fs (sda2): mounted filesystem 90868bc4-a017-4fa2-ac81-931ba260346f ro with ordered data mode. Quota mode: disabled.
[    1.121069] VFS: Mounted root (ext4 filesystem) readonly on device 8:2.
<--- EOF

with the below call stack.

Booting it on Linus' master branch is ok but this is tip/master with all that
we've accumulated for the next merge window along with other stuff I'm poking
at...

Long story short, lemme try to poke around tomorrow to try to figure out what
actually happens. It could be caused by the part of Rik's patches and this one
inlining things. We'll see...

native_flush_tlb_one_user (addr=2507219558400) at arch/x86/mm/tlb.c:1177
1177            if (!static_cpu_has(X86_FEATURE_PTI))
(gdb) bt
#0  native_flush_tlb_one_user (addr=2507219558400) at arch/x86/mm/tlb.c:1177
#1  0xffffffff8128206e in flush_tlb_one_user (addr=addr@entry=2507219558400) at arch/x86/mm/tlb.c:1196
#2  flush_tlb_one_kernel (addr=addr@entry=2507219558400) at arch/x86/mm/tlb.c:1151
#3  0xffffffff812820b7 in do_kernel_range_flush (info=0xffff88807dc311c0) at arch/x86/mm/tlb.c:1092
#4  0xffffffff8137beb6 in csd_do_func (csd=0x0 <fixed_percpu_data>, info=0xffff88807dc311c0, 
    func=0xffffffff81282090 <do_kernel_range_flush>) at kernel/smp.c:134
#5  smp_call_function_many_cond (mask=<optimized out>, func=func@entry=0xffffffff81282090 <do_kernel_range_flush>, 
    info=0xffff88807dc311c0, scf_flags=scf_flags@entry=3, cond_func=cond_func@entry=0x0 <fixed_percpu_data>)
    at kernel/smp.c:876
#6  0xffffffff8137c254 in on_each_cpu_cond_mask (cond_func=cond_func@entry=0x0 <fixed_percpu_data>, 
    func=func@entry=0xffffffff81282090 <do_kernel_range_flush>, info=<optimized out>, wait=wait@entry=true, 
    mask=<optimized out>) at kernel/smp.c:1052
#7  0xffffffff81282020 in on_each_cpu (wait=1, info=<optimized out>, func=0xffffffff81282090 <do_kernel_range_flush>)
    at ./include/linux/smp.h:71
#8  flush_tlb_kernel_range (start=start@entry=18446683600570097664, end=<optimized out>, end@entry=18446683600579907584)
    at arch/x86/mm/tlb.c:1106
#9  0xffffffff81481c3f in __purge_vmap_area_lazy (start=18446683600570097664, start@entry=18446744073709551615, 
    end=18446683600579907584, end@entry=0, full_pool_decay=full_pool_decay@entry=false) at mm/vmalloc.c:2284
#10 0xffffffff81481fde in _vm_unmap_aliases (start=start@entry=18446744073709551615, end=end@entry=0, 
    flush=<optimized out>, flush@entry=0) at mm/vmalloc.c:2899
#11 0xffffffff81482049 in vm_unmap_aliases () at mm/vmalloc.c:2922
#12 0xffffffff81284d9f in change_page_attr_set_clr (addr=0xffffc9000001fef0, numpages=<optimized out>, mask_set=..., 
    mask_clr=..., force_split=<optimized out>, in_flag=0, pages=0x0 <fixed_percpu_data>)
    at arch/x86/mm/pat/set_memory.c:1881
#13 0xffffffff81285c52 in change_page_attr_set (array=0, mask=..., numpages=511, addr=0xffffc9000001fef0)
    at arch/x86/mm/pat/set_memory.c:1922
#14 set_memory_nx (addr=<optimized out>, addr@entry=18446744071759204352, numpages=numpages@entry=511)
    at arch/x86/mm/pat/set_memory.c:2110
#15 0xffffffff8127b729 in free_init_pages (what=what@entry=0xffffffff8226bac0 "unused decrypted", 
    begin=18446744071759204352, end=18446744071761297408) at arch/x86/mm/init.c:929
#16 0xffffffff898f25aa in mem_encrypt_free_decrypted_mem () at arch/x86/mm/mem_encrypt_amd.c:574
#17 0xffffffff81ca06c3 in free_initmem () at arch/x86/mm/init.c:973
#18 0xffffffff81c9fa48 in kernel_init (unused=<optimized out>) at init/main.c:1475
#19 0xffffffff812398a0 in ret_from_fork (prev=<optimized out>, regs=0xffffc9000001ff58, 
    fn=0xffffffff81c9fa10 <kernel_init>, fn_arg=0x0 <fixed_percpu_data>) at arch/x86/kernel/process.c:148
#20 0xffffffff81206f7a in ret_from_fork_asm () at arch/x86/entry/entry_64.S:244
#21 0x1f0f2e6600000000 in ?? ()
#22 0x2e66000000000084 in ?? ()
#23 0x0000000000841f0f in ?? ()
#24 0x000000841f0f2e66 in ?? ()
#25 0x00841f0f2e660000 in ?? ()
#26 0x1f0f2e6600000000 in ?? ()
#27 0x2e66000000000084 in ?? ()
#28 0x0000000000841f0f in ?? ()
#29 0x000000841f0f2e66 in ?? ()
#30 0x00841f0f2e660000 in ?? ()
#31 0x0000000000000000 in ?? ()
(gdb)
Borislav Petkov Jan. 16, 2025, 10:27 a.m. UTC | #2
On Thu, Jan 16, 2025 at 01:18:58AM +0100, Borislav Petkov wrote:
> Long story short, lemme try to poke around tomorrow to try to figure out what
> actually happens. It could be caused by the part of Rik's patches and this one
> inlining things. We'll see...

Looks transient... The very similar guest boots fine on another machine. Let's
watch this...
Brendan Jackman Jan. 16, 2025, 1:22 p.m. UTC | #3
On Thu, 16 Jan 2025 at 01:21, Borislav Petkov <bp@alien8.de> wrote:
> > Unfortunately Thomas pointed out this will prevent the function from
> > being inlined at call sites in .text.
> >
> > So far I haven't been able[1] to find a formulation that lets us :
> > 1. avoid calls from .noinstr.text -> .text,
> > 2. while also letting the compiler freely decide what to inline.
> >
> > 1 is a functional requirement so here I'm just giving up on 2. Existing
> > callsites of this code are just forced inline. For the incoming code
> > that needs to call it from noinstr, they will be out-of-line calls.
>
> I'm not sure some of that belongs in the commit message - if you want to have
> it in the submission, you should put it under the --- line below, right above
> the diffstat.

Sure. I'm actually not even sure that for a [PATCH]-quality thing this
cross-cutting commit even makes sense - once we've decided on the
general way to solve this problem, perhaps the changes should just be
part of the commit that needs them?

It feels messy to have a patch that "does multiple things", but on the
other hand it might be annoying to review a patch that says "make a
load of random changes across the kernel, which are needed at various
points in various upcoming patches, trust me".

Do you have any opinion on that?

(BTW, since a comment you made on another series (can't find it on
Lore...), I've changed my writing style to avoid stuff like this in
comments & commit messages in general, but this text all predates
that. I'll do my best to sort all that stuff out before I send
anything as a [PATCH].)

On Thu, 16 Jan 2025 at 11:29, Borislav Petkov <bp@alien8.de> wrote:
>
> On Thu, Jan 16, 2025 at 01:18:58AM +0100, Borislav Petkov wrote:
> > Long story short, lemme try to poke around tomorrow to try to figure out what
> > actually happens. It could be caused by the part of Rik's patches and this one
> > inlining things. We'll see...
>
> Looks transient... The very similar guest boots fine on another machine. Let's
> watch this...

Oh, I didn't notice your update until now. But yeah I also couldn't
reproduce it on a Sapphire Rapids machine and on QEMU with this patch
applied on top of tip/master (37bc915c6ad0f).
Borislav Petkov Jan. 16, 2025, 2:02 p.m. UTC | #4
On Thu, Jan 16, 2025 at 02:22:42PM +0100, Brendan Jackman wrote:
> Sure. I'm actually not even sure that for a [PATCH]-quality thing this
> cross-cutting commit even makes sense - once we've decided on the
> general way to solve this problem, perhaps the changes should just be
> part of the commit that needs them?

Right, that sounds better.

> It feels messy to have a patch that "does multiple things", but on the
> other hand it might be annoying to review a patch that says "make a
> load of random changes across the kernel, which are needed at various
> points in various upcoming patches, trust me".
> 
> Do you have any opinion on that?

You're absolutely right - we do things when we need them and not before.
Otherwise, often times things get done preemptively and then forgotten only
for someone to notice way later and undo them again.

> (BTW, since a comment you made on another series (can't find it on
> Lore...), I've changed my writing style to avoid stuff like this in
> comments & commit messages in general, but this text all predates
> that. I'll do my best to sort all that stuff out before I send
> anything as a [PATCH].)

Thanks!

Btw, good and funny way to use "[PATCH]-quality" to mean non-RFC. :-P

> Oh, I didn't notice your update until now. But yeah I also couldn't
> reproduce it on a Sapphire Rapids machine and on QEMU with this patch
> applied on top of tip/master (37bc915c6ad0f).

Yeah, it feels like toolchain-related but I can't put my finger on it yet.
We'll see if and when this thing will re-surface its ugly head...

:-)

Thx.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 4a686f0e5dbf6d906ed38276148b186e920927b3..1a1b7ea5d7d32a47d783d9d62cd2a53672addd6f 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -220,7 +220,7 @@  void print_cpu_msr(struct cpuinfo_x86 *);
 /*
  * Friendlier CR3 helpers.
  */
-static inline unsigned long read_cr3_pa(void)
+static __always_inline unsigned long read_cr3_pa(void)
 {
 	return __read_cr3() & CR3_ADDR_MASK;
 }
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index aec6e2d3aa1d52e5c8f513e188015a45e9eeaeb2..6e103358966f6f1333aa07be97aec5f8af794120 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -42,14 +42,14 @@  static __always_inline void native_write_cr2(unsigned long val)
 	asm volatile("mov %0,%%cr2": : "r" (val) : "memory");
 }
 
-static inline unsigned long __native_read_cr3(void)
+static __always_inline unsigned long __native_read_cr3(void)
 {
 	unsigned long val;
 	asm volatile("mov %%cr3,%0\n\t" : "=r" (val) : __FORCE_ORDER);
 	return val;
 }
 
-static inline void native_write_cr3(unsigned long val)
+static __always_inline void native_write_cr3(unsigned long val)
 {
 	asm volatile("mov %0,%%cr3": : "r" (val) : "memory");
 }
@@ -153,12 +153,12 @@  static __always_inline void write_cr2(unsigned long x)
  * Careful!  CR3 contains more than just an address.  You probably want
  * read_cr3_pa() instead.
  */
-static inline unsigned long __read_cr3(void)
+static __always_inline unsigned long __read_cr3(void)
 {
 	return __native_read_cr3();
 }
 
-static inline void write_cr3(unsigned long x)
+static __always_inline void write_cr3(unsigned long x)
 {
 	native_write_cr3(x);
 }
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 69e79fff41b800a0a138bcbf548dde9d72993105..c884174a44e119a3c027c44ada6c5cdba14d1282 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -423,4 +423,7 @@  static inline void __native_tlb_flush_global(unsigned long cr4)
 	native_write_cr4(cr4 ^ X86_CR4_PGE);
 	native_write_cr4(cr4);
 }
+
+unsigned long build_cr3_noinstr(pgd_t *pgd, u16 asid, unsigned long lam);
+
 #endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 86593d1b787d8a5b9fa4bd492356898ec8870938..f0428e5e1f1947903ee87c4c6444844ee11b45c3 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -108,7 +108,7 @@ 
 /*
  * Given @asid, compute kPCID
  */
-static inline u16 kern_pcid(u16 asid)
+static __always_inline u16 kern_pcid(u16 asid)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
 
@@ -153,9 +153,9 @@  static inline u16 user_pcid(u16 asid)
 	return ret;
 }
 
-static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
+static __always_inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 {
-	unsigned long cr3 = __sme_pa(pgd) | lam;
+	unsigned long cr3 = __sme_pa_nodebug(pgd) | lam;
 
 	if (static_cpu_has(X86_FEATURE_PCID)) {
 		cr3 |= kern_pcid(asid);
@@ -166,6 +166,11 @@  static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
 	return cr3;
 }
 
+noinstr unsigned long build_cr3_noinstr(pgd_t *pgd, u16 asid, unsigned long lam)
+{
+	return build_cr3(pgd, asid, lam);
+}
+
 static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
 					      unsigned long lam)
 {
@@ -1084,7 +1089,7 @@  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
  * It's intended to be used for code like KVM that sneakily changes CR3
  * and needs to restore it.  It needs to be used very carefully.
  */
-unsigned long __get_current_cr3_fast(void)
+noinstr unsigned long __get_current_cr3_fast(void)
 {
 	unsigned long cr3 =
 		build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,