diff mbox series

[3/3] x86: mm: add x86_64 support for page table check

Message ID 20211123214814.3756047-4-pasha.tatashin@soleen.com (mailing list archive)
State New
Headers show
Series page table check | expand

Commit Message

Pasha Tatashin Nov. 23, 2021, 9:48 p.m. UTC
Add page table check hooks into routines that modify user page tables.

Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
 arch/x86/Kconfig               |  1 +
 arch/x86/include/asm/pgtable.h | 29 +++++++++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

Comments

Jiri Slaby Dec. 1, 2021, 8 a.m. UTC | #1
On 23. 11. 21, 22:48, Pasha Tatashin wrote:
> Add page table check hooks into routines that modify user page tables.

Hi,

I bisected to this patch, as it causes crashes during module load:
#PF: supervisor write access in kernel mode
#PF: error_code(0x0003) - permissions violation
PGD 6d615067 P4D 6d615067 PUD 6d616063 PMD 800000006d2001e1
Oops: 0003 [#1] PREEMPT SMP PTI
CPU: 0 PID: 6189 Comm: modprobe Kdump: loaded Tainted: G          I E 
   5.16.0-rc2-next-20211129-vanilla #3 
83846a405f0e3937f5c8dfbc7d449622b8f46369
Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be 
filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
RIP: 0010:jump_label_module_notify (kernel/jump_label.c:370 
kernel/jump_label.c:670 kernel/jump_label.c:748)
Code: 00 48 8b 43 08 a8 02 0f 85 e9 00 00 00 48 83 e0 fc 48 c7 02 00 00 
00 00 48 89 42 08 48 8b 43 08 83 e0 03 48 09 c2 48 83 ca 02 <48> 89 53 
08 4d 89 66 10 49 89 6e 08 48 8b 43 08 a8 02 0f 84 98 00
All code
========
    0:	00 48 8b             	add    %cl,-0x75(%rax)
    3:	43 08 a8 02 0f 85 e9 	rex.XB or %bpl,-0x167af0fe(%r8)
    a:	00 00                	add    %al,(%rax)
    c:	00 48 83             	add    %cl,-0x7d(%rax)
    f:	e0 fc                	loopne 0xd
   11:	48 c7 02 00 00 00 00 	movq   $0x0,(%rdx)
   18:	48 89 42 08          	mov    %rax,0x8(%rdx)
   1c:	48 8b 43 08          	mov    0x8(%rbx),%rax
   20:	83 e0 03             	and    $0x3,%eax
   23:	48 09 c2             	or     %rax,%rdx
   26:	48 83 ca 02          	or     $0x2,%rdx
   2a:*	48 89 53 08          	mov    %rdx,0x8(%rbx)		<-- trapping 
instruction
   2e:	4d 89 66 10          	mov    %r12,0x10(%r14)
   32:	49 89 6e 08          	mov    %rbp,0x8(%r14)
   36:	48 8b 43 08          	mov    0x8(%rbx),%rax
   3a:	a8 02                	test   $0x2,%al
   3c:	0f                   	.byte 0xf
   3d:	84                   	.byte 0x84
   3e:	98                   	cwtl
	...

Code starting with the faulting instruction
===========================================
    0:	48 89 53 08          	mov    %rdx,0x8(%rbx)
    4:	4d 89 66 10          	mov    %r12,0x10(%r14)
    8:	49 89 6e 08          	mov    %rbp,0x8(%r14)
    c:	48 8b 43 08          	mov    0x8(%rbx),%rax
   10:	a8 02                	test   $0x2,%al
   12:	0f                   	.byte 0xf
   13:	84                   	.byte 0x84
   14:	98                   	cwtl
	...
RSP: 0018:ffffaf4dc051fbe8 EFLAGS: 00010282
RAX: 0000000000000001 RBX: ffffffff931ee760 RCX: 0000000000000001
RDX: ffff9d1aa7d43883 RSI: ffffffff91c50aa0 RDI: ffffffff931ee760
RBP: ffffffffc0782000 R08: 0000000000000020 R09: 0000000000000000
R10: ffff9d1aa7d43880 R11: 0000000000000000 R12: ffffffffc079a980
R13: ffffffffc0784080 R14: ffff9d1aa7d43ca0 R15: ffffffffc0782008
FS:  00007f87104b8740(0000) GS:ffff9d1b45c00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffff931ee768 CR3: 000000010fdce000 CR4: 00000000000006f0
Call Trace:
  <TASK>
blocking_notifier_call_chain_robust (kernel/notifier.c:83 
kernel/notifier.c:118 kernel/notifier.c:283 kernel/notifier.c:271)
load_module (./include/linux/notifier.h:198 kernel/module.c:3923 
kernel/module.c:4100)
__do_sys_finit_module (kernel/module.c:4224)
do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)

> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
>   arch/x86/Kconfig               |  1 +
>   arch/x86/include/asm/pgtable.h | 29 +++++++++++++++++++++++++++--
>   2 files changed, 28 insertions(+), 2 deletions(-)
> 

regards,
Jiri Slaby Dec. 1, 2021, 8:44 a.m. UTC | #2
On 01. 12. 21, 9:00, Jiri Slaby wrote:
> On 23. 11. 21, 22:48, Pasha Tatashin wrote:
>> Add page table check hooks into routines that modify user page tables.
> 
> Hi,
> 
> I bisected to this as this causes crashes during modules load:

And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to 
unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.

> #PF: supervisor write access in kernel mode
> #PF: error_code(0x0003) - permissions violation
> PGD 6d615067 P4D 6d615067 PUD 6d616063 PMD 800000006d2001e1
> Oops: 0003 [#1] PREEMPT SMP PTI
> CPU: 0 PID: 6189 Comm: modprobe Kdump: loaded Tainted: G          I E   
> 5.16.0-rc2-next-20211129-vanilla #3 
> 83846a405f0e3937f5c8dfbc7d449622b8f46369
> Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be 
> filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> RIP: 0010:jump_label_module_notify (kernel/jump_label.c:370 
> kernel/jump_label.c:670 kernel/jump_label.c:748)
> Code: 00 48 8b 43 08 a8 02 0f 85 e9 00 00 00 48 83 e0 fc 48 c7 02 00 00 
> 00 00 48 89 42 08 48 8b 43 08 83 e0 03 48 09 c2 48 83 ca 02 <48> 89 53 
> 08 4d 89 66 10 49 89 6e 08 48 8b 43 08 a8 02 0f 84 98 00
> All code
> ========
>     0:    00 48 8b                 add    %cl,-0x75(%rax)
>     3:    43 08 a8 02 0f 85 e9     rex.XB or %bpl,-0x167af0fe(%r8)
>     a:    00 00                    add    %al,(%rax)
>     c:    00 48 83                 add    %cl,-0x7d(%rax)
>     f:    e0 fc                    loopne 0xd
>    11:    48 c7 02 00 00 00 00     movq   $0x0,(%rdx)
>    18:    48 89 42 08              mov    %rax,0x8(%rdx)
>    1c:    48 8b 43 08              mov    0x8(%rbx),%rax
>    20:    83 e0 03                 and    $0x3,%eax
>    23:    48 09 c2                 or     %rax,%rdx
>    26:    48 83 ca 02              or     $0x2,%rdx
>    2a:*    48 89 53 08              mov    %rdx,0x8(%rbx)        <-- 
> trapping instruction
>    2e:    4d 89 66 10              mov    %r12,0x10(%r14)
>    32:    49 89 6e 08              mov    %rbp,0x8(%r14)
>    36:    48 8b 43 08              mov    0x8(%rbx),%rax
>    3a:    a8 02                    test   $0x2,%al
>    3c:    0f                       .byte 0xf
>    3d:    84                       .byte 0x84
>    3e:    98                       cwtl
>      ...
> 
> Code starting with the faulting instruction
> ===========================================
>     0:    48 89 53 08              mov    %rdx,0x8(%rbx)
>     4:    4d 89 66 10              mov    %r12,0x10(%r14)
>     8:    49 89 6e 08              mov    %rbp,0x8(%r14)
>     c:    48 8b 43 08              mov    0x8(%rbx),%rax
>    10:    a8 02                    test   $0x2,%al
>    12:    0f                       .byte 0xf
>    13:    84                       .byte 0x84
>    14:    98                       cwtl
>      ...
> RSP: 0018:ffffaf4dc051fbe8 EFLAGS: 00010282
> RAX: 0000000000000001 RBX: ffffffff931ee760 RCX: 0000000000000001
> RDX: ffff9d1aa7d43883 RSI: ffffffff91c50aa0 RDI: ffffffff931ee760
> RBP: ffffffffc0782000 R08: 0000000000000020 R09: 0000000000000000
> R10: ffff9d1aa7d43880 R11: 0000000000000000 R12: ffffffffc079a980
> R13: ffffffffc0784080 R14: ffff9d1aa7d43ca0 R15: ffffffffc0782008
> FS:  00007f87104b8740(0000) GS:ffff9d1b45c00000(0000) 
> knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: ffffffff931ee768 CR3: 000000010fdce000 CR4: 00000000000006f0
> Call Trace:
>   <TASK>
> blocking_notifier_call_chain_robust (kernel/notifier.c:83 
> kernel/notifier.c:118 kernel/notifier.c:283 kernel/notifier.c:271)
> load_module (./include/linux/notifier.h:198 kernel/module.c:3923 
> kernel/module.c:4100)
> __do_sys_finit_module (kernel/module.c:4224)
> do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
> entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
> 
>> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
>> ---
>>   arch/x86/Kconfig               |  1 +
>>   arch/x86/include/asm/pgtable.h | 29 +++++++++++++++++++++++++++--
>>   2 files changed, 28 insertions(+), 2 deletions(-)
>>
> 
> regards,
Pasha Tatashin Dec. 1, 2021, 1:55 p.m. UTC | #3
On Wed, Dec 1, 2021 at 3:44 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>
> On 01. 12. 21, 9:00, Jiri Slaby wrote:
> > On 23. 11. 21, 22:48, Pasha Tatashin wrote:
> >> Add page table check hooks into routines that modify user page tables.
> >
> > Hi,
> >
> > I bisected to this as this causes crashes during modules load:
>
> And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to
> unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.

Hi,

Thanks for reporting this. It seems that module loading, for some reason,
does not like the static branches. However, I was not able to reproduce
this. Could you please share your config and the module that you were
loading?

Thank you,
Pasha

>
> > #PF: supervisor write access in kernel mode
> > #PF: error_code(0x0003) - permissions violation
> > PGD 6d615067 P4D 6d615067 PUD 6d616063 PMD 800000006d2001e1
> > Oops: 0003 [#1] PREEMPT SMP PTI
> > CPU: 0 PID: 6189 Comm: modprobe Kdump: loaded Tainted: G          I E
> > 5.16.0-rc2-next-20211129-vanilla #3
> > 83846a405f0e3937f5c8dfbc7d449622b8f46369
> > Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./To be
> > filled by O.E.M., BIOS SDBLI944.86P 05/08/2007
> > RIP: 0010:jump_label_module_notify (kernel/jump_label.c:370
> > kernel/jump_label.c:670 kernel/jump_label.c:748)
> > Code: 00 48 8b 43 08 a8 02 0f 85 e9 00 00 00 48 83 e0 fc 48 c7 02 00 00
> > 00 00 48 89 42 08 48 8b 43 08 83 e0 03 48 09 c2 48 83 ca 02 <48> 89 53
> > 08 4d 89 66 10 49 89 6e 08 48 8b 43 08 a8 02 0f 84 98 00
> > All code
> > ========
> >     0:    00 48 8b                 add    %cl,-0x75(%rax)
> >     3:    43 08 a8 02 0f 85 e9     rex.XB or %bpl,-0x167af0fe(%r8)
> >     a:    00 00                    add    %al,(%rax)
> >     c:    00 48 83                 add    %cl,-0x7d(%rax)
> >     f:    e0 fc                    loopne 0xd
> >    11:    48 c7 02 00 00 00 00     movq   $0x0,(%rdx)
> >    18:    48 89 42 08              mov    %rax,0x8(%rdx)
> >    1c:    48 8b 43 08              mov    0x8(%rbx),%rax
> >    20:    83 e0 03                 and    $0x3,%eax
> >    23:    48 09 c2                 or     %rax,%rdx
> >    26:    48 83 ca 02              or     $0x2,%rdx
> >    2a:*    48 89 53 08              mov    %rdx,0x8(%rbx)        <--
> > trapping instruction
> >    2e:    4d 89 66 10              mov    %r12,0x10(%r14)
> >    32:    49 89 6e 08              mov    %rbp,0x8(%r14)
> >    36:    48 8b 43 08              mov    0x8(%rbx),%rax
> >    3a:    a8 02                    test   $0x2,%al
> >    3c:    0f                       .byte 0xf
> >    3d:    84                       .byte 0x84
> >    3e:    98                       cwtl
> >      ...
> >
> > Code starting with the faulting instruction
> > ===========================================
> >     0:    48 89 53 08              mov    %rdx,0x8(%rbx)
> >     4:    4d 89 66 10              mov    %r12,0x10(%r14)
> >     8:    49 89 6e 08              mov    %rbp,0x8(%r14)
> >     c:    48 8b 43 08              mov    0x8(%rbx),%rax
> >    10:    a8 02                    test   $0x2,%al
> >    12:    0f                       .byte 0xf
> >    13:    84                       .byte 0x84
> >    14:    98                       cwtl
> >      ...
> > RSP: 0018:ffffaf4dc051fbe8 EFLAGS: 00010282
> > RAX: 0000000000000001 RBX: ffffffff931ee760 RCX: 0000000000000001
> > RDX: ffff9d1aa7d43883 RSI: ffffffff91c50aa0 RDI: ffffffff931ee760
> > RBP: ffffffffc0782000 R08: 0000000000000020 R09: 0000000000000000
> > R10: ffff9d1aa7d43880 R11: 0000000000000000 R12: ffffffffc079a980
> > R13: ffffffffc0784080 R14: ffff9d1aa7d43ca0 R15: ffffffffc0782008
> > FS:  00007f87104b8740(0000) GS:ffff9d1b45c00000(0000)
> > knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: ffffffff931ee768 CR3: 000000010fdce000 CR4: 00000000000006f0
> > Call Trace:
> >   <TASK>
> > blocking_notifier_call_chain_robust (kernel/notifier.c:83
> > kernel/notifier.c:118 kernel/notifier.c:283 kernel/notifier.c:271)
> > load_module (./include/linux/notifier.h:198 kernel/module.c:3923
> > kernel/module.c:4100)
> > __do_sys_finit_module (kernel/module.c:4224)
> > do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
> > entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:113)
> >
> >> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> >> ---
> >>   arch/x86/Kconfig               |  1 +
> >>   arch/x86/include/asm/pgtable.h | 29 +++++++++++++++++++++++++++--
> >>   2 files changed, 28 insertions(+), 2 deletions(-)
> >>
> >
> > regards,
>
>
> --
> js
> suse labs
Jiri Slaby Dec. 2, 2021, 9:59 a.m. UTC | #4
On 01. 12. 21, 14:55, Pasha Tatashin wrote:
> On Wed, Dec 1, 2021 at 3:44 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>
>> On 01. 12. 21, 9:00, Jiri Slaby wrote:
>>> On 23. 11. 21, 22:48, Pasha Tatashin wrote:
>>>> Add page table check hooks into routines that modify user page tables.
>>>
>>> Hi,
>>>
>>> I bisected to this as this causes crashes during modules load:
>>
>> And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to
>> unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.
> 
> Hi,
> 
> Thanks for reporting this. Seems like module load for some reasons
> does not like the static branches. However, I was not able to repro
> this. Could you please share your config and the module that you were
> loading?

It's the openSUSE's -next config:
https://raw.githubusercontent.com/openSUSE/kernel-source/linux-next/config/x86_64/vanilla

But with CONFIG_IWLMEI=n (as that fails to link).

One has to load i915; the other modules (71 on my system) are apparently fine.

i915 tries to patch your `page_table_check_disabled' which is defined as 
`DEFINE_STATIC_KEY_TRUE_RO':
 > jump_label_add_module: key=__tracepoint_mmap_lock_released 
(ffffffff93f36d88) mod=0000000000000000
 > jump_label_add_module: key=hugetlb_free_vmemmap_enabled_key 
(ffffffff94873560) mod=0000000000000000
 > jump_label_add_module: key=devmap_managed_key (ffffffff94902700) 
mod=0000000000000000
 > jump_label_add_module: key=page_table_check_disabled 
(ffffffff939da760) mod=0000000000000000
 > BUG: unable to handle page fault for address: ffffffff939da768

regards,
Jiri Slaby Dec. 2, 2021, 10:05 a.m. UTC | #5
On 02. 12. 21, 10:59, Jiri Slaby wrote:
> On 01. 12. 21, 14:55, Pasha Tatashin wrote:
>> On Wed, Dec 1, 2021 at 3:44 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>>>
>>> On 01. 12. 21, 9:00, Jiri Slaby wrote:
>>>> On 23. 11. 21, 22:48, Pasha Tatashin wrote:
>>>>> Add page table check hooks into routines that modify user page tables.
>>>>
>>>> Hi,
>>>>
>>>> I bisected to this as this causes crashes during modules load:
>>>
>>> And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to
>>> unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.
>>
>> Hi,
>>
>> Thanks for reporting this. Seems like module load for some reasons
>> does not like the static branches. However, I was not able to repro
>> this. Could you please share your config and the module that you were
>> loading?
> 
> It's the openSUSE's -next config:
> https://raw.githubusercontent.com/openSUSE/kernel-source/linux-next/config/x86_64/vanilla 
> 
> 
> But with CONFIG_IWLMEI=n (as that fails to link).
> 
> One has to load i915, other modules (71 on my system) are apparently fine.
> 
> i915 tries to patch your `page_table_check_disabled' which is defined as 
> `DEFINE_STATIC_KEY_TRUE_RO':
>  > jump_label_add_module: key=__tracepoint_mmap_lock_released 
> (ffffffff93f36d88) mod=0000000000000000
>  > jump_label_add_module: key=hugetlb_free_vmemmap_enabled_key 
> (ffffffff94873560) mod=0000000000000000
>  > jump_label_add_module: key=devmap_managed_key (ffffffff94902700) 
> mod=0000000000000000
>  > jump_label_add_module: key=page_table_check_disabled 
> (ffffffff939da760) mod=0000000000000000
>  > BUG: unable to handle page fault for address: ffffffff939da768

Given that you EXPORT page_table_check_disabled so that it can be used from
modules, it can hardly be marked as RO. So the change below indeed fixes the
problem for me:

--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -18,7 +18,7 @@ struct page_table_check {
  static bool __page_table_check_enabled __initdata =
                                 IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);

-DEFINE_STATIC_KEY_TRUE_RO(page_table_check_disabled);
+DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
  EXPORT_SYMBOL(page_table_check_disabled);

  static int __init early_page_table_check_param(char *buf)

> 
> regards,
Muchun Song Dec. 2, 2021, 10:30 a.m. UTC | #6
On Thu, Dec 2, 2021 at 6:06 PM Jiri Slaby <jirislaby@kernel.org> wrote:
>
> On 02. 12. 21, 10:59, Jiri Slaby wrote:
> > On 01. 12. 21, 14:55, Pasha Tatashin wrote:
> >> On Wed, Dec 1, 2021 at 3:44 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> >>>
> >>> On 01. 12. 21, 9:00, Jiri Slaby wrote:
> >>>> On 23. 11. 21, 22:48, Pasha Tatashin wrote:
> >>>>> Add page table check hooks into routines that modify user page tables.
> >>>>
> >>>> Hi,
> >>>>
> >>>> I bisected to this as this causes crashes during modules load:
> >>>
> >>> And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to
> >>> unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.
> >>
> >> Hi,
> >>
> >> Thanks for reporting this. Seems like module load for some reasons
> >> does not like the static branches. However, I was not able to repro
> >> this. Could you please share your config and the module that you were
> >> loading?
> >
> > It's the openSUSE's -next config:
> > https://raw.githubusercontent.com/openSUSE/kernel-source/linux-next/config/x86_64/vanilla
> >
> >
> > But with CONFIG_IWLMEI=n (as that fails to link).
> >
> > One has to load i915, other modules (71 on my system) are apparently fine.
> >
> > i915 tries to patch your `page_table_check_disabled' which is defined as
> > `DEFINE_STATIC_KEY_TRUE_RO':
> >  > jump_label_add_module: key=__tracepoint_mmap_lock_released
> > (ffffffff93f36d88) mod=0000000000000000
> >  > jump_label_add_module: key=hugetlb_free_vmemmap_enabled_key
> > (ffffffff94873560) mod=0000000000000000
> >  > jump_label_add_module: key=devmap_managed_key (ffffffff94902700)
> > mod=0000000000000000
> >  > jump_label_add_module: key=page_table_check_disabled
> > (ffffffff939da760) mod=0000000000000000
> >  > BUG: unable to handle page fault for address: ffffffff939da768
>
> Provided you EXPORT page_table_check_disabled and it can be used from
> modules, it can be barely marked as RO. So the below indeed fixes the
> problem for me:

Agreed. I encountered a similar problem some months ago. In short,
DEFINE_STATIC_KEY_TRUE_RO() and EXPORT_SYMBOL() are mutually
exclusive.

Thanks.

>
> --- a/mm/page_table_check.c
> +++ b/mm/page_table_check.c
> @@ -18,7 +18,7 @@ struct page_table_check {
>   static bool __page_table_check_enabled __initdata =
>
> IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
>
> -DEFINE_STATIC_KEY_TRUE_RO(page_table_check_disabled);
> +DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);
>   EXPORT_SYMBOL(page_table_check_disabled);
>
>   static int __init early_page_table_check_param(char *buf)
>
> >
> > regards,
>
>
> --
> js
> suse labs
Pasha Tatashin Dec. 2, 2021, 3:01 p.m. UTC | #7
On Thu, Dec 2, 2021 at 5:05 AM Jiri Slaby <jirislaby@kernel.org> wrote:
>
> On 02. 12. 21, 10:59, Jiri Slaby wrote:
> > On 01. 12. 21, 14:55, Pasha Tatashin wrote:
> >> On Wed, Dec 1, 2021 at 3:44 AM Jiri Slaby <jirislaby@kernel.org> wrote:
> >>>
> >>> On 01. 12. 21, 9:00, Jiri Slaby wrote:
> >>>> On 23. 11. 21, 22:48, Pasha Tatashin wrote:
> >>>>> Add page table check hooks into routines that modify user page tables.
> >>>>
> >>>> Hi,
> >>>>
> >>>> I bisected to this as this causes crashes during modules load:
> >>>
> >>> And it's not enough to unset CONFIG_PAGE_TABLE_CHECK_ENFORCED. I had to
> >>> unset CONFIG_PAGE_TABLE_CHECK completely to get rid of this.
> >>
> >> Hi,
> >>
> >> Thanks for reporting this. Seems like module load for some reasons
> >> does not like the static branches. However, I was not able to repro
> >> this. Could you please share your config and the module that you were
> >> loading?
> >
> > It's the openSUSE's -next config:
> > https://raw.githubusercontent.com/openSUSE/kernel-source/linux-next/config/x86_64/vanilla
> >
> >
> > But with CONFIG_IWLMEI=n (as that fails to link).
> >
> > One has to load i915, other modules (71 on my system) are apparently fine.
> >
> > i915 tries to patch your `page_table_check_disabled' which is defined as
> > `DEFINE_STATIC_KEY_TRUE_RO':
> >  > jump_label_add_module: key=__tracepoint_mmap_lock_released
> > (ffffffff93f36d88) mod=0000000000000000
> >  > jump_label_add_module: key=hugetlb_free_vmemmap_enabled_key
> > (ffffffff94873560) mod=0000000000000000
> >  > jump_label_add_module: key=devmap_managed_key (ffffffff94902700)
> > mod=0000000000000000
> >  > jump_label_add_module: key=page_table_check_disabled
> > (ffffffff939da760) mod=0000000000000000
> >  > BUG: unable to handle page fault for address: ffffffff939da768
>
> Provided you EXPORT page_table_check_disabled and it can be used from
> modules, it can be barely marked as RO. So the below indeed fixes the
> problem for me:
>
> --- a/mm/page_table_check.c
> +++ b/mm/page_table_check.c
> @@ -18,7 +18,7 @@ struct page_table_check {
>   static bool __page_table_check_enabled __initdata =
>
> IS_ENABLED(CONFIG_PAGE_TABLE_CHECK_ENFORCED);
>
> -DEFINE_STATIC_KEY_TRUE_RO(page_table_check_disabled);
> +DEFINE_STATIC_KEY_TRUE(page_table_check_disabled);

Makes sense, thanks! I will remove _RO in the next version.

Pasha
diff mbox series

Patch

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5e16393d9988..7636ea400a71 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -104,6 +104,7 @@  config X86
 	select ARCH_SUPPORTS_ACPI
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+	select ARCH_SUPPORTS_PAGE_TABLE_CHECK	if X86_64
 	select ARCH_SUPPORTS_NUMA_BALANCING	if X86_64
 	select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP	if NR_CPUS <= 4096
 	select ARCH_SUPPORTS_LTO_CLANG
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 448cd01eb3ec..ae34614b7e8d 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -26,6 +26,7 @@ 
 #include <asm/pkru.h>
 #include <asm/fpu/api.h>
 #include <asm-generic/pgtable_uffd.h>
+#include <linux/page_table_check.h>
 
 extern pgd_t early_top_pgt[PTRS_PER_PGD];
 bool __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -1006,18 +1007,21 @@  static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
+	page_table_check_pte_set(mm, addr, ptep, pte);
 	set_pte(ptep, pte);
 }
 
 static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 			      pmd_t *pmdp, pmd_t pmd)
 {
+	page_table_check_pmd_set(mm, addr, pmdp, pmd);
 	set_pmd(pmdp, pmd);
 }
 
 static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
 			      pud_t *pudp, pud_t pud)
 {
+	page_table_check_pud_set(mm, addr, pudp, pud);
 	native_set_pud(pudp, pud);
 }
 
@@ -1048,6 +1052,7 @@  static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pte_t *ptep)
 {
 	pte_t pte = native_ptep_get_and_clear(ptep);
+	page_table_check_pte_clear(mm, addr, pte);
 	return pte;
 }
 
@@ -1063,12 +1068,23 @@  static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 		 * care about updates and native needs no locking
 		 */
 		pte = native_local_ptep_get_and_clear(ptep);
+		page_table_check_pte_clear(mm, addr, pte);
 	} else {
 		pte = ptep_get_and_clear(mm, addr, ptep);
 	}
 	return pte;
 }
 
+#define __HAVE_ARCH_PTEP_CLEAR
+static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
+			      pte_t *ptep)
+{
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_CHECK))
+		ptep_get_and_clear(mm, addr, ptep);
+	else
+		pte_clear(mm, addr, ptep);
+}
+
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm,
 				      unsigned long addr, pte_t *ptep)
@@ -1109,14 +1125,22 @@  static inline int pmd_write(pmd_t pmd)
 static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long addr,
 				       pmd_t *pmdp)
 {
-	return native_pmdp_get_and_clear(pmdp);
+	pmd_t pmd = native_pmdp_get_and_clear(pmdp);
+
+	page_table_check_pmd_clear(mm, addr, pmd);
+
+	return pmd;
 }
 
 #define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
 static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
 					unsigned long addr, pud_t *pudp)
 {
-	return native_pudp_get_and_clear(pudp);
+	pud_t pud = native_pudp_get_and_clear(pudp);
+
+	page_table_check_pud_clear(mm, addr, pud);
+
+	return pud;
 }
 
 #define __HAVE_ARCH_PMDP_SET_WRPROTECT
@@ -1137,6 +1161,7 @@  static inline int pud_write(pud_t pud)
 static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmdp, pmd_t pmd)
 {
+	page_table_check_pmd_set(vma->vm_mm, address, pmdp, pmd);
 	if (IS_ENABLED(CONFIG_SMP)) {
 		return xchg(pmdp, pmd);
 	} else {