Message ID | 20210310104139.679618-4-elver@google.com (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Add support for synchronous signals on perf events | expand |
On Wed, Mar 10, 2021 at 11:41AM +0100, Marco Elver wrote: > Adds bit perf_event_attr::remove_on_exec, to support removing an event > from a task on exec. > > This option supports the case where an event is supposed to be > process-wide only, and should not propagate beyond exec, to limit > monitoring to the original process image only. [...] > +static void perf_remove_from_owner(struct perf_event *event); > +static void perf_event_exit_event(struct perf_event *child_event, > + struct perf_event_context *child_ctx, > + struct task_struct *child); > + > +/* > + * Removes all events from the current task that have been marked > + * remove-on-exec, and feeds their values back to parent events. > + */ > +static void perf_event_remove_on_exec(void) > +{ > + int ctxn; > + > + for_each_task_context_nr(ctxn) { > + struct perf_event_context *ctx; > + struct perf_event *event, *next; > + > + ctx = perf_pin_task_context(current, ctxn); > + if (!ctx) > + continue; > + mutex_lock(&ctx->mutex); > + > + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { > + if (!event->attr.remove_on_exec) > + continue; > + > + if (!is_kernel_event(event)) > + perf_remove_from_owner(event); > + perf_remove_from_context(event, DETACH_GROUP); > + /* > + * Remove the event and feed back its values to the > + * parent event. > + */ > + perf_event_exit_event(event, ctx, current); > + } > + mutex_unlock(&ctx->mutex); > + put_ctx(ctx); > + } > +} Yikes; it seems this is somehow broken. I just decided to run the remove_on_exec kselftest in a loop like so: for x in {1..10}; do ( tools/testing/selftests/perf_events/remove_on_exec & ) ; done While the kselftest runs pass, I see a number of kernel warnings (below). Any suggestions? I'll go and try to debug this... Thanks, -- Marco ------ >8 ------ hardirqs last disabled at (4150): [<ffffffffa633219b>] sysvec_call_function_single+0xb/0xc0 arch/x86/kernel/smp.c:243 softirqs last enabled at (3846): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 softirqs last disabled at (3839): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 ---[ end trace 74c79be9940ec2d1 ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1369 at kernel/events/core.c:247 event_function+0xef/0x100 kernel/events/core.c:249 Modules linked in: CPU: 3 PID: 1369 Comm: exe Tainted: G W 5.12.0-rc2+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:event_function+0xef/0x100 kernel/events/core.c:247 Code: 5b 5d 41 5c 41 5d 41 5e 41 5f c3 65 8b 05 a5 79 88 5a 85 c0 0f 84 6e ff ff ff 0f 0b e9 67 ff ff ff 4c 39 f5 74 a7 0f 0b eb a3 <0f> 0b eb 9f 0f 0b eb 96 41 bd fd ff ff ff eb ac 90 48 8b 47 10 48 RSP: 0000:ffff980880158f70 EFLAGS: 00010086 RAX: 0000000000000000 RBX: ffff98088111fde0 RCX: 944f9e9405e234a1 RDX: ffff8a5d4d2ac340 RSI: ffffffffa6b4ccef RDI: ffff8a606fcf0c08 RBP: ffff8a606fcf0c00 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: 0000000000000000 R13: ffff8a5d4e6db800 R14: ffff8a5d46534a00 R15: ffff8a606fcf0c08 FS: 0000000000000000(0000) GS:ffff8a606fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd2b331e225 CR3: 00000001e0e22006 CR4: 0000000000770ee0 DR0: 0000564596006388 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: <IRQ> remote_function kernel/events/core.c:91 [inline] remote_function+0x44/0x50 kernel/events/core.c:71 flush_smp_call_function_queue+0x13a/0x1d0 kernel/smp.c:395 __sysvec_call_function_single+0x3e/0x1c0 arch/x86/kernel/smp.c:248 sysvec_call_function_single+0x89/0xc0 arch/x86/kernel/smp.c:243 </IRQ> asm_sysvec_call_function_single+0x12/0x20 arch/x86/include/asm/idtentry.h:640 RIP: 0010:lock_page_memcg+0xc7/0x170 mm/memcontrol.c:2157 Code: 00 00 e8 6c ae e9 ff 48 c7 c6 d3 07 83 a5 58 4c 89 f7 e8 6c ab e9 ff 48 85 db 74 06 e8 22 e1 f3 ff fb 41 8b 84 24 00 0b 00 00 <85> c0 7e a7 4d 8d b4 24 70 06 00 00 4c 89 f7 e8 85 b2 b0 00 48 89 RSP: 0000:ffff980881bc7b38 EFLAGS: 00000206 RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffffffffa6c1a6ed RDI: ffffffffa6b9ab37 RBP: ffffccff47891b80 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a5d403e9000 R13: ffffffffa58307d3 R14: ffff8a5d403e9688 R15: ffff8a5d47067128 page_remove_rmap+0xc/0xb0 mm/rmap.c:1348 zap_pte_range mm/memory.c:1276 [inline] zap_pmd_range mm/memory.c:1380 [inline] zap_pud_range mm/memory.c:1409 [inline] zap_p4d_range mm/memory.c:1430 [inline] unmap_page_range+0x612/0xb00 mm/memory.c:1451 unmap_vmas+0xbe/0x150 mm/memory.c:1528 exit_mmap+0x8f/0x1d0 mm/mmap.c:3218 __mmput kernel/fork.c:1082 [inline] mmput+0x3c/0xe0 kernel/fork.c:1103 exit_mm kernel/exit.c:501 [inline] do_exit+0x369/0xb60 kernel/exit.c:812 do_group_exit+0x34/0xb0 kernel/exit.c:922 get_signal+0x170/0xc80 kernel/signal.c:2775 arch_do_signal_or_restart+0xea/0x740 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x10f/0x190 kernel/entry/common.c:208 irqentry_exit_to_user_mode+0x5/0x30 kernel/entry/common.c:314 asm_sysvec_reschedule_ipi+0x12/0x20 arch/x86/include/asm/idtentry.h:637 RIP: 0033:0x5598fc00409b Code: Unable to access opcode bytes at RIP 0x5598fc004071. RSP: 002b:00007ffe94151cf0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f6db39331b0 RDX: 0000000000000004 RSI: 00007ffe94151cfc RDI: 0000000000000001 RBP: 00007ffe94151da0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000059 R11: 0000000000000246 R12: 00005598fc0010d0 R13: 00007ffe94151ea0 R14: 0000000000000000 R15: 0000000000000000 irq event stamp: 4150 hardirqs last enabled at (4149): [<ffffffffa583080e>] lock_page_memcg+0xbe/0x170 mm/memcontrol.c:2154 hardirqs last disabled at (4150): [<ffffffffa633219b>] sysvec_call_function_single+0xb/0xc0 arch/x86/kernel/smp.c:243 softirqs last enabled at (3846): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 softirqs last disabled at (3839): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 ---[ end trace 74c79be9940ec2d2 ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1369 at kernel/events/core.c:2253 event_sched_out+0x4c/0x200 kernel/events/core.c:2253 Modules linked in: CPU: 3 PID: 1369 Comm: exe Tainted: G W 5.12.0-rc2+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:event_sched_out+0x4c/0x200 kernel/events/core.c:2253 Code: 92 01 85 c9 75 12 83 bb a8 00 00 00 01 74 26 5b 5d 41 5c 41 5d 41 5e c3 48 8d 7d 20 be ff ff ff ff e8 18 cd b9 00 85 c0 75 dc <0f> 0b 83 bb a8 00 00 00 01 75 da 48 8b 53 28 48 8b 4b 20 48 8d 43 RSP: 0000:ffff980880158f18 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8a5d4e6db800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffffa6b4ccef RDI: ffffffffa6b9ab37 RBP: ffff8a5d46534a00 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a606fcf0c00 R13: ffff8a606fcf0c00 R14: ffff8a5d46534a00 R15: ffff8a606fcf0c08 FS: 0000000000000000(0000) GS:ffff8a606fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd2b331e225 CR3: 00000001e0e22006 CR4: 0000000000770ee0 DR0: 0000564596006388 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: <IRQ> __perf_remove_from_context+0x29/0xd0 kernel/events/core.c:2333 event_function+0xab/0x100 kernel/events/core.c:252 remote_function kernel/events/core.c:91 [inline] remote_function+0x44/0x50 kernel/events/core.c:71 flush_smp_call_function_queue+0x13a/0x1d0 kernel/smp.c:395 __sysvec_call_function_single+0x3e/0x1c0 arch/x86/kernel/smp.c:248 sysvec_call_function_single+0x89/0xc0 arch/x86/kernel/smp.c:243 </IRQ> asm_sysvec_call_function_single+0x12/0x20 arch/x86/include/asm/idtentry.h:640 RIP: 0010:lock_page_memcg+0xc7/0x170 mm/memcontrol.c:2157 Code: 00 00 e8 6c ae e9 ff 48 c7 c6 d3 07 83 a5 58 4c 89 f7 e8 6c ab e9 ff 48 85 db 74 06 e8 22 e1 f3 ff fb 41 8b 84 24 00 0b 00 00 <85> c0 7e a7 4d 8d b4 24 70 06 00 00 4c 89 f7 e8 85 b2 b0 00 48 89 RSP: 0000:ffff980881bc7b38 EFLAGS: 00000206 RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffffffffa6c1a6ed RDI: ffffffffa6b9ab37 RBP: ffffccff47891b80 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a5d403e9000 R13: ffffffffa58307d3 R14: ffff8a5d403e9688 R15: ffff8a5d47067128 page_remove_rmap+0xc/0xb0 mm/rmap.c:1348 zap_pte_range mm/memory.c:1276 [inline] zap_pmd_range mm/memory.c:1380 [inline] zap_pud_range mm/memory.c:1409 [inline] zap_p4d_range mm/memory.c:1430 [inline] unmap_page_range+0x612/0xb00 mm/memory.c:1451 unmap_vmas+0xbe/0x150 mm/memory.c:1528 exit_mmap+0x8f/0x1d0 mm/mmap.c:3218 __mmput kernel/fork.c:1082 [inline] mmput+0x3c/0xe0 kernel/fork.c:1103 exit_mm kernel/exit.c:501 [inline] do_exit+0x369/0xb60 kernel/exit.c:812 do_group_exit+0x34/0xb0 kernel/exit.c:922 get_signal+0x170/0xc80 kernel/signal.c:2775 arch_do_signal_or_restart+0xea/0x740 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x10f/0x190 kernel/entry/common.c:208 irqentry_exit_to_user_mode+0x5/0x30 kernel/entry/common.c:314 asm_sysvec_reschedule_ipi+0x12/0x20 arch/x86/include/asm/idtentry.h:637 RIP: 0033:0x5598fc00409b Code: Unable to access opcode bytes at RIP 0x5598fc004071. RSP: 002b:00007ffe94151cf0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f6db39331b0 RDX: 0000000000000004 RSI: 00007ffe94151cfc RDI: 0000000000000001 RBP: 00007ffe94151da0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000059 R11: 0000000000000246 R12: 00005598fc0010d0 R13: 00007ffe94151ea0 R14: 0000000000000000 R15: 0000000000000000 irq event stamp: 4150 hardirqs last enabled at (4149): [<ffffffffa583080e>] lock_page_memcg+0xbe/0x170 mm/memcontrol.c:2154 hardirqs last disabled at (4150): [<ffffffffa633219b>] sysvec_call_function_single+0xb/0xc0 arch/x86/kernel/smp.c:243 softirqs last enabled at (3846): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 softirqs last disabled at (3839): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 ---[ end trace 74c79be9940ec2d3 ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1369 at kernel/events/core.c:2152 perf_group_detach+0xe1/0x300 kernel/events/core.c:2152 Modules linked in: CPU: 3 PID: 1369 Comm: exe Tainted: G W 5.12.0-rc2+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:perf_group_detach+0xe1/0x300 kernel/events/core.c:2152 Code: 41 5c 41 5d 41 5e 41 5f e9 bc 54 ff ff 48 8b 87 20 02 00 00 be ff ff ff ff 48 8d 78 20 e8 27 88 b9 00 85 c0 0f 85 41 ff ff ff <0f> 0b e9 3a ff ff ff 48 8b 45 10 4c 8b 28 48 8d 58 f0 49 83 ed 10 RSP: 0000:ffff980880158f10 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8a5d4e6db800 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffffa6b4ccef RDI: ffffffffa6b9ab37 RBP: ffff8a5d4e6db800 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a606fcf0c00 R13: 0000000000000001 R14: ffff8a5d46534a00 R15: ffff8a606fcf0c08 FS: 0000000000000000(0000) GS:ffff8a606fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd2b331e225 CR3: 00000001e0e22006 CR4: 0000000000770ee0 DR0: 0000564596006388 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: <IRQ> __perf_remove_from_context+0x91/0xd0 kernel/events/core.c:2335 event_function+0xab/0x100 kernel/events/core.c:252 remote_function kernel/events/core.c:91 [inline] remote_function+0x44/0x50 kernel/events/core.c:71 flush_smp_call_function_queue+0x13a/0x1d0 kernel/smp.c:395 __sysvec_call_function_single+0x3e/0x1c0 arch/x86/kernel/smp.c:248 sysvec_call_function_single+0x89/0xc0 arch/x86/kernel/smp.c:243 </IRQ> asm_sysvec_call_function_single+0x12/0x20 arch/x86/include/asm/idtentry.h:640 RIP: 0010:lock_page_memcg+0xc7/0x170 mm/memcontrol.c:2157 Code: 00 00 e8 6c ae e9 ff 48 c7 c6 d3 07 83 a5 58 4c 89 f7 e8 6c ab e9 ff 48 85 db 74 06 e8 22 e1 f3 ff fb 41 8b 84 24 00 0b 00 00 <85> c0 7e a7 4d 8d b4 24 70 06 00 00 4c 89 f7 e8 85 b2 b0 00 48 89 RSP: 0000:ffff980881bc7b38 EFLAGS: 00000206 RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffffffffa6c1a6ed RDI: ffffffffa6b9ab37 RBP: ffffccff47891b80 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a5d403e9000 R13: ffffffffa58307d3 R14: ffff8a5d403e9688 R15: ffff8a5d47067128 page_remove_rmap+0xc/0xb0 mm/rmap.c:1348 zap_pte_range mm/memory.c:1276 [inline] zap_pmd_range mm/memory.c:1380 [inline] zap_pud_range mm/memory.c:1409 [inline] zap_p4d_range mm/memory.c:1430 [inline] unmap_page_range+0x612/0xb00 mm/memory.c:1451 unmap_vmas+0xbe/0x150 mm/memory.c:1528 exit_mmap+0x8f/0x1d0 mm/mmap.c:3218 __mmput kernel/fork.c:1082 [inline] mmput+0x3c/0xe0 kernel/fork.c:1103 exit_mm kernel/exit.c:501 [inline] do_exit+0x369/0xb60 kernel/exit.c:812 do_group_exit+0x34/0xb0 kernel/exit.c:922 get_signal+0x170/0xc80 kernel/signal.c:2775 arch_do_signal_or_restart+0xea/0x740 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x10f/0x190 kernel/entry/common.c:208 irqentry_exit_to_user_mode+0x5/0x30 kernel/entry/common.c:314 asm_sysvec_reschedule_ipi+0x12/0x20 arch/x86/include/asm/idtentry.h:637 RIP: 0033:0x5598fc00409b Code: Unable to access opcode bytes at RIP 0x5598fc004071. RSP: 002b:00007ffe94151cf0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f6db39331b0 RDX: 0000000000000004 RSI: 00007ffe94151cfc RDI: 0000000000000001 RBP: 00007ffe94151da0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000059 R11: 0000000000000246 R12: 00005598fc0010d0 R13: 00007ffe94151ea0 R14: 0000000000000000 R15: 0000000000000000 irq event stamp: 4150 hardirqs last enabled at (4149): [<ffffffffa583080e>] lock_page_memcg+0xbe/0x170 mm/memcontrol.c:2154 hardirqs last disabled at (4150): [<ffffffffa633219b>] sysvec_call_function_single+0xb/0xc0 arch/x86/kernel/smp.c:243 softirqs last enabled at (3846): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 softirqs last disabled at (3839): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 ---[ end trace 74c79be9940ec2d4 ]--- ------------[ cut here ]------------ WARNING: CPU: 3 PID: 1369 at kernel/events/core.c:1993 list_del_event+0xaf/0x110 kernel/events/core.c:1993 Modules linked in: CPU: 3 PID: 1369 Comm: exe Tainted: G W 5.12.0-rc2+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014 RIP: 0010:list_del_event+0xaf/0x110 kernel/events/core.c:1993 Code: 00 00 01 eb ba be ff ff ff ff 48 89 ef e8 b9 fe ff ff eb db 48 8d 7b 20 be ff ff ff ff e8 39 1d ba 00 85 c0 0f 85 72 ff ff ff <0f> 0b e9 6b ff ff ff 48 8d 83 e8 00 00 00 f6 85 08 01 00 00 04 48 RSP: 0000:ffff980880158f28 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8a5d46534a00 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffffffffa6b4ccef RDI: ffffffffa6b9ab37 RBP: ffff8a5d4e6db800 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a606fcf0c00 R13: 0000000000000001 R14: ffff8a5d46534a00 R15: ffff8a606fcf0c08 FS: 0000000000000000(0000) GS:ffff8a606fcc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fd2b331e225 CR3: 00000001e0e22006 CR4: 0000000000770ee0 DR0: 0000564596006388 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 PKRU: 55555554 Call Trace: <IRQ> __perf_remove_from_context+0x3a/0xd0 kernel/events/core.c:2336 event_function+0xab/0x100 kernel/events/core.c:252 remote_function kernel/events/core.c:91 [inline] remote_function+0x44/0x50 kernel/events/core.c:71 flush_smp_call_function_queue+0x13a/0x1d0 kernel/smp.c:395 __sysvec_call_function_single+0x3e/0x1c0 arch/x86/kernel/smp.c:248 sysvec_call_function_single+0x89/0xc0 arch/x86/kernel/smp.c:243 </IRQ> asm_sysvec_call_function_single+0x12/0x20 arch/x86/include/asm/idtentry.h:640 RIP: 0010:lock_page_memcg+0xc7/0x170 mm/memcontrol.c:2157 Code: 00 00 e8 6c ae e9 ff 48 c7 c6 d3 07 83 a5 58 4c 89 f7 e8 6c ab e9 ff 48 85 db 74 06 e8 22 e1 f3 ff fb 41 8b 84 24 00 0b 00 00 <85> c0 7e a7 4d 8d b4 24 70 06 00 00 4c 89 f7 e8 85 b2 b0 00 48 89 RSP: 0000:ffff980881bc7b38 EFLAGS: 00000206 RAX: 0000000000000000 RBX: 0000000000000200 RCX: 0000000000000006 RDX: 0000000000000000 RSI: ffffffffa6c1a6ed RDI: ffffffffa6b9ab37 RBP: ffffccff47891b80 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000000 R11: ffff8a5d4d2accb8 R12: ffff8a5d403e9000 R13: ffffffffa58307d3 R14: ffff8a5d403e9688 R15: ffff8a5d47067128 page_remove_rmap+0xc/0xb0 mm/rmap.c:1348 zap_pte_range mm/memory.c:1276 [inline] zap_pmd_range mm/memory.c:1380 [inline] zap_pud_range mm/memory.c:1409 [inline] zap_p4d_range mm/memory.c:1430 [inline] unmap_page_range+0x612/0xb00 mm/memory.c:1451 unmap_vmas+0xbe/0x150 mm/memory.c:1528 exit_mmap+0x8f/0x1d0 mm/mmap.c:3218 __mmput kernel/fork.c:1082 [inline] mmput+0x3c/0xe0 kernel/fork.c:1103 exit_mm kernel/exit.c:501 [inline] do_exit+0x369/0xb60 kernel/exit.c:812 do_group_exit+0x34/0xb0 kernel/exit.c:922 get_signal+0x170/0xc80 kernel/signal.c:2775 arch_do_signal_or_restart+0xea/0x740 arch/x86/kernel/signal.c:811 handle_signal_work kernel/entry/common.c:147 [inline] exit_to_user_mode_loop kernel/entry/common.c:171 [inline] exit_to_user_mode_prepare+0x10f/0x190 kernel/entry/common.c:208 irqentry_exit_to_user_mode+0x5/0x30 kernel/entry/common.c:314 asm_sysvec_reschedule_ipi+0x12/0x20 arch/x86/include/asm/idtentry.h:637 RIP: 0033:0x5598fc00409b Code: Unable to access opcode bytes at RIP 0x5598fc004071. RSP: 002b:00007ffe94151cf0 EFLAGS: 00000246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00007f6db39331b0 RDX: 0000000000000004 RSI: 00007ffe94151cfc RDI: 0000000000000001 RBP: 00007ffe94151da0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000059 R11: 0000000000000246 R12: 00005598fc0010d0 R13: 00007ffe94151ea0 R14: 0000000000000000 R15: 0000000000000000 irq event stamp: 4150 hardirqs last enabled at (4149): [<ffffffffa583080e>] lock_page_memcg+0xbe/0x170 mm/memcontrol.c:2154 hardirqs last disabled at (4150): [<ffffffffa633219b>] sysvec_call_function_single+0xb/0xc0 arch/x86/kernel/smp.c:243 softirqs last enabled at (3846): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last enabled at (3846): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 softirqs last disabled at (3839): [<ffffffffa566f621>] invoke_softirq kernel/softirq.c:221 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] __irq_exit_rcu kernel/softirq.c:422 [inline] softirqs last disabled at (3839): [<ffffffffa566f621>] irq_exit_rcu+0xe1/0x120 kernel/softirq.c:434 ---[ end trace 74c79be9940ec2d5 ]---
On Wed, Mar 10, 2021 at 11:41:34AM +0100, Marco Elver wrote: > Adds bit perf_event_attr::remove_on_exec, to support removing an event > from a task on exec. > > This option supports the case where an event is supposed to be > process-wide only, and should not propagate beyond exec, to limit > monitoring to the original process image only. > > Signed-off-by: Marco Elver <elver@google.com> > +/* > + * Removes all events from the current task that have been marked > + * remove-on-exec, and feeds their values back to parent events. > + */ > +static void perf_event_remove_on_exec(void) > +{ > + int ctxn; > + > + for_each_task_context_nr(ctxn) { > + struct perf_event_context *ctx; > + struct perf_event *event, *next; > + > + ctx = perf_pin_task_context(current, ctxn); > + if (!ctx) > + continue; > + mutex_lock(&ctx->mutex); > + > + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { > + if (!event->attr.remove_on_exec) > + continue; > + > + if (!is_kernel_event(event)) > + perf_remove_from_owner(event); > + perf_remove_from_context(event, DETACH_GROUP); There's a comment on this in perf_event_exit_event(), if this task happens to have the original event, then DETACH_GROUP will destroy the grouping. I think this wants to be: perf_remove_from_text(event, child_event->parent ? DETACH_GROUP : 0); or something. > + /* > + * Remove the event and feed back its values to the > + * parent event. > + */ > + perf_event_exit_event(event, ctx, current); Oooh, and here we call it... but it will do list_del_even() / perf_group_detach() *again*. So the problem is that perf_event_exit_task_context() doesn't use remove_from_context(), but instead does task_ctx_sched_out() and then relies on the events not being active. Whereas above you *DO* use remote_from_context(), but then perf_event_exit_event() will try and remove it more. > + } > + mutex_unlock(&ctx->mutex); perf_unpin_context(ctx); > + put_ctx(ctx); > + } > +}
On Tue, Mar 16, 2021 at 05:22PM +0100, Peter Zijlstra wrote: > On Wed, Mar 10, 2021 at 11:41:34AM +0100, Marco Elver wrote: > > Adds bit perf_event_attr::remove_on_exec, to support removing an event > > from a task on exec. > > > > This option supports the case where an event is supposed to be > > process-wide only, and should not propagate beyond exec, to limit > > monitoring to the original process image only. > > > > Signed-off-by: Marco Elver <elver@google.com> > > > +/* > > + * Removes all events from the current task that have been marked > > + * remove-on-exec, and feeds their values back to parent events. > > + */ > > +static void perf_event_remove_on_exec(void) > > +{ > > + int ctxn; > > + > > + for_each_task_context_nr(ctxn) { > > + struct perf_event_context *ctx; > > + struct perf_event *event, *next; > > + > > + ctx = perf_pin_task_context(current, ctxn); > > + if (!ctx) > > + continue; > > + mutex_lock(&ctx->mutex); > > + > > + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { > > + if (!event->attr.remove_on_exec) > > + continue; > > + > > + if (!is_kernel_event(event)) > > + perf_remove_from_owner(event); > > + perf_remove_from_context(event, DETACH_GROUP); > > There's a comment on this in perf_event_exit_event(), if this task > happens to have the original event, then DETACH_GROUP will destroy the > grouping. > > I think this wants to be: > > perf_remove_from_text(event, > child_event->parent ? DETACH_GROUP : 0); > > or something. > > > + /* > > + * Remove the event and feed back its values to the > > + * parent event. > > + */ > > + perf_event_exit_event(event, ctx, current); > > Oooh, and here we call it... but it will do list_del_even() / > perf_group_detach() *again*. > > So the problem is that perf_event_exit_task_context() doesn't use > remove_from_context(), but instead does task_ctx_sched_out() and then > relies on the events not being active. > > Whereas above you *DO* use remote_from_context(), but then > perf_event_exit_event() will try and remove it more. AFAIK, we want to deallocate the events and not just remove them, so doing what perf_event_exit_event() is the right way forward? Or did you have something else in mind? I'm still trying to make sense of the zoo of synchronisation mechanisms at play here. No matter what I try, it seems I get stuck on the fact that I can't cleanly "pause" the context to remove the events (warnings in event_function()). This is what I've been playing with to understand: diff --git a/kernel/events/core.c b/kernel/events/core.c index 450ea9415ed7..c585cef284a0 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4195,6 +4195,88 @@ static void perf_event_enable_on_exec(int ctxn) put_ctx(clone_ctx); } +static void perf_remove_from_owner(struct perf_event *event); +static void perf_event_exit_event(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child); + +/* + * Removes all events from the current task that have been marked + * remove-on-exec, and feeds their values back to parent events. + */ +static void perf_event_remove_on_exec(void) +{ + struct perf_event *event, *next; + int ctxn; + + /***************** BROKEN BROKEN BROKEN *****************/ + + for_each_task_context_nr(ctxn) { + struct perf_event_context *ctx; + bool removed = false; + + ctx = perf_pin_task_context(current, ctxn); + if (!ctx) + continue; + mutex_lock(&ctx->mutex); + + raw_spin_lock_irq(&ctx->lock); + /* + * WIP: Ok, we will unschedule the context, _and_ tell everyone + * still trying to use that it's dead... even though it isn't. + * + * This can't be right... + */ + task_ctx_sched_out(__get_cpu_context(ctx), ctx, EVENT_ALL); + RCU_INIT_POINTER(current->perf_event_ctxp[ctxn], NULL); + WRITE_ONCE(ctx->task, TASK_TOMBSTONE); This code here is obviously bogus, because it removes the context from the task: we might still need it since this task is not dead yet. What's the right way to pause the context to remove the events from it? + raw_spin_unlock_irq(&ctx->lock); + + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { + if (!event->attr.remove_on_exec) + continue; + removed = true; + + if (!is_kernel_event(event)) + perf_remove_from_owner(event); + + /* + * WIP: Want to free the event and feed back its values + * to the parent (if any) ... + */ + perf_event_exit_event(event, ctx, current); + } + ... need to schedule context back in here? + + mutex_unlock(&ctx->mutex); + perf_unpin_context(ctx); + put_ctx(ctx); + } +} + struct perf_read_data { struct perf_event *event; bool group; @@ -7553,6 +7635,8 @@ void perf_event_exec(void) true); } rcu_read_unlock(); + + perf_event_remove_on_exec(); } Thanks, -- Marco
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 813efb65fea8..8c5b9f5ad63f 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -390,7 +390,8 @@ struct perf_event_attr { text_poke : 1, /* include text poke events */ build_id : 1, /* use build id in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ - __reserved_1 : 28; + remove_on_exec : 1, /* event is removed from task on exec */ + __reserved_1 : 27; union { __u32 wakeup_events; /* wakeup every n events */ diff --git a/kernel/events/core.c b/kernel/events/core.c index a8382e6c907c..bc9e6e35e414 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4195,6 +4195,46 @@ static void perf_event_enable_on_exec(int ctxn) put_ctx(clone_ctx); } +static void perf_remove_from_owner(struct perf_event *event); +static void perf_event_exit_event(struct perf_event *child_event, + struct perf_event_context *child_ctx, + struct task_struct *child); + +/* + * Removes all events from the current task that have been marked + * remove-on-exec, and feeds their values back to parent events. + */ +static void perf_event_remove_on_exec(void) +{ + int ctxn; + + for_each_task_context_nr(ctxn) { + struct perf_event_context *ctx; + struct perf_event *event, *next; + + ctx = perf_pin_task_context(current, ctxn); + if (!ctx) + continue; + mutex_lock(&ctx->mutex); + + list_for_each_entry_safe(event, next, &ctx->event_list, event_entry) { + if (!event->attr.remove_on_exec) + continue; + + if (!is_kernel_event(event)) + perf_remove_from_owner(event); + perf_remove_from_context(event, DETACH_GROUP); + /* + * Remove the event and feed back its values to the + * parent event. + */ + perf_event_exit_event(event, ctx, current); + } + mutex_unlock(&ctx->mutex); + put_ctx(ctx); + } +} + struct perf_read_data { struct perf_event *event; bool group; @@ -7519,6 +7559,8 @@ void perf_event_exec(void) true); } rcu_read_unlock(); + + perf_event_remove_on_exec(); } struct remote_output { @@ -11600,6 +11642,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr, if (!attr->inherit && attr->inherit_thread) return -EINVAL; + if (attr->remove_on_exec && attr->enable_on_exec) + return -EINVAL; + out: return ret;
Adds bit perf_event_attr::remove_on_exec, to support removing an event from a task on exec. This option supports the case where an event is supposed to be process-wide only, and should not propagate beyond exec, to limit monitoring to the original process image only. Signed-off-by: Marco Elver <elver@google.com> --- v2: * Add patch to series. --- include/uapi/linux/perf_event.h | 3 ++- kernel/events/core.c | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-)