Message ID | 20230808192713.329414-1-helgaas@kernel.org (mailing list archive) |
---|---|
State | Handled Elsewhere, archived |
Headers | show |
Series | PCI: acpiphp: Log more slot and notification details | expand |
On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote: > > From: Bjorn Helgaas <bhelgaas@google.com> > > When registering an acpiphp slot, log the slot name in the same style as > pciehp and include the PCI bus/device and whether a device is present or > the slot is empty. > > When handling an ACPI notification, log the PCI bus/device and notification > type. > > Sample dmesg log diff: > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > - acpiphp: Slot [3] registered > - acpiphp: Slot [4] registered > PCI host bridge to bus 0000:00 > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > <ACPI Device Check notification> > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled) > + acpiphp: pci 0000:00:04 Slot(4) registered (empty) > PCI host bridge to bus 0000:00 > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > <ACPI Device Check notification> > + acpiphp: pci 0000:00:04 Slot(4) Device Check > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> > --- > drivers/pci/hotplug/acpiphp_core.c | 4 ---- > drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++-- > 2 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c > index c02257f4b61c..19d47607d009 100644 > --- a/drivers/pci/hotplug/acpiphp_core.c > +++ b/drivers/pci/hotplug/acpiphp_core.c > @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot, > goto error_slot; > } > > - pr_info("Slot [%s] registered\n", slot_name(slot)); > - > return 0; > error_slot: > kfree(slot); > @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot) > { > struct slot *slot = acpiphp_slot->slot; > > - pr_info("Slot [%s] unregistered\n", slot_name(slot)); > - > pci_hp_deregister(&slot->hotplug_slot); > kfree(slot); > } > diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c > index 328d1e416014..eeca2753a5c7 100644 > --- a/drivers/pci/hotplug/acpiphp_glue.c > +++ b/drivers/pci/hotplug/acpiphp_glue.c > @@ -25,7 +25,7 @@ > * bus. It loses the refcount when the driver unloads. > */ > > -#define pr_fmt(fmt) "acpiphp_glue: " fmt > +#define pr_fmt(fmt) "acpiphp: " fmt > > #include <linux/module.h> > > @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data, > &val, 60*1000)) > slot->flags |= SLOT_ENABLED; > > + if (slot->slot) > + pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot), > + slot->flags & SLOT_ENABLED ? "enabled" : "empty"); > + > return AE_OK; > } > > @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) > acpi_unlock_hp_context(); > } > slot->flags |= SLOT_IS_GOING_AWAY; > - if (slot->slot) > + if (slot->slot) { > + pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot)); > + > acpiphp_unregister_hotplug_slot(slot); > + } > } > > mutex_lock(&bridge_mutex); > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) > > pci_lock_rescan_remove(); > > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot), > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" : > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" : > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" : > + "Notification"); > + pr_debug() perhaps? On systems that don't have any hotplug problems these messages will just be filling the kernel log unnecessarily. > switch (type) { > case ACPI_NOTIFY_BUS_CHECK: > /* bus re-enumerate */ > --
On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote: > On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote: > > From: Bjorn Helgaas <bhelgaas@google.com> > > > > When registering an acpiphp slot, log the slot name in the same style as > > pciehp and include the PCI bus/device and whether a device is present or > > the slot is empty. > > > > When handling an ACPI notification, log the PCI bus/device and notification > > type. > > > > Sample dmesg log diff: > > > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > > - acpiphp: Slot [3] registered > > - acpiphp: Slot [4] registered > > PCI host bridge to bus 0000:00 > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > > <ACPI Device Check notification> > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled) > > + acpiphp: pci 0000:00:04 Slot(4) registered (empty) > > PCI host bridge to bus 0000:00 > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > > <ACPI Device Check notification> > > + acpiphp: pci 0000:00:04 Slot(4) Device Check > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > ... > > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) > > > > pci_lock_rescan_remove(); > > > > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n", > > + pci_domain_nr(slot->bus), slot->bus->number, > > + slot->device, slot_name(slot->slot), > > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" : > > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" : > > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" : > > + "Notification"); > > pr_debug() perhaps? > > On systems that don't have any hotplug problems these messages will > just be filling the kernel log unnecessarily. If these notifications are really common, pr_debug() sounds like the right thing. I assumed that they would not be common, e.g., they would happen for user-time things like dock/undock, plug/unplug, suspend/resume, etc. In pciehp, we use _info for attention button presses, presence detect changes, link up/down, and I assumed the ACPI notify events would roughly correspond to those. No? Bjorn
On Tue, Aug 8, 2023 at 10:58 PM Bjorn Helgaas <helgaas@kernel.org> wrote: > > On Tue, Aug 08, 2023 at 09:39:22PM +0200, Rafael J. Wysocki wrote: > > On Tue, Aug 8, 2023 at 9:27 PM Bjorn Helgaas <helgaas@kernel.org> wrote: > > > From: Bjorn Helgaas <bhelgaas@google.com> > > > > > > When registering an acpiphp slot, log the slot name in the same style as > > > pciehp and include the PCI bus/device and whether a device is present or > > > the slot is empty. > > > > > > When handling an ACPI notification, log the PCI bus/device and notification > > > type. > > > > > > Sample dmesg log diff: > > > > > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > > > - acpiphp: Slot [3] registered > > > - acpiphp: Slot [4] registered > > > PCI host bridge to bus 0000:00 > > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > > > <ACPI Device Check notification> > > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > > > > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > > > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled) > > > + acpiphp: pci 0000:00:04 Slot(4) registered (empty) > > > PCI host bridge to bus 0000:00 > > > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > > > <ACPI Device Check notification> > > > + acpiphp: pci 0000:00:04 Slot(4) Device Check > > > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > > ... > > > > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) > > > > > > pci_lock_rescan_remove(); > > > > > > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n", > > > + pci_domain_nr(slot->bus), slot->bus->number, > > > + slot->device, slot_name(slot->slot), > > > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" : > > > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" : > > > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" : > > > + "Notification"); > > > > pr_debug() perhaps? > > > > On systems that don't have any hotplug problems these messages will > > just be filling the kernel log unnecessarily. > > If these notifications are really common, pr_debug() sounds like the > right thing. I assumed that they would not be common, e.g., they > would happen for user-time things like dock/undock, plug/unplug, > suspend/resume, etc. > > In pciehp, we use _info for attention button presses, presence detect > changes, link up/down, and I assumed the ACPI notify events would > roughly correspond to those. No? Depending on how often the system gets suspended and resumed, they may end up in the log quite often and if there are no problems related to them, they are just noise. IMHO in that case the users are taught to ignore stuff that lands in the log which is not fantastic.
On Tue, 8 Aug 2023 14:27:13 -0500 Bjorn Helgaas <helgaas@kernel.org> wrote: > From: Bjorn Helgaas <bhelgaas@google.com> > > When registering an acpiphp slot, log the slot name in the same style as > pciehp and include the PCI bus/device and whether a device is present or > the slot is empty. > > When handling an ACPI notification, log the PCI bus/device and notification > type. > > Sample dmesg log diff: > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > - acpiphp: Slot [3] registered > - acpiphp: Slot [4] registered > PCI host bridge to bus 0000:00 > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > <ACPI Device Check notification> Having ACPI node name/path here that received notification would be helpfull > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-ff]) > + acpiphp: pci 0000:00:03 Slot(3) registered (enabled) > + acpiphp: pci 0000:00:04 Slot(4) registered (empty) > PCI host bridge to bus 0000:00 > pci 0000:00:03.0: [8086:100e] type 00 class 0x020000 > <ACPI Device Check notification> > + acpiphp: pci 0000:00:04 Slot(4) Device Check > pci 0000:00:04.0: [8086:100e] type 00 class 0x020000 > > Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> > --- > drivers/pci/hotplug/acpiphp_core.c | 4 ---- > drivers/pci/hotplug/acpiphp_glue.c | 23 +++++++++++++++++++++-- > 2 files changed, 21 insertions(+), 6 deletions(-) > > diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c > index c02257f4b61c..19d47607d009 100644 > --- a/drivers/pci/hotplug/acpiphp_core.c > +++ b/drivers/pci/hotplug/acpiphp_core.c > @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot, > goto error_slot; > } > > - pr_info("Slot [%s] registered\n", slot_name(slot)); > - > return 0; > error_slot: > kfree(slot); > @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot) > { > struct slot *slot = acpiphp_slot->slot; > > - pr_info("Slot [%s] unregistered\n", slot_name(slot)); > - > pci_hp_deregister(&slot->hotplug_slot); > kfree(slot); > } > diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c > index 328d1e416014..eeca2753a5c7 100644 > --- a/drivers/pci/hotplug/acpiphp_glue.c > +++ b/drivers/pci/hotplug/acpiphp_glue.c > @@ -25,7 +25,7 @@ > * bus. It loses the refcount when the driver unloads. > */ > > -#define pr_fmt(fmt) "acpiphp_glue: " fmt > +#define pr_fmt(fmt) "acpiphp: " fmt > > #include <linux/module.h> > > @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data, > &val, 60*1000)) > slot->flags |= SLOT_ENABLED; > > + if (slot->slot) > + pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot), > + slot->flags & SLOT_ENABLED ? "enabled" : "empty"); > + > return AE_OK; > } > > @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) > acpi_unlock_hp_context(); > } > slot->flags |= SLOT_IS_GOING_AWAY; > - if (slot->slot) > + if (slot->slot) { > + pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot)); > + > acpiphp_unregister_hotplug_slot(slot); > + } > } > > mutex_lock(&bridge_mutex); > @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) > > pci_lock_rescan_remove(); > > + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n", > + pci_domain_nr(slot->bus), slot->bus->number, > + slot->device, slot_name(slot->slot), I had similar issue with logging patches that I've asked Woody to run. it crashes here with buscheck on non existing device Call Trace: <TASK> ? __die_body+0x19/0x60 ? page_fault_oops+0x158/0x430 ? fixup_exception+0x21/0x330 ? exc_page_fault+0x6b/0x150 ? asm_exc_page_fault+0x26/0x30 ? acpiphp_hotplug_notify+0x13d/0x2f0 ? __pfx_acpiphp_hotplug_notify+0x10/0x10 acpi_device_hotplug+0xc2/0x4f0 acpi_hotplug_work_fn+0x19/0x30 process_one_work+0x1f8/0x3e0 worker_thread+0x29/0x3b0 ? __pfx_worker_thread+0x10/0x10 kthread+0xf2/0x120 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x2f/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0 (gdb) p *slot $2 = {node = {next = 0xffff8881003287c0, prev = 0xffff888100887730}, bus = 0xffff8881003de800, funcs = {next = 0xffff8881008877b0, prev = 0xffff888100887a50}, slot = 0x0 <fixed_percpu_data>, ^^^ likely culprit device = 0x0, flags = 0x1} reproducer hack: https://gitlab.com/imammedo/qemu/-/tree/acpiphp_buscheck_on_missing_device?ref_type=heads ./qemu-system-x86_64 -M q35 -cpu host -smp 4 -enable-kvm -m 6G -kernel ~/builds/linux-2.6/arch/x86/boot/bzImage -append 'nokaslr root=/dev/sda1 console=ttyS0' -snapshot rhel9.img -device pcie-root-port,id=rp1,bus=pcie.0,chassis=0,addr=8 -monitor stdio -serial file:/tmp/s -s then once booted to trigger buscheck issue at monitor prompt: (qemu) device_add e1000e,bus=rp1 > + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" : > + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" : > + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" : > + "Notification"); > + > switch (type) { > case ACPI_NOTIFY_BUS_CHECK: > /* bus re-enumerate */
diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index c02257f4b61c..19d47607d009 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -282,8 +282,6 @@ int acpiphp_register_hotplug_slot(struct acpiphp_slot *acpiphp_slot, goto error_slot; } - pr_info("Slot [%s] registered\n", slot_name(slot)); - return 0; error_slot: kfree(slot); @@ -296,8 +294,6 @@ void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *acpiphp_slot) { struct slot *slot = acpiphp_slot->slot; - pr_info("Slot [%s] unregistered\n", slot_name(slot)); - pci_hp_deregister(&slot->hotplug_slot); kfree(slot); } diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 328d1e416014..eeca2753a5c7 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -25,7 +25,7 @@ * bus. It loses the refcount when the driver unloads. */ -#define pr_fmt(fmt) "acpiphp_glue: " fmt +#define pr_fmt(fmt) "acpiphp: " fmt #include <linux/module.h> @@ -333,6 +333,12 @@ static acpi_status acpiphp_add_context(acpi_handle handle, u32 lvl, void *data, &val, 60*1000)) slot->flags |= SLOT_ENABLED; + if (slot->slot) + pr_info("pci %04x:%02x:%02x Slot(%s) registered (%s)\n", + pci_domain_nr(slot->bus), slot->bus->number, + slot->device, slot_name(slot->slot), + slot->flags & SLOT_ENABLED ? "enabled" : "empty"); + return AE_OK; } @@ -351,8 +357,13 @@ static void cleanup_bridge(struct acpiphp_bridge *bridge) acpi_unlock_hp_context(); } slot->flags |= SLOT_IS_GOING_AWAY; - if (slot->slot) + if (slot->slot) { + pr_info("pci %04x:%02x:%02x Slot(%s) unregistered\n", + pci_domain_nr(slot->bus), slot->bus->number, + slot->device, slot_name(slot->slot)); + acpiphp_unregister_hotplug_slot(slot); + } } mutex_lock(&bridge_mutex); @@ -793,6 +804,14 @@ static void hotplug_event(u32 type, struct acpiphp_context *context) pci_lock_rescan_remove(); + pr_info("pci %04x:%02x:%02x Slot(%s) %s\n", + pci_domain_nr(slot->bus), slot->bus->number, + slot->device, slot_name(slot->slot), + type == ACPI_NOTIFY_BUS_CHECK ? "Bus Check" : + type == ACPI_NOTIFY_DEVICE_CHECK ? "Device Check" : + type == ACPI_NOTIFY_EJECT_REQUEST ? "Eject Request" : + "Notification"); + switch (type) { case ACPI_NOTIFY_BUS_CHECK: /* bus re-enumerate */