diff mbox

[v4,1/2] x86, pci: Reset PCIe devices at boot time

Message ID 20121015065526.2832.90867.sendpatchset@indoh (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Takao Indoh Oct. 15, 2012, 7 a.m. UTC
This patch resets PCIe devices at boot time by hot reset when
"reset_devices" is specified.

Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
---
 arch/x86/include/asm/pci-direct.h |    1 
 arch/x86/kernel/setup.c           |    3 
 arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
 include/linux/pci.h               |    2 
 init/main.c                       |    4 
 5 files changed, 352 insertions(+), 2 deletions(-)


--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Khalid Aziz Oct. 15, 2012, 5:17 p.m. UTC | #1
On Mon, 2012-10-15 at 16:00 +0900, Takao Indoh wrote:
> This patch resets PCIe devices at boot time by hot reset when
> "reset_devices" is specified.
> 
> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
> ---
>  arch/x86/include/asm/pci-direct.h |    1 
>  arch/x86/kernel/setup.c           |    3 
>  arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
>  include/linux/pci.h               |    2 
>  init/main.c                       |    4 
>  5 files changed, 352 insertions(+), 2 deletions(-)
> 


Looks good.

Reviewed-by: Khalid Aziz <khalid@gonehiking.org>

--
Khalid

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Oct. 15, 2012, 6:36 p.m. UTC | #2
On Mon, Oct 15, 2012 at 12:00 AM, Takao Indoh
<indou.takao@jp.fujitsu.com> wrote:
> This patch resets PCIe devices at boot time by hot reset when
> "reset_devices" is specified.

How about PCI devices whose domain_nr is not zero?

>
> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
> ---
>  arch/x86/include/asm/pci-direct.h |    1
>  arch/x86/kernel/setup.c           |    3
>  arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
>  include/linux/pci.h               |    2
>  init/main.c                       |    4
>  5 files changed, 352 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
> index b1e7a45..de30db2 100644
> --- a/arch/x86/include/asm/pci-direct.h
> +++ b/arch/x86/include/asm/pci-direct.h
> @@ -18,4 +18,5 @@ extern int early_pci_allowed(void);
>  extern unsigned int pci_early_dump_regs;
>  extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
>  extern void early_dump_pci_devices(void);
> +extern void early_reset_pcie_devices(void);
>  #endif /* _ASM_X86_PCI_DIRECT_H */
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index a2bb18e..73d3425 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -987,6 +987,9 @@ void __init setup_arch(char **cmdline_p)
>         generic_apic_probe();
>
>         early_quirks();
> +#ifdef CONFIG_PCI
> +       early_reset_pcie_devices();
> +#endif
>
>         /*
>          * Read APIC and some other early information from ACPI tables.
> diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
> index d1067d5..683b30f 100644
> --- a/arch/x86/pci/early.c
> +++ b/arch/x86/pci/early.c
> @@ -1,5 +1,6 @@
>  #include <linux/kernel.h>
>  #include <linux/pci.h>
> +#include <linux/bootmem.h>
>  #include <asm/pci-direct.h>
>  #include <asm/io.h>
>  #include <asm/pci_x86.h>
> @@ -109,3 +110,346 @@ void early_dump_pci_devices(void)
>                 }
>         }
>  }
> +
> +#define PCI_EXP_SAVE_REGS      7
> +#define pcie_cap_has_devctl(type, flags)       1
> +#define pcie_cap_has_lnkctl(type, flags)               \
> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
> +                 type == PCI_EXP_TYPE_ENDPOINT ||      \
> +                 type == PCI_EXP_TYPE_LEG_END))
> +#define pcie_cap_has_sltctl(type, flags)               \
> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
> +                ((type == PCI_EXP_TYPE_ROOT_PORT) ||   \
> +                 (type == PCI_EXP_TYPE_DOWNSTREAM &&   \
> +                  (flags & PCI_EXP_FLAGS_SLOT))))
> +#define pcie_cap_has_rtctl(type, flags)                        \
> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
> +                 type == PCI_EXP_TYPE_RC_EC))
> +
> +struct save_config {
> +       u32 pci[16];
> +       u16 pcie[PCI_EXP_SAVE_REGS];
> +};
> +
> +struct pcie_dev {
> +       int cap;   /* position of PCI Express capability */
> +       int flags; /* PCI_EXP_FLAGS */
> +       struct save_config save; /* saved configration register */
> +};
> +
> +struct pcie_port {
> +       struct list_head dev;
> +       u8 secondary;
> +       struct pcie_dev child[PCI_MAX_FUNCTIONS];
> +};
> +
> +static LIST_HEAD(device_list);
> +static void __init pci_udelay(int loops)
> +{
> +       while (loops--) {
> +               /* Approximately 1 us */
> +               native_io_delay();
> +       }
> +}
> +
> +/* Derived from drivers/pci/pci.c */
> +#define PCI_FIND_CAP_TTL       48
> +static int __init __pci_find_next_cap_ttl(u8 bus, u8 slot, u8 func,
> +                                         u8 pos, int cap, int *ttl)
> +{
> +       u8 id;
> +
> +       while ((*ttl)--) {
> +               pos = read_pci_config_byte(bus, slot, func, pos);
> +               if (pos < 0x40)
> +                       break;
> +               pos &= ~3;
> +               id = read_pci_config_byte(bus, slot, func,
> +                                       pos + PCI_CAP_LIST_ID);
> +               if (id == 0xff)
> +                       break;
> +               if (id == cap)
> +                       return pos;
> +               pos += PCI_CAP_LIST_NEXT;
> +       }
> +       return 0;
> +}
> +
> +static int __init __pci_find_next_cap(u8 bus, u8 slot, u8 func, u8 pos, int cap)
> +{
> +       int ttl = PCI_FIND_CAP_TTL;
> +
> +       return __pci_find_next_cap_ttl(bus, slot, func, pos, cap, &ttl);
> +}
> +
> +static int __init __pci_bus_find_cap_start(u8 bus, u8 slot, u8 func,
> +                                          u8 hdr_type)
> +{
> +       u16 status;
> +
> +       status = read_pci_config_16(bus, slot, func, PCI_STATUS);
> +       if (!(status & PCI_STATUS_CAP_LIST))
> +               return 0;
> +
> +       switch (hdr_type) {
> +       case PCI_HEADER_TYPE_NORMAL:
> +       case PCI_HEADER_TYPE_BRIDGE:
> +               return PCI_CAPABILITY_LIST;
> +       case PCI_HEADER_TYPE_CARDBUS:
> +               return PCI_CB_CAPABILITY_LIST;
> +       default:
> +               return 0;
> +       }
> +
> +       return 0;
> +}
> +
> +static int __init early_pci_find_capability(u8 bus, u8 slot, u8 func, int cap)
> +{
> +       int pos;
> +       u8 type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
> +
> +       pos = __pci_bus_find_cap_start(bus, slot, func, type & 0x7f);
> +       if (pos)
> +               pos = __pci_find_next_cap(bus, slot, func, pos, cap);
> +
> +       return pos;
> +}
> +
> +static void __init do_reset(u8 bus, u8 slot, u8 func)
> +{
> +       u16 ctrl;
> +
> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
> +
> +       /* Assert Secondary Bus Reset */
> +       ctrl = read_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL);
> +       ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
> +
> +       /*
> +        * PCIe spec requires software to ensure a minimum reset duration
> +        * (Trst == 1ms). We have here 5ms safety margin because pci_udelay is
> +        * not precise.
> +        */
> +       pci_udelay(5000);
> +
> +       /* De-assert Secondary Bus Reset */
> +       ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
> +}
> +
> +static void __init save_state(unsigned bus, unsigned slot, unsigned func,
> +               struct pcie_dev *dev)
> +{
> +       int i;
> +       int pcie, flags, pcie_type;
> +       struct save_config *save;
> +
> +       pcie = dev->cap;
> +       flags = dev->flags;
> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
> +       save = &dev->save;
> +
> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
> +
> +       for (i = 0; i < 16; i++)
> +               save->pci[i] = read_pci_config(bus, slot, func, i * 4);
> +       i = 0;
> +       if (pcie_cap_has_devctl(pcie_type, flags))
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_DEVCTL);
> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_LNKCTL);
> +       if (pcie_cap_has_sltctl(pcie_type, flags))
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_SLTCTL);
> +       if (pcie_cap_has_rtctl(pcie_type, flags))
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_RTCTL);
> +
> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_DEVCTL2);
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_LNKCTL2);
> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
> +                                                     pcie + PCI_EXP_SLTCTL2);
> +       }
> +}
> +
> +static void __init restore_state(unsigned bus, unsigned slot, unsigned func,
> +               struct pcie_dev *dev)
> +{
> +       int i = 0;
> +       int pcie, flags, pcie_type;
> +       struct save_config *save;
> +
> +       pcie = dev->cap;
> +       flags = dev->flags;
> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
> +       save = &dev->save;
> +
> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
> +              bus, slot, func);
> +
> +       if (pcie_cap_has_devctl(pcie_type, flags))
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_DEVCTL, save->pcie[i++]);
> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_LNKCTL, save->pcie[i++]);
> +       if (pcie_cap_has_sltctl(pcie_type, flags))
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_SLTCTL, save->pcie[i++]);
> +       if (pcie_cap_has_rtctl(pcie_type, flags))
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_RTCTL, save->pcie[i++]);
> +
> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_DEVCTL2, save->pcie[i++]);
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_LNKCTL2, save->pcie[i++]);
> +               write_pci_config_16(bus, slot, func,
> +                                   pcie + PCI_EXP_SLTCTL2, save->pcie[i++]);
> +       }
> +
> +       for (i = 15; i >= 0; i--)
> +               write_pci_config(bus, slot, func, i * 4, save->pci[i]);
> +}

do you have to pass bus/slot/func and use read/pci_config directly ?

I have a patchset that uses a dummy pci device and reuses the existing late
quirk code in early_quirk to do usb handoff early.

please check

git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
for-x86-early-quirk-usb

678a023: x86: usb handoff in early_quirk
2d418d8: pci, usb: Make usb handoff func all take base remapping
d9bd1ad: x86, pci: add dummy pci device for early stage
de38757: x86: early_quirk check all bus/dev/func in domain 0
325cc7a: make msleep to do mdelay before scheduler is running
eec78a4: x86: set percpu cpu_info lpj to default
52ebec4: x86, pci: early dump skip device the same way as later probe code

if that could help.
you may reuse some later functions that take pci_dev as parameters.
also mdelay should work early...
and use early_quirk instead of adding another call in setup.c

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Takao Indoh Oct. 16, 2012, 4:23 a.m. UTC | #3
(2012/10/16 3:36), Yinghai Lu wrote:
> On Mon, Oct 15, 2012 at 12:00 AM, Takao Indoh
> <indou.takao@jp.fujitsu.com> wrote:
>> This patch resets PCIe devices at boot time by hot reset when
>> "reset_devices" is specified.
>
> how about pci devices that domain_nr is not zero ?

This patch does not support multiple domains yet.

>>
>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>> ---
>>   arch/x86/include/asm/pci-direct.h |    1
>>   arch/x86/kernel/setup.c           |    3
>>   arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
>>   include/linux/pci.h               |    2
>>   init/main.c                       |    4
>>   5 files changed, 352 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
>> index b1e7a45..de30db2 100644
>> --- a/arch/x86/include/asm/pci-direct.h
>> +++ b/arch/x86/include/asm/pci-direct.h
>> @@ -18,4 +18,5 @@ extern int early_pci_allowed(void);
>>   extern unsigned int pci_early_dump_regs;
>>   extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
>>   extern void early_dump_pci_devices(void);
>> +extern void early_reset_pcie_devices(void);
>>   #endif /* _ASM_X86_PCI_DIRECT_H */
>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>> index a2bb18e..73d3425 100644
>> --- a/arch/x86/kernel/setup.c
>> +++ b/arch/x86/kernel/setup.c
>> @@ -987,6 +987,9 @@ void __init setup_arch(char **cmdline_p)
>>          generic_apic_probe();
>>
>>          early_quirks();
>> +#ifdef CONFIG_PCI
>> +       early_reset_pcie_devices();
>> +#endif
>>
>>          /*
>>           * Read APIC and some other early information from ACPI tables.
>> diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
>> index d1067d5..683b30f 100644
>> --- a/arch/x86/pci/early.c
>> +++ b/arch/x86/pci/early.c
>> @@ -1,5 +1,6 @@
>>   #include <linux/kernel.h>
>>   #include <linux/pci.h>
>> +#include <linux/bootmem.h>
>>   #include <asm/pci-direct.h>
>>   #include <asm/io.h>
>>   #include <asm/pci_x86.h>
>> @@ -109,3 +110,346 @@ void early_dump_pci_devices(void)
>>                  }
>>          }
>>   }
>> +
>> +#define PCI_EXP_SAVE_REGS      7
>> +#define pcie_cap_has_devctl(type, flags)       1
>> +#define pcie_cap_has_lnkctl(type, flags)               \
>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
>> +                 type == PCI_EXP_TYPE_ENDPOINT ||      \
>> +                 type == PCI_EXP_TYPE_LEG_END))
>> +#define pcie_cap_has_sltctl(type, flags)               \
>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>> +                ((type == PCI_EXP_TYPE_ROOT_PORT) ||   \
>> +                 (type == PCI_EXP_TYPE_DOWNSTREAM &&   \
>> +                  (flags & PCI_EXP_FLAGS_SLOT))))
>> +#define pcie_cap_has_rtctl(type, flags)                        \
>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
>> +                 type == PCI_EXP_TYPE_RC_EC))
>> +
>> +struct save_config {
>> +       u32 pci[16];
>> +       u16 pcie[PCI_EXP_SAVE_REGS];
>> +};
>> +
>> +struct pcie_dev {
>> +       int cap;   /* position of PCI Express capability */
>> +       int flags; /* PCI_EXP_FLAGS */
>> +       struct save_config save; /* saved configration register */
>> +};
>> +
>> +struct pcie_port {
>> +       struct list_head dev;
>> +       u8 secondary;
>> +       struct pcie_dev child[PCI_MAX_FUNCTIONS];
>> +};
>> +
>> +static LIST_HEAD(device_list);
>> +static void __init pci_udelay(int loops)
>> +{
>> +       while (loops--) {
>> +               /* Approximately 1 us */
>> +               native_io_delay();
>> +       }
>> +}
>> +
>> +/* Derived from drivers/pci/pci.c */
>> +#define PCI_FIND_CAP_TTL       48
>> +static int __init __pci_find_next_cap_ttl(u8 bus, u8 slot, u8 func,
>> +                                         u8 pos, int cap, int *ttl)
>> +{
>> +       u8 id;
>> +
>> +       while ((*ttl)--) {
>> +               pos = read_pci_config_byte(bus, slot, func, pos);
>> +               if (pos < 0x40)
>> +                       break;
>> +               pos &= ~3;
>> +               id = read_pci_config_byte(bus, slot, func,
>> +                                       pos + PCI_CAP_LIST_ID);
>> +               if (id == 0xff)
>> +                       break;
>> +               if (id == cap)
>> +                       return pos;
>> +               pos += PCI_CAP_LIST_NEXT;
>> +       }
>> +       return 0;
>> +}
>> +
>> +static int __init __pci_find_next_cap(u8 bus, u8 slot, u8 func, u8 pos, int cap)
>> +{
>> +       int ttl = PCI_FIND_CAP_TTL;
>> +
>> +       return __pci_find_next_cap_ttl(bus, slot, func, pos, cap, &ttl);
>> +}
>> +
>> +static int __init __pci_bus_find_cap_start(u8 bus, u8 slot, u8 func,
>> +                                          u8 hdr_type)
>> +{
>> +       u16 status;
>> +
>> +       status = read_pci_config_16(bus, slot, func, PCI_STATUS);
>> +       if (!(status & PCI_STATUS_CAP_LIST))
>> +               return 0;
>> +
>> +       switch (hdr_type) {
>> +       case PCI_HEADER_TYPE_NORMAL:
>> +       case PCI_HEADER_TYPE_BRIDGE:
>> +               return PCI_CAPABILITY_LIST;
>> +       case PCI_HEADER_TYPE_CARDBUS:
>> +               return PCI_CB_CAPABILITY_LIST;
>> +       default:
>> +               return 0;
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +static int __init early_pci_find_capability(u8 bus, u8 slot, u8 func, int cap)
>> +{
>> +       int pos;
>> +       u8 type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
>> +
>> +       pos = __pci_bus_find_cap_start(bus, slot, func, type & 0x7f);
>> +       if (pos)
>> +               pos = __pci_find_next_cap(bus, slot, func, pos, cap);
>> +
>> +       return pos;
>> +}
>> +
>> +static void __init do_reset(u8 bus, u8 slot, u8 func)
>> +{
>> +       u16 ctrl;
>> +
>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
>> +
>> +       /* Assert Secondary Bus Reset */
>> +       ctrl = read_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL);
>> +       ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
>> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
>> +
>> +       /*
>> +        * PCIe spec requires software to ensure a minimum reset duration
>> +        * (Trst == 1ms). We have here 5ms safety margin because pci_udelay is
>> +        * not precise.
>> +        */
>> +       pci_udelay(5000);
>> +
>> +       /* De-assert Secondary Bus Reset */
>> +       ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
>> +}
>> +
>> +static void __init save_state(unsigned bus, unsigned slot, unsigned func,
>> +               struct pcie_dev *dev)
>> +{
>> +       int i;
>> +       int pcie, flags, pcie_type;
>> +       struct save_config *save;
>> +
>> +       pcie = dev->cap;
>> +       flags = dev->flags;
>> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
>> +       save = &dev->save;
>> +
>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
>> +
>> +       for (i = 0; i < 16; i++)
>> +               save->pci[i] = read_pci_config(bus, slot, func, i * 4);
>> +       i = 0;
>> +       if (pcie_cap_has_devctl(pcie_type, flags))
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_DEVCTL);
>> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_LNKCTL);
>> +       if (pcie_cap_has_sltctl(pcie_type, flags))
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_SLTCTL);
>> +       if (pcie_cap_has_rtctl(pcie_type, flags))
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_RTCTL);
>> +
>> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_DEVCTL2);
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_LNKCTL2);
>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>> +                                                     pcie + PCI_EXP_SLTCTL2);
>> +       }
>> +}
>> +
>> +static void __init restore_state(unsigned bus, unsigned slot, unsigned func,
>> +               struct pcie_dev *dev)
>> +{
>> +       int i = 0;
>> +       int pcie, flags, pcie_type;
>> +       struct save_config *save;
>> +
>> +       pcie = dev->cap;
>> +       flags = dev->flags;
>> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
>> +       save = &dev->save;
>> +
>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
>> +              bus, slot, func);
>> +
>> +       if (pcie_cap_has_devctl(pcie_type, flags))
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_DEVCTL, save->pcie[i++]);
>> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_LNKCTL, save->pcie[i++]);
>> +       if (pcie_cap_has_sltctl(pcie_type, flags))
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_SLTCTL, save->pcie[i++]);
>> +       if (pcie_cap_has_rtctl(pcie_type, flags))
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_RTCTL, save->pcie[i++]);
>> +
>> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_DEVCTL2, save->pcie[i++]);
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_LNKCTL2, save->pcie[i++]);
>> +               write_pci_config_16(bus, slot, func,
>> +                                   pcie + PCI_EXP_SLTCTL2, save->pcie[i++]);
>> +       }
>> +
>> +       for (i = 15; i >= 0; i--)
>> +               write_pci_config(bus, slot, func, i * 4, save->pci[i]);
>> +}
>
> do you have to pass bus/slot/func and use read/pci_config directly ?
>
> I had one patchset that use dummy pci device and reuse existing late quirk code
> in early_quirk to do usb handoff early.
>
> please check
>
> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
> for-x86-early-quirk-usb
>
> 678a023: x86: usb handoff in early_quirk
> 2d418d8: pci, usb: Make usb handoff func all take base remapping
> d9bd1ad: x86, pci: add dummy pci device for early stage
> de38757: x86: early_quirk check all bus/dev/func in domain 0
> 325cc7a: make msleep to do mdelay before scheduler is running
> eec78a4: x86: set percpu cpu_info lpj to default
> 52ebec4: x86, pci: early dump skip device the same way as later probe code
>
> if that could help.
> you may reuse some later functions that take pci_dev as parameters.
d9bd1ad looks very useful for my patch. Thanks for the information.
What is the status of this patch? Has it already been merged into the
tip tree or somewhere else?

> also mdelay should work early...
As far as I tested, mdelay does not work in early.c. Maybe
it only works after calibration.

> and use early_quirk instead add another calling in setup.c
I think this reset code should not be added to early_quirk.
In my understanding, a "quirk" is used to work around problems with
specific hardware.

Thanks,
Takao Indoh

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Takao Indoh Oct. 16, 2012, 11:45 a.m. UTC | #4
(2012/10/16 2:17), Khalid Aziz wrote:
> On Mon, 2012-10-15 at 16:00 +0900, Takao Indoh wrote:
>> This patch resets PCIe devices at boot time by hot reset when
>> "reset_devices" is specified.
>>
>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>> ---
>>   arch/x86/include/asm/pci-direct.h |    1
>>   arch/x86/kernel/setup.c           |    3
>>   arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
>>   include/linux/pci.h               |    2
>>   init/main.c                       |    4
>>   5 files changed, 352 insertions(+), 2 deletions(-)
>>
>
>
> Looks good.
>
> Reviewed-by: Khalid Aziz <khalid@gonehiking.org>
>

Thanks! But unfortunately I found a bug, so I'll post a v5 patch soon.

The bug I found is that a configuration register is accessed without
any delay after reset.

This is an algorithm to reset devices.

  for (each device) {  <===== (A)
    if (does not have downstream devices)
      continue
    for (each downstream device) {
      save config registers
    }
    do_bus_reset <==== (B)
  }
  wait 500 ms
  ...

Let's say my system has the following devices.

00:01.0 (root port)
|
+- 01:00.0 (device)

In this case,
1) First, 00:01.0 is found at (A), and its downstream device 01:00.0
   is reset at (B).
2) Next, 01:00.0 is found at (A). Then the config register of 01:00.0 is
   accessed. This is a PCIe spec violation because the config register of
   01:00.0 is accessed without any delay after reset. The PCIe spec requires
   a waiting time of at least 100ms before sending a config request.

Therefore I'll update the patches like this, so that devices are reset
only after the saving phase is done:

  for (each device) {
    if (does not have downstream devices)
      continue
    for_each (its downstream devices) {
      save config registers
    }
-   do_bus_reset
  }
+ for (each device) {
+   do_bus_reset
+ }
  wait 500 ms
  ...

Thanks,
Takao Indoh

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Takao Indoh Nov. 7, 2012, 6:48 a.m. UTC | #5
(2012/10/16 13:23), Takao Indoh wrote:
> (2012/10/16 3:36), Yinghai Lu wrote:
>> On Mon, Oct 15, 2012 at 12:00 AM, Takao Indoh
>> <indou.takao@jp.fujitsu.com> wrote:
>>> This patch resets PCIe devices at boot time by hot reset when
>>> "reset_devices" is specified.
>>
>> how about pci devices that domain_nr is not zero ?
>
> This patch does not support multiple domains yet.
>
>>>
>>> Signed-off-by: Takao Indoh <indou.takao@jp.fujitsu.com>
>>> ---
>>>   arch/x86/include/asm/pci-direct.h |    1
>>>   arch/x86/kernel/setup.c           |    3
>>>   arch/x86/pci/early.c              |  344 ++++++++++++++++++++++++++++
>>>   include/linux/pci.h               |    2
>>>   init/main.c                       |    4
>>>   5 files changed, 352 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
>>> index b1e7a45..de30db2 100644
>>> --- a/arch/x86/include/asm/pci-direct.h
>>> +++ b/arch/x86/include/asm/pci-direct.h
>>> @@ -18,4 +18,5 @@ extern int early_pci_allowed(void);
>>>   extern unsigned int pci_early_dump_regs;
>>>   extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
>>>   extern void early_dump_pci_devices(void);
>>> +extern void early_reset_pcie_devices(void);
>>>   #endif /* _ASM_X86_PCI_DIRECT_H */
>>> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
>>> index a2bb18e..73d3425 100644
>>> --- a/arch/x86/kernel/setup.c
>>> +++ b/arch/x86/kernel/setup.c
>>> @@ -987,6 +987,9 @@ void __init setup_arch(char **cmdline_p)
>>>          generic_apic_probe();
>>>
>>>          early_quirks();
>>> +#ifdef CONFIG_PCI
>>> +       early_reset_pcie_devices();
>>> +#endif
>>>
>>>          /*
>>>           * Read APIC and some other early information from ACPI tables.
>>> diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
>>> index d1067d5..683b30f 100644
>>> --- a/arch/x86/pci/early.c
>>> +++ b/arch/x86/pci/early.c
>>> @@ -1,5 +1,6 @@
>>>   #include <linux/kernel.h>
>>>   #include <linux/pci.h>
>>> +#include <linux/bootmem.h>
>>>   #include <asm/pci-direct.h>
>>>   #include <asm/io.h>
>>>   #include <asm/pci_x86.h>
>>> @@ -109,3 +110,346 @@ void early_dump_pci_devices(void)
>>>                  }
>>>          }
>>>   }
>>> +
>>> +#define PCI_EXP_SAVE_REGS      7
>>> +#define pcie_cap_has_devctl(type, flags)       1
>>> +#define pcie_cap_has_lnkctl(type, flags)               \
>>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>>> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
>>> +                 type == PCI_EXP_TYPE_ENDPOINT ||      \
>>> +                 type == PCI_EXP_TYPE_LEG_END))
>>> +#define pcie_cap_has_sltctl(type, flags)               \
>>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>>> +                ((type == PCI_EXP_TYPE_ROOT_PORT) ||   \
>>> +                 (type == PCI_EXP_TYPE_DOWNSTREAM &&   \
>>> +                  (flags & PCI_EXP_FLAGS_SLOT))))
>>> +#define pcie_cap_has_rtctl(type, flags)                        \
>>> +               ((flags & PCI_EXP_FLAGS_VERS) > 1 ||    \
>>> +                (type == PCI_EXP_TYPE_ROOT_PORT ||     \
>>> +                 type == PCI_EXP_TYPE_RC_EC))
>>> +
>>> +struct save_config {
>>> +       u32 pci[16];
>>> +       u16 pcie[PCI_EXP_SAVE_REGS];
>>> +};
>>> +
>>> +struct pcie_dev {
>>> +       int cap;   /* position of PCI Express capability */
>>> +       int flags; /* PCI_EXP_FLAGS */
>>> +       struct save_config save; /* saved configration register */
>>> +};
>>> +
>>> +struct pcie_port {
>>> +       struct list_head dev;
>>> +       u8 secondary;
>>> +       struct pcie_dev child[PCI_MAX_FUNCTIONS];
>>> +};
>>> +
>>> +static LIST_HEAD(device_list);
>>> +static void __init pci_udelay(int loops)
>>> +{
>>> +       while (loops--) {
>>> +               /* Approximately 1 us */
>>> +               native_io_delay();
>>> +       }
>>> +}
>>> +
>>> +/* Derived from drivers/pci/pci.c */
>>> +#define PCI_FIND_CAP_TTL       48
>>> +static int __init __pci_find_next_cap_ttl(u8 bus, u8 slot, u8 func,
>>> +                                         u8 pos, int cap, int *ttl)
>>> +{
>>> +       u8 id;
>>> +
>>> +       while ((*ttl)--) {
>>> +               pos = read_pci_config_byte(bus, slot, func, pos);
>>> +               if (pos < 0x40)
>>> +                       break;
>>> +               pos &= ~3;
>>> +               id = read_pci_config_byte(bus, slot, func,
>>> +                                       pos + PCI_CAP_LIST_ID);
>>> +               if (id == 0xff)
>>> +                       break;
>>> +               if (id == cap)
>>> +                       return pos;
>>> +               pos += PCI_CAP_LIST_NEXT;
>>> +       }
>>> +       return 0;
>>> +}
>>> +
>>> +static int __init __pci_find_next_cap(u8 bus, u8 slot, u8 func, u8 pos, int cap)
>>> +{
>>> +       int ttl = PCI_FIND_CAP_TTL;
>>> +
>>> +       return __pci_find_next_cap_ttl(bus, slot, func, pos, cap, &ttl);
>>> +}
>>> +
>>> +static int __init __pci_bus_find_cap_start(u8 bus, u8 slot, u8 func,
>>> +                                          u8 hdr_type)
>>> +{
>>> +       u16 status;
>>> +
>>> +       status = read_pci_config_16(bus, slot, func, PCI_STATUS);
>>> +       if (!(status & PCI_STATUS_CAP_LIST))
>>> +               return 0;
>>> +
>>> +       switch (hdr_type) {
>>> +       case PCI_HEADER_TYPE_NORMAL:
>>> +       case PCI_HEADER_TYPE_BRIDGE:
>>> +               return PCI_CAPABILITY_LIST;
>>> +       case PCI_HEADER_TYPE_CARDBUS:
>>> +               return PCI_CB_CAPABILITY_LIST;
>>> +       default:
>>> +               return 0;
>>> +       }
>>> +
>>> +       return 0;
>>> +}
>>> +
>>> +static int __init early_pci_find_capability(u8 bus, u8 slot, u8 func, int cap)
>>> +{
>>> +       int pos;
>>> +       u8 type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
>>> +
>>> +       pos = __pci_bus_find_cap_start(bus, slot, func, type & 0x7f);
>>> +       if (pos)
>>> +               pos = __pci_find_next_cap(bus, slot, func, pos, cap);
>>> +
>>> +       return pos;
>>> +}
>>> +
>>> +static void __init do_reset(u8 bus, u8 slot, u8 func)
>>> +{
>>> +       u16 ctrl;
>>> +
>>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
>>> +
>>> +       /* Assert Secondary Bus Reset */
>>> +       ctrl = read_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL);
>>> +       ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
>>> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
>>> +
>>> +       /*
>>> +        * PCIe spec requires software to ensure a minimum reset duration
>>> +        * (Trst == 1ms). We have here 5ms safety margin because pci_udelay is
>>> +        * not precise.
>>> +        */
>>> +       pci_udelay(5000);
>>> +
>>> +       /* De-assert Secondary Bus Reset */
>>> +       ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
>>> +       write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
>>> +}
>>> +
>>> +static void __init save_state(unsigned bus, unsigned slot, unsigned func,
>>> +               struct pcie_dev *dev)
>>> +{
>>> +       int i;
>>> +       int pcie, flags, pcie_type;
>>> +       struct save_config *save;
>>> +
>>> +       pcie = dev->cap;
>>> +       flags = dev->flags;
>>> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
>>> +       save = &dev->save;
>>> +
>>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
>>> +
>>> +       for (i = 0; i < 16; i++)
>>> +               save->pci[i] = read_pci_config(bus, slot, func, i * 4);
>>> +       i = 0;
>>> +       if (pcie_cap_has_devctl(pcie_type, flags))
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_DEVCTL);
>>> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_LNKCTL);
>>> +       if (pcie_cap_has_sltctl(pcie_type, flags))
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_SLTCTL);
>>> +       if (pcie_cap_has_rtctl(pcie_type, flags))
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_RTCTL);
>>> +
>>> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_DEVCTL2);
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_LNKCTL2);
>>> +               save->pcie[i++] = read_pci_config_16(bus, slot, func,
>>> +                                                     pcie + PCI_EXP_SLTCTL2);
>>> +       }
>>> +}
>>> +
>>> +static void __init restore_state(unsigned bus, unsigned slot, unsigned func,
>>> +               struct pcie_dev *dev)
>>> +{
>>> +       int i = 0;
>>> +       int pcie, flags, pcie_type;
>>> +       struct save_config *save;
>>> +
>>> +       pcie = dev->cap;
>>> +       flags = dev->flags;
>>> +       pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
>>> +       save = &dev->save;
>>> +
>>> +       printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
>>> +              bus, slot, func);
>>> +
>>> +       if (pcie_cap_has_devctl(pcie_type, flags))
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_DEVCTL, save->pcie[i++]);
>>> +       if (pcie_cap_has_lnkctl(pcie_type, flags))
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_LNKCTL, save->pcie[i++]);
>>> +       if (pcie_cap_has_sltctl(pcie_type, flags))
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_SLTCTL, save->pcie[i++]);
>>> +       if (pcie_cap_has_rtctl(pcie_type, flags))
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_RTCTL, save->pcie[i++]);
>>> +
>>> +       if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_DEVCTL2, save->pcie[i++]);
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_LNKCTL2, save->pcie[i++]);
>>> +               write_pci_config_16(bus, slot, func,
>>> +                                   pcie + PCI_EXP_SLTCTL2, save->pcie[i++]);
>>> +       }
>>> +
>>> +       for (i = 15; i >= 0; i--)
>>> +               write_pci_config(bus, slot, func, i * 4, save->pci[i]);
>>> +}
>>
>> do you have to pass bus/slot/func and use read/pci_config directly ?
>>
>> I had one patchset that uses a dummy pci device and reuses the existing late
>> quirk code in early_quirk to do the usb handoff early.
>>
>> please check
>>
>> git://git.kernel.org/pub/scm/linux/kernel/git/yinghai/linux-yinghai.git
>> for-x86-early-quirk-usb
>>
>> 678a023: x86: usb handoff in early_quirk
>> 2d418d8: pci, usb: Make usb handoff func all take base remapping
>> d9bd1ad: x86, pci: add dummy pci device for early stage
>> de38757: x86: early_quirk check all bus/dev/func in domain 0
>> 325cc7a: make msleep to do mdelay before scheduler is running
>> eec78a4: x86: set percpu cpu_info lpj to default
>> 52ebec4: x86, pci: early dump skip device the same way as later probe code
>>
>> if that could help.
>> you may reuse some later functions that take pci_dev as parameters.
> d9bd1ad looks very useful for my patch. Thanks for the information.
> What is the status of this patch? Has it already been merged into the tip
> tree or somewhere else?

Hi Yinghai,

I'm rewriting my reset code using your dummy pci_dev patch. Do you have
a plan to post it or can I post it with my patches?

Thanks,
Takao Indoh

--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Yinghai Lu Nov. 7, 2012, 6:20 p.m. UTC | #6
On Tue, Nov 6, 2012 at 10:48 PM, Takao Indoh <indou.takao@jp.fujitsu.com> wrote:
> I'm rewriting my reset code using your dummy pci_dev patch. Do you have
> a plan to post it or can I post it with my patches?

Yes, you can post it with your patches if you like.

Yinghai
--
To unsubscribe from this list: send the line "unsubscribe linux-pci" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/x86/include/asm/pci-direct.h b/arch/x86/include/asm/pci-direct.h
index b1e7a45..de30db2 100644
--- a/arch/x86/include/asm/pci-direct.h
+++ b/arch/x86/include/asm/pci-direct.h
@@ -18,4 +18,5 @@  extern int early_pci_allowed(void);
 extern unsigned int pci_early_dump_regs;
 extern void early_dump_pci_device(u8 bus, u8 slot, u8 func);
 extern void early_dump_pci_devices(void);
+extern void early_reset_pcie_devices(void);
 #endif /* _ASM_X86_PCI_DIRECT_H */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a2bb18e..73d3425 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -987,6 +987,9 @@  void __init setup_arch(char **cmdline_p)
 	generic_apic_probe();
 
 	early_quirks();
+#ifdef CONFIG_PCI
+	early_reset_pcie_devices();
+#endif
 
 	/*
 	 * Read APIC and some other early information from ACPI tables.
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c
index d1067d5..683b30f 100644
--- a/arch/x86/pci/early.c
+++ b/arch/x86/pci/early.c
@@ -1,5 +1,6 @@ 
 #include <linux/kernel.h>
 #include <linux/pci.h>
+#include <linux/bootmem.h>
 #include <asm/pci-direct.h>
 #include <asm/io.h>
 #include <asm/pci_x86.h>
@@ -109,3 +110,346 @@  void early_dump_pci_devices(void)
 		}
 	}
 }
+
+#define PCI_EXP_SAVE_REGS	7	/* save_state() stores at most 4 + 3 regs */
+#define pcie_cap_has_devctl(type, flags)	1	/* saved unconditionally */
+#define pcie_cap_has_lnkctl(type, flags)		\
+		(((flags) & PCI_EXP_FLAGS_VERS) > 1 ||	\
+		 ((type) == PCI_EXP_TYPE_ROOT_PORT ||	\
+		  (type) == PCI_EXP_TYPE_ENDPOINT ||	\
+		  (type) == PCI_EXP_TYPE_LEG_END))
+#define pcie_cap_has_sltctl(type, flags)		\
+		(((flags) & PCI_EXP_FLAGS_VERS) > 1 ||	\
+		 (((type) == PCI_EXP_TYPE_ROOT_PORT) ||	\
+		  ((type) == PCI_EXP_TYPE_DOWNSTREAM &&	\
+		   ((flags) & PCI_EXP_FLAGS_SLOT))))
+#define pcie_cap_has_rtctl(type, flags)			\
+		(((flags) & PCI_EXP_FLAGS_VERS) > 1 ||	\
+		 ((type) == PCI_EXP_TYPE_ROOT_PORT ||	\
+		  (type) == PCI_EXP_TYPE_RC_EC))
+/* Config space snapshot of one function, taken before its port is reset. */
+struct save_config {
+	u32 pci[16];	/* config dwords at offsets 0x00-0x3c */
+	u16 pcie[PCI_EXP_SAVE_REGS];	/* PCIe control regs, in save_state() order */
+};
+/* Per-function bookkeeping for a device below a port that gets reset. */
+struct pcie_dev {
+	int cap;   /* position of PCI Express capability; 0 = function unused */
+	int flags; /* PCI_EXP_FLAGS */
+	struct save_config save; /* saved configuration registers */
+};
+/* A port that was reset, plus the saved state of the functions below it. */
+struct pcie_port {
+	struct list_head dev;	/* entry in device_list */
+	u8 secondary;	/* secondary bus number read from the port */
+	struct pcie_dev child[PCI_MAX_FUNCTIONS];	/* indexed by function number */
+};
+/* Ports reset by find_pcie_device(); walked later to restore their children. */
+static LIST_HEAD(device_list);
+static void __init pci_udelay(int loops)
+{
+	while (loops--) {
+		/* Approximately 1 us per call, so @loops is roughly microseconds */
+		native_io_delay();
+	}
+}
+
+/* Capability list walk, derived from drivers/pci/pci.c */
+#define PCI_FIND_CAP_TTL	48	/* upper bound on list entries visited */
+static int __init __pci_find_next_cap_ttl(u8 bus, u8 slot, u8 func,
+					  u8 pos, int cap, int *ttl)
+{
+	u8 id;
+
+	while ((*ttl)--) {
+		pos = read_pci_config_byte(bus, slot, func, pos);
+		if (pos < 0x40)	/* not a usable capability offset: stop */
+			break;
+		pos &= ~3;	/* clear the two low bits */
+		id = read_pci_config_byte(bus, slot, func,
+					pos + PCI_CAP_LIST_ID);
+		if (id == 0xff)	/* all-ones ID: stop */
+			break;
+		if (id == cap)
+			return pos;	/* offset of the matching capability */
+		pos += PCI_CAP_LIST_NEXT;	/* next read fetches the next pointer */
+	}
+	return 0;	/* @cap not found, or TTL budget used up */
+}
+/* Search the list reached through @pos for @cap with a fresh TTL budget. */
+static int __init __pci_find_next_cap(u8 bus, u8 slot, u8 func, u8 pos, int cap)
+{
+	int ttl = PCI_FIND_CAP_TTL;
+
+	return __pci_find_next_cap_ttl(bus, slot, func, pos, cap, &ttl);
+}
+/* Return the offset of the capability list pointer, or 0 if there is none. */
+static int __init __pci_bus_find_cap_start(u8 bus, u8 slot, u8 func,
+					   u8 hdr_type)
+{
+	u16 status;
+
+	status = read_pci_config_16(bus, slot, func, PCI_STATUS);
+	if (!(status & PCI_STATUS_CAP_LIST))
+		return 0;	/* status reports no capability list */
+
+	switch (hdr_type) {
+	case PCI_HEADER_TYPE_NORMAL:
+	case PCI_HEADER_TYPE_BRIDGE:
+		return PCI_CAPABILITY_LIST;
+	case PCI_HEADER_TYPE_CARDBUS:
+		return PCI_CB_CAPABILITY_LIST;
+	default:
+		return 0;	/* header type not handled here */
+	}
+
+	return 0;	/* not reached: every switch arm returns */
+}
+/* Return the config offset of capability @cap on bus:slot.func, or 0. */
+static int __init early_pci_find_capability(u8 bus, u8 slot, u8 func, int cap)
+{
+	int pos;
+	u8 type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
+
+	/* Bit 7 is masked off before the header type is matched. */
+	pos = __pci_bus_find_cap_start(bus, slot, func, type & 0x7f);
+	if (pos)
+		pos = __pci_find_next_cap(bus, slot, func, pos, cap);
+
+	return pos;
+}
+/* Pulse the Secondary Bus Reset bit of the bridge at bus:slot.func. */
+static void __init do_reset(u8 bus, u8 slot, u8 func)
+{
+	u16 ctrl;
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d reset\n", bus, slot, func);
+
+	/* Assert Secondary Bus Reset */
+	ctrl = read_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL);
+	ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
+	write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
+
+	/*
+	 * PCIe spec requires software to ensure a minimum reset duration
+	 * (Trst == 1ms). We have here 5ms safety margin because pci_udelay is
+	 * not precise.
+	 */
+	pci_udelay(5000);
+
+	/* De-assert Secondary Bus Reset */
+	ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;	/* reuses the value read above */
+	write_pci_config_16(bus, slot, func, PCI_BRIDGE_CONTROL, ctrl);
+}
+/* Snapshot the function's config header and PCIe control regs in dev->save. */
+static void __init save_state(unsigned bus, unsigned slot, unsigned func,
+		struct pcie_dev *dev)
+{
+	int i;
+	int pcie, flags, pcie_type;
+	struct save_config *save;
+
+	pcie = dev->cap;
+	flags = dev->flags;
+	pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+	save = &dev->save;
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d save state\n", bus, slot, func);
+
+	for (i = 0; i < 16; i++)
+		save->pci[i] = read_pci_config(bus, slot, func, i * 4);
+	i = 0;	/* from here on, index into save->pcie[] */
+	if (pcie_cap_has_devctl(pcie_type, flags))
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_DEVCTL);
+	if (pcie_cap_has_lnkctl(pcie_type, flags))
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_LNKCTL);
+	if (pcie_cap_has_sltctl(pcie_type, flags))
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_SLTCTL);
+	if (pcie_cap_has_rtctl(pcie_type, flags))
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_RTCTL);
+
+	if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {	/* also save the *CTL2 regs */
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_DEVCTL2);
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_LNKCTL2);
+		save->pcie[i++] = read_pci_config_16(bus, slot, func,
+						      pcie + PCI_EXP_SLTCTL2);
+	}
+}
+/* Write back what save_state() captured: PCIe regs first, then the header. */
+static void __init restore_state(unsigned bus, unsigned slot, unsigned func,
+		struct pcie_dev *dev)
+{
+	int i = 0;	/* must advance through save->pcie[] as save_state() did */
+	int pcie, flags, pcie_type;
+	struct save_config *save;
+
+	pcie = dev->cap;
+	flags = dev->flags;
+	pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+	save = &dev->save;
+
+	printk(KERN_INFO "pci 0000:%02x:%02x.%d restore state\n",
+	       bus, slot, func);
+
+	if (pcie_cap_has_devctl(pcie_type, flags))
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_DEVCTL, save->pcie[i++]);
+	if (pcie_cap_has_lnkctl(pcie_type, flags))
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_LNKCTL, save->pcie[i++]);
+	if (pcie_cap_has_sltctl(pcie_type, flags))
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_SLTCTL, save->pcie[i++]);
+	if (pcie_cap_has_rtctl(pcie_type, flags))
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_RTCTL, save->pcie[i++]);
+
+	if ((flags & PCI_EXP_FLAGS_VERS) >= 2) {	/* *CTL2 regs were saved too */
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_DEVCTL2, save->pcie[i++]);
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_LNKCTL2, save->pcie[i++]);
+		write_pci_config_16(bus, slot, func,
+				    pcie + PCI_EXP_SLTCTL2, save->pcie[i++]);
+	}
+
+	for (i = 15; i >= 0; i--)	/* header dwords, highest offset first */
+		write_pci_config(bus, slot, func, i * 4, save->pci[i]);
+}
+/* Save the functions below a PCIe root/downstream port, then reset the port. */
+static void __init find_pcie_device(unsigned bus, unsigned slot, unsigned func)
+{
+	int f, count;
+	int pcie, pcie_type;
+	u8 type;
+	u16 vendor, flags;
+	u32 class;
+	int secondary;
+	struct pcie_port *port;
+	int pcie_cap[PCI_MAX_FUNCTIONS];	/* 0 = function not recorded */
+	int pcie_flags[PCI_MAX_FUNCTIONS];
+
+	pcie = early_pci_find_capability(bus, slot, func, PCI_CAP_ID_EXP);
+	if (!pcie)
+		return;
+
+	flags = read_pci_config_16(bus, slot, func, pcie + PCI_EXP_FLAGS);
+	pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+	if ((pcie_type != PCI_EXP_TYPE_ROOT_PORT) &&
+	    (pcie_type != PCI_EXP_TYPE_DOWNSTREAM))
+		return;
+
+	type = read_pci_config_byte(bus, slot, func, PCI_HEADER_TYPE);
+	if ((type & 0x7f) != PCI_HEADER_TYPE_BRIDGE)
+		return;
+	secondary = read_pci_config_byte(bus, slot, func, PCI_SECONDARY_BUS);
+
+	memset(pcie_cap, 0, sizeof(pcie_cap));
+	memset(pcie_flags, 0, sizeof(pcie_flags));
+	for (count = 0, f = 0; f < PCI_MAX_FUNCTIONS; f++) {	/* slot 0 only */
+		vendor = read_pci_config_16(secondary, 0, f, PCI_VENDOR_ID);
+		if (vendor == 0xffff)
+			continue;
+
+		pcie = early_pci_find_capability(secondary, 0, f,
+				PCI_CAP_ID_EXP);
+		if (!pcie)
+			continue;
+
+		flags = read_pci_config_16(secondary, 0, f,
+				pcie + PCI_EXP_FLAGS);
+		pcie_type = (flags & PCI_EXP_FLAGS_TYPE) >> 4;
+		if ((pcie_type == PCI_EXP_TYPE_UPSTREAM) ||
+		    (pcie_type == PCI_EXP_TYPE_PCI_BRIDGE))
+			/* Don't reset switch, bridge */
+			return;
+
+		class = read_pci_config(secondary, 0, f, PCI_CLASS_REVISION);
+		if ((class >> 24) == PCI_BASE_CLASS_DISPLAY)
+			/* Don't reset VGA device */
+			return;
+
+		count++;
+		pcie_cap[f] = pcie;
+		pcie_flags[f] = flags;
+	}
+
+	if (!count)
+		return;
+
+	/* The nopanic variant returns NULL on failure, so the check is live. */
+	port = alloc_bootmem_nopanic(sizeof(*port));
+	if (port == NULL) {
+		printk(KERN_ERR "pci 0000:%02x:%02x.%d alloc_bootmem failed\n",
+		       bus, slot, func);
+		return;	/* leave this port alone instead of panicking */
+	}
+	memset(port, 0, sizeof(*port));
+	port->secondary = secondary;
+	for (f = 0; f < PCI_MAX_FUNCTIONS; f++) {
+		if (pcie_cap[f] != 0) {
+			port->child[f].cap = pcie_cap[f];
+			port->child[f].flags = pcie_flags[f];
+			save_state(secondary, 0, f, &port->child[f]);
+		}
+	}
+	do_reset(bus, slot, func);	/* only after every child was saved */
+	list_add_tail(&port->dev, &device_list);
+}
+/* When reset_devices is set: reset devices below PCIe ports, then restore. */
+void __init early_reset_pcie_devices(void)
+{
+	unsigned bus, slot, func;
+	struct pcie_port *port, *tmp;
+
+	if (!early_pci_allowed() || !reset_devices)
+		return;
+
+	/* Find PCIe port and reset its downstream devices */
+	for (bus = 0; bus < 256; bus++) {
+		for (slot = 0; slot < 32; slot++) {
+			for (func = 0; func < PCI_MAX_FUNCTIONS; func++) {
+				u16 vendor;
+				u8 type;
+				vendor = read_pci_config_16(bus, slot, func,
+						PCI_VENDOR_ID);
+
+				if (vendor == 0xffff)	/* treated as absent */
+					continue;
+
+				find_pcie_device(bus, slot, func);
+
+				if (func == 0) {
+					type = read_pci_config_byte(bus, slot,
+								    func,
+							       PCI_HEADER_TYPE);
+					if (!(type & 0x80))	/* skip funcs 1-7 */
+						break;
+				}
+			}
+		}
+	}
+
+	if (list_empty(&device_list))	/* nothing was reset */
+		return;
+
+	/*
+	 * According to PCIe spec, software must wait a minimum of 100 ms
+	 * before sending a configuration request. We have 500ms safety margin
+	 * here.
+	 */
+	pci_udelay(500000);
+
+	/* Restore config registers and free memory */
+	list_for_each_entry_safe(port, tmp, &device_list, dev) {
+		for (func = 0; func < PCI_MAX_FUNCTIONS; func++)
+			if (port->child[func].cap)	/* only saved functions */
+				restore_state(port->secondary, 0, func,
+					      &port->child[func]);
+		free_bootmem(__pa(port), sizeof(*port));
+	}
+}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index ee21795..eca3231 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -35,6 +35,8 @@ 
 /* Include the ID list */
 #include <linux/pci_ids.h>
 
+#define PCI_MAX_FUNCTIONS 8
+
 /* pci_slot represents a physical slot */
 struct pci_slot {
 	struct pci_bus *bus;		/* The bus this slot is on */
diff --git a/init/main.c b/init/main.c
index 9cf77ab..0eb7430 100644
--- a/init/main.c
+++ b/init/main.c
@@ -144,10 +144,10 @@  EXPORT_SYMBOL(reset_devices);
 static int __init set_reset_devices(char *str)
 {
 	reset_devices = 1;
-	return 1;
+	return 0;
 }
 
-__setup("reset_devices", set_reset_devices);
+early_param("reset_devices", set_reset_devices);
 
 static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
 const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };