diff mbox

[V12,9/9] powerpc/eeh: Support error recovery for VF PE

Message ID 1446607732-4166-10-git-send-email-weiyang@linux.vnet.ibm.com (mailing list archive)
State New, archived
Delegated to: Bjorn Helgaas
Headers show

Commit Message

Wei Yang Nov. 4, 2015, 3:28 a.m. UTC
PFs are enumerated on PCI bus, while VFs are created by PF's driver.

In EEH recovery, it has two cases:
1. Device and driver is EEH aware, error handlers are called.
2. Device and driver is not EEH aware, un-plug the device and plug it again
by enumerating it.

The special thing happens on the second case. For a PF, we could use the
original pci core to enumerate the bus, while for VF we need to record the
VFs which aer un-plugged then plug it again.

Also The patch caches the VF index in pci_dn, which can be used to
calculate VF's bus, device and function number. Those information helps to
locate the VF's PCI device instance when doing hotplug during EEH recovery
if necessary.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/eeh.h        |   7 ++
 arch/powerpc/include/asm/pci-bridge.h |   1 +
 arch/powerpc/kernel/eeh.c             |   8 +++
 arch/powerpc/kernel/eeh_dev.c         |   1 +
 arch/powerpc/kernel/eeh_driver.c      | 127 +++++++++++++++++++++++++++-------
 arch/powerpc/kernel/eeh_pe.c          |   3 +-
 arch/powerpc/kernel/pci_dn.c          |   4 +-
 7 files changed, 123 insertions(+), 28 deletions(-)

Comments

Alexey Kardashevskiy Nov. 4, 2015, 5:01 a.m. UTC | #1
On 11/04/2015 02:28 PM, Wei Yang wrote:
> PFs are enumerated on PCI bus, while VFs are created by PF's driver.
>
> In EEH recovery, it has two cases:
> 1. Device and driver is EEH aware, error handlers are called.
> 2. Device and driver is not EEH aware, un-plug the device and plug it again
> by enumerating it.
>
> The special thing happens on the second case. For a PF, we could use the
> original pci core to enumerate the bus, while for VF we need to record the
> VFs which aer un-plugged then plug it again.
>
> Also The patch caches the VF index in pci_dn, which can be used to
> calculate VF's bus, device and function number. Those information helps to
> locate the VF's PCI device instance when doing hotplug during EEH recovery
> if necessary.
>
> Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
> ---
>   arch/powerpc/include/asm/eeh.h        |   7 ++
>   arch/powerpc/include/asm/pci-bridge.h |   1 +
>   arch/powerpc/kernel/eeh.c             |   8 +++
>   arch/powerpc/kernel/eeh_dev.c         |   1 +
>   arch/powerpc/kernel/eeh_driver.c      | 127 +++++++++++++++++++++++++++-------
>   arch/powerpc/kernel/eeh_pe.c          |   3 +-
>   arch/powerpc/kernel/pci_dn.c          |   4 +-
>   7 files changed, 123 insertions(+), 28 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
> index 331c856..1f68190 100644
> --- a/arch/powerpc/include/asm/eeh.h
> +++ b/arch/powerpc/include/asm/eeh.h
> @@ -127,6 +127,11 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
>   #define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
>   #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
>
> +struct eeh_rmv_data {
> +	struct list_head edev_list;
> +	int removed;
> +};


This struct is only used in arch/powerpc/kernel/eeh_driver.c so move it there.


> +
>   struct eeh_dev {
>   	int mode;			/* EEH mode			*/
>   	int class_code;			/* Class code of the device	*/
> @@ -139,9 +144,11 @@ struct eeh_dev {
>   	int af_cap;			/* Saved AF capability		*/
>   	struct eeh_pe *pe;		/* Associated PE		*/
>   	struct list_head list;		/* Form link list in the PE	*/
> +	struct list_head rmv_list;	/* Record the removed edev 	*/
>   	struct pci_controller *phb;	/* Associated PHB		*/
>   	struct pci_dn *pdn;		/* Associated PCI device node	*/
>   	struct pci_dev *pdev;		/* Associated PCI device	*/
> +	bool   in_error;		/* Error flag for eeh_dev	*/
>   	struct pci_dev *physfn;		/* Associated PF PORT		*/
>   	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>   };
> diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
> index 9b365d6..533e6e9 100644
> --- a/arch/powerpc/include/asm/pci-bridge.h
> +++ b/arch/powerpc/include/asm/pci-bridge.h
> @@ -211,6 +211,7 @@ struct pci_dn {
>   #define IODA_INVALID_PE		(-1)
>   #ifdef CONFIG_PPC_POWERNV
>   	int	pe_number;
> +	int     vf_index;		/* VF index in the PF */
>   #ifdef CONFIG_PCI_IOV
>   	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>   	u16     num_vfs;		/* number of VFs enabled*/
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 41a4b30..0f36750 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1245,6 +1245,14 @@ void eeh_remove_device(struct pci_dev *dev)
>   	 * from the parent PE during the BAR resotre.
>   	 */
>   	edev->pdev = NULL;
> +
> +	/*
> +	 * The flag "in_error" is used to trace EEH devices for VFs
> +	 * in error state or not. It's set in eeh_report_error(). If
> +	 * it's not set, eeh_report_{reset,resume}() won't be called
> +	 * for the VF EEH device.
> +	 */
> +	edev->in_error = 0;


It is a bool, so "= false".


>   	dev->dev.archdata.edev = NULL;
>   	if (!(edev->pe->state & EEH_PE_KEEP))
>   		eeh_rmv_from_parent_pe(edev);
> diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
> index aabba94..7815095 100644
> --- a/arch/powerpc/kernel/eeh_dev.c
> +++ b/arch/powerpc/kernel/eeh_dev.c
> @@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
>   	edev->pdn = pdn;
>   	edev->phb = phb;
>   	INIT_LIST_HEAD(&edev->list);
> +	INIT_LIST_HEAD(&edev->rmv_list);
>
>   	return NULL;
>   }
> diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
> index 89eb4bc..06d20d6 100644
> --- a/arch/powerpc/kernel/eeh_driver.c
> +++ b/arch/powerpc/kernel/eeh_driver.c
> @@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>   	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>   	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>
> +	edev->in_error = true;
>   	eeh_pcid_put(dev);
>   	return NULL;
>   }
> @@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>
>   	if (!driver->err_handler ||
>   	    !driver->err_handler->slot_reset ||
> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
> +	    (edev->mode & EEH_DEV_NO_HANDLER) ||
> +	    (!edev->in_error)) {
>   		eeh_pcid_put(dev);
>   		return NULL;
>   	}
> @@ -326,6 +328,7 @@ static void *eeh_report_resume(void *data, void *userdata)
>   {
>   	struct eeh_dev *edev = (struct eeh_dev *)data;
>   	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> +	bool was_in_error;
>   	struct pci_driver *driver;
>
>   	if (!dev || eeh_dev_removed(edev))
> @@ -335,11 +338,13 @@ static void *eeh_report_resume(void *data, void *userdata)
>   	driver = eeh_pcid_get(dev);
>   	if (!driver) return NULL;
>
> +	was_in_error = edev->in_error;
> +	edev->in_error = false;
>   	eeh_enable_irq(dev);
>
>   	if (!driver->err_handler ||
>   	    !driver->err_handler->resume ||
> -	    (edev->mode & EEH_DEV_NO_HANDLER)) {
> +	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
>   		edev->mode &= ~EEH_DEV_NO_HANDLER;
>   		eeh_pcid_put(dev);
>   		return NULL;
> @@ -386,12 +391,39 @@ static void *eeh_report_failure(void *data, void *userdata)
>   	return NULL;
>   }
>
> +static void *eeh_add_virt_device(void *data, void *userdata)
> +{
> +	struct pci_driver *driver;
> +	struct eeh_dev *edev = (struct eeh_dev *)data;
> +	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
> +
> +	if (!(edev->physfn)) {
> +		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
> +			__func__, edev->phb->global_number, pdn->busno,
> +			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
> +		return NULL;
> +	}
> +
> +	driver = eeh_pcid_get(dev);
> +	if (driver) {
> +		eeh_pcid_put(dev);
> +		if (driver->err_handler)
> +			return NULL;
> +	}
> +
> +	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
> +	return NULL;
> +}
> +
>   static void *eeh_rmv_device(void *data, void *userdata)
>   {
>   	struct pci_driver *driver;
>   	struct eeh_dev *edev = (struct eeh_dev *)data;
>   	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
> -	int *removed = (int *)userdata;
> +	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
> +	int *removed = rmv_data ? &rmv_data->removed : NULL;
> +	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>
>   	/*
>   	 * Actually, we should remove the PCI bridges as well.
> @@ -416,7 +448,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>   	driver = eeh_pcid_get(dev);
>   	if (driver) {
>   		eeh_pcid_put(dev);
> -		if (driver->err_handler)
> +		if (removed && driver->err_handler)
>   			return NULL;
>   	}
>
> @@ -425,11 +457,26 @@ static void *eeh_rmv_device(void *data, void *userdata)
>   		 pci_name(dev));
>   	edev->bus = dev->bus;
>   	edev->mode |= EEH_DEV_DISCONNECTED;
> -	(*removed)++;
> +	if (removed)
> +		(*removed)++;
>
> -	pci_lock_rescan_remove();
> -	pci_stop_and_remove_bus_device(dev);
> -	pci_unlock_rescan_remove();
> +	if (edev->physfn) {
> +		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
> +		edev->pdev = NULL;
> +
> +		/*
> +		 * We have to set the VF PE number to invalid one, which is
> +		 * required to plug the VF successfully.
> +		 */
> +		pdn->pe_number = IODA_INVALID_PE;
> +
> +		if (rmv_data)
> +			list_add(&edev->rmv_list, &rmv_data->edev_list);
> +	} else {
> +		pci_lock_rescan_remove();
> +		pci_stop_and_remove_bus_device(dev);
> +		pci_unlock_rescan_remove();
> +	}
>
>   	return NULL;
>   }
> @@ -543,11 +590,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
>    * During the reset, udev might be invoked because those affected
>    * PCI devices will be removed and then added.
>    */
> -static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
> +static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
> +				struct eeh_rmv_data *rmv_data)
>   {
>   	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>   	struct timeval tstamp;
> -	int cnt, rc, removed = 0;
> +	int cnt, rc;
> +	struct eeh_dev *edev;
>
>   	/* pcibios will clear the counter; save the value */
>   	cnt = pe->freeze_count;
> @@ -561,12 +610,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   	 */
>   	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>   	if (bus) {
> -		pci_lock_rescan_remove();
> -		pcibios_remove_pci_devices(bus);
> -		pci_unlock_rescan_remove();
> -	} else if (frozen_bus) {
> -		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
> -	}
> +		if (pe->type & EEH_PE_VF) {
> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> +		} else {
> +			pci_lock_rescan_remove();
> +			pcibios_remove_pci_devices(bus);
> +			pci_unlock_rescan_remove();
> +		}
> +	} else if (frozen_bus)
> +		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
>
>   	/*
>   	 * Reset the pci controller. (Asserts RST#; resets config space).
> @@ -607,14 +659,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   		 * PE. We should disconnect it so the binding can be
>   		 * rebuilt when adding PCI devices.
>   		 */
> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> -		pcibios_add_pci_devices(bus);
> -	} else if (frozen_bus && removed) {
> +		if (pe->type & EEH_PE_VF)
> +			eeh_add_virt_device(edev, NULL);
> +		else
> +			pcibios_add_pci_devices(bus);
> +	} else if (frozen_bus && rmv_data->removed) {
>   		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>   		ssleep(5);
>
> +		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>   		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
> -		pcibios_add_pci_devices(frozen_bus);
> +		if (pe->type & EEH_PE_VF)
> +			eeh_add_virt_device(edev, NULL);
> +		else
> +			pcibios_add_pci_devices(frozen_bus);
>   	}
>   	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>
> @@ -633,8 +693,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>   static void eeh_handle_normal_event(struct eeh_pe *pe)
>   {
>   	struct pci_bus *frozen_bus;
> +	struct eeh_dev *edev, *tmp;
>   	int rc = 0;
>   	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
> +	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
>
>   	frozen_bus = eeh_pe_bus_get(pe);
>   	if (!frozen_bus) {
> @@ -681,7 +743,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>   	 */
>   	if (result == PCI_ERS_RESULT_NONE) {
>   		pr_info("EEH: Reset with hotplug activity\n");
> -		rc = eeh_reset_device(pe, frozen_bus);
> +		rc = eeh_reset_device(pe, frozen_bus, NULL);
>   		if (rc) {
>   			pr_warn("%s: Unable to reset, err=%d\n",
>   				__func__, rc);
> @@ -733,7 +795,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>   	/* If any device called out for a reset, then reset the slot */
>   	if (result == PCI_ERS_RESULT_NEED_RESET) {
>   		pr_info("EEH: Reset without hotplug activity\n");
> -		rc = eeh_reset_device(pe, NULL);
> +		rc = eeh_reset_device(pe, NULL, &rmv_data);
>   		if (rc) {
>   			pr_warn("%s: Cannot reset, err=%d\n",
>   				__func__, rc);
> @@ -753,6 +815,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>   		goto hard_fail;
>   	}
>
> +	/*
> +	 * For those hot removed VFs, we should add back them after PF get
> +	 * recovered properly.
> +	 */
> +	list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
> +		eeh_add_virt_device(edev, NULL);
> +		list_del(&edev->rmv_list);
> +	}
> +
>   	/* Tell all device drivers that they can resume operations */
>   	pr_info("EEH: Notify device driver to resume\n");
>   	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
> @@ -792,11 +863,15 @@ perm_error:
>   	 * the their PCI config any more.
>   	 */
>   	if (frozen_bus) {
> -		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> -
> -		pci_lock_rescan_remove();
> -		pcibios_remove_pci_devices(frozen_bus);
> -		pci_unlock_rescan_remove();
> +		if (pe->type & EEH_PE_VF) {
> +			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> +		} else {
> +			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
> +			pci_lock_rescan_remove();
> +			pcibios_remove_pci_devices(frozen_bus);
> +			pci_unlock_rescan_remove();
> +		}
>   	}
>   }
>
> diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
> index 29240ad..b7facb9 100644
> --- a/arch/powerpc/kernel/eeh_pe.c
> +++ b/arch/powerpc/kernel/eeh_pe.c
> @@ -936,7 +936,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>   	if (pe->type & EEH_PE_PHB) {
>   		bus = pe->phb->bus;
>   	} else if (pe->type & EEH_PE_BUS ||
> -		   pe->type & EEH_PE_DEVICE) {
> +		   pe->type & EEH_PE_DEVICE ||
> +		   pe->type & EEH_PE_VF) {
>   		if (pe->bus) {
>   			bus = pe->bus;
>   			goto out;
> diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
> index 5091b05..9e9cb16 100644
> --- a/arch/powerpc/kernel/pci_dn.c
> +++ b/arch/powerpc/kernel/pci_dn.c
> @@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
>   #ifdef CONFIG_PCI_IOV
>   static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
>   					   struct pci_dev *pdev,
> +					   int vf_index,
>   					   int busno, int devfn)
>   {
>   	struct pci_dn *pdn;
> @@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
>   	pdn->busno = busno;
>   	pdn->devfn = devfn;
>   #ifdef CONFIG_PPC_POWERNV
> +	pdn->vf_index = vf_index;
>   	pdn->pe_number = IODA_INVALID_PE;
>   #endif
>   	INIT_LIST_HEAD(&pdn->child_list);
> @@ -198,7 +200,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
>   		return NULL;
>
>   	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
> -		pdn = add_one_dev_pci_data(parent, NULL,
> +		pdn = add_one_dev_pci_data(parent, NULL, i,
>   					   pci_iov_virtfn_bus(pdev, i),
>   					   pci_iov_virtfn_devfn(pdev, i));
>   		if (!pdn) {
>
Wei Yang Nov. 4, 2015, 7:16 a.m. UTC | #2
On Wed, Nov 04, 2015 at 04:01:50PM +1100, Alexey Kardashevskiy wrote:
>On 11/04/2015 02:28 PM, Wei Yang wrote:
>>PFs are enumerated on PCI bus, while VFs are created by PF's driver.
>>
>>In EEH recovery, it has two cases:
>>1. Device and driver is EEH aware, error handlers are called.
>>2. Device and driver is not EEH aware, un-plug the device and plug it again
>>by enumerating it.
>>
>>The special thing happens on the second case. For a PF, we could use the
>>original pci core to enumerate the bus, while for VF we need to record the
>>VFs which aer un-plugged then plug it again.
>>
>>Also The patch caches the VF index in pci_dn, which can be used to
>>calculate VF's bus, device and function number. Those information helps to
>>locate the VF's PCI device instance when doing hotplug during EEH recovery
>>if necessary.
>>
>>Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
>>---
>>  arch/powerpc/include/asm/eeh.h        |   7 ++
>>  arch/powerpc/include/asm/pci-bridge.h |   1 +
>>  arch/powerpc/kernel/eeh.c             |   8 +++
>>  arch/powerpc/kernel/eeh_dev.c         |   1 +
>>  arch/powerpc/kernel/eeh_driver.c      | 127 +++++++++++++++++++++++++++-------
>>  arch/powerpc/kernel/eeh_pe.c          |   3 +-
>>  arch/powerpc/kernel/pci_dn.c          |   4 +-
>>  7 files changed, 123 insertions(+), 28 deletions(-)
>>
>>diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
>>index 331c856..1f68190 100644
>>--- a/arch/powerpc/include/asm/eeh.h
>>+++ b/arch/powerpc/include/asm/eeh.h
>>@@ -127,6 +127,11 @@ static inline bool eeh_pe_passed(struct eeh_pe *pe)
>>  #define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
>>  #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
>>
>>+struct eeh_rmv_data {
>>+	struct list_head edev_list;
>>+	int removed;
>>+};
>
>
>This struct is only used in arch/powerpc/kernel/eeh_driver.c so move it there.
>

Will move this in next version.

>
>>+
>>  struct eeh_dev {
>>  	int mode;			/* EEH mode			*/
>>  	int class_code;			/* Class code of the device	*/
>>@@ -139,9 +144,11 @@ struct eeh_dev {
>>  	int af_cap;			/* Saved AF capability		*/
>>  	struct eeh_pe *pe;		/* Associated PE		*/
>>  	struct list_head list;		/* Form link list in the PE	*/
>>+	struct list_head rmv_list;	/* Record the removed edev 	*/
>>  	struct pci_controller *phb;	/* Associated PHB		*/
>>  	struct pci_dn *pdn;		/* Associated PCI device node	*/
>>  	struct pci_dev *pdev;		/* Associated PCI device	*/
>>+	bool   in_error;		/* Error flag for eeh_dev	*/
>>  	struct pci_dev *physfn;		/* Associated PF PORT		*/
>>  	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
>>  };
>>diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
>>index 9b365d6..533e6e9 100644
>>--- a/arch/powerpc/include/asm/pci-bridge.h
>>+++ b/arch/powerpc/include/asm/pci-bridge.h
>>@@ -211,6 +211,7 @@ struct pci_dn {
>>  #define IODA_INVALID_PE		(-1)
>>  #ifdef CONFIG_PPC_POWERNV
>>  	int	pe_number;
>>+	int     vf_index;		/* VF index in the PF */
>>  #ifdef CONFIG_PCI_IOV
>>  	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
>>  	u16     num_vfs;		/* number of VFs enabled*/
>>diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>index 41a4b30..0f36750 100644
>>--- a/arch/powerpc/kernel/eeh.c
>>+++ b/arch/powerpc/kernel/eeh.c
>>@@ -1245,6 +1245,14 @@ void eeh_remove_device(struct pci_dev *dev)
>>  	 * from the parent PE during the BAR resotre.
>>  	 */
>>  	edev->pdev = NULL;
>>+
>>+	/*
>>+	 * The flag "in_error" is used to trace EEH devices for VFs
>>+	 * in error state or not. It's set in eeh_report_error(). If
>>+	 * it's not set, eeh_report_{reset,resume}() won't be called
>>+	 * for the VF EEH device.
>>+	 */
>>+	edev->in_error = 0;
>
>
>It is a bool, so "= false".
>

Correct.

>
>>  	dev->dev.archdata.edev = NULL;
>>  	if (!(edev->pe->state & EEH_PE_KEEP))
>>  		eeh_rmv_from_parent_pe(edev);
>>diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
>>index aabba94..7815095 100644
>>--- a/arch/powerpc/kernel/eeh_dev.c
>>+++ b/arch/powerpc/kernel/eeh_dev.c
>>@@ -67,6 +67,7 @@ void *eeh_dev_init(struct pci_dn *pdn, void *data)
>>  	edev->pdn = pdn;
>>  	edev->phb = phb;
>>  	INIT_LIST_HEAD(&edev->list);
>>+	INIT_LIST_HEAD(&edev->rmv_list);
>>
>>  	return NULL;
>>  }
>>diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
>>index 89eb4bc..06d20d6 100644
>>--- a/arch/powerpc/kernel/eeh_driver.c
>>+++ b/arch/powerpc/kernel/eeh_driver.c
>>@@ -211,6 +211,7 @@ static void *eeh_report_error(void *data, void *userdata)
>>  	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
>>  	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
>>
>>+	edev->in_error = true;
>>  	eeh_pcid_put(dev);
>>  	return NULL;
>>  }
>>@@ -282,7 +283,8 @@ static void *eeh_report_reset(void *data, void *userdata)
>>
>>  	if (!driver->err_handler ||
>>  	    !driver->err_handler->slot_reset ||
>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
>>+	    (!edev->in_error)) {
>>  		eeh_pcid_put(dev);
>>  		return NULL;
>>  	}
>>@@ -326,6 +328,7 @@ static void *eeh_report_resume(void *data, void *userdata)
>>  {
>>  	struct eeh_dev *edev = (struct eeh_dev *)data;
>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>+	bool was_in_error;
>>  	struct pci_driver *driver;
>>
>>  	if (!dev || eeh_dev_removed(edev))
>>@@ -335,11 +338,13 @@ static void *eeh_report_resume(void *data, void *userdata)
>>  	driver = eeh_pcid_get(dev);
>>  	if (!driver) return NULL;
>>
>>+	was_in_error = edev->in_error;
>>+	edev->in_error = false;
>>  	eeh_enable_irq(dev);
>>
>>  	if (!driver->err_handler ||
>>  	    !driver->err_handler->resume ||
>>-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
>>+	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
>>  		edev->mode &= ~EEH_DEV_NO_HANDLER;
>>  		eeh_pcid_put(dev);
>>  		return NULL;
>>@@ -386,12 +391,39 @@ static void *eeh_report_failure(void *data, void *userdata)
>>  	return NULL;
>>  }
>>
>>+static void *eeh_add_virt_device(void *data, void *userdata)
>>+{
>>+	struct pci_driver *driver;
>>+	struct eeh_dev *edev = (struct eeh_dev *)data;
>>+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>+
>>+	if (!(edev->physfn)) {
>>+		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
>>+			__func__, edev->phb->global_number, pdn->busno,
>>+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>+		return NULL;
>>+	}
>>+
>>+	driver = eeh_pcid_get(dev);
>>+	if (driver) {
>>+		eeh_pcid_put(dev);
>>+		if (driver->err_handler)
>>+			return NULL;
>>+	}
>>+
>>+	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
>>+	return NULL;
>>+}
>>+
>>  static void *eeh_rmv_device(void *data, void *userdata)
>>  {
>>  	struct pci_driver *driver;
>>  	struct eeh_dev *edev = (struct eeh_dev *)data;
>>  	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
>>-	int *removed = (int *)userdata;
>>+	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
>>+	int *removed = rmv_data ? &rmv_data->removed : NULL;
>>+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
>>
>>  	/*
>>  	 * Actually, we should remove the PCI bridges as well.
>>@@ -416,7 +448,7 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>  	driver = eeh_pcid_get(dev);
>>  	if (driver) {
>>  		eeh_pcid_put(dev);
>>-		if (driver->err_handler)
>>+		if (removed && driver->err_handler)
>>  			return NULL;
>>  	}
>>
>>@@ -425,11 +457,26 @@ static void *eeh_rmv_device(void *data, void *userdata)
>>  		 pci_name(dev));
>>  	edev->bus = dev->bus;
>>  	edev->mode |= EEH_DEV_DISCONNECTED;
>>-	(*removed)++;
>>+	if (removed)
>>+		(*removed)++;
>>
>>-	pci_lock_rescan_remove();
>>-	pci_stop_and_remove_bus_device(dev);
>>-	pci_unlock_rescan_remove();
>>+	if (edev->physfn) {
>>+		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
>>+		edev->pdev = NULL;
>>+
>>+		/*
>>+		 * We have to set the VF PE number to invalid one, which is
>>+		 * required to plug the VF successfully.
>>+		 */
>>+		pdn->pe_number = IODA_INVALID_PE;
>>+
>>+		if (rmv_data)
>>+			list_add(&edev->rmv_list, &rmv_data->edev_list);
>>+	} else {
>>+		pci_lock_rescan_remove();
>>+		pci_stop_and_remove_bus_device(dev);
>>+		pci_unlock_rescan_remove();
>>+	}
>>
>>  	return NULL;
>>  }
>>@@ -543,11 +590,13 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
>>   * During the reset, udev might be invoked because those affected
>>   * PCI devices will be removed and then added.
>>   */
>>-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
>>+				struct eeh_rmv_data *rmv_data)
>>  {
>>  	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
>>  	struct timeval tstamp;
>>-	int cnt, rc, removed = 0;
>>+	int cnt, rc;
>>+	struct eeh_dev *edev;
>>
>>  	/* pcibios will clear the counter; save the value */
>>  	cnt = pe->freeze_count;
>>@@ -561,12 +610,15 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  	 */
>>  	eeh_pe_state_mark(pe, EEH_PE_KEEP);
>>  	if (bus) {
>>-		pci_lock_rescan_remove();
>>-		pcibios_remove_pci_devices(bus);
>>-		pci_unlock_rescan_remove();
>>-	} else if (frozen_bus) {
>>-		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
>>-	}
>>+		if (pe->type & EEH_PE_VF) {
>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>+		} else {
>>+			pci_lock_rescan_remove();
>>+			pcibios_remove_pci_devices(bus);
>>+			pci_unlock_rescan_remove();
>>+		}
>>+	} else if (frozen_bus)
>>+		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
>>
>>  	/*
>>  	 * Reset the pci controller. (Asserts RST#; resets config space).
>>@@ -607,14 +659,22 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  		 * PE. We should disconnect it so the binding can be
>>  		 * rebuilt when adding PCI devices.
>>  		 */
>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>-		pcibios_add_pci_devices(bus);
>>-	} else if (frozen_bus && removed) {
>>+		if (pe->type & EEH_PE_VF)
>>+			eeh_add_virt_device(edev, NULL);
>>+		else
>>+			pcibios_add_pci_devices(bus);
>>+	} else if (frozen_bus && rmv_data->removed) {
>>  		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
>>  		ssleep(5);
>>
>>+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
>>  		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
>>-		pcibios_add_pci_devices(frozen_bus);
>>+		if (pe->type & EEH_PE_VF)
>>+			eeh_add_virt_device(edev, NULL);
>>+		else
>>+			pcibios_add_pci_devices(frozen_bus);
>>  	}
>>  	eeh_pe_state_clear(pe, EEH_PE_KEEP);
>>
>>@@ -633,8 +693,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
>>  static void eeh_handle_normal_event(struct eeh_pe *pe)
>>  {
>>  	struct pci_bus *frozen_bus;
>>+	struct eeh_dev *edev, *tmp;
>>  	int rc = 0;
>>  	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
>>+	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
>>
>>  	frozen_bus = eeh_pe_bus_get(pe);
>>  	if (!frozen_bus) {
>>@@ -681,7 +743,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>>  	 */
>>  	if (result == PCI_ERS_RESULT_NONE) {
>>  		pr_info("EEH: Reset with hotplug activity\n");
>>-		rc = eeh_reset_device(pe, frozen_bus);
>>+		rc = eeh_reset_device(pe, frozen_bus, NULL);
>>  		if (rc) {
>>  			pr_warn("%s: Unable to reset, err=%d\n",
>>  				__func__, rc);
>>@@ -733,7 +795,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>>  	/* If any device called out for a reset, then reset the slot */
>>  	if (result == PCI_ERS_RESULT_NEED_RESET) {
>>  		pr_info("EEH: Reset without hotplug activity\n");
>>-		rc = eeh_reset_device(pe, NULL);
>>+		rc = eeh_reset_device(pe, NULL, &rmv_data);
>>  		if (rc) {
>>  			pr_warn("%s: Cannot reset, err=%d\n",
>>  				__func__, rc);
>>@@ -753,6 +815,15 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
>>  		goto hard_fail;
>>  	}
>>
>>+	/*
>>+	 * For those hot removed VFs, we should add back them after PF get
>>+	 * recovered properly.
>>+	 */
>>+	list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
>>+		eeh_add_virt_device(edev, NULL);
>>+		list_del(&edev->rmv_list);
>>+	}
>>+
>>  	/* Tell all device drivers that they can resume operations */
>>  	pr_info("EEH: Notify device driver to resume\n");
>>  	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
>>@@ -792,11 +863,15 @@ perm_error:
>>  	 * the their PCI config any more.
>>  	 */
>>  	if (frozen_bus) {
>>-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>-
>>-		pci_lock_rescan_remove();
>>-		pcibios_remove_pci_devices(frozen_bus);
>>-		pci_unlock_rescan_remove();
>>+		if (pe->type & EEH_PE_VF) {
>>+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>+		} else {
>>+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
>>+			pci_lock_rescan_remove();
>>+			pcibios_remove_pci_devices(frozen_bus);
>>+			pci_unlock_rescan_remove();
>>+		}
>>  	}
>>  }
>>
>>diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
>>index 29240ad..b7facb9 100644
>>--- a/arch/powerpc/kernel/eeh_pe.c
>>+++ b/arch/powerpc/kernel/eeh_pe.c
>>@@ -936,7 +936,8 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
>>  	if (pe->type & EEH_PE_PHB) {
>>  		bus = pe->phb->bus;
>>  	} else if (pe->type & EEH_PE_BUS ||
>>-		   pe->type & EEH_PE_DEVICE) {
>>+		   pe->type & EEH_PE_DEVICE ||
>>+		   pe->type & EEH_PE_VF) {
>>  		if (pe->bus) {
>>  			bus = pe->bus;
>>  			goto out;
>>diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
>>index 5091b05..9e9cb16 100644
>>--- a/arch/powerpc/kernel/pci_dn.c
>>+++ b/arch/powerpc/kernel/pci_dn.c
>>@@ -139,6 +139,7 @@ struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
>>  #ifdef CONFIG_PCI_IOV
>>  static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
>>  					   struct pci_dev *pdev,
>>+					   int vf_index,
>>  					   int busno, int devfn)
>>  {
>>  	struct pci_dn *pdn;
>>@@ -158,6 +159,7 @@ static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
>>  	pdn->busno = busno;
>>  	pdn->devfn = devfn;
>>  #ifdef CONFIG_PPC_POWERNV
>>+	pdn->vf_index = vf_index;
>>  	pdn->pe_number = IODA_INVALID_PE;
>>  #endif
>>  	INIT_LIST_HEAD(&pdn->child_list);
>>@@ -198,7 +200,7 @@ struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
>>  		return NULL;
>>
>>  	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
>>-		pdn = add_one_dev_pci_data(parent, NULL,
>>+		pdn = add_one_dev_pci_data(parent, NULL, i,
>>  					   pci_iov_virtfn_bus(pdev, i),
>>  					   pci_iov_virtfn_devfn(pdev, i));
>>  		if (!pdn) {
>>
>
>
>-- 
>Alexey
diff mbox

Patch

diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 331c856..1f68190 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -127,6 +127,11 @@  static inline bool eeh_pe_passed(struct eeh_pe *pe)
 #define EEH_DEV_SYSFS		(1 << 9)	/* Sysfs created	*/
 #define EEH_DEV_REMOVED		(1 << 10)	/* Removed permanently	*/
 
+struct eeh_rmv_data {
+	struct list_head edev_list;
+	int removed;
+};
+
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
 	int class_code;			/* Class code of the device	*/
@@ -139,9 +144,11 @@  struct eeh_dev {
 	int af_cap;			/* Saved AF capability		*/
 	struct eeh_pe *pe;		/* Associated PE		*/
 	struct list_head list;		/* Form link list in the PE	*/
+	struct list_head rmv_list;	/* Record the removed edev 	*/
 	struct pci_controller *phb;	/* Associated PHB		*/
 	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
+	bool   in_error;		/* Error flag for eeh_dev	*/
 	struct pci_dev *physfn;		/* Associated PF PORT		*/
 	struct pci_bus *bus;		/* PCI bus for partial hotplug	*/
 };
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 9b365d6..533e6e9 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -211,6 +211,7 @@  struct pci_dn {
 #define IODA_INVALID_PE		(-1)
 #ifdef CONFIG_PPC_POWERNV
 	int	pe_number;
+	int     vf_index;		/* VF index in the PF */
 #ifdef CONFIG_PCI_IOV
 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
 	u16     num_vfs;		/* number of VFs enabled*/
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 41a4b30..0f36750 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1245,6 +1245,14 @@  void eeh_remove_device(struct pci_dev *dev)
 	 * from the parent PE during the BAR resotre.
 	 */
 	edev->pdev = NULL;
+
+	/*
+	 * The flag "in_error" is used to trace EEH devices for VFs
+	 * in error state or not. It's set in eeh_report_error(). If
+	 * it's not set, eeh_report_{reset,resume}() won't be called
+	 * for the VF EEH device.
+	 */
+	edev->in_error = 0;
 	dev->dev.archdata.edev = NULL;
 	if (!(edev->pe->state & EEH_PE_KEEP))
 		eeh_rmv_from_parent_pe(edev);
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
index aabba94..7815095 100644
--- a/arch/powerpc/kernel/eeh_dev.c
+++ b/arch/powerpc/kernel/eeh_dev.c
@@ -67,6 +67,7 @@  void *eeh_dev_init(struct pci_dn *pdn, void *data)
 	edev->pdn = pdn;
 	edev->phb = phb;
 	INIT_LIST_HEAD(&edev->list);
+	INIT_LIST_HEAD(&edev->rmv_list);
 
 	return NULL;
 }
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 89eb4bc..06d20d6 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -211,6 +211,7 @@  static void *eeh_report_error(void *data, void *userdata)
 	if (rc == PCI_ERS_RESULT_NEED_RESET) *res = rc;
 	if (*res == PCI_ERS_RESULT_NONE) *res = rc;
 
+	edev->in_error = true;
 	eeh_pcid_put(dev);
 	return NULL;
 }
@@ -282,7 +283,8 @@  static void *eeh_report_reset(void *data, void *userdata)
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->slot_reset ||
-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
+	    (edev->mode & EEH_DEV_NO_HANDLER) ||
+	    (!edev->in_error)) {
 		eeh_pcid_put(dev);
 		return NULL;
 	}
@@ -326,6 +328,7 @@  static void *eeh_report_resume(void *data, void *userdata)
 {
 	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	bool was_in_error;
 	struct pci_driver *driver;
 
 	if (!dev || eeh_dev_removed(edev))
@@ -335,11 +338,13 @@  static void *eeh_report_resume(void *data, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (!driver) return NULL;
 
+	was_in_error = edev->in_error;
+	edev->in_error = false;
 	eeh_enable_irq(dev);
 
 	if (!driver->err_handler ||
 	    !driver->err_handler->resume ||
-	    (edev->mode & EEH_DEV_NO_HANDLER)) {
+	    (edev->mode & EEH_DEV_NO_HANDLER) || !was_in_error) {
 		edev->mode &= ~EEH_DEV_NO_HANDLER;
 		eeh_pcid_put(dev);
 		return NULL;
@@ -386,12 +391,39 @@  static void *eeh_report_failure(void *data, void *userdata)
 	return NULL;
 }
 
+static void *eeh_add_virt_device(void *data, void *userdata)
+{
+	struct pci_driver *driver;
+	struct eeh_dev *edev = (struct eeh_dev *)data;
+	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	if (!(edev->physfn)) {
+		pr_warn("%s: EEH dev %04x:%02x:%02x.%01x not for VF\n",
+			__func__, edev->phb->global_number, pdn->busno,
+			PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+		return NULL;
+	}
+
+	driver = eeh_pcid_get(dev);
+	if (driver) {
+		eeh_pcid_put(dev);
+		if (driver->err_handler)
+			return NULL;
+	}
+
+	pci_iov_virtfn_add(edev->physfn, pdn->vf_index, 0);
+	return NULL;
+}
+
 static void *eeh_rmv_device(void *data, void *userdata)
 {
 	struct pci_driver *driver;
 	struct eeh_dev *edev = (struct eeh_dev *)data;
 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
-	int *removed = (int *)userdata;
+	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
+	int *removed = rmv_data ? &rmv_data->removed : NULL;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 
 	/*
 	 * Actually, we should remove the PCI bridges as well.
@@ -416,7 +448,7 @@  static void *eeh_rmv_device(void *data, void *userdata)
 	driver = eeh_pcid_get(dev);
 	if (driver) {
 		eeh_pcid_put(dev);
-		if (driver->err_handler)
+		if (removed && driver->err_handler)
 			return NULL;
 	}
 
@@ -425,11 +457,26 @@  static void *eeh_rmv_device(void *data, void *userdata)
 		 pci_name(dev));
 	edev->bus = dev->bus;
 	edev->mode |= EEH_DEV_DISCONNECTED;
-	(*removed)++;
+	if (removed)
+		(*removed)++;
 
-	pci_lock_rescan_remove();
-	pci_stop_and_remove_bus_device(dev);
-	pci_unlock_rescan_remove();
+	if (edev->physfn) {
+		pci_iov_virtfn_remove(edev->physfn, pdn->vf_index, 0);
+		edev->pdev = NULL;
+
+		/*
+		 * We have to set the VF PE number to invalid one, which is
+		 * required to plug the VF successfully.
+		 */
+		pdn->pe_number = IODA_INVALID_PE;
+
+		if (rmv_data)
+			list_add(&edev->rmv_list, &rmv_data->edev_list);
+	} else {
+		pci_lock_rescan_remove();
+		pci_stop_and_remove_bus_device(dev);
+		pci_unlock_rescan_remove();
+	}
 
 	return NULL;
 }
@@ -543,11 +590,13 @@  int eeh_pe_reset_and_recover(struct eeh_pe *pe)
  * During the reset, udev might be invoked because those affected
  * PCI devices will be removed and then added.
  */
-static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
+static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
+				struct eeh_rmv_data *rmv_data)
 {
 	struct pci_bus *frozen_bus = eeh_pe_bus_get(pe);
 	struct timeval tstamp;
-	int cnt, rc, removed = 0;
+	int cnt, rc;
+	struct eeh_dev *edev;
 
 	/* pcibios will clear the counter; save the value */
 	cnt = pe->freeze_count;
@@ -561,12 +610,15 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 	 */
 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
 	if (bus) {
-		pci_lock_rescan_remove();
-		pcibios_remove_pci_devices(bus);
-		pci_unlock_rescan_remove();
-	} else if (frozen_bus) {
-		eeh_pe_dev_traverse(pe, eeh_rmv_device, &removed);
-	}
+		if (pe->type & EEH_PE_VF) {
+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+		} else {
+			pci_lock_rescan_remove();
+			pcibios_remove_pci_devices(bus);
+			pci_unlock_rescan_remove();
+		}
+	} else if (frozen_bus)
+		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
 
 	/*
 	 * Reset the pci controller. (Asserts RST#; resets config space).
@@ -607,14 +659,22 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 		 * PE. We should disconnect it so the binding can be
 		 * rebuilt when adding PCI devices.
 		 */
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		pcibios_add_pci_devices(bus);
-	} else if (frozen_bus && removed) {
+		if (pe->type & EEH_PE_VF)
+			eeh_add_virt_device(edev, NULL);
+		else
+			pcibios_add_pci_devices(bus);
+	} else if (frozen_bus && rmv_data->removed) {
 		pr_info("EEH: Sleep 5s ahead of partial hotplug\n");
 		ssleep(5);
 
+		edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
-		pcibios_add_pci_devices(frozen_bus);
+		if (pe->type & EEH_PE_VF)
+			eeh_add_virt_device(edev, NULL);
+		else
+			pcibios_add_pci_devices(frozen_bus);
 	}
 	eeh_pe_state_clear(pe, EEH_PE_KEEP);
 
@@ -633,8 +693,10 @@  static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
 static void eeh_handle_normal_event(struct eeh_pe *pe)
 {
 	struct pci_bus *frozen_bus;
+	struct eeh_dev *edev, *tmp;
 	int rc = 0;
 	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
+	struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
 
 	frozen_bus = eeh_pe_bus_get(pe);
 	if (!frozen_bus) {
@@ -681,7 +743,7 @@  static void eeh_handle_normal_event(struct eeh_pe *pe)
 	 */
 	if (result == PCI_ERS_RESULT_NONE) {
 		pr_info("EEH: Reset with hotplug activity\n");
-		rc = eeh_reset_device(pe, frozen_bus);
+		rc = eeh_reset_device(pe, frozen_bus, NULL);
 		if (rc) {
 			pr_warn("%s: Unable to reset, err=%d\n",
 				__func__, rc);
@@ -733,7 +795,7 @@  static void eeh_handle_normal_event(struct eeh_pe *pe)
 	/* If any device called out for a reset, then reset the slot */
 	if (result == PCI_ERS_RESULT_NEED_RESET) {
 		pr_info("EEH: Reset without hotplug activity\n");
-		rc = eeh_reset_device(pe, NULL);
+		rc = eeh_reset_device(pe, NULL, &rmv_data);
 		if (rc) {
 			pr_warn("%s: Cannot reset, err=%d\n",
 				__func__, rc);
@@ -753,6 +815,15 @@  static void eeh_handle_normal_event(struct eeh_pe *pe)
 		goto hard_fail;
 	}
 
+	/*
+	 * For those hot removed VFs, we should add back them after PF get
+	 * recovered properly.
+	 */
+	list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
+		eeh_add_virt_device(edev, NULL);
+		list_del(&edev->rmv_list);
+	}
+
 	/* Tell all device drivers that they can resume operations */
 	pr_info("EEH: Notify device driver to resume\n");
 	eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
@@ -792,11 +863,15 @@  perm_error:
 	 * the their PCI config any more.
 	 */
 	if (frozen_bus) {
-		eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
-
-		pci_lock_rescan_remove();
-		pcibios_remove_pci_devices(frozen_bus);
-		pci_unlock_rescan_remove();
+		if (pe->type & EEH_PE_VF) {
+			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+		} else {
+			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+			pci_lock_rescan_remove();
+			pcibios_remove_pci_devices(frozen_bus);
+			pci_unlock_rescan_remove();
+		}
 	}
 }
 
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 29240ad..b7facb9 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -936,7 +936,8 @@  struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
 	if (pe->type & EEH_PE_PHB) {
 		bus = pe->phb->bus;
 	} else if (pe->type & EEH_PE_BUS ||
-		   pe->type & EEH_PE_DEVICE) {
+		   pe->type & EEH_PE_DEVICE ||
+		   pe->type & EEH_PE_VF) {
 		if (pe->bus) {
 			bus = pe->bus;
 			goto out;
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 5091b05..9e9cb16 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -139,6 +139,7 @@  struct pci_dn *pci_get_pdn(struct pci_dev *pdev)
 #ifdef CONFIG_PCI_IOV
 static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
 					   struct pci_dev *pdev,
+					   int vf_index,
 					   int busno, int devfn)
 {
 	struct pci_dn *pdn;
@@ -158,6 +159,7 @@  static struct pci_dn *add_one_dev_pci_data(struct pci_dn *parent,
 	pdn->busno = busno;
 	pdn->devfn = devfn;
 #ifdef CONFIG_PPC_POWERNV
+	pdn->vf_index = vf_index;
 	pdn->pe_number = IODA_INVALID_PE;
 #endif
 	INIT_LIST_HEAD(&pdn->child_list);
@@ -198,7 +200,7 @@  struct pci_dn *add_dev_pci_data(struct pci_dev *pdev)
 		return NULL;
 
 	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
-		pdn = add_one_dev_pci_data(parent, NULL,
+		pdn = add_one_dev_pci_data(parent, NULL, i,
 					   pci_iov_virtfn_bus(pdev, i),
 					   pci_iov_virtfn_devfn(pdev, i));
 		if (!pdn) {