diff mbox series

[v4,04/13] iommu/vt-d: Add pasid replace helpers

Message ID 20241104131842.13303-5-yi.l.liu@intel.com (mailing list archive)
State New
Headers show
Series Make set_dev_pasid op supporting domain replacement | expand

Commit Message

Yi Liu Nov. 4, 2024, 1:18 p.m. UTC
pasid replacement allows converting a present pasid entry to be FS, SS,
PT or nested, hence add helpers for such operations. This simplifies the
callers as well, since the caller can switch the pasid to the new domain
in one shot.

Suggested-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/intel/pasid.c | 173 ++++++++++++++++++++++++++++++++++++
 drivers/iommu/intel/pasid.h |  12 +++
 2 files changed, 185 insertions(+)

Comments

Baolu Lu Nov. 5, 2024, 2:06 a.m. UTC | #1
On 11/4/24 21:18, Yi Liu wrote:
> pasid replacement allows converting a present pasid entry to be FS, SS,
> PT or nested, hence add helpers for such operations. This simplifies the
> callers as well since the caller can switch the pasid to the new domain
> by one-shot.
> 
> Suggested-by: Lu Baolu<baolu.lu@linux.intel.com>
> Signed-off-by: Yi Liu<yi.l.liu@intel.com>
> ---
>   drivers/iommu/intel/pasid.c | 173 ++++++++++++++++++++++++++++++++++++
>   drivers/iommu/intel/pasid.h |  12 +++
>   2 files changed, 185 insertions(+)

Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>

with a nit below

> 
> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
> index 65fd2fee01b7..b7c2d65b8726 100644
> --- a/drivers/iommu/intel/pasid.c
> +++ b/drivers/iommu/intel/pasid.c
> @@ -390,6 +390,40 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
>   	return 0;
>   }
>   
> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
> +				    struct device *dev, pgd_t *pgd,
> +				    u32 pasid, u16 did, int flags)
> +{
> +	struct pasid_entry *pte;
> +	u16 old_did;
> +
> +	if (!ecap_flts(iommu->ecap) ||
> +	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
> +		return -EINVAL;
> +
> +	spin_lock(&iommu->lock);
> +	pte = intel_pasid_get_entry(dev, pasid);
> +	if (!pte) {
> +		spin_unlock(&iommu->lock);
> +		return -ENODEV;
> +	}
> +
> +	if (!pasid_pte_is_present(pte)) {
> +		spin_unlock(&iommu->lock);
> +		return -EINVAL;
> +	}
> +
> +	old_did = pasid_get_domain_id(pte);
> +
> +	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
> +	spin_unlock(&iommu->lock);
> +
> +	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
> +	intel_drain_pasid_prq(dev, pasid);
> +
> +	return 0;
> +}
> +
>   /*
>    * Skip top levels of page tables for iommu which has less agaw
>    * than default. Unnecessary for PT mode.
> @@ -483,6 +517,55 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
>   	return 0;
>   }
>   
> +int intel_pasid_replace_second_level(struct intel_iommu *iommu,
> +				     struct dmar_domain *domain,
> +				     struct device *dev, u32 pasid)
> +{
> +	struct pasid_entry *pte;
> +	struct dma_pte *pgd;
> +	u16 did, old_did;
> +	u64 pgd_val;
> +	int agaw;
> +
> +	/*
> +	 * If hardware advertises no support for second level
> +	 * translation, return directly.
> +	 */
> +	if (!ecap_slts(iommu->ecap))
> +		return -EINVAL;
> +
> +	pgd = domain->pgd;
> +	agaw = iommu_skip_agaw(domain, iommu, &pgd);

iommu_skip_agaw() was removed after domain_alloc_paging became
supported in this driver. Perhaps you need a rebase if you have a new
version.

> +	if (agaw < 0)
> +		return -EINVAL;
> +
> +	pgd_val = virt_to_phys(pgd);
> +	did = domain_id_iommu(domain, iommu);
> +
> +	spin_lock(&iommu->lock);
> +	pte = intel_pasid_get_entry(dev, pasid);
> +	if (!pte) {
> +		spin_unlock(&iommu->lock);
> +		return -ENODEV;
> +	}
> +
> +	if (!pasid_pte_is_present(pte)) {
> +		spin_unlock(&iommu->lock);
> +		return -EINVAL;
> +	}
> +
> +	old_did = pasid_get_domain_id(pte);
> +
> +	pasid_pte_config_second_level(iommu, pte, pgd_val, agaw,
> +				      did, domain->dirty_tracking);
> +	spin_unlock(&iommu->lock);
> +
> +	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
> +	intel_drain_pasid_prq(dev, pasid);
> +
> +	return 0;
> +}

--
baolu
Yi Liu Nov. 5, 2024, 5:11 a.m. UTC | #2
On 2024/11/5 10:06, Baolu Lu wrote:
> On 11/4/24 21:18, Yi Liu wrote:
>> pasid replacement allows converting a present pasid entry to be FS, SS,
>> PT or nested, hence add helpers for such operations. This simplifies the
>> callers as well since the caller can switch the pasid to the new domain
>> by one-shot.
>>
>> Suggested-by: Lu Baolu<baolu.lu@linux.intel.com>
>> Signed-off-by: Yi Liu<yi.l.liu@intel.com>
>> ---
>>   drivers/iommu/intel/pasid.c | 173 ++++++++++++++++++++++++++++++++++++
>>   drivers/iommu/intel/pasid.h |  12 +++
>>   2 files changed, 185 insertions(+)
> 
> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
> 
> with a nit below
> 
>>
>> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
>> index 65fd2fee01b7..b7c2d65b8726 100644
>> --- a/drivers/iommu/intel/pasid.c
>> +++ b/drivers/iommu/intel/pasid.c
>> @@ -390,6 +390,40 @@ int intel_pasid_setup_first_level(struct intel_iommu 
>> *iommu,
>>       return 0;
>>   }
>> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
>> +                    struct device *dev, pgd_t *pgd,
>> +                    u32 pasid, u16 did, int flags)
>> +{
>> +    struct pasid_entry *pte;
>> +    u16 old_did;
>> +
>> +    if (!ecap_flts(iommu->ecap) ||
>> +        ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
>> +        return -EINVAL;
>> +
>> +    spin_lock(&iommu->lock);
>> +    pte = intel_pasid_get_entry(dev, pasid);
>> +    if (!pte) {
>> +        spin_unlock(&iommu->lock);
>> +        return -ENODEV;
>> +    }
>> +
>> +    if (!pasid_pte_is_present(pte)) {
>> +        spin_unlock(&iommu->lock);
>> +        return -EINVAL;
>> +    }
>> +
>> +    old_did = pasid_get_domain_id(pte);
>> +
>> +    pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
>> +    spin_unlock(&iommu->lock);
>> +
>> +    intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
>> +    intel_drain_pasid_prq(dev, pasid);
>> +
>> +    return 0;
>> +}
>> +
>>   /*
>>    * Skip top levels of page tables for iommu which has less agaw
>>    * than default. Unnecessary for PT mode.
>> @@ -483,6 +517,55 @@ int intel_pasid_setup_second_level(struct 
>> intel_iommu *iommu,
>>       return 0;
>>   }
>> +int intel_pasid_replace_second_level(struct intel_iommu *iommu,
>> +                     struct dmar_domain *domain,
>> +                     struct device *dev, u32 pasid)
>> +{
>> +    struct pasid_entry *pte;
>> +    struct dma_pte *pgd;
>> +    u16 did, old_did;
>> +    u64 pgd_val;
>> +    int agaw;
>> +
>> +    /*
>> +     * If hardware advertises no support for second level
>> +     * translation, return directly.
>> +     */
>> +    if (!ecap_slts(iommu->ecap))
>> +        return -EINVAL;
>> +
>> +    pgd = domain->pgd;
>> +    agaw = iommu_skip_agaw(domain, iommu, &pgd);
> 
> iommu_skip_agaw() has been removed after domain_alloc_paging is
> supported in this driver. Perhaps you need a rebase if you have a new
> version.

yep.

> 
>> +    if (agaw < 0)
>> +        return -EINVAL;
>> +
>> +    pgd_val = virt_to_phys(pgd);
>> +    did = domain_id_iommu(domain, iommu);
>> +
>> +    spin_lock(&iommu->lock);
>> +    pte = intel_pasid_get_entry(dev, pasid);
>> +    if (!pte) {
>> +        spin_unlock(&iommu->lock);
>> +        return -ENODEV;
>> +    }
>> +
>> +    if (!pasid_pte_is_present(pte)) {
>> +        spin_unlock(&iommu->lock);
>> +        return -EINVAL;
>> +    }
>> +
>> +    old_did = pasid_get_domain_id(pte);
>> +
>> +    pasid_pte_config_second_level(iommu, pte, pgd_val, agaw,
>> +                      did, domain->dirty_tracking);
>> +    spin_unlock(&iommu->lock);
>> +
>> +    intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
>> +    intel_drain_pasid_prq(dev, pasid);
>> +
>> +    return 0;
>> +}
> 
> -- 
> baolu
>
Tian, Kevin Nov. 6, 2024, 7:31 a.m. UTC | #3
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Monday, November 4, 2024 9:19 PM
> 
> pasid replacement allows converting a present pasid entry to be FS, SS,
> PT or nested, hence add helpers for such operations. This simplifies the
> callers as well since the caller can switch the pasid to the new domain
> by one-shot.

'simplify' compared to what? if it's an obvious result from creating
the helpers then no need to talk about it.

> 
> Suggested-by: Lu Baolu <baolu.lu@linux.intel.com>
> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
> ---
>  drivers/iommu/intel/pasid.c | 173
> ++++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel/pasid.h |  12 +++
>  2 files changed, 185 insertions(+)
> 
> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
> index 65fd2fee01b7..b7c2d65b8726 100644
> --- a/drivers/iommu/intel/pasid.c
> +++ b/drivers/iommu/intel/pasid.c
> @@ -390,6 +390,40 @@ int intel_pasid_setup_first_level(struct intel_iommu
> *iommu,
>  	return 0;
>  }
> 
> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
> +				    struct device *dev, pgd_t *pgd,
> +				    u32 pasid, u16 did, int flags)
> +{
> +	struct pasid_entry *pte;
> +	u16 old_did;
> +
> +	if (!ecap_flts(iommu->ecap) ||
> +	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
> +		return -EINVAL;

Better to copy the error messages from the setup part.

There may be a further chance to consolidate them later, but there is no
clear reason why different error-reporting schemes should be used
between them.

Same for the other helpers.

> +
> +	spin_lock(&iommu->lock);
> +	pte = intel_pasid_get_entry(dev, pasid);
> +	if (!pte) {
> +		spin_unlock(&iommu->lock);
> +		return -ENODEV;
> +	}
> +
> +	if (!pasid_pte_is_present(pte)) {
> +		spin_unlock(&iommu->lock);
> +		return -EINVAL;
> +	}
> +
> +	old_did = pasid_get_domain_id(pte);

Probably should pass the old domain in and check whether domain->did is
the same as the one in the pasid entry, and warn otherwise.

Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Yi Liu Nov. 6, 2024, 9:31 a.m. UTC | #4
On 2024/11/6 15:31, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Monday, November 4, 2024 9:19 PM
>>
>> pasid replacement allows converting a present pasid entry to be FS, SS,
>> PT or nested, hence add helpers for such operations. This simplifies the
>> callers as well since the caller can switch the pasid to the new domain
>> by one-shot.
> 
> 'simplify' compared to what? if it's an obvious result from creating
> the helpers then no need to talk about it.

agreed, no need to talk about it.

>>
>> Suggested-by: Lu Baolu <baolu.lu@linux.intel.com>
>> Signed-off-by: Yi Liu <yi.l.liu@intel.com>
>> ---
>>   drivers/iommu/intel/pasid.c | 173
>> ++++++++++++++++++++++++++++++++++++
>>   drivers/iommu/intel/pasid.h |  12 +++
>>   2 files changed, 185 insertions(+)
>>
>> diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
>> index 65fd2fee01b7..b7c2d65b8726 100644
>> --- a/drivers/iommu/intel/pasid.c
>> +++ b/drivers/iommu/intel/pasid.c
>> @@ -390,6 +390,40 @@ int intel_pasid_setup_first_level(struct intel_iommu
>> *iommu,
>>   	return 0;
>>   }
>>
>> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
>> +				    struct device *dev, pgd_t *pgd,
>> +				    u32 pasid, u16 did, int flags)
>> +{
>> +	struct pasid_entry *pte;
>> +	u16 old_did;
>> +
>> +	if (!ecap_flts(iommu->ecap) ||
>> +	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
>> +		return -EINVAL;
> 
> better copy the error messages from the setup part.
> 
> there may be further chance to consolidate them later but no clear
> reason why different error warning schemes should be used
> between them.
> 
> same for other helpers.

Sure. I think Baolu has a point that this may be triggerable by userspace,
hence dropping the error message to avoid DoS.

>> +
>> +	spin_lock(&iommu->lock);
>> +	pte = intel_pasid_get_entry(dev, pasid);
>> +	if (!pte) {
>> +		spin_unlock(&iommu->lock);
>> +		return -ENODEV;
>> +	}
>> +
>> +	if (!pasid_pte_is_present(pte)) {
>> +		spin_unlock(&iommu->lock);
>> +		return -EINVAL;
>> +	}
>> +
>> +	old_did = pasid_get_domain_id(pte);
> 
> probably should pass the old domain in and check whether the
> domain->did is same as the one in the pasid entry and warn otherwise.

this would be a sw bug. :) Do we really want to catch every bug by warn? :)

> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tian, Kevin Nov. 6, 2024, 9:51 a.m. UTC | #5
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Wednesday, November 6, 2024 5:31 PM
> 
> On 2024/11/6 15:31, Tian, Kevin wrote:
> >> From: Liu, Yi L <yi.l.liu@intel.com>
> >> Sent: Monday, November 4, 2024 9:19 PM
> >>
> >>
> >> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
> >> +				    struct device *dev, pgd_t *pgd,
> >> +				    u32 pasid, u16 did, int flags)
> >> +{
> >> +	struct pasid_entry *pte;
> >> +	u16 old_did;
> >> +
> >> +	if (!ecap_flts(iommu->ecap) ||
> >> +	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
> >> +		return -EINVAL;
> >
> > better copy the error messages from the setup part.
> >
> > there may be further chance to consolidate them later but no clear
> > reason why different error warning schemes should be used
> > between them.
> >
> > same for other helpers.
> 
> sure. I think Baolu has a point that this may be trigger-able by userspace
> hence drop the error message to avoid DOS.
>

Isn't the existing path also triggerable by userspace? It's better to
have a consistent policy across all paths; then you can clean it up
together later.

 
> >> +
> >> +	spin_lock(&iommu->lock);
> >> +	pte = intel_pasid_get_entry(dev, pasid);
> >> +	if (!pte) {
> >> +		spin_unlock(&iommu->lock);
> >> +		return -ENODEV;
> >> +	}
> >> +
> >> +	if (!pasid_pte_is_present(pte)) {
> >> +		spin_unlock(&iommu->lock);
> >> +		return -EINVAL;
> >> +	}
> >> +
> >> +	old_did = pasid_get_domain_id(pte);
> >
> > probably should pass the old domain in and check whether the
> > domain->did is same as the one in the pasid entry and warn otherwise.
> 
> this would be a sw bug. :) Do we really want to catch every bug by warn? :)
> 

this one should not happen. If it does, something severe jumps out...
Yi Liu Nov. 6, 2024, 10:02 a.m. UTC | #6
On 2024/11/6 17:51, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Wednesday, November 6, 2024 5:31 PM
>>
>> On 2024/11/6 15:31, Tian, Kevin wrote:
>>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>>> Sent: Monday, November 4, 2024 9:19 PM
>>>>
>>>>
>>>> +int intel_pasid_replace_first_level(struct intel_iommu *iommu,
>>>> +				    struct device *dev, pgd_t *pgd,
>>>> +				    u32 pasid, u16 did, int flags)
>>>> +{
>>>> +	struct pasid_entry *pte;
>>>> +	u16 old_did;
>>>> +
>>>> +	if (!ecap_flts(iommu->ecap) ||
>>>> +	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
>>>> +		return -EINVAL;
>>>
>>> better copy the error messages from the setup part.
>>>
>>> there may be further chance to consolidate them later but no clear
>>> reason why different error warning schemes should be used
>>> between them.
>>>
>>> same for other helpers.
>>
>> sure. I think Baolu has a point that this may be trigger-able by userspace
>> hence drop the error message to avoid DOS.
>>
> 
> Isn't the existing path also trigger-able by userspace? It's better to
> have a consistent policy cross all paths then you can clean it up
> together later.

I see. May we add ratelimit to it.

>   
>>>> +
>>>> +	spin_lock(&iommu->lock);
>>>> +	pte = intel_pasid_get_entry(dev, pasid);
>>>> +	if (!pte) {
>>>> +		spin_unlock(&iommu->lock);
>>>> +		return -ENODEV;
>>>> +	}
>>>> +
>>>> +	if (!pasid_pte_is_present(pte)) {
>>>> +		spin_unlock(&iommu->lock);
>>>> +		return -EINVAL;
>>>> +	}
>>>> +
>>>> +	old_did = pasid_get_domain_id(pte);
>>>
>>> probably should pass the old domain in and check whether the
>>> domain->did is same as the one in the pasid entry and warn otherwise.
>>
>> this would be a sw bug. :) Do we really want to catch every bug by warn? :)
>>
> 
> this one should not happen. If it does, something severe jumps out...

Yes, that's why I doubt whether it's valuable to do it. It would have to
be a vital bug that brings us this warning. Or, instead of passing in the
old domain, how about just old_did? We use the passed-in did instead of
using the did from the pte.
Tian, Kevin Nov. 6, 2024, 10:05 a.m. UTC | #7
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Wednesday, November 6, 2024 6:02 PM
> 
> On 2024/11/6 17:51, Tian, Kevin wrote:
> >> From: Liu, Yi L <yi.l.liu@intel.com>
> >> Sent: Wednesday, November 6, 2024 5:31 PM
> >>
> >> On 2024/11/6 15:31, Tian, Kevin wrote:
> >>>> From: Liu, Yi L <yi.l.liu@intel.com>
> >>>> Sent: Monday, November 4, 2024 9:19 PM
> >>>>
> >>>> +
> >>>> +	spin_lock(&iommu->lock);
> >>>> +	pte = intel_pasid_get_entry(dev, pasid);
> >>>> +	if (!pte) {
> >>>> +		spin_unlock(&iommu->lock);
> >>>> +		return -ENODEV;
> >>>> +	}
> >>>> +
> >>>> +	if (!pasid_pte_is_present(pte)) {
> >>>> +		spin_unlock(&iommu->lock);
> >>>> +		return -EINVAL;
> >>>> +	}
> >>>> +
> >>>> +	old_did = pasid_get_domain_id(pte);
> >>>
> >>> probably should pass the old domain in and check whether the
> >>> domain->did is same as the one in the pasid entry and warn otherwise.
> >>
> >> this would be a sw bug. :) Do we really want to catch every bug by warn? :)
> >>
> >
> > this one should not happen. If it does, something severe jumps out...
> 
> yes. that's why I doubt if it's valuable to do it. It should be a vital
> bug that bring us this warn. or instead of passing id old domain, how
> about just old_did? We use the passed in did instead of using the did
> from pte.
> 

My personal feeling - it's worth it, as such a rare bug, once it happens,
would be very difficult to debug. The warning provides a useful hint.

Passing did is OK.
Yi Liu Nov. 6, 2024, 10:27 a.m. UTC | #8
On 2024/11/6 18:05, Tian, Kevin wrote:

> 
> My personal feeling - it's worth as such rare bug once happening
> would be very difficult to debug. the warning provides useful hint.
> 
> passing did is OK.

deal.
Baolu Lu Nov. 6, 2024, 10:43 a.m. UTC | #9
On 2024/11/6 18:05, Tian, Kevin wrote:
>> From: Liu, Yi L<yi.l.liu@intel.com>
>> Sent: Wednesday, November 6, 2024 6:02 PM
>>
>> On 2024/11/6 17:51, Tian, Kevin wrote:
>>>> From: Liu, Yi L<yi.l.liu@intel.com>
>>>> Sent: Wednesday, November 6, 2024 5:31 PM
>>>>
>>>> On 2024/11/6 15:31, Tian, Kevin wrote:
>>>>>> From: Liu, Yi L<yi.l.liu@intel.com>
>>>>>> Sent: Monday, November 4, 2024 9:19 PM
>>>>>>
>>>>>> +
>>>>>> +	spin_lock(&iommu->lock);
>>>>>> +	pte = intel_pasid_get_entry(dev, pasid);
>>>>>> +	if (!pte) {
>>>>>> +		spin_unlock(&iommu->lock);
>>>>>> +		return -ENODEV;
>>>>>> +	}
>>>>>> +
>>>>>> +	if (!pasid_pte_is_present(pte)) {
>>>>>> +		spin_unlock(&iommu->lock);
>>>>>> +		return -EINVAL;
>>>>>> +	}
>>>>>> +
>>>>>> +	old_did = pasid_get_domain_id(pte);
>>>>> probably should pass the old domain in and check whether the
>>>>> domain->did is same as the one in the pasid entry and warn otherwise.
>>>> this would be a sw bug. 
diff mbox series

Patch

diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 65fd2fee01b7..b7c2d65b8726 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -390,6 +390,40 @@  int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 	return 0;
 }
 
+int intel_pasid_replace_first_level(struct intel_iommu *iommu,
+				    struct device *dev, pgd_t *pgd,
+				    u32 pasid, u16 did, int flags)
+{
+	struct pasid_entry *pte;
+	u16 old_did;
+
+	if (!ecap_flts(iommu->ecap) ||
+	    ((flags & PASID_FLAG_FL5LP) && !cap_fl5lp_support(iommu->cap)))
+		return -EINVAL;
+
+	spin_lock(&iommu->lock);
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (!pte) {
+		spin_unlock(&iommu->lock);
+		return -ENODEV;
+	}
+
+	if (!pasid_pte_is_present(pte)) {
+		spin_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	old_did = pasid_get_domain_id(pte);
+
+	pasid_pte_config_first_level(iommu, pte, pgd, did, flags);
+	spin_unlock(&iommu->lock);
+
+	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+	intel_drain_pasid_prq(dev, pasid);
+
+	return 0;
+}
+
 /*
  * Skip top levels of page tables for iommu which has less agaw
  * than default. Unnecessary for PT mode.
@@ -483,6 +517,55 @@  int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	return 0;
 }
 
+int intel_pasid_replace_second_level(struct intel_iommu *iommu,
+				     struct dmar_domain *domain,
+				     struct device *dev, u32 pasid)
+{
+	struct pasid_entry *pte;
+	struct dma_pte *pgd;
+	u16 did, old_did;
+	u64 pgd_val;
+	int agaw;
+
+	/*
+	 * If hardware advertises no support for second level
+	 * translation, return directly.
+	 */
+	if (!ecap_slts(iommu->ecap))
+		return -EINVAL;
+
+	pgd = domain->pgd;
+	agaw = iommu_skip_agaw(domain, iommu, &pgd);
+	if (agaw < 0)
+		return -EINVAL;
+
+	pgd_val = virt_to_phys(pgd);
+	did = domain_id_iommu(domain, iommu);
+
+	spin_lock(&iommu->lock);
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (!pte) {
+		spin_unlock(&iommu->lock);
+		return -ENODEV;
+	}
+
+	if (!pasid_pte_is_present(pte)) {
+		spin_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	old_did = pasid_get_domain_id(pte);
+
+	pasid_pte_config_second_level(iommu, pte, pgd_val, agaw,
+				      did, domain->dirty_tracking);
+	spin_unlock(&iommu->lock);
+
+	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+	intel_drain_pasid_prq(dev, pasid);
+
+	return 0;
+}
+
 /*
  * Set up dirty tracking on a second only or nested translation type.
  */
@@ -595,6 +678,35 @@  int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 	return 0;
 }
 
+int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
+				     struct device *dev, u32 pasid)
+{
+	u16 did = FLPT_DEFAULT_DID, old_did;
+	struct pasid_entry *pte;
+
+	spin_lock(&iommu->lock);
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (!pte) {
+		spin_unlock(&iommu->lock);
+		return -ENODEV;
+	}
+
+	if (!pasid_pte_is_present(pte)) {
+		spin_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	old_did = pasid_get_domain_id(pte);
+
+	pasid_pte_config_pass_through(iommu, pte, did);
+	spin_unlock(&iommu->lock);
+
+	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+	intel_drain_pasid_prq(dev, pasid);
+
+	return 0;
+}
+
 /*
  * Set the page snoop control for a pasid entry which has been set up.
  */
@@ -725,6 +837,67 @@  int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
 	return 0;
 }
 
+int intel_pasid_replace_nested(struct intel_iommu *iommu,
+			       struct device *dev, u32 pasid,
+			       struct dmar_domain *domain)
+{
+	struct iommu_hwpt_vtd_s1 *s1_cfg = &domain->s1_cfg;
+	u16 did = domain_id_iommu(domain, iommu), old_did;
+	struct dmar_domain *s2_domain = domain->s2_domain;
+	struct pasid_entry *pte;
+
+	/* Address width should match the address width supported by hardware */
+	switch (s1_cfg->addr_width) {
+	case ADDR_WIDTH_4LEVEL:
+		break;
+	case ADDR_WIDTH_5LEVEL:
+		if (!cap_fl5lp_support(iommu->cap)) {
+			dev_err_ratelimited(dev,
+					    "5-level paging not supported\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		dev_err_ratelimited(dev, "Invalid stage-1 address width %d\n",
+				    s1_cfg->addr_width);
+		return -EINVAL;
+	}
+
+	if ((s1_cfg->flags & IOMMU_VTD_S1_SRE) && !ecap_srs(iommu->ecap)) {
+		pr_err_ratelimited("No supervisor request support on %s\n",
+				   iommu->name);
+		return -EINVAL;
+	}
+
+	if ((s1_cfg->flags & IOMMU_VTD_S1_EAFE) && !ecap_eafs(iommu->ecap)) {
+		pr_err_ratelimited("No extended access flag support on %s\n",
+				   iommu->name);
+		return -EINVAL;
+	}
+
+	spin_lock(&iommu->lock);
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (!pte) {
+		spin_unlock(&iommu->lock);
+		return -ENODEV;
+	}
+
+	if (!pasid_pte_is_present(pte)) {
+		spin_unlock(&iommu->lock);
+		return -EINVAL;
+	}
+
+	old_did = pasid_get_domain_id(pte);
+
+	pasid_pte_config_nestd(iommu, pte, s1_cfg, s2_domain, did);
+	spin_unlock(&iommu->lock);
+
+	intel_pasid_flush_present(iommu, dev, pasid, old_did, pte);
+	intel_drain_pasid_prq(dev, pasid);
+
+	return 0;
+}
+
 /*
  * Interfaces to setup or teardown a pasid table to the scalable-mode
  * context table entry:
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index dde6d3ba5ae0..228938f3be51 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -303,6 +303,18 @@  int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 				   struct device *dev, u32 pasid);
 int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
 			     u32 pasid, struct dmar_domain *domain);
+int intel_pasid_replace_first_level(struct intel_iommu *iommu,
+				    struct device *dev, pgd_t *pgd,
+				    u32 pasid, u16 did, int flags);
+int intel_pasid_replace_second_level(struct intel_iommu *iommu,
+				     struct dmar_domain *domain,
+				     struct device *dev, u32 pasid);
+int intel_pasid_replace_pass_through(struct intel_iommu *iommu,
+				     struct device *dev, u32 pasid);
+int intel_pasid_replace_nested(struct intel_iommu *iommu,
+			       struct device *dev, u32 pasid,
+			       struct dmar_domain *domain);
+
 void intel_pasid_tear_down_entry(struct intel_iommu *iommu,
 				 struct device *dev, u32 pasid,
 				 bool fault_ignore);