diff mbox series

[RFC,XEN,v9,5/5] domctl: Add XEN_DOMCTL_gsi_permission to grant gsi

Message ID 20240607081127.126593-6-Jiqian.Chen@amd.com (mailing list archive)
State Superseded
Headers show
Series Support device passthrough when dom0 is PVH on Xen | expand

Commit Message

Chen, Jiqian June 7, 2024, 8:11 a.m. UTC
Some type of domain don't have PIRQ, like PVH, it do not do
PHYSDEVOP_map_pirq for each gsi. When passthrough a device
to guest on PVH dom0, callstack
pci_add_dm_done->XEN_DOMCTL_irq_permission will failed at
domain_pirq_to_irq, because PVH has no mapping of gsi, pirq
and irq on Xen side.

What's more, current hypercall XEN_DOMCTL_irq_permission require
passing in pirq and grant the access of irq, it is not suitable
for dom0 that has no PIRQ flag, because passthrough a device
needs gsi and grant the corresponding irq to guest. So, add a
new hypercall to grant gsi permission when dom0 is not PV or dom0
has not PIRQ flag.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
---
RFC: it needs review and needs to wait for the corresponding third patch on linux kernel side to be merged.
---
 tools/include/xenctrl.h            |  5 +++
 tools/libs/ctrl/xc_domain.c        | 15 +++++++
 tools/libs/light/libxl_pci.c       | 72 +++++++++++++++++++++++-------
 xen/arch/x86/domctl.c              | 38 ++++++++++++++++
 xen/arch/x86/include/asm/io_apic.h |  2 +
 xen/arch/x86/io_apic.c             | 21 +++++++++
 xen/arch/x86/mpparse.c             |  3 +-
 xen/include/public/domctl.h        | 10 +++++
 xen/xsm/flask/hooks.c              |  1 +
 9 files changed, 149 insertions(+), 18 deletions(-)

Comments

Jan Beulich June 11, 2024, 2:39 p.m. UTC | #1
On 07.06.2024 10:11, Jiqian Chen wrote:
> Some type of domain don't have PIRQ, like PVH, it do not do
> PHYSDEVOP_map_pirq for each gsi. When passthrough a device
> to guest on PVH dom0, callstack
> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed at
> domain_pirq_to_irq, because PVH has no mapping of gsi, pirq
> and irq on Xen side.

All of this is, to me at least, in pretty sharp contradiction to what
patch 2 says and does. IOW: Do we want the concept of pIRQ in PVH, or
do we want to keep that to PV?

> What's more, current hypercall XEN_DOMCTL_irq_permission require
> passing in pirq and grant the access of irq, it is not suitable
> for dom0 that has no PIRQ flag, because passthrough a device
> needs gsi and grant the corresponding irq to guest. So, add a
> new hypercall to grant gsi permission when dom0 is not PV or dom0
> has not PIRQ flag.
> 
> Signed-off-by: Huang Rui <ray.huang@amd.com>
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>

A problem throughout the series as it seems: Who's the author of these
patches? There's no From: saying it's not you, but your S-o-b also
isn't first.

> --- a/tools/libs/light/libxl_pci.c
> +++ b/tools/libs/light/libxl_pci.c
> @@ -1412,6 +1412,37 @@ static bool pci_supp_legacy_irq(void)
>  #define PCI_SBDF(seg, bus, devfn) \
>              ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
>  
> +static int pci_device_set_gsi(libxl_ctx *ctx,
> +                              libxl_domid domid,
> +                              libxl_device_pci *pci,
> +                              bool map,
> +                              int *gsi_back)
> +{
> +    int r, gsi, pirq;
> +    uint32_t sbdf;
> +
> +    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
> +    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
> +    *gsi_back = r;
> +    if (r < 0)
> +        return r;
> +
> +    gsi = r;
> +    pirq = r;

r is a GSI as per above; why would you store such in a variable named pirq?
And how can ...

> +    if (map)
> +        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
> +    else
> +        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);

... that value be the correct one to pass into here? In fact, the pIRQ number
you obtain above in the "map" case isn't handed to the caller, i.e. it is
effectively lost. Yet that's what would need passing into such an unmap call.

> +    if (r)
> +        return r;
> +
> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);

Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
you better would avoid making the call in the first place.

> +    if (r && errno == EOPNOTSUPP)

Before here you don't really need the pIRQ number; if all it really is needed
for is ...

> +        r = xc_domain_irq_permission(ctx->xch, domid, pirq, map);

... this, then it probably also should only be obtained when it's needed. Yet
overall the intentions here aren't quite clear to me.

> @@ -1485,6 +1516,19 @@ static void pci_add_dm_done(libxl__egc *egc,
>      fclose(f);
>      if (!pci_supp_legacy_irq())
>          goto out_no_irq;
> +
> +    r = pci_device_set_gsi(ctx, domid, pci, 1, &gsi);
> +    if (gsi >= 0) {
> +        if (r < 0) {

This unusual way of error checking likely wants a comment.

> +            rc = ERROR_FAIL;
> +            LOGED(ERROR, domainid,
> +                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
> +            goto out;
> +        } else {
> +            goto process_permissive;
> +        }

Btw, no need for "else" when the earlier if ends in "goto" or alike.

> @@ -1493,13 +1537,6 @@ static void pci_add_dm_done(libxl__egc *egc,
>          goto out_no_irq;
>      }
>      if ((fscanf(f, "%u", &irq) == 1) && irq) {
> -        sbdf = PCI_SBDF(pci->domain, pci->bus,
> -                        (PCI_DEVFN(pci->dev, pci->func)));
> -        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
> -        /* if fail, keep using irq; if success, r is gsi, use gsi */
> -        if (r != -1) {
> -            irq = r;
> -        }

If I'm not mistaken, this and ...

> @@ -2255,13 +2302,6 @@ skip_bar:
>      }
>  
>      if ((fscanf(f, "%u", &irq) == 1) && irq) {
> -        sbdf = PCI_SBDF(pci->domain, pci->bus,
> -                        (PCI_DEVFN(pci->dev, pci->func)));
> -        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
> -        /* if fail, keep using irq; if success, r is gsi, use gsi */
> -        if (r != -1) {
> -            irq = r;
> -        }

... this is code added by the immediately preceding patch. It's pretty odd
for that to be deleted here again right away. Can the interaction of the
two patches perhaps be re-arranged to avoid this anomaly?

> @@ -237,6 +238,43 @@ long arch_do_domctl(
>          break;
>      }
>  
> +    case XEN_DOMCTL_gsi_permission:
> +    {
> +        unsigned int gsi = domctl->u.gsi_permission.gsi;
> +        int irq = gsi_2_irq(gsi);

I'm not sure it is a good idea to issue this call ahead of the basic error
checks below.

> +        bool allow = domctl->u.gsi_permission.allow_access;

This allows any non-zero values to mean "true". I think you want to bail
on values larger than 1, much like you also want to check that the padding
fields are all zero.

> +        /*
> +         * If current domain is PV or it has PIRQ flag, it has a mapping
> +         * of gsi, pirq and irq, so it should use XEN_DOMCTL_irq_permission
> +         * to grant irq permission.
> +         */
> +        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )

Please use currd here (and also again below).

> +        {
> +            ret = -EOPNOTSUPP;
> +            break;
> +        }
> +
> +        if ( gsi >= nr_irqs_gsi || irq < 0 )
> +        {
> +            ret = -EINVAL;
> +            break;
> +        }
> +
> +        if ( !irq_access_permitted(current->domain, irq) ||
> +             xsm_irq_permission(XSM_HOOK, d, irq, allow) )

Daniel, is it okay to issue the XSM check using the translated value, not
the one that was originally passed into the hypercall?

> --- a/xen/arch/x86/io_apic.c
> +++ b/xen/arch/x86/io_apic.c
> @@ -955,6 +955,27 @@ static int pin_2_irq(int idx, int apic, int pin)
>      return irq;
>  }
>  
> +int gsi_2_irq(int gsi)
> +{
> +    int entry, ioapic, pin;
> +
> +    ioapic = mp_find_ioapic(gsi);
> +    if ( ioapic < 0 )
> +        return -1;

Can this be a proper errno value (likely -EINVAL), please?

> +    pin = gsi - io_apic_gsi_base(ioapic);

Hmm, instead of the below: Once you have an (ioapic,pin) tuple, can't
you simply call apic_pin_2_gsi_irq()?

> +    entry = find_irq_entry(ioapic, pin, mp_INT);
> +    /*
> +     * If there is no override mapping for irq and gsi in mp_irqs,
> +     * then the default identity mapping applies.
> +     */
> +    if ( entry < 0 )
> +        return gsi;
> +
> +    return pin_2_irq(entry, ioapic, pin);

Under certain conditions this may return 0. Yet you surely don't want
to pass IRQ0 back as a result; you want to hand an error back instead.

> --- a/xen/include/public/domctl.h
> +++ b/xen/include/public/domctl.h
> @@ -465,6 +465,14 @@ struct xen_domctl_irq_permission {
>  };
>  
>  
> +/* XEN_DOMCTL_gsi_permission */
> +struct xen_domctl_gsi_permission {
> +    uint32_t gsi;
> +    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
> +    uint8_t pad[3];
> +};
> +
> +

Nit: No (new) double blank lines please. In fact (didn't I say this before
already?) you could insert between the two above, such that the existing issue
also disappears.

Jan
Chen, Jiqian June 12, 2024, 10:12 a.m. UTC | #2
Hi Jan,

On 2024/6/11 22:39, Jan Beulich wrote:
> On 07.06.2024 10:11, Jiqian Chen wrote:
>> Some type of domain don't have PIRQ, like PVH, it do not do
>> PHYSDEVOP_map_pirq for each gsi. When passthrough a device
>> to guest on PVH dom0, callstack
>> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed at
>> domain_pirq_to_irq, because PVH has no mapping of gsi, pirq
>> and irq on Xen side.
> 
> All of this is, to me at least, in pretty sharp contradiction to what
> patch 2 says and does. IOW: Do we want the concept of pIRQ in PVH, or
> do we want to keep that to PV?
It's not contradictory.
What I did is not to add the concept of PIRQs for PVH.
All previous passthrough code was implemented on the basis of pv dom0 + hvm domU.
For pv dom0, it has PIRQs. For hvm domU, it has PIRQs too.
So the codes are not suitable for PVH dom0 + hvm domU, because PVH dom0 has no PIRQs.
Patch 2 do PHYSDEVOP_map_pirq for hvm domU even when dom0 is PVH instead of PV. It didn't add PIRQs for PVH.
This patch is to grant irq( that get from gsi ) to hvm domU, why XEN_DOMCTL_irq_permission is not useful is because PVH has no PIRQs, we can't get irq through pirq like PV does.

> 
>> What's more, current hypercall XEN_DOMCTL_irq_permission require
>> passing in pirq and grant the access of irq, it is not suitable
>> for dom0 that has no PIRQ flag, because passthrough a device
>> needs gsi and grant the corresponding irq to guest. So, add a
>> new hypercall to grant gsi permission when dom0 is not PV or dom0
>> has not PIRQ flag.
>>
>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
> 
> A problem throughout the series as it seems: Who's the author of these
> patches? There's no From: saying it's not you, but your S-o-b also
> isn't first.
So I need to change to:
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I am the author.
Signed-off-by: Huang Rui <ray.huang@amd.com> means Rui sent them to upstream firstly.
Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I take continue to upstream.

> 
>> --- a/tools/libs/light/libxl_pci.c
>> +++ b/tools/libs/light/libxl_pci.c
>> @@ -1412,6 +1412,37 @@ static bool pci_supp_legacy_irq(void)
>>  #define PCI_SBDF(seg, bus, devfn) \
>>              ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
>>  
>> +static int pci_device_set_gsi(libxl_ctx *ctx,
>> +                              libxl_domid domid,
>> +                              libxl_device_pci *pci,
>> +                              bool map,
>> +                              int *gsi_back)
>> +{
>> +    int r, gsi, pirq;
>> +    uint32_t sbdf;
>> +
>> +    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
>> +    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
>> +    *gsi_back = r;
>> +    if (r < 0)
>> +        return r;
>> +
>> +    gsi = r;
>> +    pirq = r;
> 
> r is a GSI as per above; why would you store such in a variable named pirq?
> And how can ...
> 
>> +    if (map)
>> +        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
>> +    else
>> +        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);
> 
> ... that value be the correct one to pass into here? In fact, the pIRQ number
> you obtain above in the "map" case isn't handed to the caller, i.e. it is
> effectively lost. Yet that's what would need passing into such an unmap call.
Yes r is GSI and I know pirq will be replaced by xc_physdev_map_pirq.
What I do "pirq = r" is for xc_physdev_unmap_pirq, unmap need passing in pirq,
and the number of pirq is always equal to gsi.

> 
>> +    if (r)
>> +        return r;
>> +
>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
> 
> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
> you better would avoid making the call in the first place.
Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.

> 
>> +    if (r && errno == EOPNOTSUPP)
> 
> Before here you don't really need the pIRQ number; if all it really is needed
> for is ...
> 
>> +        r = xc_domain_irq_permission(ctx->xch, domid, pirq, map);
> 
> ... this, then it probably also should only be obtained when it's needed. Yet
> overall the intentions here aren't quite clear to me.
Adding the function pci_device_set_gsi is for PVH dom0, while also ensuring compatibility with PV dom0.
When PVH dom0, it does xc_physdev_map_pirq and xc_domain_gsi_permission(new hypercall for PVH dom0)
When PV dom0, it keeps the same actions as before codes, it does xc_physdev_map_pirq and xc_domain_irq_permission.

> 
>> @@ -1485,6 +1516,19 @@ static void pci_add_dm_done(libxl__egc *egc,
>>      fclose(f);
>>      if (!pci_supp_legacy_irq())
>>          goto out_no_irq;
>> +
>> +    r = pci_device_set_gsi(ctx, domid, pci, 1, &gsi);
>> +    if (gsi >= 0) {
>> +        if (r < 0) {
> 
> This unusual way of error checking likely wants a comment.
Will add in next version.

> 
>> +            rc = ERROR_FAIL;
>> +            LOGED(ERROR, domainid,
>> +                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
>> +            goto out;
>> +        } else {
>> +            goto process_permissive;
>> +        }
> 
> Btw, no need for "else" when the earlier if ends in "goto" or alike.
OK, I will change in next version.

> 
>> @@ -1493,13 +1537,6 @@ static void pci_add_dm_done(libxl__egc *egc,
>>          goto out_no_irq;
>>      }
>>      if ((fscanf(f, "%u", &irq) == 1) && irq) {
>> -        sbdf = PCI_SBDF(pci->domain, pci->bus,
>> -                        (PCI_DEVFN(pci->dev, pci->func)));
>> -        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
>> -        /* if fail, keep using irq; if success, r is gsi, use gsi */
>> -        if (r != -1) {
>> -            irq = r;
>> -        }
> 
> If I'm not mistaken, this and ...
> 
>> @@ -2255,13 +2302,6 @@ skip_bar:
>>      }
>>  
>>      if ((fscanf(f, "%u", &irq) == 1) && irq) {
>> -        sbdf = PCI_SBDF(pci->domain, pci->bus,
>> -                        (PCI_DEVFN(pci->dev, pci->func)));
>> -        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
>> -        /* if fail, keep using irq; if success, r is gsi, use gsi */
>> -        if (r != -1) {
>> -            irq = r;
>> -        }
> 
> ... this is code added by the immediately preceding patch. It's pretty odd
> for that to be deleted here again right away. Can the interaction of the
> two patches perhaps be re-arranged to avoid this anomaly?
OK, I will do in next version.

> 
>> @@ -237,6 +238,43 @@ long arch_do_domctl(
>>          break;
>>      }
>>  
>> +    case XEN_DOMCTL_gsi_permission:
>> +    {
>> +        unsigned int gsi = domctl->u.gsi_permission.gsi;
>> +        int irq = gsi_2_irq(gsi);
> 
> I'm not sure it is a good idea to issue this call ahead of the basic error
> checks below.
I will move it below the checks.

> 
>> +        bool allow = domctl->u.gsi_permission.allow_access;
> 
> This allows any non-zero values to mean "true". I think you want to bail
> on values larger than 1, much like you also want to check that the padding
> fields are all zero.
Will change in next version.

> 
>> +        /*
>> +         * If current domain is PV or it has PIRQ flag, it has a mapping
>> +         * of gsi, pirq and irq, so it should use XEN_DOMCTL_irq_permission
>> +         * to grant irq permission.
>> +         */
>> +        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
> 
> Please use currd here (and also again below).
Will change in next version.
> 
>> +        {
>> +            ret = -EOPNOTSUPP;
>> +            break;
>> +        }
>> +
>> +        if ( gsi >= nr_irqs_gsi || irq < 0 )
>> +        {
>> +            ret = -EINVAL;
>> +            break;
>> +        }
>> +
>> +        if ( !irq_access_permitted(current->domain, irq) ||
>> +             xsm_irq_permission(XSM_HOOK, d, irq, allow) )
> 
> Daniel, is it okay to issue the XSM check using the translated value, not
> the one that was originally passed into the hypercall?
> 
>> --- a/xen/arch/x86/io_apic.c
>> +++ b/xen/arch/x86/io_apic.c
>> @@ -955,6 +955,27 @@ static int pin_2_irq(int idx, int apic, int pin)
>>      return irq;
>>  }
>>  
>> +int gsi_2_irq(int gsi)
>> +{
>> +    int entry, ioapic, pin;
>> +
>> +    ioapic = mp_find_ioapic(gsi);
>> +    if ( ioapic < 0 )
>> +        return -1;
> 
> Can this be a proper errno value (likely -EINVAL), please?
Will change in next version.
> 
>> +    pin = gsi - io_apic_gsi_base(ioapic);
> 
> Hmm, instead of the below: Once you have an (ioapic,pin) tuple, can't
> you simply call apic_pin_2_gsi_irq()?
Will change in next version.
> 
>> +    entry = find_irq_entry(ioapic, pin, mp_INT);
>> +    /*
>> +     * If there is no override mapping for irq and gsi in mp_irqs,
>> +     * then the default identity mapping applies.
>> +     */
>> +    if ( entry < 0 )
>> +        return gsi;
>> +
>> +    return pin_2_irq(entry, ioapic, pin);
> 
> Under certain conditions this may return 0. Yet you surely don't want
> to pass IRQ0 back as a result; you want to hand an error back instead.
Will add in next version.
> 
>> --- a/xen/include/public/domctl.h
>> +++ b/xen/include/public/domctl.h
>> @@ -465,6 +465,14 @@ struct xen_domctl_irq_permission {
>>  };
>>  
>>  
>> +/* XEN_DOMCTL_gsi_permission */
>> +struct xen_domctl_gsi_permission {
>> +    uint32_t gsi;
>> +    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
>> +    uint8_t pad[3];
>> +};
>> +
>> +
> 
> Nit: No (new) double blank lines please. In fact (didn't I say this before
> already?) you could insert between the two above, such that the existing issue
> also disappears.
I remember I changed it here, maybe I made a mistake somewhere, and I will modify it in the next version.

> 
> Jan
Jan Beulich June 12, 2024, 10:34 a.m. UTC | #3
On 12.06.2024 12:12, Chen, Jiqian wrote:
> On 2024/6/11 22:39, Jan Beulich wrote:
>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>> Some type of domain don't have PIRQ, like PVH, it do not do
>>> PHYSDEVOP_map_pirq for each gsi. When passthrough a device
>>> to guest on PVH dom0, callstack
>>> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed at
>>> domain_pirq_to_irq, because PVH has no mapping of gsi, pirq
>>> and irq on Xen side.
>>
>> All of this is, to me at least, in pretty sharp contradiction to what
>> patch 2 says and does. IOW: Do we want the concept of pIRQ in PVH, or
>> do we want to keep that to PV?
> It's not contradictory.
> What I did is not to add the concept of PIRQs for PVH.

After your further explanations on patch 2 - yes, I see now. But in particular
there it needs making more clear what case it is that is being enabled by the
changes.

>>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>
>> A problem throughout the series as it seems: Who's the author of these
>> patches? There's no From: saying it's not you, but your S-o-b also
>> isn't first.
> So I need to change to:
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I am the author.
> Signed-off-by: Huang Rui <ray.huang@amd.com> means Rui sent them to upstream firstly.
> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I take continue to upstream.

I guess so, yes.

>>> --- a/tools/libs/light/libxl_pci.c
>>> +++ b/tools/libs/light/libxl_pci.c
>>> @@ -1412,6 +1412,37 @@ static bool pci_supp_legacy_irq(void)
>>>  #define PCI_SBDF(seg, bus, devfn) \
>>>              ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
>>>  
>>> +static int pci_device_set_gsi(libxl_ctx *ctx,
>>> +                              libxl_domid domid,
>>> +                              libxl_device_pci *pci,
>>> +                              bool map,
>>> +                              int *gsi_back)
>>> +{
>>> +    int r, gsi, pirq;
>>> +    uint32_t sbdf;
>>> +
>>> +    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
>>> +    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
>>> +    *gsi_back = r;
>>> +    if (r < 0)
>>> +        return r;
>>> +
>>> +    gsi = r;
>>> +    pirq = r;
>>
>> r is a GSI as per above; why would you store such in a variable named pirq?
>> And how can ...
>>
>>> +    if (map)
>>> +        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
>>> +    else
>>> +        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);
>>
>> ... that value be the correct one to pass into here? In fact, the pIRQ number
>> you obtain above in the "map" case isn't handed to the caller, i.e. it is
>> effectively lost. Yet that's what would need passing into such an unmap call.
> Yes r is GSI and I know pirq will be replaced by xc_physdev_map_pirq.
> What I do "pirq = r" is for xc_physdev_unmap_pirq, unmap need passing in pirq,
> and the number of pirq is always equal to gsi.

Why would that be? pIRQ is purely a software construct (of Xen's), I
don't think there's any guarantee whatsoever on the numbering. And even
if there was (for e.g. non-MSI ones), it would be pIRQ == IRQ. And recall
that elsewhere I think I meanwhile succeeded in explaining to you that
IRQ != GSI (in the common case, even if in most cases they match).

>>> +    if (r)
>>> +        return r;
>>> +
>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>
>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>> you better would avoid making the call in the first place.
> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.

Hence why call xc_domain_gsi_permission() at all on a PV Dom0?

>>> +    if (r && errno == EOPNOTSUPP)
>>
>> Before here you don't really need the pIRQ number; if all it really is needed
>> for is ...
>>
>>> +        r = xc_domain_irq_permission(ctx->xch, domid, pirq, map);
>>
>> ... this, then it probably also should only be obtained when it's needed. Yet
>> overall the intentions here aren't quite clear to me.
> Adding the function pci_device_set_gsi is for PVH dom0, while also ensuring compatibility with PV dom0.
> When PVH dom0, it does xc_physdev_map_pirq and xc_domain_gsi_permission(new hypercall for PVH dom0)
> When PV dom0, it keeps the same actions as before codes, it does xc_physdev_map_pirq and xc_domain_irq_permission.

And why does PVH Dom0 need to call xc_physdev_map_pirq(), when in that case
the pIRQ isn't used?

Jan
Chen, Jiqian June 12, 2024, 10:55 a.m. UTC | #4
On 2024/6/12 18:34, Jan Beulich wrote:
> On 12.06.2024 12:12, Chen, Jiqian wrote:
>> On 2024/6/11 22:39, Jan Beulich wrote:
>>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>>> Some type of domain don't have PIRQ, like PVH, it do not do
>>>> PHYSDEVOP_map_pirq for each gsi. When passthrough a device
>>>> to guest on PVH dom0, callstack
>>>> pci_add_dm_done->XEN_DOMCTL_irq_permission will failed at
>>>> domain_pirq_to_irq, because PVH has no mapping of gsi, pirq
>>>> and irq on Xen side.
>>>
>>> All of this is, to me at least, in pretty sharp contradiction to what
>>> patch 2 says and does. IOW: Do we want the concept of pIRQ in PVH, or
>>> do we want to keep that to PV?
>> It's not contradictory.
>> What I did is not to add the concept of PIRQs for PVH.
> 
> After your further explanations on patch 2 - yes, I see now. But in particular
> there it needs making more clear what case it is that is being enabled by the
> changes.
OK, I will add some descriptions in next version.

> 
>>>> Signed-off-by: Huang Rui <ray.huang@amd.com>
>>>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com>
>>>
>>> A problem throughout the series as it seems: Who's the author of these
>>> patches? There's no From: saying it's not you, but your S-o-b also
>>> isn't first.
>> So I need to change to:
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I am the author.
>> Signed-off-by: Huang Rui <ray.huang@amd.com> means Rui sent them to upstream firstly.
>> Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> means I take continue to upstream.
> 
> I guess so, yes.
Thanks.

> 
>>>> --- a/tools/libs/light/libxl_pci.c
>>>> +++ b/tools/libs/light/libxl_pci.c
>>>> @@ -1412,6 +1412,37 @@ static bool pci_supp_legacy_irq(void)
>>>>  #define PCI_SBDF(seg, bus, devfn) \
>>>>              ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
>>>>  
>>>> +static int pci_device_set_gsi(libxl_ctx *ctx,
>>>> +                              libxl_domid domid,
>>>> +                              libxl_device_pci *pci,
>>>> +                              bool map,
>>>> +                              int *gsi_back)
>>>> +{
>>>> +    int r, gsi, pirq;
>>>> +    uint32_t sbdf;
>>>> +
>>>> +    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
>>>> +    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
>>>> +    *gsi_back = r;
>>>> +    if (r < 0)
>>>> +        return r;
>>>> +
>>>> +    gsi = r;
>>>> +    pirq = r;
>>>
>>> r is a GSI as per above; why would you store such in a variable named pirq?
>>> And how can ...
>>>
>>>> +    if (map)
>>>> +        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
>>>> +    else
>>>> +        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);
>>>
>>> ... that value be the correct one to pass into here? In fact, the pIRQ number
>>> you obtain above in the "map" case isn't handed to the caller, i.e. it is
>>> effectively lost. Yet that's what would need passing into such an unmap call.
>> Yes r is GSI and I know pirq will be replaced by xc_physdev_map_pirq.
>> What I do "pirq = r" is for xc_physdev_unmap_pirq, unmap need passing in pirq,
>> and the number of pirq is always equal to gsi.
> 
> Why would that be? pIRQ is purely a software construct (of Xen's), I
> don't think there's any guarantee whatsoever on the numbering. And even
> if there was (for e.g. non-MSI ones), it would be pIRQ == IRQ. And recall
> that elsewhere I think I meanwhile succeeded in explaining to you that
> IRQ != GSI (in the common case, even if in most cases they match).
OK, will change in next version.

> 
>>>> +    if (r)
>>>> +        return r;
>>>> +
>>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>>
>>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>>> you better would avoid making the call in the first place.
>> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
> 
> Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?

> 
>>>> +    if (r && errno == EOPNOTSUPP)
>>>
>>> Before here you don't really need the pIRQ number; if all it really is needed
>>> for is ...
>>>
>>>> +        r = xc_domain_irq_permission(ctx->xch, domid, pirq, map);
>>>
>>> ... this, then it probably also should only be obtained when it's needed. Yet
>>> overall the intentions here aren't quite clear to me.
>> Adding the function pci_device_set_gsi is for PVH dom0, while also ensuring compatibility with PV dom0.
>> When PVH dom0, it does xc_physdev_map_pirq and xc_domain_gsi_permission(new hypercall for PVH dom0)
>> When PV dom0, it keeps the same actions as before codes, it does xc_physdev_map_pirq and xc_domain_irq_permission.
> 
> And why does PVH Dom0 need to call xc_physdev_map_pirq(), when in that case
> the pIRQ isn't used?
I didn't expect that introducing pci_device_set_gsi causes so many confusions.
I will remove it in the next version and make modifications directly in pci_add_dm_done.

> 
> Jan
Jan Beulich June 12, 2024, 11:43 a.m. UTC | #5
On 12.06.2024 12:55, Chen, Jiqian wrote:
> On 2024/6/12 18:34, Jan Beulich wrote:
>> On 12.06.2024 12:12, Chen, Jiqian wrote:
>>> On 2024/6/11 22:39, Jan Beulich wrote:
>>>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>>>
>>>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>>>> you better would avoid making the call in the first place.
>>> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
>>
>> Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
> Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?

That's a question to the tools folks, I suppose?

Jan
Anthony PERARD June 13, 2024, 12:51 p.m. UTC | #6
On Wed, Jun 12, 2024 at 10:55:14AM +0000, Chen, Jiqian wrote:
> On 2024/6/12 18:34, Jan Beulich wrote:
> > On 12.06.2024 12:12, Chen, Jiqian wrote:
> >> On 2024/6/11 22:39, Jan Beulich wrote:
> >>> On 07.06.2024 10:11, Jiqian Chen wrote:
> >>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
> >>>
> >>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
> >>> you better would avoid making the call in the first place.
> >> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
> > 
> > Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
> Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?

That might have never been needed before, so probably not. There's
libxl__domain_type() but if that works with dom0 it might return "HVM"
for PVH dom0. So if xc_domain_getinfo_single() works and give the right
info about dom0, libxl__domain_type() could be extended to deal with
dom0 I guess. I don't know if there's a good way to find out which
flavor of dom0 is running.

Cheers,
Chen, Jiqian June 14, 2024, 3:11 a.m. UTC | #7
On 2024/6/13 20:51, Anthony PERARD wrote:
> On Wed, Jun 12, 2024 at 10:55:14AM +0000, Chen, Jiqian wrote:
>> On 2024/6/12 18:34, Jan Beulich wrote:
>>> On 12.06.2024 12:12, Chen, Jiqian wrote:
>>>> On 2024/6/11 22:39, Jan Beulich wrote:
>>>>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>>>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>>>>
>>>>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>>>>> you better would avoid making the call in the first place.
>>>> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
>>>
>>> Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
>> Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?
> 
> That might have never been needed before, so probably not. There's
> libxl__domain_type() but if that works with dom0 it might return "HVM"
> for PVH dom0. So if xc_domain_getinfo_single() works and give the right
> info about dom0, libxl__domain_type() could be extended to deal with
> dom0 I guess. I don't know if there's a good way to find out which
> flavor of dom0 is running.
Thanks Anthony!
I think here we really need to check is that whether current domain has PIRQ flag(X86_EMU_USE_PIRQ) or not.
And it seems xc_domain_gsi_permission already return the information.
If current domain has no PIRQs, then I should use xc_domain_gsi_permission to grant permission, otherwise I should
keep the original function xc_domain_irq_permission.

> 
> Cheers,
>
Chen, Jiqian June 14, 2024, 4:01 a.m. UTC | #8
Hi Daniel,

On 2024/6/11 22:39, Jan Beulich wrote:
> On 07.06.2024 10:11, Jiqian Chen wrote: 
>> +    case XEN_DOMCTL_gsi_permission:
>> +    {
>> +        unsigned int gsi = domctl->u.gsi_permission.gsi;
>> +        int irq = gsi_2_irq(gsi); 
>> +        bool allow = domctl->u.gsi_permission.allow_access; 
>> +        /*
>> +         * If current domain is PV or it has PIRQ flag, it has a mapping
>> +         * of gsi, pirq and irq, so it should use XEN_DOMCTL_irq_permission
>> +         * to grant irq permission.
>> +         */
>> +        if ( is_pv_domain(current->domain) || has_pirq(current->domain) ) 
>> +        {
>> +            ret = -EOPNOTSUPP;
>> +            break;
>> +        }
>> +
>> +        if ( gsi >= nr_irqs_gsi || irq < 0 )
>> +        {
>> +            ret = -EINVAL;
>> +            break;
>> +        }
>> +
>> +        if ( !irq_access_permitted(current->domain, irq) ||
>> +             xsm_irq_permission(XSM_HOOK, d, irq, allow) )
> 
> Daniel, is it okay to issue the XSM check using the translated value, not
> the one that was originally passed into the hypercall?
Is it okay?

> 
> Jan
Jan Beulich June 14, 2024, 6:41 a.m. UTC | #9
On 14.06.2024 05:11, Chen, Jiqian wrote:
> On 2024/6/13 20:51, Anthony PERARD wrote:
>> On Wed, Jun 12, 2024 at 10:55:14AM +0000, Chen, Jiqian wrote:
>>> On 2024/6/12 18:34, Jan Beulich wrote:
>>>> On 12.06.2024 12:12, Chen, Jiqian wrote:
>>>>> On 2024/6/11 22:39, Jan Beulich wrote:
>>>>>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>>>>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>>>>>
>>>>>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>>>>>> you better would avoid making the call in the first place.
>>>>> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
>>>>
>>>> Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
>>> Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?
>>
>> That might have never been needed before, so probably not. There's
>> libxl__domain_type() but if that works with dom0 it might return "HVM"
>> for PVH dom0. So if xc_domain_getinfo_single() works and give the right
>> info about dom0, libxl__domain_type() could be extended to deal with
>> dom0 I guess. I don't know if there's a good way to find out which
>> flavor of dom0 is running.
> Thanks Anthony!
> I think here we really need to check is that whether current domain has PIRQ flag(X86_EMU_USE_PIRQ) or not.
> And it seems xc_domain_gsi_permission already return the information.

By way of failing, if I'm not mistaken? As indicated before, I don't
think you should invoke the function when it's clear it's going to fail.

Jan

> If current domain has no PIRQs, then I should use xc_domain_gsi_permission to grant permission, otherwise I should
> keep the original function xc_domain_irq_permission.
> 
>>
>> Cheers,
>>
>
Chen, Jiqian June 14, 2024, 6:53 a.m. UTC | #10
On 2024/6/14 14:41, Jan Beulich wrote:
> On 14.06.2024 05:11, Chen, Jiqian wrote:
>> On 2024/6/13 20:51, Anthony PERARD wrote:
>>> On Wed, Jun 12, 2024 at 10:55:14AM +0000, Chen, Jiqian wrote:
>>>> On 2024/6/12 18:34, Jan Beulich wrote:
>>>>> On 12.06.2024 12:12, Chen, Jiqian wrote:
>>>>>> On 2024/6/11 22:39, Jan Beulich wrote:
>>>>>>> On 07.06.2024 10:11, Jiqian Chen wrote:
>>>>>>>> +    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
>>>>>>>
>>>>>>> Looking at the hypervisor side, this will fail for PV Dom0. In which case imo
>>>>>>> you better would avoid making the call in the first place.
>>>>>> Yes, for PV dom0, the errno is EOPNOTSUPP, then it will do below xc_domain_irq_permission.
>>>>>
>>>>> Hence why call xc_domain_gsi_permission() at all on a PV Dom0?
>>>> Is there a function to distinguish that current dom0 is PV or PVH dom0 in tools/libs?
>>>
>>> That might have never been needed before, so probably not. There's
>>> libxl__domain_type() but if that works with dom0 it might return "HVM"
>>> for PVH dom0. So if xc_domain_getinfo_single() works and give the right
>>> info about dom0, libxl__domain_type() could be extended to deal with
>>> dom0 I guess. I don't know if there's a good way to find out which
>>> flavor of dom0 is running.
>> Thanks Anthony!
>> I think here we really need to check is that whether current domain has PIRQ flag(X86_EMU_USE_PIRQ) or not.
>> And it seems xc_domain_gsi_permission already return the information.
> 
> By way of failing, if I'm not mistaken? As indicated before, I don't
> think you should invoke the function when it's clear it's going to fail.
Sorry, I wrote wrong here, it should be " And it seems xc_domain_getinfo_single already return the information."
And next version will be like:
xc_domaininfo_t xcinfo;
xc_domain_getinfo_single(xc_handle, domid, &xcinfo);
if( xcinfo.arch_config.emulation_flags & XEN_X86_EMU_USE_PIRQ )
	xc_domain_irq_permission
else
	xc_domain_gsi_permission

> 
> Jan
> 
>> If current domain has no PIRQs, then I should use xc_domain_gsi_permission to grant permission, otherwise I should
>> keep the original function xc_domain_irq_permission.
>>
>>>
>>> Cheers,
>>>
>>
>
diff mbox series

Patch

diff --git a/tools/include/xenctrl.h b/tools/include/xenctrl.h
index a0381f74d24b..f3feb6848e25 100644
--- a/tools/include/xenctrl.h
+++ b/tools/include/xenctrl.h
@@ -1382,6 +1382,11 @@  int xc_domain_irq_permission(xc_interface *xch,
                              uint32_t pirq,
                              bool allow_access);
 
+int xc_domain_gsi_permission(xc_interface *xch,
+                             uint32_t domid,
+                             uint32_t gsi,
+                             bool allow_access);
+
 int xc_domain_iomem_permission(xc_interface *xch,
                                uint32_t domid,
                                unsigned long first_mfn,
diff --git a/tools/libs/ctrl/xc_domain.c b/tools/libs/ctrl/xc_domain.c
index f2d9d14b4d9f..8540e84fda93 100644
--- a/tools/libs/ctrl/xc_domain.c
+++ b/tools/libs/ctrl/xc_domain.c
@@ -1394,6 +1394,21 @@  int xc_domain_irq_permission(xc_interface *xch,
     return do_domctl(xch, &domctl);
 }
 
+int xc_domain_gsi_permission(xc_interface *xch,
+                             uint32_t domid,
+                             uint32_t gsi,
+                             bool allow_access)
+{
+    struct xen_domctl domctl = {
+        .cmd = XEN_DOMCTL_gsi_permission,
+        .domain = domid,
+        .u.gsi_permission.gsi = gsi,
+        .u.gsi_permission.allow_access = allow_access,
+    };
+
+    return do_domctl(xch, &domctl);
+}
+
 int xc_domain_iomem_permission(xc_interface *xch,
                                uint32_t domid,
                                unsigned long first_mfn,
diff --git a/tools/libs/light/libxl_pci.c b/tools/libs/light/libxl_pci.c
index 7e44d4c3ae2b..b8ec37d8d7e3 100644
--- a/tools/libs/light/libxl_pci.c
+++ b/tools/libs/light/libxl_pci.c
@@ -1412,6 +1412,37 @@  static bool pci_supp_legacy_irq(void)
 #define PCI_SBDF(seg, bus, devfn) \
             ((((uint32_t)(seg)) << 16) | (PCI_DEVID(bus, devfn)))
 
+static int pci_device_set_gsi(libxl_ctx *ctx,
+                              libxl_domid domid,
+                              libxl_device_pci *pci,
+                              bool map,
+                              int *gsi_back)
+{
+    int r, gsi, pirq;
+    uint32_t sbdf;
+
+    sbdf = PCI_SBDF(pci->domain, pci->bus, (PCI_DEVFN(pci->dev, pci->func)));
+    r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
+    *gsi_back = r;
+    if (r < 0)
+        return r;
+
+    gsi = r;
+    pirq = r;
+    if (map)
+        r = xc_physdev_map_pirq(ctx->xch, domid, gsi, &pirq);
+    else
+        r = xc_physdev_unmap_pirq(ctx->xch, domid, pirq);
+    if (r)
+        return r;
+
+    r = xc_domain_gsi_permission(ctx->xch, domid, gsi, map);
+    if (r && errno == EOPNOTSUPP)
+        r = xc_domain_irq_permission(ctx->xch, domid, pirq, map);
+
+    return r;
+}
+
 static void pci_add_dm_done(libxl__egc *egc,
                             pci_add_state *pas,
                             int rc)
@@ -1424,10 +1455,10 @@  static void pci_add_dm_done(libxl__egc *egc,
     unsigned long long start, end, flags, size;
     int irq, i;
     int r;
-    uint32_t sbdf;
     uint32_t flag = XEN_DOMCTL_DEV_RDM_RELAXED;
     uint32_t domainid = domid;
     bool isstubdom = libxl_is_stubdom(ctx, domid, &domainid);
+    int gsi;
 
     /* Convenience aliases */
     bool starting = pas->starting;
@@ -1485,6 +1516,19 @@  static void pci_add_dm_done(libxl__egc *egc,
     fclose(f);
     if (!pci_supp_legacy_irq())
         goto out_no_irq;
+
+    r = pci_device_set_gsi(ctx, domid, pci, 1, &gsi);
+    if (gsi >= 0) {
+        if (r < 0) {
+            rc = ERROR_FAIL;
+            LOGED(ERROR, domainid,
+                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
+            goto out;
+        } else {
+            goto process_permissive;
+        }
+    }
+    /* if gsi < 0, keep using irq */
     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                                 pci->bus, pci->dev, pci->func);
     f = fopen(sysfs_path, "r");
@@ -1493,13 +1537,6 @@  static void pci_add_dm_done(libxl__egc *egc,
         goto out_no_irq;
     }
     if ((fscanf(f, "%u", &irq) == 1) && irq) {
-        sbdf = PCI_SBDF(pci->domain, pci->bus,
-                        (PCI_DEVFN(pci->dev, pci->func)));
-        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
-        /* if fail, keep using irq; if success, r is gsi, use gsi */
-        if (r != -1) {
-            irq = r;
-        }
         r = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
         if (r < 0) {
             LOGED(ERROR, domainid, "xc_physdev_map_pirq irq=%d (error=%d)",
@@ -1519,6 +1556,7 @@  static void pci_add_dm_done(libxl__egc *egc,
     }
     fclose(f);
 
+process_permissive:
     /* Don't restrict writes to the PCI config space from this VM */
     if (pci->permissive) {
         if ( sysfs_write_bdf(gc, SYSFS_PCIBACK_DRIVER"/permissive",
@@ -2186,10 +2224,10 @@  static void pci_remove_detached(libxl__egc *egc,
     int  irq = 0, i, stubdomid = 0;
     const char *sysfs_path;
     FILE *f;
-    uint32_t sbdf;
     uint32_t domainid = prs->domid;
     bool isstubdom;
     int r;
+    int gsi;
 
     /* Convenience aliases */
     libxl_device_pci *const pci = &prs->pci;
@@ -2245,6 +2283,15 @@  skip_bar:
     if (!pci_supp_legacy_irq())
         goto skip_legacy_irq;
 
+    r = pci_device_set_gsi(ctx, domid, pci, 0, &gsi);
+    if (gsi >= 0) {
+        if (r < 0) {
+            LOGED(ERROR, domainid,
+                  "pci_device_set_gsi gsi=%d (error=%d)", gsi, errno);
+        }
+        goto skip_legacy_irq;
+    }
+    /* if gsi < 0, keep using irq */
     sysfs_path = GCSPRINTF(SYSFS_PCI_DEV"/"PCI_BDF"/irq", pci->domain,
                            pci->bus, pci->dev, pci->func);
 
@@ -2255,13 +2302,6 @@  skip_bar:
     }
 
     if ((fscanf(f, "%u", &irq) == 1) && irq) {
-        sbdf = PCI_SBDF(pci->domain, pci->bus,
-                        (PCI_DEVFN(pci->dev, pci->func)));
-        r = xc_physdev_gsi_from_dev(ctx->xch, sbdf);
-        /* if fail, keep using irq; if success, r is gsi, use gsi */
-        if (r != -1) {
-            irq = r;
-        }
         rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
         if (rc < 0) {
             /*
diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
index 9a72d57333e9..c69b4566ac4f 100644
--- a/xen/arch/x86/domctl.c
+++ b/xen/arch/x86/domctl.c
@@ -36,6 +36,7 @@ 
 #include <asm/xstate.h>
 #include <asm/psr.h>
 #include <asm/cpu-policy.h>
+#include <asm/io_apic.h>
 
 static int update_domain_cpu_policy(struct domain *d,
                                     xen_domctl_cpu_policy_t *xdpc)
@@ -237,6 +238,43 @@  long arch_do_domctl(
         break;
     }
 
+    case XEN_DOMCTL_gsi_permission:
+    {
+        unsigned int gsi = domctl->u.gsi_permission.gsi;
+        int irq = gsi_2_irq(gsi);
+        bool allow = domctl->u.gsi_permission.allow_access;
+
+        /*
+         * If current domain is PV or it has PIRQ flag, it has a mapping
+         * of gsi, pirq and irq, so it should use XEN_DOMCTL_irq_permission
+         * to grant irq permission.
+         */
+        if ( is_pv_domain(current->domain) || has_pirq(current->domain) )
+        {
+            ret = -EOPNOTSUPP;
+            break;
+        }
+
+        if ( gsi >= nr_irqs_gsi || irq < 0 )
+        {
+            ret = -EINVAL;
+            break;
+        }
+
+        if ( !irq_access_permitted(current->domain, irq) ||
+             xsm_irq_permission(XSM_HOOK, d, irq, allow) )
+        {
+            ret = -EPERM;
+            break;
+        }
+
+        if ( allow )
+            ret = irq_permit_access(d, irq);
+        else
+            ret = irq_deny_access(d, irq);
+        break;
+    }
+
     case XEN_DOMCTL_getpageframeinfo3:
     {
         unsigned int num = domctl->u.getpageframeinfo3.num;
diff --git a/xen/arch/x86/include/asm/io_apic.h b/xen/arch/x86/include/asm/io_apic.h
index 78268ea8f666..7e86d8337758 100644
--- a/xen/arch/x86/include/asm/io_apic.h
+++ b/xen/arch/x86/include/asm/io_apic.h
@@ -213,5 +213,7 @@  unsigned highest_gsi(void);
 
 int ioapic_guest_read( unsigned long physbase, unsigned int reg, u32 *pval);
 int ioapic_guest_write(unsigned long physbase, unsigned int reg, u32 val);
+int mp_find_ioapic(int gsi);
+int gsi_2_irq(int gsi);
 
 #endif
diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c
index b48a64246548..d03bcdef4d19 100644
--- a/xen/arch/x86/io_apic.c
+++ b/xen/arch/x86/io_apic.c
@@ -955,6 +955,27 @@  static int pin_2_irq(int idx, int apic, int pin)
     return irq;
 }
 
+int gsi_2_irq(int gsi)
+{
+    int entry, ioapic, pin;
+
+    ioapic = mp_find_ioapic(gsi);
+    if ( ioapic < 0 )
+        return -1;
+
+    pin = gsi - io_apic_gsi_base(ioapic);
+
+    entry = find_irq_entry(ioapic, pin, mp_INT);
+    /*
+     * If there is no override mapping for irq and gsi in mp_irqs,
+     * then the default identity mapping applies.
+     */
+    if ( entry < 0 )
+        return gsi;
+
+    return pin_2_irq(entry, ioapic, pin);
+}
+
 static inline int IO_APIC_irq_trigger(int irq)
 {
     int apic, idx, pin;
diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index d8ccab2449c6..c95da0de5770 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -841,8 +841,7 @@  static struct mp_ioapic_routing {
 } mp_ioapic_routing[MAX_IO_APICS];
 
 
-static int mp_find_ioapic (
-	int			gsi)
+int mp_find_ioapic(int gsi)
 {
 	unsigned int		i;
 
diff --git a/xen/include/public/domctl.h b/xen/include/public/domctl.h
index 2a49fe46ce25..f933af8722f4 100644
--- a/xen/include/public/domctl.h
+++ b/xen/include/public/domctl.h
@@ -465,6 +465,14 @@  struct xen_domctl_irq_permission {
 };
 
 
+/* XEN_DOMCTL_gsi_permission */
+struct xen_domctl_gsi_permission {
+    uint32_t gsi;
+    uint8_t allow_access;    /* flag to specify enable/disable of x86 gsi access */
+    uint8_t pad[3];
+};
+
+
 /* XEN_DOMCTL_iomem_permission */
 struct xen_domctl_iomem_permission {
     uint64_aligned_t first_mfn;/* first page (physical page number) in range */
@@ -1306,6 +1314,7 @@  struct xen_domctl {
 #define XEN_DOMCTL_get_paging_mempool_size       85
 #define XEN_DOMCTL_set_paging_mempool_size       86
 #define XEN_DOMCTL_dt_overlay                    87
+#define XEN_DOMCTL_gsi_permission                88
 #define XEN_DOMCTL_gdbsx_guestmemio            1000
 #define XEN_DOMCTL_gdbsx_pausevcpu             1001
 #define XEN_DOMCTL_gdbsx_unpausevcpu           1002
@@ -1328,6 +1337,7 @@  struct xen_domctl {
         struct xen_domctl_setdomainhandle   setdomainhandle;
         struct xen_domctl_setdebugging      setdebugging;
         struct xen_domctl_irq_permission    irq_permission;
+        struct xen_domctl_gsi_permission    gsi_permission;
         struct xen_domctl_iomem_permission  iomem_permission;
         struct xen_domctl_ioport_permission ioport_permission;
         struct xen_domctl_hypercall_init    hypercall_init;
diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c
index 5e88c71b8e22..a5b134c91101 100644
--- a/xen/xsm/flask/hooks.c
+++ b/xen/xsm/flask/hooks.c
@@ -685,6 +685,7 @@  static int cf_check flask_domctl(struct domain *d, int cmd)
     case XEN_DOMCTL_shadow_op:
     case XEN_DOMCTL_ioport_permission:
     case XEN_DOMCTL_ioport_mapping:
+    case XEN_DOMCTL_gsi_permission:
 #endif
 #ifdef CONFIG_HAS_PASSTHROUGH
     /*