diff mbox series

[v4,3/3] KVM: arm64: Release the ownership of the hyp rx buffer to Trustzone

Message ID 20250326113901.3308804-4-sebastianene@google.com (mailing list archive)
State New
Headers show
Series KVM: arm64: Separate the hyp FF-A buffers init from the host | expand

Commit Message

Sebastian Ene March 26, 2025, 11:39 a.m. UTC
Introduce the release FF-A call to notify Trustzone that the hypervisor
has finished copying the data from the buffer shared with Trustzone to
the non-secure partition.

Reported-by: Andrei Homescu <ahomescu@google.com>
Signed-off-by: Sebastian Ene <sebastianene@google.com>
---
 arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

Comments

Quentin Perret March 26, 2025, 4:48 p.m. UTC | #1
On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> Introduce the release FF-A call to notify Trustzone that the hypervisor
> has finished copying the data from the buffer shared with Trustzone to
> the non-secure partition.
>
> Reported-by: Andrei Homescu <ahomescu@google.com>
> Signed-off-by: Sebastian Ene <sebastianene@google.com>
> ---
>  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
>  1 file changed, 6 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> index 6df6131f1107..ac898ea6274a 100644
> --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
>  	DECLARE_REG(u32, uuid3, ctxt, 4);
>  	DECLARE_REG(u32, flags, ctxt, 5);
>  	u32 count, partition_sz, copy_sz;
> +	struct arm_smccc_res _res;
>  
>  	hyp_spin_lock(&host_buffers.lock);
>  	if (!host_buffers.rx) {
> @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
>  
>  	count = res->a2;
>  	if (!count)
> -		goto out_unlock;
> +		goto release_rx;
>  
>  	if (hyp_ffa_version > FFA_VERSION_1_0) {
>  		/* Get the number of partitions deployed in the system */
> -		if (flags & 0x1)
> +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
>  			goto out_unlock;
>  
>  		partition_sz  = res->a3;
> @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
>  	copy_sz = partition_sz * count;
>  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
>  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> -		goto out_unlock;
> +		goto release_rx;
>  	}
>  
>  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> +release_rx:
> +	ffa_rx_release(&_res);

I'm a bit confused about this release call here. In the pKVM FF-A proxy
model, the hypervisor is essentially 'transparent', so do we not expect
EL1 to issue that instead? How is EL1 supposed to know that the
hypervisor has already sent the release call? And isn't EL1 going to be
confused if the content of the buffer is overwritten before it has issued
the release call itself? What would otherwise prevent that from
happening?

Thanks,
Quentin

>  out_unlock:
>  	hyp_spin_unlock(&host_buffers.lock);
>  }
> -- 
> 2.49.0.395.g12beb8f557-goog
>
Sebastian Ene March 27, 2025, 9:37 a.m. UTC | #2
On Wed, Mar 26, 2025 at 04:48:33PM +0000, Quentin Perret wrote:
> On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> > Introduce the release FF-A call to notify Trustzone that the hypervisor
> > has finished copying the data from the buffer shared with Trustzone to
> > the non-secure partition.
> >
> > Reported-by: Andrei Homescu <ahomescu@google.com>
> > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > ---
> >  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
> >  1 file changed, 6 insertions(+), 3 deletions(-)
> > 
> > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > index 6df6131f1107..ac898ea6274a 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> >  	DECLARE_REG(u32, uuid3, ctxt, 4);
> >  	DECLARE_REG(u32, flags, ctxt, 5);
> >  	u32 count, partition_sz, copy_sz;
> > +	struct arm_smccc_res _res;
> >  
> >  	hyp_spin_lock(&host_buffers.lock);
> >  	if (!host_buffers.rx) {
> > @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> >  
> >  	count = res->a2;
> >  	if (!count)
> > -		goto out_unlock;
> > +		goto release_rx;
> >  
> >  	if (hyp_ffa_version > FFA_VERSION_1_0) {
> >  		/* Get the number of partitions deployed in the system */
> > -		if (flags & 0x1)
> > +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
> >  			goto out_unlock;
> >  
> >  		partition_sz  = res->a3;
> > @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> >  	copy_sz = partition_sz * count;
> >  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> >  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> > -		goto out_unlock;
> > +		goto release_rx;
> >  	}
> >  
> >  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> > +release_rx:
> > +	ffa_rx_release(&_res);

Hi,

> 
> I'm a bit confused about this release call here. In the pKVM FF-A proxy
> model, the hypervisor is essentially 'transparent', so do we not expect
> EL1 to issue that instead?

I think the EL1 should also issue this call irrespective of what the
hypervisor is doing. Sudeep can correct me here if I am wrong, but this
is my take on this.

I am looking at this as a way of signaling the availability of the rx
buffer across partitions. Some calls, when invoked, place the buffer in
a 'locked state'.


> How is EL1 supposed to know that the
> hypervisor has already sent the release call?

It doesn't need to know; it issues the call as if there were no hypervisor
in between. Why would it need to know?

> And isn't EL1 going to be
> confused if the content of the buffer is overridden before is has issued
> the release call itself?

The hypervisor should prevent changes to the buffer mapped between the
host and itself until the release_rx call is issued from the host.
If another call that wants to make use of the rx buffer sneaks in, we
would have to revoke it with BUSY until rx_release is sent.

>What would otherwise prevent that from
> happening?

> 
> Thanks,
> Quentin
>

Thanks,
Sebastian

> >  out_unlock:
> >  	hyp_spin_unlock(&host_buffers.lock);
> >  }
> > -- 
> > 2.49.0.395.g12beb8f557-goog
> >
Sudeep Holla March 27, 2025, 9:48 a.m. UTC | #3
On Thu, Mar 27, 2025 at 09:37:31AM +0000, Sebastian Ene wrote:
> On Wed, Mar 26, 2025 at 04:48:33PM +0000, Quentin Perret wrote:
> > On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> > > Introduce the release FF-A call to notify Trustzone that the hypervisor
> > > has finished copying the data from the buffer shared with Trustzone to
> > > the non-secure partition.
> > >
> > > Reported-by: Andrei Homescu <ahomescu@google.com>
> > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > ---
> > >  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
> > >  1 file changed, 6 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > index 6df6131f1107..ac898ea6274a 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  	DECLARE_REG(u32, uuid3, ctxt, 4);
> > >  	DECLARE_REG(u32, flags, ctxt, 5);
> > >  	u32 count, partition_sz, copy_sz;
> > > +	struct arm_smccc_res _res;
> > >  
> > >  	hyp_spin_lock(&host_buffers.lock);
> > >  	if (!host_buffers.rx) {
> > > @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  
> > >  	count = res->a2;
> > >  	if (!count)
> > > -		goto out_unlock;
> > > +		goto release_rx;
> > >  
> > >  	if (hyp_ffa_version > FFA_VERSION_1_0) {
> > >  		/* Get the number of partitions deployed in the system */
> > > -		if (flags & 0x1)
> > > +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
> > >  			goto out_unlock;
> > >  
> > >  		partition_sz  = res->a3;
> > > @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  	copy_sz = partition_sz * count;
> > >  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> > >  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> > > -		goto out_unlock;
> > > +		goto release_rx;
> > >  	}
> > >  
> > >  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> > > +release_rx:
> > > +	ffa_rx_release(&_res);
> 
> Hi,
> 
> > 
> > I'm a bit confused about this release call here. In the pKVM FF-A proxy
> > model, the hypervisor is essentially 'transparent', so do we not expect
> > EL1 to issue that instead?
> 
> I think the EL1 should also issue this call irrespective of what the
> hypervisor is doing. Sudeep can correct me here if I am wrong, but this
> is my take on this.
>

Indeed, the driver will not know if it is running in EL1 with or without
FF-A proxy or even at EL2.

> I am looking at this as a way of signaling the availability of the rx
> buffer across partitions. There are some calls that when invoked, they
> place the buffer in a 'locked state'.
> 
> 
> > How is EL1 supposed to know that the
> > hypervisor has already sent the release call?
> 
> It doesn't need to know, it issues the call as there is no hypervisor
> in-between, why would it need to know ?
> 

Exactly.

> > And isn't EL1 going to be
> > confused if the content of the buffer is overridden before is has issued
> > the release call itself?
> 

Yes, good point. I need to recall the details, but I am assuming the FF-A
proxy in pKVM maps the Tx/Rx buffers with the host in EL2 and maintains
another Tx/Rx pair with the SPMC on the secure side, right?

> The hypervisor should prevent changes to the buffer mapped between the
> host and itself until the release_rx call is issued from the host.

OK, this sounds like my understanding above is indeed correct?

> If another call that wants to make use of the rx buffer sneaks in, we
> would have to revoke it with BUSY until rx_release is sent.
>

Sounds good to me.
Quentin Perret March 28, 2025, 11:39 a.m. UTC | #4
On Thursday 27 Mar 2025 at 09:37:31 (+0000), Sebastian Ene wrote:
> On Wed, Mar 26, 2025 at 04:48:33PM +0000, Quentin Perret wrote:
> > On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> > > Introduce the release FF-A call to notify Trustzone that the hypervisor
> > > has finished copying the data from the buffer shared with Trustzone to
> > > the non-secure partition.
> > >
> > > Reported-by: Andrei Homescu <ahomescu@google.com>
> > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > ---
> > >  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
> > >  1 file changed, 6 insertions(+), 3 deletions(-)
> > > 
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > index 6df6131f1107..ac898ea6274a 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  	DECLARE_REG(u32, uuid3, ctxt, 4);
> > >  	DECLARE_REG(u32, flags, ctxt, 5);
> > >  	u32 count, partition_sz, copy_sz;
> > > +	struct arm_smccc_res _res;
> > >  
> > >  	hyp_spin_lock(&host_buffers.lock);
> > >  	if (!host_buffers.rx) {
> > > @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  
> > >  	count = res->a2;
> > >  	if (!count)
> > > -		goto out_unlock;
> > > +		goto release_rx;
> > >  
> > >  	if (hyp_ffa_version > FFA_VERSION_1_0) {
> > >  		/* Get the number of partitions deployed in the system */
> > > -		if (flags & 0x1)
> > > +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
> > >  			goto out_unlock;
> > >  
> > >  		partition_sz  = res->a3;
> > > @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > >  	copy_sz = partition_sz * count;
> > >  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> > >  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> > > -		goto out_unlock;
> > > +		goto release_rx;
> > >  	}
> > >  
> > >  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> > > +release_rx:
> > > +	ffa_rx_release(&_res);
> 
> Hi,
> 
> > 
> > I'm a bit confused about this release call here. In the pKVM FF-A proxy
> > model, the hypervisor is essentially 'transparent', so do we not expect
> > EL1 to issue that instead?
> 
> I think the EL1 should also issue this call irrespective of what the
> hypervisor is doing. Sudeep can correct me here if I am wrong, but this
> is my take on this.

Agreed, but with the code as it is implemented in this patch, I think
that from the host perspective there is a difference in semantics for
the release call. W/o pKVM the buffer is essentially 'locked' until
the host issues the release call. With pKVM, the buffer is effectively
unlocked immediately upon return from the PARTITION_INFO_GET call
because the hypervisor happened to have issued the release call
behind our back. And there is no way for the host to know the difference.

I understand that we can argue the hypervisor-issued call is for the
EL2-TZ buffers while the EL1-issued call is for the EL1-EL2 buffers,
but that's not quite how it works, since pKVM just blindly forwards
the release calls coming from EL1 w/o implementing the expected
semantics.

> I am looking at this as a way of signaling the availability of the rx
> buffer across partitions. There are some calls that when invoked, they
> place the buffer in a 'locked state'.
> 
> 
> > How is EL1 supposed to know that the
> > hypervisor has already sent the release call?
> 
> It doesn't need to know, it issues the call as there is no hypervisor
> in-between, why would it need to know ?

As per the comment above, there is a host-visible difference in semantics
with or without pKVM which IMO is problematic.

For example, if the host issues two PARTITION_INFO_GET calls back to
back w/o a release call in between, IIUC the expectation from the
FF-A spec is for the second one to fail. With this patch applied, the
second call would succeed thanks to the implicit release-call issued by
pKVM. But it would fail as it is supposed to do w/o pKVM.

I'm not entirely sure if that's gonna cause a real-world problem, but it
does feel unnecessary at best. Are we trying to fix an EL1 bug in the
hypervisor here?

> > And isn't EL1 going to be
> > confused if the content of the buffer is overridden before is has issued
> > the release call itself?
> 
> The hypervisor should prevent changes to the buffer mapped between the
> host and itself until the release_rx call is issued from the host.
> If another call that wants to make use of the rx buffer sneaks in, we
> would have to revoke it with BUSY until rx_release is sent.

Right, exactly, but that's not implemented at the moment. IMO it is much
simpler to rely on the host to issue the release call and just not do it
from the PARTITION_INFO_GET path in pKVM. And if we're scared about a
release call racing with PARTITION_INFO_GET at pKVM level, all we should
need to do is forward the release call with the host_buffers.lock held I
think. Wdyt?

Thanks,
Quentin
Sebastian Ene March 28, 2025, 2:18 p.m. UTC | #5
On Fri, Mar 28, 2025 at 11:39:45AM +0000, Quentin Perret wrote:
> On Thursday 27 Mar 2025 at 09:37:31 (+0000), Sebastian Ene wrote:
> > On Wed, Mar 26, 2025 at 04:48:33PM +0000, Quentin Perret wrote:
> > > On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> > > > Introduce the release FF-A call to notify Trustzone that the hypervisor
> > > > has finished copying the data from the buffer shared with Trustzone to
> > > > the non-secure partition.
> > > >
> > > > Reported-by: Andrei Homescu <ahomescu@google.com>
> > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > ---
> > > >  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
> > > >  1 file changed, 6 insertions(+), 3 deletions(-)
> > > > 
> > > > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > index 6df6131f1107..ac898ea6274a 100644
> > > > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > >  	DECLARE_REG(u32, uuid3, ctxt, 4);
> > > >  	DECLARE_REG(u32, flags, ctxt, 5);
> > > >  	u32 count, partition_sz, copy_sz;
> > > > +	struct arm_smccc_res _res;
> > > >  
> > > >  	hyp_spin_lock(&host_buffers.lock);
> > > >  	if (!host_buffers.rx) {
> > > > @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > >  
> > > >  	count = res->a2;
> > > >  	if (!count)
> > > > -		goto out_unlock;
> > > > +		goto release_rx;
> > > >  
> > > >  	if (hyp_ffa_version > FFA_VERSION_1_0) {
> > > >  		/* Get the number of partitions deployed in the system */
> > > > -		if (flags & 0x1)
> > > > +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
> > > >  			goto out_unlock;
> > > >  
> > > >  		partition_sz  = res->a3;
> > > > @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > >  	copy_sz = partition_sz * count;
> > > >  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> > > >  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> > > > -		goto out_unlock;
> > > > +		goto release_rx;
> > > >  	}
> > > >  
> > > >  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> > > > +release_rx:
> > > > +	ffa_rx_release(&_res);
> > 
> > Hi,
> > 
> > > 
> > > I'm a bit confused about this release call here. In the pKVM FF-A proxy
> > > model, the hypervisor is essentially 'transparent', so do we not expect
> > > EL1 to issue that instead?
> > 
> > I think the EL1 should also issue this call irrespective of what the
> > hypervisor is doing. Sudeep can correct me here if I am wrong, but this
> > is my take on this.

> 
> Agreed, but with the code as it is implemented in this patch, I think
> that from the host perspective there is a difference in semantic for
> the release call. W/o pKVM the buffer is essentially 'locked' until
> the host issues the release call. With pKVM, the buffer is effectively
> unlocked immediately upon return from the PARTITION_INFO_GET call
> because the hypervisor happened to have issued the release call
> behind our back. And there is no way the host to know the difference.

I understand your point that you are trying to make the hypervisor
transparent, but it is not behaving in this way. One example is that we still
enforce a limit on the size of the ffa_descr_buffer for reclaiming memory.
Leaving this aside, I am curious (maybe for another thread): what do we
gain by trying to keep the same behaviour as w/o pKVM?

> 
> I understand that we can argue the hypervisor-issued call is for the
> EL2-TZ buffers while the EL1-issued call is for the EL1-EL2 buffers,
> but that's not quite working that way since pKVM just blindly forwards
> the release calls coming from EL1 w/o implementing the expected
> semantic.
>

I think blindly forwarding the release call is problematic and we should
prevent this from happening. It is wrong from multiple points of view: the
host is not the owner of the hyp_rx buffer and you are asking TZ to release
the hypervisor RX buffer by forwarding it. Do you agree on that? I think
this patch should include this.

> > I am looking at this as a way of signaling the availability of the rx
> > buffer across partitions. There are some calls that when invoked, they
> > place the buffer in a 'locked state'.
> > 
> > 
> > > How is EL1 supposed to know that the
> > > hypervisor has already sent the release call?
> > 
> > It doesn't need to know, it issues the call as there is no hypervisor
> > in-between, why would it need to know ?
> 
> As per the comment above, there is a host-visible difference in semantic
> with or without pKVM which IMO is problematic.

If we apply what I suggested earlier we won't have an issue with the
semantics of this call, but it would make the code a mess. I don't think
keeping the semantics really makes a difference for this particular call.

> 
> For example, if the host issues two PARTITION_INFO_GET calls back to
> back w/o a release call in between, IIUC the expectation from the
> FF-A spec is for the second one to fail. With this patch applied, the
> second call would succeed thanks to the implicit release-call issued by
> pKVM. But it would fail as it is supposed to do w/o pKVM.
> 
> I'm not entirely sure if that's gonna cause real-world problem, but it
> does feel unecessary at best. Are we trying to fix an EL1 bug in the
> hypervisor here?
>

This was most likely observed from an issue in the EL1 driver (by not
calling release explicitly); it was reported by Andrei Homescu
<ahomescu@google.com>. It appears that we also have to do something
in the hyp about it, and we agreed on this with Will and Sudeep in the
previous version of the patch:
https://lore.kernel.org/all/20250313121559.GB7356@willie-the-truck/

> > > And isn't EL1 going to be
> > > confused if the content of the buffer is overridden before is has issued
> > > the release call itself?
> > 
> > The hypervisor should prevent changes to the buffer mapped between the
> > host and itself until the release_rx call is issued from the host.
> > If another call that wants to make use of the rx buffer sneaks in, we
> > would have to revoke it with BUSY until rx_release is sent.
> 
> Right, exactly, but that's not implemented at the moment. IMO it is much
> simpler to rely on the host to issue the release call and just not do it
> from the PARTITION_INFO_GET path in pKVM. And if we're scared about a
> release call racing with PARTITION_INFO_GET at pKVM level, all we should
> need to do is forward the release call with the host_buffers.lock held I
> think. Wdyt?
>


> Thanks,
> Quentin
Quentin Perret April 1, 2025, noon UTC | #6
On Friday 28 Mar 2025 at 14:18:55 (+0000), Sebastian Ene wrote:
> On Fri, Mar 28, 2025 at 11:39:45AM +0000, Quentin Perret wrote:
> > On Thursday 27 Mar 2025 at 09:37:31 (+0000), Sebastian Ene wrote:
> > > On Wed, Mar 26, 2025 at 04:48:33PM +0000, Quentin Perret wrote:
> > > > On Wednesday 26 Mar 2025 at 11:39:01 (+0000), Sebastian Ene wrote:
> > > > > Introduce the release FF-A call to notify Trustzone that the hypervisor
> > > > > has finished copying the data from the buffer shared with Trustzone to
> > > > > the non-secure partition.
> > > > >
> > > > > Reported-by: Andrei Homescu <ahomescu@google.com>
> > > > > Signed-off-by: Sebastian Ene <sebastianene@google.com>
> > > > > ---
> > > > >  arch/arm64/kvm/hyp/nvhe/ffa.c | 9 ++++++---
> > > > >  1 file changed, 6 insertions(+), 3 deletions(-)
> > > > > 
> > > > > diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > > index 6df6131f1107..ac898ea6274a 100644
> > > > > --- a/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > > +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
> > > > > @@ -749,6 +749,7 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > > >  	DECLARE_REG(u32, uuid3, ctxt, 4);
> > > > >  	DECLARE_REG(u32, flags, ctxt, 5);
> > > > >  	u32 count, partition_sz, copy_sz;
> > > > > +	struct arm_smccc_res _res;
> > > > >  
> > > > >  	hyp_spin_lock(&host_buffers.lock);
> > > > >  	if (!host_buffers.rx) {
> > > > > @@ -765,11 +766,11 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > > >  
> > > > >  	count = res->a2;
> > > > >  	if (!count)
> > > > > -		goto out_unlock;
> > > > > +		goto release_rx;
> > > > >  
> > > > >  	if (hyp_ffa_version > FFA_VERSION_1_0) {
> > > > >  		/* Get the number of partitions deployed in the system */
> > > > > -		if (flags & 0x1)
> > > > > +		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
> > > > >  			goto out_unlock;
> > > > >  
> > > > >  		partition_sz  = res->a3;
> > > > > @@ -781,10 +782,12 @@ static void do_ffa_part_get(struct arm_smccc_res *res,
> > > > >  	copy_sz = partition_sz * count;
> > > > >  	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
> > > > >  		ffa_to_smccc_res(res, FFA_RET_ABORTED);
> > > > > -		goto out_unlock;
> > > > > +		goto release_rx;
> > > > >  	}
> > > > >  
> > > > >  	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
> > > > > +release_rx:
> > > > > +	ffa_rx_release(&_res);
> > > 
> > > Hi,
> > > 
> > > > 
> > > > I'm a bit confused about this release call here. In the pKVM FF-A proxy
> > > > model, the hypervisor is essentially 'transparent', so do we not expect
> > > > EL1 to issue that instead?
> > > 
> > > I think the EL1 should also issue this call irrespective of what the
> > > hypervisor is doing. Sudeep can correct me here if I am wrong, but this
> > > is my take on this.
> 
> > 
> > Agreed, but with the code as it is implemented in this patch, I think
> > that from the host perspective there is a difference in semantic for
> > the release call. W/o pKVM the buffer is essentially 'locked' until
> > the host issues the release call. With pKVM, the buffer is effectively
> > unlocked immediately upon return from the PARTITION_INFO_GET call
> > because the hypervisor happened to have issued the release call
> > behind our back. And there is no way the host to know the difference.
> 
> I understand your point that you are trying to make the hypervisor
> transparent, but it is not behaving in this way. One example is that we still
> enforce a limit on the size of the ffa_descr_buffer for reclaiming memory.
> Letting this aside, I am curios (maybe on another thread) what do we
> gain by trying to keep the same behaviour w/o pkvm ?

The idea was to avoid as much as possible needing driver-side changes
depending on pKVM being present or not, to allow code re-use as much as
possible.

> > 
> > I understand that we can argue the hypervisor-issued call is for the
> > EL2-TZ buffers while the EL1-issued call is for the EL1-EL2 buffers,
> > but that's not quite working that way since pKVM just blindly forwards
> > the release calls coming from EL1 w/o implementing the expected
> > semantic.
> >
> 
> I think blindly-forwarding the release call is problematic and we should
> prevent this from happening. It is wrong from multipple pov: the host is not
> the owner of the hyp_rx buffer and you are asking TZ to release the
> hypervisor RX buffer by forwarding it. Do you agree on that ? I think
> like this patch should include this.
> 
> > > I am looking at this as a way of signaling the availability of the rx
> > > buffer across partitions. There are some calls that when invoked, they
> > > place the buffer in a 'locked state'.
> > > 
> > > 
> > > > How is EL1 supposed to know that the
> > > > hypervisor has already sent the release call?
> > > 
> > > It doesn't need to know, it issues the call as there is no hypervisor
> > > in-between, why would it need to know ?
> > 
> > As per the comment above, there is a host-visible difference in semantic
> > with or without pKVM which IMO is problematic.
> 
> If we apply what I suggested earlier we won't have an issue with the
> semantic for this call but it would make the code a mess. I don't think
> for this particular call keeping semantics really makes a difference.

Right, if we implemented the release call properly in pKVM I'd be happy
with this patch, but I just don't think we should only do one half. We
either do it properly in pKVM or leave it to the host -- the latter
feels simpler to me, but no strong opinions.

> 
> > 
> > For example, if the host issues two PARTITION_INFO_GET calls back to
> > back w/o a release call in between, IIUC the expectation from the
> > FF-A spec is for the second one to fail. With this patch applied, the
> > second call would succeed thanks to the implicit release-call issued by
> > pKVM. But it would fail as it is supposed to do w/o pKVM.
> > 
> > I'm not entirely sure if that's gonna cause real-world problem, but it
> > does feel unecessary at best. Are we trying to fix an EL1 bug in the
> > hypervisor here?
> >
> 
> This was most likely observed from an issue from the EL1 driver (by not
> calling release explicitly), it was reported by Andrei Homescu
> <ahomescu@google.com>. it appears that we also have to do something
> in the hyp about it and we agreed with Will and Sudeep in the previous version of
> the patch:
> https://lore.kernel.org/all/20250313121559.GB7356@willie-the-truck/

Thanks for the context. I'm still not convinced issuing the release
call in that way is fully correct, but happy to be corrected on my
understanding of the spec.

Thanks,
Quentin

> > > > And isn't EL1 going to be
> > > > confused if the content of the buffer is overridden before is has issued
> > > > the release call itself?
> > > 
> > > The hypervisor should prevent changes to the buffer mapped between the
> > > host and itself until the release_rx call is issued from the host.
> > > If another call that wants to make use of the rx buffer sneaks in, we
> > > would have to revoke it with BUSY until rx_release is sent.
> > 
> > Right, exactly, but that's not implemented at the moment. IMO it is much
> > simpler to rely on the host to issue the release call and just not do it
> > from the PARTITION_INFO_GET path in pKVM. And if we're scared about a
> > release call racing with PARTITION_INFO_GET at pKVM level, all we should
> > need to do is forward the release call with the host_buffers.lock held I
> > think. Wdyt?
> >
> 
> 
> > Thanks,
> > Quentin
Sudeep Holla April 1, 2025, 12:55 p.m. UTC | #7
On Tue, Apr 01, 2025 at 12:00:38PM +0000, Quentin Perret wrote:
> On Friday 28 Mar 2025 at 14:18:55 (+0000), Sebastian Ene wrote:
> > 
> > If we apply what I suggested earlier we won't have an issue with the
> > semantic for this call but it would make the code a mess. I don't think
> > for this particular call keeping semantics really makes a difference.
> 
> Right, if we implemented the release call properly in pKVM I'd be happy
> with this patch, but I just don't think we should only do one half. We
> either do it properly in pKVM or leave it with to the host -- the latter
> feels simpler to me, but no strong opinions.
> 

FYI:

As part of the earlier discussion seeking clarification on this from
the FF-A spec, I found that even the driver was not handling this correctly.
I have since posted the fix and plan to get it merged as a fix for v6.15.
diff mbox series

Patch

diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 6df6131f1107..ac898ea6274a 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -749,6 +749,7 @@  static void do_ffa_part_get(struct arm_smccc_res *res,
 	DECLARE_REG(u32, uuid3, ctxt, 4);
 	DECLARE_REG(u32, flags, ctxt, 5);
 	u32 count, partition_sz, copy_sz;
+	struct arm_smccc_res _res;
 
 	hyp_spin_lock(&host_buffers.lock);
 	if (!host_buffers.rx) {
@@ -765,11 +766,11 @@  static void do_ffa_part_get(struct arm_smccc_res *res,
 
 	count = res->a2;
 	if (!count)
-		goto out_unlock;
+		goto release_rx;
 
 	if (hyp_ffa_version > FFA_VERSION_1_0) {
 		/* Get the number of partitions deployed in the system */
-		if (flags & 0x1)
+		if (flags & PARTITION_INFO_GET_RETURN_COUNT_ONLY)
 			goto out_unlock;
 
 		partition_sz  = res->a3;
@@ -781,10 +782,12 @@  static void do_ffa_part_get(struct arm_smccc_res *res,
 	copy_sz = partition_sz * count;
 	if (copy_sz > KVM_FFA_MBOX_NR_PAGES * PAGE_SIZE) {
 		ffa_to_smccc_res(res, FFA_RET_ABORTED);
-		goto out_unlock;
+		goto release_rx;
 	}
 
 	memcpy(host_buffers.rx, hyp_buffers.rx, copy_sz);
+release_rx:
+	ffa_rx_release(&_res);
 out_unlock:
 	hyp_spin_unlock(&host_buffers.lock);
 }