diff mbox series

s390/vfio-ap: Clean up vfio_ap resources when KVM pointer invalidated

Message ID 20201202234101.32169-1-akrowiak@linux.ibm.com (mailing list archive)
State New, archived
Headers show
Series s390/vfio-ap: Clean up vfio_ap resources when KVM pointer invalidated | expand

Commit Message

Anthony Krowiak Dec. 2, 2020, 11:41 p.m. UTC
The vfio_ap device driver registers a group notifier with VFIO when the
file descriptor for a VFIO mediated device for a KVM guest is opened to
receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
and calls the kvm_get_kvm() function to increment its reference counter.
When the notifier is called to make notification that the KVM pointer has
been set to NULL, the driver should clean up any resources associated with
the KVM pointer and decrement its reference counter. The current
implementation does not take care of this clean up.

Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Cornelia Huck Dec. 3, 2020, 10:19 a.m. UTC | #1
On Wed,  2 Dec 2020 18:41:01 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> The vfio_ap device driver registers a group notifier with VFIO when the
> file descriptor for a VFIO mediated device for a KVM guest is opened to
> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> and calls the kvm_get_kvm() function to increment its reference counter.
> When the notifier is called to make notification that the KVM pointer has
> been set to NULL, the driver should clean up any resources associated with
> the KVM pointer and decrement its reference counter. The current
> implementation does not take care of this clean up.
> 
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> ---
>  drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e0bde8518745..eeb9c9130756 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>  	return NOTIFY_DONE;
>  }
>  
> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> +{
> +	if (matrix_mdev->kvm) {
> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> +		kvm_put_kvm(matrix_mdev->kvm);
> +		matrix_mdev->kvm = NULL;
> +	}
> +}
> +
>  static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>  				       unsigned long action, void *data)
>  {
> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>  	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>  
>  	if (!data) {
> -		matrix_mdev->kvm = NULL;
> +		vfio_ap_mdev_put_kvm(matrix_mdev);

Hm. I'm wondering whether you need to hold the matrix_dev lock here as
well?

>  		return NOTIFY_OK;
>  	}
>  
> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>  	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>  
>  	mutex_lock(&matrix_dev->lock);
> -	if (matrix_mdev->kvm) {
> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> -		vfio_ap_mdev_reset_queues(mdev);
> -		kvm_put_kvm(matrix_mdev->kvm);
> -		matrix_mdev->kvm = NULL;
> -	}
> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>  	mutex_unlock(&matrix_dev->lock);
>  
>  	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Halil Pasic Dec. 3, 2020, 5:01 p.m. UTC | #2
On Thu, 3 Dec 2020 11:19:07 +0100
Cornelia Huck <cohuck@redhat.com> wrote:

> > @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> >  	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> >  
> >  	if (!data) {
> > -		matrix_mdev->kvm = NULL;
> > +		vfio_ap_mdev_put_kvm(matrix_mdev);  
> 
> Hm. I'm wondering whether you need to hold the maxtrix_dev lock here as
> well?

In v12 we eventually did come along and patch "s390/vfio-ap: allow hot
plug/unplug of AP resources using mdev device" made this a part of a
critical section protected by the matrix_dev->lock.

IMHO the cleanup should definitely happen with the matrix_dev->lock held.

Regards,
Halil
Halil Pasic Dec. 3, 2020, 5:55 p.m. UTC | #3
On Wed,  2 Dec 2020 18:41:01 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> The vfio_ap device driver registers a group notifier with VFIO when the
> file descriptor for a VFIO mediated device for a KVM guest is opened to
> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> and calls the kvm_get_kvm() function to increment its reference counter.
> When the notifier is called to make notification that the KVM pointer has
> been set to NULL, the driver should clean up any resources associated with
> the KVM pointer and decrement its reference counter. The current
> implementation does not take care of this clean up.
> 
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>

Do we need a Fixes tag? Do we need this backported? In my opinion
this is necessary since the interrupt patches.

> ---
>  drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e0bde8518745..eeb9c9130756 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>  	return NOTIFY_DONE;
>  }
>  
> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)

I don't like the name. The function does more than put_kvm. Maybe
something like _disconnect_kvm()?

> +{
> +	if (matrix_mdev->kvm) {
> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;

Is a plain assignment to arch.crypto.pqap_hook appropriate, or do we need
to take more care?

For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
kvm->arch.crypto.crycb.

> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> +		kvm_put_kvm(matrix_mdev->kvm);
> +		matrix_mdev->kvm = NULL;
> +	}
> +}
> +
>  static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>  				       unsigned long action, void *data)
>  {
> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>  	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>  
>  	if (!data) {
> -		matrix_mdev->kvm = NULL;
> +		vfio_ap_mdev_put_kvm(matrix_mdev);

The lock question was already raised.

What are the exact circumstances under which this branch can be taken?

>  		return NOTIFY_OK;
>  	}
>  
> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>  	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>  
>  	mutex_lock(&matrix_dev->lock);
> -	if (matrix_mdev->kvm) {
> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> -		vfio_ap_mdev_reset_queues(mdev);
> -		kvm_put_kvm(matrix_mdev->kvm);
> -		matrix_mdev->kvm = NULL;
> -	}
> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>  	mutex_unlock(&matrix_dev->lock);
>  
>  	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Anthony Krowiak Dec. 3, 2020, 7:14 p.m. UTC | #4
On 12/3/20 12:01 PM, Halil Pasic wrote:
> On Thu, 3 Dec 2020 11:19:07 +0100
> Cornelia Huck <cohuck@redhat.com> wrote:
>
>>> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>>>   
>>>   	if (!data) {
>>> -		matrix_mdev->kvm = NULL;
>>> +		vfio_ap_mdev_put_kvm(matrix_mdev);
>> Hm. I'm wondering whether you need to hold the maxtrix_dev lock here as
>> well?
> In v12 we eventually did come along and patch "s390/vfio-ap: allow hot
> plug/unplug of AP resources using mdev device" made this a part of a
> critical section protected by the matrix_dev->lock.
>
> IMHO the cleanup should definitely happen with the matrix_dev->lock held.

Agreed!

>
> Regards,
> Halil
Anthony Krowiak Dec. 4, 2020, 2:43 p.m. UTC | #5
On 12/3/20 12:55 PM, Halil Pasic wrote:
> On Wed,  2 Dec 2020 18:41:01 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>> The vfio_ap device driver registers a group notifier with VFIO when the
>> file descriptor for a VFIO mediated device for a KVM guest is opened to
>> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
>> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
>> and calls the kvm_get_kvm() function to increment its reference counter.
>> When the notifier is called to make notification that the KVM pointer has
>> been set to NULL, the driver should clean up any resources associated with
>> the KVM pointer and decrement its reference counter. The current
>> implementation does not take care of this clean up.
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> Do we need a Fixes tag? Do we need this backported? In my opinion
> this is necessary since the interrupt patches.
>
>> ---
>>   drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>>   1 file changed, 13 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>> index e0bde8518745..eeb9c9130756 100644
>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>>   	return NOTIFY_DONE;
>>   }
>>   
>> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> I don't like the name. The function does more that put_kvm. Maybe
> something  like _disconnect_kvm()?
>
>> +{
>> +	if (matrix_mdev->kvm) {
>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
> to take more care?
>
> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> kvm->arch.crypto.crycb.

I do not think so. The CRYCB is used by KVM to provide crypto resources
to the guest so it makes sense to protect it from changes to it while 
passing
the AP devices through to the guest. The hook is used only when an AQIC
executed on the guest is intercepted by KVM. If the notifier
is being invoked to notify vfio_ap that KVM has been set to NULL, this means
the guest is gone in which case there will be no AP instructions to 
intercept.

>
>> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
>> +		kvm_put_kvm(matrix_mdev->kvm);
>> +		matrix_mdev->kvm = NULL;
>> +	}
>> +}
>> +
>>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>   				       unsigned long action, void *data)
>>   {
>> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>>   
>>   	if (!data) {
>> -		matrix_mdev->kvm = NULL;
>> +		vfio_ap_mdev_put_kvm(matrix_mdev);
> The lock question was already raised.
>
> What are the exact circumstances under which this branch can be taken?

Under normal circumstances (i.e., the mdev fd is closed before the guest
terminates), this notifier is not called because the release callback
(invoked when the mdev fd is closed) unregisters the notifier. This fix is
primarily to ensure that proper cleanup is done should the notifier get
called; for example, if userspace does not close the mdev fd before
shutting the guest down.

>
>>   		return NOTIFY_OK;
>>   	}
>>   
>> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>   
>>   	mutex_lock(&matrix_dev->lock);
>> -	if (matrix_mdev->kvm) {
>> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>> -		vfio_ap_mdev_reset_queues(mdev);
>> -		kvm_put_kvm(matrix_mdev->kvm);
>> -		matrix_mdev->kvm = NULL;
>> -	}
>> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>>   	mutex_unlock(&matrix_dev->lock);
>>   
>>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Anthony Krowiak Dec. 4, 2020, 4:48 p.m. UTC | #6
On 12/3/20 12:55 PM, Halil Pasic wrote:
> On Wed,  2 Dec 2020 18:41:01 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>> The vfio_ap device driver registers a group notifier with VFIO when the
>> file descriptor for a VFIO mediated device for a KVM guest is opened to
>> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
>> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
>> and calls the kvm_get_kvm() function to increment its reference counter.
>> When the notifier is called to make notification that the KVM pointer has
>> been set to NULL, the driver should clean up any resources associated with
>> the KVM pointer and decrement its reference counter. The current
>> implementation does not take care of this clean up.
>>
>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> Do we need a Fixes tag? Do we need this backported? In my opinion
> this is necessary since the interrupt patches.

I'll put in a fixes tag:
Fixes: 258287c994de (s390: vfio-ap: implement mediated device open callback)

Yes, this should probably be backported.

>
>> ---
>>   drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>>   1 file changed, 13 insertions(+), 8 deletions(-)
>>
>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>> index e0bde8518745..eeb9c9130756 100644
>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>>   	return NOTIFY_DONE;
>>   }
>>   
>> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> I don't like the name. The function does more that put_kvm. Maybe
> something  like _disconnect_kvm()?
Since the vfio_ap_mdev_set_kvm() function is called by the
notifier when the KVM pointer is set, how about:

vfio_ap_mdev_unset_kvm()

for when the KVM pointer is nullified?

>
>> +{
>> +	if (matrix_mdev->kvm) {
>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
> to take more care?
>
> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> kvm->arch.crypto.crycb.
>
>> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
>> +		kvm_put_kvm(matrix_mdev->kvm);
>> +		matrix_mdev->kvm = NULL;
>> +	}
>> +}
>> +
>>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>   				       unsigned long action, void *data)
>>   {
>> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>>   
>>   	if (!data) {
>> -		matrix_mdev->kvm = NULL;
>> +		vfio_ap_mdev_put_kvm(matrix_mdev);
> The lock question was already raised.
>
> What are the exact circumstances under which this branch can be taken?
>
>>   		return NOTIFY_OK;
>>   	}
>>   
>> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>   
>>   	mutex_lock(&matrix_dev->lock);
>> -	if (matrix_mdev->kvm) {
>> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>> -		vfio_ap_mdev_reset_queues(mdev);
>> -		kvm_put_kvm(matrix_mdev->kvm);
>> -		matrix_mdev->kvm = NULL;
>> -	}
>> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>>   	mutex_unlock(&matrix_dev->lock);
>>   
>>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Cornelia Huck Dec. 4, 2020, 4:57 p.m. UTC | #7
On Fri, 4 Dec 2020 11:48:24 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> On 12/3/20 12:55 PM, Halil Pasic wrote:
> > On Wed,  2 Dec 2020 18:41:01 -0500
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >  
> >> The vfio_ap device driver registers a group notifier with VFIO when the
> >> file descriptor for a VFIO mediated device for a KVM guest is opened to
> >> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> >> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> >> and calls the kvm_get_kvm() function to increment its reference counter.
> >> When the notifier is called to make notification that the KVM pointer has
> >> been set to NULL, the driver should clean up any resources associated with
> >> the KVM pointer and decrement its reference counter. The current
> >> implementation does not take care of this clean up.
> >>
> >> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>  
> > Do we need a Fixes tag? Do we need this backported? In my opinion
> > this is necessary since the interrupt patches.  
> 
> I'll put in a fixes tag:
> Fixes: 258287c994de (s390: vfio-ap: implement mediated device open callback)

The canonical format would be

Fixes: 258287c994de ("s390: vfio-ap: implement mediated device open callback")

> 
> Yes, this should probably be backported.
> 
> >  
> >> ---
> >>   drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
> >>   1 file changed, 13 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> >> index e0bde8518745..eeb9c9130756 100644
> >> --- a/drivers/s390/crypto/vfio_ap_ops.c
> >> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> >> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> >>   	return NOTIFY_DONE;
> >>   }
> >>   
> >> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)  
> > I don't like the name. The function does more that put_kvm. Maybe
> > something  like _disconnect_kvm()?  
> Since the vfio_ap_mdev_set_kvm() function is called by the
> notifier when the KVM pointer is set, how about:
> 
> vfio_ap_mdev_unset_kvm()
> 
> for when the KVM pointer is nullified?

Sounds good to me.
Halil Pasic Dec. 4, 2020, 7:05 p.m. UTC | #8
On Fri, 4 Dec 2020 09:43:59 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> >> +{
> >> +	if (matrix_mdev->kvm) {
> >> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> >> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;  
> > Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
> > to take more care?
> >
> > For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> > kvm->arch.crypto.crycb.  
> 
> I do not think so. The CRYCB is used by KVM to provide crypto resources
> to the guest so it makes sense to protect it from changes to it while 
> passing
> the AP devices through to the guest. The hook is used only when an AQIC
> executed on the guest is intercepted by KVM. If the notifier
> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
> the guest is gone in which case there will be no AP instructions to 
> intercept.

If the update to pqap_hook isn't observed as atomic we still have a
problem. With torn writes or reads we would try to use a corrupt function
pointer. While the compiler probably ain't likely to generate silly code
for the above assignment (multiple write instructions less than
quadword wide), I know of nothing that would prohibit the compiler from
doing so.

I'm not certain about the scope of the kvm->lock (if it's supposed to
protect the whole sub-tree of objects). Maybe Janosch can help us out.
@Janosch: what do you think?

Regards,
Halil
Anthony Krowiak Dec. 4, 2020, 7:46 p.m. UTC | #9
On 12/4/20 2:05 PM, Halil Pasic wrote:
> On Fri, 4 Dec 2020 09:43:59 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>>>> +{
>>>> +	if (matrix_mdev->kvm) {
>>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>>> Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
>>> to take more care?
>>>
>>> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
>>> kvm->arch.crypto.crycb.
>> I do not think so. The CRYCB is used by KVM to provide crypto resources
>> to the guest so it makes sense to protect it from changes to it while
>> passing
>> the AP devices through to the guest. The hook is used only when an AQIC
>> executed on the guest is intercepted by KVM. If the notifier
>> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
>> the guest is gone in which case there will be no AP instructions to
>> intercept.
> If the update to pqap_hook isn't observed as atomic we still have a
> problem. With torn writes or reads we would try to use a corrupt function
> pointer. While the compiler probably ain't likely to generate silly code
> for the above assignment (multiple write instructions less then
> quadword wide), I know of nothing that would prohibit the compiler to do
> so.

I see that in the handle_pqap() function in arch/s390/kvm/priv.c
that gets called when the AQIC instruction is intercepted,
the pqap_hook is protected by locking the owner of the hook:

         if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
             return -EOPNOTSUPP;
         ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
         module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);

Maybe that is what we should do when the kvm->arch.crypto.pqap_hook
is set to NULL?

>
> I'm not certain about the scope of the kvm->lock (if it's supposed to
> protect the whole sub-tree of objects). Maybe Janosch can help us out.
> @Janosch: what do you think?
>
> Regards,
> Halil
Anthony Krowiak Dec. 4, 2020, 7:47 p.m. UTC | #10
On 12/4/20 11:57 AM, Cornelia Huck wrote:
> On Fri, 4 Dec 2020 11:48:24 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>> On 12/3/20 12:55 PM, Halil Pasic wrote:
>>> On Wed,  2 Dec 2020 18:41:01 -0500
>>> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>>>   
>>>> The vfio_ap device driver registers a group notifier with VFIO when the
>>>> file descriptor for a VFIO mediated device for a KVM guest is opened to
>>>> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
>>>> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
>>>> and calls the kvm_get_kvm() function to increment its reference counter.
>>>> When the notifier is called to make notification that the KVM pointer has
>>>> been set to NULL, the driver should clean up any resources associated with
>>>> the KVM pointer and decrement its reference counter. The current
>>>> implementation does not take care of this clean up.
>>>>
>>>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
>>> Do we need a Fixes tag? Do we need this backported? In my opinion
>>> this is necessary since the interrupt patches.
>> I'll put in a fixes tag:
>> Fixes: 258287c994de (s390: vfio-ap: implement mediated device open callback)
> The canonical format would be
>
> Fixes: 258287c994de ("s390: vfio-ap: implement mediated device open callback")

Okay.

>
>> Yes, this should probably be backported.
>>
>>>   
>>>> ---
>>>>    drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>>>>    1 file changed, 13 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>>>> index e0bde8518745..eeb9c9130756 100644
>>>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>>>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>>>> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>>>>    	return NOTIFY_DONE;
>>>>    }
>>>>    
>>>> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
>>> I don't like the name. The function does more that put_kvm. Maybe
>>> something  like _disconnect_kvm()?
>> Since the vfio_ap_mdev_set_kvm() function is called by the
>> notifier when the KVM pointer is set, how about:
>>
>> vfio_ap_mdev_unset_kvm()
>>
>> for when the KVM pointer is nullified?
> Sounds good to me.
>
Halil Pasic Dec. 4, 2020, 9:54 p.m. UTC | #11
On Fri, 4 Dec 2020 14:46:30 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> On 12/4/20 2:05 PM, Halil Pasic wrote:
> > On Fri, 4 Dec 2020 09:43:59 -0500
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >  
> >>>> +{
> >>>> +	if (matrix_mdev->kvm) {
> >>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> >>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;  
> >>> Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
> >>> to take more care?
> >>>
> >>> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> >>> kvm->arch.crypto.crycb.  
> >> I do not think so. The CRYCB is used by KVM to provide crypto resources
> >> to the guest so it makes sense to protect it from changes to it while
> >> passing
> >> the AP devices through to the guest. The hook is used only when an AQIC
> >> executed on the guest is intercepted by KVM. If the notifier
> >> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
> >> the guest is gone in which case there will be no AP instructions to
> >> intercept.  
> > If the update to pqap_hook isn't observed as atomic we still have a
> > problem. With torn writes or reads we would try to use a corrupt function
> > pointer. While the compiler probably ain't likely to generate silly code
> > for the above assignment (multiple write instructions less then
> > quadword wide), I know of nothing that would prohibit the compiler to do
> > so.  
> 
> I see that in the handle_pqap() function in arch/s390/kvm/priv.c
> that gets called when the AQIC instruction is intercepted,
> the pqap_hook is protected by locking the owner of the hook:
> 
>          if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
>              return -EOPNOTSUPP;
>          ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
> module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
> 
> Maybe that is what we should do when the kvm->arch.crypto.pqap_hook
> is set to NULL?

To my best knowledge that ain't no locking but mere refcounting. The
purpose of that is probably to prevent the owner module, and the code
pointed to by the 'hook' function pointer from being unloaded while we
are executing that very same code.

Why that is necessary, frankly I have no idea. We do tend to invalidate
the callback before doing our module_put in vfio_ap_mdev_release(). Maybe
the case you are handling right now is the reason (because the
callback is invalidated in vfio_ap_mdev_release() only if !!kvm).

Regards,
Halil
Halil Pasic Dec. 7, 2020, 3:24 p.m. UTC | #12
On Fri, 4 Dec 2020 11:48:24 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> On 12/3/20 12:55 PM, Halil Pasic wrote:
> > On Wed,  2 Dec 2020 18:41:01 -0500
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >  
> >> The vfio_ap device driver registers a group notifier with VFIO when the
> >> file descriptor for a VFIO mediated device for a KVM guest is opened to
> >> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> >> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> >> and calls the kvm_get_kvm() function to increment its reference counter.
> >> When the notifier is called to make notification that the KVM pointer has
> >> been set to NULL, the driver should clean up any resources associated with
> >> the KVM pointer and decrement its reference counter. The current
> >> implementation does not take care of this clean up.
> >>
> >> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>  
> > Do we need a Fixes tag? Do we need this backported? In my opinion
> > this is necessary since the interrupt patches.  
> 
> I'll put in a fixes tag:
> Fixes: 258287c994de (s390: vfio-ap: implement mediated device open callback)
> 
> Yes, this should probably be backported.

I changed my mind regarding the severity of this issue. I was paranoid
about post-mortem interrupts, and resulting notifier byte updates by the
machine. What I overlooked is that the pin is going to prevent the memory
from getting repurposed. I.e. if we have something like vmalloc(),
vfio_pin(notifier_page), vfree(), I believe the notifier_page is not free
(available for allocation). So the worst case scenario is IMHO a resource
leak and not corruption. So I'm not sure this must be backported.
Opinions?

Regards,
Halil
Christian Borntraeger Dec. 7, 2020, 3:42 p.m. UTC | #13
On 07.12.20 16:24, Halil Pasic wrote:
> On Fri, 4 Dec 2020 11:48:24 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> 
>> On 12/3/20 12:55 PM, Halil Pasic wrote:
>>> On Wed,  2 Dec 2020 18:41:01 -0500
>>> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>>>  
>>>> The vfio_ap device driver registers a group notifier with VFIO when the
>>>> file descriptor for a VFIO mediated device for a KVM guest is opened to
>>>> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
>>>> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
>>>> and calls the kvm_get_kvm() function to increment its reference counter.
>>>> When the notifier is called to make notification that the KVM pointer has
>>>> been set to NULL, the driver should clean up any resources associated with
>>>> the KVM pointer and decrement its reference counter. The current
>>>> implementation does not take care of this clean up.
>>>>
>>>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>  
>>> Do we need a Fixes tag? Do we need this backported? In my opinion
>>> this is necessary since the interrupt patches.  
>>
>> I'll put in a fixes tag:
>> Fixes: 258287c994de (s390: vfio-ap: implement mediated device open callback)
>>
>> Yes, this should probably be backported.
> 
> I changed my mind regarding the severity of this issue. I was paranoid
> about post-mortem interrupts, and resulting notifier byte updates by the
> machine. What I overlooked is that the pin is going to prevent the memory
> form getting repurposed. I.e. if we have something like vmalloc(),
> vfio_pin(notifier_page), vfree(), I believe the notifier_page is not free
> (available for allocation). So the worst case scenario is IMHO a resource
> leak and not corruption. So I'm not sure this must be backported.
> Opinions?

Resource leaks qualify for backport and cc stable, but this is not a security
issue, so there is no urgency, no CVE, and nothing of that kind.

So let's finish this without hurry, add cc stable, and then look for necessary
distro backports.
Anthony Krowiak Dec. 7, 2020, 6:50 p.m. UTC | #14
On 12/4/20 2:05 PM, Halil Pasic wrote:
> On Fri, 4 Dec 2020 09:43:59 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>>>> +{
>>>> +	if (matrix_mdev->kvm) {
>>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>>> Is a plain assignment to arch.crypto.pqap_hook apropriate, or do we need
>>> to take more care?
>>>
>>> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
>>> kvm->arch.crypto.crycb.
>> I do not think so. The CRYCB is used by KVM to provide crypto resources
>> to the guest so it makes sense to protect it from changes to it while
>> passing
>> the AP devices through to the guest. The hook is used only when an AQIC
>> executed on the guest is intercepted by KVM. If the notifier
>> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
>> the guest is gone in which case there will be no AP instructions to
>> intercept.
> If the update to pqap_hook isn't observed as atomic we still have a
> problem. With torn writes or reads we would try to use a corrupt function
> pointer. While the compiler probably ain't likely to generate silly code
> for the above assignment (multiple write instructions less than
> quadword wide), I know of nothing that would prohibit the compiler from
> doing so.

I'm sorry, but I still don't understand why you think this is a problem
given what I stated above.

>
> I'm not certain about the scope of the kvm->lock (if it's supposed to
> protect the whole sub-tree of objects). Maybe Janosch can help us out.
> @Janosch: what do you think?
>
> Regards,
> Halil
Anthony Krowiak Dec. 7, 2020, 7:05 p.m. UTC | #15
On 12/2/20 6:41 PM, Tony Krowiak wrote:
> The vfio_ap device driver registers a group notifier with VFIO when the
> file descriptor for a VFIO mediated device for a KVM guest is opened to
> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> and calls the kvm_get_kvm() function to increment its reference counter.
> When the notifier is called to make notification that the KVM pointer has
> been set to NULL, the driver should clean up any resources associated with
> the KVM pointer and decrement its reference counter. The current
> implementation does not take care of this clean up.
>
> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> ---
>   drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>   1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index e0bde8518745..eeb9c9130756 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>   	return NOTIFY_DONE;
>   }
>   
> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> +{
> +	if (matrix_mdev->kvm) {
> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);

This reset probably does not belong here since there is no
reason to reset the queues in the group notifier (see below).
The reset should be done in the release callback only regardless
of whether the KVM pointer exists or not.

> +		kvm_put_kvm(matrix_mdev->kvm);
> +		matrix_mdev->kvm = NULL;
> +	}
> +}
> +
>   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>   				       unsigned long action, void *data)
>   {
> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>   
>   	if (!data) {
> -		matrix_mdev->kvm = NULL;
> +		vfio_ap_mdev_put_kvm(matrix_mdev);
>   		return NOTIFY_OK;
>   	}
>   
> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>   
>   	mutex_lock(&matrix_dev->lock);
> -	if (matrix_mdev->kvm) {
> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> -		vfio_ap_mdev_reset_queues(mdev);

This reset should be moved outside of the block and
performed regardless of whether the KVM pointer exists or
not.

> -		kvm_put_kvm(matrix_mdev->kvm);
> -		matrix_mdev->kvm = NULL;
> -	}
> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>   	mutex_unlock(&matrix_dev->lock);
>   
>   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Halil Pasic Dec. 8, 2020, 12:01 a.m. UTC | #16
On Mon, 7 Dec 2020 13:50:36 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> On 12/4/20 2:05 PM, Halil Pasic wrote:
> > On Fri, 4 Dec 2020 09:43:59 -0500
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >  
> >>>> +{
> >>>> +	if (matrix_mdev->kvm) {
> >>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> >>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;  
> >>> Is a plain assignment to arch.crypto.pqap_hook appropriate, or do we need
> >>> to take more care?
> >>>
> >>> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> >>> kvm->arch.crypto.crycb.  
> >> I do not think so. The CRYCB is used by KVM to provide crypto resources
> >> to the guest so it makes sense to protect it from changes to it while
> >> passing
> >> the AP devices through to the guest. The hook is used only when an AQIC
> >> executed on the guest is intercepted by KVM. If the notifier
> >> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
> >> the guest is gone in which case there will be no AP instructions to
> >> intercept.  
> > If the update to pqap_hook isn't observed as atomic we still have a
> > problem. With torn writes or reads we would try to use a corrupt function
> > pointer. While the compiler probably ain't likely to generate silly code
> > > for the above assignment (multiple write instructions less than
> > > quadword wide), I know of nothing that would prohibit the compiler from
> > > doing so.  
> 
> I'm sorry, but I still don't understand why you think this is a problem
> given what I stated above.

I assume you are specifically referring to 'the guest is gone in which
case there will be no AP instructions to intercept'.  I assume by 'guest
is gone' you mean that the VM is being destroyed, and the vcpus are out
of SIE. You are probably right for the invocation of
kvm_vfio_group_set_kvm() in kvm_vfio_destroy(), but is that true for
the invocation in the KVM_DEV_VFIO_GROUP_DEL case in
kvm_vfio_set_group()? I.e. can't we get the notifier called when the
qemu device is hot unplugged (modulo remove which unregisters the
notifier and usually precludes the notifier being called with NULL at
all)?

Regards,
Halil
Halil Pasic Dec. 8, 2020, 12:40 a.m. UTC | #17
On Mon, 7 Dec 2020 14:05:55 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> 
> 
> On 12/2/20 6:41 PM, Tony Krowiak wrote:
> > The vfio_ap device driver registers a group notifier with VFIO when the
> > file descriptor for a VFIO mediated device for a KVM guest is opened to
> > receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
> > event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
> > and calls the kvm_get_kvm() function to increment its reference counter.
> > When the notifier is called to make notification that the KVM pointer has
> > been set to NULL, the driver should clean up any resources associated with
> > the KVM pointer and decrement its reference counter. The current
> > implementation does not take care of this clean up.
> >
> > Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
> > ---
> >   drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
> >   1 file changed, 13 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> > index e0bde8518745..eeb9c9130756 100644
> > --- a/drivers/s390/crypto/vfio_ap_ops.c
> > +++ b/drivers/s390/crypto/vfio_ap_ops.c
> > @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
> >   	return NOTIFY_DONE;
> >   }
> >   
> > +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> > +{
> > +	if (matrix_mdev->kvm) {
> > +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> > +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> > +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
> 
> This reset probably does not belong here since there is no
> reason to reset the queues in the group notifier (see below).

What about kvm_s390_gisc_unregister()? That needs a valid kvm
pointer, or? Or is it OK to not pair a kvm_s390_gisc_register()
with an kvm_s390_gisc_unregister()?

Regards,
Halil

> The reset should be done in the release callback only regardless
> of whether the KVM pointer exists or not.
> 
> > +		kvm_put_kvm(matrix_mdev->kvm);
> > +		matrix_mdev->kvm = NULL;
> > +	}
> > +}
> > +
> >   static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> >   				       unsigned long action, void *data)
> >   {
> > @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
> >   	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
> >   
> >   	if (!data) {
> > -		matrix_mdev->kvm = NULL;
> > +		vfio_ap_mdev_put_kvm(matrix_mdev);
> >   		return NOTIFY_OK;
> >   	}
> >   
> > @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
> >   	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
> >   
> >   	mutex_lock(&matrix_dev->lock);
> > -	if (matrix_mdev->kvm) {
> > -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> > -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> > -		vfio_ap_mdev_reset_queues(mdev);
> 
> This reset should be moved outside of the block and
> performed regardless of whether the KVM pointer exists or
> not.
> 
> > -		kvm_put_kvm(matrix_mdev->kvm);
> > -		matrix_mdev->kvm = NULL;
> > -	}
> > +	vfio_ap_mdev_put_kvm(matrix_mdev);
> >   	mutex_unlock(&matrix_dev->lock);
> >   
> >   	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
>
Anthony Krowiak Dec. 11, 2020, 9:08 p.m. UTC | #18
On 12/7/20 7:40 PM, Halil Pasic wrote:
> On Mon, 7 Dec 2020 14:05:55 -0500
> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
>
>>
>> On 12/2/20 6:41 PM, Tony Krowiak wrote:
>>> The vfio_ap device driver registers a group notifier with VFIO when the
>>> file descriptor for a VFIO mediated device for a KVM guest is opened to
>>> receive notification that the KVM pointer is set (VFIO_GROUP_NOTIFY_SET_KVM
>>> event). When the KVM pointer is set, the vfio_ap driver stashes the pointer
>>> and calls the kvm_get_kvm() function to increment its reference counter.
>>> When the notifier is called to make notification that the KVM pointer has
>>> been set to NULL, the driver should clean up any resources associated with
>>> the KVM pointer and decrement its reference counter. The current
>>> implementation does not take care of this clean up.
>>>
>>> Signed-off-by: Tony Krowiak <akrowiak@linux.ibm.com>
>>> ---
>>>    drivers/s390/crypto/vfio_ap_ops.c | 21 +++++++++++++--------
>>>    1 file changed, 13 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
>>> index e0bde8518745..eeb9c9130756 100644
>>> --- a/drivers/s390/crypto/vfio_ap_ops.c
>>> +++ b/drivers/s390/crypto/vfio_ap_ops.c
>>> @@ -1083,6 +1083,17 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
>>>    	return NOTIFY_DONE;
>>>    }
>>>    
>>> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
>>> +{
>>> +	if (matrix_mdev->kvm) {
>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>>> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
>> This reset probably does not belong here since there is no
>> reason to reset the queues in the group notifier (see below).
> What about kvm_s390_gisc_unregister()? That needs a valid kvm
> pointer, or? Or is it OK to not pair a kvm_s390_gisc_register()
> with an kvm_s390_gisc_unregister()?

I probably should have been more specific about what I meant.
I was thinking that the reset should not be dependent upon
whether there is a KVM pointer or not since this function is
also called from the release callback. On the other hand,
the vfio_ap_mdev_reset_queues function calls the
vfio_ap_irq_disable (AQIC) function after each queue is reset.
The vfio_ap_irq_disable function also cleans up the AQIC
resources which requires that the KVM pointer is valid, so if
the vfio_ap_mdev_reset_queues function is not called with a
valid KVM pointer, that could result in an exception.

The thing is, it is unnecessary to disable interrupts after
resetting a queue because the reset disables interrupts,
so I think I should include a patch for this fix that does the
following:

1. Removes the disabling of interrupts subsequent to resetting
     a queue.
2. Includes the cleanup of AQIC resources when a queue is
     reset if a KVM pointer is present.

This will allow us to keep the reset in the function above as well
as the other places from which reset is executed.

>
> Regards,
> Halil
>
>> The reset should be done in the release callback only regardless
>> of whether the KVM pointer exists or not.
>>
>>> +		kvm_put_kvm(matrix_mdev->kvm);
>>> +		matrix_mdev->kvm = NULL;
>>> +	}
>>> +}
>>> +
>>>    static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>>    				       unsigned long action, void *data)
>>>    {
>>> @@ -1095,7 +1106,7 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
>>>    	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
>>>    
>>>    	if (!data) {
>>> -		matrix_mdev->kvm = NULL;
>>> +		vfio_ap_mdev_put_kvm(matrix_mdev);
>>>    		return NOTIFY_OK;
>>>    	}
>>>    
>>> @@ -1222,13 +1233,7 @@ static void vfio_ap_mdev_release(struct mdev_device *mdev)
>>>    	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
>>>    
>>>    	mutex_lock(&matrix_dev->lock);
>>> -	if (matrix_mdev->kvm) {
>>> -		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
>>> -		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
>>> -		vfio_ap_mdev_reset_queues(mdev);
>> This reset should be moved outside of the block and
>> performed regardless of whether the KVM pointer exists or
>> not.
>>
>>> -		kvm_put_kvm(matrix_mdev->kvm);
>>> -		matrix_mdev->kvm = NULL;
>>> -	}
>>> +	vfio_ap_mdev_put_kvm(matrix_mdev);
>>>    	mutex_unlock(&matrix_dev->lock);
>>>    
>>>    	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
Halil Pasic Dec. 13, 2020, 10:57 p.m. UTC | #19
On Fri, 11 Dec 2020 15:52:55 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> 
> 
> On 12/7/20 7:01 PM, Halil Pasic wrote:
> > On Mon, 7 Dec 2020 13:50:36 -0500
> > Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >
> >> On 12/4/20 2:05 PM, Halil Pasic wrote:
> >>> On Fri, 4 Dec 2020 09:43:59 -0500
> >>> Tony Krowiak <akrowiak@linux.ibm.com> wrote:
> >>>   
> >>>>>> +{
> >>>>>> +	if (matrix_mdev->kvm) {
> >>>>>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> >>>>>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> >>>>> Is a plain assignment to arch.crypto.pqap_hook appropriate, or do we need
> >>>>> to take more care?
> >>>>>
> >>>>> For instance kvm_arch_crypto_set_masks() takes kvm->lock before poking
> >>>>> kvm->arch.crypto.crycb.
> >>>> I do not think so. The CRYCB is used by KVM to provide crypto resources
> >>>> to the guest so it makes sense to protect it from changes to it while
> >>>> passing
> >>>> the AP devices through to the guest. The hook is used only when an AQIC
> >>>> executed on the guest is intercepted by KVM. If the notifier
> >>>> is being invoked to notify vfio_ap that KVM has been set to NULL, this means
> >>>> the guest is gone in which case there will be no AP instructions to
> >>>> intercept.
> >>> If the update to pqap_hook isn't observed as atomic we still have a
> >>> problem. With torn writes or reads we would try to use a corrupt function
> >>> pointer. While the compiler probably ain't likely to generate silly code
> >>> for the above assignment (multiple write instructions less than
> >>> quadword wide), I know of nothing that would prohibit the compiler from
> >>> doing so.
> >> I'm sorry, but I still don't understand why you think this is a problem
> >> given what I stated above.
> > I assume you are specifically referring to 'the guest is gone in which
> > case there will be no AP instructions to intercept'.  I assume by 'guest
> > is gone' you mean that the VM is being destroyed, and the vcpus are out
> > of SIE. You are probably right for the invocation of
> > kvm_vfio_group_set_kvm() in kvm_vfio_destroy(), but is that true for
> > the invocation in the KVM_DEV_VFIO_GROUP_DEL case in
> > kvm_vfio_set_group()? I.e. can't we get the notifier called when the
> > qemu device is hot unplugged (modulo remove which unregisters the
> > notifier and usually precludes the notifier being called with NULL at
> > all)?
> 
> I am assuming by your question that the qemu device you are
> talking about is the '-device vfio-ap' specified on the qemu command
> line or attached via qemu device_add. 

Yes.

> When an mdev is hot 
> unplugged, the
> vfio_ap driver's release callback gets invoked when the mdev fd is 
> closed. The
> release callback unregisters the notifier, so it does not get called
> when the guest subsequently shuts down.
> 

That is what I meant by 'modulo remove which unregisters the notifier
and usually precludes the notifier being called with NULL at all', but
unfortunately I mixed up remove and release.

AFAIU release should be called before the notifier gets invoked
regardless of whether we have a hot-unplug of '-device vfio-ap' or
a shutdown. The whole effort is about what happens if userspace does
not adhere to this. If I apply the logic of your last response to the
whole situation, then there is nothing to do (AFAIU).

The point I'm trying to make is, that in a case of the hot-unplug, the
guest may survive the call to the notifier and also the vfio_mdev device
it was associated to at some point. So your argument that 'the guest is
gone in which case there will be no AP instructions to intercept' does
not hold.

Regards,
Halil
Halil Pasic Dec. 13, 2020, 11:13 p.m. UTC | #20
On Fri, 11 Dec 2020 16:08:53 -0500
Tony Krowiak <akrowiak@linux.ibm.com> wrote:

> >>> +static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
> >>> +{
> >>> +	if (matrix_mdev->kvm) {
> >>> +		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
> >>> +		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
> >>> +		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);  
> >> This reset probably does not belong here since there is no
> >> reason to reset the queues in the group notifier (see below).  
> > What about kvm_s390_gisc_unregister()? That needs a valid kvm
> > pointer, or? Or is it OK to not pair a kvm_s390_gisc_register()
> > with an kvm_s390_gisc_unregister()?  
> 
> I probably should have been more specific about what I meant.
> I was thinking that the reset should not be dependent upon
> whether there is a KVM pointer or not since this function is
> also called from the release callback. On the other hand,
> the vfio_ap_mdev_reset_queues function calls the
> vfio_ap_irq_disable (AQIC) function after each queue is reset.
> The vfio_ap_irq_disable function also cleans up the AQIC
> resources which requires that the KVM pointer is valid, so if
> the vfio_ap_mdev_reset_queues function is not called with a
> valid KVM pointer, that could result in an exception.
> 
> The thing is, it is unnecessary to disable interrupts after
> resetting a queue because the reset disables interrupts,
> so I think I should include a patch for this fix that does the
> following:
> 
> 1. Removes the disabling of interrupts subsequent to resetting
>      a queue.
> 2. Includes the cleanup of AQIC resources when a queue is
>      reset if a KVM pointer is present.

Sounds like a plan. I see, in your v2 vfio_ap_mdev_unset_kvm()
does call vfio_ap_mdev_reset_queues() even when called from the
group notifier. I also like that the cleanup of AQIC resources is
a part of queue_reset. In fact I asked a while ago (Message-ID:
<20201027074846.30ee0ddc.pasic@linux.ibm.com> in October) to make
vfio_ap_mdev_reset_queue() call vfio_ap_free_aqic_resources(q).

Regards,
Halil
diff mbox series

Patch

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e0bde8518745..eeb9c9130756 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1083,6 +1083,17 @@  static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static void vfio_ap_mdev_put_kvm(struct ap_matrix_mdev *matrix_mdev)
+{
+	if (matrix_mdev->kvm) {
+		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
+		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
+		vfio_ap_mdev_reset_queues(matrix_mdev->mdev);
+		kvm_put_kvm(matrix_mdev->kvm);
+		matrix_mdev->kvm = NULL;
+	}
+}
+
 static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
 				       unsigned long action, void *data)
 {
@@ -1095,7 +1106,7 @@  static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
 	matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
 
 	if (!data) {
-		matrix_mdev->kvm = NULL;
+		vfio_ap_mdev_put_kvm(matrix_mdev);
 		return NOTIFY_OK;
 	}
 
@@ -1222,13 +1233,7 @@  static void vfio_ap_mdev_release(struct mdev_device *mdev)
 	struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
 
 	mutex_lock(&matrix_dev->lock);
-	if (matrix_mdev->kvm) {
-		kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
-		matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
-		vfio_ap_mdev_reset_queues(mdev);
-		kvm_put_kvm(matrix_mdev->kvm);
-		matrix_mdev->kvm = NULL;
-	}
+	vfio_ap_mdev_put_kvm(matrix_mdev);
 	mutex_unlock(&matrix_dev->lock);
 
 	vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,