diff mbox

[6/8,v2] Move IO APIC to its own lock.

Message ID 1249993895-11119-7-git-send-email-gleb@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Gleb Natapov Aug. 11, 2009, 12:31 p.m. UTC
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c |   27 ++++++++++++++++++------
 arch/x86/kvm/i8259.c     |    4 +++
 arch/x86/kvm/lapic.c     |    5 +---
 arch/x86/kvm/x86.c       |   30 ++++++++++++++++++---------
 virt/kvm/ioapic.c        |   49 ++++++++++++++++++++++++++++-----------------
 virt/kvm/ioapic.h        |    1 +
 6 files changed, 76 insertions(+), 40 deletions(-)

Comments

Avi Kivity Aug. 12, 2009, 8:27 a.m. UTC | #1
On 08/11/2009 03:31 PM, Gleb Natapov wrote:


What is the motivation for this change?

Why a spinlock and not a mutex?

> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 0ad09f0..dd7ef2d 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
>
>   	r = 0;
>   	switch (chip->chip_id) {
> -	case KVM_IRQCHIP_IOAPIC:
> -		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
> -				sizeof(struct kvm_ioapic_state));
> +	case KVM_IRQCHIP_IOAPIC: {
> +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> +		if (ioapic) {
> +			spin_lock(&ioapic->lock);
> +			memcpy(&chip->chip.ioapic, ioapic,
> +			       sizeof(struct kvm_ioapic_state));
> +			spin_unlock(&ioapic->lock);
>    

Better to add an accessor than to reach into internals like this.

> +		} else
> +			r = -EINVAL;
> +	}
>   		break;
>   	default:
>   		r = -EINVAL;
> @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>
>   	r = 0;
>   	switch (chip->chip_id) {
> -	case KVM_IRQCHIP_IOAPIC:
> -		memcpy(ioapic_irqchip(kvm),
> -				&chip->chip.ioapic,
> -				sizeof(struct kvm_ioapic_state));
> +	case KVM_IRQCHIP_IOAPIC: {
> +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
> +		if (ioapic) {
> +			spin_lock(&ioapic->lock);
> +			memcpy(ioapic,&chip->chip.ioapic,
> +			       sizeof(struct kvm_ioapic_state));
> +			spin_unlock(&ioapic->lock);
> +		} else
> +			r = -EINVAL;
> +	}
>    

... and better to deduplicate the code too.

>   		break;
>   	default:
>   		r = -EINVAL;
> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
> index 01f1516..a988c0e 100644
> --- a/arch/x86/kvm/i8259.c
> +++ b/arch/x86/kvm/i8259.c
> @@ -38,7 +38,9 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
>   	s->isr_ack |= (1<<  irq);
>   	if (s !=&s->pics_state->pics[0])
>   		irq += 8;
> +	spin_unlock(&s->pics_state->lock);
>   	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
> +	spin_lock(&s->pics_state->lock);
>   }
>    

Need to explain why this is safe.  I'm not sure it is, because we touch 
state afterwards in pic_intack().  We need to do all vcpu-synchronous 
operations before dropping the lock.

>    void kvm_pic_clear_isr_ack(struct kvm *kvm)
> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>   		if (vcpu0&&  kvm_apic_accept_pic_intr(vcpu0))
>   			if (s->irr&  (1<<  irq) || s->isr&  (1<<  irq)) {
>   				n = irq + irqbase;
> +				spin_unlock(&s->pics_state->lock);
>   				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
> +				spin_lock(&s->pics_state->lock);
>    

Ditto here, this needs to be moved to after we are done changing state.

>
> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
> -				    int trigger_mode)
> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
> +				     int trigger_mode)
>   {
> -	union kvm_ioapic_redirect_entry *ent;
> +	int i;
> +
> +	for (i = 0; i<  IOAPIC_NUM_PINS; i++) {
> +		union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
> +
> +		if (ent->fields.vector != vector)
> +			continue;
>
> -	ent =&ioapic->redirtbl[pin];
> +		spin_unlock(&ioapic->lock);
> +		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
> +		spin_lock(&ioapic->lock);
>
>    

I *think* we need to clear remote_irr before dropping the lock.  I 
*know* there's a missing comment here.

> -	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
> +		if (trigger_mode != IOAPIC_LEVEL_TRIG)
> +			continue;
>
> -	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>   		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>   		ent->fields.remote_irr = 0;
> -		if (!ent->fields.mask&&  (ioapic->irr&  (1<<  pin)))
> -			ioapic_service(ioapic, pin);
> +		if (!ent->fields.mask&&  (ioapic->irr&  (1<<  i)))
> +			ioapic_service(ioapic, i);
>   	}
>   }
>    

To make the patch easier to read, suggest keeping the loop in the other 
function.

>
>   void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
>   {
>   	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
> -	int i;
>
> -	for (i = 0; i<  IOAPIC_NUM_PINS; i++)
> -		if (ioapic->redirtbl[i].fields.vector == vector)
> -			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
> +	spin_lock(&ioapic->lock);
> +	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
> +	spin_unlock(&ioapic->lock);
>   }
>
>
Gleb Natapov Aug. 12, 2009, 9:04 a.m. UTC | #2
On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>
>
> What is the motivation for this change?
>
The motivation was explained in 0/0. I want to get rid of the lock on
the general irq injection path, so the lock has to be pushed into the
ioapic since multiple cpus can access it concurrently. The PIC has such
a lock already.

> Why a spinlock and not a mutex?
>
Protected sections are small and we do not sleep there.

>> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
>> index 0ad09f0..dd7ef2d 100644
>> --- a/arch/ia64/kvm/kvm-ia64.c
>> +++ b/arch/ia64/kvm/kvm-ia64.c
>> @@ -850,9 +850,16 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
>>
>>   	r = 0;
>>   	switch (chip->chip_id) {
>> -	case KVM_IRQCHIP_IOAPIC:
>> -		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
>> -				sizeof(struct kvm_ioapic_state));
>> +	case KVM_IRQCHIP_IOAPIC: {
>> +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
>> +		if (ioapic) {
>> +			spin_lock(&ioapic->lock);
>> +			memcpy(&chip->chip.ioapic, ioapic,
>> +			       sizeof(struct kvm_ioapic_state));
>> +			spin_unlock(&ioapic->lock);
>>    
>
> Better to add an accessor than to reach into internals like this.
>
Agree.

>> +		} else
>> +			r = -EINVAL;
>> +	}
>>   		break;
>>   	default:
>>   		r = -EINVAL;
>> @@ -867,10 +874,16 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
>>
>>   	r = 0;
>>   	switch (chip->chip_id) {
>> -	case KVM_IRQCHIP_IOAPIC:
>> -		memcpy(ioapic_irqchip(kvm),
>> -				&chip->chip.ioapic,
>> -				sizeof(struct kvm_ioapic_state));
>> +	case KVM_IRQCHIP_IOAPIC: {
>> +		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
>> +		if (ioapic) {
>> +			spin_lock(&ioapic->lock);
>> +			memcpy(ioapic,&chip->chip.ioapic,
>> +			       sizeof(struct kvm_ioapic_state));
>> +			spin_unlock(&ioapic->lock);
>> +		} else
>> +			r = -EINVAL;
>> +	}
>>    
>
> ... and better to deduplicate the code too.
>
>>   		break;
>>   	default:
>>   		r = -EINVAL;
>> diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
>> index 01f1516..a988c0e 100644
>> --- a/arch/x86/kvm/i8259.c
>> +++ b/arch/x86/kvm/i8259.c
>> @@ -38,7 +38,9 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
>>   	s->isr_ack |= (1<<  irq);
>>   	if (s !=&s->pics_state->pics[0])
>>   		irq += 8;
>> +	spin_unlock(&s->pics_state->lock);
>>   	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
>> +	spin_lock(&s->pics_state->lock);
>>   }
>>    
>
> Need to explain why this is safe.  I'm not sure it is, because we touch  
> state afterwards in pic_intack().  We need to do all vcpu-synchronous  
> operations before dropping the lock.
First, pic_intack() calls pic_clear_isr() only in auto EOI mode, and this
mode is already broken for assigned devices. Second, for level-triggered
interrupts pic_intack() does nothing after calling pic_clear_isr(). And
third, I can move the pic_clear_isr() call to the end of pic_intack().
>
>>    void kvm_pic_clear_isr_ack(struct kvm *kvm)
>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>>   		if (vcpu0&&  kvm_apic_accept_pic_intr(vcpu0))
>>   			if (s->irr&  (1<<  irq) || s->isr&  (1<<  irq)) {
>>   				n = irq + irqbase;
>> +				spin_unlock(&s->pics_state->lock);
>>   				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>> +				spin_lock(&s->pics_state->lock);
>>    
>
> Ditto here, needs to be moved until after done changing state.
>
I am not sure this code is even needed. The IOAPIC doesn't call
notifiers on reset.

>>
>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>> -				    int trigger_mode)
>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>> +				     int trigger_mode)
>>   {
>> -	union kvm_ioapic_redirect_entry *ent;
>> +	int i;
>> +
>> +	for (i = 0; i<  IOAPIC_NUM_PINS; i++) {
>> +		union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
>> +
>> +		if (ent->fields.vector != vector)
>> +			continue;
>>
>> -	ent =&ioapic->redirtbl[pin];
>> +		spin_unlock(&ioapic->lock);
>> +		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>> +		spin_lock(&ioapic->lock);
>>
>>    
>
> I *think* we need to clear remote_irr before dropping the lock.  I  
> *know* there's a missing comment here.
I don't see why we should clear remote_irr before dropping the lock. If,
while the lock was dropped, an interrupt was delivered to this entry, it
will be injected when the ack notifier returns.

>
>> -	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>> +		if (trigger_mode != IOAPIC_LEVEL_TRIG)
>> +			continue;
>>
>> -	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>>   		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>   		ent->fields.remote_irr = 0;
>> -		if (!ent->fields.mask&&  (ioapic->irr&  (1<<  pin)))
>> -			ioapic_service(ioapic, pin);
>> +		if (!ent->fields.mask&&  (ioapic->irr&  (1<<  i)))
>> +			ioapic_service(ioapic, i);
>>   	}
>>   }
>>    
>
> To make the patch easier to read, suggest keeping the loop in the other  
> function.
>
I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
the loop is already in its own function. Do you mean move the context of
the loop into the other function and leave only for(;;) fun(); in
__kvm_ioapic_update_eoi()?

>>
>>   void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
>>   {
>>   	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
>> -	int i;
>>
>> -	for (i = 0; i<  IOAPIC_NUM_PINS; i++)
>> -		if (ioapic->redirtbl[i].fields.vector == vector)
>> -			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
>> +	spin_lock(&ioapic->lock);
>> +	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
>> +	spin_unlock(&ioapic->lock);
>>   }
>>
>>    
>
> -- 
> error compiling committee.c: too many arguments to function

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 12, 2009, 9:18 a.m. UTC | #3
On 08/12/2009 12:04 PM, Gleb Natapov wrote:
> On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
>    
>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>
>>
>> What is the motivation for this change?
>>
>>      
> The motivation was explained in 0/0. I want to get rid of lock on
> general irq injection path so the lock have to be pushed into ioapic
> since multiple cpus can access it concurrently. PIC has such lock
> already.
>    

Ah, the real motivation is msi.  Pushing locks down doesn't help if we 
keep locking them.  But for msi we avoid the lock entirely.

>> Why a spinlock and not a mutex?
>>
>>      
> Protected sections are small and we do not sleep there.
>    

So what?  A mutex is better since it allows preemption (and still has 
spinlock performance if it isn't preempted).



>> Need to explain why this is safe.  I'm not sure it is, because we touch
>> state afterwards in pic_intack().  We need to do all vcpu-synchronous
>> operations before dropping the lock.
>>      
> Forst pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
> is already broken for assigned devices. Second for level triggered
> interrupts pic_intack() does nothing after calling pic_clear_isr() and
> third I can move pic_clear_isr() call to the end of pic_intack().
>    

I meant, in a comment.

>>>     void kvm_pic_clear_isr_ack(struct kvm *kvm)
>>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>>>    		if (vcpu0&&   kvm_apic_accept_pic_intr(vcpu0))
>>>    			if (s->irr&   (1<<   irq) || s->isr&   (1<<   irq)) {
>>>    				n = irq + irqbase;
>>> +				spin_unlock(&s->pics_state->lock);
>>>    				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>>> +				spin_lock(&s->pics_state->lock);
>>>
>>>        
>> Ditto here, needs to be moved until after done changing state.
>>
>>      
> I am not sure this code is even needed. IOAPIC don't call notifiers on
> reset.
>    

It should.  What if there's a reset with an assigned device?  We need to 
release the device interrupt (after doing FLR?).

>    
>>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>>> -				    int trigger_mode)
>>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>>> +				     int trigger_mode)
>>>    {
>>> -	union kvm_ioapic_redirect_entry *ent;
>>> +	int i;
>>> +
>>> +	for (i = 0; i<   IOAPIC_NUM_PINS; i++) {
>>> +		union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
>>> +
>>> +		if (ent->fields.vector != vector)
>>> +			continue;
>>>
>>> -	ent =&ioapic->redirtbl[pin];
>>> +		spin_unlock(&ioapic->lock);
>>> +		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>>> +		spin_lock(&ioapic->lock);
>>>
>>>
>>>        
>> I *think* we need to clear remote_irr before dropping the lock.  I
>> *know* there's a missing comment here.
>>      
> I don't see why we clear remote_irr before dropping the lock. If, while
> lock was dropped, interrupt was delivered to this entry it will be
> injected when ack notifier returns.
>    

But we'll clear remote_irr after the redelivery, and we should do
that only after the new interrupt is acked.

>>> -	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>>> +		if (trigger_mode != IOAPIC_LEVEL_TRIG)
>>> +			continue;
>>>
>>> -	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>>>    		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>>    		ent->fields.remote_irr = 0;
>>> -		if (!ent->fields.mask&&   (ioapic->irr&   (1<<   pin)))
>>> -			ioapic_service(ioapic, pin);
>>> +		if (!ent->fields.mask&&   (ioapic->irr&   (1<<   i)))
>>> +			ioapic_service(ioapic, i);
>>>    	}
>>>    }
>>>
>>>        
>> To make the patch easier to read, suggest keeping the loop in the other
>> function.
>>
>>      
> I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
> the loop is already in its own function. Do you mean move the context of
> the loop into the other function and leave only for(;;) fun(); in
> __kvm_ioapic_update_eoi()?
>    

No, I mean keep the for loop in kvm_ioapic_update_eoi().
Gleb Natapov Aug. 12, 2009, 9:47 a.m. UTC | #4
On Wed, Aug 12, 2009 at 12:18:10PM +0300, Avi Kivity wrote:
> On 08/12/2009 12:04 PM, Gleb Natapov wrote:
>> On Wed, Aug 12, 2009 at 11:27:13AM +0300, Avi Kivity wrote:
>>    
>>> On 08/11/2009 03:31 PM, Gleb Natapov wrote:
>>>
>>>
>>> What is the motivation for this change?
>>>
>>>      
>> The motivation was explained in 0/0. I want to get rid of lock on
>> general irq injection path so the lock have to be pushed into ioapic
>> since multiple cpus can access it concurrently. PIC has such lock
>> already.
>>    
>
> Ah, the real motivation is msi.  Pushing locks down doesn't help if we  
> keep locking them.  But for msi we avoid the lock entirely.
>
Yes. MSI is one. Multiple IOAPICs may inject interrupts in parallel too
(if we choose to implement multiple IOAPICs sometime).

>>> Why a spinlock and not a mutex?
>>>
>>>      
>> Protected sections are small and we do not sleep there.
>>    
>
> So what?  A mutex is better since it allows preemption (and still has  
> spinlock performance if it isn't preempted).
>
This lock will be taken during irq injection from irqfd, so maybe leave
it as a spinlock and take it with _irqsave()? Do we want to allow irq
injection from interrupt context? Otherwise, if you say that performance
is the same, I don't care one way or the other.

>
>
>>> Need to explain why this is safe.  I'm not sure it is, because we touch
>>> state afterwards in pic_intack().  We need to do all vcpu-synchronous
>>> operations before dropping the lock.
>>>      
>> Forst pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>> is already broken for assigned devices. Second for level triggered
>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>> third I can move pic_clear_isr() call to the end of pic_intack().
>>    
>
> I meant, in a comment.
If you agree with the above, I'll add it as a comment.

>
>>>>     void kvm_pic_clear_isr_ack(struct kvm *kvm)
>>>> @@ -238,7 +240,9 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
>>>>    		if (vcpu0&&   kvm_apic_accept_pic_intr(vcpu0))
>>>>    			if (s->irr&   (1<<   irq) || s->isr&   (1<<   irq)) {
>>>>    				n = irq + irqbase;
>>>> +				spin_unlock(&s->pics_state->lock);
>>>>    				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
>>>> +				spin_lock(&s->pics_state->lock);
>>>>
>>>>        
>>> Ditto here, needs to be moved until after done changing state.
>>>
>>>      
>> I am not sure this code is even needed. IOAPIC don't call notifiers on
>> reset.
>>    
>
> It should.  What if there's a reset with an assigned device?  We need to  
> release the device interrupt (after doing FLR?).
Doing this will just re-enable the host interrupt while the irq condition
is not cleared in the device. The host will hang. So I think we really
shouldn't.

>
>>    
>>>> -static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
>>>> -				    int trigger_mode)
>>>> +static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
>>>> +				     int trigger_mode)
>>>>    {
>>>> -	union kvm_ioapic_redirect_entry *ent;
>>>> +	int i;
>>>> +
>>>> +	for (i = 0; i<   IOAPIC_NUM_PINS; i++) {
>>>> +		union kvm_ioapic_redirect_entry *ent =&ioapic->redirtbl[i];
>>>> +
>>>> +		if (ent->fields.vector != vector)
>>>> +			continue;
>>>>
>>>> -	ent =&ioapic->redirtbl[pin];
>>>> +		spin_unlock(&ioapic->lock);
>>>> +		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
>>>> +		spin_lock(&ioapic->lock);
>>>>
>>>>
>>>>        
>>> I *think* we need to clear remote_irr before dropping the lock.  I
>>> *know* there's a missing comment here.
>>>      
>> I don't see why we clear remote_irr before dropping the lock. If, while
>> lock was dropped, interrupt was delivered to this entry it will be
>> injected when ack notifier returns.
>>    
>
> But we'll clear remote_irr afterward the redelivery, and we should to  
> that only after the new interrupt is acked.
It depends on whether you consider calling ack notifiers a part of the
interrupt acknowledgement process. If you do, then remote_irr should not
be cleared before the ack notifiers since the ack process is not completed
yet. With current users, functionally it shouldn't matter when we clear
remote_irr. I prefer doing it like we do it now since this is how it was
before my patches and since the code is simpler this way.

>
>>>> -	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
>>>> +		if (trigger_mode != IOAPIC_LEVEL_TRIG)
>>>> +			continue;
>>>>
>>>> -	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
>>>>    		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
>>>>    		ent->fields.remote_irr = 0;
>>>> -		if (!ent->fields.mask&&   (ioapic->irr&   (1<<   pin)))
>>>> -			ioapic_service(ioapic, pin);
>>>> +		if (!ent->fields.mask&&   (ioapic->irr&   (1<<   i)))
>>>> +			ioapic_service(ioapic, i);
>>>>    	}
>>>>    }
>>>>
>>>>        
>>> To make the patch easier to read, suggest keeping the loop in the other
>>> function.
>>>
>>>      
>> I don't follow. All __kvm_ioapic_update_eoi() contains is the loop, so
>> the loop is already in its own function. Do you mean move the context of
>> the loop into the other function and leave only for(;;) fun(); in
>> __kvm_ioapic_update_eoi()?
>>    
>
> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>
Can't do that. __kvm_ioapic_update_eoi() is called from another place
with the lock already held.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 12, 2009, 9:57 a.m. UTC | #5
On 08/12/2009 12:47 PM, Gleb Natapov wrote:
>> Ah, the real motivation is msi.  Pushing locks down doesn't help if we
>> keep locking them.  But for msi we avoid the lock entirely.
>>
>>      
> Yes. MSI is one. Multiple IOAPICs may inject interrupt in parallel too
> (if we will choose to implement multiple IOAPICs sometime).
>    

Right.  Given msi, I don't think we'll have multiple ioapics though.

>>>> Why a spinlock and not a mutex?
>>>>
>>>>
>>>>          
>>> Protected sections are small and we do not sleep there.
>>>
>>>        
>> So what?  A mutex is better since it allows preemption (and still has
>> spinlock performance if it isn't preempted).
>>
>>      
> This lock will be taken during irq injection from irqfd, so may be leave
> it as spinlock and take it _irqsave()? Do we want to allow irq injection
> from interrupt context? Otherwise if you say that performance is the
> same I don't care one way or the other.
>    

Let's leave _irqsave() until later since that has other implications.

>>>> Need to explain why this is safe.  I'm not sure it is, because we touch
>>>> state afterwards in pic_intack().  We need to do all vcpu-synchronous
>>>> operations before dropping the lock.
>>>>
>>>>          
>>> Forst pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>>> is already broken for assigned devices. Second for level triggered
>>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>>> third I can move pic_clear_isr() call to the end of pic_intack().
>>>
>>>        
>> I meant, in a comment.
>>      
> I you agree with above I'll add it as a comment.
>    

Sure.

>> It should.  What if there's a reset with an assigned device?  We need to
>> release the device interrupt (after doing FLR?).
>>      
> Doing this will just re-enable host interrupt while irq condition is not
> cleared in the device. The host will hang. So I think we really shouldn't.
>    

Ok.  What about timer acks?




>>> I don't see why we clear remote_irr before dropping the lock. If, while
>>> lock was dropped, interrupt was delivered to this entry it will be
>>> injected when ack notifier returns.
>>>
>>>        
>> But we'll clear remote_irr afterward the redelivery, and we should to
>> that only after the new interrupt is acked.
>>      
> It depend on whether you consider calling ack notifiers a part of
> interrupt acknowledgement process.

I don't really care, but the ack process has to be atomic.  Since we 
need to drop the lock, it means the notifier is not part of the process.

> If you do then remote_irr should not
> be cleared before ack notifiers since ack process is not completed yet.
> With current users functionally it shouldn't matter when we clear
> remote_irr. I prefer doing it like we do it now since this how it was
> before my patches and since code is simpler this way.
>    

No, I think it introduces a race if an interrupt is raised while the ack 
notifier is running.



>> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>>
>>      
> Can't do that. __kvm_ioapic_update_eoi() is called from other place with
> lock held already.
>    

Ok.
Gleb Natapov Aug. 12, 2009, 10:05 a.m. UTC | #6
On Wed, Aug 12, 2009 at 12:57:07PM +0300, Avi Kivity wrote:
> On 08/12/2009 12:47 PM, Gleb Natapov wrote:
>>> Ah, the real motivation is msi.  Pushing locks down doesn't help if we
>>> keep locking them.  But for msi we avoid the lock entirely.
>>>
>>>      
>> Yes. MSI is one. Multiple IOAPICs may inject interrupt in parallel too
>> (if we will choose to implement multiple IOAPICs sometime).
>>    
>
> Right.  Given msi, I don't think we'll have multiple ioapics though.
>
>>>>> Why a spinlock and not a mutex?
>>>>>
>>>>>
>>>>>          
>>>> Protected sections are small and we do not sleep there.
>>>>
>>>>        
>>> So what?  A mutex is better since it allows preemption (and still has
>>> spinlock performance if it isn't preempted).
>>>
>>>      
>> This lock will be taken during irq injection from irqfd, so may be leave
>> it as spinlock and take it _irqsave()? Do we want to allow irq injection
>> from interrupt context? Otherwise if you say that performance is the
>> same I don't care one way or the other.
>>    
>
> Let's leave _irqsave() until later since that has other implications.
>
So change it to mutex then?

>>>>> Need to explain why this is safe.  I'm not sure it is, because we touch
>>>>> state afterwards in pic_intack().  We need to do all vcpu-synchronous
>>>>> operations before dropping the lock.
>>>>>
>>>>>          
>>>> Forst pic_intack() calls pic_clear_isr() only in auto eoi mode and this mode
>>>> is already broken for assigned devices. Second for level triggered
>>>> interrupts pic_intack() does nothing after calling pic_clear_isr() and
>>>> third I can move pic_clear_isr() call to the end of pic_intack().
>>>>
>>>>        
>>> I meant, in a comment.
>>>      
>> I you agree with above I'll add it as a comment.
>>    
>
> Sure.
>
>>> It should.  What if there's a reset with an assigned device?  We need to
>>> release the device interrupt (after doing FLR?).
>>>      
>> Doing this will just re-enable host interrupt while irq condition is not
>> cleared in the device. The host will hang. So I think we really shouldn't.
>>    
>
> Ok.  What about timer acks?
>
Interrupt wasn't processed by a guest. Why should it be accounted as
such?

>
>
>
>>>> I don't see why we clear remote_irr before dropping the lock. If, while
>>>> lock was dropped, interrupt was delivered to this entry it will be
>>>> injected when ack notifier returns.
>>>>
>>>>        
>>> But we'll clear remote_irr afterward the redelivery, and we should to
>>> that only after the new interrupt is acked.
>>>      
>> It depend on whether you consider calling ack notifiers a part of
>> interrupt acknowledgement process.
>
> I don't really care, but the ack process has to be atomic.  Since we  
> need to drop the lock, it means the notifier is not part of the process.
>
>> If you do then remote_irr should not
>> be cleared before ack notifiers since ack process is not completed yet.
>> With current users functionally it shouldn't matter when we clear
>> remote_irr. I prefer doing it like we do it now since this how it was
>> before my patches and since code is simpler this way.
>>    
>
> No, I think it introduces a race if an interrupt is raised while the ack  
> notifier is running.
>
>
>
>>> No, I mean keep the for loop in kvm_ioapic_update_eoi().
>>>
>>>      
>> Can't do that. __kvm_ioapic_update_eoi() is called from other place with
>> lock held already.
>>    
>
> Ok.
>
> -- 
> error compiling committee.c: too many arguments to function

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Avi Kivity Aug. 12, 2009, 10:07 a.m. UTC | #7
On 08/12/2009 01:05 PM, Gleb Natapov wrote:
>
>    
>> Let's leave _irqsave() until later since that has other implications.
>>
>>      
> So change it to mutex then?
>
>    

Yes please.

>>> Doing this will just re-enable host interrupt while irq condition is not
>>> cleared in the device. The host will hang. So I think we really shouldn't.
>>>
>>>        
>> Ok.  What about timer acks?
>>
>>      
> Interrupt wasn't processed by a guest. Why should it be accounted as
> such?
>    

Good question.  I guess it shouldn't.
diff mbox

Patch

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 0ad09f0..dd7ef2d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -850,9 +850,16 @@  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
 
 	r = 0;
 	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm),
-				sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(&chip->chip.ioapic, ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
@@ -867,10 +874,16 @@  static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 
 	r = 0;
 	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(ioapic_irqchip(kvm),
-				&chip->chip.ioapic,
-				sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(ioapic, &chip->chip.ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 01f1516..a988c0e 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -38,7 +38,9 @@  static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 	s->isr_ack |= (1 << irq);
 	if (s != &s->pics_state->pics[0])
 		irq += 8;
+	spin_unlock(&s->pics_state->lock);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
+	spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
@@ -238,7 +240,9 @@  void kvm_pic_reset(struct kvm_kpic_state *s)
 		if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0))
 			if (s->irr & (1 << irq) || s->isr & (1 << irq)) {
 				n = irq + irqbase;
+				spin_unlock(&s->pics_state->lock);
 				kvm_notify_acked_irq(kvm, SELECT_PIC(n), n);
+				spin_lock(&s->pics_state->lock);
 			}
 	}
 	s->last_irr = 0;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce195f8..f24d4d0 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -471,11 +471,8 @@  static void apic_set_eoi(struct kvm_lapic *apic)
 		trigger_mode = IOAPIC_LEVEL_TRIG;
 	else
 		trigger_mode = IOAPIC_EDGE_TRIG;
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)) {
-		mutex_lock(&apic->vcpu->kvm->irq_lock);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI))
 		kvm_ioapic_update_eoi(apic->vcpu->kvm, vector, trigger_mode);
-		mutex_unlock(&apic->vcpu->kvm->irq_lock);
-	}
 }
 
 static void apic_send_ipi(struct kvm_lapic *apic)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 850cf56..b0906a0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2022,10 +2022,16 @@  static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 			&pic_irqchip(kvm)->pics[1],
 			sizeof(struct kvm_pic_state));
 		break;
-	case KVM_IRQCHIP_IOAPIC:
-		memcpy(&chip->chip.ioapic,
-			ioapic_irqchip(kvm),
-			sizeof(struct kvm_ioapic_state));
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(&chip->chip.ioapic, ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
@@ -2054,12 +2060,16 @@  static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
 			sizeof(struct kvm_pic_state));
 		spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
-	case KVM_IRQCHIP_IOAPIC:
-		mutex_lock(&kvm->irq_lock);
-		memcpy(ioapic_irqchip(kvm),
-			&chip->chip.ioapic,
-			sizeof(struct kvm_ioapic_state));
-		mutex_unlock(&kvm->irq_lock);
+	case KVM_IRQCHIP_IOAPIC: {
+		struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
+		if (ioapic) {
+			spin_lock(&ioapic->lock);
+			memcpy(ioapic, &chip->chip.ioapic,
+			       sizeof(struct kvm_ioapic_state));
+			spin_unlock(&ioapic->lock);
+		} else
+			r = -EINVAL;
+	}
 		break;
 	default:
 		r = -EINVAL;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index fa05f67..881d083 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -182,6 +182,7 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
+	spin_lock(&ioapic->lock);
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
@@ -196,34 +197,43 @@  int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 		}
 		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	}
+	spin_unlock(&ioapic->lock);
+
 	return ret;
 }
 
-static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int pin,
-				    int trigger_mode)
+static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int vector,
+				     int trigger_mode)
 {
-	union kvm_ioapic_redirect_entry *ent;
+	int i;
+
+	for (i = 0; i < IOAPIC_NUM_PINS; i++) {
+		union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
+
+		if (ent->fields.vector != vector)
+			continue;
 
-	ent = &ioapic->redirtbl[pin];
+		spin_unlock(&ioapic->lock);
+		kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
+		spin_lock(&ioapic->lock);
 
-	kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, pin);
+		if (trigger_mode != IOAPIC_LEVEL_TRIG)
+			continue;
 
-	if (trigger_mode == IOAPIC_LEVEL_TRIG) {
 		ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
 		ent->fields.remote_irr = 0;
-		if (!ent->fields.mask && (ioapic->irr & (1 << pin)))
-			ioapic_service(ioapic, pin);
+		if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+			ioapic_service(ioapic, i);
 	}
 }
 
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int trigger_mode)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-	int i;
 
-	for (i = 0; i < IOAPIC_NUM_PINS; i++)
-		if (ioapic->redirtbl[i].fields.vector == vector)
-			__kvm_ioapic_update_eoi(ioapic, i, trigger_mode);
+	spin_lock(&ioapic->lock);
+	__kvm_ioapic_update_eoi(ioapic, vector, trigger_mode);
+	spin_unlock(&ioapic->lock);
 }
 
 static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
@@ -248,8 +258,8 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	addr &= 0xff;
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		result = ioapic->ioregsel;
@@ -263,6 +273,8 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 		result = 0;
 		break;
 	}
+	spin_unlock(&ioapic->lock);
+
 	switch (len) {
 	case 8:
 		*(u64 *) val = result;
@@ -275,7 +287,6 @@  static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 	default:
 		printk(KERN_WARNING "ioapic: wrong length %d\n", len);
 	}
-	mutex_unlock(&ioapic->kvm->irq_lock);
 	return 0;
 }
 
@@ -291,15 +302,15 @@  static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
-	mutex_lock(&ioapic->kvm->irq_lock);
 	if (len == 4 || len == 8)
 		data = *(u32 *) val;
 	else {
 		printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
-		goto unlock;
+		return 0;
 	}
 
 	addr &= 0xff;
+	spin_lock(&ioapic->lock);
 	switch (addr) {
 	case IOAPIC_REG_SELECT:
 		ioapic->ioregsel = data;
@@ -310,15 +321,14 @@  static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 		break;
 #ifdef	CONFIG_IA64
 	case IOAPIC_REG_EOI:
-		kvm_ioapic_update_eoi(ioapic->kvm, data, IOAPIC_LEVEL_TRIG);
+		__kvm_ioapic_update_eoi(ioapic, data, IOAPIC_LEVEL_TRIG);
 		break;
 #endif
 
 	default:
 		break;
 	}
-unlock:
-	mutex_unlock(&ioapic->kvm->irq_lock);
+	spin_unlock(&ioapic->lock);
 	return 0;
 }
 
@@ -347,6 +357,7 @@  int kvm_ioapic_init(struct kvm *kvm)
 	ioapic = kzalloc(sizeof(struct kvm_ioapic), GFP_KERNEL);
 	if (!ioapic)
 		return -ENOMEM;
+	spin_lock_init(&ioapic->lock);
 	kvm->arch.vioapic = ioapic;
 	kvm_ioapic_reset(ioapic);
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index 7080b71..557107e 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -44,6 +44,7 @@  struct kvm_ioapic {
 	struct kvm_io_device dev;
 	struct kvm *kvm;
 	void (*ack_notifier)(void *opaque, int irq);
+	spinlock_t lock;
 };
 
 #ifdef DEBUG