diff mbox

KVM: Fix kvm_irqfd_init initialization

Message ID 1367938456-21005-1-git-send-email-asias@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Asias He May 7, 2013, 2:54 p.m. UTC
In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
kvm_arch_init() will fail with -EEXIST, then kvm_irqfd_exit() will be
called on the error handling path. This way, the kvm_irqfd system will
not be ready.

This patch fix the following:

BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
PGD 0
Oops: 0002 [#1] SMP
Modules linked in: vhost_net
CPU 6
Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
Stack:
 ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
 0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
 ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
Call Trace:
 [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
 [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
 [<ffffffff810ac949>] queue_work+0x19/0x20
 [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
 [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
 [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
 [<ffffffff810c9545>] ? update_curr+0xf5/0x180
 [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
 [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
 [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
 [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
 [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
 [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
RSP <ffff880221721cc8>
CR2: 0000000000000000
---[ end trace 13fb1e4b6e5ab21f ]---

Signed-off-by: Asias He <asias@redhat.com>
---
 virt/kvm/kvm_main.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

Comments

Michael S. Tsirkin May 7, 2013, 2:59 p.m. UTC | #1
On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> kvm_arch_init() will fail with -EEXIST,

Wow. Is this intentional?

> then kvm_irqfd_exit() will be
> called on the error handling path. This way, the kvm_irqfd system will
> not be ready.
> 
> This patch fix the following:
> 
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> PGD 0
> Oops: 0002 [#1] SMP
> Modules linked in: vhost_net
> CPU 6
> Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
> RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
> RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
> RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
> RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
> R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
> R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
> FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
> Stack:
>  ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
>  0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
>  ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
> Call Trace:
>  [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
>  [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
>  [<ffffffff810ac949>] queue_work+0x19/0x20
>  [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
>  [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
>  [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
>  [<ffffffff810c9545>] ? update_curr+0xf5/0x180
>  [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
>  [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
>  [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
>  [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
>  [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
>  [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
> Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
> RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> RSP <ffff880221721cc8>
> CR2: 0000000000000000
> ---[ end trace 13fb1e4b6e5ab21f ]---
> 
> Signed-off-by: Asias He <asias@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 8fd325a..3c8a992 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3078,13 +3078,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
>  	int r;
>  	int cpu;
>  
> -	r = kvm_irqfd_init();
> -	if (r)
> -		goto out_irqfd;
>  	r = kvm_arch_init(opaque);
>  	if (r)
>  		goto out_fail;
>  
> +	r = kvm_irqfd_init();
> +	if (r)
> +		goto out_irqfd;
> +
>  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
>  		r = -ENOMEM;
>  		goto out_free_0;
> @@ -3159,10 +3160,10 @@ out_free_1:
>  out_free_0a:
>  	free_cpumask_var(cpus_hardware_enabled);
>  out_free_0:
> -	kvm_arch_exit();
> -out_fail:
>  	kvm_irqfd_exit();
>  out_irqfd:
> +	kvm_arch_exit();
> +out_fail:
>  	return r;
>  }
>  EXPORT_SYMBOL_GPL(kvm_init);
> -- 
> 1.8.1.4
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Asias He May 7, 2013, 3:05 p.m. UTC | #2
On Tue, May 07, 2013 at 05:59:38PM +0300, Michael S. Tsirkin wrote:
> On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > kvm_arch_init() will fail with -EEXIST,
> 
> Wow. Is this intentional?

I think it is. You can not be amd and intel at the same time ;-)

kvm_arch_init

    if (kvm_x86_ops) {
            printk(KERN_ERR "kvm: already loaded the other module\n");
            r = -EEXIST;
            goto out;
    }       


> > then kvm_irqfd_exit() will be
> > called on the error handling path. This way, the kvm_irqfd system will
> > not be ready.
> > 
> > This patch fix the following:
> > 
> > BUG: unable to handle kernel NULL pointer dereference at           (null)
> > IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > PGD 0
> > Oops: 0002 [#1] SMP
> > Modules linked in: vhost_net
> > CPU 6
> > Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
> > RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
> > RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
> > RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
> > RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
> > R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
> > R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
> > FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
> > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
> > Stack:
> >  ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
> >  0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
> >  ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
> > Call Trace:
> >  [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
> >  [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
> >  [<ffffffff810ac949>] queue_work+0x19/0x20
> >  [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
> >  [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
> >  [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
> >  [<ffffffff810c9545>] ? update_curr+0xf5/0x180
> >  [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
> >  [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
> >  [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
> >  [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
> >  [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
> >  [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
> > Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
> > RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > RSP <ffff880221721cc8>
> > CR2: 0000000000000000
> > ---[ end trace 13fb1e4b6e5ab21f ]---
> > 
> > Signed-off-by: Asias He <asias@redhat.com>
> > ---
> >  virt/kvm/kvm_main.c | 11 ++++++-----
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 8fd325a..3c8a992 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -3078,13 +3078,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> >  	int r;
> >  	int cpu;
> >  
> > -	r = kvm_irqfd_init();
> > -	if (r)
> > -		goto out_irqfd;
> >  	r = kvm_arch_init(opaque);
> >  	if (r)
> >  		goto out_fail;
> >  
> > +	r = kvm_irqfd_init();
> > +	if (r)
> > +		goto out_irqfd;
> > +
> >  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
> >  		r = -ENOMEM;
> >  		goto out_free_0;
> > @@ -3159,10 +3160,10 @@ out_free_1:
> >  out_free_0a:
> >  	free_cpumask_var(cpus_hardware_enabled);
> >  out_free_0:
> > -	kvm_arch_exit();
> > -out_fail:
> >  	kvm_irqfd_exit();
> >  out_irqfd:
> > +	kvm_arch_exit();
> > +out_fail:
> >  	return r;
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_init);
> > -- 
> > 1.8.1.4
Michael S. Tsirkin May 7, 2013, 3:11 p.m. UTC | #3
On Tue, May 07, 2013 at 11:05:12PM +0800, Asias He wrote:
> On Tue, May 07, 2013 at 05:59:38PM +0300, Michael S. Tsirkin wrote:
> > On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> > > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > > kvm_arch_init() will fail with -EEXIST,
> > 
> > Wow. Is this intentional?
> 
> I think it is. You can not be amd and intel at the same time ;-)
> 
> kvm_arch_init
> 
>     if (kvm_x86_ops) {
>             printk(KERN_ERR "kvm: already loaded the other module\n");
>             r = -EEXIST;
>             goto out;
>     }       
> 

Interesting. So we check it with
	if (kvm_x86_ops)
 and later we do
        kvm_x86_ops = ops;


This looks racy - or is something serializing
module loading?


> > > called on the error handling path. This way, the kvm_irqfd system will
> > > not be ready.
> > > 
> > > This patch fix the following:
> > > 
> > > BUG: unable to handle kernel NULL pointer dereference at           (null)
> > > IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > > PGD 0
> > > Oops: 0002 [#1] SMP
> > > Modules linked in: vhost_net
> > > CPU 6
> > > Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
> > > RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > > RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
> > > RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
> > > RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
> > > RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
> > > R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
> > > R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
> > > FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
> > > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > > CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
> > > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > > Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
> > > Stack:
> > >  ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
> > >  0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
> > >  ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
> > > Call Trace:
> > >  [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
> > >  [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
> > >  [<ffffffff810ac949>] queue_work+0x19/0x20
> > >  [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
> > >  [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
> > >  [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
> > >  [<ffffffff810c9545>] ? update_curr+0xf5/0x180
> > >  [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
> > >  [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
> > >  [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
> > >  [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
> > >  [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
> > >  [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
> > > Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
> > > RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > > RSP <ffff880221721cc8>
> > > CR2: 0000000000000000
> > > ---[ end trace 13fb1e4b6e5ab21f ]---
> > > 
> > > Signed-off-by: Asias He <asias@redhat.com>
> > > ---
> > >  virt/kvm/kvm_main.c | 11 ++++++-----
> > >  1 file changed, 6 insertions(+), 5 deletions(-)
> > > 
> > > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > > index 8fd325a..3c8a992 100644
> > > --- a/virt/kvm/kvm_main.c
> > > +++ b/virt/kvm/kvm_main.c
> > > @@ -3078,13 +3078,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> > >  	int r;
> > >  	int cpu;
> > >  
> > > -	r = kvm_irqfd_init();
> > > -	if (r)
> > > -		goto out_irqfd;
> > >  	r = kvm_arch_init(opaque);
> > >  	if (r)
> > >  		goto out_fail;
> > >  
> > > +	r = kvm_irqfd_init();
> > > +	if (r)
> > > +		goto out_irqfd;
> > > +
> > >  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
> > >  		r = -ENOMEM;
> > >  		goto out_free_0;
> > > @@ -3159,10 +3160,10 @@ out_free_1:
> > >  out_free_0a:
> > >  	free_cpumask_var(cpus_hardware_enabled);
> > >  out_free_0:
> > > -	kvm_arch_exit();
> > > -out_fail:
> > >  	kvm_irqfd_exit();
> > >  out_irqfd:
> > > +	kvm_arch_exit();
> > > +out_fail:
> > >  	return r;
> > >  }
> > >  EXPORT_SYMBOL_GPL(kvm_init);
> > > -- 
> > > 1.8.1.4
> 
> -- 
> Asias
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov May 7, 2013, 3:16 p.m. UTC | #4
On Tue, May 07, 2013 at 06:11:33PM +0300, Michael S. Tsirkin wrote:
> On Tue, May 07, 2013 at 11:05:12PM +0800, Asias He wrote:
> > On Tue, May 07, 2013 at 05:59:38PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> > > > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > > > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > > > kvm_arch_init() will fail with -EEXIST,
> > > 
> > > Wow. Is this intentional?
> > 
> > I think it is. You can not be amd and intel at the same time ;-)
> > 
> > kvm_arch_init
> > 
> >     if (kvm_x86_ops) {
> >             printk(KERN_ERR "kvm: already loaded the other module\n");
> >             r = -EEXIST;
> >             goto out;
> >     }       
> > 
> 
> Interesting. So we check it with
> 	if (kvm_x86_ops)
>  and later we do
>         kvm_x86_ops = ops;
> 
> 
> This looks racy - or is something serializing
> module loading?
> 
I think module loading is serialized.

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 7, 2013, 3:24 p.m. UTC | #5
On Tue, May 07, 2013 at 06:16:09PM +0300, Gleb Natapov wrote:
> On Tue, May 07, 2013 at 06:11:33PM +0300, Michael S. Tsirkin wrote:
> > On Tue, May 07, 2013 at 11:05:12PM +0800, Asias He wrote:
> > > On Tue, May 07, 2013 at 05:59:38PM +0300, Michael S. Tsirkin wrote:
> > > > On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> > > > > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > > > > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > > > > kvm_arch_init() will fail with -EEXIST,
> > > > 
> > > > Wow. Is this intentional?
> > > 
> > > I think it is. You can not be amd and intel at the same time ;-)
> > > 
> > > kvm_arch_init
> > > 
> > >     if (kvm_x86_ops) {
> > >             printk(KERN_ERR "kvm: already loaded the other module\n");
> > >             r = -EEXIST;
> > >             goto out;
> > >     }       
> > > 
> > 
> > Interesting. So we check it with
> > 	if (kvm_x86_ops)
> >  and later we do
> >         kvm_x86_ops = ops;
> > 
> > 
> > This looks racy - or is something serializing
> > module loading?
> > 
> I think module loading is serialized.
>

Hmm then I don't understand ... both kvm-intel
and kvm-amd *can't* be loaded at the same time:
module loading fails for one of them.

 
> --
> 			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Gleb Natapov May 7, 2013, 3:29 p.m. UTC | #6
On Tue, May 07, 2013 at 06:24:48PM +0300, Michael S. Tsirkin wrote:
> On Tue, May 07, 2013 at 06:16:09PM +0300, Gleb Natapov wrote:
> > On Tue, May 07, 2013 at 06:11:33PM +0300, Michael S. Tsirkin wrote:
> > > On Tue, May 07, 2013 at 11:05:12PM +0800, Asias He wrote:
> > > > On Tue, May 07, 2013 at 05:59:38PM +0300, Michael S. Tsirkin wrote:
> > > > > On Tue, May 07, 2013 at 10:54:16PM +0800, Asias He wrote:
> > > > > > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > > > > > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > > > > > kvm_arch_init() will fail with -EEXIST,
> > > > > 
> > > > > Wow. Is this intentional?
> > > > 
> > > > I think it is. You can not be amd and intel at the same time ;-)
> > > > 
> > > > kvm_arch_init
> > > > 
> > > >     if (kvm_x86_ops) {
> > > >             printk(KERN_ERR "kvm: already loaded the other module\n");
> > > >             r = -EEXIST;
> > > >             goto out;
> > > >     }       
> > > > 
> > > 
> > > Interesting. So we check it with
> > > 	if (kvm_x86_ops)
> > >  and later we do
> > >         kvm_x86_ops = ops;
> > > 
> > > 
> > > This looks racy - or is something serializing
> > > module loading?
> > > 
> > I think module loading is serialized.
> >
> 
> Hmm then I don't understand ... both kvm-intel
> and kvm-amd *can't* be loaded at the same time:
> module loading fails for one of them.
> 
Why would it fail?

--
			Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Cornelia Huck May 7, 2013, 3:54 p.m. UTC | #7
On Tue,  7 May 2013 22:54:16 +0800
Asias He <asias@redhat.com> wrote:

> In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> kvm_arch_init() will fail with -EEXIST, then kvm_irqfd_exit() will be
> called on the error handling path. This way, the kvm_irqfd system will
> not be ready.

Since we can't have the initialization as kvm module init function
without forcing everyone to split modules, this is probably the best
way to handle this.

Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>

> 
> This patch fix the following:
> 
> BUG: unable to handle kernel NULL pointer dereference at           (null)
> IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> PGD 0
> Oops: 0002 [#1] SMP
> Modules linked in: vhost_net
> CPU 6
> Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
> RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
> RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
> RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
> RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
> R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
> R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
> FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
> DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
> Stack:
>  ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
>  0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
>  ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
> Call Trace:
>  [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
>  [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
>  [<ffffffff810ac949>] queue_work+0x19/0x20
>  [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
>  [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
>  [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
>  [<ffffffff810c9545>] ? update_curr+0xf5/0x180
>  [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
>  [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
>  [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
>  [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
>  [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
>  [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
> Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
> RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> RSP <ffff880221721cc8>
> CR2: 0000000000000000
> ---[ end trace 13fb1e4b6e5ab21f ]---
> 
> Signed-off-by: Asias He <asias@redhat.com>
> ---
>  virt/kvm/kvm_main.c | 11 ++++++-----
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 8fd325a..3c8a992 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -3078,13 +3078,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
>  	int r;
>  	int cpu;
> 
> -	r = kvm_irqfd_init();
> -	if (r)
> -		goto out_irqfd;
>  	r = kvm_arch_init(opaque);
>  	if (r)
>  		goto out_fail;
> 
> +	r = kvm_irqfd_init();
> +	if (r)
> +		goto out_irqfd;
> +
>  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
>  		r = -ENOMEM;
>  		goto out_free_0;
> @@ -3159,10 +3160,10 @@ out_free_1:
>  out_free_0a:
>  	free_cpumask_var(cpus_hardware_enabled);
>  out_free_0:
> -	kvm_arch_exit();
> -out_fail:
>  	kvm_irqfd_exit();
>  out_irqfd:
> +	kvm_arch_exit();
> +out_fail:
>  	return r;
>  }
>  EXPORT_SYMBOL_GPL(kvm_init);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Michael S. Tsirkin May 7, 2013, 4:01 p.m. UTC | #8
On Tue, May 07, 2013 at 05:54:20PM +0200, Cornelia Huck wrote:
> On Tue,  7 May 2013 22:54:16 +0800
> Asias He <asias@redhat.com> wrote:
> 
> > In commit a0f155e96 'KVM: Initialize irqfd from kvm_init()', when
> > kvm_init() is called the second time (e.g kvm-amd.ko and kvm-intel.ko),
> > kvm_arch_init() will fail with -EEXIST, then kvm_irqfd_exit() will be
> > called on the error handling path. This way, the kvm_irqfd system will
> > not be ready.
> 
> Since we can't have the initialization as kvm module init function
> without forcing everyone to split modules, this is probably the best
> way to handle this.
> 
> Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>

Right. And please add a comment. Something like
/*
 * kvm_arch_init makes sure there's at most one caller
 * for architectures that support multiple implementations,
 * like intel and amd on x86.
 * Must be called first to avoid creating conflicts
 * in case kvm is already setup for another implementation.
*/

> > 
> > This patch fix the following:
> > 
> > BUG: unable to handle kernel NULL pointer dereference at           (null)
> > IP: [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > PGD 0
> > Oops: 0002 [#1] SMP
> > Modules linked in: vhost_net
> > CPU 6
> > Pid: 4257, comm: qemu-system-x86 Not tainted 3.9.0-rc3+ #757 Dell Inc. OptiPlex 790/0V5HMK
> > RIP: 0010:[<ffffffff81c0721e>]  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > RSP: 0018:ffff880221721cc8  EFLAGS: 00010046
> > RAX: 0000000000000100 RBX: ffff88022dcc003f RCX: ffff880221734950
> > RDX: ffff8802208f6ca8 RSI: 000000007fffffff RDI: 0000000000000000
> > RBP: ffff880221721cc8 R08: 0000000000000002 R09: 0000000000000002
> > R10: 00007f7fd01087e0 R11: 0000000000000246 R12: ffff8802208f6ca8
> > R13: 0000000000000080 R14: ffff880223e2a900 R15: 0000000000000000
> > FS:  00007f7fd38488e0(0000) GS:ffff88022dcc0000(0000) knlGS:0000000000000000
> > CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > CR2: 0000000000000000 CR3: 000000022309f000 CR4: 00000000000427e0
> > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
> > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
> > Process qemu-system-x86 (pid: 4257, threadinfo ffff880221720000, task ffff880222bd5640)
> > Stack:
> >  ffff880221721d08 ffffffff810ac5c5 ffff88022431dc00 0000000000000086
> >  0000000000000080 ffff880223e2a900 ffff8802208f6ca8 0000000000000000
> >  ffff880221721d48 ffffffff810ac8fe 0000000000000000 ffff880221734000
> > Call Trace:
> >  [<ffffffff810ac5c5>] __queue_work+0x45/0x2d0
> >  [<ffffffff810ac8fe>] queue_work_on+0x8e/0xa0
> >  [<ffffffff810ac949>] queue_work+0x19/0x20
> >  [<ffffffff81009b6b>] irqfd_deactivate+0x4b/0x60
> >  [<ffffffff8100a69d>] kvm_irqfd+0x39d/0x580
> >  [<ffffffff81007a27>] kvm_vm_ioctl+0x207/0x5b0
> >  [<ffffffff810c9545>] ? update_curr+0xf5/0x180
> >  [<ffffffff811b66e8>] do_vfs_ioctl+0x98/0x550
> >  [<ffffffff810c1f5e>] ? finish_task_switch+0x4e/0xe0
> >  [<ffffffff81c054aa>] ? __schedule+0x2ea/0x710
> >  [<ffffffff811b6bf7>] sys_ioctl+0x57/0x90
> >  [<ffffffff8140ae9e>] ? trace_hardirqs_on_thunk+0x3a/0x3c
> >  [<ffffffff81c0f602>] system_call_fastpath+0x16/0x1b
> > Code: c1 ea 08 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b c9 c3 55 48 89 e5 66 66 66 66 90 b8 00 01 00 00 <f0> 66 0f c1 07 89 c2 66 c1 ea 08 38 c2 74 0c 0f 1f 00 f3 90 0f
> > RIP  [<ffffffff81c0721e>] _raw_spin_lock+0xe/0x30
> > RSP <ffff880221721cc8>
> > CR2: 0000000000000000
> > ---[ end trace 13fb1e4b6e5ab21f ]---
> > 
> > Signed-off-by: Asias He <asias@redhat.com>
> > ---
> >  virt/kvm/kvm_main.c | 11 ++++++-----
> >  1 file changed, 6 insertions(+), 5 deletions(-)
> > 
> > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> > index 8fd325a..3c8a992 100644
> > --- a/virt/kvm/kvm_main.c
> > +++ b/virt/kvm/kvm_main.c
> > @@ -3078,13 +3078,14 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
> >  	int r;
> >  	int cpu;
> > 
> > -	r = kvm_irqfd_init();
> > -	if (r)
> > -		goto out_irqfd;
> >  	r = kvm_arch_init(opaque);
> >  	if (r)
> >  		goto out_fail;
> > 
> > +	r = kvm_irqfd_init();
> > +	if (r)
> > +		goto out_irqfd;
> > +
> >  	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
> >  		r = -ENOMEM;
> >  		goto out_free_0;
> > @@ -3159,10 +3160,10 @@ out_free_1:
> >  out_free_0a:
> >  	free_cpumask_var(cpus_hardware_enabled);
> >  out_free_0:
> > -	kvm_arch_exit();
> > -out_fail:
> >  	kvm_irqfd_exit();
> >  out_irqfd:
> > +	kvm_arch_exit();
> > +out_fail:
> >  	return r;
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_init);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8fd325a..3c8a992 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3078,13 +3078,14 @@  int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
 	int r;
 	int cpu;
 
-	r = kvm_irqfd_init();
-	if (r)
-		goto out_irqfd;
 	r = kvm_arch_init(opaque);
 	if (r)
 		goto out_fail;
 
+	r = kvm_irqfd_init();
+	if (r)
+		goto out_irqfd;
+
 	if (!zalloc_cpumask_var(&cpus_hardware_enabled, GFP_KERNEL)) {
 		r = -ENOMEM;
 		goto out_free_0;
@@ -3159,10 +3160,10 @@  out_free_1:
 out_free_0a:
 	free_cpumask_var(cpus_hardware_enabled);
 out_free_0:
-	kvm_arch_exit();
-out_fail:
 	kvm_irqfd_exit();
 out_irqfd:
+	kvm_arch_exit();
+out_fail:
 	return r;
 }
 EXPORT_SYMBOL_GPL(kvm_init);