From patchwork Fri Nov 16 02:07:55 2012
X-Patchwork-Submitter: Marcelo Tosatti
X-Patchwork-Id: 1752561
Date: Fri, 16 Nov 2012 00:07:55 -0200
From: Marcelo Tosatti
To: Glauber Costa
Cc: kvm@vger.kernel.org, johnstul@us.ibm.com, jeremy@goop.org,
	zamsden@gmail.com, gleb@redhat.com, avi@redhat.com, pbonzini@redhat.com
Subject: [patch 02/18] x86: kvmclock: allocate pvclock shared memory area (v2)
Message-ID: <20121116020755.GB16948@amt.cnet>
References: <20121115000823.285102321@redhat.com>
	<20121115000944.034452554@redhat.com>
	<50A520F3.8060105@parallels.com>
In-Reply-To: <50A520F3.8060105@parallels.com>

We want to expose the pvclock shared memory areas, which the hypervisor
periodically updates, to userspace. For a linear mapping from userspace,
it is necessary that entire page-sized regions are used for the array of
pvclock structures. There is no such guarantee with per-CPU areas,
therefore move to a memblock_alloc based allocation.
Signed-off-by: Marcelo Tosatti
Acked-by: Glauber Costa

---
Index: vsyscall/arch/x86/kernel/kvmclock.c
===================================================================
--- vsyscall.orig/arch/x86/kernel/kvmclock.c
+++ vsyscall/arch/x86/kernel/kvmclock.c
@@ -23,6 +23,7 @@
 #include <asm/apic.h>
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
+#include <linux/memblock.h>
 
 #include <asm/x86_init.h>
 #include <asm/reboot.h>
@@ -39,7 +40,11 @@ static int parse_no_kvmclock(char *arg)
 early_param("no-kvmclock", parse_no_kvmclock);
 
 /* The hypervisor will put information about time periodically here */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct pvclock_vcpu_time_info, hv_clock);
+struct pvclock_aligned_vcpu_time_info {
+	struct pvclock_vcpu_time_info clock;
+} __attribute__((__aligned__(SMP_CACHE_BYTES)));
+
+static struct pvclock_aligned_vcpu_time_info *hv_clock;
 static struct pvclock_wall_clock wall_clock;
 
 /*
@@ -52,15 +57,20 @@ static unsigned long kvm_get_wallclock(v
 	struct pvclock_vcpu_time_info *vcpu_time;
 	struct timespec ts;
 	int low, high;
+	int cpu;
 
 	low = (int)__pa_symbol(&wall_clock);
 	high = ((u64)__pa_symbol(&wall_clock) >> 32);
 
 	native_write_msr(msr_kvm_wall_clock, low, high);
 
-	vcpu_time = &get_cpu_var(hv_clock);
+	preempt_disable();
+	cpu = smp_processor_id();
+
+	vcpu_time = &hv_clock[cpu].clock;
 	pvclock_read_wallclock(&wall_clock, vcpu_time, &ts);
-	put_cpu_var(hv_clock);
+
+	preempt_enable();
 
 	return ts.tv_sec;
 }
@@ -74,9 +84,11 @@ static cycle_t kvm_clock_read(void)
 {
 	struct pvclock_vcpu_time_info *src;
 	cycle_t ret;
+	int cpu;
 
 	preempt_disable_notrace();
-	src = &__get_cpu_var(hv_clock);
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].clock;
 	ret = pvclock_clocksource_read(src);
 	preempt_enable_notrace();
 	return ret;
@@ -99,8 +111,15 @@ static cycle_t kvm_clock_get_cycles(stru
 static unsigned long kvm_get_tsc_khz(void)
 {
 	struct pvclock_vcpu_time_info *src;
-	src = &per_cpu(hv_clock, 0);
-	return pvclock_tsc_khz(src);
+	int cpu;
+	unsigned long tsc_khz;
+
+	preempt_disable();
+	cpu = smp_processor_id();
+	src = &hv_clock[cpu].clock;
+	tsc_khz = pvclock_tsc_khz(src);
+	preempt_enable();
+	return tsc_khz;
 }
 
 static void kvm_get_preset_lpj(void)
@@ -119,10 +138,14 @@ bool kvm_check_and_clear_guest_paused(vo
 {
 	bool ret = false;
 	struct pvclock_vcpu_time_info *src;
+	int cpu = smp_processor_id();
 
-	src = &__get_cpu_var(hv_clock);
+	if (!hv_clock)
+		return ret;
+
+	src = &hv_clock[cpu].clock;
 	if ((src->flags & PVCLOCK_GUEST_STOPPED) != 0) {
-		__this_cpu_and(hv_clock.flags, ~PVCLOCK_GUEST_STOPPED);
+		src->flags &= ~PVCLOCK_GUEST_STOPPED;
 		ret = true;
 	}
 
@@ -141,9 +164,10 @@ int kvm_register_clock(char *txt)
 {
 	int cpu = smp_processor_id();
 	int low, high, ret;
+	struct pvclock_vcpu_time_info *src = &hv_clock[cpu].clock;
 
-	low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1;
-	high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32);
+	low = (int)__pa(src) | 1;
+	high = ((u64)__pa(src) >> 32);
 	ret = native_write_msr_safe(msr_kvm_system_time, low, high);
 	printk(KERN_INFO "kvm-clock: cpu %d, msr %x:%x, %s\n",
 	       cpu, high, low, txt);
@@ -197,6 +221,8 @@ static void kvm_shutdown(void)
 
 void __init kvmclock_init(void)
 {
+	unsigned long mem;
+
 	if (!kvm_para_available())
 		return;
 
@@ -209,8 +235,18 @@ void __init kvmclock_init(void)
 	printk(KERN_INFO "kvm-clock: Using msrs %x and %x",
 		msr_kvm_system_time, msr_kvm_wall_clock);
 
-	if (kvm_register_clock("boot clock"))
+	mem = memblock_alloc(sizeof(struct pvclock_aligned_vcpu_time_info) * NR_CPUS,
+			     PAGE_SIZE);
+	if (!mem)
 		return;
+	hv_clock = __va(mem);
+
+	if (kvm_register_clock("boot clock")) {
+		hv_clock = NULL;
+		memblock_free(mem,
+			sizeof(struct pvclock_aligned_vcpu_time_info)*NR_CPUS);
+		return;
+	}
 	pv_time_ops.sched_clock = kvm_clock_read;
 	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
 	x86_platform.get_wallclock = kvm_get_wallclock;
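
[Editor's illustration, not part of the patch.] The changelog's sizing argument is that a single
contiguous, page-aligned allocation lets the whole array of per-vCPU time structures be exported
to userspace in whole pages, which scattered per-CPU areas cannot guarantee. A minimal standalone
sketch of that arithmetic follows; the stand-in struct only mimics the 32-byte layout of
pvclock_vcpu_time_info, and the NR_CPUS value is an assumed example, not taken from the patch.

/*
 * Standalone userspace illustration of the page-sizing argument.
 * Build: cc -o pvclock_pages pvclock_pages.c && ./pvclock_pages
 */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096
#define NR_CPUS   64	/* assumed example value of CONFIG_NR_CPUS */

/* Stand-in with the same 32-byte size as the real pvclock ABI struct
 * (arch/x86/include/asm/pvclock-abi.h); fields shown for illustration. */
struct pvclock_time_info_like {
	uint32_t version;
	uint32_t pad0;
	uint64_t tsc_timestamp;
	uint64_t system_time;
	uint32_t tsc_to_system_mul;
	int8_t   tsc_shift;
	uint8_t  flags;
	uint8_t  pad[2];
};

int main(void)
{
	size_t bytes = sizeof(struct pvclock_time_info_like) * NR_CPUS;
	/* Round up to whole pages, as a page-granular userspace mapping needs. */
	size_t pages = (bytes + PAGE_SIZE - 1) / PAGE_SIZE;

	printf("array size: %zu bytes -> %zu page(s) to map\n", bytes, pages);
	printf("entries per page: %zu\n",
	       (size_t)PAGE_SIZE / sizeof(struct pvclock_time_info_like));
	return 0;
}

With 4096-byte pages, 128 of the 32-byte entries fit per page, so even a large NR_CPUS needs only
a few pages; allocating them in one block with PAGE_SIZE alignment, as the memblock_alloc() call in
the patch does, keeps the array physically contiguous and page-aligned.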