@@ -741,6 +741,8 @@ void hvm_domain_relinquish_resources(struct domain *d)
ioreq_server_destroy_all(d);
+ stats_free_vcpu_mfn(d);
+
msixtbl_pt_cleanup(d);
/* Stop all asynchronous timer actions. */
@@ -1078,6 +1078,12 @@ unsigned int ioreq_server_max_frames(const struct domain *d)
return nr;
}
+/*
+ * Upper bound on the number of frames of the stats_table resource.
+ *
+ * The value does not depend on @d: a single frame is exposed for now.
+ * (Sizing note carried over from the original comment: one frame per
+ * 512 vcpus.)
+ */
+unsigned int stats_table_max_frames(const struct domain *d)
+{
+    const unsigned int nr_frames = 1;
+
+    return nr_frames;
+}
+
/*
* Return 0 on any kind of error. Caller converts to -EINVAL.
*
@@ -1099,6 +1105,9 @@ static unsigned int resource_max_frames(const struct domain *d,
case XENMEM_resource_vmtrace_buf:
return d->vmtrace_size >> PAGE_SHIFT;
+ case XENMEM_resource_stats_table:
+ return stats_table_max_frames(d);
+
default:
return -EOPNOTSUPP;
}
@@ -1162,6 +1171,88 @@ static int acquire_vmtrace_buf(
return nr_frames;
}
+/*
+ * Release the vcpu stats frame of @d, if one was ever allocated.
+ *
+ * Removes the global mapping and drops the allocation reference and the
+ * type reference held on the page.  Returns immediately when no page is
+ * recorded, so it is safe to call on a domain that never acquired the
+ * stats_table resource.
+ */
+void stats_free_vcpu_mfn(struct domain *d)
+{
+    struct page_info *pg = d->vcpustats_page.pg;
+    void *va = d->vcpustats_page.va;
+
+    if ( !pg )
+        return;
+
+    /*
+     * Unpublish both pointers before tearing anything down, so that
+     * vcpu_runstate_change() cannot pick up a pointer to a mapping which
+     * has already been removed.
+     * NOTE(review): no lock or barrier is taken here; confirm that none of
+     * d's vcpus can still be inside a runstate update at this point.
+     */
+    d->vcpustats_page.va = NULL;
+    d->vcpustats_page.pg = NULL;
+
+    if ( va )
+        unmap_domain_page_global(va);
+
+    put_page_alloc_ref(pg);
+    put_page_and_type(pg);
+}
+
+/*
+ * Allocate, map and zero the frame that holds the per-vcpu stats of @d,
+ * then record it in d->vcpustats_page.
+ *
+ * Returns 1 on success, -ENOMEM if the page cannot be allocated or
+ * mapped, and -ENODATA if the page references cannot be taken.  On
+ * failure d->vcpustats_page is left untouched.
+ */
+static int stats_vcpu_alloc_mfn(struct domain *d)
+{
+    struct page_info *pg;
+    void *va;
+
+    pg = alloc_domheap_page(d, MEMF_no_refcount);
+    if ( !pg )
+        return -ENOMEM;
+
+    if ( !get_page_and_type(pg, d, PGT_writable_page) )
+    {
+        put_page_alloc_ref(pg);
+        return -ENODATA;
+    }
+
+    va = __map_domain_page_global(pg);
+    if ( !va )
+    {
+        put_page_alloc_ref(pg);
+        put_page_and_type(pg);
+        return -ENOMEM;
+    }
+
+    /*
+     * Zero the frame before making it visible: vcpu_runstate_change()
+     * starts writing counters as soon as d->vcpustats_page.va is non-NULL,
+     * and clearing afterwards would wipe such an update.
+     * NOTE(review): weakly ordered architectures may additionally need a
+     * write barrier between clear_page() and the stores below - confirm.
+     */
+    clear_page(va);
+
+    d->vcpustats_page.pg = pg;
+    d->vcpustats_page.va = va;
+
+    return 1;
+}
+
+/*
+ * Fill @mfn_list with the MFNs of frames [@frame, @frame + @nr_frames) of
+ * the stats_table resource of @d, allocating a frame on first use.
+ *
+ * @id is currently unused.
+ *
+ * Returns @nr_frames on success, -ENOENT when no domain is given, -EINVAL
+ * when a requested frame does not exist, or the error reported by
+ * stats_vcpu_alloc_mfn().
+ */
+static int acquire_stats_table(struct domain *d,
+                               unsigned int id,
+                               unsigned int frame,
+                               unsigned int nr_frames,
+                               xen_pfn_t mfn_list[])
+{
+    unsigned int i;
+
+    if ( !d )
+        return -ENOENT;
+
+    for ( i = 0; i < nr_frames; i++ )
+    {
+        /*
+         * Select by the frame's index within the resource, not by its
+         * position in this request: the two differ whenever @frame != 0.
+         */
+        switch ( frame + i )
+        {
+        case XENMEM_resource_stats_frame_vcpustats:
+            /* The frame is allocated lazily, on the first request. */
+            if ( !d->vcpustats_page.pg )
+            {
+                int rc = stats_vcpu_alloc_mfn(d);
+
+                if ( rc < 1 )
+                    return rc;
+            }
+            mfn_list[i] = mfn_x(page_to_mfn(d->vcpustats_page.pg));
+            break;
+
+        default:
+            return -EINVAL;
+        }
+    }
+
+    return nr_frames;
+}
+
/*
* Returns -errno on error, or positive in the range [1, nr_frames] on
* success. Returning less than nr_frames contitutes a request for a
@@ -1182,6 +1273,9 @@ static int _acquire_resource(
case XENMEM_resource_vmtrace_buf:
return acquire_vmtrace_buf(d, id, frame, nr_frames, mfn_list);
+ case XENMEM_resource_stats_table:
+ return acquire_stats_table(d, id, frame, nr_frames, mfn_list);
+
default:
return -EOPNOTSUPP;
}
@@ -264,6 +264,8 @@ static inline void vcpu_runstate_change(
{
s_time_t delta;
struct sched_unit *unit = v->sched_unit;
+ shared_vcpustatspage_t * vcpustats_va;
+ struct domain *d = v->domain;
ASSERT(spin_is_locked(get_sched_res(v->processor)->schedule_lock));
if ( v->runstate.state == new_state )
@@ -287,6 +289,11 @@ static inline void vcpu_runstate_change(
}
v->runstate.state = new_state;
+
+ vcpustats_va = (shared_vcpustatspage_t*)d->vcpustats_page.va;
+ if ( vcpustats_va )
+ memcpy(&vcpustats_va->vcpu_info[v->vcpu_id].runstate_running_time,
+ &v->runstate.time[RUNSTATE_running], sizeof(v->runstate.time[RUNSTATE_running]));
}
void sched_guest_idle(void (*idle) (void), unsigned int cpu)
@@ -626,6 +626,7 @@ struct xen_mem_acquire_resource {
#define XENMEM_resource_ioreq_server 0
#define XENMEM_resource_grant_table 1
#define XENMEM_resource_vmtrace_buf 2
+#define XENMEM_resource_stats_table 3
/*
* IN - a type-specific resource identifier, which must be zero
@@ -683,6 +684,8 @@ struct xen_mem_acquire_resource {
typedef struct xen_mem_acquire_resource xen_mem_acquire_resource_t;
DEFINE_XEN_GUEST_HANDLE(xen_mem_acquire_resource_t);
+#define XENMEM_resource_stats_frame_vcpustats 0
+
/*
* XENMEM_get_vnumainfo used by guest to get
* vNUMA topology from hypervisor.
@@ -235,6 +235,16 @@ struct vcpu_register_time_memory_area {
typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);
+/* Counters kept for one vcpu in the vcpustats frame. */
+struct vcpu_stats {
+    /* Copy of the vcpu's runstate.time[RUNSTATE_running]. */
+    uint64_t runstate_running_time;
+};
+
+/*
+ * Layout of the vcpustats frame: an array of struct vcpu_stats which the
+ * scheduler indexes by vcpu_id.
+ * NOTE(review): the array is declared with a single element although it is
+ * indexed by vcpu_id; presumably it stands for a variable-length trailing
+ * array - confirm, and confirm what bounds vcpu_id against the frame size.
+ */
+struct shared_vcpustatspage {
+ struct vcpu_stats vcpu_info[1];
+};
+
+typedef struct shared_vcpustatspage shared_vcpustatspage_t;
+
#endif /* __XEN_PUBLIC_VCPU_H__ */
/*
@@ -134,6 +134,8 @@ int assign_pages(
/* Dump info to serial console */
void arch_dump_shared_mem_info(void);
+void stats_free_vcpu_mfn(struct domain * d);
+
/*
* Extra fault info types which are used to further describe
* the source of an access violation.
@@ -577,6 +577,11 @@ struct domain
struct ioreq_server *server[MAX_NR_IOREQ_SERVERS];
} ioreq_server;
#endif
+ /*
+ * Page that hosts vcpu stats: pg is the backing page and va its global
+ * mapping. Both are NULL until the page has been allocated and again
+ * once it has been freed.
+ */
+ struct {
+ struct page_info *pg;
+ void *va;
+ } vcpustats_page;
};
static inline struct page_list_head *page_to_list(
This commit proposes a new mechanism to query the RUNSTATE_running counter for a given vcpu from a dom0 userspace application, by exposing that counter through the acquire_resource interface. The current mechanism relies on XEN_DOMCTL_getvcpuinfo, which holds the single global domctl_lock for the entire hypercall and iterates over every vcpu in the system for every update, thus impacting other operations that share that lock. This commit proposes to expose the vcpu RUNSTATE_running counter via the xenforeignmemory interface instead, thus avoiding both issuing the hypercall and holding the lock. For that purpose, a new resource type named stats_table is added. The first frame of this resource stores per-vcpu counters. The frame has one entry of type struct vcpu_stats per vcpu. The frame is only allocated if the resource is requested. The frame is released when the domain is destroyed. Note that the update of this counter is in a hot path; thus, in this commit, the counter is only copied when the frame has actually been requested. Note that the exposed structure is extensible in two ways. First, the structure vcpu_stats can be extended with new per-vcpu counters as long as it still fits in a frame. Second, new frames can be added in case new counters are required. 
Signed-off-by: Matias Ezequiel Vara Larsen <matias.vara@vates.fr> --- Changes in v1: - rework the allocation and releasing of the frames - use the zero frame for per-vcpu counters that are listed as an array - allocate vcpu stats frames only when the resource is requested - rewrite commit message - add the vcpu_stats structure to keep per-vcpu counters - add the shared_vcpustatspage to keep an array of per-vcpu counters for a given domain - declare the structures in a public header - define the vcpustats_page in the domain structure --- xen/arch/x86/hvm/hvm.c | 2 + xen/common/memory.c | 94 +++++++++++++++++++++++++++++++++++++ xen/common/sched/core.c | 7 +++ xen/include/public/memory.h | 3 ++ xen/include/public/vcpu.h | 10 ++++ xen/include/xen/mm.h | 2 + xen/include/xen/sched.h | 5 ++ 7 files changed, 123 insertions(+)