diff mbox series

[i-g-t] tests/xe/pmu: Add pmu tests

Message ID 20241030205232.1511031-1-vinay.belgaumkar@intel.com (mailing list archive)
State New, archived
Headers show
Series [i-g-t] tests/xe/pmu: Add pmu tests | expand

Commit Message

Vinay Belgaumkar Oct. 30, 2024, 8:52 p.m. UTC
Simple tests for validating the PMU implementation for GT C6
residencies and frequency.

These tests validate the kernel series which is currently in review
here - https://patchwork.freedesktop.org/series/139121/

Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
---
 lib/igt_perf.c       |  18 ++
 lib/igt_perf.h       |   2 +
 tests/intel/xe_pmu.c | 412 +++++++++++++++++++++++++++++++++++++++++++
 tests/meson.build    |   1 +
 4 files changed, 433 insertions(+)
 create mode 100644 tests/intel/xe_pmu.c

Comments

Riana Tauro Nov. 4, 2024, 6:34 a.m. UTC | #1
Hi Vinay

On 10/31/2024 2:22 AM, Vinay Belgaumkar wrote:
> Simple tests for validating the PMU implementation for GT C6
> residencies and frequency.
> 
> These tests validate the kernel series which is currently in review
> here - https://patchwork.freedesktop.org/series/139121/
> 
> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
> Signed-off-by: Vinay Belgaumkar <vinay.belgaumkar@intel.com>
> ---
>   lib/igt_perf.c       |  18 ++
>   lib/igt_perf.h       |   2 +
>   tests/intel/xe_pmu.c | 412 +++++++++++++++++++++++++++++++++++++++++++
>   tests/meson.build    |   1 +
>   4 files changed, 433 insertions(+)
>   create mode 100644 tests/intel/xe_pmu.c
> 
> diff --git a/lib/igt_perf.c b/lib/igt_perf.c
> index 3866c6d77..88ea66ffc 100644
> --- a/lib/igt_perf.c
> +++ b/lib/igt_perf.c
> @@ -129,6 +129,18 @@ uint64_t igt_perf_type_id(const char *device)
>   	return strtoull(buf, NULL, 0);
>   }
>   
> +int igt_xe_perf_events_dir(int xe)
> +{
> +	char buf[80];
> +	char path[PATH_MAX];
> +
> +	memset(buf, 0, sizeof(buf));
> +
> +	xe_perf_device(xe, buf, sizeof(buf));
> +	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events", buf);
> +	return open(path, O_RDONLY);
> +}
> +
existing lib function igt_perf_events_dir can be modified to pass buf as 
parameter.
The function will be common for both i915 and xe
>   int igt_perf_events_dir(int i915)
>   {
>   	char buf[80];
> @@ -183,6 +195,12 @@ int perf_xe_open(int xe, uint64_t config)
>   			PERF_FORMAT_TOTAL_TIME_ENABLED);
>   }
>   
> +int perf_xe_open_group(int xe, uint64_t config, int group)
> +{
> +	return _perf_open(xe_perf_type_id(xe), config, group,
> +			  PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
> +}

A new function is not needed

Can use  igt_perf_open_group(xe_perf_type_id(xe), config, group) directly
> +
>   int perf_i915_open(int i915, uint64_t config)
>   {
>   	return _perf_open(i915_perf_type_id(i915), config, -1,
> diff --git a/lib/igt_perf.h b/lib/igt_perf.h
> index 3d9ba2917..8aff78d0e 100644
> --- a/lib/igt_perf.h
> +++ b/lib/igt_perf.h
> @@ -55,6 +55,7 @@ perf_event_open(struct perf_event_attr *attr,
>   
>   uint64_t igt_perf_type_id(const char *device);
>   int igt_perf_events_dir(int i915);
> +int igt_xe_perf_events_dir(int xe);
>   int igt_perf_open(uint64_t type, uint64_t config);
>   int igt_perf_open_group(uint64_t type, uint64_t config, int group);
>   
> @@ -71,5 +72,6 @@ int perf_i915_open(int i915, uint64_t config);
>   int perf_i915_open_group(int i915, uint64_t config, int group);
>   
>   int perf_xe_open(int xe, uint64_t config);
> +int perf_xe_open_group(int xe, uint64_t config, int group);
>   
>   #endif /* I915_PERF_H */
> diff --git a/tests/intel/xe_pmu.c b/tests/intel/xe_pmu.c
> new file mode 100644
> index 000000000..f5ef24757
> --- /dev/null
> +++ b/tests/intel/xe_pmu.c
> @@ -0,0 +1,412 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2024 Intel Corporation
> + */
> +
> +/**
> + * TEST: Test Xe PMU functionality
> + * Category: Perf Monitoring Unit
> + * Mega feature: Perf Monitoring Unit
> + * Sub-category: Power Management
> + * Functionality: Power/Perf
> + * Test category: Functional tests
> + */
> +
> +#include <fcntl.h>
> +#include <limits.h>
> +#include <time.h>
> +#include <errno.h>
> +#include <dirent.h>
> +#include <string.h>
> +#include <sys/time.h>
alphabetical
> +
> +#include "igt.h"
> +#include "igt_device.h"
> +#include "igt_power.h"
> +#include "igt_sysfs.h"
> +#include "igt_perf.h"
> +
> +#include "lib/igt_syncobj.h"
> +#include "xe/xe_ioctl.h"
> +#include "xe/xe_query.h"
> +#include "xe/xe_spin.h"
> +#include "xe/xe_util.h"
> +
> +#define SLEEP_DURATION 2 /* in seconds */
> +const double tolerance = 0.1;
> +const unsigned long batch_duration_ns = 500e6;
> +const char *no_debug_data = "\0";
> +
> +#define __assert_within_epsilon(x, ref, tol_up, tol_down, debug_data) \
> +	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
> +		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
> +		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n%s\n",\
> +		     #x, #ref, (double)(x), \
> +		     (tol_up) * 100.0, (tol_down) * 100.0, \
> +		     (double)(ref), debug_data)
debug_data is not being used in this file. We can have the assert 
without this parameter
> +
> +#define assert_within_epsilon(x, ref, tolerance) \
> +	__assert_within_epsilon(x, ref, tolerance, tolerance, no_debug_data)
> +
> +#define assert_within_epsilon_debug(x, ref, tolerance, debug_data) \
> +	__assert_within_epsilon(x, ref, tolerance, tolerance, debug_data)
> +
> +struct workload {
> +	struct drm_xe_sync sync[2];
> +	struct drm_xe_exec exec;
> +	uint64_t addr;
> +	struct xe_spin_opts spin_opts;
> +	struct xe_spin *spin;
> +	uint32_t exec_queue;
> +	uint32_t syncobj;
> +	size_t bo_size;
> +	uint32_t bo;
> +	uint32_t vm;
> +};
> +
> +static int open_pmu(int xe, uint64_t config)
> +{
> +	int fd;
> +
> +	fd = perf_xe_open(xe, config);
> +	igt_skip_on(fd < 0 && errno == ENODEV);
> +	igt_assert(fd >= 0);
> +
> +	return fd;
> +}
> +
> +static int open_group(int xe, uint64_t config, int group)
> +{
> +	int fd;
> +
> +	fd = perf_xe_open_group(xe, config, group);
> +	igt_skip_on(fd < 0 && errno == ENODEV);
> +	igt_assert(fd >= 0);
> +
> +	return fd;
> +}
> +
> +static uint64_t __pmu_read_single(int fd, uint64_t *ts)
> +{
> +	uint64_t data[2];
> +
> +	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
> +	if (ts)
> +		*ts = data[1];
> +
> +	return data[0];
> +}
> +
> +static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
> +{
> +	uint64_t buf[2 + num];
> +	unsigned int i;
> +
> +	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
> +
> +	for (i = 0; i < num; i++)
> +		val[i] = buf[2 + i];
> +
> +	return buf[1];
> +}
> +
> +static unsigned long read_pmu_config(int fd, char *pmu_str)
> +{
> +	int dir_fd;
> +	int ret;
> +	unsigned long config;
> +	char config_str[128];
> +
> +	dir_fd = igt_xe_perf_events_dir(fd);
> +	igt_assert(dir_fd >= 0);
> +	igt_assert_eq(igt_sysfs_scanf(dir_fd, pmu_str, "%127s", config_str), 1);
> +	ret = sscanf(config_str, "config=0x%lx", &config);
> +	igt_assert(ret == 1);
> +
> +	close(dir_fd);
> +
> +	return config;
> +}
> +
> +/**
> + * SUBTEST: c6
> + * Description: Basic residency test to validate idle residency
> + *		measured over a time interval is within the tolerance
> + *
> + * SUBTEST: frequency
> + * Description: Read requested freq and actual frequency via PMU within
> + *		specified time interval while workload runs
> + */
The description should be above the test functions or start of file
> +static unsigned int measured_usleep(unsigned int usec)
> +{
> +	struct timespec ts = { };
> +	unsigned int slept;
> +
> +	slept = igt_nsec_elapsed(&ts);
> +	igt_assert(slept == 0);
> +	do {
> +		usleep(usec - slept);
> +		slept = igt_nsec_elapsed(&ts) / 1000;
> +	} while (slept < usec);
> +
> +	return igt_nsec_elapsed(&ts) / 1000;
> +}
> +
> +static unsigned long read_idle_residency(int fd, int gt)
> +{
> +	unsigned long residency = 0;
> +	int gt_fd;
> +
> +	gt_fd = xe_sysfs_gt_open(fd, gt);
> +	igt_assert(gt_fd >= 0);
> +	igt_assert(igt_sysfs_scanf(gt_fd, "gtidle/idle_residency_ms", "%lu", &residency) == 1);
> +	close(gt_fd);
> +
> +	return residency;
> +}
> +
> +static void test_rc6(int xe, unsigned int gt)
Should be test_c6/ test_gt_c6 as Xe uses a generic name
> +{
> +	int pmu_fd;
> +	int pmu_config;
> +	char event_str[100];
> +	uint64_t ts[2];
> +	unsigned long slept, start, end;
> +	uint64_t val;
Inverted xmas tree pattern?
> +
> +	sprintf(event_str, "rc6-residency-gt%d", gt);
Should be c6-residency/gt-c6-residnecy as Xe uses a generic name

Thanks,
Riana
> +	pmu_config = read_pmu_config(xe, event_str);
> +	pmu_fd = open_pmu(xe, pmu_config);
> +
> +	igt_assert_f(igt_wait(xe_is_gt_in_c6(xe, gt), 3000, 1), "GT %d not in C6\n", gt);
> +
> +	/* While idle check full RC6. */
> +	start = read_idle_residency(xe, gt);
> +	val = __pmu_read_single(pmu_fd, &ts[0]);
> +	slept = measured_usleep(SLEEP_DURATION * USEC_PER_SEC) / 1000;
> +	end = read_idle_residency(xe, gt);
> +	val = __pmu_read_single(pmu_fd, &ts[1]) - val;
> +
> +	igt_debug("gt%u: slept=%lu, perf=%"PRIu64"\n",
> +		  gt, slept,  val);
> +
> +	igt_debug("Start res: %lu, end_res: %lu", start, end);
> +
> +	assert_within_epsilon(val,
> +			      (ts[1] - ts[0])/1000000,
> +			      tolerance);
> +	close(pmu_fd);
> +}
> +
> +static int set_freq(int fd, int gt_id, const char *freq_name, uint32_t freq)
> +{
> +	int ret = -EAGAIN;
> +	char freq_attr[22];
> +	int gt_fd;
> +
> +	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
> +	gt_fd = xe_sysfs_gt_open(fd, gt_id);
> +	igt_assert(gt_fd >= 0);
> +
> +	while (ret == -EAGAIN)
> +		ret = igt_sysfs_printf(gt_fd, freq_attr, "%u", freq);
> +
> +	close(gt_fd);
> +	return ret;
> +}
> +
> +static uint32_t get_freq(int fd, int gt_id, const char *freq_name)
> +{
> +	uint32_t freq;
> +	int err = -EAGAIN;
> +	char freq_attr[22];
> +	int gt_fd;
> +
> +	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
> +	gt_fd = xe_sysfs_gt_open(fd, gt_id);
> +	igt_assert(gt_fd >= 0);
> +
> +	while (err == -EAGAIN)
> +		err = igt_sysfs_scanf(gt_fd, freq_attr, "%u", &freq);
> +
> +	igt_debug("gt%d: %s freq %u\n", gt_id, freq_name, freq);
> +
> +	close(gt_fd);
> +	return freq;
> +}
> +
> +static void run_workload(int fd, int gt, struct drm_xe_engine_class_instance *eci,
> +			 struct workload *wl)
> +{
> +	struct drm_xe_sync sync[2] = {
> +		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
> +		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
> +	};
> +	struct drm_xe_exec exec = {
> +		.num_batch_buffer = 1,
> +		.num_syncs = 2,
> +		.syncs = to_user_pointer(sync),
> +	};
> +	struct xe_spin_opts spin_opts = {
> +		.addr = 0x1a0000,
> +		.preempt = false
> +	};
> +	struct xe_spin *spin;
> +
> +	wl->addr = 0x1a0000;
> +
> +	wl->vm = xe_vm_create(fd, 0, 0);
> +	wl->bo_size = sizeof(*spin);
> +	wl->bo_size = xe_bb_size(fd, wl->bo_size);
> +
> +	wl->bo = xe_bo_create(fd, wl->vm, wl->bo_size,
> +			  vram_if_possible(fd, eci->gt_id), 0);
> +	wl->spin = xe_bo_map(fd, wl->bo, wl->bo_size);
> +
> +	wl->exec_queue = xe_exec_queue_create(fd, wl->vm, eci, 0);
> +	wl->syncobj = syncobj_create(fd, 0);
> +
> +	sync[0].handle = syncobj_create(fd, 0);
> +	xe_vm_bind_async(fd, wl->vm, 0, wl->bo, 0, wl->addr, wl->bo_size, sync, 1);
> +
> +	xe_spin_init(wl->spin, &spin_opts);
> +
> +	sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
> +	sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
> +	sync[1].handle = wl->syncobj;
> +
> +	exec.exec_queue_id = wl->exec_queue;
> +	exec.address = wl->addr;
> +	xe_exec(fd, &exec);
> +
> +	xe_spin_wait_started(wl->spin);
> +	usleep(50000);
> +	igt_assert(!syncobj_wait(fd, &wl->syncobj, 1, 1, 0, NULL));
> +
> +	igt_info("Running on GT %d Engine %s:%d\n", eci->gt_id,
> +		 xe_engine_class_string(eci->engine_class), eci->engine_instance);
> +
> +	/* Save it for the end_workload function */
> +	wl->sync[0] = sync[0];
> +	wl->sync[1] = sync[1];
> +}
> +
> +static void end_workload(int fd, struct workload *wl)
> +{
> +	xe_spin_end(wl->spin);
> +
> +	igt_assert(syncobj_wait(fd, &wl->syncobj, 1, INT64_MAX, 0, NULL));
> +	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
> +
> +	wl->sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
> +	xe_vm_unbind_async(fd, wl->vm, 0, 0, wl->addr, wl->bo_size, wl->sync, 1);
> +	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
> +
> +	syncobj_destroy(fd, wl->sync[0].handle);
> +	syncobj_destroy(fd, wl->syncobj);
> +	xe_exec_queue_destroy(fd, wl->exec_queue);
> +
> +	munmap(wl->spin, wl->bo_size);
> +	gem_close(fd, wl->bo);
> +	xe_vm_destroy(fd, wl->vm);
> +}
> +
> +static void test_frequency(int fd, int gt, struct drm_xe_engine_class_instance *eci)
> +{
> +	struct workload wl;
> +
> +	uint64_t val[2], start[2], slept;
> +	double min[2], max[2];
> +	int pmu_fd[2];
> +	uint32_t orig_min = get_freq(fd, gt, "min");
> +	uint32_t orig_max = get_freq(fd, gt, "max");
> +	unsigned long config_rq_freq, config_act_freq;
> +	char event_str[100];
> +
> +
> +	sprintf(event_str, "requested-frequency-gt%d", gt);
> +	config_rq_freq = read_pmu_config(fd, event_str);
> +	pmu_fd[0] = open_group(fd, config_rq_freq, -1);
> +
> +	memset(event_str, 0, 100);
> +	sprintf(event_str, "actual-frequency-gt%d", gt);
> +	config_act_freq = read_pmu_config(fd, event_str);
> +	pmu_fd[1] = open_group(fd, config_act_freq, pmu_fd[0]);
> +
> +	run_workload(fd, gt, eci, &wl);
> +	/*
> +	 * Set GPU to min frequency and read PMU counters.
> +	 */
> +	igt_assert(set_freq(fd, gt, "max", orig_min) > 0);
> +	igt_assert(get_freq(fd, gt, "max") == orig_min);
> +
> +	slept = pmu_read_multi(pmu_fd[0], 2, start);
> +	measured_usleep(batch_duration_ns / 1000);
> +	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
> +
> +	min[0] = 1e9*(val[0] - start[0]) / slept;
> +	min[1] = 1e9*(val[1] - start[1]) / slept;
> +
> +	/*
> +	 * Set GPU to max frequency and read PMU counters.
> +	 */
> +	igt_assert(set_freq(fd, gt, "max", orig_max) > 0);
> +	igt_assert(get_freq(fd, gt, "max") == orig_max);
> +	igt_assert(set_freq(fd, gt, "min", orig_max) > 0);
> +	igt_assert(get_freq(fd, gt, "min") == orig_max);
> +
> +	slept = pmu_read_multi(pmu_fd[0], 2, start);
> +	measured_usleep(batch_duration_ns / 1000);
> +	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
> +
> +	max[0] = 1e9*(val[0] - start[0]) / slept;
> +	max[1] = 1e9*(val[1] - start[1]) / slept;
> +
> +	/*
> +	 * Restore min/max.
> +	 */
> +	igt_assert(set_freq(fd, gt, "min", orig_min) > 0);
> +	igt_assert(get_freq(fd, gt, "min") == orig_min);
> +
> +	igt_info("Minimum frequency: requested %.1f, actual %.1f\n",
> +		 min[0], min[1]);
> +	igt_info("Maximum frequency: requested %.1f, actual %.1f\n",
> +		 max[0], max[1]);
> +
> +	close(pmu_fd[0]);
> +	close(pmu_fd[1]);
> +
> +	end_workload(fd, &wl);
> +
> +	assert_within_epsilon(min[0], orig_min, tolerance);
> +	/*
> +	 * On thermally throttled devices we cannot be sure maximum frequency
> +	 * can be reached so use larger tolerance downards.
> +	 */
> +	__assert_within_epsilon(max[0], orig_max, tolerance, 0.15f, no_debug_data);
> +}
> +
> +igt_main
> +{
> +	int fd, gt;
> +	struct drm_xe_engine_class_instance *hwe;
> +
> +	igt_fixture {
> +		fd = drm_open_driver(DRIVER_XE);
> +		igt_require(!IS_PONTEVECCHIO(xe_dev_id(fd)));
> +	}
> +
> +	igt_describe("Validate PMU C6 residency counters");
> +	igt_subtest("c6")
> +		xe_for_each_gt(fd, gt)
> +			test_rc6(fd, gt);
> +
> +	igt_describe("Validate PMU GT freq measured over a time interval is within the tolerance");
> +	igt_subtest("frequency")
> +		xe_for_each_engine(fd, hwe)
> +			test_frequency(fd, hwe->gt_id, hwe);
> +
> +	igt_fixture {
> +		close(fd);
> +	}
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 34b87b125..dc84ef748 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -308,6 +308,7 @@ intel_xe_progs = [
>   	'xe_pat',
>   	'xe_peer2peer',
>   	'xe_pm',
> +	'xe_pmu',
>   	'xe_pm_residency',
>   	'xe_prime_self_import',
>   	'xe_query',
diff mbox series

Patch

diff --git a/lib/igt_perf.c b/lib/igt_perf.c
index 3866c6d77..88ea66ffc 100644
--- a/lib/igt_perf.c
+++ b/lib/igt_perf.c
@@ -129,6 +129,18 @@  uint64_t igt_perf_type_id(const char *device)
 	return strtoull(buf, NULL, 0);
 }
 
+int igt_xe_perf_events_dir(int xe)
+{
+	char buf[80];
+	char path[PATH_MAX];
+
+	memset(buf, 0, sizeof(buf));
+
+	xe_perf_device(xe, buf, sizeof(buf));
+	snprintf(path, sizeof(path), "/sys/bus/event_source/devices/%s/events", buf);
+	return open(path, O_RDONLY);
+}
+
 int igt_perf_events_dir(int i915)
 {
 	char buf[80];
@@ -183,6 +195,12 @@  int perf_xe_open(int xe, uint64_t config)
 			PERF_FORMAT_TOTAL_TIME_ENABLED);
 }
 
+int perf_xe_open_group(int xe, uint64_t config, int group)
+{
+	return _perf_open(xe_perf_type_id(xe), config, group,
+			  PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_GROUP);
+}
+
 int perf_i915_open(int i915, uint64_t config)
 {
 	return _perf_open(i915_perf_type_id(i915), config, -1,
diff --git a/lib/igt_perf.h b/lib/igt_perf.h
index 3d9ba2917..8aff78d0e 100644
--- a/lib/igt_perf.h
+++ b/lib/igt_perf.h
@@ -55,6 +55,7 @@  perf_event_open(struct perf_event_attr *attr,
 
 uint64_t igt_perf_type_id(const char *device);
 int igt_perf_events_dir(int i915);
+int igt_xe_perf_events_dir(int xe);
 int igt_perf_open(uint64_t type, uint64_t config);
 int igt_perf_open_group(uint64_t type, uint64_t config, int group);
 
@@ -71,5 +72,6 @@  int perf_i915_open(int i915, uint64_t config);
 int perf_i915_open_group(int i915, uint64_t config, int group);
 
 int perf_xe_open(int xe, uint64_t config);
+int perf_xe_open_group(int xe, uint64_t config, int group);
 
 #endif /* I915_PERF_H */
diff --git a/tests/intel/xe_pmu.c b/tests/intel/xe_pmu.c
new file mode 100644
index 000000000..f5ef24757
--- /dev/null
+++ b/tests/intel/xe_pmu.c
@@ -0,0 +1,412 @@ 
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+
+/**
+ * TEST: Test Xe PMU functionality
+ * Category: Perf Monitoring Unit
+ * Mega feature: Perf Monitoring Unit
+ * Sub-category: Power Management
+ * Functionality: Power/Perf
+ * Test category: Functional tests
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <time.h>
+#include <errno.h>
+#include <dirent.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "igt.h"
+#include "igt_device.h"
+#include "igt_power.h"
+#include "igt_sysfs.h"
+#include "igt_perf.h"
+
+#include "lib/igt_syncobj.h"
+#include "xe/xe_ioctl.h"
+#include "xe/xe_query.h"
+#include "xe/xe_spin.h"
+#include "xe/xe_util.h"
+
+#define SLEEP_DURATION 2 /* in seconds */
+const double tolerance = 0.1;
+const unsigned long batch_duration_ns = 500e6;
+const char *no_debug_data = "\0";
+
+#define __assert_within_epsilon(x, ref, tol_up, tol_down, debug_data) \
+	igt_assert_f((double)(x) <= (1.0 + (tol_up)) * (double)(ref) && \
+		     (double)(x) >= (1.0 - (tol_down)) * (double)(ref), \
+		     "'%s' != '%s' (%f not within +%.1f%%/-%.1f%% tolerance of %f)\n%s\n",\
+		     #x, #ref, (double)(x), \
+		     (tol_up) * 100.0, (tol_down) * 100.0, \
+		     (double)(ref), debug_data)
+
+#define assert_within_epsilon(x, ref, tolerance) \
+	__assert_within_epsilon(x, ref, tolerance, tolerance, no_debug_data)
+
+#define assert_within_epsilon_debug(x, ref, tolerance, debug_data) \
+	__assert_within_epsilon(x, ref, tolerance, tolerance, debug_data)
+
+struct workload {
+	struct drm_xe_sync sync[2];
+	struct drm_xe_exec exec;
+	uint64_t addr;
+	struct xe_spin_opts spin_opts;
+	struct xe_spin *spin;
+	uint32_t exec_queue;
+	uint32_t syncobj;
+	size_t bo_size;
+	uint32_t bo;
+	uint32_t vm;
+};
+
+static int open_pmu(int xe, uint64_t config)
+{
+	int fd;
+
+	fd = perf_xe_open(xe, config);
+	igt_skip_on(fd < 0 && errno == ENODEV);
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static int open_group(int xe, uint64_t config, int group)
+{
+	int fd;
+
+	fd = perf_xe_open_group(xe, config, group);
+	igt_skip_on(fd < 0 && errno == ENODEV);
+	igt_assert(fd >= 0);
+
+	return fd;
+}
+
+static uint64_t __pmu_read_single(int fd, uint64_t *ts)
+{
+	uint64_t data[2];
+
+	igt_assert_eq(read(fd, data, sizeof(data)), sizeof(data));
+	if (ts)
+		*ts = data[1];
+
+	return data[0];
+}
+
+static uint64_t pmu_read_multi(int fd, unsigned int num, uint64_t *val)
+{
+	uint64_t buf[2 + num];
+	unsigned int i;
+
+	igt_assert_eq(read(fd, buf, sizeof(buf)), sizeof(buf));
+
+	for (i = 0; i < num; i++)
+		val[i] = buf[2 + i];
+
+	return buf[1];
+}
+
+static unsigned long read_pmu_config(int fd, char *pmu_str)
+{
+	int dir_fd;
+	int ret;
+	unsigned long config;
+	char config_str[128];
+
+	dir_fd = igt_xe_perf_events_dir(fd);
+	igt_assert(dir_fd >= 0);
+	igt_assert_eq(igt_sysfs_scanf(dir_fd, pmu_str, "%127s", config_str), 1);
+	ret = sscanf(config_str, "config=0x%lx", &config);
+	igt_assert(ret == 1);
+
+	close(dir_fd);
+
+	return config;
+}
+
+/**
+ * SUBTEST: c6
+ * Description: Basic residency test to validate idle residency
+ *		measured over a time interval is within the tolerance
+ *
+ * SUBTEST: frequency
+ * Description: Read requested freq and actual frequency via PMU within
+ *		specified time interval while workload runs
+ */
+static unsigned int measured_usleep(unsigned int usec)
+{
+	struct timespec ts = { };
+	unsigned int slept;
+
+	slept = igt_nsec_elapsed(&ts);
+	igt_assert(slept == 0);
+	do {
+		usleep(usec - slept);
+		slept = igt_nsec_elapsed(&ts) / 1000;
+	} while (slept < usec);
+
+	return igt_nsec_elapsed(&ts) / 1000;
+}
+
+static unsigned long read_idle_residency(int fd, int gt)
+{
+	unsigned long residency = 0;
+	int gt_fd;
+
+	gt_fd = xe_sysfs_gt_open(fd, gt);
+	igt_assert(gt_fd >= 0);
+	igt_assert(igt_sysfs_scanf(gt_fd, "gtidle/idle_residency_ms", "%lu", &residency) == 1);
+	close(gt_fd);
+
+	return residency;
+}
+
+static void test_rc6(int xe, unsigned int gt)
+{
+	int pmu_fd;
+	int pmu_config;
+	char event_str[100];
+	uint64_t ts[2];
+	unsigned long slept, start, end;
+	uint64_t val;
+
+	sprintf(event_str, "rc6-residency-gt%d", gt);
+	pmu_config = read_pmu_config(xe, event_str);
+	pmu_fd = open_pmu(xe, pmu_config);
+
+	igt_assert_f(igt_wait(xe_is_gt_in_c6(xe, gt), 3000, 1), "GT %d not in C6\n", gt);
+
+	/* While idle check full RC6. */
+	start = read_idle_residency(xe, gt);
+	val = __pmu_read_single(pmu_fd, &ts[0]);
+	slept = measured_usleep(SLEEP_DURATION * USEC_PER_SEC) / 1000;
+	end = read_idle_residency(xe, gt);
+	val = __pmu_read_single(pmu_fd, &ts[1]) - val;
+
+	igt_debug("gt%u: slept=%lu, perf=%"PRIu64"\n",
+		  gt, slept,  val);
+
+	igt_debug("Start res: %lu, end_res: %lu", start, end);
+
+	assert_within_epsilon(val,
+			      (ts[1] - ts[0])/1000000,
+			      tolerance);
+	close(pmu_fd);
+}
+
+static int set_freq(int fd, int gt_id, const char *freq_name, uint32_t freq)
+{
+	int ret = -EAGAIN;
+	char freq_attr[22];
+	int gt_fd;
+
+	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
+	gt_fd = xe_sysfs_gt_open(fd, gt_id);
+	igt_assert(gt_fd >= 0);
+
+	while (ret == -EAGAIN)
+		ret = igt_sysfs_printf(gt_fd, freq_attr, "%u", freq);
+
+	close(gt_fd);
+	return ret;
+}
+
+static uint32_t get_freq(int fd, int gt_id, const char *freq_name)
+{
+	uint32_t freq;
+	int err = -EAGAIN;
+	char freq_attr[22];
+	int gt_fd;
+
+	snprintf(freq_attr, sizeof(freq_attr), "freq0/%s_freq", freq_name);
+	gt_fd = xe_sysfs_gt_open(fd, gt_id);
+	igt_assert(gt_fd >= 0);
+
+	while (err == -EAGAIN)
+		err = igt_sysfs_scanf(gt_fd, freq_attr, "%u", &freq);
+
+	igt_debug("gt%d: %s freq %u\n", gt_id, freq_name, freq);
+
+	close(gt_fd);
+	return freq;
+}
+
+static void run_workload(int fd, int gt, struct drm_xe_engine_class_instance *eci,
+			 struct workload *wl)
+{
+	struct drm_xe_sync sync[2] = {
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+		{ .type = DRM_XE_SYNC_TYPE_SYNCOBJ, .flags = DRM_XE_SYNC_FLAG_SIGNAL, },
+	};
+	struct drm_xe_exec exec = {
+		.num_batch_buffer = 1,
+		.num_syncs = 2,
+		.syncs = to_user_pointer(sync),
+	};
+	struct xe_spin_opts spin_opts = {
+		.addr = 0x1a0000,
+		.preempt = false
+	};
+	struct xe_spin *spin;
+
+	wl->addr = 0x1a0000;
+
+	wl->vm = xe_vm_create(fd, 0, 0);
+	wl->bo_size = sizeof(*spin);
+	wl->bo_size = xe_bb_size(fd, wl->bo_size);
+
+	wl->bo = xe_bo_create(fd, wl->vm, wl->bo_size,
+			  vram_if_possible(fd, eci->gt_id), 0);
+	wl->spin = xe_bo_map(fd, wl->bo, wl->bo_size);
+
+	wl->exec_queue = xe_exec_queue_create(fd, wl->vm, eci, 0);
+	wl->syncobj = syncobj_create(fd, 0);
+
+	sync[0].handle = syncobj_create(fd, 0);
+	xe_vm_bind_async(fd, wl->vm, 0, wl->bo, 0, wl->addr, wl->bo_size, sync, 1);
+
+	xe_spin_init(wl->spin, &spin_opts);
+
+	sync[0].flags &= ~DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	sync[1].handle = wl->syncobj;
+
+	exec.exec_queue_id = wl->exec_queue;
+	exec.address = wl->addr;
+	xe_exec(fd, &exec);
+
+	xe_spin_wait_started(wl->spin);
+	usleep(50000);
+	igt_assert(!syncobj_wait(fd, &wl->syncobj, 1, 1, 0, NULL));
+
+	igt_info("Running on GT %d Engine %s:%d\n", eci->gt_id,
+		 xe_engine_class_string(eci->engine_class), eci->engine_instance);
+
+	/* Save it for the end_workload function */
+	wl->sync[0] = sync[0];
+	wl->sync[1] = sync[1];
+}
+
+static void end_workload(int fd, struct workload *wl)
+{
+	xe_spin_end(wl->spin);
+
+	igt_assert(syncobj_wait(fd, &wl->syncobj, 1, INT64_MAX, 0, NULL));
+	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	wl->sync[0].flags |= DRM_XE_SYNC_FLAG_SIGNAL;
+	xe_vm_unbind_async(fd, wl->vm, 0, 0, wl->addr, wl->bo_size, wl->sync, 1);
+	igt_assert(syncobj_wait(fd, &wl->sync[0].handle, 1, INT64_MAX, 0, NULL));
+
+	syncobj_destroy(fd, wl->sync[0].handle);
+	syncobj_destroy(fd, wl->syncobj);
+	xe_exec_queue_destroy(fd, wl->exec_queue);
+
+	munmap(wl->spin, wl->bo_size);
+	gem_close(fd, wl->bo);
+	xe_vm_destroy(fd, wl->vm);
+}
+
+static void test_frequency(int fd, int gt, struct drm_xe_engine_class_instance *eci)
+{
+	struct workload wl;
+
+	uint64_t val[2], start[2], slept;
+	double min[2], max[2];
+	int pmu_fd[2];
+	uint32_t orig_min = get_freq(fd, gt, "min");
+	uint32_t orig_max = get_freq(fd, gt, "max");
+	unsigned long config_rq_freq, config_act_freq;
+	char event_str[100];
+
+
+	sprintf(event_str, "requested-frequency-gt%d", gt);
+	config_rq_freq = read_pmu_config(fd, event_str);
+	pmu_fd[0] = open_group(fd, config_rq_freq, -1);
+
+	memset(event_str, 0, 100);
+	sprintf(event_str, "actual-frequency-gt%d", gt);
+	config_act_freq = read_pmu_config(fd, event_str);
+	pmu_fd[1] = open_group(fd, config_act_freq, pmu_fd[0]);
+
+	run_workload(fd, gt, eci, &wl);
+	/*
+	 * Set GPU to min frequency and read PMU counters.
+	 */
+	igt_assert(set_freq(fd, gt, "max", orig_min) > 0);
+	igt_assert(get_freq(fd, gt, "max") == orig_min);
+
+	slept = pmu_read_multi(pmu_fd[0], 2, start);
+	measured_usleep(batch_duration_ns / 1000);
+	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
+
+	min[0] = 1e9*(val[0] - start[0]) / slept;
+	min[1] = 1e9*(val[1] - start[1]) / slept;
+
+	/*
+	 * Set GPU to max frequency and read PMU counters.
+	 */
+	igt_assert(set_freq(fd, gt, "max", orig_max) > 0);
+	igt_assert(get_freq(fd, gt, "max") == orig_max);
+	igt_assert(set_freq(fd, gt, "min", orig_max) > 0);
+	igt_assert(get_freq(fd, gt, "min") == orig_max);
+
+	slept = pmu_read_multi(pmu_fd[0], 2, start);
+	measured_usleep(batch_duration_ns / 1000);
+	slept = pmu_read_multi(pmu_fd[0], 2, val) - slept;
+
+	max[0] = 1e9*(val[0] - start[0]) / slept;
+	max[1] = 1e9*(val[1] - start[1]) / slept;
+
+	/*
+	 * Restore min/max.
+	 */
+	igt_assert(set_freq(fd, gt, "min", orig_min) > 0);
+	igt_assert(get_freq(fd, gt, "min") == orig_min);
+
+	igt_info("Minimum frequency: requested %.1f, actual %.1f\n",
+		 min[0], min[1]);
+	igt_info("Maximum frequency: requested %.1f, actual %.1f\n",
+		 max[0], max[1]);
+
+	close(pmu_fd[0]);
+	close(pmu_fd[1]);
+
+	end_workload(fd, &wl);
+
+	assert_within_epsilon(min[0], orig_min, tolerance);
+	/*
+	 * On thermally throttled devices we cannot be sure maximum frequency
+	 * can be reached so use larger tolerance downards.
+	 */
+	__assert_within_epsilon(max[0], orig_max, tolerance, 0.15f, no_debug_data);
+}
+
+igt_main
+{
+	int fd, gt;
+	struct drm_xe_engine_class_instance *hwe;
+
+	igt_fixture {
+		fd = drm_open_driver(DRIVER_XE);
+		igt_require(!IS_PONTEVECCHIO(xe_dev_id(fd)));
+	}
+
+	igt_describe("Validate PMU C6 residency counters");
+	igt_subtest("c6")
+		xe_for_each_gt(fd, gt)
+			test_rc6(fd, gt);
+
+	igt_describe("Validate PMU GT freq measured over a time interval is within the tolerance");
+	igt_subtest("frequency")
+		xe_for_each_engine(fd, hwe)
+			test_frequency(fd, hwe->gt_id, hwe);
+
+	igt_fixture {
+		close(fd);
+	}
+}
diff --git a/tests/meson.build b/tests/meson.build
index 34b87b125..dc84ef748 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -308,6 +308,7 @@  intel_xe_progs = [
 	'xe_pat',
 	'xe_peer2peer',
 	'xe_pm',
+	'xe_pmu',
 	'xe_pm_residency',
 	'xe_prime_self_import',
 	'xe_query',