@@ -11,7 +11,7 @@ enum psi_task_count {
NR_IOWAIT,
NR_MEMSTALL,
NR_RUNNING,
- NR_PSI_TASK_COUNTS,
+ NR_PSI_TASK_COUNTS = 3,
};
/* Task state bitmasks */
@@ -24,7 +24,7 @@ enum psi_res {
PSI_IO,
PSI_MEM,
PSI_CPU,
- NR_PSI_RESOURCES,
+ NR_PSI_RESOURCES = 3,
};
/*
@@ -41,7 +41,7 @@ enum psi_states {
PSI_CPU_SOME,
/* Only per-CPU, to weigh the CPU in the global average: */
PSI_NONIDLE,
- NR_PSI_STATES,
+ NR_PSI_STATES = 6,
};
struct psi_group_cpu {
@@ -53,6 +53,9 @@ struct psi_group_cpu {
/* States of the tasks belonging to this group */
unsigned int tasks[NR_PSI_TASK_COUNTS];
+ /* Aggregate pressure state derived from the tasks */
+ u32 state_mask;
+
/* Period time sampling buckets for each state of interest (ns) */
u32 times[NR_PSI_STATES];
@@ -212,17 +212,17 @@ static bool test_state(unsigned int *tasks, enum psi_states state)
static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
{
struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
- unsigned int tasks[NR_PSI_TASK_COUNTS];
u64 now, state_start;
+ enum psi_states s;
unsigned int seq;
- int s;
+ u32 state_mask;
/* Snapshot a coherent view of the CPU state */
do {
seq = read_seqcount_begin(&groupc->seq);
now = cpu_clock(cpu);
memcpy(times, groupc->times, sizeof(groupc->times));
- memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
+ state_mask = groupc->state_mask;
state_start = groupc->state_start;
} while (read_seqcount_retry(&groupc->seq, seq));
@@ -238,7 +238,7 @@ static void get_recent_times(struct psi_group *group, int cpu, u32 *times)
* (u32) and our reported pressure close to what's
* actually happening.
*/
- if (test_state(tasks, s))
+ if (state_mask & (1 << s))
times[s] += now - state_start;
delta = times[s] - groupc->times_prev[s];
@@ -406,15 +406,15 @@ static void record_times(struct psi_group_cpu *groupc, int cpu,
delta = now - groupc->state_start;
groupc->state_start = now;
- if (test_state(groupc->tasks, PSI_IO_SOME)) {
+ if (groupc->state_mask & (1 << PSI_IO_SOME)) {
groupc->times[PSI_IO_SOME] += delta;
- if (test_state(groupc->tasks, PSI_IO_FULL))
+ if (groupc->state_mask & (1 << PSI_IO_FULL))
groupc->times[PSI_IO_FULL] += delta;
}
- if (test_state(groupc->tasks, PSI_MEM_SOME)) {
+ if (groupc->state_mask & (1 << PSI_MEM_SOME)) {
groupc->times[PSI_MEM_SOME] += delta;
- if (test_state(groupc->tasks, PSI_MEM_FULL))
+ if (groupc->state_mask & (1 << PSI_MEM_FULL))
groupc->times[PSI_MEM_FULL] += delta;
else if (memstall_tick) {
u32 sample;
@@ -435,10 +435,10 @@ static void record_times(struct psi_group_cpu *groupc, int cpu,
}
}
- if (test_state(groupc->tasks, PSI_CPU_SOME))
+ if (groupc->state_mask & (1 << PSI_CPU_SOME))
groupc->times[PSI_CPU_SOME] += delta;
- if (test_state(groupc->tasks, PSI_NONIDLE))
+ if (groupc->state_mask & (1 << PSI_NONIDLE))
groupc->times[PSI_NONIDLE] += delta;
}
@@ -447,6 +447,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
{
struct psi_group_cpu *groupc;
unsigned int t, m;
+ enum psi_states s;
+ u32 state_mask = 0;
groupc = per_cpu_ptr(group->pcpu, cpu);
@@ -479,6 +481,13 @@ static void psi_group_change(struct psi_group *group, int cpu,
if (set & (1 << t))
groupc->tasks[t]++;
+ /* Calculate state mask representing active states */
+ for (s = 0; s < NR_PSI_STATES; s++) {
+ if (test_state(groupc->tasks, s))
+ state_mask |= (1 << s);
+ }
+ groupc->state_mask = state_mask;
+
write_seqcount_end(&groupc->seq);
if (!delayed_work_pending(&group->clock_work))
The psi monitoring patches will need to determine the same states as record_times(). To avoid calculating them twice, maintain a state mask that can be consulted cheaply. Do this in a separate patch to keep the churn in the main feature patch at a minimum. This adds a 4-byte state_mask member to the psi_group_cpu struct, which results in its first cacheline-aligned part becoming 52 bytes long. Add explicit values to the enumeration element counters that affect the psi_group_cpu struct size. Signed-off-by: Suren Baghdasaryan <surenb@google.com> --- include/linux/psi_types.h | 9 ++++++--- kernel/sched/psi.c | 29 +++++++++++++++++++---------- 2 files changed, 25 insertions(+), 13 deletions(-)