Message ID | 20200222144647.10120-1-laoar.shao@gmail.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | psi: move PF_MEMSTALL into psi specific psi_flags | expand |
Hello Yafang, On Sat, Feb 22, 2020 at 09:46:47AM -0500, Yafang Shao wrote: > The task->flags is a 32-bits flag, in which 31 bits have already been > consumed. So it is hardly to introduce other new per process flag. > As there's a psi specific flag psi_flags, we'd better move the psi specific > per process flag PF_MEMSTALL into it. Currently, psi_flags is used only for debugging: if (((task->psi_flags & set) || (task->psi_flags & clear) != clear) && !psi_bug) { printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", task->pid, task->comm, cpu, task->psi_flags, clear, set); psi_bug = 1; } task->psi_flags &= ~clear; task->psi_flags |= set; While this has caught a few bugs while the code was new, I'm planning on moving it to a CONFIG option that is only enabled in debug builds. If you need the room in task->flags, can you please make the memstall state a single bit in task_struct instead? AFAICS there is still space in this section: /* Force alignment to the next boundary: */ unsigned :0; /* Unserialized, strictly 'current' */ ... #ifdef CONFIG_PSI unsigned in_memstall:1; #endif It would also avoid the mixed-bit masking headache: > @@ -17,11 +17,21 @@ enum psi_task_count { > NR_PSI_TASK_COUNTS = 3, > }; > > -/* Task state bitmasks */ > +/* > + * Task state bitmasks: > + * These flags are stored in the lower PSI_TSK_BITS bits of > + * task->psi_flags, and the higher bits are set with per process flag which > + * persists across sleeps. > + */ > +#define PSI_TSK_STATE_BITS 16 > +#define PSI_TSK_STATE_MASK ((1 << PSI_TSK_STATE_BITS) - 1) > #define TSK_IOWAIT (1 << NR_IOWAIT) > #define TSK_MEMSTALL (1 << NR_MEMSTALL) > #define TSK_RUNNING (1 << NR_RUNNING) > > +/* Stalled due to lack of memory, that's per process flag. 
*/ > +#define PSI_PF_MEMSTALL (1 << PSI_TSK_STATE_BITS) > + > /* Resources that workloads could be stalled on */ > enum psi_res { > PSI_IO, > diff --git a/include/linux/sched.h b/include/linux/sched.h > index f314790cb527..2d4c04d35d9b 100644 > --- a/include/linux/sched.h > +++ b/include/linux/sched.h > @@ -1025,7 +1025,11 @@ struct task_struct { > > struct task_io_accounting ioac; > #ifdef CONFIG_PSI > - /* Pressure stall state */ > + /* > + * Pressure stall state: > + * Bits 0 ~ PSI_TSK_STATE_BITS-1: PSI task states > + * Bits PSI_TSK_STATE_BITS ~ 31: Per process flags > + */ > unsigned int psi_flags; > #endif > #ifdef CONFIG_TASK_XACCT Thanks
On Tue, Feb 25, 2020 at 12:25 AM Johannes Weiner <hannes@cmpxchg.org> wrote: > > Hello Yafang, > > On Sat, Feb 22, 2020 at 09:46:47AM -0500, Yafang Shao wrote: > > The task->flags is a 32-bits flag, in which 31 bits have already been > > consumed. So it is hardly to introduce other new per process flag. > > As there's a psi specific flag psi_flags, we'd better move the psi specific > > per process flag PF_MEMSTALL into it. > > Currently, psi_flags is used only for debugging: > > if (((task->psi_flags & set) || > (task->psi_flags & clear) != clear) && > !psi_bug) { > printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", > task->pid, task->comm, cpu, > task->psi_flags, clear, set); > psi_bug = 1; > } > > task->psi_flags &= ~clear; > task->psi_flags |= set; > > While this has caught a few bugs while the code was new, I'm planning > on moving it to a CONFIG option that is only enabled in debug builds. > Got it. Many thanks for your explanation. > If you need the room in task->flags, can you please make the memstall > state a single bit in task_struct instead? AFAICS there is still space > in this section: > > /* Force alignment to the next boundary: */ > unsigned :0; > > /* Unserialized, strictly 'current' */ > > ... > > #ifdef CONFIG_PSI > unsigned in_memstall:1; > #endif > > It would also avoid the mixed-bit masking headache: > Seems that's a better solution. I will update with it. Thanks for your suggestion. > > @@ -17,11 +17,21 @@ enum psi_task_count { > > NR_PSI_TASK_COUNTS = 3, > > }; > > > > -/* Task state bitmasks */ > > +/* > > + * Task state bitmasks: > > + * These flags are stored in the lower PSI_TSK_BITS bits of > > + * task->psi_flags, and the higher bits are set with per process flag which > > + * persists across sleeps. 
> > + */ > > +#define PSI_TSK_STATE_BITS 16 > > +#define PSI_TSK_STATE_MASK ((1 << PSI_TSK_STATE_BITS) - 1) > > #define TSK_IOWAIT (1 << NR_IOWAIT) > > #define TSK_MEMSTALL (1 << NR_MEMSTALL) > > #define TSK_RUNNING (1 << NR_RUNNING) > > > > +/* Stalled due to lack of memory, that's per process flag. */ > > +#define PSI_PF_MEMSTALL (1 << PSI_TSK_STATE_BITS) > > + > > /* Resources that workloads could be stalled on */ > > enum psi_res { > > PSI_IO, > > diff --git a/include/linux/sched.h b/include/linux/sched.h > > index f314790cb527..2d4c04d35d9b 100644 > > --- a/include/linux/sched.h > > +++ b/include/linux/sched.h > > @@ -1025,7 +1025,11 @@ struct task_struct { > > > > struct task_io_accounting ioac; > > #ifdef CONFIG_PSI > > - /* Pressure stall state */ > > + /* > > + * Pressure stall state: > > + * Bits 0 ~ PSI_TSK_STATE_BITS-1: PSI task states > > + * Bits PSI_TSK_STATE_BITS ~ 31: Per process flags > > + */ > > unsigned int psi_flags; > > #endif > > #ifdef CONFIG_TASK_XACCT > > Thanks
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index 07aaf9b82241..411dbbf57d51 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -17,11 +17,21 @@ enum psi_task_count { NR_PSI_TASK_COUNTS = 3, }; -/* Task state bitmasks */ +/* + * Task state bitmasks: + * These flags are stored in the lower PSI_TSK_BITS bits of + * task->psi_flags, and the higher bits are set with per process flag which + * persists across sleeps. + */ +#define PSI_TSK_STATE_BITS 16 +#define PSI_TSK_STATE_MASK ((1 << PSI_TSK_STATE_BITS) - 1) #define TSK_IOWAIT (1 << NR_IOWAIT) #define TSK_MEMSTALL (1 << NR_MEMSTALL) #define TSK_RUNNING (1 << NR_RUNNING) +/* Stalled due to lack of memory, that's per process flag. */ +#define PSI_PF_MEMSTALL (1 << PSI_TSK_STATE_BITS) + /* Resources that workloads could be stalled on */ enum psi_res { PSI_IO, diff --git a/include/linux/sched.h b/include/linux/sched.h index f314790cb527..2d4c04d35d9b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1025,7 +1025,11 @@ struct task_struct { struct task_io_accounting ioac; #ifdef CONFIG_PSI - /* Pressure stall state */ + /* + * Pressure stall state: + * Bits 0 ~ PSI_TSK_STATE_BITS-1: PSI task states + * Bits PSI_TSK_STATE_BITS ~ 31: Per process flags + */ unsigned int psi_flags; #endif #ifdef CONFIG_TASK_XACCT @@ -1490,7 +1494,6 @@ extern struct pid *cad_pid; #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ #define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ -#define PF_MEMSTALL 0x01000000 /* Stalled due to lack of memory */ #define PF_UMH 0x02000000 /* I'm an Usermodehelper process */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 028520702717..34363fc77ecc 100644 --- a/kernel/sched/psi.c +++ 
b/kernel/sched/psi.c @@ -759,7 +759,8 @@ void psi_task_change(struct task_struct *task, int clear, int set) !psi_bug) { printk_deferred(KERN_ERR "psi: inconsistent task state! task=%d:%s cpu=%d psi_flags=%x clear=%x set=%x\n", task->pid, task->comm, cpu, - task->psi_flags, clear, set); + task->psi_flags & PSI_TSK_STATE_MASK, + clear, set); psi_bug = 1; } @@ -818,17 +819,17 @@ void psi_memstall_enter(unsigned long *flags) if (static_branch_likely(&psi_disabled)) return; - *flags = current->flags & PF_MEMSTALL; + *flags = current->psi_flags & PSI_PF_MEMSTALL; if (*flags) return; /* - * PF_MEMSTALL setting & accounting needs to be atomic wrt + * PSI_PF_MEMSTALL setting & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we can * race with CPU migration. */ rq = this_rq_lock_irq(&rf); - current->flags |= PF_MEMSTALL; + current->psi_flags |= PSI_PF_MEMSTALL; psi_task_change(current, 0, TSK_MEMSTALL); rq_unlock_irq(rq, &rf); @@ -851,13 +852,13 @@ void psi_memstall_leave(unsigned long *flags) if (*flags) return; /* - * PF_MEMSTALL clearing & accounting needs to be atomic wrt + * PSI_PF_MEMSTALL clearing & accounting needs to be atomic wrt * changes to the task's scheduling state, otherwise we could * race with CPU migration. 
*/ rq = this_rq_lock_irq(&rf); - current->flags &= ~PF_MEMSTALL; + current->psi_flags &= ~PSI_PF_MEMSTALL; psi_task_change(current, TSK_MEMSTALL, 0); rq_unlock_irq(rq, &rf); @@ -921,7 +922,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) else if (task->in_iowait) task_flags = TSK_IOWAIT; - if (task->flags & PF_MEMSTALL) + if (task->psi_flags & PSI_PF_MEMSTALL) task_flags |= TSK_MEMSTALL; if (task_flags) diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index ba683fe81a6e..164f97b1ce7f 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -70,7 +70,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) return; if (!wakeup || p->sched_psi_wake_requeue) { - if (p->flags & PF_MEMSTALL) + if (p->psi_flags & PSI_PF_MEMSTALL) set |= TSK_MEMSTALL; if (p->sched_psi_wake_requeue) p->sched_psi_wake_requeue = 0; @@ -90,7 +90,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) return; if (!sleep) { - if (p->flags & PF_MEMSTALL) + if (p->psi_flags & PSI_PF_MEMSTALL) clear |= TSK_MEMSTALL; } else { if (p->in_iowait) @@ -109,14 +109,14 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) * deregister its sleep-persistent psi states from the old * queue, and let psi_enqueue() know it has to requeue. */ - if (unlikely(p->in_iowait || (p->flags & PF_MEMSTALL))) { + if (unlikely(p->in_iowait || (p->psi_flags & PSI_PF_MEMSTALL))) { struct rq_flags rf; struct rq *rq; int clear = 0; if (p->in_iowait) clear |= TSK_IOWAIT; - if (p->flags & PF_MEMSTALL) + if (p->psi_flags & PSI_PF_MEMSTALL) clear |= TSK_MEMSTALL; rq = __task_rq_lock(p, &rf); @@ -131,7 +131,7 @@ static inline void psi_task_tick(struct rq *rq) if (static_branch_likely(&psi_disabled)) return; - if (unlikely(rq->curr->flags & PF_MEMSTALL)) + if (unlikely(rq->curr->psi_flags & PSI_PF_MEMSTALL)) psi_memstall_tick(rq->curr, cpu_of(rq)); } #else /* CONFIG_PSI */
task->flags is a 32-bit field in which 31 bits have already been consumed, so it is hard to introduce any new per-process flag. As there is already a psi-specific field, psi_flags, we'd better move the psi-specific per-process flag PF_MEMSTALL into it. Signed-off-by: Yafang Shao <laoar.shao@gmail.com> --- include/linux/psi_types.h | 12 +++++++++++- include/linux/sched.h | 7 +++++-- kernel/sched/psi.c | 15 ++++++++------- kernel/sched/stats.h | 10 +++++----- 4 files changed, 29 insertions(+), 15 deletions(-)