@@ -367,19 +367,29 @@ static int oom_evaluate_task(struct task_struct *task, void *arg)
* Simple selection loop. We choose the process with the highest number of
* 'points'. In case scan was aborted, oc->chosen is set to -1.
*/
-static void select_bad_process(struct oom_control *oc)
+static const char *select_bad_process(struct oom_control *oc)
{
- if (is_memcg_oom(oc))
- mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
- else {
- struct task_struct *p;
+ struct task_struct *p;
- rcu_read_lock();
- for_each_process(p)
- if (oom_evaluate_task(p, oc))
- break;
- rcu_read_unlock();
+ if (is_memcg_oom(oc)) {
+ mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
+ return "Memory cgroup out of memory";
}
+ rcu_read_lock();
+ for_each_process(p)
+ if (oom_evaluate_task(p, oc))
+ break;
+ rcu_read_unlock();
+ if (sysctl_oom_kill_allocating_task && oc->chosen != (void *)-1UL) {
+ list_for_each_entry(p, &oom_candidate_list,
+ oom_candidate_list) {
+ if (!same_thread_group(p, current))
+ continue;
+ oc->chosen = current;
+ return "Out of memory (oom_kill_allocating_task)";
+ }
+ }
+ return "Out of memory";
}
/**
@@ -1021,6 +1031,7 @@ bool out_of_memory(struct oom_control *oc)
{
unsigned long freed = 0;
enum oom_constraint constraint = CONSTRAINT_NONE;
+ const char *message;
if (oom_killer_disabled)
return false;
@@ -1061,15 +1072,7 @@ bool out_of_memory(struct oom_control *oc)
oc->nodemask = NULL;
check_panic_on_oom(oc, constraint);
- if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
- current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
- current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
- oc->chosen = current;
- oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
- return true;
- }
-
- select_bad_process(oc);
+ message = select_bad_process(oc);
/* Found nothing?!?! */
if (!oc->chosen) {
dump_header(oc, NULL);
@@ -1083,8 +1086,7 @@ bool out_of_memory(struct oom_control *oc)
panic("System is deadlocked on memory\n");
}
if (oc->chosen && oc->chosen != (void *)-1UL)
- oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
- "Memory cgroup out of memory");
+ oom_kill_process(oc, message);
while (!list_empty(&oom_candidate_list)) {
struct task_struct *p = list_first_entry(&oom_candidate_list,
struct task_struct,
Commit "mm, oom: Avoid potential RCU stall at dump_tasks()." changed the OOM killer so that oom_kill_allocating_task != 0 implies oom_dump_tasks == 0. However, since we can expect the OOM reaper to reclaim memory quickly, and the majority of the latency comes not from for_each_process() in select_bad_process() but from printk() in dump_header(), waiting for in-flight OOM victims until the OOM reaper completes should produce preferable results (i.e. a minimal number of OOM victims). As side effects of this patch, oom_kill_allocating_task != 0 no longer implies oom_dump_tasks == 0, the complicated conditions for deciding whether to enter the oom_kill_allocating_task path are simplified, and a theoretical bug in which the OOM killer retries the oom_kill_allocating_task path forever even after the OOM reaper has set MMF_OOM_SKIP is fixed. Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> --- mm/oom_kill.c | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-)