diff mbox series

[3/4] mm, oom: Wait for OOM victims even in the oom_kill_allocating_task case.

Message ID 1558519686-16057-3-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp (mailing list archive)
State New, archived
Headers show
Series [1/4] mm, oom: Remove redundant OOM score normalization at select_bad_process(). | expand

Commit Message

Tetsuo Handa May 22, 2019, 10:08 a.m. UTC
"mm, oom: Avoid potential RCU stall at dump_tasks()." changed the OOM
killer so that oom_kill_allocating_task != 0 implies oom_dump_tasks == 0.
But since we can expect the OOM reaper to reclaim memory quickly, and the
majority of the latency comes not from for_each_process() in
select_bad_process() but from printk() in dump_header(), waiting for
in-flight OOM victims until the OOM reaper completes should give
preferable results (i.e. a minimal number of OOM victims).

As side effects of this patch, oom_kill_allocating_task != 0 no longer
implies oom_dump_tasks == 0, the complicated conditions for whether to
enter the oom_kill_allocating_task path are simplified, and a theoretical
bug where the OOM killer retries the oom_kill_allocating_task path
forever even after the OOM reaper has set MMF_OOM_SKIP is fixed.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
---
 mm/oom_kill.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)
diff mbox series

Patch

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 00b594c..64e582e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -367,19 +367,29 @@  static int oom_evaluate_task(struct task_struct *task, void *arg)
  * Simple selection loop. We choose the process with the highest number of
  * 'points'. In case scan was aborted, oc->chosen is set to -1.
  */
-static void select_bad_process(struct oom_control *oc)
+static const char *select_bad_process(struct oom_control *oc)
 {
-	if (is_memcg_oom(oc))
-		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
-	else {
-		struct task_struct *p;
+	struct task_struct *p;
 
-		rcu_read_lock();
-		for_each_process(p)
-			if (oom_evaluate_task(p, oc))
-				break;
-		rcu_read_unlock();
+	if (is_memcg_oom(oc)) {
+		mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
+		return "Memory cgroup out of memory";
 	}
+	rcu_read_lock();
+	for_each_process(p)
+		if (oom_evaluate_task(p, oc))
+			break;
+	rcu_read_unlock();
+	if (sysctl_oom_kill_allocating_task && oc->chosen != (void *)-1UL) {
+		list_for_each_entry(p, &oom_candidate_list,
+				    oom_candidate_list) {
+			if (!same_thread_group(p, current))
+				continue;
+			oc->chosen = current;
+			return "Out of memory (oom_kill_allocating_task)";
+		}
+	}
+	return "Out of memory";
 }
 
 /**
@@ -1021,6 +1031,7 @@  bool out_of_memory(struct oom_control *oc)
 {
 	unsigned long freed = 0;
 	enum oom_constraint constraint = CONSTRAINT_NONE;
+	const char *message;
 
 	if (oom_killer_disabled)
 		return false;
@@ -1061,15 +1072,7 @@  bool out_of_memory(struct oom_control *oc)
 		oc->nodemask = NULL;
 	check_panic_on_oom(oc, constraint);
 
-	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
-	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
-	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
-		oc->chosen = current;
-		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
-		return true;
-	}
-
-	select_bad_process(oc);
+	message = select_bad_process(oc);
 	/* Found nothing?!?! */
 	if (!oc->chosen) {
 		dump_header(oc, NULL);
@@ -1083,8 +1086,7 @@  bool out_of_memory(struct oom_control *oc)
 			panic("System is deadlocked on memory\n");
 	}
 	if (oc->chosen && oc->chosen != (void *)-1UL)
-		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :
-				 "Memory cgroup out of memory");
+		oom_kill_process(oc, message);
 	while (!list_empty(&oom_candidate_list)) {
 		struct task_struct *p = list_first_entry(&oom_candidate_list,
 							 struct task_struct,