@@ -1940,6 +1940,8 @@ static inline void clear_page_pfmemalloc(struct page *page)
* Can be called by the pagefault handler when it gets a VM_FAULT_OOM.
*/
extern void pagefault_out_of_memory(void);
+extern void pid_max_oom_check(struct pid_namespace *ns);
+
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
#define offset_in_thp(page, p) ((unsigned long)(p) & (thp_size(page) - 1))
@@ -237,7 +237,11 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
idr_preload_end();
if (nr < 0) {
- retval = (nr == -ENOSPC) ? -EAGAIN : nr;
+ retval = nr;
+ if (nr == -ENOSPC) {
+ retval = -EAGAIN;
+ pid_max_oom_check(tmp);
+ }
goto out_free;
}
@@ -1260,3 +1260,73 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags)
return -ENOSYS;
#endif /* CONFIG_MMU */
}
+
+/*
+ * oom_pid_evaluate_task - consider one process as the pid-exhaustion victim
+ * @p: candidate process, as handed out by the caller's process walk
+ * @max_zombie_task: in/out - best candidate found so far
+ * @max_zombie_num: in/out - number of zombie children of *@max_zombie_task
+ *
+ * Counts the EXIT_ZOMBIE children of every thread in @p's thread group and
+ * records @p as the new best candidate when that count is strictly greater
+ * than *@max_zombie_num. Nothing is returned; the result is reported only
+ * through the two in/out pointers.
+ *
+ * NOTE(review): this function takes no lock of its own. ->children is a
+ * plain (non-RCU) list, so the caller is expected to hold tasklist_lock
+ * while calling it - confirm against the caller.
+ */
+static void oom_pid_evaluate_task(struct task_struct *p,
+		struct task_struct **max_zombie_task, int *max_zombie_num)
+{
+	struct task_struct *t;
+	struct task_struct *child;
+	int zombie_num = 0;
+
+	/*
+	 * SIGKILL has no effect on the global init or on kernel threads, so
+	 * selecting one of them would free no PIDs and would mask the real
+	 * offender.
+	 */
+	if (is_global_init(p) || (p->flags & PF_KTHREAD))
+		return;
+
+	/*
+	 * A child is linked on the ->children list of the thread that forked
+	 * it, not on the group leader's list, so every thread of the process
+	 * has to be inspected.
+	 */
+	for_each_thread(p, t) {
+		list_for_each_entry(child, &t->children, sibling) {
+			if (READ_ONCE(child->exit_state) == EXIT_ZOMBIE)
+				zombie_num++;
+		}
+	}
+
+	if (zombie_num > *max_zombie_num) {
+		*max_zombie_num = zombie_num;
+		*max_zombie_task = p;
+	}
+}
+
+/* A parent is only killed when it has more than this many zombie children. */
+#define MAX_ZOMBIE_NUM 10
+
+/*
+ * pid_max_bad_process - find the process hoarding the most zombie children
+ * @ns: pid namespace whose PID space is exhausted
+ *
+ * Walks every process that has a PID in @ns and picks the one with the
+ * largest number of zombie children, provided that number exceeds
+ * MAX_ZOMBIE_NUM.
+ *
+ * Return: the selected task with a reference held (the caller must drop it
+ * with put_task_struct()), or NULL when no process qualifies.
+ */
+static struct task_struct *pid_max_bad_process(struct pid_namespace *ns)
+{
+	int max_zombie_num = 0;
+	struct task_struct *max_zombie_task = NULL;
+	struct task_struct *p;
+
+	/*
+	 * ->children is not an RCU-managed list (entries are removed with
+	 * list_del_init()), so walking it requires tasklist_lock; holding
+	 * rcu_read_lock() alone is not enough.
+	 */
+	read_lock(&tasklist_lock);
+	for_each_process(p) {
+		/*
+		 * A task without a PID in @ns lives in an unrelated
+		 * namespace; it would also be logged as pid 0.
+		 */
+		if (!task_pid_nr_ns(p, ns))
+			continue;
+		oom_pid_evaluate_task(p, &max_zombie_task, &max_zombie_num);
+	}
+	if (max_zombie_num > MAX_ZOMBIE_NUM)
+		/* Pin the task before the lock is dropped. */
+		get_task_struct(max_zombie_task);
+	else
+		max_zombie_task = NULL;
+	read_unlock(&tasklist_lock);
+
+	if (max_zombie_task)
+		pr_info("process %d has %d zombie child\n",
+			task_pid_nr_ns(max_zombie_task, ns), max_zombie_num);
+
+	return max_zombie_task;
+}
+
+/*
+ * pid_max_oom_kill_process - kill @task through the OOM killer machinery
+ * @task: victim; the caller must hold a reference for the duration of the
+ *        call and keeps owning that reference afterwards
+ *
+ * The kill is best effort: nothing is done if mem_cgroup_oom_synchronize()
+ * reports that it handled an OOM, or if oom_lock is contended.
+ */
+static void pid_max_oom_kill_process(struct task_struct *task)
+{
+	struct oom_control oc = {
+		.zonelist = NULL,
+		.nodemask = NULL,
+		.memcg = NULL,
+		.gfp_mask = 0,
+		.order = 0,
+	};
+
+	/*
+	 * NOTE(review): inherited from the page-fault OOM path; it is unclear
+	 * whether it can ever return true from fork context - confirm or drop.
+	 */
+	if (mem_cgroup_oom_synchronize(true))
+		return;
+
+	if (!mutex_trylock(&oom_lock))
+		return;
+
+	/*
+	 * Take the extra reference only once the kill is certain to be
+	 * attempted, so the early returns above cannot leak it.
+	 * NOTE(review): oom_kill_process() is expected to drop the reference
+	 * on oc.chosen, as the original get_task_struct() call implied -
+	 * confirm.
+	 */
+	get_task_struct(task);
+	oc.chosen = task;
+
+	oom_kill_process(&oc, "Out of pid max(oom_kill_allocating_task)");
+	mutex_unlock(&oom_lock);
+}
+
+/*
+ * pid_max_oom_check - try to free PIDs after PID allocation failed in @ns
+ * @ns: pid namespace in which no free PID could be found
+ *
+ * Looks for the process with the most zombie children and, if one
+ * qualifies, kills it.
+ */
+void pid_max_oom_check(struct pid_namespace *ns)
+{
+	struct task_struct *p;
+
+	p = pid_max_bad_process(ns);
+	if (!p)
+		return;
+
+	pr_info("oom_kill process %d\n", task_pid_nr_ns(p, ns));
+	pid_max_oom_kill_process(p);
+	/* Drop the reference taken by pid_max_bad_process(). */
+	put_task_struct(p);
+}
A common situation is that a parent process forks many child processes to
execute tasks but never calls wait()/waitpid() when the children exit, so a
large number of child processes become zombie processes.

Once the number of processes in the system reaches kernel.pid_max, every new
fork() syscall fails and the system is unable to execute any command (unless
an old process exits), e.g.:

  [root@lq-workstation ~]# ls
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: Resource temporarily unavailable
  [root@lq-workstation ~]# reboot
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: retry: Resource temporarily unavailable
  -bash: fork: Resource temporarily unavailable

This patch handles that situation in alloc_pid(): when PID allocation fails,
it looks for the process with the most zombie children and, if that process
has more than 10 zombie children (or some other reasonable value?), tries to
kill it in order to release the PID resources.

Signed-off-by: liuq <liuq131@chinatelecom.cn>
---
 include/linux/mm.h |  2 ++
 kernel/pid.c       |  6 +++-
 mm/oom_kill.c      | 70 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 77 insertions(+), 1 deletion(-)