@@ -1831,6 +1831,12 @@ static int __do_execve_file(int fd, struct filename *filename,
if (IS_ERR(filename))
return PTR_ERR(filename);
+ if (current->flags & PF_NPROC_UNS_EXCEEDED) {
+ current->flags &= ~PF_NPROC_UNS_EXCEEDED;
+ retval = -EAGAIN;
+ goto out_ret;
+ }
+
processes = get_rlimit_counter(&init_user_ns, current_euid(), UCOUNT_RLIMIT_NPROC);
/*
@@ -352,10 +352,11 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
dec_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
} else {
+ if (!inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC))
+ return;
worker->flags &= ~IO_WORKER_F_BOUND;
wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
- inc_rlimit_counter(&init_user_ns, wqe->wq->user->uid, UCOUNT_RLIMIT_NPROC);
}
io_wqe_inc_running(wqe, worker);
}
@@ -660,6 +661,12 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
return false;
}
+ if (index == IO_WQ_ACCT_UNBOUND &&
+ !inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC)) {
+ kfree(worker);
+ return false;
+ }
+
spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
list_add_tail_rcu(&worker->all_list, &wqe->all_list);
@@ -671,9 +678,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
acct->nr_workers++;
spin_unlock_irq(&wqe->lock);
- if (index == IO_WQ_ACCT_UNBOUND)
- inc_rlimit_counter(&init_user_ns, wq->user->uid, UCOUNT_RLIMIT_NPROC);
-
wake_up_process(worker->task);
return true;
}
@@ -1506,6 +1506,9 @@ extern struct pid *cad_pid;
#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
#define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
+#define PF_NPROC_UNS_EXCEEDED 0x01000000 /* commit_creds() hit RLIMIT_NPROC in the current
+ * user namespace or one of its parents; the
+ * next fork/execve clears this and gets -EAGAIN */
#define PF_UMH 0x02000000 /* I'm an Usermodehelper process */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
@@ -345,13 +345,14 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
#endif
clone_flags & CLONE_THREAD
) {
+ if (!inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC))
+ return -EACCES;
p->real_cred = get_cred(p->cred);
get_cred(p->cred);
alter_cred_subscribers(p->cred, 2);
kdebug("share_creds(%p{%d,%d})",
p->cred, atomic_read(&p->cred->usage),
read_cred_subscribers(p->cred));
- inc_rlimit_counter(&init_user_ns, task_euid(p), UCOUNT_RLIMIT_NPROC);
return 0;
}
@@ -384,7 +385,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
}
#endif
- inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
+ if (!inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC))
+ return -EACCES;
p->cred = p->real_cred = get_cred(new);
alter_cred_subscribers(new, 2);
validate_creds(new);
@@ -480,13 +482,14 @@ int commit_creds(struct cred *new)
if (!gid_eq(new->fsgid, old->fsgid))
key_fsgid_changed(new);
- /* do it
- * RLIMIT_NPROC limits on user->processes have already been checked
- * in set_user().
+ /*
+ * The RLIMIT_NPROC limits have already been checked in set_user(), but
+ * perhaps this limit is exceeded in the parent user namespace.
*/
alter_cred_subscribers(new, 2);
- if (new->user != old->user)
- inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC);
+ if (new->user != old->user &&
+ !inc_rlimit_counter(&init_user_ns, new->euid, UCOUNT_RLIMIT_NPROC))
+ task->flags |= PF_NPROC_UNS_EXCEEDED;
rcu_assign_pointer(task->real_cred, new);
rcu_assign_pointer(task->cred, new);
if (new->user != old->user)
@@ -1958,9 +1958,13 @@ static __latent_entropy struct task_struct *copy_process(
DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
#endif
+ retval = -EAGAIN;
+ if (current->flags & PF_NPROC_UNS_EXCEEDED) {
+ current->flags &= ~PF_NPROC_UNS_EXCEEDED;
+ goto bad_fork_free;
+ }
processes = get_rlimit_counter(&init_user_ns, p->real_cred->euid,
UCOUNT_RLIMIT_NPROC);
- retval = -EAGAIN;
if (processes >= task_rlimit(p, RLIMIT_NPROC)) {
if (p->real_cred->user != INIT_USER &&
!capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
Since RLIMIT_NPROC is counted per user namespace, the existing over-limit
check in the current user namespace is not sufficient. We must also handle
the case where this limit is exceeded in a parent user namespace.

Signed-off-by: Alexey Gladkov <gladkov.alexey@gmail.com>
---
 fs/exec.c             |  6 ++++++
 fs/io-wq.c            | 12 ++++++++----
 include/linux/sched.h |  3 +++
 kernel/cred.c         | 17 ++++++++++-------
 kernel/fork.c         |  6 +++++-
 5 files changed, 32 insertions(+), 12 deletions(-)