diff mbox series

[RFC,5/6] selftests: Challenge RLIMIT_NPROC in user namespaces

Message ID 20220207121800.5079-6-mkoutny@suse.com (mailing list archive)
State New
Headers show
Series RLIMIT_NPROC in ucounts fixups | expand

Commit Message

Michal Koutný Feb. 7, 2022, 12:17 p.m. UTC
The services are started in descendant user namespaces, each of them
should honor the RLIMIT_NPROC that's passed during user namespace
creation.

	main [user_ns_0]
	  ` service [user_ns_1]
	    ` worker 1
	    ` worker 2
	    ...
	    ` worker k
	  ...
	  ` service [user_ns_n]
	    ` worker 1
	    ` worker 2
	    ...
	    ` worker k

The test uses explicit synchronization to make sure the original parent's
limit does not interfere with the descendants.

Signed-off-by: Michal Koutný <mkoutny@suse.com>
---
 .../selftests/rlimits/rlimits-per-userns.c    | 154 ++++++++++++++----
 1 file changed, 125 insertions(+), 29 deletions(-)

Comments

Shuah Khan Feb. 10, 2022, 1:22 a.m. UTC | #1
On 2/7/22 5:17 AM, Michal Koutný wrote:
> The services are started in descendant user namespaces, each of them
> should honor the RLIMIT_NPROC that's passed during user namespace
> creation.
> 
> 	main [user_ns_0]
> 	  ` service [user_ns_1]
> 	    ` worker 1
> 	    ` worker 2
> 	    ...
> 	    ` worker k
> 	  ...
> 	  ` service [user_ns_n]
> 	    ` worker 1
> 	    ` worker 2
> 	    ...
> 	    ` worker k
> 
> Test uses explicit synchronization, to make sure original parent's limit
> does not interfere with descendants.
> 

Thank you for updating the test with the kernel updates. Please see
comments below. A bit of a concern with how long this test will run.
Did you time it?

> Signed-off-by: Michal Koutný <mkoutny@suse.com>
> ---
>   .../selftests/rlimits/rlimits-per-userns.c    | 154 ++++++++++++++----
>   1 file changed, 125 insertions(+), 29 deletions(-)
> 
> diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
> index 26dc949e93ea..54c1b345e42b 100644
> --- a/tools/testing/selftests/rlimits/rlimits-per-userns.c
> +++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
> @@ -9,7 +9,9 @@
>   #include <sys/resource.h>
>   #include <sys/prctl.h>
>   #include <sys/stat.h>
> +#include <sys/socket.h>
>   
> +#include <assert.h>
>   #include <unistd.h>
>   #include <stdlib.h>
>   #include <stdio.h>
> @@ -21,38 +23,74 @@
>   #include <errno.h>
>   #include <err.h>
>   
> -#define NR_CHILDS 2
> +#define THE_LIMIT 4
> +#define NR_CHILDREN 5
> +
> +static_assert(NR_CHILDREN >= THE_LIMIT-1, "Need slots for limit-1 children.");
>   
>   static char *service_prog;
>   static uid_t user   = 60000;
>   static uid_t group  = 60000;
> +static struct rlimit saved_limit;
> +
> +/* Two uses: main and service */
> +static pid_t child[NR_CHILDREN];
> +static pid_t pid;
>   
>   static void setrlimit_nproc(rlim_t n)
>   {
> -	pid_t pid = getpid();
>   	struct rlimit limit = {
>   		.rlim_cur = n,
>   		.rlim_max = n
>   	};
> -
> -	warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
> +	if (getrlimit(RLIMIT_NPROC, &saved_limit) < 0)
> +		err(EXIT_FAILURE, "(pid=%d): getrlimit(RLIMIT_NPROC)", pid);
>   
>   	if (setrlimit(RLIMIT_NPROC, &limit) < 0)
>   		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
> +
> +	warnx("(pid=%d): Set RLIMIT_NPROC=%ld", pid, n);
> +}
> +
> +static void restore_rlimit_nproc(void)
> +{
> +	if (setrlimit(RLIMIT_NPROC, &saved_limit) < 0)
> +		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC, saved)", pid);
> +	warnx("(pid=%d) Restored RLIMIT_NPROC", pid);
>   }
>   
> -static pid_t fork_child(void)
> +enum msg_sync {
> +	UNSHARE,
> +	RLIMIT_RESTORE,
> +};
> +
> +static void sync_notify(int fd, enum msg_sync m)
>   {
> -	pid_t pid = fork();
> +	char tmp = m;
> +
> +	if (write(fd, &tmp, 1) < 0)
> +		warnx("(pid=%d): failed sync-write", pid);
> +}
>   
> -	if (pid < 0)
> +static void sync_wait(int fd, enum msg_sync m)
> +{
> +	char tmp;
> +
> +	if (read(fd, &tmp, 1) < 0)
> +		warnx("(pid=%d): failed sync-read", pid);
> +}
> +
> +static pid_t fork_child(int control_fd)
> +{
> +	pid_t new_pid = fork();
> +
> +	if (new_pid < 0)
>   		err(EXIT_FAILURE, "fork");
>   
> -	if (pid > 0)
> -		return pid;
> +	if (new_pid > 0)
> +		return new_pid;
>   
>   	pid = getpid();
> -
>   	warnx("(pid=%d): New process starting ...", pid);
>   
>   	if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
> @@ -73,6 +111,9 @@ static pid_t fork_child(void)
>   	if (unshare(CLONE_NEWUSER) < 0)
>   		err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
>   
> +	sync_notify(control_fd, UNSHARE);
> +	sync_wait(control_fd, RLIMIT_RESTORE);
> +
>   	char *const argv[] = { "service", NULL };
>   	char *const envp[] = { "I_AM_SERVICE=1", NULL };
>   
> @@ -82,37 +123,92 @@ static pid_t fork_child(void)
>   	err(EXIT_FAILURE, "(pid=%d): execve", pid);
>   }
>   
> +static void run_service(void)
> +{
> +	size_t i;
> +	int ret = EXIT_SUCCESS;
> +	struct rlimit limit;
> +	char user_ns[PATH_MAX];
> +
> +	if (getrlimit(RLIMIT_NPROC, &limit) < 0)
> +		err(EXIT_FAILURE, "(pid=%d) failed getrlimit", pid);
> +	if (readlink("/proc/self/ns/user", user_ns, PATH_MAX) < 0)
> +		err(EXIT_FAILURE, "(pid=%d) failed readlink", pid);
> +
> +	warnx("(pid=%d) Service instance attempts %i children, limit %lu:%lu, ns=%s",
> +	      pid, THE_LIMIT, limit.rlim_cur, limit.rlim_max, user_ns);
> +
> +	/* test rlimit inside the service, effectively THE_LIMIT-1 because of service itself */
> +	for (i = 0; i < THE_LIMIT; i++) {
> +		child[i] = fork();
> +		if (child[i] == 0) {
> +			/* service child */
> +			pause();
> +			exit(EXIT_SUCCESS);
> +		}
> +		if (child[i] < 0) {
> +			warnx("(pid=%d) service fork %lu failed, errno = %i", pid, i+1, errno);
> +			if (!(i == THE_LIMIT-1 && errno == EAGAIN))
> +				ret = EXIT_FAILURE;
> +		} else if (i == THE_LIMIT-1) {
> +			warnx("(pid=%d) RLIMIT_NPROC not honored", pid);
> +			ret = EXIT_FAILURE;
> +		}
> +	}
> +
> +	/* service cleanup */
> +	for (i = 0; i < THE_LIMIT; i++)
> +		if (child[i] > 0)
> +			kill(child[i], SIGUSR1);
> +
> +	for (i = 0; i < THE_LIMIT; i++)
> +		if (child[i] > 0)
> +			waitpid(child[i], NULL, WNOHANG);
> +
> +	if (ret)
> +		exit(ret);
> +	pause();
> +}
> +
>   int main(int argc, char **argv)
>   {
>   	size_t i;
> -	pid_t child[NR_CHILDS];
> -	int wstatus[NR_CHILDS];
> -	int childs = NR_CHILDS;
> -	pid_t pid;
> +	int control_fd[NR_CHILDREN];
> +	int wstatus[NR_CHILDREN];
> +	int children = NR_CHILDREN;
> +	int sockets[2];
> +
> +	pid = getpid();
>   
>   	if (getenv("I_AM_SERVICE")) {
> -		pause();
> -		exit(EXIT_SUCCESS);
> +		run_service();
> +		exit(EXIT_FAILURE);

Why is this a failure unconditionally?

>   	}
>   
>   	service_prog = argv[0];
> -	pid = getpid();
>   
>   	warnx("(pid=%d) Starting testcase", pid);
>   
> -	/*
> -	 * This rlimit is not a problem for root because it can be exceeded.
> -	 */
> -	setrlimit_nproc(1);
> -
> -	for (i = 0; i < NR_CHILDS; i++) {
> -		child[i] = fork_child();
> +	setrlimit_nproc(THE_LIMIT);
> +	for (i = 0; i < NR_CHILDREN; i++) {
> +		if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, sockets) < 0)
> +			err(EXIT_FAILURE, "(pid=%d) socketpair failed", pid);
> +		control_fd[i] = sockets[0];
> +		child[i] = fork_child(sockets[1]);
>   		wstatus[i] = 0;
> +	}
> +
> +	for (i = 0; i < NR_CHILDREN; i++)
> +		sync_wait(control_fd[i], UNSHARE);
> +	restore_rlimit_nproc();
> +
> +	for (i = 0; i < NR_CHILDREN; i++) {
> +		sync_notify(control_fd[i], RLIMIT_RESTORE);
>   		usleep(250000);

How long does this test now run for with this loop?

>   	}
>   
>   	while (1) {
> -		for (i = 0; i < NR_CHILDS; i++) {
> +		for (i = 0; i < NR_CHILDREN; i++) {
>   			if (child[i] <= 0)
>   				continue;
>   
> @@ -126,22 +222,22 @@ int main(int argc, char **argv)
>   				warn("(pid=%d): waitpid(%d)", pid, child[i]);
>   
>   			child[i] *= -1;
> -			childs -= 1;
> +			children -= 1;
>   		}
>   
> -		if (!childs)
> +		if (!children)
>   			break;
>   
>   		usleep(250000);
>   
> -		for (i = 0; i < NR_CHILDS; i++) {
> +		for (i = 0; i < NR_CHILDREN; i++) {
>   			if (child[i] <= 0)
>   				continue;
>   			kill(child[i], SIGUSR1);
>   		}
>   	}
>   
> -	for (i = 0; i < NR_CHILDS; i++) {
> +	for (i = 0; i < NR_CHILDREN; i++) {
>   		if (WIFEXITED(wstatus[i]))
>   			warnx("(pid=%d): pid %d exited, status=%d",
>   				pid, -child[i], WEXITSTATUS(wstatus[i]));
> 

Please add a few more comments in the code path.

thanks,
-- Shuah
Michal Koutný Feb. 15, 2022, 9:45 a.m. UTC | #2
On Wed, Feb 09, 2022 at 06:22:18PM -0700, Shuah Khan <skhan@linuxfoundation.org> wrote:
> Please see comments below. A bit of a concern with how long this test
> will run.  Did you time it?

It runs around 1 s, I didn't measure it and I used it manually only.

> How long does this test now run for with this loop?

I kept this sleep to space output from individual tasks for
better readability of output. It's not necessary for the sake of the
test. I'll remove it in next version.

> > -	for (i = 0; i < NR_CHILDS; i++) {
> > +	for (i = 0; i < NR_CHILDREN; i++) {
> >   		if (WIFEXITED(wstatus[i]))
> >   			warnx("(pid=%d): pid %d exited, status=%d",
> >   				pid, -child[i], WEXITSTATUS(wstatus[i]));
> > 
> 
> Please add a few more comments in the code path.

Hehe, this is inherited from the original version. (True, it's not
overly clear on its own.)

Michal
diff mbox series

Patch

diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
index 26dc949e93ea..54c1b345e42b 100644
--- a/tools/testing/selftests/rlimits/rlimits-per-userns.c
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -9,7 +9,9 @@ 
 #include <sys/resource.h>
 #include <sys/prctl.h>
 #include <sys/stat.h>
+#include <sys/socket.h>
 
+#include <assert.h>
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -21,38 +23,74 @@ 
 #include <errno.h>
 #include <err.h>
 
-#define NR_CHILDS 2
+#define THE_LIMIT 4
+#define NR_CHILDREN 5
+
+static_assert(NR_CHILDREN >= THE_LIMIT-1, "Need slots for limit-1 children.");
 
 static char *service_prog;
 static uid_t user   = 60000;
 static uid_t group  = 60000;
+static struct rlimit saved_limit;
+
+/* Two uses: main and service */
+static pid_t child[NR_CHILDREN];
+static pid_t pid;
 
 static void setrlimit_nproc(rlim_t n)
 {
-	pid_t pid = getpid();
 	struct rlimit limit = {
 		.rlim_cur = n,
 		.rlim_max = n
 	};
-
-	warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+	if (getrlimit(RLIMIT_NPROC, &saved_limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d): getrlimit(RLIMIT_NPROC)", pid);
 
 	if (setrlimit(RLIMIT_NPROC, &limit) < 0)
 		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+
+	warnx("(pid=%d): Set RLIMIT_NPROC=%ld", pid, n);
+}
+
+static void restore_rlimit_nproc(void)
+{
+	if (setrlimit(RLIMIT_NPROC, &saved_limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC, saved)", pid);
+	warnx("(pid=%d) Restored RLIMIT_NPROC", pid);
 }
 
-static pid_t fork_child(void)
+enum msg_sync {
+	UNSHARE,
+	RLIMIT_RESTORE,
+};
+
+static void sync_notify(int fd, enum msg_sync m)
 {
-	pid_t pid = fork();
+	char tmp = m;
+
+	if (write(fd, &tmp, 1) < 0)
+		warnx("(pid=%d): failed sync-write", pid);
+}
 
-	if (pid < 0)
+static void sync_wait(int fd, enum msg_sync m)
+{
+	char tmp;
+
+	if (read(fd, &tmp, 1) < 0)
+		warnx("(pid=%d): failed sync-read", pid);
+}
+
+static pid_t fork_child(int control_fd)
+{
+	pid_t new_pid = fork();
+
+	if (new_pid < 0)
 		err(EXIT_FAILURE, "fork");
 
-	if (pid > 0)
-		return pid;
+	if (new_pid > 0)
+		return new_pid;
 
 	pid = getpid();
-
 	warnx("(pid=%d): New process starting ...", pid);
 
 	if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
@@ -73,6 +111,9 @@  static pid_t fork_child(void)
 	if (unshare(CLONE_NEWUSER) < 0)
 		err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
 
+	sync_notify(control_fd, UNSHARE);
+	sync_wait(control_fd, RLIMIT_RESTORE);
+
 	char *const argv[] = { "service", NULL };
 	char *const envp[] = { "I_AM_SERVICE=1", NULL };
 
@@ -82,37 +123,92 @@  static pid_t fork_child(void)
 	err(EXIT_FAILURE, "(pid=%d): execve", pid);
 }
 
+static void run_service(void)
+{
+	size_t i;
+	int ret = EXIT_SUCCESS;
+	struct rlimit limit;
+	char user_ns[PATH_MAX];
+
+	if (getrlimit(RLIMIT_NPROC, &limit) < 0)
+		err(EXIT_FAILURE, "(pid=%d) failed getrlimit", pid);
+	if (readlink("/proc/self/ns/user", user_ns, PATH_MAX) < 0)
+		err(EXIT_FAILURE, "(pid=%d) failed readlink", pid);
+
+	warnx("(pid=%d) Service instance attempts %i children, limit %lu:%lu, ns=%s",
+	      pid, THE_LIMIT, limit.rlim_cur, limit.rlim_max, user_ns);
+
+	/* test rlimit inside the service, effectively THE_LIMIT-1 because of service itself */
+	for (i = 0; i < THE_LIMIT; i++) {
+		child[i] = fork();
+		if (child[i] == 0) {
+			/* service child */
+			pause();
+			exit(EXIT_SUCCESS);
+		}
+		if (child[i] < 0) {
+			warnx("(pid=%d) service fork %lu failed, errno = %i", pid, i+1, errno);
+			if (!(i == THE_LIMIT-1 && errno == EAGAIN))
+				ret = EXIT_FAILURE;
+		} else if (i == THE_LIMIT-1) {
+			warnx("(pid=%d) RLIMIT_NPROC not honored", pid);
+			ret = EXIT_FAILURE;
+		}
+	}
+
+	/* service cleanup */
+	for (i = 0; i < THE_LIMIT; i++)
+		if (child[i] > 0)
+			kill(child[i], SIGUSR1);
+
+	for (i = 0; i < THE_LIMIT; i++)
+		if (child[i] > 0)
+			waitpid(child[i], NULL, WNOHANG);
+
+	if (ret)
+		exit(ret);
+	pause();
+}
+
 int main(int argc, char **argv)
 {
 	size_t i;
-	pid_t child[NR_CHILDS];
-	int wstatus[NR_CHILDS];
-	int childs = NR_CHILDS;
-	pid_t pid;
+	int control_fd[NR_CHILDREN];
+	int wstatus[NR_CHILDREN];
+	int children = NR_CHILDREN;
+	int sockets[2];
+
+	pid = getpid();
 
 	if (getenv("I_AM_SERVICE")) {
-		pause();
-		exit(EXIT_SUCCESS);
+		run_service();
+		exit(EXIT_FAILURE);
 	}
 
 	service_prog = argv[0];
-	pid = getpid();
 
 	warnx("(pid=%d) Starting testcase", pid);
 
-	/*
-	 * This rlimit is not a problem for root because it can be exceeded.
-	 */
-	setrlimit_nproc(1);
-
-	for (i = 0; i < NR_CHILDS; i++) {
-		child[i] = fork_child();
+	setrlimit_nproc(THE_LIMIT);
+	for (i = 0; i < NR_CHILDREN; i++) {
+		if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, sockets) < 0)
+			err(EXIT_FAILURE, "(pid=%d) socketpair failed", pid);
+		control_fd[i] = sockets[0];
+		child[i] = fork_child(sockets[1]);
 		wstatus[i] = 0;
+	}
+
+	for (i = 0; i < NR_CHILDREN; i++)
+		sync_wait(control_fd[i], UNSHARE);
+	restore_rlimit_nproc();
+
+	for (i = 0; i < NR_CHILDREN; i++) {
+		sync_notify(control_fd[i], RLIMIT_RESTORE);
 		usleep(250000);
 	}
 
 	while (1) {
-		for (i = 0; i < NR_CHILDS; i++) {
+		for (i = 0; i < NR_CHILDREN; i++) {
 			if (child[i] <= 0)
 				continue;
 
@@ -126,22 +222,22 @@  int main(int argc, char **argv)
 				warn("(pid=%d): waitpid(%d)", pid, child[i]);
 
 			child[i] *= -1;
-			childs -= 1;
+			children -= 1;
 		}
 
-		if (!childs)
+		if (!children)
 			break;
 
 		usleep(250000);
 
-		for (i = 0; i < NR_CHILDS; i++) {
+		for (i = 0; i < NR_CHILDREN; i++) {
 			if (child[i] <= 0)
 				continue;
 			kill(child[i], SIGUSR1);
 		}
 	}
 
-	for (i = 0; i < NR_CHILDS; i++) {
+	for (i = 0; i < NR_CHILDREN; i++) {
 		if (WIFEXITED(wstatus[i]))
 			warnx("(pid=%d): pid %d exited, status=%d",
 				pid, -child[i], WEXITSTATUS(wstatus[i]));