@@ -75,6 +75,7 @@ extern const struct file_operations pidfd_fops;
struct file;
extern struct pid *pidfd_pid(const struct file *file);
+extern struct pid *pidfd_get_pid(unsigned int fd);
static inline struct pid *get_pid(struct pid *pid)
{
@@ -877,7 +877,8 @@ asmlinkage long sys_munlockall(void);
asmlinkage long sys_mincore(unsigned long start, size_t len,
unsigned char __user * vec);
asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
-asmlinkage long sys_process_madvise(int pidfd, unsigned long start,
+
+asmlinkage long sys_process_madvise(int which, pid_t pid, unsigned long start,
size_t len, int behavior, unsigned long flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
@@ -1466,23 +1466,6 @@ static long do_wait(struct wait_opts *wo)
return retval;
}
-static struct pid *pidfd_get_pid(unsigned int fd)
-{
- struct fd f;
- struct pid *pid;
-
- f = fdget(fd);
- if (!f.file)
- return ERR_PTR(-EBADF);
-
- pid = pidfd_pid(f.file);
- if (!IS_ERR(pid))
- get_pid(pid);
-
- fdput(f);
- return pid;
-}
-
static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop,
int options, struct rusage *ru)
{
@@ -496,6 +496,23 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
return idr_get_next(&ns->idr, &nr);
}
+struct pid *pidfd_get_pid(unsigned int fd)
+{
+ struct fd f;
+ struct pid *pid;
+
+ f = fdget(fd);
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ pid = pidfd_pid(f.file);
+ if (!IS_ERR(pid))
+ get_pid(pid);
+
+ fdput(f);
+ return pid;
+}
+
/**
* pidfd_create() - Create a new pid file descriptor.
*
@@ -1192,11 +1192,10 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
return madvise_common(current, current->mm, start, len_in, behavior);
}
-SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
+SYSCALL_DEFINE6(process_madvise, int, which, pid_t, upid, unsigned long, start,
size_t, len_in, int, behavior, unsigned long, flags)
{
int ret;
- struct fd f;
struct pid *pid;
struct task_struct *task;
struct mm_struct *mm;
@@ -1207,20 +1206,31 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
if (!process_madvise_behavior_valid(behavior))
return -EINVAL;
- f = fdget(pidfd);
- if (!f.file)
- return -EBADF;
+ switch (which) {
+ case P_PID:
+ if (upid <= 0)
+ return -EINVAL;
+
+ pid = find_get_pid(upid);
+ if (!pid)
+ return -ESRCH;
+ break;
+ case P_PIDFD:
+ if (upid < 0)
+ return -EINVAL;
- pid = pidfd_pid(f.file);
- if (IS_ERR(pid)) {
- ret = PTR_ERR(pid);
- goto fdput;
+ pid = pidfd_get_pid(upid);
+ if (IS_ERR(pid))
+ return PTR_ERR(pid);
+ break;
+ default:
+ return -EINVAL;
}
task = get_pid_task(pid, PIDTYPE_PID);
if (!task) {
ret = -ESRCH;
- goto fdput;
+ goto put_pid;
}
mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
@@ -1233,7 +1243,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, unsigned long, start,
mmput(mm);
release_task:
put_task_struct(task);
-fdput:
- fdput(f);
+put_pid:
+ put_pid(pid);
return ret;
}
There is a demand[1] to support pid as well pidfd for process_madvise to reduce unncessary syscall to get pidfd if the user has control of the targer process(ie, they could gaurantee the process is not gone or pid is not reused. Or, it might be okay to give a hint to wrong process). This patch aims for supporting both options like waitid(2). So, the syscall is currently, int process_madvise(int which, pid_t pid, void *addr, size_t length, int advise, unsigned long flag); @which is actually idtype_t for userspace libray and currently, it supports P_PID and P_PIDFD. [1] https://lore.kernel.org/linux-mm/9d849087-3359-c4ab-fbec-859e8186c509@virtuozzo.com/ Signed-off-by: Minchan Kim <minchan@kernel.org> --- include/linux/pid.h | 1 + include/linux/syscalls.h | 3 ++- kernel/exit.c | 17 ----------------- kernel/pid.c | 17 +++++++++++++++++ mm/madvise.c | 34 ++++++++++++++++++++++------------ 5 files changed, 42 insertions(+), 30 deletions(-)