Message ID | 20201120231441.29911-16-ebiederm@xmission.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | [v2,01/24] exec: Move unshare_files to fix posix file locking during exec | expand |
On 11/20/20 3:14 PM, Eric W. Biederman wrote: > When discussing[1] exec and posix file locks it was realized that none > of the callers of get_files_struct fundamentally needed to call > get_files_struct, and that by switching them to helper functions > instead it will both simplify their code and remove unnecessary > increments of files_struct.count. Those unnecessary increments can > result in exec unnecessarily unsharing files_struct which breaking > posix locks, and it can result in fget_light having to fallback to > fget reducing system performance. > > Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by > moving the checking for the maximum file descritor into the generic > code, and by remvoing the need for capturing and releasing a reference > on files_struct. As the reference count of files_struct no longer > needs to be maintained bpf_iter_seq_task_file_info can have it's files > member removed and task_file_seq_get_next no longer needs it's fstruct > argument. > > The curr_fd local variable does need to become unsigned to be used > with fnext_task. As curr_fd is assigned from and assigned a u32 > making curr_fd an unsigned int won't cause problems and might prevent > them. > > [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com > Suggested-by: Oleg Nesterov <oleg@redhat.com> > v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com > Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> > --- > kernel/bpf/task_iter.c | 44 ++++++++++-------------------------------- > 1 file changed, 10 insertions(+), 34 deletions(-) Just a heads-up. The following patch bpf-next: 91b2db27d3ff ("bpf: Simplify task_file_seq_get_next()") https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/commit/?id=91b2db27d3ff9ad29e8b3108dfbf1e2f49fe9bd3 has been merged in bpf-next tree. It will have merge conflicts with this patch. The above patch is a refactoring for simplification with no functionality change. > > diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c > index 5ab2ccfb96cb..4ec63170c741 100644 > --- a/kernel/bpf/task_iter.c > +++ b/kernel/bpf/task_iter.c > @@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info { > */ > struct bpf_iter_seq_task_common common; > struct task_struct *task; > - struct files_struct *files; > u32 tid; > u32 fd; > }; > > static struct file * > task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, > - struct task_struct **task, struct files_struct **fstruct) > + struct task_struct **task) > { > struct pid_namespace *ns = info->common.ns; > - u32 curr_tid = info->tid, max_fds; > - struct files_struct *curr_files; > + u32 curr_tid = info->tid; > struct task_struct *curr_task; > - int curr_fd = info->fd; > + unsigned int curr_fd = info->fd; > > /* If this function returns a non-NULL file object, > - * it held a reference to the task/files_struct/file. > + * it held a reference to the task/file. > * Otherwise, it does not hold any reference. > */ > again: > if (*task) { > curr_task = *task; > - curr_files = *fstruct; > curr_fd = info->fd; > } else { > curr_task = task_seq_get_next(ns, &curr_tid, true); > if (!curr_task) > return NULL; > > - curr_files = get_files_struct(curr_task); > - if (!curr_files) { > - put_task_struct(curr_task); > - curr_tid = ++(info->tid); > - info->fd = 0; > - goto again; > - } > - > - /* set *fstruct, *task and info->tid */ > - *fstruct = curr_files; > + /* set *task and info->tid */ > *task = curr_task; > if (curr_tid == info->tid) { > curr_fd = info->fd; > @@ -179,13 +167,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, > } > > rcu_read_lock(); > - max_fds = files_fdtable(curr_files)->max_fds; > - for (; curr_fd < max_fds; curr_fd++) { > + for (;; curr_fd++) { > struct file *f; > - > - f = files_lookup_fd_rcu(curr_files, curr_fd); > + f = task_lookup_next_fd_rcu(curr_task, &curr_fd); > if (!f) > - continue; > + break; > if (!get_file_rcu(f)) > continue; > > @@ -197,10 +183,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, > > /* the current task is done, go to the next task */ > rcu_read_unlock(); > - put_files_struct(curr_files); > put_task_struct(curr_task); > *task = NULL; > - *fstruct = NULL; > info->fd = 0; > curr_tid = ++(info->tid); > goto again; > @@ -209,13 +193,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, > static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) > { > struct bpf_iter_seq_task_file_info *info = seq->private; > - struct files_struct *files = NULL; > struct task_struct *task = NULL; > struct file *file; > > - file = task_file_seq_get_next(info, &task, &files); > + file = task_file_seq_get_next(info, &task); > if (!file) { > - info->files = NULL; > info->task = NULL; > return NULL; > } > @@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) > if (*pos == 0) > ++*pos; > info->task = task; > - info->files = files; > > return file; > } > @@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) > static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos) > { > struct bpf_iter_seq_task_file_info *info = seq->private; > - struct files_struct *files = info->files; > struct task_struct *task = info->task; > struct file *file; > > ++*pos; > ++info->fd; > fput((struct file *)v); > - file = task_file_seq_get_next(info, &task, &files); > + file = task_file_seq_get_next(info, &task); > if (!file) { > - info->files = NULL; > info->task = NULL; > return NULL; > } > > info->task = task; > - info->files = files; > > return file; > } > @@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) > (void)__task_file_seq_show(seq, v, true); > } else { > fput((struct file *)v); > - put_files_struct(info->files); > put_task_struct(info->task); > - info->files = NULL; > info->task = NULL; > } > } >
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 5ab2ccfb96cb..4ec63170c741 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -130,45 +130,33 @@ struct bpf_iter_seq_task_file_info { */ struct bpf_iter_seq_task_common common; struct task_struct *task; - struct files_struct *files; u32 tid; u32 fd; }; static struct file * task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, - struct task_struct **task, struct files_struct **fstruct) + struct task_struct **task) { struct pid_namespace *ns = info->common.ns; - u32 curr_tid = info->tid, max_fds; - struct files_struct *curr_files; + u32 curr_tid = info->tid; struct task_struct *curr_task; - int curr_fd = info->fd; + unsigned int curr_fd = info->fd; /* If this function returns a non-NULL file object, - * it held a reference to the task/files_struct/file. + * it held a reference to the task/file. * Otherwise, it does not hold any reference. */ again: if (*task) { curr_task = *task; - curr_files = *fstruct; curr_fd = info->fd; } else { curr_task = task_seq_get_next(ns, &curr_tid, true); if (!curr_task) return NULL; - curr_files = get_files_struct(curr_task); - if (!curr_files) { - put_task_struct(curr_task); - curr_tid = ++(info->tid); - info->fd = 0; - goto again; - } - - /* set *fstruct, *task and info->tid */ - *fstruct = curr_files; + /* set *task and info->tid */ *task = curr_task; if (curr_tid == info->tid) { curr_fd = info->fd; @@ -179,13 +167,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, } rcu_read_lock(); - max_fds = files_fdtable(curr_files)->max_fds; - for (; curr_fd < max_fds; curr_fd++) { + for (;; curr_fd++) { struct file *f; - - f = files_lookup_fd_rcu(curr_files, curr_fd); + f = task_lookup_next_fd_rcu(curr_task, &curr_fd); if (!f) - continue; + break; if (!get_file_rcu(f)) continue; @@ -197,10 +183,8 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, /* the current task is done, go to the next task */ rcu_read_unlock(); - put_files_struct(curr_files); put_task_struct(curr_task); *task = NULL; - *fstruct = NULL; info->fd = 0; curr_tid = ++(info->tid); goto again; @@ -209,13 +193,11 @@ task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info, static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct *files = NULL; struct task_struct *task = NULL; struct file *file; - file = task_file_seq_get_next(info, &task, &files); + file = task_file_seq_get_next(info, &task); if (!file) { - info->files = NULL; info->task = NULL; return NULL; } @@ -223,7 +205,6 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) if (*pos == 0) ++*pos; info->task = task; - info->files = files; return file; } @@ -231,22 +212,19 @@ static void *task_file_seq_start(struct seq_file *seq, loff_t *pos) static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bpf_iter_seq_task_file_info *info = seq->private; - struct files_struct *files = info->files; struct task_struct *task = info->task; struct file *file; ++*pos; ++info->fd; fput((struct file *)v); - file = task_file_seq_get_next(info, &task, &files); + file = task_file_seq_get_next(info, &task); if (!file) { - info->files = NULL; info->task = NULL; return NULL; } info->task = task; - info->files = files; return file; } @@ -295,9 +273,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) (void)__task_file_seq_show(seq, v, true); } else { fput((struct file *)v); - put_files_struct(info->files); put_task_struct(info->task); - info->files = NULL; info->task = NULL; } }
When discussing[1] exec and posix file locks it was realized that none of the callers of get_files_struct fundamentally needed to call get_files_struct, and that by switching them to helper functions instead it will both simplify their code and remove unnecessary increments of files_struct.count. Those unnecessary increments can result in exec unnecessarily unsharing files_struct which breaking posix locks, and it can result in fget_light having to fallback to fget reducing system performance. Using task_lookup_next_fd_rcu simplifies task_file_seq_get_next, by moving the checking for the maximum file descritor into the generic code, and by remvoing the need for capturing and releasing a reference on files_struct. As the reference count of files_struct no longer needs to be maintained bpf_iter_seq_task_file_info can have it's files member removed and task_file_seq_get_next no longer needs it's fstruct argument. The curr_fd local variable does need to become unsigned to be used with fnext_task. As curr_fd is assigned from and assigned a u32 making curr_fd an unsigned int won't cause problems and might prevent them. [1] https://lkml.kernel.org/r/20180915160423.GA31461@redhat.com Suggested-by: Oleg Nesterov <oleg@redhat.com> v1: https://lkml.kernel.org/r/20200817220425.9389-11-ebiederm@xmission.com Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com> --- kernel/bpf/task_iter.c | 44 ++++++++++-------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-)