diff mbox

[11/40] ipv6/flowlabel: simplify pid namespace lookup

Message ID 20180425154827.32251-12-hch@lst.de (mailing list archive)
State Not Applicable, archived
Headers show

Commit Message

Christoph Hellwig April 25, 2018, 3:47 p.m. UTC
The shole seq_file sequence already operates under a single RCU lock pair,
so move the pid namespace lookup into it, and stop grabbing a reference
and remove all kinds of boilerplate code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 net/ipv6/ip6_flowlabel.c | 28 +++++-----------------------
 1 file changed, 5 insertions(+), 23 deletions(-)

Comments

Eric W. Biederman May 5, 2018, 12:37 p.m. UTC | #1
Christoph Hellwig <hch@lst.de> writes:

> The shole seq_file sequence already operates under a single RCU lock pair,
> so move the pid namespace lookup into it, and stop grabbing a reference
> and remove all kinds of boilerplate code.

This is wrong.

Move task_active_pid_ns(current) from open to seq_start actually means
that the results if you pass this proc file between callers the results
will change.  So this breaks file descriptor passing.

Open is a bad place to access current.  In the middle of read/write is
broken.


In this particular instance looking up the pid namespace with
task_active_pid_ns was a personal brain fart.  What the code should be
doing (with an appropriate helper) is:

struct pid_namespace *pid_ns = inode->i_sb->s_fs_info;

Because each mount of proc is bound to a pid namespace.  Looking up the
pid namespace from the super_block is a much better way to go.

Eric



> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  net/ipv6/ip6_flowlabel.c | 28 +++++-----------------------
>  1 file changed, 5 insertions(+), 23 deletions(-)
>
> diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
> index c05c4e82a7ca..a9f221d45ef9 100644
> --- a/net/ipv6/ip6_flowlabel.c
> +++ b/net/ipv6/ip6_flowlabel.c
> @@ -754,7 +754,10 @@ static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
>  static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
>  	__acquires(RCU)
>  {
> +	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
> +
>  	rcu_read_lock_bh();
> +	state->pid_ns = task_active_pid_ns(current);
>  	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
>  }
>  
> @@ -810,36 +813,15 @@ static const struct seq_operations ip6fl_seq_ops = {
>  
>  static int ip6fl_seq_open(struct inode *inode, struct file *file)
>  {
> -	struct seq_file *seq;
> -	struct ip6fl_iter_state *state;
> -	int err;
> -
> -	err = seq_open_net(inode, file, &ip6fl_seq_ops,
> +	return seq_open_net(inode, file, &ip6fl_seq_ops,
>  			   sizeof(struct ip6fl_iter_state));
> -
> -	if (!err) {
> -		seq = file->private_data;
> -		state = ip6fl_seq_private(seq);
> -		rcu_read_lock();
> -		state->pid_ns = get_pid_ns(task_active_pid_ns(current));
> -		rcu_read_unlock();
> -	}
> -	return err;
> -}
> -
> -static int ip6fl_seq_release(struct inode *inode, struct file *file)
> -{
> -	struct seq_file *seq = file->private_data;
> -	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
> -	put_pid_ns(state->pid_ns);
> -	return seq_release_net(inode, file);
>  }
>  
>  static const struct file_operations ip6fl_seq_fops = {
>  	.open		=	ip6fl_seq_open,
>  	.read		=	seq_read,
>  	.llseek		=	seq_lseek,
> -	.release	=	ip6fl_seq_release,
> +	.release	=	seq_release_net,
>  };
>  
>  static int __net_init ip6_flowlabel_proc_init(struct net *net)
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 15, 2018, 2:56 p.m. UTC | #2
On Sat, May 05, 2018 at 07:37:33AM -0500, Eric W. Biederman wrote:
> Christoph Hellwig <hch@lst.de> writes:
> 
> > The shole seq_file sequence already operates under a single RCU lock pair,
> > so move the pid namespace lookup into it, and stop grabbing a reference
> > and remove all kinds of boilerplate code.
> 
> This is wrong.
> 
> Move task_active_pid_ns(current) from open to seq_start actually means
> that the results if you pass this proc file between callers the results
> will change.  So this breaks file descriptor passing.
> 
> Open is a bad place to access current.  In the middle of read/write is
> broken.
> 
> 
> In this particular instance looking up the pid namespace with
> task_active_pid_ns was a personal brain fart.  What the code should be
> doing (with an appropriate helper) is:
> 
> struct pid_namespace *pid_ns = inode->i_sb->s_fs_info;
> 
> Because each mount of proc is bound to a pid namespace.  Looking up the
> pid namespace from the super_block is a much better way to go.

What do you have in mind for the helper?  For now I've thrown it in
opencoded into my working tree, but I'd be glad to add a helper.

struct pid_namespace *proc_pid_namespace(struct inode *inode)
{
	// maybe warn on for s_magic not on procfs??
	return inode->i_sb->s_fs_info;
}

?
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman May 17, 2018, 5:28 a.m. UTC | #3
Christoph Hellwig <hch@lst.de> writes:

> On Sat, May 05, 2018 at 07:37:33AM -0500, Eric W. Biederman wrote:
>> Christoph Hellwig <hch@lst.de> writes:
>> 
>> > The shole seq_file sequence already operates under a single RCU lock pair,
>> > so move the pid namespace lookup into it, and stop grabbing a reference
>> > and remove all kinds of boilerplate code.
>> 
>> This is wrong.
>> 
>> Move task_active_pid_ns(current) from open to seq_start actually means
>> that the results if you pass this proc file between callers the results
>> will change.  So this breaks file descriptor passing.
>> 
>> Open is a bad place to access current.  In the middle of read/write is
>> broken.
>> 
>> 
>> In this particular instance looking up the pid namespace with
>> task_active_pid_ns was a personal brain fart.  What the code should be
>> doing (with an appropriate helper) is:
>> 
>> struct pid_namespace *pid_ns = inode->i_sb->s_fs_info;
>> 
>> Because each mount of proc is bound to a pid namespace.  Looking up the
>> pid namespace from the super_block is a much better way to go.
>
> What do you have in mind for the helper?  For now I've thrown it in
> opencoded into my working tree, but I'd be glad to add a helper.
>
> struct pid_namespace *proc_pid_namespace(struct inode *inode)
> {
> 	// maybe warn on for s_magic not on procfs??
> 	return inode->i_sb->s_fs_info;
> }

That should work.  Ideally out of line for the proc_fs.h version.
Basically it should be a cousin of PDE_DATA.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Christoph Hellwig May 17, 2018, 6:42 a.m. UTC | #4
On Thu, May 17, 2018 at 12:28:01AM -0500, Eric W. Biederman wrote:
> > struct pid_namespace *proc_pid_namespace(struct inode *inode)
> > {
> > 	// maybe warn on for s_magic not on procfs??
> > 	return inode->i_sb->s_fs_info;
> > }
> 
> That should work.  Ideally out of line for the proc_fs.h version.
> Basically it should be a cousin of PDE_DATA.

The version in Al's tree is inline and without the warning as
I didn't want to drag in the magic.h include.  Please look at it for
additional comments, I can send incremental fixups if needed.
--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric W. Biederman May 17, 2018, 7:14 p.m. UTC | #5
Christoph Hellwig <hch@lst.de> writes:

> On Thu, May 17, 2018 at 12:28:01AM -0500, Eric W. Biederman wrote:
>> > struct pid_namespace *proc_pid_namespace(struct inode *inode)
>> > {
>> > 	// maybe warn on for s_magic not on procfs??
>> > 	return inode->i_sb->s_fs_info;
>> > }
>> 
>> That should work.  Ideally out of line for the proc_fs.h version.
>> Basically it should be a cousin of PDE_DATA.
>
> The version in Al's tree is inline and without the warning as
> I didn't want to drag in the magic.h include.  Please look at it for
> additional comments, I can send incremental fixups if needed.

Sounds good.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index c05c4e82a7ca..a9f221d45ef9 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -754,7 +754,10 @@  static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
 static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(RCU)
 {
+	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
+
 	rcu_read_lock_bh();
+	state->pid_ns = task_active_pid_ns(current);
 	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
 }
 
@@ -810,36 +813,15 @@  static const struct seq_operations ip6fl_seq_ops = {
 
 static int ip6fl_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	struct ip6fl_iter_state *state;
-	int err;
-
-	err = seq_open_net(inode, file, &ip6fl_seq_ops,
+	return seq_open_net(inode, file, &ip6fl_seq_ops,
 			   sizeof(struct ip6fl_iter_state));
-
-	if (!err) {
-		seq = file->private_data;
-		state = ip6fl_seq_private(seq);
-		rcu_read_lock();
-		state->pid_ns = get_pid_ns(task_active_pid_ns(current));
-		rcu_read_unlock();
-	}
-	return err;
-}
-
-static int ip6fl_seq_release(struct inode *inode, struct file *file)
-{
-	struct seq_file *seq = file->private_data;
-	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
-	put_pid_ns(state->pid_ns);
-	return seq_release_net(inode, file);
 }
 
 static const struct file_operations ip6fl_seq_fops = {
 	.open		=	ip6fl_seq_open,
 	.read		=	seq_read,
 	.llseek		=	seq_lseek,
-	.release	=	ip6fl_seq_release,
+	.release	=	seq_release_net,
 };
 
 static int __net_init ip6_flowlabel_proc_init(struct net *net)