[DRAFT,RFC] : file: reclaim 24 bytes from f_owner

Message ID	20240809-koriander-biobauer-6237cbc106f3@brauner (mailing list archive)
State	New
Headers	show Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 45141161328 for <linux-fsdevel@vger.kernel.org>; Fri, 9 Aug 2024 20:11:03 +0000 (UTC) From: Christian Brauner <brauner@kernel.org> To: Linus Torvalds <torvalds@linux-foundation.org> Cc: Christian Brauner <brauner@kernel.org>, Al Viro <viro@zeniv.linux.org.uk>, Jeff Layton <jlayton@kernel.org>, Jan Kara <jack@suse.com>, Josef Bacik <josef@toxicpanda.com>, linux-fsdevel@vger.kernel.org Subject: [PATCH] [DRAFT RFC]: file: reclaim 24 bytes from f_owner Date: Fri, 9 Aug 2024 22:10:40 +0200 Message-ID: <20240809-koriander-biobauer-6237cbc106f3@brauner> Precedence: bulk MIME-Version: 1.0 Content-Transfer-Encoding: 8bit
Series	[DRAFT,RFC] : file: reclaim 24 bytes from f_owner \| expand [DRAFT,RFC] : file: reclaim 24 bytes from f_owner

diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1d06c560c5e6..1aab0bde1532 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3467,8 +3467,13 @@ static int tun_chr_fasync(int fd, struct file *file, int on) static int tun_chr_open(struct inode *inode, struct file * file) { struct net *net = current->nsproxy->net_ns; + struct fown_struct *f_owner = __free(kfree) = NULL; struct tun_file *tfile; + f_owner = file_f_owner_allocate(file); + if (IS_ERR(fowner)) + return PTR_ERR(fowner); + tfile = (struct tun_file *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto, 0); if (!tfile) @@ -3500,6 +3505,7 @@ static int tun_chr_open(struct inode *inode, struct file * file) /* tun groks IOCB_NOWAIT just fine, mark it as such */ file->f_mode |= FMODE_NOWAIT; + f_owner = NULL; return 0; } diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 407b0d87b7c1..c6ac57d289b3 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2119,12 +2119,17 @@ static struct tty_struct *tty_open_by_driver(dev_t device, static int tty_open(struct inode *inode, struct file *filp) { struct tty_struct *tty; + struct fown_struct *f_owner __free(kfree) = NULL; int noctty, retval; dev_t device = inode->i_rdev; unsigned saved_flags = filp->f_flags; nonseekable_open(inode, filp); + f_owner = file_f_owner_allocate(filp); + if (IS_ERR(f_owner)) + return PTR_ERR(f_owner); + retry_open: retval = tty_alloc_file(filp); if (retval) @@ -2183,6 +2188,7 @@ static int tty_open(struct inode *inode, struct file *filp) if (!noctty) tty_open_proc_set_tty(filp, tty); tty_unlock(tty); + f_owner = NULL; return 0; } diff --git a/fs/fcntl.c b/fs/fcntl.c index 300e5d9ad913..01bc626c881e 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -87,22 +87,63 @@ static int setfl(int fd, struct file * filp, unsigned int arg) return error; } +struct fown_struct nop_fown_struct = { + .lock = __RW_LOCK_UNLOCKED(nop_fown_struct.lock), + .pid = NULL, + .pid_type = PIDTYPE_MAX, + .uid = INVALID_UID, + .euid = INVALID_UID, + .signum = 0, +}; + +/* + * Allocate an file->f_owner struct if it doesn't exist, handling racing + * allocations correctly. + */ +struct fown_struct *file_f_owner_allocate(struct file *file) +{ + struct fown_struct *f_owner; + + f_owner = smp_load_acquire(&file->f_owner); + if (f_owner != &nop_fown_struct) + return NULL; + + f_owner = kzalloc(sizeof(struct fown_struct), GFP_KERNEL); + if (!f_owner) + return ERR_PTR(-ENOMEM); + + rwlock_init(&f_owner->lock); + f_owner->file = file; + /* If someone else raced us, drop our allocation. */ + if (unlikely(cmpxchg(&file->f_owner, &nop_fown_struct, f_owner) != + &nop_fown_struct)) { + kfree(f_owner); + return NULL; + } + + return f_owner; +} + static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - write_lock_irq(&filp->f_owner.lock); - if (force || !filp->f_owner.pid) { - put_pid(filp->f_owner.pid); - filp->f_owner.pid = get_pid(pid); - filp->f_owner.pid_type = type; + struct fown_struct *f_owner = smp_load_acquire(&filp->f_owner); + if (WARN_ON_ONCE(f_owner == &nop_fown_struct)) + return; + + write_lock_irq(&f_owner->lock); + if (force || !f_owner->pid) { + put_pid(f_owner->pid); + f_owner->pid = get_pid(pid); + f_owner->pid_type = type; if (pid) { const struct cred *cred = current_cred(); - filp->f_owner.uid = cred->uid; - filp->f_owner.euid = cred->euid; + f_owner->uid = cred->uid; + f_owner->euid = cred->euid; } } - write_unlock_irq(&filp->f_owner.lock); + write_unlock_irq(&f_owner->lock); } void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, @@ -119,6 +160,8 @@ int f_setown(struct file *filp, int who, int force) struct pid *pid = NULL; int ret = 0; + might_sleep(); + type = PIDTYPE_TGID; if (who < 0) { /* avoid overflow below */ @@ -129,6 +172,9 @@ int f_setown(struct file *filp, int who, int force) who = -who; } + if (IS_ERR(file_f_owner_allocate(filp))) + return -ENOMEM; + rcu_read_lock(); if (who) { pid = find_vpid(who); @@ -152,16 +198,20 @@ void f_delown(struct file *filp) pid_t f_getown(struct file *filp) { pid_t pid = 0; + struct fown_struct *f_owner = smp_load_acquire(&filp->f_owner); + + if (f_owner == &nop_fown_struct) + return pid; - read_lock_irq(&filp->f_owner.lock); + read_lock_irq(&f_owner->lock); rcu_read_lock(); - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) { - pid = pid_vnr(filp->f_owner.pid); - if (filp->f_owner.pid_type == PIDTYPE_PGID) + if (pid_task(f_owner->pid, f_owner->pid_type)) { + pid = pid_vnr(f_owner->pid); + if (f_owner->pid_type == PIDTYPE_PGID) pid = -pid; } rcu_read_unlock(); - read_unlock_irq(&filp->f_owner.lock); + read_unlock_irq(&f_owner->lock); return pid; } @@ -210,13 +260,20 @@ static int f_getown_ex(struct file *filp, unsigned long arg) struct f_owner_ex __user *owner_p = (void __user *)arg; struct f_owner_ex owner = {}; int ret = 0; + struct fown_struct *f_owner; + enum pid_type pid_type = PIDTYPE_PID; - read_lock_irq(&filp->f_owner.lock); - rcu_read_lock(); - if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) - owner.pid = pid_vnr(filp->f_owner.pid); - rcu_read_unlock(); - switch (filp->f_owner.pid_type) { + f_owner = smp_load_acquire(&filp->f_owner); + if (f_owner != &nop_fown_struct) { + read_lock_irq(&f_owner->lock); + rcu_read_lock(); + if (pid_task(f_owner->pid, f_owner->pid_type)) + owner.pid = pid_vnr(f_owner->pid); + rcu_read_unlock(); + pid_type = f_owner->pid_type; + } + + switch (pid_type) { case PIDTYPE_PID: owner.type = F_OWNER_TID; break; @@ -234,7 +291,8 @@ static int f_getown_ex(struct file *filp, unsigned long arg) ret = -EINVAL; break; } - read_unlock_irq(&filp->f_owner.lock); + if (f_owner != &nop_fown_struct) + read_unlock_irq(&f_owner->lock); if (!ret) { ret = copy_to_user(owner_p, &owner, sizeof(owner)); @@ -248,14 +306,18 @@ static int f_getown_ex(struct file *filp, unsigned long arg) static int f_getowner_uids(struct file *filp, unsigned long arg) { struct user_namespace *user_ns = current_user_ns(); + struct fown_struct *f_owner; uid_t __user *dst = (void __user *)arg; - uid_t src[2]; + uid_t src[2] = {0, 0}; int err; - read_lock_irq(&filp->f_owner.lock); - src[0] = from_kuid(user_ns, filp->f_owner.uid); - src[1] = from_kuid(user_ns, filp->f_owner.euid); - read_unlock_irq(&filp->f_owner.lock); + f_owner = smp_load_acquire(&filp->f_owner); + if (f_owner != &nop_fown_struct) { + read_lock_irq(&f_owner->lock); + src[0] = from_kuid(user_ns, f_owner->uid); + src[1] = from_kuid(user_ns, f_owner->euid); + read_unlock_irq(&f_owner->lock); + } err = put_user(src[0], &dst[0]); err |= put_user(src[1], &dst[1]); @@ -343,6 +405,24 @@ static long f_dupfd_query(int fd, struct file *filp) return f.file == filp; } +static int f_owner_sig(struct file *filp, int signum, bool setsig) +{ + struct fown_struct *f_owner; + + f_owner = file_f_owner_allocate(filp); + if (IS_ERR(f_owner)) + return PTR_ERR(f_owner); + f_owner = filp->f_owner; + if (setsig) { + if (!valid_signal(signum)) + return -EINVAL; + f_owner->signum = signum; + return 0; + } + + return f_owner->signum; +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -421,15 +501,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, err = f_getowner_uids(filp, arg); break; case F_GETSIG: - err = filp->f_owner.signum; + err = f_owner_sig(filp, 0, false); break; case F_SETSIG: - /* arg == 0 restores default behaviour. */ - if (!valid_signal(argi)) { - break; - } - err = 0; - filp->f_owner.signum = argi; + err = f_owner_sig(filp, 0, true); break; case F_GETLEASE: err = fcntl_getlease(filp); @@ -844,14 +919,19 @@ static void send_sigurg_to_task(struct task_struct *p, do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type); } -int send_sigurg(struct fown_struct *fown) +int send_sigurg(struct file *file) { + struct fown_struct *fown; struct task_struct *p; enum pid_type type; struct pid *pid; unsigned long flags; int ret = 0; + fown = smp_load_acquire(&file->f_owner); + if (fown == &nop_fown_struct) + return 0; + read_lock_irqsave(&fown->lock, flags); type = fown->pid_type; @@ -1027,13 +1107,16 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) } read_lock_irqsave(&fa->fa_lock, flags); if (fa->fa_file) { - fown = &fa->fa_file->f_owner; + fown = smp_load_acquire(&fa->fa_file->f_owner); + if (fown == &nop_fown_struct) + goto next; /* Don't send SIGURG to processes which have not set a queued signum: SIGURG has its own default signalling mechanism. */ if (!(sig == SIGURG && fown->signum == 0)) send_sigio(fown, fa->fa_fd, band); } +next: read_unlock_irqrestore(&fa->fa_lock, flags); fa = rcu_dereference(fa->fa_next); } diff --git a/fs/file_table.c b/fs/file_table.c index ca7843dde56d..b8dd40454784 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -155,11 +155,11 @@ static int init_file(struct file *f, int flags, const struct cred *cred) return error; } - rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); f->f_flags = flags; f->f_mode = OPEN_FMODE(flags); + f->f_owner = &nop_fown_struct; /* f->f_version: 0 */ /* @@ -425,7 +425,10 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); } fops_put(file->f_op); - put_pid(file->f_owner.pid); + if (file->f_owner != &nop_fown_struct) { + put_pid(file->f_owner->pid); + kfree(file->f_owner); + } put_file_access(file); dput(dentry); if (unlikely(mode & FMODE_NEED_UNMOUNT)) diff --git a/fs/locks.c b/fs/locks.c index 9afb16e0683f..32c5260f7a17 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -603,11 +603,16 @@ static int lease_init(struct file *filp, int type, struct file_lease *fl) static struct file_lease *lease_alloc(struct file *filp, int type) { struct file_lease *fl = locks_alloc_lease(); + struct fown_struct *f_owner; int error = -ENOMEM; if (fl == NULL) return ERR_PTR(error); + f_owner = file_f_owner_allocate(filp); + if (IS_ERR(f_owner)) + return ERR_CAST(f_owner); + error = lease_init(filp, type, fl); if (error) { locks_free_lease(fl); @@ -1451,7 +1456,7 @@ int lease_modify(struct file_lease *fl, int arg, struct list_head *dispose) struct file *filp = fl->c.flc_file; f_delown(filp); - filp->f_owner.signum = 0; + filp->f_owner->signum = 0; fasync_helper(0, fl->c.flc_file, 0, &fl->fl_fasync); if (fl->fl_fasync != NULL) { printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync); diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index f3669403fabf..480b932cb1d0 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -110,7 +110,7 @@ static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask, prev = &dn->dn_next; continue; } - fown = &dn->dn_filp->f_owner; + fown = smp_load_acquire(&dn->dn_filp->f_owner); send_sigio(fown, dn->dn_fd, POLL_MSG); if (dn->dn_mask & FS_DN_MULTISHOT) prev = &dn->dn_next; @@ -316,6 +316,9 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg) goto out_err; } + if (IS_ERR(file_f_owner_allocate(filp))) + return -ENOMEM; + /* set up the new_fsn_mark and new_dn_mark */ new_fsn_mark = &new_dn_mark->fsn_mark; fsnotify_init_mark(new_fsn_mark, dnotify_group); diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b..39eec530591d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -947,6 +947,7 @@ static inline unsigned imajor(const struct inode *inode) } struct fown_struct { + struct file *file; /* backpointer for security modules */ rwlock_t lock; /* protects pid, uid, euid fields */ struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ @@ -954,6 +955,10 @@ struct fown_struct { int signum; /* posix.1b rt signal to be delivered on IO */ }; +extern struct fown_struct nop_fown_struct; + +struct fown_struct *file_f_owner_allocate(struct file *file); + /** * struct file_ra_state - Track a file's readahead state. * @start: Where the most recent readahead started. @@ -1011,7 +1016,7 @@ struct file { struct mutex f_pos_lock; loff_t f_pos; unsigned int f_flags; - struct fown_struct f_owner; + struct fown_struct *f_owner; const struct cred *f_cred; struct file_ra_state f_ra; struct path f_path; @@ -1124,7 +1129,7 @@ extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force extern int f_setown(struct file *filp, int who, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); -extern int send_sigurg(struct fown_struct *fown); +extern int send_sigurg(struct file *file); /* * sb->s_flags. Note that these mirror the equivalent MS_* flags where diff --git a/net/core/sock.c b/net/core/sock.c index 9abc4fe25953..bbe4c58470c3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3429,7 +3429,7 @@ static void sock_def_destruct(struct sock *sk) void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) - if (send_sigurg(&sk->sk_socket->file->f_owner)) + if (send_sigurg(sk->sk_socket->file)) sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } EXPORT_SYMBOL(sk_send_sigurg); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd..ed252cfba4e9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3940,7 +3940,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, struct file_security_struct *fsec; /* struct fown_struct is never outside the context of a struct file */ - file = container_of(fown, struct file, f_owner); + file = fown->file; fsec = selinux_file(file); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 4164699cd4f6..cb33920ab67c 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1950,7 +1950,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, /* * struct fown_struct is never outside the context of a struct file */ - file = container_of(fown, struct file, f_owner); + file = fown->file; /* we don't log here as rc can be overriden */ blob = smack_file(file);

[DRAFT,RFC] : file: reclaim 24 bytes from f_owner

Commit Message

Comments

Patch