@@ -890,6 +890,39 @@ static bool flock_locks_conflict(struct file_lock *caller_fl,
 	return locks_conflict(caller_fl, sys_fl);
 }
 
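+/**
+ * flock_mandatory_locked - check for a mandatory flock() on a file
+ * @filp: the file about to be read or written
+ *
+ * Returns the LOCK_MAND flags of a mandatory lock held on the inode
+ * via another struct file, or 0 if @filp is not restricted.
+ */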
+int flock_mandatory_locked(struct file *filp)
+{
+	struct file_lock_context *ctx;
+	struct file_lock *fl;
+	int flags = 0;
+
+	ctx = smp_load_acquire(&file_inode(filp)->i_flctx);
+	if (!ctx)
+		goto out;
+
+	spin_lock(&ctx->flc_lock);
+	list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
+		if (!(fl->fl_type & LOCK_MAND))
+			continue;
+
+		if (fl->fl_file != filp)
+			flags = fl->fl_type & (LOCK_MAND | LOCK_RW);
+
+		break;
+	}
+	spin_unlock(&ctx->flc_lock);
+out:
+	return flags;
+}
+EXPORT_SYMBOL(flock_mandatory_locked);
+
 void
 posix_test_lock(struct file *filp, struct file_lock *fl)
 {
@@ -561,10 +561,35 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
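+/*
+ * With LOCK_MAND, LOCK_READ/LOCK_WRITE mean "allow other processes
+ * to read/write the file", so access is denied unless the matching
+ * bit is set; the lock owner's own struct file is never restricted.
+ */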
+static bool proc_mandatory_locked(struct file *filp, int write)
+{
+	int flags = flock_mandatory_locked(filp);
+
+	if (flags & LOCK_MAND) {
+		if (write) {
+			if (flags & LOCK_WRITE)
+				return false;
+		} else {
+			if (flags & LOCK_READ)
+				return false;
+		}
+
+		return true;
+	}
+
+	return false;
+}
+
 static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
 		int write)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
+	struct file *filp = iocb->ki_filp;
+	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 	size_t count = iov_iter_count(iter);
@@ -582,6 +607,9 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
 	if (sysctl_perm(head, table, write ? MAY_WRITE : MAY_READ))
 		goto out;
 
+	if (proc_mandatory_locked(filp, write))
+		goto out;
+
 	/* if that can happen at all, it should be -EINVAL, not -EISDIR */
 	error = -EINVAL;
 	if (!table->proc_handler)
@@ -1164,6 +1164,7 @@ extern void locks_copy_conflock(struct file_lock *, struct file_lock *);
 extern void locks_remove_posix(struct file *, fl_owner_t);
 extern void locks_remove_file(struct file *);
 extern void locks_release_private(struct file_lock *);
+extern int flock_mandatory_locked(struct file *filp);
 extern void posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
 extern int locks_delete_block(struct file_lock *);
The preceding patch added LOCK_MAND support for flock(), and this patch
adds read/write protection for sysctl knobs on top of it: read and write
operations return -EPERM if the file is mandatory-locked via another
struct file.

The following patches introduce sysctl knobs that are read in clone() or
unshare() to control a per-netns hash table size for TCP/UDP.  In such a
case, we can use the write protection to guarantee the hash table's size
for the child netns.

The difference from BPF_PROG_TYPE_CGROUP_SYSCTL is that the BPF prog
requires processes to be in the same cgroup to allow/deny reads and
writes to sysctl knobs.

Note that the read protection might be useless, especially for sysctl
knobs whose value we can learn in another way.  For example, we can infer
fs.nr_open by opening too many files and checking the error, and
net.ipv4.tcp_syn_retries by dropping SYN packets and dumping the
retransmitted ones.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
 fs/locks.c            | 33 +++++++++++++++++++++++++++++++++
 fs/proc/proc_sysctl.c | 30 +++++++++++++++++++++++++++++-
 include/linux/fs.h    |  1 +
 3 files changed, 63 insertions(+), 1 deletion(-)
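
For reference, here is a minimal userspace sketch of the intended
write-protection flow.  It is not part of this patch, the sysctl path is
a made-up placeholder for the knobs added later in this series, and the
LOCK_MAND bits are only honoured with the preceding flock() patch
applied (mainline flock() rejects LOCK_MAND):

#define _GNU_SOURCE
#include <err.h>
#include <fcntl.h>
#include <sched.h>
#include <sys/file.h>
#include <unistd.h>

#ifndef LOCK_MAND
#define LOCK_MAND	32
#define LOCK_READ	64	/* with LOCK_MAND: allow reads by others */
#endif

int main(void)
{
	/* Hypothetical knob path; the real one comes later in the series. */
	int fd = open("/proc/sys/net/ipv4/some_child_hash_entries", O_RDONLY);

	if (fd < 0)
		err(1, "open");

	/*
	 * Other processes may still read the knob (LOCK_READ), but
	 * their writes now fail with -EPERM.
	 */
	if (flock(fd, LOCK_MAND | LOCK_READ) == -1)
		err(1, "flock");

	/*
	 * The child netns reads the knob here, and nobody can have
	 * changed it since we took the lock (needs CAP_SYS_ADMIN).
	 */
	if (unshare(CLONE_NEWNET) == -1)
		err(1, "unshare");

	flock(fd, LOCK_UN);
	close(fd);

	return 0;
}

Since flock_mandatory_locked() skips locks held via the caller's own
struct file, a process that opens the knob O_RDWR can keep updating it
through the locking fd while everyone else is denied.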