@@ -60,12 +60,11 @@ typedef struct {
char *name;
struct dentry *dentry;
struct file *interp_file;
+ refcount_t users; /* sync removal with load_misc_binary() */
} Node;
static DEFINE_RWLOCK(entries_lock);
static struct file_system_type bm_fs_type;
-static struct vfsmount *bm_mnt;
-static int entry_count;
/*
* Max length of the register string. Determined by:
@@ -82,19 +81,22 @@ static int entry_count;
*/
#define MAX_REGISTER_LENGTH 1920
-/*
- * Check if we support the binfmt
- * if we do, return the node, else NULL
- * locking is done in load_misc_binary
+/**
+ * search_binfmt_handler - search for a binary handler for @bprm
+ * @bprm: binary for which we are looking for a handler
+ *
+ * Search for a binary type handler for @bprm in the list of registered binary
+ * type handlers.
+ *
+ * Return: binary type list entry on success, NULL on failure
*/
-static Node *check_file(struct linux_binprm *bprm)
+static Node *search_binfmt_handler(struct linux_binprm *bprm)
{
char *p = strrchr(bprm->interp, '.');
- struct list_head *l;
+ Node *e;
/* Walk all the registered handlers. */
- list_for_each(l, &entries) {
- Node *e = list_entry(l, Node, list);
+ list_for_each_entry(e, &entries, list) {
char *s;
int j;
@@ -123,9 +125,48 @@ static Node *check_file(struct linux_binprm *bprm)
if (j == e->size)
return e;
}
+
return NULL;
}
+/**
+ * get_binfmt_handler - try to find a binary type handler
+ * @bprm: binary for which we are looking for a handler
+ *
+ * Try to find a binfmt handler for the binary type. If one is found take a
+ * reference to protect against removal via bm_{entry,status}_write().
+ *
+ * Return: binary type list entry on success, NULL on failure
+ */
+static Node *get_binfmt_handler(struct linux_binprm *bprm)
+{
+ Node *e;
+
+ read_lock(&entries_lock);
+ e = search_binfmt_handler(bprm);
+ if (e)
+ refcount_inc(&e->users);
+ read_unlock(&entries_lock);
+ return e;
+}
+
+/**
+ * put_binfmt_handler - put binary handler node
+ * @e: node to put
+ *
+ * Free node syncing with load_misc_binary() and defer final free to
+ * load_misc_binary() in case it is using the binary type handler we were
+ * requested to remove.
+ */
+static void put_binfmt_handler(Node *e)
+{
+ if (refcount_dec_and_test(&e->users)) {
+ if (e->flags & MISC_FMT_OPEN_FILE)
+ filp_close(e->interp_file, NULL);
+ kfree(e);
+ }
+}
+
/*
* the loader itself
*/
@@ -139,12 +180,7 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (!enabled)
return retval;
- /* to keep locking time low, we copy the interpreter string */
- read_lock(&entries_lock);
- fmt = check_file(bprm);
- if (fmt)
- dget(fmt->dentry);
- read_unlock(&entries_lock);
+ fmt = get_binfmt_handler(bprm);
if (!fmt)
return retval;
@@ -198,7 +234,16 @@ static int load_misc_binary(struct linux_binprm *bprm)
retval = 0;
ret:
- dput(fmt->dentry);
+
+ /*
+ * If we actually put the node here all concurrent calls to
+ * load_misc_binary() will have finished. We also know
+ * that for the refcount to be zero ->evict_inode() must have removed
+ * the node to be deleted from the list. All that is left for us is to
+ * close and free.
+ */
+ put_binfmt_handler(fmt);
+
return retval;
}
@@ -553,30 +598,89 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
return inode;
}
+/**
+ * bm_evict_inode - cleanup data associated with @inode
+ * @inode: inode to which the data is attached
+ *
+ * Cleanup the binary type handler data associated with @inode if a binary type
+ * entry is removed or the filesystem is unmounted and the super block is
+ * shutdown.
+ *
+ * If the ->evict call was not caused by a super block shutdown but by a write
+ * to remove the entry or all entries via bm_{entry,status}_write() the entry
+ * will have already been removed from the list. We keep the list_empty() check
+ * to make that explicit.
+*/
static void bm_evict_inode(struct inode *inode)
{
Node *e = inode->i_private;
- if (e && e->flags & MISC_FMT_OPEN_FILE)
- filp_close(e->interp_file, NULL);
-
clear_inode(inode);
- kfree(e);
+
+ if (e) {
+ write_lock(&entries_lock);
+ if (!list_empty(&e->list))
+ list_del_init(&e->list);
+ write_unlock(&entries_lock);
+ put_binfmt_handler(e);
+ }
}
-static void kill_node(Node *e)
+/**
+ * unlink_binfmt_dentry - remove the dentry for the binary type handler
+ * @dentry: dentry associated with the binary type handler
+ *
+ * Do the actual filesystem work to remove a dentry for a registered binary
+ * type handler. Since binfmt_misc only allows simple files to be created
+ * directly under the root dentry of the filesystem we ensure that we are
+ * indeed passed a dentry directly beneath the root dentry, that the inode
+ * associated with the root dentry is locked, and that it is a regular file we
+ * are asked to remove.
+ */
+static void unlink_binfmt_dentry(struct dentry *dentry)
{
- struct dentry *dentry;
+ struct dentry *parent = dentry->d_parent;
+ struct inode *inode, *parent_inode;
+
+ /* All entries are immediate descendants of the root dentry. */
+ if (WARN_ON_ONCE(dentry->d_sb->s_root != parent))
+ return;
+ /* We only expect to be called on regular files. */
+ inode = d_inode(dentry);
+ if (WARN_ON_ONCE(!S_ISREG(inode->i_mode)))
+ return;
+
+ /* The parent inode must be locked. */
+ parent_inode = d_inode(parent);
+ if (WARN_ON_ONCE(!inode_is_locked(parent_inode)))
+ return;
+
+ if (simple_positive(dentry)) {
+ dget(dentry);
+ simple_unlink(parent_inode, dentry);
+ d_delete(dentry);
+ dput(dentry);
+ }
+}
+
+/**
+ * remove_binfmt_handler - remove a binary type handler
+ * @e: binary type handler to remove
+ *
+ * Remove a binary type handler from the list of binary type handlers and
+ * remove its associated dentry. This is called from
+ * binfmt_{entry,status}_write(). In the future, we might want to think about
+ * adding a proper ->unlink() method to binfmt_misc instead of forcing caller's
+ * to use writes to files in order to delete binary type handlers. But it has
+ * worked for so long that it's not a pressing issue.
+ */
+static void remove_binfmt_handler(Node *e)
+{
write_lock(&entries_lock);
list_del_init(&e->list);
write_unlock(&entries_lock);
-
- dentry = e->dentry;
- drop_nlink(d_inode(dentry));
- d_drop(dentry);
- dput(dentry);
- simple_release_fs(&bm_mnt, &entry_count);
+ unlink_binfmt_dentry(e->dentry);
}
/* /<entry> */
@@ -603,8 +707,8 @@ bm_entry_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- struct dentry *root;
- Node *e = file_inode(file)->i_private;
+ struct inode *inode = file_inode(file);
+ Node *e = inode->i_private;
int res = parse_command(buffer, count);
switch (res) {
@@ -618,13 +722,22 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
break;
case 3:
/* Delete this handler. */
- root = file_inode(file)->i_sb->s_root;
- inode_lock(d_inode(root));
+ inode = d_inode(inode->i_sb->s_root);
+ inode_lock(inode);
+ /*
+ * In order to add new element or remove elements from the list
+ * via bm_{entry,register,status}_write() inode_lock() on the
+ * root inode must be held.
+ * The lock is exclusive ensuring that the list can't be
+ * modified. Only load_misc_binary() can access but does so
+ * read-only. So we only need to take the write lock when we
+ * actually remove the entry from the list.
+ */
if (!list_empty(&e->list))
- kill_node(e);
+ remove_binfmt_handler(e);
- inode_unlock(d_inode(root));
+ inode_unlock(inode);
break;
default:
return res;
@@ -683,13 +796,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
if (!inode)
goto out2;
- err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
- if (err) {
- iput(inode);
- inode = NULL;
- goto out2;
- }
-
+ refcount_set(&e->users, 1);
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = &bm_entry_operations;
@@ -733,7 +840,8 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
int res = parse_command(buffer, count);
- struct dentry *root;
+ Node *e, *next;
+ struct inode *inode;
switch (res) {
case 1:
@@ -746,13 +854,22 @@ static ssize_t bm_status_write(struct file *file, const char __user *buffer,
break;
case 3:
/* Delete all handlers. */
- root = file_inode(file)->i_sb->s_root;
- inode_lock(d_inode(root));
+ inode = d_inode(file_inode(file)->i_sb->s_root);
+ inode_lock(inode);
- while (!list_empty(&entries))
- kill_node(list_first_entry(&entries, Node, list));
+ /*
+ * In order to add new element or remove elements from the list
+ * via bm_{entry,register,status}_write() inode_lock() on the
+ * root inode must be held.
+ * The lock is exclusive ensuring that the list can't be
+ * modified. Only load_misc_binary() can access but does so
+ * read-only. So we only need to take the write lock when we
+ * actually remove the entry from the list.
+ */
+ list_for_each_entry_safe(e, next, &entries, list)
+ remove_binfmt_handler(e);
- inode_unlock(d_inode(root));
+ inode_unlock(inode);
break;
default:
return res;