@@ -27,6 +27,12 @@ struct mountpoint {
int m_count;
};
+struct mountroot { /* per-dentry record kept while the dentry is some mount's mnt_root */
+ struct hlist_node r_hash; /* link in the mountroot_hashtable bucket chosen by mr_hash() */
+ struct dentry *r_dentry; /* the root dentry this record describes; lookup key */
+ long r_count; /* one count per mnt_set_root(); record is freed when it drops to 0 */
+};
+
struct mount {
struct hlist_node mnt_hash;
struct mount *mnt_parent;
@@ -31,6 +31,8 @@ static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
static unsigned int mp_hash_shift __read_mostly;
+static unsigned int mr_hash_mask __read_mostly; /* index mask for mountroot_hashtable */
+static unsigned int mr_hash_shift __read_mostly; /* fold shift used by mr_hash() */
static __initdata unsigned long mhash_entries;
static int __init set_mhash_entries(char *str)
@@ -52,6 +54,16 @@ static int __init set_mphash_entries(char *str)
}
__setup("mphash_entries=", set_mphash_entries);
+/* Requested size of the mountroot hash, taken from "mrhash_entries=". */
+static __initdata unsigned long mrhash_entries;
+
+/*
+ * Handler for the "mrhash_entries=" boot parameter.
+ *
+ * Returns 0 when no value string was supplied, otherwise stores the
+ * parsed number in mrhash_entries and returns 1.
+ */
+static int __init set_mrhash_entries(char *str)
+{
+	if (str) {
+		mrhash_entries = simple_strtoul(str, &str, 0);
+		return 1;
+	}
+	return 0;
+}
+__setup("mrhash_entries=", set_mrhash_entries);
+
static u64 event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
@@ -61,6 +73,7 @@ static int mnt_group_start = 1;
static struct hlist_head *mount_hashtable __read_mostly;
static struct hlist_head *mountpoint_hashtable __read_mostly;
+static struct hlist_head *mountroot_hashtable __read_mostly; /* buckets of struct mountroot, indexed via mr_hash() */
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
@@ -93,6 +106,13 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry)
return &mountpoint_hashtable[tmp & mp_hash_mask];
}
+/*
+ * Pick the mountroot_hashtable bucket for @dentry.
+ *
+ * The dentry address is divided by L1_CACHE_BYTES, the upper bits are
+ * folded in by adding the value shifted right by mr_hash_shift, and the
+ * sum is reduced to a table index with mr_hash_mask.
+ */
+static inline struct hlist_head *mr_hash(struct dentry *dentry)
+{
+	unsigned long key = (unsigned long)dentry / L1_CACHE_BYTES;
+
+	key += key >> mr_hash_shift;
+	return mountroot_hashtable + (key & mr_hash_mask);
+}
+
/*
* allocation is serialized by namespace_sem, but we need the spinlock to
* serialize with freeing.
@@ -768,6 +788,76 @@ static void put_mountpoint(struct mountpoint *mp)
}
}
+/*
+ * Find the mountroot record for @dentry.
+ *
+ * Walks the hash bucket selected by mr_hash() and returns the entry whose
+ * r_dentry equals @dentry, or NULL when the bucket holds no such entry.
+ * The callers in this file invoke it between lock_mount_hash() and
+ * unlock_mount_hash().
+ */
+static struct mountroot *lookup_mountroot(struct dentry *dentry)
+{
+	struct hlist_head *bucket = mr_hash(dentry);
+	struct mountroot *pos;
+
+	hlist_for_each_entry(pos, bucket, r_hash)
+		if (pos->r_dentry == dentry)
+			return pos;
+
+	return NULL;
+}
+
+/*
+ * mnt_set_root - make @root the root dentry of @mnt and account for it
+ * @mnt:  mount whose mnt.mnt_root is to be set
+ * @root: dentry that becomes the root of @mnt
+ *
+ * Finds the mountroot record for @root in mountroot_hashtable, creating
+ * and hashing a new one (and setting DCACHE_MOUNTROOT on @root) if none
+ * exists, then takes one count on the record and stores @root in
+ * mnt->mnt.mnt_root.
+ *
+ * This function does not take a dentry reference itself; mnt_put_root()
+ * ends with dput(), so the caller is expected to hold a reference on
+ * @root on behalf of @mnt.
+ *
+ * Returns 0 on success, or -ENOMEM if a new record could not be
+ * allocated, in which case @mnt and @root are left unmodified.
+ */
+static int mnt_set_root(struct mount *mnt, struct dentry *root)
+{
+	struct mountroot *mr = NULL;
+
+	lock_mount_hash();
+	/*
+	 * DCACHE_MOUNTROOT is set when a record is hashed and cleared when
+	 * it is removed, so an unflagged dentry needs no hash walk.
+	 */
+	if (d_mountroot(root))
+		mr = lookup_mountroot(root);
+	if (!mr) {
+		struct mountroot *new;
+
+		/* GFP_KERNEL allocations may sleep: drop the hash lock first. */
+		unlock_mount_hash();
+
+		new = kmalloc(sizeof(*new), GFP_KERNEL);
+		if (!new)
+			return -ENOMEM;
+
+		lock_mount_hash();
+		/* Recheck: a record may have been added while unlocked. */
+		mr = lookup_mountroot(root);
+		if (mr) {
+			kfree(new);
+		} else {
+			struct hlist_head *chain = mr_hash(root);
+
+			mr = new;
+			mr->r_dentry = root;
+			mr->r_count = 0;
+			hlist_add_head(&mr->r_hash, chain);
+
+			/* d_flags is modified under the dentry's d_lock. */
+			spin_lock(&root->d_lock);
+			root->d_flags |= DCACHE_MOUNTROOT;
+			spin_unlock(&root->d_lock);
+		}
+	}
+	mnt->mnt.mnt_root = root;
+	mr->r_count++;
+	unlock_mount_hash();
+
+	return 0;
+}
+
+/*
+ * mnt_put_root - release @mnt's use of its root dentry
+ *
+ * Drops one count on the mountroot record of mnt->mnt.mnt_root.  When
+ * that was the last count, the record is unhashed, DCACHE_MOUNTROOT is
+ * cleared on the dentry and the record is freed.  After the hash lock is
+ * released, one reference on the dentry is dropped with dput().
+ *
+ * A missing record trips BUG_ON().
+ */
+static void mnt_put_root(struct mount *mnt)
+{
+	struct dentry *dentry = mnt->mnt.mnt_root;
+	struct mountroot *entry;
+
+	lock_mount_hash();
+	entry = lookup_mountroot(dentry);
+	BUG_ON(!entry);
+	entry->r_count--;
+	if (entry->r_count == 0) {
+		hlist_del(&entry->r_hash);
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags &= ~DCACHE_MOUNTROOT;
+		spin_unlock(&dentry->d_lock);
+		kfree(entry);
+	}
+	unlock_mount_hash();
+	dput(dentry);
+}
+
static inline int check_mnt(struct mount *mnt)
{
return mnt->mnt_ns == current->nsproxy->mnt_ns;
@@ -923,6 +1013,7 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
{
struct mount *mnt;
struct dentry *root;
+ int err;
if (!type)
return ERR_PTR(-ENODEV);
@@ -941,8 +1032,16 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
return ERR_CAST(root);
}
- mnt->mnt.mnt_root = root;
mnt->mnt.mnt_sb = root->d_sb;
+ err = mnt_set_root(mnt, root);
+ if (err) {
+ dput(root);
+ deactivate_super(mnt->mnt.mnt_sb);
+ mnt_free_id(mnt);
+ free_vfsmnt(mnt);
+ return ERR_PTR(err);
+ }
+
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
lock_mount_hash();
@@ -974,6 +1073,10 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
goto out_free;
}
+ err = mnt_set_root(mnt, root);
+ if (err)
+ goto out_free;
+
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED);
/* Don't allow unprivileged users to change mount flags */
if (flag & CL_UNPRIVILEGED) {
@@ -999,7 +1102,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
- mnt->mnt.mnt_root = dget(root);
+ dget(root);
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
lock_mount_hash();
@@ -1052,7 +1155,7 @@ static void cleanup_mnt(struct mount *mnt)
if (unlikely(mnt->mnt_pins.first))
mnt_pin_kill(mnt);
fsnotify_vfsmount_delete(&mnt->mnt);
- dput(mnt->mnt.mnt_root);
+ mnt_put_root(mnt);
deactivate_super(mnt->mnt.mnt_sb);
mnt_free_id(mnt);
call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
@@ -3079,14 +3182,21 @@ void __init mnt_init(void)
mphash_entries, 19,
0,
&mp_hash_shift, &mp_hash_mask, 0, 0);
+ mountroot_hashtable = alloc_large_system_hash("Mountroot-cache",
+ sizeof(struct hlist_head),
+ mrhash_entries, 19,
+ 0,
+ &mr_hash_shift, &mr_hash_mask, 0, 0);
- if (!mount_hashtable || !mountpoint_hashtable)
+ if (!mount_hashtable || !mountpoint_hashtable || !mountroot_hashtable)
panic("Failed to allocate mount hash table\n");
for (u = 0; u <= m_hash_mask; u++)
INIT_HLIST_HEAD(&mount_hashtable[u]);
for (u = 0; u <= mp_hash_mask; u++)
INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
+ for (u = 0; u <= mr_hash_mask; u++)
+ INIT_HLIST_HEAD(&mountroot_hashtable[u]);
kernfs_init();
@@ -226,6 +226,8 @@ struct dentry_operations {
#define DCACHE_MAY_FREE 0x00800000
#define DCACHE_FALLTHRU 0x01000000 /* Fall through to lower layer */
+#define DCACHE_MOUNTROOT 0x02000000 /* Root of a vfsmount */
+
extern seqlock_t rename_lock;
/*
@@ -401,6 +403,11 @@ static inline bool d_mountpoint(const struct dentry *dentry)
return dentry->d_flags & DCACHE_MOUNTED;
}
+/* True when DCACHE_MOUNTROOT is set in @dentry's d_flags. */
+static inline bool d_mountroot(const struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_MOUNTROOT) != 0;
+}
+
/*
* Directory cache entry type accessor functions.
*/
Cc: stable@vger.kernel.org
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
---
 fs/mount.h             |   6 +++
 fs/namespace.c         | 118 +++++++++++++++++++++++++++++++++++++++++++++++--
 include/linux/dcache.h |   7 +++
 3 files changed, 127 insertions(+), 4 deletions(-)