@@ -3,4 +3,4 @@
# Makefile for persistent kernel filesystem
#
-obj-y += guestmemfs.o
+obj-y += guestmemfs.o inode.o dir.o
new file mode 100644
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "guestmemfs.h"
+
+/*
+ * Emit the entries of a guestmemfs directory: "." and ".." first, then
+ * every inode on the directory's child_ino/sibling_ino list.
+ *
+ * ctx->pos encoding:
+ *   0, 1  - "." / ".." still to be emitted (handled by dir_emit_dots())
+ *   2     - dots done, start at the directory's first child
+ *   > 2   - resume at inode number (pos - 2); the bias keeps a real inode
+ *           number from ever colliding with the value 2 above
+ *   -1    - end of directory
+ *
+ * Always returns 0.
+ */
+static int guestmemfs_dir_iterate(struct file *dir, struct dir_context *ctx)
+{
+	struct super_block *sb = dir->f_inode->i_sb;
+	struct guestmemfs_inode *guestmemfs_inode;
+	unsigned long ino;
+
+	/* Indication from a previous invocation that there's no more to iterate. */
+	if (ctx->pos == -1)
+		return 0;
+
+	if (!dir_emit_dots(dir, ctx))
+		return 0;
+
+	if (ctx->pos == 2)
+		ino = guestmemfs_get_persisted_inode(sb, dir->f_inode->i_ino)->child_ino;
+	else
+		ino = ctx->pos - 2;
+
+	while (ino) {
+		guestmemfs_inode = guestmemfs_get_persisted_inode(sb, ino);
+
+		/*
+		 * A saved cursor may name an inode that has been freed since
+		 * the previous call; a free slot has no flags set. Stop rather
+		 * than walk into the free list.
+		 */
+		if (!guestmemfs_inode->flags)
+			break;
+
+		/* Pass the real name length, not the size of the name buffer. */
+		if (!dir_emit(ctx, guestmemfs_inode->filename,
+			      strnlen(guestmemfs_inode->filename,
+				      GUESTMEMFS_FILENAME_LEN),
+			      ino, DT_UNKNOWN)) {
+			/* Buffer full: remember where to resume and return. */
+			ctx->pos = ino + 2;
+			return 0;
+		}
+		ino = guestmemfs_inode->sibling_ino;
+	}
+
+	ctx->pos = -1;
+	return 0;
+}
+
+/*
+ * File operations for guestmemfs directories.
+ * .read is generic_read_dir so that read(2) on a directory fails with
+ * -EISDIR; directory contents are only available through iteration.
+ */
+const struct file_operations guestmemfs_dir_fops = {
+	.owner = THIS_MODULE,
+	.read = generic_read_dir,
+	.iterate_shared = guestmemfs_dir_iterate,
+};
@@ -18,6 +18,9 @@ static int statfs(struct dentry *root, struct kstatfs *buf)
buf->f_bsize = PMD_SIZE;
buf->f_blocks = guestmemfs_size / PMD_SIZE;
buf->f_bfree = buf->f_bavail = buf->f_blocks;
+ buf->f_files = PMD_SIZE / sizeof(struct guestmemfs_inode);
+ buf->f_ffree = buf->f_files -
+ GUESTMEMFS_PSB(root->d_sb)->allocated_inodes;
return 0;
}
@@ -31,24 +34,34 @@ static int guestmemfs_fill_super(struct super_block *sb, struct fs_context *fc)
struct dentry *dentry;
psb = kzalloc(sizeof(*psb), GFP_KERNEL);
+ psb->inodes = kzalloc(2 << 20, GFP_KERNEL);
+ if (!psb->inodes)
+ return -ENOMEM;
+
/*
* Keep a reference to the persistent super block in the
* ephemeral super block.
*/
sb->s_fs_info = psb;
+ spin_lock_init(&psb->allocation_lock);
+ guestmemfs_initialise_inode_store(sb);
+ guestmemfs_get_persisted_inode(sb, 1)->flags = GUESTMEMFS_INODE_FLAG_DIR;
+ strscpy(guestmemfs_get_persisted_inode(sb, 1)->filename, ".",
+ GUESTMEMFS_FILENAME_LEN);
+ psb->next_free_ino = 2;
+
sb->s_op = &guestmemfs_super_ops;
- inode = new_inode(sb);
+ inode = guestmemfs_inode_get(sb, 1);
if (!inode)
return -ENOMEM;
- inode->i_ino = 1;
inode->i_mode = S_IFDIR;
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
+ inode->i_fop = &guestmemfs_dir_fops;
simple_inode_init_ts(inode);
/* directory inodes start off with i_nlink == 2 (for "." entry) */
inc_nlink(inode);
+ inode_init_owner(&nop_mnt_idmap, inode, NULL, inode->i_mode);
dentry = d_make_root(inode);
if (!dentry)
@@ -3,7 +3,41 @@
#define pr_fmt(fmt) "guestmemfs: " KBUILD_MODNAME ": " fmt
#include <linux/guestmemfs.h>
+#include <linux/fs.h>
+
+#define GUESTMEMFS_FILENAME_LEN 255
+/* Fetch the guestmemfs super block stashed in a VFS super_block's s_fs_info. */
+#define GUESTMEMFS_PSB(sb) ((struct guestmemfs_sb *)(sb)->s_fs_info)
struct guestmemfs_sb {
- /* Will be populated soon... */
+ /* Inode number at the head of the free-inode list; 0 when the list is empty. */
+ unsigned long next_free_ino;
+ unsigned long allocated_inodes; /* Running count kept by the inode allocate/free helpers; statfs subtracts it from f_files. */
+ struct guestmemfs_inode *inodes; /* Base of the flat inode array; inode number N lives at index N - 1. */
+ spinlock_t allocation_lock; /* Held while next_free_ino and allocated_inodes are updated. */
+};
+
+// If neither of these are set the inode is not in use.
+#define GUESTMEMFS_INODE_FLAG_FILE (1 << 0)
+#define GUESTMEMFS_INODE_FLAG_DIR (1 << 1)
+struct guestmemfs_inode {
+ int flags; /* GUESTMEMFS_INODE_FLAG_* bits; no bit set means the slot is not in use. */
+ /*
+ * Points to next inode in the same directory, or
+ * 0 if last file in directory.
+ * While the slot is free, this field instead links it to the
+ * next free inode on the super block's free list.
+ */
+ unsigned long sibling_ino;
+ /*
+ * If this inode is a directory, this points to the
+ * first inode *in* that directory, or is 0 if the
+ * directory is empty.
+ */
+ unsigned long child_ino;
+ char filename[GUESTMEMFS_FILENAME_LEN]; /* Name within the parent directory; written with strscpy(), so NUL-terminated. */
+ void *mappings; /* Set to a PAGE_SIZE kzalloc() buffer when a file is created. */
+ int num_mappings; /* NOTE(review): not referenced by the visible code; presumably the entry count of mappings - confirm. */
 };
+
+void guestmemfs_initialise_inode_store(struct super_block *sb);
+struct inode *guestmemfs_inode_get(struct super_block *sb, unsigned long ino);
+struct guestmemfs_inode *guestmemfs_get_persisted_inode(struct super_block *sb, int ino);
+
+extern const struct file_operations guestmemfs_dir_fops;
new file mode 100644
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "guestmemfs.h"
+#include <linux/fs.h>
+
+const struct inode_operations guestmemfs_dir_inode_operations;
+
+/*
+ * Translate an inode number into a pointer to its slot in the flat
+ * inode array hanging off the guestmemfs super block.
+ * No range check is performed on @ino.
+ */
+struct guestmemfs_inode *guestmemfs_get_persisted_inode(struct super_block *sb, int ino)
+{
+	struct guestmemfs_inode *store = GUESTMEMFS_PSB(sb)->inodes;
+
+	/* Inode numbers are 1-based, array slots are 0-based. */
+	return &store[ino - 1];
+}
+
+/*
+ * Return the VFS inode for inode number @ino on @sb, or NULL if no inode
+ * could be obtained.
+ *
+ * An inode found in the inode cache is returned as is. A newly created
+ * one is given the guestmemfs inode operations and S_IFREG mode before
+ * being unlocked; callers that need something else (e.g. the root
+ * directory) adjust the inode afterwards.
+ */
+struct inode *guestmemfs_inode_get(struct super_block *sb, unsigned long ino)
+{
+	struct inode *inode = iget_locked(sb, ino);
+
+	/* iget_locked() returns NULL when it cannot allocate an inode. */
+	if (!inode)
+		return NULL;
+
+	/* If this inode is cached it is already populated; just return */
+	if (!(inode->i_state & I_NEW))
+		return inode;
+
+	inode->i_op = &guestmemfs_dir_inode_operations;
+	inode->i_sb = sb;
+	inode->i_mode = S_IFREG;
+	unlock_new_inode(inode);
+	return inode;
+}
+
+/*
+ * Pop the head of the free-inode list.
+ *
+ * Returns the allocated inode number, or 0 if no free inode is left.
+ * allocated_inodes is only bumped when an inode is actually handed out.
+ */
+static unsigned long guestmemfs_allocate_inode(struct super_block *sb)
+{
+	struct guestmemfs_sb *psb = GUESTMEMFS_PSB(sb);
+	unsigned long ino;
+
+	spin_lock(&psb->allocation_lock);
+	ino = psb->next_free_ino;
+	if (ino) {
+		/* A free inode's sibling_ino links to the next free inode. */
+		psb->next_free_ino =
+			guestmemfs_get_persisted_inode(sb, ino)->sibling_ino;
+		psb->allocated_inodes += 1;
+	}
+	spin_unlock(&psb->allocation_lock);
+
+	return ino;
+}
+
+/*
+ * Releases the inode's mappings buffer, zeroes the inode and makes it
+ * the head of the free list.
+ */
+static void guestmemfs_free_inode(struct super_block *sb, unsigned long ino)
+{
+	struct guestmemfs_sb *psb = GUESTMEMFS_PSB(sb);
+	struct guestmemfs_inode *inode = guestmemfs_get_persisted_inode(sb, ino);
+	void *mappings;
+
+	spin_lock(&psb->allocation_lock);
+	/* Grab the pointer before the memset below wipes it. */
+	mappings = inode->mappings;
+	memset(inode, 0, sizeof(*inode));
+	inode->sibling_ino = psb->next_free_ino;
+	psb->next_free_ino = ino;
+	psb->allocated_inodes -= 1;
+	spin_unlock(&psb->allocation_lock);
+
+	/* kfree(NULL) is a no-op, so inodes without mappings are fine. */
+	kfree(mappings);
+}
+
+/*
+ * Sets all inodes as free and points each free inode to the next one.
+ *
+ * Every slot that fits completely inside the PMD_SIZE store is linked,
+ * matching the PMD_SIZE / sizeof(struct guestmemfs_inode) total that
+ * statfs reports. The last slot keeps sibling_ino == 0, which terminates
+ * the list.
+ *
+ * NOTE(review): this assumes the buffer behind psb->inodes is at least
+ * PMD_SIZE bytes - confirm against the allocation site.
+ */
+void guestmemfs_initialise_inode_store(struct super_block *sb)
+{
+	unsigned long nr_inodes = PMD_SIZE / sizeof(struct guestmemfs_inode);
+	unsigned long ino;
+
+	/* Inode store is a PMD sized (ie: 2 MiB) page */
+	memset(guestmemfs_get_persisted_inode(sb, 1), 0, PMD_SIZE);
+
+	/* Point each inode to the next one; linked-list initialisation. */
+	for (ino = 1; ino < nr_inodes; ino++)
+		guestmemfs_get_persisted_inode(sb, ino)->sibling_ino = ino + 1;
+}
+
+/*
+ * Create a regular file named by @dentry in directory @dir.
+ *
+ * A free persisted inode is allocated and fully set up first; only once
+ * nothing can fail any more is it pushed onto the head of the directory's
+ * child list and attached to @dentry.
+ *
+ * Returns 0 on success, -ENAMETOOLONG if the name does not fit in the
+ * persisted filename field including its NUL terminator, or -ENOMEM if
+ * no free inode or memory is available.
+ */
+static int guestmemfs_create(struct mnt_idmap *id, struct inode *dir,
+			     struct dentry *dentry, umode_t mode, bool excl)
+{
+	struct super_block *sb = dir->i_sb;
+	struct guestmemfs_inode *parent, *child;
+	struct inode *vfs_inode;
+	unsigned long ino;
+
+	/* strscpy() would silently truncate, making the file unfindable. */
+	if (dentry->d_name.len >= GUESTMEMFS_FILENAME_LEN)
+		return -ENAMETOOLONG;
+
+	ino = guestmemfs_allocate_inode(sb);
+	if (!ino)
+		return -ENOMEM;
+
+	child = guestmemfs_get_persisted_inode(sb, ino);
+	strscpy(child->filename, dentry->d_name.name, GUESTMEMFS_FILENAME_LEN);
+	child->flags = GUESTMEMFS_INODE_FLAG_FILE;
+	/* TODO: make dynamic */
+	child->mappings = kzalloc(PAGE_SIZE, GFP_KERNEL);
+	if (!child->mappings)
+		goto err_free_inode;
+
+	vfs_inode = guestmemfs_inode_get(sb, ino);
+	if (!vfs_inode)
+		goto err_free_mappings;
+
+	/* Everything succeeded: link the new inode at the head of the dir. */
+	parent = guestmemfs_get_persisted_inode(sb, dir->i_ino);
+	child->sibling_ino = parent->child_ino;
+	parent->child_ino = ino;
+
+	d_instantiate(dentry, vfs_inode);
+	return 0;
+
+err_free_mappings:
+	kfree(child->mappings);
+	/* Clear the pointer so nothing downstream can free it a second time. */
+	child->mappings = NULL;
+err_free_inode:
+	guestmemfs_free_inode(sb, ino);
+	return -ENOMEM;
+}
+
+/*
+ * Look up the name in @dentry among the children of directory @dir by
+ * walking the child_ino/sibling_ino list.
+ *
+ * On a match the VFS inode is attached to @dentry; on a miss a negative
+ * dentry is added instead. Returns NULL in both cases, or an ERR_PTR if
+ * the name is too long to exist in this filesystem or the VFS inode
+ * could not be obtained.
+ */
+static struct dentry *guestmemfs_lookup(struct inode *dir,
+					struct dentry *dentry,
+					unsigned int flags)
+{
+	struct super_block *sb = dir->i_sb;
+	struct guestmemfs_inode *guestmemfs_inode;
+	struct inode *found = NULL;
+	unsigned long ino;
+
+	if (dentry->d_name.len >= GUESTMEMFS_FILENAME_LEN)
+		return ERR_PTR(-ENAMETOOLONG);
+
+	ino = guestmemfs_get_persisted_inode(sb, dir->i_ino)->child_ino;
+	while (ino) {
+		guestmemfs_inode = guestmemfs_get_persisted_inode(sb, ino);
+		if (!strncmp(guestmemfs_inode->filename,
+			     dentry->d_name.name,
+			     GUESTMEMFS_FILENAME_LEN)) {
+			found = guestmemfs_inode_get(sb, ino);
+			if (!found)
+				return ERR_PTR(-ENOMEM);
+			break;
+		}
+		ino = guestmemfs_inode->sibling_ino;
+	}
+
+	/* found == NULL turns this into a negative dentry. */
+	d_add(dentry, found);
+	return NULL;
+}
+
+/*
+ * Remove the file behind @dentry from directory @dir.
+ *
+ * The directory is a singly linked list, so although the inode to free
+ * is known, the list is scanned to find the link that points at it:
+ * either the directory's child_ino or a preceding entry's sibling_ino.
+ * That link is redirected past the victim, the persisted inode goes back
+ * on the free list, and the VFS inode's link count is dropped.
+ *
+ * Returns 0 on success or -ENOENT if the inode is not on @dir's list.
+ */
+static int guestmemfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct super_block *sb = dir->i_sb;
+	struct inode *victim = dentry->d_inode;
+	unsigned long target = victim->i_ino;
+	unsigned long *link;
+
+	/* Walk the links themselves so head and non-head need no special case. */
+	link = &guestmemfs_get_persisted_inode(sb, dir->i_ino)->child_ino;
+	while (*link && *link != target)
+		link = &guestmemfs_get_persisted_inode(sb, *link)->sibling_ino;
+
+	if (!*link)
+		return -ENOENT;
+
+	/* Must read the victim's successor before the free below zeroes it. */
+	*link = guestmemfs_get_persisted_inode(sb, target)->sibling_ino;
+	guestmemfs_free_inode(sb, target);
+
+	/* Let the VFS see the inode as unlinked so it is not kept cached. */
+	drop_nlink(victim);
+	return 0;
+}
+
+/* Inode operations installed on inodes built by guestmemfs_inode_get(). */
+const struct inode_operations guestmemfs_dir_inode_operations = {
+	.lookup = guestmemfs_lookup,
+	.create = guestmemfs_create,
+	.unlink = guestmemfs_unlink,
+};
Here inodes are added to the filesystem: inodes for both regular files and directories. This involves supporting the callbacks to create inodes in a directory, as well as being able to list the contents of a directory and look up an inode by name. The inode store is implemented as a 2 MiB page which is an array of struct guestmemfs_inode. The reason to have a large allocation and put them all in a big flat array is to make persistence easy: when it's time to introduce persistence to the filesystem it will need to persist this one big chunk of inodes across kexec using KHO. Free inodes in the page form a slab type structure, the first free inode pointing to the next free inode, etc. The super block points to the first free inode, so allocating involves popping the head, and freeing an inode involves pushing a new head. Directories point to the first inode in the directory via a child_ino reference. Subsequent inodes within the same directory are pointed to via a sibling_ino member, essentially forming a linked list of inodes within the directory. Looking up an inode in a directory involves traversing the sibling_ino linked list until one with a matching name is found. Filesystem stats are updated to account for total and allocated inodes. Signed-off-by: James Gowans <jgowans@amazon.com> --- fs/guestmemfs/Makefile | 2 +- fs/guestmemfs/dir.c | 43 ++++++++++ fs/guestmemfs/guestmemfs.c | 21 ++++- fs/guestmemfs/guestmemfs.h | 36 +++++++- fs/guestmemfs/inode.c | 164 +++++++++++++++++++++++++++++++++++++ 5 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 fs/guestmemfs/dir.c create mode 100644 fs/guestmemfs/inode.c