Message ID | 20240805093245.889357-6-jgowans@amazon.com (mailing list archive) |
---|---|
State | New, archived |
Series | Introduce guestmemfs: persistent in-memory filesystem |
On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> Make the file data usable to userspace by adding mmap. That's all that
> QEMU needs for guest RAM, so that's all we bother implementing for now.
>
> When mmapping the file the VMA is marked as PFNMAP to indicate that there
> are no struct pages for the memory in this VMA. remap_pfn_range() is
> used to actually populate the page tables. All PTEs are pre-faulted into
> the pgtables at mmap time so that the pgtables are usable when this
> virtual address range is given to VFIO's MAP_DMA.

Thanks for sending this out! I'm going through the series with the
intention to see how it might fit within the existing guest_memfd work
for pKVM/CoCo/Gunyah.

It might've been mentioned in the MM alignment session -- you might be
interested to join the guest_memfd bi-weekly call to see how we are
overlapping [1].

[1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/

---

Was the decision to pre-fault everything because it was convenient to do
or otherwise intentionally different from hugetlb?

>
> Signed-off-by: James Gowans <jgowans@amazon.com>
> ---
>  fs/guestmemfs/file.c       | 43 +++++++++++++++++++++++++++++++++++++-
>  fs/guestmemfs/guestmemfs.c |  2 +-
>  fs/guestmemfs/guestmemfs.h |  3 +++
>  3 files changed, 46 insertions(+), 2 deletions(-)
>
> diff --git a/fs/guestmemfs/file.c b/fs/guestmemfs/file.c
> index 618c93b12196..b1a52abcde65 100644
> --- a/fs/guestmemfs/file.c
> +++ b/fs/guestmemfs/file.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: GPL-2.0-only
>
>  #include "guestmemfs.h"
> +#include <linux/mm.h>
>
>  static int truncate(struct inode *inode, loff_t newsize)
>  {
> @@ -41,6 +42,46 @@ static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct
>          return 0;
>  }
>
> +/*
> + * To be able to use PFNMAP VMAs for VFIO DMA mapping we need the page tables
> + * populated with mappings. Pre-fault everything.
> + */
> +static int mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +        int rc;
> +        unsigned long *mappings_block;
> +        struct guestmemfs_inode *guestmemfs_inode;
> +
> +        guestmemfs_inode = guestmemfs_get_persisted_inode(filp->f_inode->i_sb,
> +                        filp->f_inode->i_ino);
> +
> +        mappings_block = guestmemfs_inode->mappings;
> +
> +        /* Remap-pfn-range will mark the range VM_IO */
> +        for (unsigned long vma_addr_offset = vma->vm_start;
> +                        vma_addr_offset < vma->vm_end;
> +                        vma_addr_offset += PMD_SIZE) {
> +                int block, mapped_block;
> +                unsigned long map_size = min(PMD_SIZE, vma->vm_end - vma_addr_offset);
> +
> +                block = (vma_addr_offset - vma->vm_start) / PMD_SIZE;
> +                mapped_block = *(mappings_block + block);
> +                /*
> +                 * It's wrong to use remap_pfn_range; this will install PTE-level entries.
> +                 * The whole point of 2 MiB allocs is to improve TLB perf!
> +                 * We should use something like mm/huge_memory.c#insert_pfn_pmd
> +                 * but that is currently static.
> +                 * TODO: figure out the best way to install PMDs.
> +                 */
> +                rc = remap_pfn_range(vma,
> +                                vma_addr_offset,
> +                                (guestmemfs_base >> PAGE_SHIFT) + (mapped_block * 512),
> +                                map_size,
> +                                vma->vm_page_prot);
> +        }
> +        return 0;
> +}
> +
>  const struct inode_operations guestmemfs_file_inode_operations = {
>          .setattr = inode_setattr,
>          .getattr = simple_getattr,
> @@ -48,5 +89,5 @@ const struct inode_operations guestmemfs_file_inode_operations = {
>
>  const struct file_operations guestmemfs_file_fops = {
>          .owner = THIS_MODULE,
> -        .iterate_shared = NULL,
> +        .mmap = mmap,
>  };
> diff --git a/fs/guestmemfs/guestmemfs.c b/fs/guestmemfs/guestmemfs.c
> index c45c796c497a..38f20ad25286 100644
> --- a/fs/guestmemfs/guestmemfs.c
> +++ b/fs/guestmemfs/guestmemfs.c
> @@ -9,7 +9,7 @@
>  #include <linux/memblock.h>
>  #include <linux/statfs.h>
>
> -static phys_addr_t guestmemfs_base, guestmemfs_size;
> +phys_addr_t guestmemfs_base, guestmemfs_size;
>  struct guestmemfs_sb *psb;
>
>  static int statfs(struct dentry *root, struct kstatfs *buf)
> diff --git a/fs/guestmemfs/guestmemfs.h b/fs/guestmemfs/guestmemfs.h
> index 7ea03ac8ecca..0f2788ce740e 100644
> --- a/fs/guestmemfs/guestmemfs.h
> +++ b/fs/guestmemfs/guestmemfs.h
> @@ -8,6 +8,9 @@
>  #define GUESTMEMFS_FILENAME_LEN 255
>  #define GUESTMEMFS_PSB(sb) ((struct guestmemfs_sb *)sb->s_fs_info)
>
> +/* Units of bytes */
> +extern phys_addr_t guestmemfs_base, guestmemfs_size;
> +
>  struct guestmemfs_sb {
>          /* Inode number */
>          unsigned long next_free_ino;
> --
> 2.34.1
On Tue, Oct 29, 2024 at 4:06 PM Elliot Berman <quic_eberman@quicinc.com> wrote:
>
> On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> > Make the file data usable to userspace by adding mmap. That's all that
> > QEMU needs for guest RAM, so that's all we bother implementing for now.
> >
> > When mmapping the file the VMA is marked as PFNMAP to indicate that there
> > are no struct pages for the memory in this VMA. remap_pfn_range() is
> > used to actually populate the page tables. All PTEs are pre-faulted into
> > the pgtables at mmap time so that the pgtables are usable when this
> > virtual address range is given to VFIO's MAP_DMA.
>
> Thanks for sending this out! I'm going through the series with the
> intention to see how it might fit within the existing guest_memfd work
> for pKVM/CoCo/Gunyah.
>
> It might've been mentioned in the MM alignment session -- you might be
> interested to join the guest_memfd bi-weekly call to see how we are
> overlapping [1].
>
> [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/
>
> ---
>
> Was the decision to pre-fault everything because it was convenient to do
> or otherwise intentionally different from hugetlb?
>

It's memory that is placed outside of page allocator control, or
even outside of System RAM - VM_PFNMAP only. So you don't have much of
a choice..

In general, for things like guest memory or persistent memory, even if
struct pages were available, it doesn't seem all that useful to adhere
to the !MAP_POPULATE standard; why go through any faults to begin
with?

For guest_memfd: as I understand it, it's folio-based. And this is
VM_PFNMAP memory without struct pages / folios. So the main task there
is probably to teach guest_memfd about VM_PFNMAP memory. That would be
great, since it then ties in guest_memfd with external guest memory.

- Frank
On Tue, 2024-10-29 at 16:05 -0700, Elliot Berman wrote:
> On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> > Make the file data usable to userspace by adding mmap. That's all that
> > QEMU needs for guest RAM, so that's all we bother implementing for now.
> >
> > When mmapping the file the VMA is marked as PFNMAP to indicate that there
> > are no struct pages for the memory in this VMA. remap_pfn_range() is
> > used to actually populate the page tables. All PTEs are pre-faulted into
> > the pgtables at mmap time so that the pgtables are usable when this
> > virtual address range is given to VFIO's MAP_DMA.
>
> Thanks for sending this out! I'm going through the series with the
> intention to see how it might fit within the existing guest_memfd work
> for pKVM/CoCo/Gunyah.
>
> It might've been mentioned in the MM alignment session -- you might be
> interested to join the guest_memfd bi-weekly call to see how we are
> overlapping [1].
>
> [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/

Hi Elliot, yes, I think that there is a lot more overlap with
guest_memfd necessary here. The idea was to extend guestmemfs at some
point to have a guest_memfd style interface, but it was pointed out at
the MM alignment call that doing so would require guestmemfs to
duplicate the API surface of guest_memfd. This is undesirable. Better
would be to have persistence implemented as a custom allocator behind a
normal guest_memfd. I'm not too sure how this would actually be done in
practice, specifically:
- how the persistent pool would be defined
- how it would be supplied to guest_memfd
- how the guest_memfds would be re-discovered after kexec
But assuming we can figure out some way to do this, I think it's a
better way to go.

I'll join the guest_memfd call shortly to see the developments there
and where persistence would fit best. Hopefully we can figure out in
theory how this could work, then I'll put together another RFC
sketching it out.

JG
On Thu, Oct 31, 2024 at 03:30:59PM +0000, Gowans, James wrote:
> On Tue, 2024-10-29 at 16:05 -0700, Elliot Berman wrote:
> > On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> > > Make the file data usable to userspace by adding mmap. That's all that
> > > QEMU needs for guest RAM, so that's all we bother implementing for now.
> > >
> > > When mmapping the file the VMA is marked as PFNMAP to indicate that there
> > > are no struct pages for the memory in this VMA. remap_pfn_range() is
> > > used to actually populate the page tables. All PTEs are pre-faulted into
> > > the pgtables at mmap time so that the pgtables are usable when this
> > > virtual address range is given to VFIO's MAP_DMA.
> >
> > Thanks for sending this out! I'm going through the series with the
> > intention to see how it might fit within the existing guest_memfd work
> > for pKVM/CoCo/Gunyah.
> >
> > It might've been mentioned in the MM alignment session -- you might be
> > interested to join the guest_memfd bi-weekly call to see how we are
> > overlapping [1].
> >
> > [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/
>
> Hi Elliot, yes, I think that there is a lot more overlap with
> guest_memfd necessary here. The idea was to extend guestmemfs at some
> point to have a guest_memfd style interface, but it was pointed out at
> the MM alignment call that doing so would require guestmemfs to
> duplicate the API surface of guest_memfd. This is undesirable. Better
> would be to have persistence implemented as a custom allocator behind a
> normal guest_memfd. I'm not too sure how this would actually be done in
> practice, specifically:
> - how the persistent pool would be defined
> - how it would be supplied to guest_memfd
> - how the guest_memfds would be re-discovered after kexec
> But assuming we can figure out some way to do this, I think it's a
> better way to go.

I think the filesystem interface seemed reasonable, you just want
open() on the filesystem to return back a normal guest_memfd and
re-use all of that code to implement it.

When opened through the filesystem guest_memfd would get hooked by the
KHO stuff to manage its memory, somehow.

Really KHO just needs to keep track of the addresses in the
guest_memfd when it serializes, right? So maybe all it needs is a way
to freeze the guest_memfd so its memory map doesn't change anymore,
then a way to extract the addresses from it for serialization?

Jason
On Wed, 2024-10-30 at 15:18 -0700, Frank van der Linden wrote:
> On Tue, Oct 29, 2024 at 4:06 PM Elliot Berman <quic_eberman@quicinc.com> wrote:
> >
> > On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> > > Make the file data usable to userspace by adding mmap. That's all that
> > > QEMU needs for guest RAM, so that's all we bother implementing for now.
> > >
> > > When mmapping the file the VMA is marked as PFNMAP to indicate that there
> > > are no struct pages for the memory in this VMA. remap_pfn_range() is
> > > used to actually populate the page tables. All PTEs are pre-faulted into
> > > the pgtables at mmap time so that the pgtables are usable when this
> > > virtual address range is given to VFIO's MAP_DMA.
> >
> > Thanks for sending this out! I'm going through the series with the
> > intention to see how it might fit within the existing guest_memfd work
> > for pKVM/CoCo/Gunyah.
> >
> > It might've been mentioned in the MM alignment session -- you might be
> > interested to join the guest_memfd bi-weekly call to see how we are
> > overlapping [1].
> >
> > [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/
> >
> > ---
> >
> > Was the decision to pre-fault everything because it was convenient to do
> > or otherwise intentionally different from hugetlb?
> >
>
> It's memory that is placed outside of page allocator control, or
> even outside of System RAM - VM_PFNMAP only. So you don't have much of
> a choice..
>
> In general, for things like guest memory or persistent memory, even if
> struct pages were available, it doesn't seem all that useful to adhere
> to the !MAP_POPULATE standard; why go through any faults to begin
> with?
>
> For guest_memfd: as I understand it, it's folio-based. And this is
> VM_PFNMAP memory without struct pages / folios. So the main task there
> is probably to teach guest_memfd about VM_PFNMAP memory. That would be
> great, since it then ties in guest_memfd with external guest memory.

Exactly - I think all of the comments on this series are heading in a
similar direction: let's add a custom reserved (PFNMAP) persistent
memory allocator behind guest_memfd and expose that as a filesystem.
This is what the next version of this patch series will do.

JG
On Thu, 2024-10-31 at 13:06 -0300, Jason Gunthorpe wrote:
> On Thu, Oct 31, 2024 at 03:30:59PM +0000, Gowans, James wrote:
> > On Tue, 2024-10-29 at 16:05 -0700, Elliot Berman wrote:
> > > On Mon, Aug 05, 2024 at 11:32:40AM +0200, James Gowans wrote:
> > > > Make the file data usable to userspace by adding mmap. That's all that
> > > > QEMU needs for guest RAM, so that's all we bother implementing for now.
> > > >
> > > > When mmapping the file the VMA is marked as PFNMAP to indicate that there
> > > > are no struct pages for the memory in this VMA. remap_pfn_range() is
> > > > used to actually populate the page tables. All PTEs are pre-faulted into
> > > > the pgtables at mmap time so that the pgtables are usable when this
> > > > virtual address range is given to VFIO's MAP_DMA.
> > >
> > > Thanks for sending this out! I'm going through the series with the
> > > intention to see how it might fit within the existing guest_memfd work
> > > for pKVM/CoCo/Gunyah.
> > >
> > > It might've been mentioned in the MM alignment session -- you might be
> > > interested to join the guest_memfd bi-weekly call to see how we are
> > > overlapping [1].
> > >
> > > [1]: https://lore.kernel.org/kvm/ae794891-fe69-411a-b82e-6963b594a62a@redhat.com/T/
> >
> > Hi Elliot, yes, I think that there is a lot more overlap with
> > guest_memfd necessary here. The idea was to extend guestmemfs at some
> > point to have a guest_memfd style interface, but it was pointed out at
> > the MM alignment call that doing so would require guestmemfs to
> > duplicate the API surface of guest_memfd. This is undesirable. Better
> > would be to have persistence implemented as a custom allocator behind a
> > normal guest_memfd. I'm not too sure how this would actually be done in
> > practice, specifically:
> > - how the persistent pool would be defined
> > - how it would be supplied to guest_memfd
> > - how the guest_memfds would be re-discovered after kexec
> > But assuming we can figure out some way to do this, I think it's a
> > better way to go.
>
> I think the filesystem interface seemed reasonable, you just want
> open() on the filesystem to return back a normal guest_memfd and
> re-use all of that code to implement it.
>
> When opened through the filesystem guest_memfd would get hooked by the
> KHO stuff to manage its memory, somehow.
>
> Really KHO just needs to keep track of the addresses in the
> guest_memfd when it serializes, right? So maybe all it needs is a way
> to freeze the guest_memfd so its memory map doesn't change anymore,
> then a way to extract the addresses from it for serialization?

Thanks Jason, that sounds perfect. I'll work on the next rev which will:
- expose a filesystem which owns reserved/persistent memory, just like
  this patch.
- rebase on top of the patches which pull the guest_memfd code out into
  a library.
- rebase on top of the guest_memfd patches which support adding a
  different backing allocator (hugetlbfs) to guest_memfd.
- when a file in guestmemfs is opened, create a guest_memfd object from
  the guest_memfd library code and set guestmemfs as the custom
  allocator for the file.
- serialise and re-hydrate the guest_memfds which have been created in
  guestmemfs on kexec via KHO.

The main difference is that opening a guestmemfs file won't give a
regular file, rather it will give a guest_memfd library object. This
will give good code re-use with the guest_memfd library and prevent
needing to re-implement the guest_memfd API surface here.

Sounds like a great path forward. :-)

JG

>
> Jason
On Fri, Nov 01, 2024 at 01:01:00PM +0000, Gowans, James wrote:
> Thanks Jason, that sounds perfect. I'll work on the next rev which will:
> - expose a filesystem which owns reserved/persistent memory, just like
>   this patch.

Is this step needed?

If the guest memfd is already told to get 1G pages in some normal way,
why do we need a dedicated pool just for the KHO filesystem?

Back to my suggestion, can't KHO simply freeze the guest memfd and
then extract the memory layout, and just use the normal allocator?

Or do you have a hard requirement that only KHO allocated memory can
be preserved across kexec?

Jason
On Fri, 2024-11-01 at 10:42 -0300, Jason Gunthorpe wrote:
>
> On Fri, Nov 01, 2024 at 01:01:00PM +0000, Gowans, James wrote:
>
> > Thanks Jason, that sounds perfect. I'll work on the next rev which will:
> > - expose a filesystem which owns reserved/persistent memory, just like
> >   this patch.
>
> Is this step needed?
>
> If the guest memfd is already told to get 1G pages in some normal way,
> why do we need a dedicated pool just for the KHO filesystem?
>
> Back to my suggestion, can't KHO simply freeze the guest memfd and
> then extract the memory layout, and just use the normal allocator?
>
> Or do you have a hard requirement that only KHO allocated memory can
> be preserved across kexec?

KHO can persist any memory ranges which are not MOVABLE. Provided that
guest_memfd does non-movable allocations then serialising and persisting
should be possible.

There are other requirements here, specifically the ability to be
*guaranteed* GiB-level allocations, have the guest memory out of the
direct map for secret hiding, and remove the struct page overhead.
Struct page overhead could be handled via HVO. But considering that the
memory must be out of the direct map it seems unnecessary to have struct
pages, and unnecessary to have it managed by an existing allocator. The
only existing 1 GiB allocator I know of is hugetlbfs? Let me know if
there's something else that can be used. That's the main motivation for
a separate pool allocated on early boot.

This is quite similar to hugetlbfs, so a natural question is if we could
use and serialise hugetlbfs instead, but that probably opens another can
of worms of complexity. There's more than just the guest_memfds and
their allocations to serialise; it's probably useful to be able to have
a directory structure in the filesystem, POSIX file ACLs, and perhaps
some other filesystem metadata. For this reason I still think that
having a new filesystem designed for this use-case which creates
guest_memfd objects when files are opened is the way to go.

Let me know what you think.

JG
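[Editor's note] For readers unfamiliar with how such an early-boot pool is usually carved out, below is a minimal sketch of the common pattern: parse a kernel command-line option and reserve the range from memblock before the buddy allocator takes ownership. The "guestmemfs=" parameter name, the helper names, and the 1 GiB alignment are assumptions made for illustration; the actual reservation code in the guestmemfs series is not shown in this thread.

/*
 * Illustrative sketch only: reserve a persistent carve-out at early boot,
 * outside page allocator control. Parameter name and helpers are assumed,
 * not taken from the guestmemfs series itself.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/sizes.h>

phys_addr_t guestmemfs_base, guestmemfs_size;	/* units of bytes */

static int __init parse_guestmemfs_size(char *p)
{
	/* e.g. guestmemfs=4G on the kernel command line (assumed syntax) */
	guestmemfs_size = memparse(p, &p);
	return 0;
}
early_param("guestmemfs", parse_guestmemfs_size);

void __init guestmemfs_reserve(void)
{
	if (!guestmemfs_size)
		return;

	/* 1 GiB alignment so the pool can back GiB-level allocations */
	guestmemfs_base = memblock_phys_alloc(guestmemfs_size, SZ_1G);
	if (!guestmemfs_base) {
		pr_warn("guestmemfs: failed to reserve %pa bytes\n",
			&guestmemfs_size);
		guestmemfs_size = 0;
	}
}

Memory reserved this way never enters the page allocator, which is what makes the VM_PFNMAP, pre-faulted mmap path discussed above necessary in the first place.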
On Fri, Nov 01, 2024 at 10:42:02AM -0300, Jason Gunthorpe wrote:
> On Fri, Nov 01, 2024 at 01:01:00PM +0000, Gowans, James wrote:
>
> > Thanks Jason, that sounds perfect. I'll work on the next rev which will:
> > - expose a filesystem which owns reserved/persistent memory, just like
> >   this patch.
>
> Is this step needed?
>
> If the guest memfd is already told to get 1G pages in some normal way,
> why do we need a dedicated pool just for the KHO filesystem?
>
> Back to my suggestion, can't KHO simply freeze the guest memfd and
> then extract the memory layout, and just use the normal allocator?
>
> Or do you have a hard requirement that only KHO allocated memory can
> be preserved across kexec?

KHO does not allocate memory; it gets the ranges to preserve and makes
sure they are not overwritten during kexec and can be retrieved by the
second kernel. For KHO it does not matter if the memory comes from a
normal or a special allocator.

> Jason
On Sat, Nov 02, 2024 at 08:24:15AM +0000, Gowans, James wrote:
> On Fri, 2024-11-01 at 10:42 -0300, Jason Gunthorpe wrote:
> >
> > On Fri, Nov 01, 2024 at 01:01:00PM +0000, Gowans, James wrote:
> >
> > > Thanks Jason, that sounds perfect. I'll work on the next rev which will:
> > > - expose a filesystem which owns reserved/persistent memory, just like
> > >   this patch.
> >
> > Is this step needed?
> >
> > If the guest memfd is already told to get 1G pages in some normal way,
> > why do we need a dedicated pool just for the KHO filesystem?
> >
> > Back to my suggestion, can't KHO simply freeze the guest memfd and
> > then extract the memory layout, and just use the normal allocator?
> >
> > Or do you have a hard requirement that only KHO allocated memory can
> > be preserved across kexec?
>
> KHO can persist any memory ranges which are not MOVABLE. Provided that
> guest_memfd does non-movable allocations then serialising and persisting
> should be possible.
>
> There are other requirements here, specifically the ability to be
> *guaranteed* GiB-level allocations, have the guest memory out of the
> direct map for secret hiding, and remove the struct page overhead.
> Struct page overhead could be handled via HVO. But considering that the
> memory must be out of the direct map it seems unnecessary to have struct
> pages, and unnecessary to have it managed by an existing allocator.

Having memory out of the direct map does not preclude manipulations of
struct page unless that memory is completely out of the kernel's control
(e.g. excluded by mem=X), and this is not necessarily the case even for
VM hosts.

It's not necessary to manage the memory using an existing allocator, but
I think a specialized allocator should not be a part of guestmemfs.

> JG
On Sat, Nov 02, 2024 at 08:24:15AM +0000, Gowans, James wrote:
> KHO can persist any memory ranges which are not MOVABLE. Provided that
> guest_memfd does non-movable allocations then serialising and persisting
> should be possible.
>
> There are other requirements here, specifically the ability to be
> *guaranteed* GiB-level allocations, have the guest memory out of the
> direct map for secret hiding, and remove the struct page overhead.
> Struct page overhead could be handled via HVO.

IMHO this should all be handled as part of normal guestmemfd operation
because it has nothing to do with KHO. Many others have asked for the
same things in guest memfd already.

So I would start by assuming guest memfd will get those things
eventually and design around a 'freeze and record' model for KHO of a
guestmemfd, instead of yet another special memory allocator.

Jason
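[Editor's note] To make the 'freeze and record' model slightly more concrete, here is a rough sketch of the recording half: once a guest_memfd is frozen (no further allocation or truncation), walk its page cache and emit the physical ranges backing it so KHO can preserve them. The gmem_kho_range structure and gmem_record_ranges() function are hypothetical, invented purely for illustration; only the filemap and folio helpers are existing kernel APIs, and how KHO would actually consume such records is exactly the open design question in this thread.

/*
 * Hypothetical sketch of the "freeze and record" idea: with the mapping
 * frozen, walk the guest_memfd's page cache and record the backing
 * physical ranges for serialization. Record structure and function are
 * invented; filemap/folio helpers are real APIs.
 */
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>

struct gmem_kho_range {		/* hypothetical serialization record */
	phys_addr_t base;
	phys_addr_t size;
	pgoff_t index;		/* offset in the file, in pages */
};

static int gmem_record_ranges(struct file *file, struct gmem_kho_range *out,
			      int max_ranges)
{
	struct address_space *mapping = file->f_mapping;
	struct folio_batch fbatch;
	pgoff_t index = 0;
	int nr = 0;

	folio_batch_init(&fbatch);
	while (filemap_get_folios(mapping, &index, ULONG_MAX, &fbatch)) {
		for (unsigned int i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			if (nr == max_ranges) {
				folio_batch_release(&fbatch);
				return -ENOSPC;
			}
			out[nr].base = (phys_addr_t)folio_pfn(folio) << PAGE_SHIFT;
			out[nr].size = folio_size(folio);
			out[nr].index = folio->index;
			nr++;
		}
		folio_batch_release(&fbatch);
	}
	return nr;
}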
diff --git a/fs/guestmemfs/file.c b/fs/guestmemfs/file.c
index 618c93b12196..b1a52abcde65 100644
--- a/fs/guestmemfs/file.c
+++ b/fs/guestmemfs/file.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only

 #include "guestmemfs.h"
+#include <linux/mm.h>

 static int truncate(struct inode *inode, loff_t newsize)
 {
@@ -41,6 +42,46 @@ static int inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct
         return 0;
 }

+/*
+ * To be able to use PFNMAP VMAs for VFIO DMA mapping we need the page tables
+ * populated with mappings. Pre-fault everything.
+ */
+static int mmap(struct file *filp, struct vm_area_struct *vma)
+{
+        int rc;
+        unsigned long *mappings_block;
+        struct guestmemfs_inode *guestmemfs_inode;
+
+        guestmemfs_inode = guestmemfs_get_persisted_inode(filp->f_inode->i_sb,
+                        filp->f_inode->i_ino);
+
+        mappings_block = guestmemfs_inode->mappings;
+
+        /* Remap-pfn-range will mark the range VM_IO */
+        for (unsigned long vma_addr_offset = vma->vm_start;
+                        vma_addr_offset < vma->vm_end;
+                        vma_addr_offset += PMD_SIZE) {
+                int block, mapped_block;
+                unsigned long map_size = min(PMD_SIZE, vma->vm_end - vma_addr_offset);
+
+                block = (vma_addr_offset - vma->vm_start) / PMD_SIZE;
+                mapped_block = *(mappings_block + block);
+                /*
+                 * It's wrong to use remap_pfn_range; this will install PTE-level entries.
+                 * The whole point of 2 MiB allocs is to improve TLB perf!
+                 * We should use something like mm/huge_memory.c#insert_pfn_pmd
+                 * but that is currently static.
+                 * TODO: figure out the best way to install PMDs.
+                 */
+                rc = remap_pfn_range(vma,
+                                vma_addr_offset,
+                                (guestmemfs_base >> PAGE_SHIFT) + (mapped_block * 512),
+                                map_size,
+                                vma->vm_page_prot);
+        }
+        return 0;
+}
+
 const struct inode_operations guestmemfs_file_inode_operations = {
         .setattr = inode_setattr,
         .getattr = simple_getattr,
@@ -48,5 +89,5 @@ const struct inode_operations guestmemfs_file_inode_operations = {

 const struct file_operations guestmemfs_file_fops = {
         .owner = THIS_MODULE,
-        .iterate_shared = NULL,
+        .mmap = mmap,
 };
diff --git a/fs/guestmemfs/guestmemfs.c b/fs/guestmemfs/guestmemfs.c
index c45c796c497a..38f20ad25286 100644
--- a/fs/guestmemfs/guestmemfs.c
+++ b/fs/guestmemfs/guestmemfs.c
@@ -9,7 +9,7 @@
 #include <linux/memblock.h>
 #include <linux/statfs.h>

-static phys_addr_t guestmemfs_base, guestmemfs_size;
+phys_addr_t guestmemfs_base, guestmemfs_size;
 struct guestmemfs_sb *psb;

 static int statfs(struct dentry *root, struct kstatfs *buf)
diff --git a/fs/guestmemfs/guestmemfs.h b/fs/guestmemfs/guestmemfs.h
index 7ea03ac8ecca..0f2788ce740e 100644
--- a/fs/guestmemfs/guestmemfs.h
+++ b/fs/guestmemfs/guestmemfs.h
@@ -8,6 +8,9 @@
 #define GUESTMEMFS_FILENAME_LEN 255
 #define GUESTMEMFS_PSB(sb) ((struct guestmemfs_sb *)sb->s_fs_info)

+/* Units of bytes */
+extern phys_addr_t guestmemfs_base, guestmemfs_size;
+
 struct guestmemfs_sb {
         /* Inode number */
         unsigned long next_free_ino;
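[Editor's note] On the TODO in the mmap handler above (remap_pfn_range() only installs PTE-level entries): one existing pattern in the kernel is the device-DAX approach of installing PMD-sized entries from a huge_fault handler via vmf_insert_pfn_pmd(). A rough sketch of that shape follows. It trades the series' prefault-at-mmap-time model for fault-time insertion, omits the PTE-sized fallback handler, and assumes the guestmemfs_* helpers and block lookup behave as in the patch above, so treat it as an illustration of the API rather than the actual fix.

/*
 * Sketch (not the series' code): install 2 MiB mappings from a huge_fault
 * handler, the way device DAX does, instead of PTE-level remap_pfn_range().
 * guestmemfs_get_persisted_inode() and the ->mappings lookup are assumed to
 * behave as in the patch above; error handling and the PTE fallback ->fault
 * handler are omitted.
 */
#include <linux/huge_mm.h>
#include <linux/mm.h>
#include <linux/pfn_t.h>

static vm_fault_t guestmemfs_huge_fault(struct vm_fault *vmf, unsigned int order)
{
	struct vm_area_struct *vma = vmf->vma;
	struct inode *inode = file_inode(vma->vm_file);
	struct guestmemfs_inode *gi;
	unsigned long block;
	phys_addr_t phys;

	if (order != PMD_ORDER)
		return VM_FAULT_FALLBACK;

	gi = guestmemfs_get_persisted_inode(inode->i_sb, inode->i_ino);
	/* One ->mappings entry per 2 MiB block, as in the patch's mmap loop */
	block = gi->mappings[vmf->pgoff / (PMD_SIZE >> PAGE_SHIFT)];
	phys = guestmemfs_base + block * PMD_SIZE;

	return vmf_insert_pfn_pmd(vmf, phys_to_pfn_t(phys, PFN_DEV | PFN_MAP),
				  vmf->flags & FAULT_FLAG_WRITE);
}

static const struct vm_operations_struct guestmemfs_vm_ops = {
	.huge_fault = guestmemfs_huge_fault,
};

static int guestmemfs_mmap(struct file *filp, struct vm_area_struct *vma)
{
	vm_flags_set(vma, VM_PFNMAP | VM_HUGEPAGE | VM_DONTEXPAND);
	vma->vm_ops = &guestmemfs_vm_ops;
	return 0;
}

If pre-population at mmap time is still required for VFIO MAP_DMA, the mmap handler could additionally walk the range and call the fault path eagerly, but that is outside the scope of this sketch.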
Make the file data usable to userspace by adding mmap. That's all that
QEMU needs for guest RAM, so that's all we bother implementing for now.

When mmapping the file the VMA is marked as PFNMAP to indicate that there
are no struct pages for the memory in this VMA. remap_pfn_range() is
used to actually populate the page tables. All PTEs are pre-faulted into
the pgtables at mmap time so that the pgtables are usable when this
virtual address range is given to VFIO's MAP_DMA.

Signed-off-by: James Gowans <jgowans@amazon.com>
---
 fs/guestmemfs/file.c       | 43 +++++++++++++++++++++++++++++++++++++-
 fs/guestmemfs/guestmemfs.c |  2 +-
 fs/guestmemfs/guestmemfs.h |  3 +++
 3 files changed, 46 insertions(+), 2 deletions(-)
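[Editor's note] For context on how userspace would consume this, the flow the commit message describes is roughly "mmap the guestmemfs file, then hand the resulting virtual range to VFIO's MAP_DMA". A minimal userspace sketch of that flow is below; the mount point and file name are hypothetical, the VFIO structures and ioctl are standard UAPI, and error handling is trimmed.

/*
 * Userspace sketch: mmap a guestmemfs file and register it with VFIO.
 * The path is hypothetical; VFIO_IOMMU_MAP_DMA and its structure are
 * standard UAPI from <linux/vfio.h>.
 */
#include <fcntl.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/vfio.h>

int map_guest_ram(int container_fd, size_t size)
{
	int fd = open("/mnt/guestmemfs/vm1-ram", O_RDWR);	/* hypothetical path */
	void *ram;

	if (fd < 0)
		return -1;

	/* PTEs are pre-faulted by guestmemfs at mmap time, per the commit message */
	ram = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ram == MAP_FAILED)
		return -1;

	struct vfio_iommu_type1_dma_map dma_map = {
		.argsz = sizeof(dma_map),
		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
		.vaddr = (uintptr_t)ram,
		.iova  = 0,		/* map at guest-physical 0 for simplicity */
		.size  = size,
	};

	/* The already-populated page tables back this DMA mapping request */
	return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &dma_map);
}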