@@ -141,15 +141,13 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio)
*/
static long
axon_ram_direct_access(struct block_device *device, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
{
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
- void *addr = (void *)(bank->ph_addr + offset);
-
- *kaddr = (void __pmem *)addr;
- *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
+ *kaddr = (void __pmem __force *) bank->io_addr + offset;
+ *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
}
@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector,
#ifdef CONFIG_BLK_DEV_RAM_DAX
static long brd_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
{
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -385,7 +385,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector,
if (!page)
return -ENOSPC;
*kaddr = (void __pmem *)page_address(page);
- *pfn = page_to_pfn(page);
+ *pfn = page_to_pfn_t(page);
return PAGE_SIZE;
}
@@ -39,6 +39,7 @@ struct pmem_device {
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
+ unsigned long pfn_flags;
void __pmem *virt_addr;
size_t size;
};
@@ -108,25 +109,22 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
}
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
{
struct pmem_device *pmem = bdev->bd_disk->private_data;
resource_size_t offset = sector * 512 + pmem->data_offset;
- resource_size_t size;
+ resource_size_t size = pmem->size - offset;
- if (pmem->data_offset) {
+ *kaddr = pmem->virt_addr + offset;
+ *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+ if (pfn_t_has_page(*pfn)) {
/*
* Limit the direct_access() size to what is covered by
* the memmap
*/
- size = (pmem->size - offset) & ~ND_PFN_MASK;
- } else
- size = pmem->size - offset;
-
- /* FIXME convert DAX to comprehend that this mapping has a lifetime */
- *kaddr = pmem->virt_addr + offset;
- *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
-
+ size &= ~ND_PFN_MASK;
+ }
return size;
}
@@ -158,9 +156,11 @@ static struct pmem_device *pmem_alloc(struct device *dev,
return ERR_PTR(-EBUSY);
}
- if (pmem_should_map_pages(dev))
+ pmem->pfn_flags = PFN_DEV;
+ if (pmem_should_map_pages(dev)) {
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
- else
+ pmem->pfn_flags |= PFN_MAP;
+ } else
pmem->virt_addr = (void __pmem *) devm_memremap(dev,
pmem->phys_addr, pmem->size,
ARCH_MEMREMAP_PMEM);
@@ -371,6 +371,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
pmem = dev_get_drvdata(dev);
devm_memunmap(dev, (void __force *) pmem->virt_addr);
pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res);
+ pmem->pfn_flags |= PFN_MAP;
if (IS_ERR(pmem->virt_addr)) {
rc = PTR_ERR(pmem->virt_addr);
goto err;
@@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode);
static void dcssblk_release(struct gendisk *disk, fmode_t mode);
static void dcssblk_make_request(struct request_queue *q, struct bio *bio);
static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, unsigned long *pfn);
+ void __pmem **kaddr, pfn_t *pfn);
static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0";
@@ -881,20 +881,18 @@ fail:
static long
dcssblk_direct_access (struct block_device *bdev, sector_t secnum,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, pfn_t *pfn)
{
struct dcssblk_dev_info *dev_info;
unsigned long offset, dev_sz;
- void *addr;
dev_info = bdev->bd_disk->private_data;
if (!dev_info)
return -ENODEV;
dev_sz = dev_info->end - dev_info->start;
offset = secnum * 512;
- addr = (void *) (dev_info->start + offset);
- *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
- *kaddr = (void __pmem *) addr;
+ *kaddr = (void __pmem *) (dev_info->start + offset);
+ *pfn = phys_to_pfn_t(dev_info->start + offset, PFN_DEV);
return dev_sz - offset;
}
@@ -442,7 +442,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page);
* accessible at this address.
*/
long bdev_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **addr, unsigned long *pfn, long size)
+ void __pmem **addr, pfn_t *pfn, long size)
{
long avail;
const struct block_device_operations *ops = bdev->bd_disk->fops;
@@ -37,7 +37,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
might_sleep();
do {
void __pmem *addr;
- unsigned long pfn;
+ pfn_t pfn;
long count;
count = bdev_direct_access(bdev, sector, &addr, &pfn, size);
@@ -64,7 +64,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size)
EXPORT_SYMBOL_GPL(dax_clear_blocks);
static void __pmem *__dax_map_bh(const struct buffer_head *bh, unsigned blkbits,
- unsigned long *pfn, long *len)
+ pfn_t *pfn, long *len)
{
long rc;
void __pmem *addr;
@@ -87,7 +87,7 @@ static void __pmem *__dax_map_bh(const struct buffer_head *bh, unsigned blkbits,
static void __pmem *dax_map_bh(const struct buffer_head *bh, unsigned blkbits)
{
- unsigned long pfn;
+ pfn_t pfn;
return __dax_map_bh(bh, blkbits, &pfn, NULL);
}
@@ -138,7 +138,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter,
loff_t pos = start, max = start, bh_max = start;
int rw = iov_iter_rw(iter), rc;
long map_len = 0;
- unsigned long pfn;
+ pfn_t pfn;
void __pmem *addr = NULL;
void __pmem *kmap = (void __pmem *) ERR_PTR(-EIO);
bool hole = false;
@@ -324,9 +324,9 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
struct vm_area_struct *vma, struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long)vmf->virtual_address;
- unsigned long pfn;
void __pmem *addr;
pgoff_t size;
+ pfn_t pfn;
int error;
/*
@@ -354,7 +354,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh,
}
dax_unmap_bh(bh, addr);
- error = vm_insert_mixed(vma, vaddr, pfn);
+ error = vm_insert_mixed(vma, vaddr, pfn_t_to_pfn(pfn));
out:
return error;
@@ -604,7 +604,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if (buffer_unwritten(&bh) || buffer_new(&bh)) {
int i;
long length;
- unsigned long pfn;
+ pfn_t pfn;
void __pmem *kaddr = __dax_map_bh(&bh, blkbits, &pfn, &length);
if (IS_ERR(kaddr)) {
@@ -612,7 +612,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
- if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ if ((length < PMD_SIZE) || (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR))
goto fallback;
for (i = 0; i < PTRS_PER_PMD; i++)
@@ -668,8 +668,8 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
result = VM_FAULT_NOPAGE;
spin_unlock(ptl);
} else {
+ pfn_t pfn;
long length;
- unsigned long pfn;
void __pmem *kaddr = __dax_map_bh(&bh, blkbits, &pfn, &length);
if (IS_ERR(kaddr)) {
@@ -677,10 +677,11 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
goto out;
}
dax_unmap_bh(&bh, kaddr);
- if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
+ if ((length < PMD_SIZE) || (pfn_t_to_pfn(pfn) & PG_PMD_COLOUR))
goto fallback;
- result |= vmf_insert_pfn_pmd(vma, address, pmd, pfn, write);
+ result |= vmf_insert_pfn_pmd(vma, address, pmd,
+ pfn_t_to_pfn(pfn), write);
}
out:
@@ -1634,7 +1634,7 @@ struct block_device_operations {
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
long (*direct_access)(struct block_device *, sector_t, void __pmem **,
- unsigned long *pfn);
+ pfn_t *);
unsigned int (*check_events) (struct gendisk *disk,
unsigned int clearing);
/* ->media_changed() is DEPRECATED, use ->check_events() instead */
@@ -1653,7 +1653,7 @@ extern int bdev_read_page(struct block_device *, sector_t, struct page *);
extern int bdev_write_page(struct block_device *, sector_t, struct page *,
struct writeback_control *);
extern long bdev_direct_access(struct block_device *, sector_t,
- void __pmem **addr, unsigned long *pfn, long size);
+ void __pmem **addr, pfn_t *pfn, long size);
#else /* CONFIG_BLOCK */
struct block_device;
@@ -906,6 +906,71 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
}
/*
+ * PFN_FLAGS_MASK - mask of all the possible valid pfn_t flags
+ * PFN_SG_CHAIN - pfn is a pointer to the next scatterlist entry
+ * PFN_SG_LAST - pfn references a page and is the last scatterlist entry
+ * PFN_DEV - pfn is not covered by system memmap by default
+ * PFN_MAP - pfn has a dynamic page mapping established by a device driver
+ */
+#define PFN_FLAGS_MASK (~PAGE_MASK << (BITS_PER_LONG - PAGE_SHIFT))
+#define PFN_SG_CHAIN (1UL << (BITS_PER_LONG - 1))
+#define PFN_SG_LAST (1UL << (BITS_PER_LONG - 2))
+#define PFN_DEV (1UL << (BITS_PER_LONG - 3))
+#define PFN_MAP (1UL << (BITS_PER_LONG - 4))
+
+static inline pfn_t __pfn_to_pfn_t(unsigned long pfn, unsigned long flags)
+{
+ pfn_t pfn_t = { .val = pfn | (flags & PFN_FLAGS_MASK), };
+
+ return pfn_t;
+}
+
+/* a default pfn to pfn_t conversion assumes that @pfn is pfn_valid() */
+static inline pfn_t pfn_to_pfn_t(unsigned long pfn)
+{
+ return __pfn_to_pfn_t(pfn, 0);
+}
+
+static inline pfn_t phys_to_pfn_t(dma_addr_t addr, unsigned long flags)
+{
+ return __pfn_to_pfn_t(addr >> PAGE_SHIFT, flags);
+}
+
+static inline bool pfn_t_has_page(pfn_t pfn)
+{
+ return (pfn.val & PFN_MAP) == PFN_MAP || (pfn.val & PFN_DEV) == 0;
+}
+
+static inline unsigned long pfn_t_to_pfn(pfn_t pfn)
+{
+ return pfn.val & ~PFN_FLAGS_MASK;
+}
+
+static inline struct page *pfn_t_to_page(pfn_t pfn)
+{
+ if (pfn_t_has_page(pfn))
+ return pfn_to_page(pfn_t_to_pfn(pfn));
+ return NULL;
+}
+
+static inline dma_addr_t pfn_t_to_phys(pfn_t pfn)
+{
+ return PFN_PHYS(pfn_t_to_pfn(pfn));
+}
+
+static inline void *pfn_t_to_virt(pfn_t pfn)
+{
+ if (pfn_t_has_page(pfn))
+ return __va(pfn_t_to_phys(pfn));
+ return NULL;
+}
+
+static inline pfn_t page_to_pfn_t(struct page *page)
+{
+ return pfn_to_pfn_t(page_to_pfn(page));
+}
+
+/*
* Some inline functions in vmstat.h depend on page_zone()
*/
#include <linux/vmstat.h>
@@ -3,6 +3,15 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+
+/*
+ * pfn_t: encapsulates a page-frame number that is optionally backed
+ * by memmap (struct page). Whether a pfn_t has a 'struct page'
+ * backing is indicated by flags in the high bits of the value.
+ */
+typedef struct {
+ unsigned long val;
+} pfn_t;
#endif
#define PFN_ALIGN(x) (((unsigned long)(x) + (PAGE_SIZE - 1)) & PAGE_MASK)