@@ -3932,6 +3932,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct page *page;
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
+ bool folio_allocated = false;
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
@@ -3991,35 +3992,29 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (unlikely(!si))
goto out;
- folio = swap_cache_get_folio(entry, vma, vmf->address);
- if (folio)
- page = folio_file_page(folio, swp_offset(entry));
- swapcache = folio;
+ if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && __swap_count(entry) == 1) {
+ folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf, &folio_allocated);
+ } else {
+ folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf, &folio_allocated);
+ }
if (!folio) {
- if (data_race(si->flags & SWP_SYNCHRONOUS_IO) &&
- __swap_count(entry) == 1) {
- folio = swapin_direct(entry, GFP_HIGHUSER_MOVABLE, vmf);
- } else {
- folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE, vmf);
- }
-
- if (!folio) {
- /*
- * Back out if somebody else faulted in this pte
- * while we released the pte lock.
- */
- vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
- vmf->address, &vmf->ptl);
- if (likely(vmf->pte &&
- pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
- ret = VM_FAULT_OOM;
- goto unlock;
- }
+ /*
+ * Back out if somebody else faulted in this pte
+ * while we released the pte lock.
+ */
+ vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+ vmf->address, &vmf->ptl);
+ if (likely(vmf->pte &&
+ pte_same(ptep_get(vmf->pte), vmf->orig_pte)))
+ ret = VM_FAULT_OOM;
+ goto unlock;
+ }
- swapcache = folio;
- page = folio_file_page(folio, swp_offset(entry));
+ swapcache = folio;
+ page = folio_file_page(folio, swp_offset(entry));
+ if (folio_allocated) {
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
count_vm_event(PGMAJFAULT);
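
For reference, a standalone userspace sketch of the out-parameter pattern the hunk above depends on: the lookup helper reports through a bool * whether it had to allocate (the cache-miss case that do_swap_page now accounts as a major fault) or found an existing entry. lookup_or_alloc() and cache_slot are invented names, not the kernel API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static int *cache_slot;	/* stand-in for a single swap cache slot */

static int *lookup_or_alloc(int value, bool *allocated)
{
	*allocated = false;
	if (cache_slot)
		return cache_slot;	/* cache hit: no I/O, no major fault */
	cache_slot = malloc(sizeof(*cache_slot));
	if (!cache_slot)
		return NULL;
	*cache_slot = value;
	*allocated = true;		/* cache miss: caller counts the fault */
	return cache_slot;
}

int main(void)
{
	bool allocated;

	lookup_or_alloc(42, &allocated);
	printf("first lookup:  allocated=%d\n", allocated);	/* 1 */
	lookup_or_alloc(42, &allocated);
	printf("second lookup: allocated=%d\n", allocated);	/* 0 */
	free(cache_slot);
	return 0;
}
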
@@ -1570,20 +1570,6 @@ static inline struct mempolicy *shmem_get_sbmpol(struct shmem_sb_info *sbinfo)
static struct mempolicy *shmem_get_pgoff_policy(struct shmem_inode_info *info,
pgoff_t index, unsigned int order, pgoff_t *ilx);
-static struct folio *shmem_swapin_cluster(swp_entry_t swap, gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t index)
-{
- struct mempolicy *mpol;
- pgoff_t ilx;
- struct folio *folio;
-
- mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
- folio = swap_cluster_readahead(swap, gfp, mpol, ilx);
- mpol_cond_put(mpol);
-
- return folio;
-}
-
/*
* Make sure huge_gfp is always more limited than limit_gfp.
* Some of the flags set permissions, while others set limitations.
@@ -1857,9 +1843,12 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
+ bool folio_allocated = false;
struct swap_info_struct *si;
struct folio *folio = NULL;
+ struct mempolicy *mpol;
swp_entry_t swap;
+ pgoff_t ilx;
int error;
VM_BUG_ON(!*foliop || !xa_is_value(*foliop));
@@ -1878,22 +1867,28 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
}
/* Look it up and read it in.. */
- folio = swap_cache_get_folio(swap, NULL, 0);
+ folio = swap_cache_try_get(swap);
if (!folio) {
- /* Or update major stats only when swapin succeeds?? */
- if (fault_type) {
- *fault_type |= VM_FAULT_MAJOR;
- count_vm_event(PGMAJFAULT);
- count_memcg_event_mm(fault_mm, PGMAJFAULT);
- }
/* Here we actually start the io */
- folio = shmem_swapin_cluster(swap, gfp, info, index);
+ mpol = shmem_get_pgoff_policy(info, index, 0, &ilx);
+ folio = swap_cluster_readahead(swap, gfp, mpol, ilx, &folio_allocated);
+ mpol_cond_put(mpol);
if (!folio) {
error = -ENOMEM;
goto failed;
}
+
+ /* Update major stats only when swapin succeeds */
+ if (folio_allocated && fault_type) {
+ *fault_type |= VM_FAULT_MAJOR;
+ count_vm_event(PGMAJFAULT);
+ count_memcg_event_mm(fault_mm, PGMAJFAULT);
+ }
}
+ if (!folio_allocated)
+ swap_cache_update_ra(folio, NULL, 0);
+
/* We have to do this with folio locked to prevent races */
folio_lock(folio);
if (!folio_test_swapcache(folio) ||
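
The shmem path above passes a NULL vma to swap_cache_update_ra(), so only the global hit counter can be updated there. A minimal userspace model of that per-context versus global accounting split; struct ra_ctx and record_hit() are invented for illustration.

#include <stdbool.h>
#include <stdio.h>

struct ra_ctx {			/* stands in for per-VMA readahead state */
	int hits;
};

static int global_hits;		/* stands in for the global hit counter */

static void record_hit(struct ra_ctx *ctx, bool use_ctx_ra)
{
	if (ctx && use_ctx_ra)
		ctx->hits++;	/* fold the hit into the per-context state */
	else
		global_hits++;	/* no context (e.g. shmem): global counter */
}

int main(void)
{
	struct ra_ctx ctx = { 0 };

	record_hit(&ctx, true);		/* per-context accounting */
	record_hit(NULL, true);		/* shmem-like call with a NULL context */
	printf("ctx=%d global=%d\n", ctx.hits, global_hits);	/* 1 1 */
	return 0;
}
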
@@ -39,7 +39,8 @@ void __delete_from_swap_cache(struct folio *folio,
void delete_from_swap_cache(struct folio *folio);
void clear_shadow_from_swap_cache(swp_entry_t entry);
int swap_cache_add_wait(struct folio *folio, swp_entry_t entry, gfp_t gfp);
-struct folio *swap_cache_get_folio(swp_entry_t entry,
+struct folio *swap_cache_try_get(swp_entry_t entry);
+void swap_cache_update_ra(struct folio *folio,
struct vm_area_struct *vma, unsigned long addr);
struct folio *filemap_get_incore_folio(struct address_space *mapping,
pgoff_t index);
@@ -49,16 +50,18 @@ struct folio *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
struct folio *swap_cache_alloc_or_get(swp_entry_t entry, gfp_t gfp_flags,
struct mempolicy *mpol, pgoff_t ilx, bool *folio_allocated);
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t flag,
- struct mempolicy *mpol, pgoff_t ilx);
+ struct mempolicy *mpol, pgoff_t ilx, bool *folio_allocated);
struct folio *swapin_direct(swp_entry_t entry, gfp_t flag,
- struct vm_fault *vmf);
+ struct vm_fault *vmf, bool *folio_allocated);
struct folio *swapin_readahead(swp_entry_t entry, gfp_t flag,
- struct vm_fault *vmf);
+ struct vm_fault *vmf, bool *folio_allocated);
static inline unsigned int folio_swap_flags(struct folio *folio)
{
return swp_swap_info(folio->swap)->flags;
}
+
+bool __swap_has_cache(swp_entry_t entry);
#else /* CONFIG_SWAP */
struct swap_iocb;
static inline void swap_read_folio(struct folio *folio, bool do_poll,
@@ -151,5 +154,10 @@ static inline unsigned int folio_swap_flags(struct folio *folio)
{
return 0;
}
+
+static inline bool __swap_has_cache(swp_entry_t entry)
+{
+ return false;
+}
#endif /* CONFIG_SWAP */
#endif /* _MM_SWAP_H */
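
The !CONFIG_SWAP stub above follows the usual kernel pattern of providing a static inline fallback so call sites compile with the feature configured out. A self-contained illustration, with HAVE_FEATURE standing in for CONFIG_SWAP and feature_has_cache() for __swap_has_cache(); built without -DHAVE_FEATURE it runs as-is, with it a real definition would have to be linked in.

#include <stdbool.h>
#include <stdio.h>

#ifdef HAVE_FEATURE
bool feature_has_cache(int entry);	/* real definition lives elsewhere */
#else
static inline bool feature_has_cache(int entry)
{
	(void)entry;
	return false;			/* feature compiled out: never cached */
}
#endif

int main(void)
{
	/* The same call site builds with or without -DHAVE_FEATURE. */
	printf("has cache: %d\n", feature_has_cache(1));
	return 0;
}
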
@@ -300,54 +300,54 @@ static inline bool swap_use_vma_readahead(void)
}
/*
- * Lookup a swap entry in the swap cache. A found folio will be returned
- * unlocked and with its refcount incremented - we rely on the kernel
- * lock getting page table operations atomic even if we drop the folio
- * lock before returning.
- *
- * Caller must lock the swap device or hold a reference to keep it valid.
+ * Try to get the swap cache folio; bail out quickly if the swapcache bit is not set.
*/
-struct folio *swap_cache_get_folio(swp_entry_t entry,
- struct vm_area_struct *vma, unsigned long addr)
+struct folio *swap_cache_try_get(swp_entry_t entry)
{
struct folio *folio;
- folio = filemap_get_folio(swap_address_space(entry), swp_offset(entry));
- if (!IS_ERR(folio)) {
- bool vma_ra = swap_use_vma_readahead();
- bool readahead;
-
- /*
- * At the moment, we don't support PG_readahead for anon THP
- * so let's bail out rather than confusing the readahead stat.
- */
- if (unlikely(folio_test_large(folio)))
+ if (__swap_has_cache(entry)) {
+ folio = filemap_get_folio(swap_address_space(entry),
+ swp_offset(entry));
+ if (!IS_ERR(folio))
return folio;
+ }
- readahead = folio_test_clear_readahead(folio);
- if (vma && vma_ra) {
- unsigned long ra_val;
- int win, hits;
-
- ra_val = GET_SWAP_RA_VAL(vma);
- win = SWAP_RA_WIN(ra_val);
- hits = SWAP_RA_HITS(ra_val);
- if (readahead)
- hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
- atomic_long_set(&vma->swap_readahead_info,
- SWAP_RA_VAL(addr, win, hits));
- }
+ return NULL;
+}
- if (readahead) {
- count_vm_event(SWAP_RA_HIT);
- if (!vma || !vma_ra)
- atomic_inc(&swapin_readahead_hits);
- }
- } else {
- folio = NULL;
+void swap_cache_update_ra(struct folio *folio, struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ bool vma_ra = swap_use_vma_readahead();
+ bool readahead;
+
+ /*
+ * At the moment, we don't support PG_readahead for anon THP
+ * so let's bail out rather than confusing the readahead stat.
+ */
+ if (unlikely(folio_test_large(folio)))
+ return;
+
+ readahead = folio_test_clear_readahead(folio);
+ if (vma && vma_ra) {
+ unsigned long ra_val;
+ int win, hits;
+
+ ra_val = GET_SWAP_RA_VAL(vma);
+ win = SWAP_RA_WIN(ra_val);
+ hits = SWAP_RA_HITS(ra_val);
+ if (readahead)
+ hits = min_t(int, hits + 1, SWAP_RA_HITS_MAX);
+ atomic_long_set(&vma->swap_readahead_info,
+ SWAP_RA_VAL(addr, win, hits));
}
- return folio;
+ if (readahead) {
+ count_vm_event(SWAP_RA_HIT);
+ if (!vma || !vma_ra)
+ atomic_inc(&swapin_readahead_hits);
+ }
}
/**
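
swap_cache_update_ra() above packs the faulting address, the readahead window, and the hit count into one atomic long via the SWAP_RA_VAL()/SWAP_RA_WIN()/SWAP_RA_HITS() helpers. A rough standalone model of that kind of bit packing; the field widths below are invented for illustration and do not match the kernel's macros.

#include <stdio.h>

/* Illustrative layout: low 4 bits hits, next 4 bits window, the rest
 * holds a page-aligned address.  Not the kernel's layout. */
#define RA_HITS(v)	((v) & 0xf)
#define RA_WIN(v)	(((v) >> 4) & 0xf)
#define RA_ADDR(v)	((v) & ~0xffUL)
#define RA_VAL(addr, win, hits) \
	(((addr) & ~0xffUL) | (((win) & 0xf) << 4) | ((hits) & 0xf))

int main(void)
{
	unsigned long addr = 0x7f00UL << 12;	/* a page-aligned address */
	unsigned long v = RA_VAL(addr, 4, 2);

	v = RA_VAL(RA_ADDR(v), RA_WIN(v), RA_HITS(v) + 1);	/* record one hit */
	printf("win=%lu hits=%lu\n", RA_WIN(v), RA_HITS(v));	/* win=4 hits=3 */
	return 0;
}
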
@@ -485,6 +485,11 @@ struct folio *swap_cache_alloc_or_get(swp_entry_t entry, gfp_t gfp_mask,
if (!si)
goto out_no_device;
+ /* First do a racy check to see if the swap cache is already populated. */
+ swapcache = swap_cache_try_get(entry);
+ if (swapcache)
+ goto out_no_alloc;
+
/* We are very likely the first user, alloc and try add to the swapcache. */
folio = (struct folio *)alloc_pages_mpol(gfp_mask, 0, mpol, ilx,
numa_node_id());
@@ -614,7 +619,8 @@ static unsigned long swapin_nr_pages(unsigned long offset)
* are fairly likely to have been swapped out from the same node.
*/
struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
- struct mempolicy *mpol, pgoff_t ilx)
+ struct mempolicy *mpol, pgoff_t ilx,
+ bool *folio_allocated)
{
struct folio *folio;
unsigned long entry_offset = swp_offset(entry);
@@ -644,6 +650,10 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
folio = swap_cache_alloc_or_get(
swp_entry(swp_type(entry), offset),
gfp_mask, mpol, ilx, &page_allocated);
+ if (offset == entry_offset) {
+ *folio_allocated = page_allocated;
+ folio_allocated = NULL;
+ }
if (!folio)
continue;
if (page_allocated) {
@@ -666,6 +676,8 @@ struct folio *swap_cluster_readahead(swp_entry_t entry, gfp_t gfp_mask,
zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
}
+ if (folio_allocated)
+ *folio_allocated = page_allocated;
return folio;
}
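
The *folio_allocated = page_allocated; folio_allocated = NULL; pair above is a report-once idiom: only the target offset publishes its allocation status, and clearing the pointer keeps later iterations and the post-loop fallback from overwriting it. A standalone sketch of the same idiom with invented names (scan(), target_was_odd).

#include <stdbool.h>
#include <stdio.h>

static void scan(int target, bool *target_was_odd)
{
	for (int i = 0; i < 8; i++) {
		bool odd = i & 1;

		if (i == target && target_was_odd) {
			*target_was_odd = odd;
			target_was_odd = NULL;	/* report the target only once */
		}
	}
	if (target_was_odd)		/* target never visited in the loop */
		*target_was_odd = false;
}

int main(void)
{
	bool odd;

	scan(3, &odd);
	printf("target was odd: %d\n", odd);	/* 1 */
	return 0;
}
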
@@ -779,7 +791,8 @@ static void swap_ra_info(struct vm_fault *vmf,
*
*/
static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
- struct mempolicy *mpol, pgoff_t targ_ilx, struct vm_fault *vmf)
+ struct mempolicy *mpol, pgoff_t targ_ilx,
+ struct vm_fault *vmf, bool *folio_allocated)
{
struct blk_plug plug;
struct swap_iocb *splug = NULL;
@@ -818,6 +831,10 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
pte = NULL;
folio = swap_cache_alloc_or_get(entry, gfp_mask, mpol, ilx,
&page_allocated);
+ if (i == ra_info.offset) {
+ *folio_allocated = page_allocated;
+ folio_allocated = NULL;
+ }
if (!folio)
continue;
if (page_allocated) {
@@ -842,6 +859,8 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
zswap_folio_swapin(folio);
swap_read_folio(folio, false, NULL);
}
+ if (folio_allocated)
+ *folio_allocated = page_allocated;
return folio;
}
@@ -854,20 +873,21 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask,
* Returns the folio for entry after it is read in.
*/
struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_fault *vmf)
+ struct vm_fault *vmf, bool *folio_allocated)
{
struct mempolicy *mpol;
struct folio *folio;
- bool page_allocated;
pgoff_t ilx;
mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
folio = swap_cache_alloc_or_get(entry, gfp_mask, mpol, ilx,
- &page_allocated);
+ folio_allocated);
mpol_cond_put(mpol);
- if (page_allocated)
+ if (*folio_allocated)
swap_read_folio(folio, true, NULL);
+ else if (folio)
+ swap_cache_update_ra(folio, vmf->vma, vmf->address);
return folio;
}
@@ -885,18 +905,22 @@ struct folio *swapin_direct(swp_entry_t entry, gfp_t gfp_mask,
* or vma-based(ie, virtual address based on faulty address) readahead.
*/
struct folio *swapin_readahead(swp_entry_t entry, gfp_t gfp_mask,
- struct vm_fault *vmf)
+ struct vm_fault *vmf, bool *folio_allocated)
{
struct mempolicy *mpol;
- pgoff_t ilx;
struct folio *folio;
+ bool allocated;
+ pgoff_t ilx;
mpol = get_vma_policy(vmf->vma, vmf->address, 0, &ilx);
folio = swap_use_vma_readahead() ?
- swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf) :
- swap_cluster_readahead(entry, gfp_mask, mpol, ilx);
+ swap_vma_readahead(entry, gfp_mask, mpol, ilx, vmf, &allocated) :
+ swap_cluster_readahead(entry, gfp_mask, mpol, ilx, &allocated);
mpol_cond_put(mpol);
+ *folio_allocated = allocated;
+ if (!allocated && folio)
+ swap_cache_update_ra(folio, vmf->vma, vmf->address);
+
return folio;
}
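
swapin_readahead() above lets whichever readahead helper runs fill a local allocated flag, then publishes it through the caller's *folio_allocated before anything reads it. A small standalone model of that forward-then-publish shape; frontend(), helper_a() and helper_b() are invented names.

#include <stdbool.h>
#include <stdio.h>

static int helper_a(bool *did_work) { *did_work = true;  return 1; }
static int helper_b(bool *did_work) { *did_work = false; return 2; }

static int frontend(bool use_a, bool *did_work)
{
	bool local;
	int ret = use_a ? helper_a(&local) : helper_b(&local);

	*did_work = local;		/* publish before anyone reads it */
	return ret;
}

int main(void)
{
	bool did_work;

	frontend(true, &did_work);
	printf("did_work=%d\n", did_work);	/* 1 */
	frontend(false, &did_work);
	printf("did_work=%d\n", did_work);	/* 0 */
	return 0;
}
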
@@ -1455,6 +1455,15 @@ int __swap_count(swp_entry_t entry)
return swap_count(si->swap_map[offset]);
}
+bool __swap_has_cache(swp_entry_t entry)
+{
+ pgoff_t offset = swp_offset(entry);
+ struct swap_info_struct *si = swp_swap_info(entry);
+ unsigned char count = READ_ONCE(si->swap_map[offset]);
+
+ return swap_count(count) && (count & SWAP_HAS_CACHE);
+}
+
/*
* How many references to @entry are currently swapped out?
* This does not give an exact answer when swap count is continued,
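
__swap_has_cache() above reads a single swap_map byte and requires both a non-zero usage count and the SWAP_HAS_CACHE flag packed into that byte. A standalone model of the packed-byte check; the mask values below are illustrative, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

#define COUNT_MASK	0x3f	/* low bits: usage count (illustrative) */
#define HAS_CACHE	0x40	/* flag bit: a cache page exists (illustrative) */

static bool entry_has_cache(unsigned char map_byte)
{
	/* Both conditions must hold: entry still in use and cache bit set. */
	return (map_byte & COUNT_MASK) && (map_byte & HAS_CACHE);
}

int main(void)
{
	printf("%d\n", entry_has_cache(0x01));			/* in use, no cache: 0 */
	printf("%d\n", entry_has_cache(0x01 | HAS_CACHE));	/* in use + cache: 1 */
	printf("%d\n", entry_has_cache(HAS_CACHE));		/* cache bit only: 0 */
	return 0;
}
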
@@ -1862,10 +1871,18 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
struct folio *folio;
unsigned long offset;
unsigned char swp_count;
+ bool folio_allocated = false;
swp_entry_t entry;
int ret;
pte_t ptent;
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = addr,
+ .real_address = addr,
+ .pmd = pmd,
+ };
+
if (!pte++) {
pte = pte_offset_map(pmd, addr);
if (!pte)
@@ -1884,19 +1901,8 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
offset = swp_offset(entry);
pte_unmap(pte);
pte = NULL;
-
- folio = swap_cache_get_folio(entry, vma, addr);
- if (!folio) {
- struct vm_fault vmf = {
- .vma = vma,
- .address = addr,
- .real_address = addr,
- .pmd = pmd,
- };
-
- folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
- &vmf);
- }
+ folio = swapin_readahead(entry, GFP_HIGHUSER_MOVABLE,
+ &vmf, &folio_allocated);
if (!folio) {
swp_count = READ_ONCE(si->swap_map[offset]);
if (swap_count(swp_count) == 0 || swp_count == SWAP_MAP_BAD)
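
The hunk above hoists the struct vm_fault setup to the top of unuse_pte_range() as a designated initializer so the now-unconditional swapin_readahead() call can use it. A standalone reminder of how a designated initializer leaves unnamed members zeroed; struct fault_ctx is an invented stand-in.

#include <stdio.h>

struct fault_ctx {		/* invented stand-in for struct vm_fault */
	unsigned long address;
	unsigned long real_address;
	int flags;		/* deliberately left out of the initializer */
};

int main(void)
{
	unsigned long addr = 0x1000;
	struct fault_ctx ctx = {
		.address = addr,
		.real_address = addr,	/* named fields set explicitly */
	};

	/* Unnamed members are zero-initialized. */
	printf("addr=%#lx flags=%d\n", ctx.address, ctx.flags);	/* 0x1000 0 */
	return 0;
}
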