[v5,1/2] mm: store zero pages to be swapped out in a bitmap

Message ID	20240614100902.3469724-2-usamaarif642@gmail.com (mailing list archive)
State	New
Headers	show Return-Path: <owner-linux-mm@kvack.org> From: Usama Arif <usamaarif642@gmail.com> To: akpm@linux-foundation.org Cc: hannes@cmpxchg.org, shakeel.butt@linux.dev, david@redhat.com, ying.huang@intel.com, hughd@google.com, willy@infradead.org, yosryahmed@google.com, nphamcs@gmail.com, chengming.zhou@linux.dev, linux-mm@kvack.org, linux-kernel@vger.kernel.org, kernel-team@meta.com, Usama Arif <usamaarif642@gmail.com> Subject: [PATCH v5 1/2] mm: store zero pages to be swapped out in a bitmap Date: Fri, 14 Jun 2024 11:07:05 +0100 Message-ID: <20240614100902.3469724-2-usamaarif642@gmail.com> In-Reply-To: <20240614100902.3469724-1-usamaarif642@gmail.com> References: <20240614100902.3469724-1-usamaarif642@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Sender: owner-linux-mm@kvack.org Precedence: bulk
Series	mm: store zero pages to be swapped out in a bitmap | expand: [v5,0/2] mm: store zero pages to be swapped out in a bitmap; [v5,1/2] mm: store zero pages to be swapped out in a bitmap; [v5,2/2] mm: remove code to handle same filled pages

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 3df75d62a835..ed03d421febd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -299,6 +299,7 @@ struct swap_info_struct {
 	signed char	type;		/* strange name for an index */
 	unsigned int	max;		/* extent of the swap_map */
 	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
+	unsigned long *zeromap;		/* vmalloc'ed bitmap to track zero pages */
 	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
 	struct swap_cluster_list free_clusters; /* free clusters list */
 	unsigned int lowest_bit;	/* index of first free in swap_map */
diff --git a/mm/page_io.c b/mm/page_io.c
index 6c1c1828bb88..480b8f221d90 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -172,6 +172,88 @@ int generic_swapfile_activate(struct swap_info_struct *sis,
 	goto out;
 }
 
+static bool is_folio_page_zero_filled(struct folio *folio, int i)
+{
+	unsigned long *data;
+	unsigned int pos, last_pos = PAGE_SIZE / sizeof(*data) - 1;
+	bool ret = false;
+
+	data = kmap_local_folio(folio, i * PAGE_SIZE);
+	if (data[last_pos])
+		goto out;
+	for (pos = 0; pos < PAGE_SIZE / sizeof(*data); pos++) {
+		if (data[pos])
+			goto out;
+	}
+	ret = true;
+out:
+	kunmap_local(data);
+	return ret;
+}
+
+static bool is_folio_zero_filled(struct folio *folio)
+{
+	unsigned int i;
+
+	for (i = 0; i < folio_nr_pages(folio); i++) {
+		if (!is_folio_page_zero_filled(folio, i))
+			return false;
+	}
+	return true;
+}
+
+static void folio_zero_fill(struct folio *folio)
+{
+	unsigned int i;
+
+	for (i = 0; i < folio_nr_pages(folio); i++)
+		clear_highpage(folio_page(folio, i));
+}
+
+static void swap_zeromap_folio_set(struct folio *folio)
+{
+	struct swap_info_struct *sis = swp_swap_info(folio->swap);
+	swp_entry_t entry;
+	unsigned int i;
+
+	for (i = 0; i < folio_nr_pages(folio); i++) {
+		entry = page_swap_entry(folio_page(folio, i));
+		set_bit(swp_offset(entry), sis->zeromap);
+	}
+}
+
+static void swap_zeromap_folio_clear(struct folio *folio)
+{
+	struct swap_info_struct *sis = swp_swap_info(folio->swap);
+	swp_entry_t entry;
+	unsigned int i;
+
+	for (i = 0; i < folio_nr_pages(folio); i++) {
+		entry = page_swap_entry(folio_page(folio, i));
+		clear_bit(swp_offset(entry), sis->zeromap);
+	}
+}
+
+/*
+ * Return the index of the first subpage which is not zero-filled
+ * according to swap_info_struct->zeromap.
+ * If all pages are zero-filled according to zeromap, it will return
+ * folio_nr_pages(folio).
+ */
+static unsigned int swap_zeromap_folio_test(struct folio *folio)
+{
+	struct swap_info_struct *sis = swp_swap_info(folio->swap);
+	swp_entry_t entry;
+	unsigned int i;
+
+	for (i = 0; i < folio_nr_pages(folio); i++) {
+		entry = page_swap_entry(folio_page(folio, i));
+		if (!test_bit(swp_offset(entry), sis->zeromap))
+			return i;
+	}
+	return i;
+}
+
 /*
  * We may have stale swap cache pages in memory: notice
  * them here and get rid of the unnecessary final write.
@@ -195,6 +277,13 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		folio_unlock(folio);
 		return ret;
 	}
+
+	if (is_folio_zero_filled(folio)) {
+		swap_zeromap_folio_set(folio);
+		folio_unlock(folio);
+		return 0;
+	}
+	swap_zeromap_folio_clear(folio);
 	if (zswap_store(folio)) {
 		folio_unlock(folio);
 		return 0;
@@ -424,6 +513,26 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
 	mempool_free(sio, sio_pool);
 }
 
+static bool swap_read_folio_zeromap(struct folio *folio)
+{
+	unsigned int idx = swap_zeromap_folio_test(folio);
+
+	if (idx == 0)
+		return false;
+
+	/*
+	 * Swapping in a large folio that is partially in the zeromap is not
+	 * currently handled. Return true without marking the folio uptodate so
+	 * that an IO error is emitted (e.g. do_swap_page() will sigbus).
+	 */
+	if (WARN_ON_ONCE(idx < folio_nr_pages(folio)))
+		return true;
+
+	folio_zero_fill(folio);
+	folio_mark_uptodate(folio);
+	return true;
+}
+
 static void swap_read_folio_fs(struct folio *folio, struct swap_iocb **plug)
 {
 	struct swap_info_struct *sis = swp_swap_info(folio->swap);
@@ -514,7 +623,9 @@ void swap_read_folio(struct folio *folio, struct swap_iocb **plug)
 	}
 	delayacct_swapin_start();
 
-	if (zswap_load(folio)) {
+	if (swap_read_folio_zeromap(folio)) {
+		folio_unlock(folio);
+	} else if (zswap_load(folio)) {
 		folio_unlock(folio);
 	} else if (data_race(sis->flags & SWP_FS_OPS)) {
 		swap_read_folio_fs(folio, plug);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 9c6d8e557c0f..0b8270359bcf 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -747,6 +747,14 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
 	unsigned long begin = offset;
 	unsigned long end = offset + nr_entries - 1;
 	void (*swap_slot_free_notify)(struct block_device *, unsigned long);
+	unsigned int i;
+
+	/*
+	 * Use atomic clear_bit operations only on zeromap instead of non-atomic
+	 * bitmap_clear to prevent adjacent bits corruption due to simultaneous writes.
+	 */
+	for (i = 0; i < nr_entries; i++)
+		clear_bit(offset + i, si->zeromap);
 
 	if (offset < si->lowest_bit)
 		si->lowest_bit = offset;
@@ -2635,6 +2643,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	free_percpu(p->cluster_next_cpu);
 	p->cluster_next_cpu = NULL;
 	vfree(swap_map);
+	bitmap_free(p->zeromap);
 	kvfree(cluster_info);
 	/* Destroy swap account information */
 	swap_cgroup_swapoff(p->type);
@@ -3161,6 +3170,12 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		goto bad_swap_unlock_inode;
 	}
 
+	p->zeromap = bitmap_zalloc(maxpages, GFP_KERNEL);
+	if (!p->zeromap) {
+		error = -ENOMEM;
+		goto bad_swap_unlock_inode;
+	}
+
 	if (p->bdev && bdev_stable_writes(p->bdev))
 		p->flags |= SWP_STABLE_WRITES;

[v5,1/2] mm: store zero pages to be swapped out in a bitmap

Commit Message

Comments

Patch