diff mbox series

mempolicy: alloc_pages_mpol() for NUMA policy without vma: fix

Message ID 00dc4f56-e623-7c85-29ea-4211e93063f6@google.com (mailing list archive)
State New
Headers show
Series mempolicy: alloc_pages_mpol() for NUMA policy without vma: fix | expand

Commit Message

Hugh Dickins Oct. 24, 2023, 6:44 a.m. UTC
mm-unstable commit 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA
policy without vma") ended read_swap_cache_async() supporting NULL vma -
okay; but missed the NULL mpol being passed to __read_swap_cache_async()
by zswap_writeback_entry() - oops!

Since its other callers all give good mpol, add get_task_policy(current)
there in mm/zswap.c, to produce the same good-enough behaviour as before
(and task policy, acted on in current task, does not require the refcount
to be dup'ed).

But if that policy is (quite reasonably) MPOL_INTERLEAVE, then ilx must
be NO_INTERLEAVE_INDEX rather than 0, to provide the same distribution
as before: move that definition from mempolicy.c to mempolicy.h.

Reported-by: Domenico Cerasuolo <mimmocerasuolo@gmail.com>
Closes: https://lore.kernel.org/linux-mm/74e34633-6060-f5e3-aee-7040d43f2e93@google.com/T/#mf08c877d1884fc7867f9e328cdf02257ff3b3ae9
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 48a7bd12d57f ("mempolicy: alloc_pages_mpol() for NUMA policy without vma")
Signed-off-by: Hugh Dickins <hughd@google.com>
---
 include/linux/mempolicy.h | 2 ++
 mm/mempolicy.c            | 2 --
 mm/zswap.c                | 7 +++++--
 3 files changed, 7 insertions(+), 4 deletions(-)

Comments

kernel test robot Oct. 24, 2023, 8:17 a.m. UTC | #1
Hi Hugh,

kernel test robot noticed the following build warnings:



url:    https://github.com/intel-lab-lkp/linux/commits/UPDATE-20231024-144517/Hugh-Dickins/hugetlbfs-drop-shared-NUMA-mempolicy-pretence/20231003-173301
base:   the 10th patch of https://lore.kernel.org/r/74e34633-6060-f5e3-aee-7040d43f2e93%40google.com
patch link:    https://lore.kernel.org/r/00dc4f56-e623-7c85-29ea-4211e93063f6%40google.com
patch subject: [PATCH] mempolicy: alloc_pages_mpol() for NUMA policy without vma: fix
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202310241551.uY40myKo-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from mm/zswap.c:41:
   mm/internal.h: In function 'shrinker_debugfs_name_alloc':
   mm/internal.h:1232:9: warning: function 'shrinker_debugfs_name_alloc' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
    1232 |         shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap);
         |         ^~~~~~~~
   mm/zswap.c: In function 'zswap_writeback_entry':
   mm/zswap.c:1322:16: error: implicit declaration of function 'get_task_policy'; did you mean 'get_vma_policy'? [-Werror=implicit-function-declaration]
    1322 |         mpol = get_task_policy(current);
         |                ^~~~~~~~~~~~~~~
         |                get_vma_policy
>> mm/zswap.c:1322:14: warning: assignment to 'struct mempolicy *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
    1322 |         mpol = get_task_policy(current);
         |              ^
   cc1: some warnings being treated as errors


vim +1322 mm/zswap.c

  1282	
  1283	/*********************************
  1284	* writeback code
  1285	**********************************/
  1286	/*
  1287	 * Attempts to free an entry by adding a page to the swap cache,
  1288	 * decompressing the entry data into the page, and issuing a
  1289	 * bio write to write the page back to the swap device.
  1290	 *
  1291	 * This can be thought of as a "resumed writeback" of the page
  1292	 * to the swap device.  We are basically resuming the same swap
  1293	 * writeback path that was intercepted with the zswap_store()
  1294	 * in the first place.  After the page has been decompressed into
  1295	 * the swap cache, the compressed version stored by zswap can be
  1296	 * freed.
  1297	 */
  1298	static int zswap_writeback_entry(struct zswap_entry *entry,
  1299					 struct zswap_tree *tree)
  1300	{
  1301		swp_entry_t swpentry = entry->swpentry;
  1302		struct page *page;
  1303		struct mempolicy *mpol;
  1304		struct scatterlist input, output;
  1305		struct crypto_acomp_ctx *acomp_ctx;
  1306		struct zpool *pool = zswap_find_zpool(entry);
  1307		bool page_was_allocated;
  1308		u8 *src, *tmp = NULL;
  1309		unsigned int dlen;
  1310		int ret;
  1311		struct writeback_control wbc = {
  1312			.sync_mode = WB_SYNC_NONE,
  1313		};
  1314	
  1315		if (!zpool_can_sleep_mapped(pool)) {
  1316			tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
  1317			if (!tmp)
  1318				return -ENOMEM;
  1319		}
  1320	
  1321		/* try to allocate swap cache page */
> 1322		mpol = get_task_policy(current);
  1323		page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
  1324					NO_INTERLEAVE_INDEX, &page_was_allocated);
  1325		if (!page) {
  1326			ret = -ENOMEM;
  1327			goto fail;
  1328		}
  1329	
  1330		/* Found an existing page, we raced with load/swapin */
  1331		if (!page_was_allocated) {
  1332			put_page(page);
  1333			ret = -EEXIST;
  1334			goto fail;
  1335		}
  1336	
  1337		/*
  1338		 * Page is locked, and the swapcache is now secured against
  1339		 * concurrent swapping to and from the slot. Verify that the
  1340		 * swap entry hasn't been invalidated and recycled behind our
  1341		 * backs (our zswap_entry reference doesn't prevent that), to
  1342		 * avoid overwriting a new swap page with old compressed data.
  1343		 */
  1344		spin_lock(&tree->lock);
  1345		if (zswap_rb_search(&tree->rbroot, swp_offset(entry->swpentry)) != entry) {
  1346			spin_unlock(&tree->lock);
  1347			delete_from_swap_cache(page_folio(page));
  1348			ret = -ENOMEM;
  1349			goto fail;
  1350		}
  1351		spin_unlock(&tree->lock);
  1352	
  1353		/* decompress */
  1354		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
  1355		dlen = PAGE_SIZE;
  1356	
  1357		src = zpool_map_handle(pool, entry->handle, ZPOOL_MM_RO);
  1358		if (!zpool_can_sleep_mapped(pool)) {
  1359			memcpy(tmp, src, entry->length);
  1360			src = tmp;
  1361			zpool_unmap_handle(pool, entry->handle);
  1362		}
  1363	
  1364		mutex_lock(acomp_ctx->mutex);
  1365		sg_init_one(&input, src, entry->length);
  1366		sg_init_table(&output, 1);
  1367		sg_set_page(&output, page, PAGE_SIZE, 0);
  1368		acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, dlen);
  1369		ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
  1370		dlen = acomp_ctx->req->dlen;
  1371		mutex_unlock(acomp_ctx->mutex);
  1372	
  1373		if (!zpool_can_sleep_mapped(pool))
  1374			kfree(tmp);
  1375		else
  1376			zpool_unmap_handle(pool, entry->handle);
  1377	
  1378		BUG_ON(ret);
  1379		BUG_ON(dlen != PAGE_SIZE);
  1380	
  1381		/* page is up to date */
  1382		SetPageUptodate(page);
  1383	
  1384		/* move it to the tail of the inactive list after end_writeback */
  1385		SetPageReclaim(page);
  1386	
  1387		/* start writeback */
  1388		__swap_writepage(page, &wbc);
  1389		put_page(page);
  1390		zswap_written_back_pages++;
  1391	
  1392		return ret;
  1393	
  1394	fail:
  1395		if (!zpool_can_sleep_mapped(pool))
  1396			kfree(tmp);
  1397	
  1398		/*
  1399		 * If we get here because the page is already in swapcache, a
  1400		 * load may be happening concurrently. It is safe and okay to
  1401		 * not free the entry. It is also okay to return !0.
  1402		 */
  1403		return ret;
  1404	}
  1405
Hugh Dickins Oct. 24, 2023, 3:56 p.m. UTC | #2
On Tue, 24 Oct 2023, kernel test robot wrote:

> Hi Hugh,
> 
> kernel test robot noticed the following build warnings:
> 
> 
> 
> url:    https://github.com/intel-lab-lkp/linux/commits/UPDATE-20231024-144517/Hugh-Dickins/hugetlbfs-drop-shared-NUMA-mempolicy-pretence/20231003-173301
> base:   the 10th patch of https://lore.kernel.org/r/74e34633-6060-f5e3-aee-7040d43f2e93%40google.com
> patch link:    https://lore.kernel.org/r/00dc4f56-e623-7c85-29ea-4211e93063f6%40google.com
> patch subject: [PATCH] mempolicy: alloc_pages_mpol() for NUMA policy without vma: fix
> config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/config)
> compiler: m68k-linux-gcc (GCC) 13.2.0
> reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231024/202310241551.uY40myKo-lkp@intel.com/reproduce)
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new version of
> the same patch/commit), kindly add following tags
> | Reported-by: kernel test robot <lkp@intel.com>
> | Closes: https://lore.kernel.org/oe-kbuild-all/202310241551.uY40myKo-lkp@intel.com/
> 
> All warnings (new ones prefixed by >>):
> 
>    In file included from mm/zswap.c:41:
>    mm/internal.h: In function 'shrinker_debugfs_name_alloc':
>    mm/internal.h:1232:9: warning: function 'shrinker_debugfs_name_alloc' might be a candidate for 'gnu_printf' format attribute [-Wsuggest-attribute=format]
>     1232 |         shrinker->name = kvasprintf_const(GFP_KERNEL, fmt, ap);
>          |         ^~~~~~~~
>    mm/zswap.c: In function 'zswap_writeback_entry':
>    mm/zswap.c:1322:16: error: implicit declaration of function 'get_task_policy'; did you mean 'get_vma_policy'? [-Werror=implicit-function-declaration]
>     1322 |         mpol = get_task_policy(current);
>          |                ^~~~~~~~~~~~~~~
>          |                get_vma_policy
> >> mm/zswap.c:1322:14: warning: assignment to 'struct mempolicy *' from 'int' makes pointer from integer without a cast [-Wint-conversion]
>     1322 |         mpol = get_task_policy(current);
>          |              ^
>    cc1: some warnings being treated as errors

Gaah, thanks for that, I never built it without CONFIG_NUMA=y:
v2 patch with a get_task_policy() without CONFIG_NUMA coming up,
built this time.

Hugh
diff mbox series

Patch

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 2801d5b0a4e9..dd9ed2ce7fd5 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -17,6 +17,8 @@ 
 
 struct mm_struct;
 
+#define NO_INTERLEAVE_INDEX (-1UL)	/* use task il_prev for interleaving */
+
 #ifdef CONFIG_NUMA
 
 /*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 898ee2e3c85b..989293180eb6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -114,8 +114,6 @@ 
 #define MPOL_MF_INVERT       (MPOL_MF_INTERNAL << 1)	/* Invert check for nodemask */
 #define MPOL_MF_WRLOCK       (MPOL_MF_INTERNAL << 2)	/* Write-lock walked vmas */
 
-#define NO_INTERLEAVE_INDEX (-1UL)
-
 static struct kmem_cache *policy_cache;
 static struct kmem_cache *sn_cache;
 
diff --git a/mm/zswap.c b/mm/zswap.c
index 37d2b1cb2ecb..060857adca76 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -24,6 +24,7 @@ 
 #include <linux/swap.h>
 #include <linux/crypto.h>
 #include <linux/scatterlist.h>
+#include <linux/mempolicy.h>
 #include <linux/mempool.h>
 #include <linux/zpool.h>
 #include <crypto/acompress.h>
@@ -1057,6 +1058,7 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 {
 	swp_entry_t swpentry = entry->swpentry;
 	struct page *page;
+	struct mempolicy *mpol;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
 	struct zpool *pool = zswap_find_zpool(entry);
@@ -1075,8 +1077,9 @@  static int zswap_writeback_entry(struct zswap_entry *entry,
 	}
 
 	/* try to allocate swap cache page */
-	page = __read_swap_cache_async(swpentry, GFP_KERNEL, NULL, 0,
-				       &page_was_allocated);
+	mpol = get_task_policy(current);
+	page = __read_swap_cache_async(swpentry, GFP_KERNEL, mpol,
+				NO_INTERLEAVE_INDEX, &page_was_allocated);
 	if (!page) {
 		ret = -ENOMEM;
 		goto fail;