Message ID | 163969850289.20885.1044395970457169316.stgit@noble.brown (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | Repair SWAP-over-NFS | expand |
On Thu, 16 Dec 2021 at 23:54, NeilBrown <neilb@suse.de> wrote: > > To submit an async read with ->swap_rw() we need to allocate > a structure to hold the kiocb and other details. swap_readpage() cannot > handle transient failure, so create a mempool to provide the structures. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > mm/page_io.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++------ > mm/swap.h | 1 + > mm/swapfile.c | 5 +++++ > 3 files changed, 58 insertions(+), 6 deletions(-) ... > diff --git a/mm/swapfile.c b/mm/swapfile.c > index f23d9ff21cf8..43539be38e68 100644 > --- a/mm/swapfile.c > +++ b/mm/swapfile.c > @@ -2401,6 +2401,11 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) > if (ret < 0) > return ret; > sis->flags |= SWP_ACTIVATED; > + if ((sis->flags & SWP_FS_OPS) && > + sio_pool_init() != 0) { > + destroy_swap_extents(sis); > + return -ENOMEM; > + } > return ret; > } This code is called before 'swapon_mutex' is taken in the swapon code path, so it is possible for multiple swapons to race here, creating two (or more) memory pools. Mark
> +int sio_pool_init(void) > +{ > + if (!sio_pool) > + sio_pool = mempool_create_kmalloc_pool( > + SWAP_CLUSTER_MAX, sizeof(struct swap_iocb)); I can't see anything serializing access here, so we'll need a lock or a cmpxchg dance. > + if (sio_pool) > + return 0; > + else > + return -ENOMEM; Nit: This would flow much more nicely as: if (!sio_pool) return -ENOMEM; return 0; > int swap_readpage(struct page *page, bool synchronous) > { > struct bio *bio; > @@ -378,13 +412,25 @@ int swap_readpage(struct page *page, bool synchronous) > } > > if (data_race(sis->flags & SWP_FS_OPS)) { > - //struct file *swap_file = sis->swap_file; > - //struct address_space *mapping = swap_file->f_mapping; These commented-out lines should not have been left behind by the previous patch. In fact, I suspect the part of the previous patch that adds ->swap_rw should probably be folded into this patch. > + struct file *swap_file = sis->swap_file; > + struct address_space *mapping = swap_file->f_mapping; > + struct iov_iter from; > + struct swap_iocb *sio; > + loff_t pos = page_file_offset(page); > + > + sio = mempool_alloc(sio_pool, GFP_KERNEL); > + init_sync_kiocb(&sio->iocb, swap_file); > + sio->iocb.ki_pos = pos; > + sio->iocb.ki_complete = sio_read_complete; > + sio->bvec.bv_page = page; > + sio->bvec.bv_len = PAGE_SIZE; > + sio->bvec.bv_offset = 0; > + > + iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE); > + ret = mapping->a_ops->swap_rw(&sio->iocb, &from); > + if (ret != -EIOCBQUEUED) > + sio_read_complete(&sio->iocb, ret); > > goto out; I'd be tempted to split the SWP_FS_OPS case into a helper to keep the code tidy.
diff --git a/mm/page_io.c b/mm/page_io.c index a9fe5de5dc32..47d7e7866e33 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -283,6 +283,23 @@ static void bio_associate_blkg_from_page(struct bio *bio, struct page *page) #define bio_associate_blkg_from_page(bio, page) do { } while (0) #endif /* CONFIG_MEMCG && CONFIG_BLK_CGROUP */ +struct swap_iocb { + struct kiocb iocb; + struct bio_vec bvec; +}; +static mempool_t *sio_pool; + +int sio_pool_init(void) +{ + if (!sio_pool) + sio_pool = mempool_create_kmalloc_pool( + SWAP_CLUSTER_MAX, sizeof(struct swap_iocb)); + if (sio_pool) + return 0; + else + return -ENOMEM; +} + int __swap_writepage(struct page *page, struct writeback_control *wbc, bio_end_io_t end_write_func) { @@ -353,6 +370,23 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, return 0; } +static void sio_read_complete(struct kiocb *iocb, long ret) +{ + struct swap_iocb *sio = container_of(iocb, struct swap_iocb, iocb); + struct page *page = sio->bvec.bv_page; + + if (ret != 0 && ret != PAGE_SIZE) { + SetPageError(page); + ClearPageUptodate(page); + pr_alert_ratelimited("Read-error on swap-device\n"); + } else { + SetPageUptodate(page); + count_vm_event(PSWPIN); + } + unlock_page(page); + mempool_free(sio, sio_pool); +} + int swap_readpage(struct page *page, bool synchronous) { struct bio *bio; @@ -378,13 +412,25 @@ int swap_readpage(struct page *page, bool synchronous) } if (data_race(sis->flags & SWP_FS_OPS)) { - //struct file *swap_file = sis->swap_file; - //struct address_space *mapping = swap_file->f_mapping; + struct file *swap_file = sis->swap_file; + struct address_space *mapping = swap_file->f_mapping; + struct iov_iter from; + struct swap_iocb *sio; + loff_t pos = page_file_offset(page); + + sio = mempool_alloc(sio_pool, GFP_KERNEL); + init_sync_kiocb(&sio->iocb, swap_file); + sio->iocb.ki_pos = pos; + sio->iocb.ki_complete = sio_read_complete; + sio->bvec.bv_page = page; + sio->bvec.bv_len = PAGE_SIZE; + sio->bvec.bv_offset = 0; 
+ + iov_iter_bvec(&from, READ, &sio->bvec, 1, PAGE_SIZE); + ret = mapping->a_ops->swap_rw(&sio->iocb, &from); + if (ret != -EIOCBQUEUED) + sio_read_complete(&sio->iocb, ret); - /* This needs to use ->swap_rw() */ - ret = -EINVAL; - if (!ret) - count_vm_event(PSWPIN); goto out; } diff --git a/mm/swap.h b/mm/swap.h index 13e72a5023aa..128a1d3e5558 100644 --- a/mm/swap.h +++ b/mm/swap.h @@ -3,6 +3,7 @@ #include <linux/blk_types.h> /* for bio_end_io_t */ /* linux/mm/page_io.c */ +int sio_pool_init(void); int swap_readpage(struct page *page, bool do_poll); int swap_writepage(struct page *page, struct writeback_control *wbc); void end_swap_bio_write(struct bio *bio); diff --git a/mm/swapfile.c b/mm/swapfile.c index f23d9ff21cf8..43539be38e68 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2401,6 +2401,11 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span) if (ret < 0) return ret; sis->flags |= SWP_ACTIVATED; + if ((sis->flags & SWP_FS_OPS) && + sio_pool_init() != 0) { + destroy_swap_extents(sis); + return -ENOMEM; + } return ret; }
To submit an async read with ->swap_rw() we need to allocate a structure to hold the kiocb and other details. swap_readpage() cannot handle transient failure, so create a mempool to provide the structures. Signed-off-by: NeilBrown <neilb@suse.de> --- mm/page_io.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++------ mm/swap.h | 1 + mm/swapfile.c | 5 +++++ 3 files changed, 58 insertions(+), 6 deletions(-)