| Message ID | 20230302231638.521280-1-dhowells@redhat.com (mailing list archive) |
| --- | --- |
| Series | smb3, afs: Revert changes to {cifs,afs}_writepages_region() |

David Howells <dhowells@redhat.com> wrote:

> AFS firstly.
> ...
>
> Base + write_cache_pages():
>  WRITE: bw=280MiB/s (294MB/s), 69.7MiB/s-70.5MiB/s (73.0MB/s-73.9MB/s)
>  WRITE: bw=285MiB/s (299MB/s), 70.9MiB/s-71.5MiB/s (74.4MB/s-74.9MB/s)
>  WRITE: bw=290MiB/s (304MB/s), 71.6MiB/s-73.2MiB/s (75.1MB/s-76.8MB/s)

Here's the patch to convert AFS to use write_cache_pages(), retaining the use of page->private to track the dirtied part of the page.

David
---
 write.c |  382 +++++++++++++---------------------------------------------------
 1 file changed, 78 insertions(+), 304 deletions(-)

diff --git a/fs/afs/write.c b/fs/afs/write.c
index 571f3b9a417e..01323fa58e1c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,11 +14,6 @@
 #include <linux/netfs.h>
 #include "internal.h"
 
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop);
-
 static void afs_write_to_cache(struct afs_vnode *vnode, loff_t start, size_t len,
 			       loff_t i_size, bool caching);
 
@@ -56,10 +51,8 @@ static int afs_flush_conflicting_write(struct address_space *mapping,
 		.range_start	= folio_pos(folio),
 		.range_end	= LLONG_MAX,
 	};
-	loff_t next;
 
-	return afs_writepages_region(mapping, &wbc, folio_pos(folio), LLONG_MAX,
-				     &next, true);
+	return afs_writepages(mapping, &wbc);
 }
 
 /*
@@ -449,212 +442,57 @@ static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t
 	return afs_put_operation(op);
 }
 
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
- */
-static void afs_extend_writeback(struct address_space *mapping,
-				 struct afs_vnode *vnode,
-				 long *_count,
-				 loff_t start,
-				 loff_t max_len,
-				 bool new_content,
-				 bool caching,
-				 unsigned int *_len)
-{
-	struct pagevec pvec;
-	struct folio *folio;
-	unsigned long priv;
-	unsigned int psize, filler = 0;
-	unsigned int f, t;
-	loff_t len = *_len;
-	pgoff_t index = (start + len) / PAGE_SIZE;
-	bool stop = true;
-	unsigned int i;
-
-	XA_STATE(xas, &mapping->i_pages, index);
-	pagevec_init(&pvec);
-
-	do {
-		/* Firstly, we gather up a batch of contiguous dirty pages
-		 * under the RCU read lock - but we can't clear the dirty flags
-		 * there if any of those pages are mapped.
-		 */
-		rcu_read_lock();
-
-		xas_for_each(&xas, folio, ULONG_MAX) {
-			stop = true;
-			if (xas_retry(&xas, folio))
-				continue;
-			if (xa_is_value(folio))
-				break;
-			if (folio_index(folio) != index)
-				break;
-
-			if (!folio_try_get_rcu(folio)) {
-				xas_reset(&xas);
-				continue;
-			}
-
-			/* Has the page moved or been split? */
-			if (unlikely(folio != xas_reload(&xas))) {
-				folio_put(folio);
-				break;
-			}
-
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				break;
-			}
-			if (!folio_test_dirty(folio) ||
-			    folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			psize = folio_size(folio);
-			priv = (unsigned long)folio_get_private(folio);
-			f = afs_folio_dirty_from(folio, priv);
-			t = afs_folio_dirty_to(folio, priv);
-			if (f != 0 && !new_content) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			len += filler + t;
-			filler = psize - t;
-			if (len >= max_len || *_count <= 0)
-				stop = true;
-			else if (t == psize || new_content)
-				stop = false;
-
-			index += folio_nr_pages(folio);
-			if (!pagevec_add(&pvec, &folio->page))
-				break;
-			if (stop)
-				break;
-		}
-
-		if (!stop)
-			xas_pause(&xas);
-		rcu_read_unlock();
-
-		/* Now, if we obtained any pages, we can shift them to being
-		 * writable and mark them for caching.
-		 */
-		if (!pagevec_count(&pvec))
-			break;
-
-		for (i = 0; i < pagevec_count(&pvec); i++) {
-			folio = page_folio(pvec.pages[i]);
-			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			if (folio_start_writeback(folio))
-				BUG();
-			afs_folio_start_fscache(caching, folio);
-
-			*_count -= folio_nr_pages(folio);
-			folio_unlock(folio);
-		}
-
-		pagevec_release(&pvec);
-		cond_resched();
-	} while (!stop);
-
-	*_len = len;
-}
+struct afs_writepages_context {
+	unsigned long long	start;
+	unsigned long long	end;
+	unsigned long long	annex_at;
+	bool			begun;
+	bool			caching;
+	bool			new_content;
+};
 
 /*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
+ * Flush a block of pages to the server and the cache.
  */
-static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
-						struct writeback_control *wbc,
-						struct folio *folio,
-						loff_t start, loff_t end)
+static int afs_writepages_submit(struct address_space *mapping,
+				 struct writeback_control *wbc,
+				 struct afs_writepages_context *ctx)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
 	struct iov_iter iter;
-	unsigned long priv;
-	unsigned int offset, to, len, max_len;
-	loff_t i_size = i_size_read(&vnode->netfs.inode);
-	bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
-	bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
-	long count = wbc->nr_to_write;
+	unsigned long long i_size = i_size_read(&vnode->netfs.inode);
+	size_t len = ctx->end - ctx->start;
 	int ret;
 
-	_enter(",%lx,%llx-%llx", folio_index(folio), start, end);
-
-	if (folio_start_writeback(folio))
-		BUG();
-	afs_folio_start_fscache(caching, folio);
-
-	count -= folio_nr_pages(folio);
-
-	/* Find all consecutive lockable dirty pages that have contiguous
-	 * written regions, stopping when we find a page that is not
-	 * immediately lockable, is not dirty or is missing, or we reach the
-	 * end of the range.
-	 */
-	priv = (unsigned long)folio_get_private(folio);
-	offset = afs_folio_dirty_from(folio, priv);
-	to = afs_folio_dirty_to(folio, priv);
-	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
-
-	len = to - offset;
-	start += offset;
-	if (start < i_size) {
-		/* Trim the write to the EOF; the extra data is ignored.  Also
-		 * put an upper limit on the size of a single storedata op.
-		 */
-		max_len = 65536 * 4096;
-		max_len = min_t(unsigned long long, max_len, end - start + 1);
-		max_len = min_t(unsigned long long, max_len, i_size - start);
-
-		if (len < max_len &&
-		    (to == folio_size(folio) || new_content))
-			afs_extend_writeback(mapping, vnode, &count,
-					     start, max_len, new_content,
-					     caching, &len);
-		len = min_t(loff_t, len, max_len);
-	}
+	_enter("%llx-%llx", ctx->start, ctx->start + len - 1);
 
 	/* We now have a contiguous set of dirty pages, each with writeback
-	 * set; the first page is still locked at this point, but all the rest
-	 * have been unlocked.
+	 * set.
 	 */
-	folio_unlock(folio);
-
-	if (start < i_size) {
-		_debug("write back %x @%llx [%llx]", len, start, i_size);
+	if (ctx->start < i_size) {
+		if (len > i_size - ctx->start)
+			len = i_size - ctx->start;
+		_debug("write back %zx @%llx [%llx]", len, ctx->start, i_size);
 
 		/* Speculatively write to the cache.  We have to fix this up
 		 * later if the store fails.
 		 */
-		afs_write_to_cache(vnode, start, len, i_size, caching);
+		afs_write_to_cache(vnode, ctx->start, len, i_size, ctx->caching);
 
-		iov_iter_xarray(&iter, ITER_SOURCE, &mapping->i_pages, start, len);
-		ret = afs_store_data(vnode, &iter, start, false);
+		iov_iter_xarray(&iter, ITER_SOURCE,
+				&mapping->i_pages, ctx->start, len);
+		ret = afs_store_data(vnode, &iter, ctx->start, false);
 	} else {
-		_debug("write discard %x @%llx [%llx]", len, start, i_size);
+		_debug("write discard %zx @%llx [%llx]", len, ctx->start, i_size);
 
 		/* The dirty region was entirely beyond the EOF. */
-		fscache_clear_page_bits(mapping, start, len, caching);
-		afs_pages_written_back(vnode, start, len);
+		fscache_clear_page_bits(mapping, ctx->start, len, ctx->caching);
+		afs_pages_written_back(vnode, ctx->start, len);
 		ret = 0;
 	}
 
 	switch (ret) {
 	case 0:
-		wbc->nr_to_write = count;
 		ret = len;
 		break;
 
@@ -668,13 +506,13 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
 	case -EKEYREJECTED:
 	case -EKEYREVOKED:
 	case -ENETRESET:
-		afs_redirty_pages(wbc, mapping, start, len);
+		afs_redirty_pages(wbc, mapping, ctx->start, len);
 		mapping_set_error(mapping, ret);
 		break;
 
 	case -EDQUOT:
 	case -ENOSPC:
-		afs_redirty_pages(wbc, mapping, start, len);
+		afs_redirty_pages(wbc, mapping, ctx->start, len);
 		mapping_set_error(mapping, -ENOSPC);
 		break;
 
@@ -686,7 +524,7 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
 	case -ENOMEDIUM:
 	case -ENXIO:
 		trace_afs_file_error(vnode, ret, afs_file_error_writeback_fail);
-		afs_kill_pages(mapping, start, len);
+		afs_kill_pages(mapping, ctx->start, len);
 		mapping_set_error(mapping, ret);
 		break;
 	}
@@ -696,100 +534,51 @@ static ssize_t afs_write_back_from_locked_folio(struct address_space *mapping,
 }
 
 /*
- * write a region of pages back to the server
+ * Add a page to the set and flush when large enough.
  */
-static int afs_writepages_region(struct address_space *mapping,
-				 struct writeback_control *wbc,
-				 loff_t start, loff_t end, loff_t *_next,
-				 bool max_one_loop)
+static int afs_writepages_add_folio(struct folio *folio,
+				    struct writeback_control *wbc, void *data)
 {
-	struct folio *folio;
-	struct folio_batch fbatch;
-	ssize_t ret;
-	unsigned int i;
-	int n, skips = 0;
-
-	_enter("%llx,%llx,", start, end);
-	folio_batch_init(&fbatch);
-
-	do {
-		pgoff_t index = start / PAGE_SIZE;
+	struct afs_writepages_context *ctx = data;
+	struct afs_vnode *vnode = AFS_FS_I(folio->mapping->host);
+	unsigned long long pos = folio_pos(folio);
+	unsigned long priv;
+	size_t f, t;
+	int ret;
 
-		n = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					   PAGECACHE_TAG_DIRTY, &fbatch);
+	priv = (unsigned long)folio_get_private(folio);
+	f = afs_folio_dirty_from(folio, priv);
+	t = afs_folio_dirty_to(folio, priv);
 
-		if (!n)
-			break;
-		for (i = 0; i < n; i++) {
-			folio = fbatch.folios[i];
-			start = folio_pos(folio); /* May regress with THPs */
-
-			_debug("wback %lx", folio_index(folio));
-
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0) {
-					folio_batch_release(&fbatch);
-					return ret;
-				}
-			} else {
-				if (!folio_trylock(folio))
-					continue;
-			}
-
-			if (folio->mapping != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
-			}
-
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode != WB_SYNC_NONE) {
-					folio_wait_writeback(folio);
-#ifdef CONFIG_AFS_FSCACHE
-					folio_wait_fscache(folio);
-#endif
-				} else {
-					start += folio_size(folio);
-				}
-				if (wbc->sync_mode == WB_SYNC_NONE) {
-					if (skips >= 5 || need_resched()) {
-						*_next = start;
-						_leave(" = 0 [%llx]", *_next);
-						return 0;
-					}
-					skips++;
-				}
-				continue;
-			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				BUG();
-			ret = afs_write_back_from_locked_folio(mapping, wbc,
-							       folio, start, end);
-			if (ret < 0) {
-				_leave(" = %zd", ret);
-				folio_batch_release(&fbatch);
-				return ret;
-			}
-
-			start += ret;
+	if (ctx->begun) {
+		if ((f == 0 || ctx->new_content) &&
+		    pos == ctx->annex_at) {
+			trace_afs_folio_dirty(vnode, tracepoint_string("store+"), folio);
+			goto add;
 		}
+		ret = afs_writepages_submit(folio->mapping, wbc, ctx);
+		if (ret < 0)
+			return ret;
+	}
+
+	ctx->begun = true;
+	ctx->start = pos + f;
+	trace_afs_folio_dirty(vnode, tracepoint_string("store"), folio);
+add:
+	ctx->end = pos + t;
+	ctx->annex_at = pos + folio_size(folio);
 
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (wbc->nr_to_write > 0);
+	folio_wait_fscache(folio);
+	folio_start_writeback(folio);
+	afs_folio_start_fscache(ctx->caching, folio);
+	folio_unlock(folio);
 
-	*_next = start;
-	_leave(" = 0 [%llx]", *_next);
+	if (ctx->end - ctx->start >= 65536 * 4096) {
+		ret = afs_writepages_submit(folio->mapping, wbc, ctx);
+		if (ret < 0)
+			return ret;
+		ctx->begun = false;
+	}
 	return 0;
 }
 
@@ -800,7 +589,10 @@ int afs_writepages(struct address_space *mapping,
 		   struct writeback_control *wbc)
 {
 	struct afs_vnode *vnode = AFS_FS_I(mapping->host);
-	loff_t start, next;
+	struct afs_writepages_context ctx = {
+		.caching	= fscache_cookie_enabled(afs_vnode_cache(vnode)),
+		.new_content	= test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags),
+	};
 	int ret;
 
 	_enter("");
@@ -814,29 +606,11 @@ int afs_writepages(struct address_space *mapping,
 	else if (!down_read_trylock(&vnode->validate_lock))
 		return 0;
 
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index * PAGE_SIZE;
-		ret = afs_writepages_region(mapping, wbc, start, LLONG_MAX,
-					    &next, false);
-		if (ret == 0) {
-			mapping->writeback_index = next / PAGE_SIZE;
-			if (start > 0 && wbc->nr_to_write > 0) {
-				ret = afs_writepages_region(mapping, wbc, 0,
-							    start, &next, false);
-				if (ret == 0)
-					mapping->writeback_index =
-						next / PAGE_SIZE;
-			}
-		}
-	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		ret = afs_writepages_region(mapping, wbc, 0, LLONG_MAX,
-					    &next, false);
-		if (wbc->nr_to_write > 0 && ret == 0)
-			mapping->writeback_index = next / PAGE_SIZE;
-	} else {
-		ret = afs_writepages_region(mapping, wbc,
-					    wbc->range_start, wbc->range_end,
-					    &next, false);
+	ret = write_cache_pages(mapping, wbc, afs_writepages_add_folio, &ctx);
+	if (ret >= 0 && ctx.begun) {
+		ret = afs_writepages_submit(mapping, wbc, &ctx);
+		if (ret < 0)
+			return ret;
 	}
 
 	up_read(&vnode->validate_lock);
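
For readers who want the shape of the conversion without the AFS specifics, below is a minimal sketch of the write_cache_pages() batching pattern the patch follows. It is illustrative only: the myfs_* names are hypothetical placeholders rather than functions from the patch, it assumes the folio-based writepage_t callback in use at the time, and it omits the per-page dirty bounds kept in folio->private and the fscache handling that the real code retains.

/* Illustrative sketch only (not part of the patch): write_cache_pages()
 * hands each dirty folio to the callback locked, with the dirty flag
 * already cleared for I/O.  The callback batches contiguous folios into a
 * context and flushes the batch when contiguity breaks or a size cap is
 * hit, mirroring afs_writepages_add_folio() above.
 */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

struct myfs_wb_ctx {
	loff_t	start;		/* file offset of the first byte in the batch */
	loff_t	end;		/* file offset one past the last byte */
	bool	begun;		/* batch holds at least one folio */
};

/* Hypothetical hook: would send bytes ctx->start..ctx->end to the server. */
static int myfs_submit_batch(struct address_space *mapping,
			     struct writeback_control *wbc,
			     struct myfs_wb_ctx *ctx)
{
	/* e.g. build an iov_iter_xarray over the range and issue the store */
	return 0;
}

/* Per-folio callback passed to write_cache_pages(). */
static int myfs_add_folio(struct folio *folio, struct writeback_control *wbc,
			  void *data)
{
	struct myfs_wb_ctx *ctx = data;
	loff_t pos = folio_pos(folio);
	int ret;

	/* Not contiguous with the current batch: flush what we have first. */
	if (ctx->begun && pos != ctx->end) {
		ret = myfs_submit_batch(folio->mapping, wbc, ctx);
		if (ret < 0)
			return ret;
		ctx->begun = false;
	}

	if (!ctx->begun) {
		ctx->begun = true;
		ctx->start = pos;
	}
	ctx->end = pos + folio_size(folio);

	folio_start_writeback(folio);
	folio_unlock(folio);

	/* Cap the size of a single store operation, as the patch does. */
	if (ctx->end - ctx->start >= 65536 * 4096) {
		ret = myfs_submit_batch(folio->mapping, wbc, ctx);
		if (ret < 0)
			return ret;
		ctx->begun = false;
	}
	return 0;
}

static int myfs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct myfs_wb_ctx ctx = {};
	int ret;

	ret = write_cache_pages(mapping, wbc, myfs_add_folio, &ctx);
	if (ret >= 0 && ctx.begun)
		ret = myfs_submit_batch(mapping, wbc, &ctx);
	return ret;
}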