From patchwork Mon Jul 10 13:02:45 2023
X-Patchwork-Id: 13307056
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 1/9] iov_iter: Handle compound highmem pages in copy_page_from_iter_atomic()
Date: Mon, 10 Jul 2023 14:02:45 +0100
Message-Id: <20230710130253.3484695-2-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

copy_page_from_iter_atomic() already handles !highmem compound pages
correctly, but if we are passed a highmem compound page, each base page
needs to be mapped & unmapped individually.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Kent Overstreet
Reviewed-by: Darrick J. Wong
---
 lib/iov_iter.c | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index b667b1e2f688..c728b6e4fb18 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -566,24 +566,37 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
-		struct iov_iter *i)
+size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+		size_t bytes, struct iov_iter *i)
 {
-	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
-	if (!page_copy_sane(page, offset, bytes)) {
-		kunmap_atomic(kaddr);
+	size_t n, copied = 0;
+
+	if (!page_copy_sane(page, offset, bytes))
 		return 0;
-	}
-	if (WARN_ON_ONCE(!i->data_source)) {
-		kunmap_atomic(kaddr);
+	if (WARN_ON_ONCE(!i->data_source))
 		return 0;
-	}
-	iterate_and_advance(i, bytes, base, len, off,
-		copyin(p + off, base, len),
-		memcpy_from_iter(i, p + off, base, len)
-	)
-	kunmap_atomic(kaddr);
-	return bytes;
+
+	do {
+		char *p;
+
+		n = bytes - copied;
+		if (PageHighMem(page)) {
+			page += offset / PAGE_SIZE;
+			offset %= PAGE_SIZE;
+			n = min_t(size_t, n, PAGE_SIZE - offset);
+		}
+
+		p = kmap_atomic(page) + offset;
+		iterate_and_advance(i, n, base, len, off,
+			copyin(p + off, base, len),
+			memcpy_from_iter(i, p + off, base, len)
+		)
+		kunmap_atomic(p);
+		copied += n;
+		offset += n;
+	} while (PageHighMem(page) && copied != bytes && n > 0);
+
+	return copied;
 }
 EXPORT_SYMBOL(copy_page_from_iter_atomic);
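
A hypothetical caller sketch (not part of this patch) to make the behavioural
consequence concrete: for a highmem compound page the function may now copy
fewer than the requested bytes in one call, since it stops at each base-page
boundary, so a caller that wants the full amount loops on the return value.
The function name below is made up for illustration.

	/* Illustrative only: accumulate short copies from the iterator. */
	static size_t copy_all_from_iter(struct page *page, size_t offset,
					 size_t bytes, struct iov_iter *i)
	{
		size_t done = 0;

		while (done < bytes) {
			size_t n = copy_page_from_iter_atomic(page,
					offset + done, bytes - done, i);
			if (!n)		/* fault or bad arguments: give up */
				break;
			done += n;
		}
		return done;
	}
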
From patchwork Mon Jul 10 13:02:46 2023
X-Patchwork-Id: 13307054
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 2/9] iov_iter: Add copy_folio_from_iter_atomic()
Date: Mon, 10 Jul 2023 14:02:46 +0100
Message-Id: <20230710130253.3484695-3-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

Add a folio wrapper around copy_page_from_iter_atomic().

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Darrick J. Wong
---
 include/linux/uio.h | 9 ++++++++-
 lib/iov_iter.c      | 2 +-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/include/linux/uio.h b/include/linux/uio.h
index ff81e5ccaef2..42bce38a8e87 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
 	return ret;
 }
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
 		size_t bytes, struct iov_iter *i);
 void iov_iter_advance(struct iov_iter *i, size_t bytes);
 void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
 {
 	return copy_page_to_iter(&folio->page, offset, bytes, i);
 }
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+		size_t offset, size_t bytes, struct iov_iter *i)
+{
+	return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
 size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
 		size_t bytes, struct iov_iter *i);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index c728b6e4fb18..8da566a549ad 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -566,7 +566,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(iov_iter_zero);
 
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
 		size_t bytes, struct iov_iter *i)
 {
 	size_t n, copied = 0;
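
As context (not part of the patch), a minimal sketch of how a buffered write
path might use the new wrapper on a locked, possibly multi-page folio; the
function name is hypothetical and error handling is omitted.

	/* Illustrative only: copy user data into a folio at file position pos. */
	static size_t fill_folio_from_user(struct folio *folio, loff_t pos,
					   size_t bytes, struct iov_iter *i)
	{
		size_t offset = offset_in_folio(folio, pos);

		if (bytes > folio_size(folio) - offset)
			bytes = folio_size(folio) - offset;
		return copy_folio_from_iter_atomic(folio, offset, bytes, i);
	}
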
From patchwork Mon Jul 10 13:02:47 2023
X-Patchwork-Id: 13307055
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 3/9] iomap: Remove large folio handling in iomap_invalidate_folio()
Date: Mon, 10 Jul 2023 14:02:47 +0100
Message-Id: <20230710130253.3484695-4-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

We do not need to release the iomap_page in iomap_invalidate_folio()
to allow the folio to be split.  The splitting code will call
->release_folio() if there is still per-fs private data attached to
the folio.  At that point, we will check if the folio is still dirty
and decline to release the iomap_page.  It is possible to trigger the
warning in perfectly legitimate circumstances (eg if a disk read fails,
we do a partial write to the folio, then we truncate the folio), which
will cause those writes to be lost.

Fixes: 60d8231089f0 ("iomap: Support large folios in invalidatepage")
Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Darrick J. Wong
Reviewed-by: Christoph Hellwig
---
 fs/iomap/buffered-io.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index adb92cdb24b0..1cb905140528 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -508,11 +508,6 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
 		WARN_ON_ONCE(folio_test_writeback(folio));
 		folio_cancel_dirty(folio);
 		iomap_page_release(folio);
-	} else if (folio_test_large(folio)) {
-		/* Must release the iop so the page can be split */
-		WARN_ON_ONCE(!folio_test_uptodate(folio) &&
-			     folio_test_dirty(folio));
-		iomap_page_release(folio);
 	}
 }
 EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
From patchwork Mon Jul 10 13:02:48 2023
X-Patchwork-Id: 13307059
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 4/9] doc: Correct the description of ->release_folio
Date: Mon, 10 Jul 2023 14:02:48 +0100
Message-Id: <20230710130253.3484695-5-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

The filesystem ->release_folio method is called under more circumstances
now than when the documentation was written.  The second sentence
describing the interpretation of the return value is the wrong polarity
(false indicates failure, not success).  And the third sentence is also
wrong (the kernel calls try_to_free_buffers() instead).

So replace the entire paragraph with a detailed description of what the
state of the folio may be, the meaning of the gfp parameter, why the
method is being called and what the filesystem is expected to do.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
---
 Documentation/filesystems/locking.rst | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index ed148919e11a..ca3d24becbf5 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -374,10 +374,17 @@ invalidate_lock before invalidating page cache in truncate / hole punch path
 (and thus calling into ->invalidate_folio) to block races between page cache
 invalidation and page cache filling functions (fault, read, ...).
 
-->release_folio() is called when the kernel is about to try to drop the
-buffers from the folio in preparation for freeing it.  It returns false to
-indicate that the buffers are (or may be) freeable.  If ->release_folio is
-NULL, the kernel assumes that the fs has no private interest in the buffers.
+->release_folio() is called when the MM wants to make a change to the
+folio that would invalidate the filesystem's private data.  For example,
+it may be about to be removed from the address_space or split.  The folio
+is locked and not under writeback.  It may be dirty.  The gfp parameter
+is not usually used for allocation, but rather to indicate what the
+filesystem may do to attempt to free the private data.  The filesystem may
+return false to indicate that the folio's private data cannot be freed.
+If it returns true, it should have already removed the private data from
+the folio.  If a filesystem does not provide a ->release_folio method,
+the pagecache will assume that private data is buffer_heads and call
+try_to_free_buffers().
 
 ->free_folio() is called when the kernel has dropped the folio
 from the page cache.
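
To make the documented contract concrete, here is a minimal, hypothetical
->release_folio implementation (not part of this series) for a filesystem
whose private data is a small per-folio structure; struct myfs_private and
the function name are illustrative only.

	/* Illustrative only: detach and free our private data if we can. */
	static bool myfs_release_folio(struct folio *folio, gfp_t gfp)
	{
		struct myfs_private *priv = folio_get_private(folio);

		if (!priv)
			return true;	/* nothing of ours attached */
		if (folio_test_dirty(folio))
			return false;	/* still needed until writeback */

		folio_detach_private(folio);
		kfree(priv);
		return true;
	}
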
From patchwork Mon Jul 10 13:02:49 2023
X-Patchwork-Id: 13307058
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 5/9] iomap: Remove unnecessary test from iomap_release_folio()
Date: Mon, 10 Jul 2023 14:02:49 +0100
Message-Id: <20230710130253.3484695-6-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

The check for the folio being under writeback is unnecessary; the caller
has checked this and the folio is locked, so the folio cannot be under
writeback at this point.

The comment is somewhat misleading in that it talks about one specific
situation in which we can see a dirty folio.  There are others, so change
the comment to explain why we can't release the iomap_page.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
---
 fs/iomap/buffered-io.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 1cb905140528..7aa3009f907f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -483,12 +483,11 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
 			folio_size(folio));
 
 	/*
-	 * mm accommodates an old ext3 case where clean folios might
-	 * not have had the dirty bit cleared. Thus, it can send actual
-	 * dirty folios to ->release_folio() via shrink_active_list();
-	 * skip those here.
+	 * If the folio is dirty, we refuse to release our metadata because
+	 * it may be partially dirty.  Once we track per-block dirty state,
+	 * we can release the metadata if every block is dirty.
 	 */
-	if (folio_test_dirty(folio) || folio_test_writeback(folio))
+	if (folio_test_dirty(folio))
 		return false;
 	iomap_page_release(folio);
 	return true;
From patchwork Mon Jul 10 13:02:50 2023
X-Patchwork-Id: 13307060
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 6/9] filemap: Add fgf_t typedef
Date: Mon, 10 Jul 2023 14:02:50 +0100
Message-Id: <20230710130253.3484695-7-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

Similarly to gfp_t, define fgf_t as its own type to prevent various
misuses and confusion.  Leave the flags as FGP_* for now to reduce the
size of this patch; they will be converted to FGF_* later.  Move the
documentation to the definition of the type instead of burying it in
the __filemap_get_folio() documentation.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
Reviewed-by: Kent Overstreet
---
 fs/btrfs/file.c         |  6 +++---
 fs/f2fs/compress.c      |  2 +-
 fs/f2fs/f2fs.h          |  2 +-
 fs/iomap/buffered-io.c  |  2 +-
 include/linux/pagemap.h | 48 +++++++++++++++++++++++++++++----------
 mm/filemap.c            | 19 ++--------------
 mm/folio-compat.c       |  2 +-
 7 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..3876fae90fc3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -876,9 +876,9 @@ static int prepare_uptodate_page(struct inode *inode,
 	return 0;
 }
 
-static unsigned int get_prepare_fgp_flags(bool nowait)
+static fgf_t get_prepare_fgp_flags(bool nowait)
 {
-	unsigned int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+	fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
 
 	if (nowait)
 		fgp_flags |= FGP_NOWAIT;
@@ -910,7 +910,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
 	int i;
 	unsigned long index = pos >> PAGE_SHIFT;
 	gfp_t mask = get_prepare_gfp_flags(inode, nowait);
-	unsigned int fgp_flags = get_prepare_fgp_flags(nowait);
+	fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
 	int err = 0;
 	int faili;
 
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 236d890f560b..0f7df9c11af3 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1045,7 +1045,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
 	struct address_space *mapping = cc->inode->i_mapping;
 	struct page *page;
 	sector_t last_block_in_bio;
-	unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
+	fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
 	pgoff_t start_idx = start_idx_of_cluster(cc);
 	int i, ret;
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c7cb2177b252..c275ff2753c2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2736,7 +2736,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
 
 static inline struct page *f2fs_pagecache_get_page(
 				struct address_space *mapping, pgoff_t index,
-				int fgp_flags, gfp_t gfp_mask)
+				fgf_t fgp_flags, gfp_t gfp_mask)
 {
 	if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET))
 		return NULL;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 7aa3009f907f..5e9380cc3e83 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -467,7 +467,7 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  */
 struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
 {
-	unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
+	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
 
 	if (iter->flags & IOMAP_NOWAIT)
 		fgp |= FGP_NOWAIT;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 716953ee1ebd..911201fc41fc 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -501,22 +501,48 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
 pgoff_t page_cache_prev_miss(struct address_space *mapping,
 		pgoff_t index, unsigned long max_scan);
 
-#define FGP_ACCESSED		0x00000001
-#define FGP_LOCK		0x00000002
-#define FGP_CREAT		0x00000004
-#define FGP_WRITE		0x00000008
-#define FGP_NOFS		0x00000010
-#define FGP_NOWAIT		0x00000020
-#define FGP_FOR_MMAP		0x00000040
-#define FGP_STABLE		0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio().  For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ *   added to the page cache and the VM's LRU list.  The folio is
+ *   returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ *   folio is already in cache.  If the folio was allocated, unlock it
+ *   before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ *   implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED		((__force fgf_t)0x00000001)
+#define FGP_LOCK		((__force fgf_t)0x00000002)
+#define FGP_CREAT		((__force fgf_t)0x00000004)
+#define FGP_WRITE		((__force fgf_t)0x00000008)
+#define FGP_NOFS		((__force fgf_t)0x00000010)
+#define FGP_NOWAIT		((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
+#define FGP_STABLE		((__force fgf_t)0x00000080)
 
 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp);
+		fgf_t fgp_flags, gfp_t gfp);
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp);
+		fgf_t fgp_flags, gfp_t gfp);
 
 /**
  * filemap_get_folio - Find and get a folio.
@@ -590,7 +616,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
 }
 
 static inline struct page *find_get_page_flags(struct address_space *mapping,
-			pgoff_t offset, int fgp_flags)
+			pgoff_t offset, fgf_t fgp_flags)
 {
 	return pagecache_get_page(mapping, offset, fgp_flags, 0);
 }
diff --git a/mm/filemap.c b/mm/filemap.c
index 9e44a49bbd74..8a669fecfd1c 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1855,30 +1855,15 @@ void *filemap_get_entry(struct address_space *mapping, pgoff_t index)
  *
  * Looks up the page cache entry at @mapping & @index.
  *
- * @fgp_flags can be zero or more of these flags:
- *
- * * %FGP_ACCESSED - The folio will be marked accessed.
- * * %FGP_LOCK - The folio is returned locked.
- * * %FGP_CREAT - If no page is present then a new page is allocated using
- *   @gfp and added to the page cache and the VM's LRU list.
- *   The page is returned locked and with an increased refcount.
- * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
- *   page is already in cache.  If the page was allocated, unlock it before
- *   returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written to by the caller.
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
- * * %FGP_NOWAIT - Don't get blocked by page lock.
- * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
- *
  * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
  * if the %GFP flags specified for %FGP_CREAT are atomic.
  *
- * If there is a page cache page, it is returned with an increased refcount.
+ * If this function returns a folio, it is returned with an increased refcount.
  *
  * Return: The found folio or an ERR_PTR() otherwise.
  */
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp)
+		fgf_t fgp_flags, gfp_t gfp)
 {
 	struct folio *folio;
 
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index c6f056c20503..10c3247542cb 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(add_to_page_cache_lru);
 
 noinline
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
-		int fgp_flags, gfp_t gfp)
+		fgf_t fgp_flags, gfp_t gfp)
 {
 	struct folio *folio;
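
A brief illustrative sketch (not part of the patch) of what the __bitwise
annotation buys: when the kernel is built with sparse ("make C=1"), mixing
the gfp_t and fgf_t flag spaces now produces a warning, where a plain
unsigned int would have been accepted silently.  The function below is
hypothetical.

	/* Illustrative only: sparse now flags the commented-out mistakes. */
	static struct folio *get_locked_folio(struct address_space *mapping,
					      pgoff_t index)
	{
		fgf_t fgp = FGP_LOCK | FGP_ACCESSED;	/* ok: fgf_t | fgf_t */

		/* fgp |= GFP_NOFS;  <- would warn: gfp_t value used as fgf_t */
		/* __filemap_get_folio(mapping, index, GFP_KERNEL, 0);
		 *                                     ^ would warn as well  */

		return __filemap_get_folio(mapping, index, fgp,
					   mapping_gfp_mask(mapping));
	}
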
From patchwork Mon Jul 10 13:02:51 2023
X-Patchwork-Id: 13307057
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 7/9] filemap: Allow __filemap_get_folio to allocate large folios
Date: Mon, 10 Jul 2023 14:02:51 +0100
Message-Id: <20230710130253.3484695-8-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

Allow callers of __filemap_get_folio() to specify a preferred folio
order in the FGP flags.  This is only honoured in the FGP_CREAT path;
if there is already a folio in the page cache that covers the index,
we will return it, no matter what its order is.  No create-around is
attempted; we will only create folios which start at the specified
index.  Unmodified callers will continue to allocate order 0 folios.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
---
 include/linux/pagemap.h | 34 ++++++++++++++++++++++++++++
 mm/filemap.c            | 46 +++++++++++++++++++++++++++++------------
 mm/readahead.c          | 13 -------------
 3 files changed, 67 insertions(+), 26 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 911201fc41fc..d87840acbfb2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
 	return folio_detach_private(page_folio(page));
 }
 
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size.  I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER	8
+#endif
+
 #ifdef CONFIG_NUMA
 struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
 #else
@@ -535,9 +548,30 @@ typedef unsigned int __bitwise fgf_t;
 #define FGP_NOWAIT		((__force fgf_t)0x00000020)
 #define FGP_FOR_MMAP		((__force fgf_t)0x00000040)
 #define FGP_STABLE		((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf)	(((__force unsigned)fgf) >> 26)	/* top 6 bits */
 
 #define FGP_WRITEBEGIN		(FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
 
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created.  If there is already a folio at
+ * the index, it will be returned, no matter what its size.  If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+	unsigned int shift = ilog2(size);
+
+	if (shift <= PAGE_SHIFT)
+		return 0;
+	return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
 void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		fgf_t fgp_flags, gfp_t gfp);
diff --git a/mm/filemap.c b/mm/filemap.c
index 8a669fecfd1c..baafbf324c9f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1905,7 +1905,9 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		folio_wait_stable(folio);
 no_page:
 	if (!folio && (fgp_flags & FGP_CREAT)) {
+		unsigned order = FGF_GET_ORDER(fgp_flags);
 		int err;
+
 		if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
 			gfp |= __GFP_WRITE;
 		if (fgp_flags & FGP_NOFS)
@@ -1914,26 +1916,44 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 			gfp &= ~GFP_KERNEL;
 			gfp |= GFP_NOWAIT | __GFP_NOWARN;
 		}
-
-		folio = filemap_alloc_folio(gfp, 0);
-		if (!folio)
-			return ERR_PTR(-ENOMEM);
-
 		if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
 			fgp_flags |= FGP_LOCK;
 
-		/* Init accessed so avoid atomic mark_page_accessed later */
-		if (fgp_flags & FGP_ACCESSED)
-			__folio_set_referenced(folio);
+		if (!mapping_large_folio_support(mapping))
+			order = 0;
+		if (order > MAX_PAGECACHE_ORDER)
+			order = MAX_PAGECACHE_ORDER;
+		/* If we're not aligned, allocate a smaller folio */
+		if (index & ((1UL << order) - 1))
+			order = __ffs(index);
 
-		err = filemap_add_folio(mapping, folio, index, gfp);
-		if (unlikely(err)) {
+		do {
+			gfp_t alloc_gfp = gfp;
+
+			err = -ENOMEM;
+			if (order == 1)
+				order = 0;
+			if (order > 0)
+				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
+			folio = filemap_alloc_folio(alloc_gfp, order);
+			if (!folio)
+				continue;
+
+			/* Init accessed so avoid atomic mark_page_accessed later */
+			if (fgp_flags & FGP_ACCESSED)
+				__folio_set_referenced(folio);
+
+			err = filemap_add_folio(mapping, folio, index, gfp);
+			if (!err)
+				break;
 			folio_put(folio);
 			folio = NULL;
-			if (err == -EEXIST)
-				goto repeat;
-		}
+		} while (order-- > 0);
+		if (err == -EEXIST)
+			goto repeat;
+		if (err)
+			return ERR_PTR(err);
 		/*
 		 * filemap_add_folio locks the page, and for mmap
 		 * we expect an unlocked page.
diff --git a/mm/readahead.c b/mm/readahead.c
index a9c999aa19af..e815c114de21 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
 	return 1;
 }
 
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER.  Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size.  I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER	HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER	8
-#endif
-
 static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
 		pgoff_t mark, unsigned int order, gfp_t gfp)
 {
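
To illustrate the encoding used by fgf_set_order() and FGF_GET_ORDER()
above, here is a small stand-alone user-space sketch of the same
arithmetic.  It assumes 4KiB pages and is not kernel code.

	#include <stdio.h>

	#define PAGE_SHIFT	12		/* assume 4KiB pages for this demo */
	#define GET_ORDER(fgf)	((fgf) >> 26)	/* mirrors FGF_GET_ORDER() */

	/* Same idea as fgf_set_order(): put (ilog2(size) - PAGE_SHIFT) in the top 6 bits. */
	static unsigned int set_order(size_t size)
	{
		unsigned int shift = 0;

		while ((1UL << (shift + 1)) <= size)	/* ilog2(size) */
			shift++;
		if (shift <= PAGE_SHIFT)
			return 0;
		return (shift - PAGE_SHIFT) << 26;
	}

	int main(void)
	{
		size_t sizes[] = { 1024, 4096, 65536, 2 * 1024 * 1024 };

		for (int i = 0; i < 4; i++)
			printf("%8zu bytes -> order %u\n", sizes[i],
			       GET_ORDER(set_order(sizes[i])));
		/* prints orders 0, 0, 4 and 9: a 64KiB write hints an order-4 folio */
		return 0;
	}
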
From patchwork Mon Jul 10 13:02:52 2023
X-Patchwork-Id: 13307061
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 8/9] iomap: Create large folios in the buffered write path
Date: Mon, 10 Jul 2023 14:02:52 +0100
Message-Id: <20230710130253.3484695-9-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

Use the size of the write as a hint for the size of the folio to create.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
---
 fs/gfs2/bmap.c         | 2 +-
 fs/iomap/buffered-io.c | 6 ++++--
 include/linux/iomap.h  | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8d611fbcf0bd..521d1e00a0ff 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -971,7 +971,7 @@ gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
 	if (status)
 		return ERR_PTR(status);
 
-	folio = iomap_get_folio(iter, pos);
+	folio = iomap_get_folio(iter, pos, len);
 	if (IS_ERR(folio))
 		gfs2_trans_end(sdp);
 	return folio;
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 5e9380cc3e83..2d3e90f4d16e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -461,16 +461,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
  * iomap_get_folio - get a folio reference for writing
  * @iter: iteration structure
  * @pos: start offset of write
+ * @len: Suggested size of folio to create.
  *
  * Returns a locked reference to the folio at @pos, or an error pointer if the
  * folio could not be obtained.
  */
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
 {
 	fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
 
 	if (iter->flags & IOMAP_NOWAIT)
 		fgp |= FGP_NOWAIT;
+	fgp |= fgf_set_order(len);
 
 	return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
 			fgp, mapping_gfp_mask(iter->inode->i_mapping));
@@ -597,7 +599,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
 	if (folio_ops && folio_ops->get_folio)
 		return folio_ops->get_folio(iter, pos, len);
 	else
-		return iomap_get_folio(iter, pos);
+		return iomap_get_folio(iter, pos, len);
 }
 
 static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e2b836c2e119..80facb9c9e5b 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -261,7 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
 bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
 void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
 int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
From patchwork Mon Jul 10 13:02:53 2023
X-Patchwork-Id: 13307053
From: "Matthew Wilcox (Oracle)"
To: linux-fsdevel@vger.kernel.org
Cc: "Matthew Wilcox (Oracle)", linux-xfs@vger.kernel.org, Wang Yugui,
 Dave Chinner, Christoph Hellwig, "Darrick J. Wong", Kent Overstreet
Subject: [PATCH v4 9/9] iomap: Copy larger chunks from userspace
Date: Mon, 10 Jul 2023 14:02:53 +0100
Message-Id: <20230710130253.3484695-10-willy@infradead.org>
In-Reply-To: <20230710130253.3484695-1-willy@infradead.org>

If we have a large folio, we can copy in larger chunks than PAGE_SIZE.
Start at the maximum page cache size and shrink by half every time we
hit the "we are short on memory" problem.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: Christoph Hellwig
Reviewed-by: Darrick J. Wong
---
 fs/iomap/buffered-io.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 2d3e90f4d16e..f21f1f641c4a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -769,6 +769,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
 static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 {
 	loff_t length = iomap_length(iter);
+	size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
 	loff_t pos = iter->pos;
 	ssize_t written = 0;
 	long status = 0;
@@ -777,15 +778,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 
 	do {
 		struct folio *folio;
-		struct page *page;
-		unsigned long offset;	/* Offset into pagecache page */
-		unsigned long bytes;	/* Bytes to write to page */
+		size_t offset;		/* Offset into folio */
+		size_t bytes;		/* Bytes to write to folio */
 		size_t copied;		/* Bytes copied from user */
 
-		offset = offset_in_page(pos);
-		bytes = min_t(unsigned long, PAGE_SIZE - offset,
-						iov_iter_count(i));
-again:
+		offset = pos & (chunk - 1);
+		bytes = min(chunk - offset, iov_iter_count(i));
 		status = balance_dirty_pages_ratelimited_flags(mapping,
 							       bdp_flags);
 		if (unlikely(status))
@@ -815,12 +813,14 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		if (iter->iomap.flags & IOMAP_F_STALE)
 			break;
 
-		page = folio_file_page(folio, pos >> PAGE_SHIFT);
-		if (mapping_writably_mapped(mapping))
-			flush_dcache_page(page);
+		offset = offset_in_folio(folio, pos);
+		if (bytes > folio_size(folio) - offset)
+			bytes = folio_size(folio) - offset;
 
-		copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_folio(folio);
 
+		copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
 		status = iomap_write_end(iter, pos, bytes, copied, folio);
 
 		if (unlikely(copied != status))
@@ -836,11 +836,13 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
 		 */
 		if (copied)
 			bytes = copied;
-			goto again;
+			if (chunk > PAGE_SIZE)
+				chunk /= 2;
+		} else {
+			pos += status;
+			written += status;
+			length -= status;
 		}
-		pos += status;
-		written += status;
-		length -= status;
 	} while (iov_iter_count(i) && length);
 
 	if (status == -EAGAIN) {