Message ID | 20240611030203.1719072-3-mcgrof@kernel.org (mailing list archive)
---|---
State | Superseded, archived
Series | fstests: add some new LBS inspired tests
On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > mmap() POSIX compliance says we should zero fill data beyond a file > size up to page boundary, and issue a SIGBUS if we go beyond. While fsx > helps us test zero-fill sometimes, fsstress also let's us sometimes test > for SIGBUS however that is based on a random value and its not likely we > always test it. Dedicate a specic test for this to make testing for > this specific situation and to easily expand on other corner cases. > > The only filesystem currently known to fail is tmpfs with huge pages, > but the pending upstream patch "filemap: cap PTE range to be created to > allowed zero fill in folio_map_range()" fixes this issue for tmpfs and > also for LBS support. > > Suggested-by: Matthew Wilcox <willy@infradead.org> > Signed-off-by: Pankaj Raghav <p.raghav@samsung.com> > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> > --- > common/rc | 5 +- > tests/generic/749 | 238 ++++++++++++++++++++++++++++++++++++++++++ > tests/generic/749.out | 2 + > 3 files changed, 244 insertions(+), 1 deletion(-) > create mode 100755 tests/generic/749 > create mode 100644 tests/generic/749.out > > diff --git a/common/rc b/common/rc > index fa7942809d6c..e812a2f7cc67 100644 > --- a/common/rc > +++ b/common/rc > @@ -60,12 +60,15 @@ _round_up_to_page_boundary() > echo $(( (n + page_size - 1) & ~(page_size - 1) )) > } > > +# You can override the $map_len but its optional, by default we use the > +# max allowed size. If you use a length greater than the default you can > +# expect a SIBGUS and test for it. > _mread() > { > local file=$1 > local offset=$2 > local length=$3 > - local map_len=$(_round_up_to_page_boundary $(_get_filesize $file)) > + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } > > # Some callers expect xfs_io to crash with SIGBUS due to the mread, > # causing the shell to print "Bus error" to stderr. To allow this > diff --git a/tests/generic/749 b/tests/generic/749 > new file mode 100755 > index 000000000000..c1d3a2028549 > --- /dev/null > +++ b/tests/generic/749 > @@ -0,0 +1,238 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) Luis Chamberlain. All Rights Reserved. > +# > +# FS QA Test 749 > +# > +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the > +# data mapped is not multiples of the page size the remaining bytes are zeroed > +# out when mapped and modifications to that region are not written to the file. > +# On Linux when you write data to such partial page after the end of the > +# object, the data stays in the page cache even after the file is closed and > +# unmapped and even though the data is never written to the file itself, > +# subsequent mappings may see the modified content. If you go *beyond* this Does this happen (mwrite data beyond eof sticks around) with large folios as well? > +# page, you should get a SIGBUS. This test verifies we zero-fill to page > +# boundary and ensures we get a SIGBUS if we write to data beyond the system > +# page size even if the block size is greater than the system page size. > +. ./common/preamble > +. ./common/rc > +_begin_fstest auto quick prealloc > + > +# Import common functions. > +. 
./common/filter > + > +# real QA test starts here > +_supported_fs generic > +_require_scratch_nocheck > +_require_test > +_require_xfs_io_command "truncate" > +_require_xfs_io_command "falloc" > + > +# _fixed_by_git_commit kernel <pending-upstream> \ > +# "filemap: cap PTE range to be created to allowed zero fill in folio_map_range()" > + > +filter_xfs_io_data_unique() > +{ > + _filter_xfs_io_offset | sed -e 's| |\n|g' | grep -E -v "\.|XX|\*" | \ > + sort -u | tr -d '\n' > +} > + > + > +setup_zeroed_file() > +{ > + local file_len=$1 > + local sparse=$2 > + > + if $sparse; then > + $XFS_IO_PROG -f -c "truncate $file_len" $test_file > + else > + $XFS_IO_PROG -f -c "falloc 0 $file_len" $test_file > + fi > +} > + > +mwrite() > +{ > + local file=$1 > + local offset=$2 > + local length=$3 > + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } > + > + # Some callers expect xfs_io to crash with SIGBUS due to the mread, > + # causing the shell to print "Bus error" to stderr. To allow this > + # message to be redirected, execute xfs_io in a new shell instance. > + # However, for this to work reliably, we also need to prevent the new > + # shell instance from optimizing out the fork and directly exec'ing > + # xfs_io. The easiest way to do that is to append 'true' to the > + # commands, so that xfs_io is no longer the last command the shell sees. > + bash -c "trap '' SIGBUS; ulimit -c 0; \ > + $XFS_IO_PROG $file \ > + -c 'mmap -w 0 $map_len' \ > + -c 'mwrite $offset $length'; \ > + true" > +} > + > +do_mmap_tests() > +{ > + local block_size=$1 > + local file_len=$2 > + local offset=$3 > + local len=$4 > + local use_sparse_file=${5:-false} > + local new_filelen=0 > + local map_len=0 > + local csum=0 > + local fs_block_size=$(_get_file_block_size $SCRATCH_MNT) > + > + echo -en "\n\n==> Testing blocksize $block_size " >> $seqres.full > + echo -en "file_len: $file_len offset: $offset " >> $seqres.full > + echo -e "len: $len sparse: $use_sparse_file" >> $seqres.full > + > + if ((fs_block_size != block_size)); then > + _fail "Block size created ($block_size) doesn't match _get_file_block_size on mount ($fs_block_size)" > + fi > + > + rm -rf "${SCRATCH_MNT:?}"/* rm -f $SCRATCH_MNT/file ? > + > + # This let's us also test against sparse files > + setup_zeroed_file $file_len $use_sparse_file > + > + # This will overwrite the old data, the file size is the > + # delta between offset and len now. > + $XFS_IO_PROG -f -c "pwrite -S 0xaa -b 512 $offset $len" \ > + $test_file >> $seqres.full > + > + sync > + new_filelen=$(_get_filesize $test_file) > + map_len=$(_round_up_to_page_boundary $new_filelen) > + csum_orig="$(_md5_checksum $test_file)" > + > + # A couple of mmap() tests: > + # > + # We are allowed to mmap() up to the boundary of the page size of a > + # data object, but there a few rules to follow we must check for: > + # > + # a) zero-fill test for the data: POSIX says we should zero fill any > + # partial page after the end of the object. Verify zero-fill. > + # b) do not write this bogus data to disk: on Linux, if we write data > + # to a partially filled page, it will stay in the page cache even > + # after the file is closed and unmapped even if it never reaches the > + # file. Subsequent mappings *may* see the modified content, but it > + # also can get other data. Since the data read after the actual What other data? > + # object data can vary we just verify the filesize does not change. 
> + if [[ $map_len -gt $new_filelen ]]; then > + zero_filled_data_len=$((map_len - new_filelen)) > + _scratch_cycle_mount > + expected_zero_data="00" > + zero_filled_data=$($XFS_IO_PROG -r $test_file \ > + -c "mmap -r 0 $map_len" \ > + -c "mread -v $new_filelen $zero_filled_data_len" \ > + -c "munmap" | \ > + filter_xfs_io_data_unique) > + if [[ "$zero_filled_data" != "$expected_zero_data" ]]; then > + echo "Expected data: $expected_zero_data" > + echo " Actual data: $zero_filled_data" > + _fail "Zero-fill expectations with mmap() not respected" > + fi > + > + _scratch_cycle_mount > + $XFS_IO_PROG $test_file \ > + -c "mmap -w 0 $map_len" \ > + -c "mwrite $new_filelen $zero_filled_data_len" \ > + -c "munmap" > + sync > + csum_post="$(_md5_checksum $test_file)" > + if [[ "$csum_orig" != "$csum_post" ]]; then > + echo "Expected csum: $csum_orig" > + echo " Actual csum: $csum_post" > + _fail "mmap() write up to page boundary should not change actual file contents" Do you really want to stop the test immediately? Or keep going and see what other errors fall out? (i.e. s/_fail/echo/ here) > + fi > + > + local filelen_test=$(_get_filesize $test_file) > + if [[ "$filelen_test" != "$new_filelen" ]]; then > + echo "Expected file length: $new_filelen" > + echo " Actual file length: $filelen_test" > + _fail "mmap() write up to page boundary should not change actual file size" > + fi > + fi > + > + # Now lets ensure we get SIGBUS when we go beyond the page boundary > + _scratch_cycle_mount > + new_filelen=$(_get_filesize $test_file) > + map_len=$(_round_up_to_page_boundary $new_filelen) > + csum_orig="$(_md5_checksum $test_file)" > + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err > + if grep -q 'Bus error' $tmp.err; then > + cat $tmp.err > + _fail "Not expecting SIGBUS when reading up to page boundary" > + fi > + > + # This should just work > + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err > + if [[ $? -ne 0 ]]; then > + _fail "mmap() read up to page boundary should work" > + fi > + > + # This should just work > + mwrite $map_len 0 $map_len >> $seqres.full 2>$tmp.err > + if [[ $? -ne 0 ]]; then > + _fail "mmap() write up to page boundary should work" > + fi > + > + # If we mmap() on the boundary but try to read beyond it just > + # fails, we don't get a SIGBUS > + $XFS_IO_PROG -r $test_file \ > + -c "mmap -r 0 $map_len" \ > + -c "mread 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err > + local mread_err=$? > + if [[ $mread_err -eq 0 ]]; then > + _fail "mmap() to page boundary works as expected but reading beyond should fail: $mread_err" > + fi > + > + $XFS_IO_PROG -w $test_file \ > + -c "mmap -w 0 $map_len" \ > + -c "mwrite 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err > + local mwrite_err=$? > + if [[ $mwrite_err -eq 0 ]]; then > + _fail "mmap() to page boundary works as expected but writing beyond should fail: $mwrite_err" > + fi > + > + # Now let's go beyond the allowed mmap() page boundary > + _mread $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err > + if ! grep -q 'Bus error' $tmp.err; then > + _fail "Expected SIGBUS when mmap() reading beyond page boundary" > + fi > + > + mwrite $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err > + if ! 
grep -q 'Bus error' $tmp.err; then > + _fail "Expected SIGBUS when mmap() writing beyond page boundary" > + fi > + > + local filelen_test=$(_get_filesize $test_file) > + if [[ "$filelen_test" != "$new_filelen" ]]; then > + echo "Expected file length: $new_filelen" > + echo " Actual file length: $filelen_test" > + _fail "reading or writing beyond file size up to mmap() page boundary should not change file size" > + fi > +} > + > +test_block_size() > +{ > + local block_size=$1 > + > + do_mmap_tests $block_size 512 3 5 > + do_mmap_tests $block_size 11k 0 $((4096 * 3 + 3)) > + do_mmap_tests $block_size 16k 0 $((16384+3)) > + do_mmap_tests $block_size 16k $((16384-10)) $((16384+20)) > + do_mmap_tests $block_size 64k 0 $((65536+3)) > + do_mmap_tests $block_size 4k 4090 30 true > +} > + > +_scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed" > +_scratch_mount > +test_file=$SCRATCH_MNT/file > +block_size=$(_get_file_block_size "$SCRATCH_MNT") > +test_block_size $block_size > + > +echo "Silence is golden" > +status=0 > +exit > diff --git a/tests/generic/749.out b/tests/generic/749.out > new file mode 100644 > index 000000000000..24658deddb99 > --- /dev/null > +++ b/tests/generic/749.out > @@ -0,0 +1,2 @@ > +QA output created by 749 > +Silence is golden > -- > 2.43.0 > >
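As a quick illustration of the behaviour the test exercises (zero-fill up to the page boundary, SIGBUS beyond it), here is a minimal xfs_io sketch; the 4k page size, file name and sizes are arbitrary examples rather than values taken from the patch:

# Create a 100 byte file; with 4k pages the mapping boundary is 4096 bytes.
$XFS_IO_PROG -f -c "pwrite -S 0xaa 0 100" -c fsync $SCRATCH_MNT/smallfile

# Reads past EOF but within the last partial page return zero-filled bytes.
$XFS_IO_PROG -r $SCRATCH_MNT/smallfile \
	-c "mmap -r 0 4096" \
	-c "mread -v 100 3996"

# Touching a page wholly beyond EOF faults: xfs_io is killed by SIGBUS and
# the shell prints "Bus error".
$XFS_IO_PROG -r $SCRATCH_MNT/smallfile \
	-c "mmap -r 0 8192" \
	-c "mread 4096 10"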
On Tue, Jun 11, 2024 at 09:48:11AM -0700, Darrick J. Wong wrote: > On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > > +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the > > +# data mapped is not multiples of the page size the remaining bytes are zeroed > > +# out when mapped and modifications to that region are not written to the file. > > +# On Linux when you write data to such partial page after the end of the > > +# object, the data stays in the page cache even after the file is closed and > > +# unmapped and even though the data is never written to the file itself, > > +# subsequent mappings may see the modified content. If you go *beyond* this > > Does this happen (mwrite data beyond eof sticks around) with large > folios as well? That corner case of checking to see if it stays is not tested by this test, but we could / should extend this test later for that. But then the question becomes: what is right? Given we are in a grey area, if we don't have any defined standard for it, it seems odd to test for it. So the test currently only tests for correctness of what we expect for POSIX and what we all have agreed on for Linux. Herding everyone to follow suit for the other corner cases is something perhaps we should do. Do we have a "strict fail"? Then perhaps we can later add a test case for it, and once (and if) we get consensus on what we do we can enable, say, a "strict-Linux" mode where we are pedantic about a new world order. > > + rm -rf "${SCRATCH_MNT:?}"/* > > rm -f $SCRATCH_MNT/file ? Sure. > > + # A couple of mmap() tests: > > + # > > + # We are allowed to mmap() up to the boundary of the page size of a > > + # data object, but there a few rules to follow we must check for: > > + # > > + # a) zero-fill test for the data: POSIX says we should zero fill any > > + # partial page after the end of the object. Verify zero-fill. > > + # b) do not write this bogus data to disk: on Linux, if we write data > > + # to a partially filled page, it will stay in the page cache even > > + # after the file is closed and unmapped even if it never reaches the > > + # file. Subsequent mappings *may* see the modified content, but it > > + # also can get other data. Since the data read after the actual > > What other data? Beats me, got that from the man page bible on mmap. I think it's homework for us to find out who is spewing that out, which gives a bit more value to the idea of that strict-linux thing. How else will we find out? > > + # object data can vary we just verify the filesize does not change.
> > + if [[ $map_len -gt $new_filelen ]]; then > > + zero_filled_data_len=$((map_len - new_filelen)) > > + _scratch_cycle_mount > > + expected_zero_data="00" > > + zero_filled_data=$($XFS_IO_PROG -r $test_file \ > > + -c "mmap -r 0 $map_len" \ > > + -c "mread -v $new_filelen $zero_filled_data_len" \ > > + -c "munmap" | \ > > + filter_xfs_io_data_unique) > > + if [[ "$zero_filled_data" != "$expected_zero_data" ]]; then > > + echo "Expected data: $expected_zero_data" > > + echo " Actual data: $zero_filled_data" > > + _fail "Zero-fill expectations with mmap() not respected" > > + fi > > + > > + _scratch_cycle_mount > > + $XFS_IO_PROG $test_file \ > > + -c "mmap -w 0 $map_len" \ > > + -c "mwrite $new_filelen $zero_filled_data_len" \ > > + -c "munmap" > > + sync > > + csum_post="$(_md5_checksum $test_file)" > > + if [[ "$csum_orig" != "$csum_post" ]]; then > > + echo "Expected csum: $csum_orig" > > + echo " Actual csum: $csum_post" > > + _fail "mmap() write up to page boundary should not change actual file contents" > > Do you really want to stop the test immediately? Or keep going and see > what other errors fall out? (i.e. s/_fail/echo/ here) Good point. We could go on, I'll change on the next v2. Thanks! Luis
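A minimal sketch of the s/_fail/echo/ idea discussed above (the helper name is hypothetical, not part of the patch): report the mismatch and continue, so the unexpected output diffs against 749.out and still fails the test at the end.

# Hypothetical non-fatal variant of the checksum check: any echo output
# makes the test output differ from 749.out, so the test still fails,
# but the remaining cases keep running and can report their own errors.
check_csum_unchanged()
{
	local expected=$1
	local actual=$2

	if [[ "$expected" != "$actual" ]]; then
		echo "Expected csum: $expected"
		echo " Actual csum: $actual"
		echo "mmap() write up to page boundary should not change file contents"
	fi
}

# Usage in do_mmap_tests():
#	check_csum_unchanged "$csum_orig" "$csum_post"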
On Tue, Jun 11, 2024 at 11:10:13AM -0700, Luis Chamberlain wrote: > On Tue, Jun 11, 2024 at 09:48:11AM -0700, Darrick J. Wong wrote: > > On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > > > +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the > > > +# data mapped is not multiples of the page size the remaining bytes are zeroed > > > +# out when mapped and modifications to that region are not written to the file. > > > +# On Linux when you write data to such partial page after the end of the > > > +# object, the data stays in the page cache even after the file is closed and > > > +# unmapped and even though the data is never written to the file itself, > > > +# subsequent mappings may see the modified content. If you go *beyond* this > > > > Does this happen (mwrite data beyond eof sticks around) with large > > folios as well? > > That corner case of checking to see if it stays is not tested by this > test, but we could / should extend this test later for that. But then > the question becomes, what is right, given we are in grey area, if we > don't have any defined standard for it, it seems odd to test for it. > > So the test currently only tests for correctness of what we expect for > POSIX and what we all have agreed for Linux. > > Hurding everyone to follow suit for the other corner cases is something > perhaps we should do. Do we have a "strict fail" ? So that perhaps we can > later add a test case for it and so that onnce and if we get consensus > on what we do we can enable say a "strict-Linux" mode where we are > pedantic about a new world order? I doubt there's an easy way to guarantee more than "initialized to zero, contents may stay around in memory but will not be written to disk". You could do asinine things like fault on every access and manually inject zero bytes, but ... yuck. That said -- let's say you have a 33k file, and a space mapping for 0-63k (e.g. it was preallocated). Can the pagecache grab (say) a 64k folio for the EOF part of the pagecache? And can you mmap that whole region? And see even more grey area mmapping? Or does mmap always cut off the mapping at roundup(i_size_read(), PAGE_SIZE) ? (I feel like I've asked this before, and forgotten the answer. :() > > > + rm -rf "${SCRATCH_MNT:?}"/* > > > > rm -f $SCRATCH_MNT/file ? > > Sure. > > > > + # A couple of mmap() tests: > > > + # > > > + # We are allowed to mmap() up to the boundary of the page size of a > > > + # data object, but there a few rules to follow we must check for: > > > + # > > > + # a) zero-fill test for the data: POSIX says we should zero fill any > > > + # partial page after the end of the object. Verify zero-fill. > > > + # b) do not write this bogus data to disk: on Linux, if we write data > > > + # to a partially filled page, it will stay in the page cache even > > > + # after the file is closed and unmapped even if it never reaches the > > > + # file. Subsequent mappings *may* see the modified content, but it > > > + # also can get other data. Since the data read after the actual > > > > What other data? > > Beats me, got that from the man page bible on mmap. I think its homework > for us to find out who is spewing that out, which gives a bit more value > to the idea of that strict-linux thing. How else will we find out? Oh, ok. I couldn't tell if *you* had seen "other" data emerging from the murk, or if that was merely what a spec says. Please cite the particular bible you were reading. 
;) > > > + # object data can vary we just verify the filesize does not change. > > > + if [[ $map_len -gt $new_filelen ]]; then > > > + zero_filled_data_len=$((map_len - new_filelen)) > > > + _scratch_cycle_mount > > > + expected_zero_data="00" > > > + zero_filled_data=$($XFS_IO_PROG -r $test_file \ > > > + -c "mmap -r 0 $map_len" \ > > > + -c "mread -v $new_filelen $zero_filled_data_len" \ > > > + -c "munmap" | \ > > > + filter_xfs_io_data_unique) > > > + if [[ "$zero_filled_data" != "$expected_zero_data" ]]; then > > > + echo "Expected data: $expected_zero_data" > > > + echo " Actual data: $zero_filled_data" > > > + _fail "Zero-fill expectations with mmap() not respected" > > > + fi > > > + > > > + _scratch_cycle_mount > > > + $XFS_IO_PROG $test_file \ > > > + -c "mmap -w 0 $map_len" \ > > > + -c "mwrite $new_filelen $zero_filled_data_len" \ > > > + -c "munmap" > > > + sync > > > + csum_post="$(_md5_checksum $test_file)" > > > + if [[ "$csum_orig" != "$csum_post" ]]; then > > > + echo "Expected csum: $csum_orig" > > > + echo " Actual csum: $csum_post" > > > + _fail "mmap() write up to page boundary should not change actual file contents" > > > > Do you really want to stop the test immediately? Or keep going and see > > what other errors fall out? (i.e. s/_fail/echo/ here) > > Good point. We could go on, I'll change on the next v2. > > Thanks! NP. --D > > Luis >
On Tue, Jun 11, 2024 at 11:46:03AM -0700, Darrick J. Wong wrote: > On Tue, Jun 11, 2024 at 11:10:13AM -0700, Luis Chamberlain wrote: > > On Tue, Jun 11, 2024 at 09:48:11AM -0700, Darrick J. Wong wrote: > > > On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > > > > +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the > > > > +# data mapped is not multiples of the page size the remaining bytes are zeroed > > > > +# out when mapped and modifications to that region are not written to the file. > > > > +# On Linux when you write data to such partial page after the end of the > > > > +# object, the data stays in the page cache even after the file is closed and > > > > +# unmapped and even though the data is never written to the file itself, > > > > +# subsequent mappings may see the modified content. If you go *beyond* this > > > > > > Does this happen (mwrite data beyond eof sticks around) with large > > > folios as well? > > > > That corner case of checking to see if it stays is not tested by this > > test, but we could / should extend this test later for that. But then > > the question becomes, what is right, given we are in grey area, if we > > don't have any defined standard for it, it seems odd to test for it. > > > > So the test currently only tests for correctness of what we expect for > > POSIX and what we all have agreed for Linux. > > > > Hurding everyone to follow suit for the other corner cases is something > > perhaps we should do. Do we have a "strict fail" ? So that perhaps we can > > later add a test case for it and so that onnce and if we get consensus > > on what we do we can enable say a "strict-Linux" mode where we are > > pedantic about a new world order? > > I doubt there's an easy way to guarantee more than "initialized to zero, > contents may stay around in memory but will not be written to disk". > You could do asinine things like fault on every access and manually > inject zero bytes, but ... yuck. Sure, but I suspect the real issue is if it does something like leak data which it should not. The test as-is does test to ensure the data is zeroed. If we want to add a test to close the mmap and ensure the data beyond the file content up to PAGE_SIZE is still zeroed out, it's easy to do; it was just that it seems that *could* end up with different results depending on the filesystem. > That said -- let's say you have a 33k file, and a space mapping for > 0-63k What block size filesystem in this example? If the length is 33k, whether or not it was truncated does not matter, the file size is what matters. The block size is what we use for the minimum order folio, and since we start at offset 0, a 33k sized file on a 64k block size filesystem will get a 64k folio. On a 32k block size filesystem, it will get two 32k folios. > (e.g. it was preallocated). Do you mean sparse or what? Because if it's a sparse file it will still change the size of the file, so just wanted to check. > Can the pagecache grab (say) a 64k folio for the EOF part of the pagecache? It depends on the block size of the filesystem. If 4k, we'd go up to 36k, and 33k-36k would be zeroed. With min order, we'd have folios of 8k, 32k, or 64k. For 8k we'd have 5 folios of 8k size each, the last one having only 1k of data, and 3k zeroed out. No PTEs would be assigned for that folio beyond the 36k boundary and so we'd SIGBUS on access beyond it. We test for this in this test. > And can you mmap that whole region? No, we test for this too here.
You can only mmap up to the PAGE_SIZE-aligned file size. > And see even more grey area mmapping? No, we limit up to PAGE_SIZE alignment. > Or does mmap always cut > off the mapping at roundup(i_size_read(), PAGE_SIZE) ? That's right, we do this; without LBS this was implied, but with LBS we have to be explicit about using the PAGE_SIZE alignment restriction. This test checks for all that, and checks for both integrity of the contents and file size even if you muck with the extra fluff allowed by mmap(). > > > What other data? > > > > Beats me, got that from the man page bible on mmap. I think its homework > > for us to find out who is spewing that out, which gives a bit more value > > to the idea of that strict-linux thing. How else will we find out? > > Oh, ok. I couldn't tell if *you* had seen "other" data emerging from > the murk, or if that was merely what a spec says. Please cite the > particular bible you were reading. ;) From the mmap(2) man page: "subsequent mappings may see the modified content." so I extended this with the implications of its use of *may*. Speaking of the man page, I also see that huge pages are addressed there, and when a huge page is used it says: "The system automatically aligns length to be a multiple of the underlying huge page size" And so I believe that means we need to check for huge pages in filemap_map_pages() and also adjust the test to align to the specific huge page size if used... Or just skip tmpfs / hugetlbfs for now... Luis
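For the huge page question, a rough sketch of how the rounding could be parameterized (the helper name and the optional alignment argument are hypothetical, not something the patch implements): round to the huge page size instead of the base page size when a mapping is known to be backed by huge pages, e.g. tmpfs mounted with huge=always.

# Hypothetical helper: round a size up to the mmap() alignment boundary.
# Defaults to the base page size; callers aware of huge-page-backed
# mappings could pass the huge page size (e.g. 2M) instead.
_round_up_to_mmap_boundary()
{
	local n=$1
	local align=${2:-$(getconf PAGE_SIZE)}

	echo $(( (n + align - 1) & ~(align - 1) ))
}

# Example: a 33k file rounds up to 36k with 4k pages, but to 2M with 2M huge pages.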
On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > mmap() POSIX compliance says we should zero fill data beyond a file > size up to page boundary, and issue a SIGBUS if we go beyond. While fsx > helps us test zero-fill sometimes, fsstress also let's us sometimes test > for SIGBUS however that is based on a random value and its not likely we > always test it. Dedicate a specic test for this to make testing for > this specific situation and to easily expand on other corner cases. > > The only filesystem currently known to fail is tmpfs with huge pages, > but the pending upstream patch "filemap: cap PTE range to be created to > allowed zero fill in folio_map_range()" fixes this issue for tmpfs and > also for LBS support. > > Suggested-by: Matthew Wilcox <willy@infradead.org> > Signed-off-by: Pankaj Raghav <p.raghav@samsung.com> > Signed-off-by: Luis Chamberlain <mcgrof@kernel.org> > --- > common/rc | 5 +- > tests/generic/749 | 238 ++++++++++++++++++++++++++++++++++++++++++ > tests/generic/749.out | 2 + > 3 files changed, 244 insertions(+), 1 deletion(-) > create mode 100755 tests/generic/749 > create mode 100644 tests/generic/749.out > > diff --git a/common/rc b/common/rc > index fa7942809d6c..e812a2f7cc67 100644 > --- a/common/rc > +++ b/common/rc > @@ -60,12 +60,15 @@ _round_up_to_page_boundary() > echo $(( (n + page_size - 1) & ~(page_size - 1) )) > } > > +# You can override the $map_len but its optional, by default we use the > +# max allowed size. If you use a length greater than the default you can > +# expect a SIBGUS and test for it. > _mread() > { > local file=$1 > local offset=$2 > local length=$3 > - local map_len=$(_round_up_to_page_boundary $(_get_filesize $file)) > + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } > > # Some callers expect xfs_io to crash with SIGBUS due to the mread, > # causing the shell to print "Bus error" to stderr. To allow this > diff --git a/tests/generic/749 b/tests/generic/749 > new file mode 100755 > index 000000000000..c1d3a2028549 > --- /dev/null > +++ b/tests/generic/749 > @@ -0,0 +1,238 @@ > +#! /bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# Copyright (c) Luis Chamberlain. All Rights Reserved. > +# > +# FS QA Test 749 > +# > +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the > +# data mapped is not multiples of the page size the remaining bytes are zeroed > +# out when mapped and modifications to that region are not written to the file. > +# On Linux when you write data to such partial page after the end of the > +# object, the data stays in the page cache even after the file is closed and > +# unmapped and even though the data is never written to the file itself, > +# subsequent mappings may see the modified content. If you go *beyond* this > +# page, you should get a SIGBUS. This test verifies we zero-fill to page > +# boundary and ensures we get a SIGBUS if we write to data beyond the system > +# page size even if the block size is greater than the system page size. > +. ./common/preamble > +. ./common/rc > +_begin_fstest auto quick prealloc > + > +# Import common functions. > +. 
./common/filter > + > +# real QA test starts here > +_supported_fs generic > +_require_scratch_nocheck > +_require_test > +_require_xfs_io_command "truncate" > +_require_xfs_io_command "falloc" > + > +# _fixed_by_git_commit kernel <pending-upstream> \ > +# "filemap: cap PTE range to be created to allowed zero fill in folio_map_range()" > + > +filter_xfs_io_data_unique() > +{ > + _filter_xfs_io_offset | sed -e 's| |\n|g' | grep -E -v "\.|XX|\*" | \ > + sort -u | tr -d '\n' > +} > + > + > +setup_zeroed_file() > +{ > + local file_len=$1 > + local sparse=$2 > + > + if $sparse; then > + $XFS_IO_PROG -f -c "truncate $file_len" $test_file > + else > + $XFS_IO_PROG -f -c "falloc 0 $file_len" $test_file > + fi > +} > + > +mwrite() > +{ > + local file=$1 > + local offset=$2 > + local length=$3 > + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } > + > + # Some callers expect xfs_io to crash with SIGBUS due to the mread, > + # causing the shell to print "Bus error" to stderr. To allow this > + # message to be redirected, execute xfs_io in a new shell instance. > + # However, for this to work reliably, we also need to prevent the new > + # shell instance from optimizing out the fork and directly exec'ing > + # xfs_io. The easiest way to do that is to append 'true' to the > + # commands, so that xfs_io is no longer the last command the shell sees. > + bash -c "trap '' SIGBUS; ulimit -c 0; \ > + $XFS_IO_PROG $file \ > + -c 'mmap -w 0 $map_len' \ > + -c 'mwrite $offset $length'; \ > + true" > +} As you've moved the _mread to common/rc, why not do the same for this mwrite? Thanks, Zorro > + > +do_mmap_tests() > +{ > + local block_size=$1 > + local file_len=$2 > + local offset=$3 > + local len=$4 > + local use_sparse_file=${5:-false} > + local new_filelen=0 > + local map_len=0 > + local csum=0 > + local fs_block_size=$(_get_file_block_size $SCRATCH_MNT) > + > + echo -en "\n\n==> Testing blocksize $block_size " >> $seqres.full > + echo -en "file_len: $file_len offset: $offset " >> $seqres.full > + echo -e "len: $len sparse: $use_sparse_file" >> $seqres.full > + > + if ((fs_block_size != block_size)); then > + _fail "Block size created ($block_size) doesn't match _get_file_block_size on mount ($fs_block_size)" > + fi > + > + rm -rf "${SCRATCH_MNT:?}"/* > + > + # This let's us also test against sparse files > + setup_zeroed_file $file_len $use_sparse_file > + > + # This will overwrite the old data, the file size is the > + # delta between offset and len now. > + $XFS_IO_PROG -f -c "pwrite -S 0xaa -b 512 $offset $len" \ > + $test_file >> $seqres.full > + > + sync > + new_filelen=$(_get_filesize $test_file) > + map_len=$(_round_up_to_page_boundary $new_filelen) > + csum_orig="$(_md5_checksum $test_file)" > + > + # A couple of mmap() tests: > + # > + # We are allowed to mmap() up to the boundary of the page size of a > + # data object, but there a few rules to follow we must check for: > + # > + # a) zero-fill test for the data: POSIX says we should zero fill any > + # partial page after the end of the object. Verify zero-fill. > + # b) do not write this bogus data to disk: on Linux, if we write data > + # to a partially filled page, it will stay in the page cache even > + # after the file is closed and unmapped even if it never reaches the > + # file. Subsequent mappings *may* see the modified content, but it > + # also can get other data. Since the data read after the actual > + # object data can vary we just verify the filesize does not change. 
> + if [[ $map_len -gt $new_filelen ]]; then > + zero_filled_data_len=$((map_len - new_filelen)) > + _scratch_cycle_mount > + expected_zero_data="00" > + zero_filled_data=$($XFS_IO_PROG -r $test_file \ > + -c "mmap -r 0 $map_len" \ > + -c "mread -v $new_filelen $zero_filled_data_len" \ > + -c "munmap" | \ > + filter_xfs_io_data_unique) > + if [[ "$zero_filled_data" != "$expected_zero_data" ]]; then > + echo "Expected data: $expected_zero_data" > + echo " Actual data: $zero_filled_data" > + _fail "Zero-fill expectations with mmap() not respected" > + fi > + > + _scratch_cycle_mount > + $XFS_IO_PROG $test_file \ > + -c "mmap -w 0 $map_len" \ > + -c "mwrite $new_filelen $zero_filled_data_len" \ > + -c "munmap" > + sync > + csum_post="$(_md5_checksum $test_file)" > + if [[ "$csum_orig" != "$csum_post" ]]; then > + echo "Expected csum: $csum_orig" > + echo " Actual csum: $csum_post" > + _fail "mmap() write up to page boundary should not change actual file contents" > + fi > + > + local filelen_test=$(_get_filesize $test_file) > + if [[ "$filelen_test" != "$new_filelen" ]]; then > + echo "Expected file length: $new_filelen" > + echo " Actual file length: $filelen_test" > + _fail "mmap() write up to page boundary should not change actual file size" > + fi > + fi > + > + # Now lets ensure we get SIGBUS when we go beyond the page boundary > + _scratch_cycle_mount > + new_filelen=$(_get_filesize $test_file) > + map_len=$(_round_up_to_page_boundary $new_filelen) > + csum_orig="$(_md5_checksum $test_file)" > + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err > + if grep -q 'Bus error' $tmp.err; then > + cat $tmp.err > + _fail "Not expecting SIGBUS when reading up to page boundary" > + fi > + > + # This should just work > + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err > + if [[ $? -ne 0 ]]; then > + _fail "mmap() read up to page boundary should work" > + fi > + > + # This should just work > + mwrite $map_len 0 $map_len >> $seqres.full 2>$tmp.err > + if [[ $? -ne 0 ]]; then > + _fail "mmap() write up to page boundary should work" > + fi > + > + # If we mmap() on the boundary but try to read beyond it just > + # fails, we don't get a SIGBUS > + $XFS_IO_PROG -r $test_file \ > + -c "mmap -r 0 $map_len" \ > + -c "mread 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err > + local mread_err=$? > + if [[ $mread_err -eq 0 ]]; then > + _fail "mmap() to page boundary works as expected but reading beyond should fail: $mread_err" > + fi > + > + $XFS_IO_PROG -w $test_file \ > + -c "mmap -w 0 $map_len" \ > + -c "mwrite 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err > + local mwrite_err=$? > + if [[ $mwrite_err -eq 0 ]]; then > + _fail "mmap() to page boundary works as expected but writing beyond should fail: $mwrite_err" > + fi > + > + # Now let's go beyond the allowed mmap() page boundary > + _mread $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err > + if ! grep -q 'Bus error' $tmp.err; then > + _fail "Expected SIGBUS when mmap() reading beyond page boundary" > + fi > + > + mwrite $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err > + if ! 
grep -q 'Bus error' $tmp.err; then > + _fail "Expected SIGBUS when mmap() writing beyond page boundary" > + fi > + > + local filelen_test=$(_get_filesize $test_file) > + if [[ "$filelen_test" != "$new_filelen" ]]; then > + echo "Expected file length: $new_filelen" > + echo " Actual file length: $filelen_test" > + _fail "reading or writing beyond file size up to mmap() page boundary should not change file size" > + fi > +} > + > +test_block_size() > +{ > + local block_size=$1 > + > + do_mmap_tests $block_size 512 3 5 > + do_mmap_tests $block_size 11k 0 $((4096 * 3 + 3)) > + do_mmap_tests $block_size 16k 0 $((16384+3)) > + do_mmap_tests $block_size 16k $((16384-10)) $((16384+20)) > + do_mmap_tests $block_size 64k 0 $((65536+3)) > + do_mmap_tests $block_size 4k 4090 30 true > +} > + > +_scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed" > +_scratch_mount > +test_file=$SCRATCH_MNT/file > +block_size=$(_get_file_block_size "$SCRATCH_MNT") > +test_block_size $block_size > + > +echo "Silence is golden" > +status=0 > +exit > diff --git a/tests/generic/749.out b/tests/generic/749.out > new file mode 100644 > index 000000000000..24658deddb99 > --- /dev/null > +++ b/tests/generic/749.out > @@ -0,0 +1,2 @@ > +QA output created by 749 > +Silence is golden > -- > 2.43.0 > >
On Wed, Jun 12, 2024 at 04:06:34PM +0800, Zorro Lang wrote: > On Mon, Jun 10, 2024 at 08:01:59PM -0700, Luis Chamberlain wrote: > > +mwrite() > > +{ > > + local file=$1 > > + local offset=$2 > > + local length=$3 > > + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } > > + > > + # Some callers expect xfs_io to crash with SIGBUS due to the mread, > > + # causing the shell to print "Bus error" to stderr. To allow this > > + # message to be redirected, execute xfs_io in a new shell instance. > > + # However, for this to work reliably, we also need to prevent the new > > + # shell instance from optimizing out the fork and directly exec'ing > > + # xfs_io. The easiest way to do that is to append 'true' to the > > + # commands, so that xfs_io is no longer the last command the shell sees. > > + bash -c "trap '' SIGBUS; ulimit -c 0; \ > > + $XFS_IO_PROG $file \ > > + -c 'mmap -w 0 $map_len' \ > > + -c 'mwrite $offset $length'; \ > > + true" > > +} > > As you've moved the _mread to common/rc, why not do the same for this mwrite? I didn't move it as this mwrite() is only used by one test. Let me know if you want me to move it. Luis
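If it does get moved, a sketch of what the shared helper could look like; the _mwrite name is hypothetical and the body mirrors the test-local mwrite() quoted above and the existing _mread:

# Hypothetical _mwrite for common/rc: map the file write-only up to its
# page-aligned size (or an explicit map length) and mwrite into it. Run
# xfs_io in a separate shell so an expected SIGBUS only prints "Bus error",
# which callers can redirect, instead of killing the test script.
_mwrite()
{
	local file=$1
	local offset=$2
	local length=$3
	local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file))}

	bash -c "trap '' SIGBUS; ulimit -c 0; \
		$XFS_IO_PROG $file \
			-c 'mmap -w 0 $map_len' \
			-c 'mwrite $offset $length'; \
		true"
}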
diff --git a/common/rc b/common/rc index fa7942809d6c..e812a2f7cc67 100644 --- a/common/rc +++ b/common/rc @@ -60,12 +60,15 @@ _round_up_to_page_boundary() echo $(( (n + page_size - 1) & ~(page_size - 1) )) } +# You can override the $map_len but its optional, by default we use the +# max allowed size. If you use a length greater than the default you can +# expect a SIBGUS and test for it. _mread() { local file=$1 local offset=$2 local length=$3 - local map_len=$(_round_up_to_page_boundary $(_get_filesize $file)) + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } # Some callers expect xfs_io to crash with SIGBUS due to the mread, # causing the shell to print "Bus error" to stderr. To allow this diff --git a/tests/generic/749 b/tests/generic/749 new file mode 100755 index 000000000000..c1d3a2028549 --- /dev/null +++ b/tests/generic/749 @@ -0,0 +1,238 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) Luis Chamberlain. All Rights Reserved. +# +# FS QA Test 749 +# +# As per POSIX NOTES mmap(2) maps multiples of the system page size, but if the +# data mapped is not multiples of the page size the remaining bytes are zeroed +# out when mapped and modifications to that region are not written to the file. +# On Linux when you write data to such partial page after the end of the +# object, the data stays in the page cache even after the file is closed and +# unmapped and even though the data is never written to the file itself, +# subsequent mappings may see the modified content. If you go *beyond* this +# page, you should get a SIGBUS. This test verifies we zero-fill to page +# boundary and ensures we get a SIGBUS if we write to data beyond the system +# page size even if the block size is greater than the system page size. +. ./common/preamble +. ./common/rc +_begin_fstest auto quick prealloc + +# Import common functions. +. ./common/filter + +# real QA test starts here +_supported_fs generic +_require_scratch_nocheck +_require_test +_require_xfs_io_command "truncate" +_require_xfs_io_command "falloc" + +# _fixed_by_git_commit kernel <pending-upstream> \ +# "filemap: cap PTE range to be created to allowed zero fill in folio_map_range()" + +filter_xfs_io_data_unique() +{ + _filter_xfs_io_offset | sed -e 's| |\n|g' | grep -E -v "\.|XX|\*" | \ + sort -u | tr -d '\n' +} + + +setup_zeroed_file() +{ + local file_len=$1 + local sparse=$2 + + if $sparse; then + $XFS_IO_PROG -f -c "truncate $file_len" $test_file + else + $XFS_IO_PROG -f -c "falloc 0 $file_len" $test_file + fi +} + +mwrite() +{ + local file=$1 + local offset=$2 + local length=$3 + local map_len=${4:-$(_round_up_to_page_boundary $(_get_filesize $file)) } + + # Some callers expect xfs_io to crash with SIGBUS due to the mread, + # causing the shell to print "Bus error" to stderr. To allow this + # message to be redirected, execute xfs_io in a new shell instance. + # However, for this to work reliably, we also need to prevent the new + # shell instance from optimizing out the fork and directly exec'ing + # xfs_io. The easiest way to do that is to append 'true' to the + # commands, so that xfs_io is no longer the last command the shell sees. 
+ bash -c "trap '' SIGBUS; ulimit -c 0; \ + $XFS_IO_PROG $file \ + -c 'mmap -w 0 $map_len' \ + -c 'mwrite $offset $length'; \ + true" +} + +do_mmap_tests() +{ + local block_size=$1 + local file_len=$2 + local offset=$3 + local len=$4 + local use_sparse_file=${5:-false} + local new_filelen=0 + local map_len=0 + local csum=0 + local fs_block_size=$(_get_file_block_size $SCRATCH_MNT) + + echo -en "\n\n==> Testing blocksize $block_size " >> $seqres.full + echo -en "file_len: $file_len offset: $offset " >> $seqres.full + echo -e "len: $len sparse: $use_sparse_file" >> $seqres.full + + if ((fs_block_size != block_size)); then + _fail "Block size created ($block_size) doesn't match _get_file_block_size on mount ($fs_block_size)" + fi + + rm -rf "${SCRATCH_MNT:?}"/* + + # This let's us also test against sparse files + setup_zeroed_file $file_len $use_sparse_file + + # This will overwrite the old data, the file size is the + # delta between offset and len now. + $XFS_IO_PROG -f -c "pwrite -S 0xaa -b 512 $offset $len" \ + $test_file >> $seqres.full + + sync + new_filelen=$(_get_filesize $test_file) + map_len=$(_round_up_to_page_boundary $new_filelen) + csum_orig="$(_md5_checksum $test_file)" + + # A couple of mmap() tests: + # + # We are allowed to mmap() up to the boundary of the page size of a + # data object, but there a few rules to follow we must check for: + # + # a) zero-fill test for the data: POSIX says we should zero fill any + # partial page after the end of the object. Verify zero-fill. + # b) do not write this bogus data to disk: on Linux, if we write data + # to a partially filled page, it will stay in the page cache even + # after the file is closed and unmapped even if it never reaches the + # file. Subsequent mappings *may* see the modified content, but it + # also can get other data. Since the data read after the actual + # object data can vary we just verify the filesize does not change. 
+ if [[ $map_len -gt $new_filelen ]]; then + zero_filled_data_len=$((map_len - new_filelen)) + _scratch_cycle_mount + expected_zero_data="00" + zero_filled_data=$($XFS_IO_PROG -r $test_file \ + -c "mmap -r 0 $map_len" \ + -c "mread -v $new_filelen $zero_filled_data_len" \ + -c "munmap" | \ + filter_xfs_io_data_unique) + if [[ "$zero_filled_data" != "$expected_zero_data" ]]; then + echo "Expected data: $expected_zero_data" + echo " Actual data: $zero_filled_data" + _fail "Zero-fill expectations with mmap() not respected" + fi + + _scratch_cycle_mount + $XFS_IO_PROG $test_file \ + -c "mmap -w 0 $map_len" \ + -c "mwrite $new_filelen $zero_filled_data_len" \ + -c "munmap" + sync + csum_post="$(_md5_checksum $test_file)" + if [[ "$csum_orig" != "$csum_post" ]]; then + echo "Expected csum: $csum_orig" + echo " Actual csum: $csum_post" + _fail "mmap() write up to page boundary should not change actual file contents" + fi + + local filelen_test=$(_get_filesize $test_file) + if [[ "$filelen_test" != "$new_filelen" ]]; then + echo "Expected file length: $new_filelen" + echo " Actual file length: $filelen_test" + _fail "mmap() write up to page boundary should not change actual file size" + fi + fi + + # Now lets ensure we get SIGBUS when we go beyond the page boundary + _scratch_cycle_mount + new_filelen=$(_get_filesize $test_file) + map_len=$(_round_up_to_page_boundary $new_filelen) + csum_orig="$(_md5_checksum $test_file)" + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err + if grep -q 'Bus error' $tmp.err; then + cat $tmp.err + _fail "Not expecting SIGBUS when reading up to page boundary" + fi + + # This should just work + _mread $test_file 0 $map_len >> $seqres.full 2>$tmp.err + if [[ $? -ne 0 ]]; then + _fail "mmap() read up to page boundary should work" + fi + + # This should just work + mwrite $map_len 0 $map_len >> $seqres.full 2>$tmp.err + if [[ $? -ne 0 ]]; then + _fail "mmap() write up to page boundary should work" + fi + + # If we mmap() on the boundary but try to read beyond it just + # fails, we don't get a SIGBUS + $XFS_IO_PROG -r $test_file \ + -c "mmap -r 0 $map_len" \ + -c "mread 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err + local mread_err=$? + if [[ $mread_err -eq 0 ]]; then + _fail "mmap() to page boundary works as expected but reading beyond should fail: $mread_err" + fi + + $XFS_IO_PROG -w $test_file \ + -c "mmap -w 0 $map_len" \ + -c "mwrite 0 $((map_len + 10))" >> $seqres.full 2>$tmp.err + local mwrite_err=$? + if [[ $mwrite_err -eq 0 ]]; then + _fail "mmap() to page boundary works as expected but writing beyond should fail: $mwrite_err" + fi + + # Now let's go beyond the allowed mmap() page boundary + _mread $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err + if ! grep -q 'Bus error' $tmp.err; then + _fail "Expected SIGBUS when mmap() reading beyond page boundary" + fi + + mwrite $test_file 0 $((map_len + 10)) $((map_len + 10)) >> $seqres.full 2>$tmp.err + if ! 
grep -q 'Bus error' $tmp.err; then + _fail "Expected SIGBUS when mmap() writing beyond page boundary" + fi + + local filelen_test=$(_get_filesize $test_file) + if [[ "$filelen_test" != "$new_filelen" ]]; then + echo "Expected file length: $new_filelen" + echo " Actual file length: $filelen_test" + _fail "reading or writing beyond file size up to mmap() page boundary should not change file size" + fi +} + +test_block_size() +{ + local block_size=$1 + + do_mmap_tests $block_size 512 3 5 + do_mmap_tests $block_size 11k 0 $((4096 * 3 + 3)) + do_mmap_tests $block_size 16k 0 $((16384+3)) + do_mmap_tests $block_size 16k $((16384-10)) $((16384+20)) + do_mmap_tests $block_size 64k 0 $((65536+3)) + do_mmap_tests $block_size 4k 4090 30 true +} + +_scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed" +_scratch_mount +test_file=$SCRATCH_MNT/file +block_size=$(_get_file_block_size "$SCRATCH_MNT") +test_block_size $block_size + +echo "Silence is golden" +status=0 +exit diff --git a/tests/generic/749.out b/tests/generic/749.out new file mode 100644 index 000000000000..24658deddb99 --- /dev/null +++ b/tests/generic/749.out @@ -0,0 +1,2 @@ +QA output created by 749 +Silence is golden