diff mbox series

btrfs: add io_stats to sysfs for dio fallbacks

Message ID 20250121183751.201556-1-maharmstone@fb.com (mailing list archive)
State New
Headers show
Series btrfs: add io_stats to sysfs for dio fallbacks | expand

Commit Message

Mark Harmstone Jan. 21, 2025, 6:36 p.m. UTC
For O_DIRECT reads and writes, both the buffer address and the file offset
need to be aligned to the block size. Otherwise, btrfs falls back to
doing buffered I/O, which is probably not what you want. It also creates
portability issues, as not all filesystems do this.

Add a new sysfs entry io_stats, to record how many times DIO falls back
to doing buffered I/O. The intention is that once this is recorded, we
can investigate the programs running on any machine where this isn't 0.

Signed-off-by: Mark Harmstone <maharmstone@fb.com>
---
 fs/btrfs/direct-io.c |  6 ++++++
 fs/btrfs/file.c      |  9 +++++++++
 fs/btrfs/fs.h        |  8 ++++++++
 fs/btrfs/sysfs.c     | 15 +++++++++++++++
 4 files changed, 38 insertions(+)

Comments

Daniel Vacek Jan. 22, 2025, 7:42 a.m. UTC | #1
On Tue, 21 Jan 2025 at 19:38, Mark Harmstone <maharmstone@fb.com> wrote:
>
> For O_DIRECT reads and writes, both the buffer address and the file offset
> need to be aligned to the block size. Otherwise, btrfs falls back to
> doing buffered I/O, which is probably not what you want. It also creates
> portability issues, as not all filesystems do this.
>
> Add a new sysfs entry io_stats, to record how many times DIO falls back
> to doing buffered I/O. The intention is that once this is recorded, we
> can investigate the programs running on any machine where this isn't 0.

No one will understand what these stats actually mean unless this is
well documented somewhere.

And the more so these are not generic stats but btrfs specific.

So I'm wondering what other filesystems do in such a situation? Fail
with -EINVALID? Or issue a ratelimited WARNING?

Logging a warning is a very good starting point for an investigation
of the running program on a machine. Even more, the warning can point
you exactly to the offending task which the stats won't do as they are
anonymous in nature.

> Signed-off-by: Mark Harmstone <maharmstone@fb.com>
> ---
>  fs/btrfs/direct-io.c |  6 ++++++
>  fs/btrfs/file.c      |  9 +++++++++
>  fs/btrfs/fs.h        |  8 ++++++++
>  fs/btrfs/sysfs.c     | 15 +++++++++++++++
>  4 files changed, 38 insertions(+)
>
> diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
> index 8567af46e16f..d388acf10f47 100644
> --- a/fs/btrfs/direct-io.c
> +++ b/fs/btrfs/direct-io.c
> @@ -946,6 +946,12 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
>                 goto out;
>         }
>
> +       /*
> +        * We're falling back to buffered writes, increase the
> +        * dio_write_fallback counter.
> +        */
> +       atomic_inc(&fs_info->io_stats.dio_write_fallback);
> +
>         pos = iocb->ki_pos;
>         written_buffered = btrfs_buffered_write(iocb, from);
>         if (written_buffered < 0) {
> diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
> index 36f51c311bb1..f74091482cb6 100644
> --- a/fs/btrfs/file.c
> +++ b/fs/btrfs/file.c
> @@ -3644,10 +3644,19 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
>         ssize_t ret = 0;
>
>         if (iocb->ki_flags & IOCB_DIRECT) {
> +               struct btrfs_fs_info *fs_info;
> +
>                 ret = btrfs_direct_read(iocb, to);
>                 if (ret < 0 || !iov_iter_count(to) ||
>                     iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
>                         return ret;
> +
> +               /*
> +                * We're falling back to buffered reads, increase the
> +                * dio_read_fallback counter.
> +                */
> +               fs_info = BTRFS_I(iocb->ki_filp->f_inode)->root->fs_info;
> +               atomic_inc(&fs_info->io_stats.dio_read_fallback);
>         }
>
>         return filemap_read(iocb, to, ret);
> diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
> index b572d6b9730b..e659fb12cae6 100644
> --- a/fs/btrfs/fs.h
> +++ b/fs/btrfs/fs.h
> @@ -406,6 +406,12 @@ struct btrfs_commit_stats {
>         u64 total_commit_dur;
>  };
>
> +/* Store data about I/O stats, exported via sysfs. */
> +struct btrfs_io_stats {
> +       atomic_t dio_read_fallback;
> +       atomic_t dio_write_fallback;
> +};
> +
>  struct btrfs_fs_info {
>         u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
>         unsigned long flags;
> @@ -851,6 +857,8 @@ struct btrfs_fs_info {
>         /* Updates are not protected by any lock */
>         struct btrfs_commit_stats commit_stats;
>
> +       struct btrfs_io_stats io_stats;
> +
>         /*
>          * Last generation where we dropped a non-relocation root.
>          * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
> diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
> index 53b846d99ece..4dc772bc7e7b 100644
> --- a/fs/btrfs/sysfs.c
> +++ b/fs/btrfs/sysfs.c
> @@ -1526,6 +1526,20 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
>  BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
>               btrfs_bg_reclaim_threshold_store);
>
> +static ssize_t btrfs_io_stats_show(struct kobject *kobj,
> +                                  struct kobj_attribute *a, char *buf)
> +{
> +       struct btrfs_fs_info *fs_info = to_fs_info(kobj);
> +
> +       return sysfs_emit(buf,
> +                       "dio_read_fallback %u\n"
> +                       "dio_write_fallback %u\n",
> +                       atomic_read(&fs_info->io_stats.dio_read_fallback),
> +                       atomic_read(&fs_info->io_stats.dio_write_fallback));
> +}
> +
> +BTRFS_ATTR(, io_stats, btrfs_io_stats_show);
> +
>  #ifdef CONFIG_BTRFS_EXPERIMENTAL
>  static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
>                                        struct kobj_attribute *a, char *buf)
> @@ -1586,6 +1600,7 @@ static const struct attribute *btrfs_attrs[] = {
>         BTRFS_ATTR_PTR(, bg_reclaim_threshold),
>         BTRFS_ATTR_PTR(, commit_stats),
>         BTRFS_ATTR_PTR(, temp_fsid),
> +       BTRFS_ATTR_PTR(, io_stats),
>  #ifdef CONFIG_BTRFS_EXPERIMENTAL
>         BTRFS_ATTR_PTR(, offload_csum),
>  #endif
> --
> 2.45.2
>
>
Mark Harmstone Jan. 22, 2025, 11:35 a.m. UTC | #2
Thanks Daniel.

On 22/1/25 07:42, Daniel Vacek wrote:
 > On Tue, 21 Jan 2025 at 19:38, Mark Harmstone <maharmstone@fb.com> wrote:
 >>
 >> For O_DIRECT reads and writes, both the buffer address and the file 
offset
 >> need to be aligned to the block size. Otherwise, btrfs falls back to
 >> doing buffered I/O, which is probably not what you want. It also creates
 >> portability issues, as not all filesystems do this.
 >>
 >> Add a new sysfs entry io_stats, to record how many times DIO falls back
 >> to doing buffered I/O. The intention is that once this is recorded, we
 >> can investigate the programs running on any machine where this isn't 0.
 >
 > No one will understand what these stats actually mean unless this is
 > well documented somewhere.
 >
 > And the more so these are not generic stats but btrfs specific.

That's fine, I'll send a patch to Documentation/ch-sysfs.rst in 
btrfs-progs once this is in. That's what we have for commit_stats.

 > So I'm wondering what other filesystems do in such a situation? Fail
 > with -EINVALID? Or issue a ratelimited WARNING?

O_DIRECT isn't part of POSIX, so there's no standard. Ext4 seems to do 
something similar to btrfs. XFS has xfs_file_dio_write_unaligned(), 
which appears to somehow do unaligned DIO. Bcachefs fails with -EINVAL. 
Nobody issues a warning as far as I can see.

 > Logging a warning is a very good starting point for an investigation
 > of the running program on a machine. Even more, the warning can point
 > you exactly to the offending task which the stats won't do as they are
 > anonymous in nature.

But then you get a closed-source program that does unaligned O_DIRECT
I/O, and now you have dmesg telling you about a problem you can't fix.

I believe btrfs' DIO fallback is more or less what Jens' proposed 
RWF_UNCACHED patches allow you to do intentionally, so it's not that 
there's not a legitimate use case for it. It's just that it's nearly 
always a programmer mistake.

Mark
Daniel Vacek Jan. 22, 2025, 1:02 p.m. UTC | #3
On Wed, 22 Jan 2025 at 12:35, Mark Harmstone <maharmstone@meta.com> wrote:
>
> Thanks Daniel.
>
> On 22/1/25 07:42, Daniel Vacek wrote:
>  > On Tue, 21 Jan 2025 at 19:38, Mark Harmstone <maharmstone@fb.com> wrote:
>  >>
>  >> For O_DIRECT reads and writes, both the buffer address and the file
> offset
>  >> need to be aligned to the block size. Otherwise, btrfs falls back to
>  >> doing buffered I/O, which is probably not what you want. It also creates
>  >> portability issues, as not all filesystems do this.
>  >>
>  >> Add a new sysfs entry io_stats, to record how many times DIO falls back
>  >> to doing buffered I/O. The intention is that once this is recorded, we
>  >> can investigate the programs running on any machine where this isn't 0.
>  >
>  > No one will understand what these stats actually mean unless this is
>  > well documented somewhere.
>  >
>  > And the more so these are not generic stats but btrfs specific.
>
> That's fine, I'll send a patch to Documentation/ch-sysfs.rst in
> btrfs-progs once this is in. That's what we have for commit_stats.
>
>  > So I'm wondering what other filesystems do in such a situation? Fail
>  > with -EINVALID? Or issue a ratelimited WARNING?
>
> O_DIRECT isn't part of POSIX, so there's no standard. Ext4 seems to do
> something similar to btrfs. XFS has xfs_file_dio_write_unaligned(),
> which appears to somehow do unaligned DIO. Bcachefs fails with -EINVAL.
> Nobody issues a warning as far as I can see.

I mean that also can be improved. Or btrfs can just do better than the others.

Maybe a warning is a bit too much in this case, I'm not really sure. I
just offered an idea to consider, coz silently falling back to cached
IO without letting the user know does not seem right.

Well, the question is - If you take it from the app developer point of
view. What's the reason to explicitly ask for DIO in the first place?
Would you like to know (would you care) it falled back to a cached
access because basically a wrong usage? I think that is the important
question here.

Imagine the developer wants to improve the performance so they change
to DIO. But btrfs will never actually end up doing DIO due to the
silent fallback. They benchmark. They get the same results. They
conclude that DIO won't make a difference and it is not worth pursuing
further. Just because not knowing they only missed an offset.

That's actually where we are right now and what you are trying to
improve, right?

What is more likely to be noticed? A warning in the log or some
counter some particular filesystem exposes somewhere no one even knows
exists.

And don't get me wrong, I'm not against the stats. I think both the
stats and the warning can be useful here. The warning can even guide
you to eventually check/monitor the stats.

>  > Logging a warning is a very good starting point for an investigation
>  > of the running program on a machine. Even more, the warning can point
>  > you exactly to the offending task which the stats won't do as they are
>  > anonymous in nature.
>
> But then you get a closed-source program that does unaligned O_DIRECT
> I/O, and now you have dmesg telling you about a problem you can't fix.

That's why I mentioned rate limiting. Actually in this case a
WARN_ONCE may be the best approach. I think that that is the usual
solution in various parts of the kernel. And I think it's always good
to know about a problem, even when you cannot fix it.

If you can't fix that problem the stat counters would be as useful as
the warning, IMO. Just more hidden.
And well, you can always reach out to the vendor of that closed source
app asking to address the issue.

The only problem would be some legacy SW where the original vendor no
longer exists. But in that case you can choose to ignore the warning,
especially if only issued once.

On the other hand, developing a new SW one may not even realize the
DIO is not aligned (by a mistake or by a bug) without getting a
warning. So being a bit more verbose seems really useful to me.

> I believe btrfs' DIO fallback is more or less what Jens' proposed
> RWF_UNCACHED patches allow you to do intentionally, so it's not that
> there's not a legitimate use case for it. It's just that it's nearly
> always a programmer mistake.

Agreed, and that's exactly where I think the warning is the most useful, IMO.
That's also why compilers shout warnings for many common mistakes,
right? Just my 2c.

> Mark
>
David Sterba Jan. 22, 2025, 3:23 p.m. UTC | #4
On Tue, Jan 21, 2025 at 06:36:53PM +0000, Mark Harmstone wrote:
> For O_DIRECT reads and writes, both the buffer address and the file offset
> need to be aligned to the block size. Otherwise, btrfs falls back to
> doing buffered I/O, which is probably not what you want. It also creates
> portability issues, as not all filesystems do this.

One thing is to track io stats, we can probably do that in general. The
buffered fallback of misaligned direct io has been with us for ages.
That the fallback is silent (and what we want) is intentional as it
makes direct io work on compressed files.

So you may be specifically interested in catching the misaligned dio
requests.

> Add a new sysfs entry io_stats, to record how many times DIO falls back
> to doing buffered I/O. The intention is that once this is recorded, we
> can investigate the programs running on any machine where this isn't 0.

Tracking just one number per whole filesystem seems quite limited in
narrowing down the problem to an application. With compression enabled,
anywhere in the filesystem and then doing a direct io will make it hard.
I think you'll get more exact results by a targeted tool, BPF based or
such.
Mark Harmstone Jan. 27, 2025, 6:06 p.m. UTC | #5
On 22/1/25 15:23, David Sterba wrote:
> On Tue, Jan 21, 2025 at 06:36:53PM +0000, Mark Harmstone wrote:
>> For O_DIRECT reads and writes, both the buffer address and the file offset
>> need to be aligned to the block size. Otherwise, btrfs falls back to
>> doing buffered I/O, which is probably not what you want. It also creates
>> portability issues, as not all filesystems do this.
> 
> One thing is to track io stats, we can probably do that in general. The
> buffered fallback of misaligned direct io has been with us for ages.
> That the fallback is silent (and what we want) is intentional as it
> makes direct io work on compressed files.
> 
> So you may be specifically interested in catching the misaligned dio
> requests.
> 
>> Add a new sysfs entry io_stats, to record how many times DIO falls back
>> to doing buffered I/O. The intention is that once this is recorded, we
>> can investigate the programs running on any machine where this isn't 0.
> 
> Tracking just one number per whole filesystem seems quite limited in
> narrowing down the problem to an application. With compression enabled,
> anywhere in the filesystem and then doing a direct io will make it hard.
> I think you'll get more exact results by a targeted tool, BPF based or
> such.

Thanks. From a Meta viewpoint, my thinking was to add this sysfs entry 
so that we can get a broad view of the problem using our internal 
monitoring tools, then do something BPF-related to identify the worst 
offenders.
Mark Harmstone Jan. 27, 2025, 6:11 p.m. UTC | #6
On 22/1/25 13:02, Daniel Vacek wrote:
> > 
> On Wed, 22 Jan 2025 at 12:35, Mark Harmstone <maharmstone@meta.com> wrote:
>>
>> Thanks Daniel.
>>
>> On 22/1/25 07:42, Daniel Vacek wrote:
>>   > On Tue, 21 Jan 2025 at 19:38, Mark Harmstone <maharmstone@fb.com> wrote:
>>   >>
>>   >> For O_DIRECT reads and writes, both the buffer address and the file
>> offset
>>   >> need to be aligned to the block size. Otherwise, btrfs falls back to
>>   >> doing buffered I/O, which is probably not what you want. It also creates
>>   >> portability issues, as not all filesystems do this.
>>   >>
>>   >> Add a new sysfs entry io_stats, to record how many times DIO falls back
>>   >> to doing buffered I/O. The intention is that once this is recorded, we
>>   >> can investigate the programs running on any machine where this isn't 0.
>>   >
>>   > No one will understand what these stats actually mean unless this is
>>   > well documented somewhere.
>>   >
>>   > And the more so these are not generic stats but btrfs specific.
>>
>> That's fine, I'll send a patch to Documentation/ch-sysfs.rst in
>> btrfs-progs once this is in. That's what we have for commit_stats.
>>
>>   > So I'm wondering what other filesystems do in such a situation? Fail
>>   > with -EINVALID? Or issue a ratelimited WARNING?
>>
>> O_DIRECT isn't part of POSIX, so there's no standard. Ext4 seems to do
>> something similar to btrfs. XFS has xfs_file_dio_write_unaligned(),
>> which appears to somehow do unaligned DIO. Bcachefs fails with -EINVAL.
>> Nobody issues a warning as far as I can see.
> 
> I mean that also can be improved. Or btrfs can just do better than the others.
> 
> Maybe a warning is a bit too much in this case, I'm not really sure. I
> just offered an idea to consider, coz silently falling back to cached
> IO without letting the user know does not seem right.
> 
> Well, the question is - If you take it from the app developer point of
> view. What's the reason to explicitly ask for DIO in the first place?
> Would you like to know (would you care) it falled back to a cached
> access because basically a wrong usage? I think that is the important
> question here.
> 
> Imagine the developer wants to improve the performance so they change
> to DIO. But btrfs will never actually end up doing DIO due to the
> silent fallback. They benchmark. They get the same results. They
> conclude that DIO won't make a difference and it is not worth pursuing
> further. Just because not knowing they only missed an offset.
> 
> That's actually where we are right now and what you are trying to
> improve, right?
> 
> What is more likely to be noticed? A warning in the log or some
> counter some particular filesystem exposes somewhere no one even knows
> exists.
> 
> And don't get me wrong, I'm not against the stats. I think both the
> stats and the warning can be useful here. The warning can even guide
> you to eventually check/monitor the stats.
> 
>>   > Logging a warning is a very good starting point for an investigation
>>   > of the running program on a machine. Even more, the warning can point
>>   > you exactly to the offending task which the stats won't do as they are
>>   > anonymous in nature.
>>
>> But then you get a closed-source program that does unaligned O_DIRECT
>> I/O, and now you have dmesg telling you about a problem you can't fix.
> 
> That's why I mentioned rate limiting. Actually in this case a
> WARN_ONCE may be the best approach. I think that that is the usual
> solution in various parts of the kernel. And I think it's always good
> to know about a problem, even when you cannot fix it.
> 
> If you can't fix that problem the stat counters would be as useful as
> the warning, IMO. Just more hidden.
> And well, you can always reach out to the vendor of that closed source
> app asking to address the issue.
> 
> The only problem would be some legacy SW where the original vendor no
> longer exists. But in that case you can choose to ignore the warning,
> especially if only issued once.
> 
> On the other hand, developing a new SW one may not even realize the
> DIO is not aligned (by a mistake or by a bug) without getting a
> warning. So being a bit more verbose seems really useful to me.
> 
>> I believe btrfs' DIO fallback is more or less what Jens' proposed
>> RWF_UNCACHED patches allow you to do intentionally, so it's not that
>> there's not a legitimate use case for it. It's just that it's nearly
>> always a programmer mistake.
> 
> Agreed, and that's exactly where I think the warning is the most useful, IMO.
> That's also why compilers shout warnings for many common mistakes,
> right? Just my 2c.
> 
>> Mark
>>

My concern is that putting it in dmesg will add to the overhead of 
distro maintainers, i.e. having to explain to end-users how a warning is 
harmless. And having once had to write a syslog rule to work around 
Citrix's chattiness, I'm not eager to inflict that on anybody else.

Plus any given piece of software will have far more users than 
developers; the warning is going to be noise for the vast majority of 
people.
Daniel Vacek Jan. 27, 2025, 6:58 p.m. UTC | #7
On Mon, 27 Jan 2025 at 19:11, Mark Harmstone <maharmstone@meta.com> wrote:
>
> On 22/1/25 13:02, Daniel Vacek wrote:
> > >
> > On Wed, 22 Jan 2025 at 12:35, Mark Harmstone <maharmstone@meta.com> wrote:
> >>
> >> Thanks Daniel.
> >>
> >> On 22/1/25 07:42, Daniel Vacek wrote:
> >>   > On Tue, 21 Jan 2025 at 19:38, Mark Harmstone <maharmstone@fb.com> wrote:
> >>   >>
> >>   >> For O_DIRECT reads and writes, both the buffer address and the file
> >> offset
> >>   >> need to be aligned to the block size. Otherwise, btrfs falls back to
> >>   >> doing buffered I/O, which is probably not what you want. It also creates
> >>   >> portability issues, as not all filesystems do this.
> >>   >>
> >>   >> Add a new sysfs entry io_stats, to record how many times DIO falls back
> >>   >> to doing buffered I/O. The intention is that once this is recorded, we
> >>   >> can investigate the programs running on any machine where this isn't 0.
> >>   >
> >>   > No one will understand what these stats actually mean unless this is
> >>   > well documented somewhere.
> >>   >
> >>   > And the more so these are not generic stats but btrfs specific.
> >>
> >> That's fine, I'll send a patch to Documentation/ch-sysfs.rst in
> >> btrfs-progs once this is in. That's what we have for commit_stats.
> >>
> >>   > So I'm wondering what other filesystems do in such a situation? Fail
> >>   > with -EINVALID? Or issue a ratelimited WARNING?
> >>
> >> O_DIRECT isn't part of POSIX, so there's no standard. Ext4 seems to do
> >> something similar to btrfs. XFS has xfs_file_dio_write_unaligned(),
> >> which appears to somehow do unaligned DIO. Bcachefs fails with -EINVAL.
> >> Nobody issues a warning as far as I can see.
> >
> > I mean that also can be improved. Or btrfs can just do better than the others.
> >
> > Maybe a warning is a bit too much in this case, I'm not really sure. I
> > just offered an idea to consider, coz silently falling back to cached
> > IO without letting the user know does not seem right.
> >
> > Well, the question is - If you take it from the app developer point of
> > view. What's the reason to explicitly ask for DIO in the first place?
> > Would you like to know (would you care) it falled back to a cached
> > access because basically a wrong usage? I think that is the important
> > question here.
> >
> > Imagine the developer wants to improve the performance so they change
> > to DIO. But btrfs will never actually end up doing DIO due to the
> > silent fallback. They benchmark. They get the same results. They
> > conclude that DIO won't make a difference and it is not worth pursuing
> > further. Just because not knowing they only missed an offset.
> >
> > That's actually where we are right now and what you are trying to
> > improve, right?
> >
> > What is more likely to be noticed? A warning in the log or some
> > counter some particular filesystem exposes somewhere no one even knows
> > exists.
> >
> > And don't get me wrong, I'm not against the stats. I think both the
> > stats and the warning can be useful here. The warning can even guide
> > you to eventually check/monitor the stats.
> >
> >>   > Logging a warning is a very good starting point for an investigation
> >>   > of the running program on a machine. Even more, the warning can point
> >>   > you exactly to the offending task which the stats won't do as they are
> >>   > anonymous in nature.
> >>
> >> But then you get a closed-source program that does unaligned O_DIRECT
> >> I/O, and now you have dmesg telling you about a problem you can't fix.
> >
> > That's why I mentioned rate limiting. Actually in this case a
> > WARN_ONCE may be the best approach. I think that that is the usual
> > solution in various parts of the kernel. And I think it's always good
> > to know about a problem, even when you cannot fix it.
> >
> > If you can't fix that problem the stat counters would be as useful as
> > the warning, IMO. Just more hidden.
> > And well, you can always reach out to the vendor of that closed source
> > app asking to address the issue.
> >
> > The only problem would be some legacy SW where the original vendor no
> > longer exists. But in that case you can choose to ignore the warning,
> > especially if only issued once.
> >
> > On the other hand, developing a new SW one may not even realize the
> > DIO is not aligned (by a mistake or by a bug) without getting a
> > warning. So being a bit more verbose seems really useful to me.
> >
> >> I believe btrfs' DIO fallback is more or less what Jens' proposed
> >> RWF_UNCACHED patches allow you to do intentionally, so it's not that
> >> there's not a legitimate use case for it. It's just that it's nearly
> >> always a programmer mistake.
> >
> > Agreed, and that's exactly where I think the warning is the most useful, IMO.
> > That's also why compilers shout warnings for many common mistakes,
> > right? Just my 2c.
> >
> >> Mark
> >>
>
> My concern is that putting it in dmesg will add to the overhead of
> distro maintainers, i.e. having to explain to end-users how a warning is
> harmless. And having once had to write a syslog rule to work around
> Citrix's chattiness, I'm not eager to inflict that on anybody else.

Understood. Luckily nowadays this is mostly covered by LLM chatbots.
Anyways, maybe a warning is a bit too strict. Even though it's
targeted on developers, not on users. Still, a misaligned DIO seems
worth reporting one way or another so that the 'buggy' application can
be fixed. And usually the more visible the issue is the faster and
more likely it is to be addressed.

Have you also considered a tracing point or a dynamic debug print alternatives?

> Plus any given piece of software will have far more users than
> developers; the warning is going to be noise for the vast majority of
> people.

As is basically any other line from the bootup. This applies
generally. I don't see one additional line making a huge difference
really.
diff mbox series

Patch

diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c
index 8567af46e16f..d388acf10f47 100644
--- a/fs/btrfs/direct-io.c
+++ b/fs/btrfs/direct-io.c
@@ -946,6 +946,12 @@  ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 		goto out;
 	}
 
+	/*
+	 * We're falling back to buffered writes, increase the
+	 * dio_write_fallback counter.
+	 */
+	atomic_inc(&fs_info->io_stats.dio_write_fallback);
+
 	pos = iocb->ki_pos;
 	written_buffered = btrfs_buffered_write(iocb, from);
 	if (written_buffered < 0) {
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 36f51c311bb1..f74091482cb6 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3644,10 +3644,19 @@  static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	ssize_t ret = 0;
 
 	if (iocb->ki_flags & IOCB_DIRECT) {
+		struct btrfs_fs_info *fs_info;
+
 		ret = btrfs_direct_read(iocb, to);
 		if (ret < 0 || !iov_iter_count(to) ||
 		    iocb->ki_pos >= i_size_read(file_inode(iocb->ki_filp)))
 			return ret;
+
+		/*
+		 * We're falling back to buffered reads, increase the
+		 * dio_read_fallback counter.
+		 */
+		fs_info = BTRFS_I(iocb->ki_filp->f_inode)->root->fs_info;
+		atomic_inc(&fs_info->io_stats.dio_read_fallback);
 	}
 
 	return filemap_read(iocb, to, ret);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index b572d6b9730b..e659fb12cae6 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -406,6 +406,12 @@  struct btrfs_commit_stats {
 	u64 total_commit_dur;
 };
 
+/* Store data about I/O stats, exported via sysfs. */
+struct btrfs_io_stats {
+	atomic_t dio_read_fallback;
+	atomic_t dio_write_fallback;
+};
+
 struct btrfs_fs_info {
 	u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
 	unsigned long flags;
@@ -851,6 +857,8 @@  struct btrfs_fs_info {
 	/* Updates are not protected by any lock */
 	struct btrfs_commit_stats commit_stats;
 
+	struct btrfs_io_stats io_stats;
+
 	/*
 	 * Last generation where we dropped a non-relocation root.
 	 * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 53b846d99ece..4dc772bc7e7b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -1526,6 +1526,20 @@  static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
 BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
 	      btrfs_bg_reclaim_threshold_store);
 
+static ssize_t btrfs_io_stats_show(struct kobject *kobj,
+				   struct kobj_attribute *a, char *buf)
+{
+	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
+
+	return sysfs_emit(buf,
+			"dio_read_fallback %u\n"
+			"dio_write_fallback %u\n",
+			atomic_read(&fs_info->io_stats.dio_read_fallback),
+			atomic_read(&fs_info->io_stats.dio_write_fallback));
+}
+
+BTRFS_ATTR(, io_stats, btrfs_io_stats_show);
+
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
 static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
 				       struct kobj_attribute *a, char *buf)
@@ -1586,6 +1600,7 @@  static const struct attribute *btrfs_attrs[] = {
 	BTRFS_ATTR_PTR(, bg_reclaim_threshold),
 	BTRFS_ATTR_PTR(, commit_stats),
 	BTRFS_ATTR_PTR(, temp_fsid),
+	BTRFS_ATTR_PTR(, io_stats),
 #ifdef CONFIG_BTRFS_EXPERIMENTAL
 	BTRFS_ATTR_PTR(, offload_csum),
 #endif