diff mbox series

[f2fs-dev,v3] f2fs: prevent writing without fallocate() for pinned files

Message ID 20240319212316.4193790-1-daeho43@gmail.com (mailing list archive)
State Superseded
Headers show
Series [f2fs-dev,v3] f2fs: prevent writing without fallocate() for pinned files | expand

Commit Message

Daeho Jeong March 19, 2024, 9:23 p.m. UTC
From: Daeho Jeong <daehojeong@google.com>

In a case writing without fallocate(), we can't guarantee it's allocated
in the conventional area for zoned stroage.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
---
v2: covered the direct io case
v3: covered the mkwrite case
---
 fs/f2fs/data.c | 14 ++++++++++++--
 fs/f2fs/file.c | 16 ++++++++--------
 2 files changed, 20 insertions(+), 10 deletions(-)

Comments

Chao Yu March 20, 2024, 9:38 a.m. UTC | #1
On 2024/3/20 5:23, Daeho Jeong wrote:
> From: Daeho Jeong <daehojeong@google.com>
> 
> In a case writing without fallocate(), we can't guarantee it's allocated
> in the conventional area for zoned stroage.
> 
> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> ---
> v2: covered the direct io case
> v3: covered the mkwrite case
> ---
>   fs/f2fs/data.c | 14 ++++++++++++--
>   fs/f2fs/file.c | 16 ++++++++--------
>   2 files changed, 20 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index c21b92f18463..d3e5ab2736a6 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
>   
>   	/* use out-place-update for direct IO under LFS mode */
>   	if (map->m_may_create &&
> -	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
> -		if (unlikely(f2fs_cp_error(sbi))) {
> +	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
> +			 !f2fs_is_pinned_file(inode)))) {
> +		if (unlikely(f2fs_cp_error(sbi)) ||
> +		    (f2fs_is_pinned_file(inode) && is_hole &&
> +		     flag != F2FS_GET_BLOCK_PRE_DIO)) {
>   			err = -EIO;
>   			goto sync_out;
>   		}
> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>   		f2fs_map_lock(sbi, flag);
>   		locked = true;
>   	} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
> +		if (f2fs_is_pinned_file(inode))
> +			return -EIO;
>   		f2fs_map_lock(sbi, flag);
>   		locked = true;
>   	}
> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>   
>   	if (!f2fs_lookup_read_extent_cache_block(inode, index,
>   						 &dn.data_blkaddr)) {
> +		if (f2fs_is_pinned_file(inode)) {
> +			err = -EIO;
> +			goto out;
> +		}
> +
>   		if (locked) {
>   			err = f2fs_reserve_block(&dn, index);
>   			goto out;
> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> index 82277e95c88f..4db3b21c804b 100644
> --- a/fs/f2fs/file.c
> +++ b/fs/f2fs/file.c
> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>   	struct inode *inode = file_inode(vmf->vma->vm_file);
>   	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>   	struct dnode_of_data dn;
> -	bool need_alloc = true;
> +	bool need_alloc = !f2fs_is_pinned_file(inode);

Will this check races w/ pinfile get|set?

Thanks,

>   	int err = 0;
>   	vm_fault_t ret;
>   
> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>   		goto out_sem;
>   	}
>   
> +	set_new_dnode(&dn, inode, NULL, NULL, 0);
>   	if (need_alloc) {
>   		/* block allocation */
> -		set_new_dnode(&dn, inode, NULL, NULL, 0);
>   		err = f2fs_get_block_locked(&dn, page->index);
> -	}
> -
> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> -	if (!need_alloc) {
> -		set_new_dnode(&dn, inode, NULL, NULL, 0);
> +	} else {
>   		err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
>   		f2fs_put_dnode(&dn);
>   	}
> -#endif
> +
>   	if (err) {
>   		unlock_page(page);
>   		goto out_sem;
> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
>   			return ret;
>   	}
>   
> +	/* For pinned files, it should be fallocate()-ed in advance. */
> +	if (f2fs_is_pinned_file(inode))
> +		return 0;
> +
>   	/* Do not preallocate blocks that will be written partially in 4KB. */
>   	map.m_lblk = F2FS_BLK_ALIGN(pos);
>   	map.m_len = F2FS_BYTES_TO_BLK(pos + count);
Daeho Jeong March 20, 2024, 5:42 p.m. UTC | #2
On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote:
>
> On 2024/3/20 5:23, Daeho Jeong wrote:
> > From: Daeho Jeong <daehojeong@google.com>
> >
> > In a case writing without fallocate(), we can't guarantee it's allocated
> > in the conventional area for zoned stroage.
> >
> > Signed-off-by: Daeho Jeong <daehojeong@google.com>
> > ---
> > v2: covered the direct io case
> > v3: covered the mkwrite case
> > ---
> >   fs/f2fs/data.c | 14 ++++++++++++--
> >   fs/f2fs/file.c | 16 ++++++++--------
> >   2 files changed, 20 insertions(+), 10 deletions(-)
> >
> > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> > index c21b92f18463..d3e5ab2736a6 100644
> > --- a/fs/f2fs/data.c
> > +++ b/fs/f2fs/data.c
> > @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
> >
> >       /* use out-place-update for direct IO under LFS mode */
> >       if (map->m_may_create &&
> > -         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
> > -             if (unlikely(f2fs_cp_error(sbi))) {
> > +         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
> > +                      !f2fs_is_pinned_file(inode)))) {
> > +             if (unlikely(f2fs_cp_error(sbi)) ||
> > +                 (f2fs_is_pinned_file(inode) && is_hole &&
> > +                  flag != F2FS_GET_BLOCK_PRE_DIO)) {
> >                       err = -EIO;
> >                       goto sync_out;
> >               }
> > @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >               f2fs_map_lock(sbi, flag);
> >               locked = true;
> >       } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
> > +             if (f2fs_is_pinned_file(inode))
> > +                     return -EIO;
> >               f2fs_map_lock(sbi, flag);
> >               locked = true;
> >       }
> > @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >
> >       if (!f2fs_lookup_read_extent_cache_block(inode, index,
> >                                                &dn.data_blkaddr)) {
> > +             if (f2fs_is_pinned_file(inode)) {
> > +                     err = -EIO;
> > +                     goto out;
> > +             }
> > +
> >               if (locked) {
> >                       err = f2fs_reserve_block(&dn, index);
> >                       goto out;
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index 82277e95c88f..4db3b21c804b 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >       struct inode *inode = file_inode(vmf->vma->vm_file);
> >       struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >       struct dnode_of_data dn;
> > -     bool need_alloc = true;
> > +     bool need_alloc = !f2fs_is_pinned_file(inode);
>
> Will this check races w/ pinfile get|set?

Do you mean "set/clear" case? I believe "set" case is okay, since we
can't set if the inode already has a data block. For "clear" case, I
believe mkwrite failure is okay in racy conditions caused by clearing
the pin flag. What do you think?

>
> Thanks,
>
> >       int err = 0;
> >       vm_fault_t ret;
> >
> > @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >               goto out_sem;
> >       }
> >
> > +     set_new_dnode(&dn, inode, NULL, NULL, 0);
> >       if (need_alloc) {
> >               /* block allocation */
> > -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> >               err = f2fs_get_block_locked(&dn, page->index);
> > -     }
> > -
> > -#ifdef CONFIG_F2FS_FS_COMPRESSION
> > -     if (!need_alloc) {
> > -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> > +     } else {
> >               err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
> >               f2fs_put_dnode(&dn);
> >       }
> > -#endif
> > +
> >       if (err) {
> >               unlock_page(page);
> >               goto out_sem;
> > @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> >                       return ret;
> >       }
> >
> > +     /* For pinned files, it should be fallocate()-ed in advance. */
> > +     if (f2fs_is_pinned_file(inode))
> > +             return 0;
> > +
> >       /* Do not preallocate blocks that will be written partially in 4KB. */
> >       map.m_lblk = F2FS_BLK_ALIGN(pos);
> >       map.m_len = F2FS_BYTES_TO_BLK(pos + count);
Chao Yu March 23, 2024, 4:26 a.m. UTC | #3
On 2024/3/21 1:42, Daeho Jeong wrote:
> On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote:
>>
>> On 2024/3/20 5:23, Daeho Jeong wrote:
>>> From: Daeho Jeong <daehojeong@google.com>
>>>
>>> In a case writing without fallocate(), we can't guarantee it's allocated
>>> in the conventional area for zoned stroage.
>>>
>>> Signed-off-by: Daeho Jeong <daehojeong@google.com>
>>> ---
>>> v2: covered the direct io case
>>> v3: covered the mkwrite case
>>> ---
>>>    fs/f2fs/data.c | 14 ++++++++++++--
>>>    fs/f2fs/file.c | 16 ++++++++--------
>>>    2 files changed, 20 insertions(+), 10 deletions(-)
>>>
>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>> index c21b92f18463..d3e5ab2736a6 100644
>>> --- a/fs/f2fs/data.c
>>> +++ b/fs/f2fs/data.c
>>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
>>>
>>>        /* use out-place-update for direct IO under LFS mode */
>>>        if (map->m_may_create &&
>>> -         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
>>> -             if (unlikely(f2fs_cp_error(sbi))) {
>>> +         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
>>> +                      !f2fs_is_pinned_file(inode)))) {
>>> +             if (unlikely(f2fs_cp_error(sbi)) ||
>>> +                 (f2fs_is_pinned_file(inode) && is_hole &&
>>> +                  flag != F2FS_GET_BLOCK_PRE_DIO)) {
>>>                        err = -EIO;
>>>                        goto sync_out;
>>>                }
>>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>>>                f2fs_map_lock(sbi, flag);
>>>                locked = true;
>>>        } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
>>> +             if (f2fs_is_pinned_file(inode))
>>> +                     return -EIO;
>>>                f2fs_map_lock(sbi, flag);
>>>                locked = true;
>>>        }
>>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>>>
>>>        if (!f2fs_lookup_read_extent_cache_block(inode, index,
>>>                                                 &dn.data_blkaddr)) {
>>> +             if (f2fs_is_pinned_file(inode)) {
>>> +                     err = -EIO;
>>> +                     goto out;
>>> +             }
>>> +
>>>                if (locked) {
>>>                        err = f2fs_reserve_block(&dn, index);
>>>                        goto out;
>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>> index 82277e95c88f..4db3b21c804b 100644
>>> --- a/fs/f2fs/file.c
>>> +++ b/fs/f2fs/file.c
>>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>>>        struct inode *inode = file_inode(vmf->vma->vm_file);
>>>        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>        struct dnode_of_data dn;
>>> -     bool need_alloc = true;
>>> +     bool need_alloc = !f2fs_is_pinned_file(inode);
>>
>> Will this check races w/ pinfile get|set?
> 
> Do you mean "set/clear" case? I believe "set" case is okay, since we

Yup,

> can't set if the inode already has a data block. For "clear" case, I

However, we can set pinfile on written inode in regular block device:

	if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
		ret = -EFBIG;
		goto out;
	}

Should we add the logic only if blkzoned feture is enabled?

> believe mkwrite failure is okay in racy conditions caused by clearing
> the pin flag. What do you think?

Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to
avoid the race condition?

Thanks,

> 
>>
>> Thanks,
>>
>>>        int err = 0;
>>>        vm_fault_t ret;
>>>
>>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>>>                goto out_sem;
>>>        }
>>>
>>> +     set_new_dnode(&dn, inode, NULL, NULL, 0);
>>>        if (need_alloc) {
>>>                /* block allocation */
>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
>>>                err = f2fs_get_block_locked(&dn, page->index);
>>> -     }
>>> -
>>> -#ifdef CONFIG_F2FS_FS_COMPRESSION
>>> -     if (!need_alloc) {
>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
>>> +     } else {
>>>                err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
>>>                f2fs_put_dnode(&dn);
>>>        }
>>> -#endif
>>> +
>>>        if (err) {
>>>                unlock_page(page);
>>>                goto out_sem;
>>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
>>>                        return ret;
>>>        }
>>>
>>> +     /* For pinned files, it should be fallocate()-ed in advance. */
>>> +     if (f2fs_is_pinned_file(inode))
>>> +             return 0;
>>> +
>>>        /* Do not preallocate blocks that will be written partially in 4KB. */
>>>        map.m_lblk = F2FS_BLK_ALIGN(pos);
>>>        map.m_len = F2FS_BYTES_TO_BLK(pos + count);
Daeho Jeong March 25, 2024, 3:02 p.m. UTC | #4
On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote:
>
> On 2024/3/21 1:42, Daeho Jeong wrote:
> > On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote:
> >>
> >> On 2024/3/20 5:23, Daeho Jeong wrote:
> >>> From: Daeho Jeong <daehojeong@google.com>
> >>>
> >>> In a case writing without fallocate(), we can't guarantee it's allocated
> >>> in the conventional area for zoned stroage.
> >>>
> >>> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> >>> ---
> >>> v2: covered the direct io case
> >>> v3: covered the mkwrite case
> >>> ---
> >>>    fs/f2fs/data.c | 14 ++++++++++++--
> >>>    fs/f2fs/file.c | 16 ++++++++--------
> >>>    2 files changed, 20 insertions(+), 10 deletions(-)
> >>>
> >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>> index c21b92f18463..d3e5ab2736a6 100644
> >>> --- a/fs/f2fs/data.c
> >>> +++ b/fs/f2fs/data.c
> >>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
> >>>
> >>>        /* use out-place-update for direct IO under LFS mode */
> >>>        if (map->m_may_create &&
> >>> -         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
> >>> -             if (unlikely(f2fs_cp_error(sbi))) {
> >>> +         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
> >>> +                      !f2fs_is_pinned_file(inode)))) {
> >>> +             if (unlikely(f2fs_cp_error(sbi)) ||
> >>> +                 (f2fs_is_pinned_file(inode) && is_hole &&
> >>> +                  flag != F2FS_GET_BLOCK_PRE_DIO)) {
> >>>                        err = -EIO;
> >>>                        goto sync_out;
> >>>                }
> >>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >>>                f2fs_map_lock(sbi, flag);
> >>>                locked = true;
> >>>        } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
> >>> +             if (f2fs_is_pinned_file(inode))
> >>> +                     return -EIO;
> >>>                f2fs_map_lock(sbi, flag);
> >>>                locked = true;
> >>>        }
> >>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >>>
> >>>        if (!f2fs_lookup_read_extent_cache_block(inode, index,
> >>>                                                 &dn.data_blkaddr)) {
> >>> +             if (f2fs_is_pinned_file(inode)) {
> >>> +                     err = -EIO;
> >>> +                     goto out;
> >>> +             }
> >>> +
> >>>                if (locked) {
> >>>                        err = f2fs_reserve_block(&dn, index);
> >>>                        goto out;
> >>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>> index 82277e95c88f..4db3b21c804b 100644
> >>> --- a/fs/f2fs/file.c
> >>> +++ b/fs/f2fs/file.c
> >>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >>>        struct inode *inode = file_inode(vmf->vma->vm_file);
> >>>        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>        struct dnode_of_data dn;
> >>> -     bool need_alloc = true;
> >>> +     bool need_alloc = !f2fs_is_pinned_file(inode);
> >>
> >> Will this check races w/ pinfile get|set?
> >
> > Do you mean "set/clear" case? I believe "set" case is okay, since we
>
> Yup,
>
> > can't set if the inode already has a data block. For "clear" case, I
>
> However, we can set pinfile on written inode in regular block device:

You're right. I missed it. Maybe I think we should keep the concept
consistent across devices regardless of zoned storage support. How
about preventing file pinning for already written inodes across all
device types? I am changing the pinfile concept by allowing the users
to write on only fallocate()-ed space.

>
>         if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
>                 ret = -EFBIG;
>                 goto out;
>         }
>
> Should we add the logic only if blkzoned feture is enabled?
>
> > believe mkwrite failure is okay in racy conditions caused by clearing
> > the pin flag. What do you think?
>
> Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to
> avoid the race condition?
>
> Thanks,
>
> >
> >>
> >> Thanks,
> >>
> >>>        int err = 0;
> >>>        vm_fault_t ret;
> >>>
> >>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >>>                goto out_sem;
> >>>        }
> >>>
> >>> +     set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>>        if (need_alloc) {
> >>>                /* block allocation */
> >>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>>                err = f2fs_get_block_locked(&dn, page->index);
> >>> -     }
> >>> -
> >>> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> >>> -     if (!need_alloc) {
> >>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>> +     } else {
> >>>                err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
> >>>                f2fs_put_dnode(&dn);
> >>>        }
> >>> -#endif
> >>> +
> >>>        if (err) {
> >>>                unlock_page(page);
> >>>                goto out_sem;
> >>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> >>>                        return ret;
> >>>        }
> >>>
> >>> +     /* For pinned files, it should be fallocate()-ed in advance. */
> >>> +     if (f2fs_is_pinned_file(inode))
> >>> +             return 0;
> >>> +
> >>>        /* Do not preallocate blocks that will be written partially in 4KB. */
> >>>        map.m_lblk = F2FS_BLK_ALIGN(pos);
> >>>        map.m_len = F2FS_BYTES_TO_BLK(pos + count);
Chao Yu March 26, 2024, 3:39 a.m. UTC | #5
On 2024/3/25 23:02, Daeho Jeong wrote:
> On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote:
>>
>> On 2024/3/21 1:42, Daeho Jeong wrote:
>>> On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote:
>>>>
>>>> On 2024/3/20 5:23, Daeho Jeong wrote:
>>>>> From: Daeho Jeong <daehojeong@google.com>
>>>>>
>>>>> In a case writing without fallocate(), we can't guarantee it's allocated
>>>>> in the conventional area for zoned stroage.
>>>>>
>>>>> Signed-off-by: Daeho Jeong <daehojeong@google.com>
>>>>> ---
>>>>> v2: covered the direct io case
>>>>> v3: covered the mkwrite case
>>>>> ---
>>>>>     fs/f2fs/data.c | 14 ++++++++++++--
>>>>>     fs/f2fs/file.c | 16 ++++++++--------
>>>>>     2 files changed, 20 insertions(+), 10 deletions(-)
>>>>>
>>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
>>>>> index c21b92f18463..d3e5ab2736a6 100644
>>>>> --- a/fs/f2fs/data.c
>>>>> +++ b/fs/f2fs/data.c
>>>>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
>>>>>
>>>>>         /* use out-place-update for direct IO under LFS mode */
>>>>>         if (map->m_may_create &&
>>>>> -         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
>>>>> -             if (unlikely(f2fs_cp_error(sbi))) {
>>>>> +         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
>>>>> +                      !f2fs_is_pinned_file(inode)))) {
>>>>> +             if (unlikely(f2fs_cp_error(sbi)) ||
>>>>> +                 (f2fs_is_pinned_file(inode) && is_hole &&
>>>>> +                  flag != F2FS_GET_BLOCK_PRE_DIO)) {
>>>>>                         err = -EIO;
>>>>>                         goto sync_out;
>>>>>                 }
>>>>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>>>>>                 f2fs_map_lock(sbi, flag);
>>>>>                 locked = true;
>>>>>         } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
>>>>> +             if (f2fs_is_pinned_file(inode))
>>>>> +                     return -EIO;
>>>>>                 f2fs_map_lock(sbi, flag);
>>>>>                 locked = true;
>>>>>         }
>>>>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
>>>>>
>>>>>         if (!f2fs_lookup_read_extent_cache_block(inode, index,
>>>>>                                                  &dn.data_blkaddr)) {
>>>>> +             if (f2fs_is_pinned_file(inode)) {
>>>>> +                     err = -EIO;
>>>>> +                     goto out;
>>>>> +             }
>>>>> +
>>>>>                 if (locked) {
>>>>>                         err = f2fs_reserve_block(&dn, index);
>>>>>                         goto out;
>>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>>> index 82277e95c88f..4db3b21c804b 100644
>>>>> --- a/fs/f2fs/file.c
>>>>> +++ b/fs/f2fs/file.c
>>>>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>>>>>         struct inode *inode = file_inode(vmf->vma->vm_file);
>>>>>         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
>>>>>         struct dnode_of_data dn;
>>>>> -     bool need_alloc = true;
>>>>> +     bool need_alloc = !f2fs_is_pinned_file(inode);
>>>>
>>>> Will this check races w/ pinfile get|set?
>>>
>>> Do you mean "set/clear" case? I believe "set" case is okay, since we
>>
>> Yup,
>>
>>> can't set if the inode already has a data block. For "clear" case, I
>>
>> However, we can set pinfile on written inode in regular block device:
> 
> You're right. I missed it. Maybe I think we should keep the concept
> consistent across devices regardless of zoned storage support. How
> about preventing file pinning for already written inodes across all
> device types? I am changing the pinfile concept by allowing the users

I'm okay with that, or we can tries to migrate data block of target file
from seq-zone to conv-zone or regular-device before setting it w/ pin flag...

Thanks,

> to write on only fallocate()-ed space.
> 
>>
>>          if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
>>                  ret = -EFBIG;
>>                  goto out;
>>          }
>>
>> Should we add the logic only if blkzoned feture is enabled?
>>
>>> believe mkwrite failure is okay in racy conditions caused by clearing
>>> the pin flag. What do you think?
>>
>> Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to
>> avoid the race condition?
>>
>> Thanks,
>>
>>>
>>>>
>>>> Thanks,
>>>>
>>>>>         int err = 0;
>>>>>         vm_fault_t ret;
>>>>>
>>>>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
>>>>>                 goto out_sem;
>>>>>         }
>>>>>
>>>>> +     set_new_dnode(&dn, inode, NULL, NULL, 0);
>>>>>         if (need_alloc) {
>>>>>                 /* block allocation */
>>>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
>>>>>                 err = f2fs_get_block_locked(&dn, page->index);
>>>>> -     }
>>>>> -
>>>>> -#ifdef CONFIG_F2FS_FS_COMPRESSION
>>>>> -     if (!need_alloc) {
>>>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
>>>>> +     } else {
>>>>>                 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
>>>>>                 f2fs_put_dnode(&dn);
>>>>>         }
>>>>> -#endif
>>>>> +
>>>>>         if (err) {
>>>>>                 unlock_page(page);
>>>>>                 goto out_sem;
>>>>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
>>>>>                         return ret;
>>>>>         }
>>>>>
>>>>> +     /* For pinned files, it should be fallocate()-ed in advance. */
>>>>> +     if (f2fs_is_pinned_file(inode))
>>>>> +             return 0;
>>>>> +
>>>>>         /* Do not preallocate blocks that will be written partially in 4KB. */
>>>>>         map.m_lblk = F2FS_BLK_ALIGN(pos);
>>>>>         map.m_len = F2FS_BYTES_TO_BLK(pos + count);
Daeho Jeong March 26, 2024, 5:34 p.m. UTC | #6
On Mon, Mar 25, 2024 at 8:39 PM Chao Yu <chao@kernel.org> wrote:
>
> On 2024/3/25 23:02, Daeho Jeong wrote:
> > On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote:
> >>
> >> On 2024/3/21 1:42, Daeho Jeong wrote:
> >>> On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote:
> >>>>
> >>>> On 2024/3/20 5:23, Daeho Jeong wrote:
> >>>>> From: Daeho Jeong <daehojeong@google.com>
> >>>>>
> >>>>> In a case writing without fallocate(), we can't guarantee it's allocated
> >>>>> in the conventional area for zoned stroage.
> >>>>>
> >>>>> Signed-off-by: Daeho Jeong <daehojeong@google.com>
> >>>>> ---
> >>>>> v2: covered the direct io case
> >>>>> v3: covered the mkwrite case
> >>>>> ---
> >>>>>     fs/f2fs/data.c | 14 ++++++++++++--
> >>>>>     fs/f2fs/file.c | 16 ++++++++--------
> >>>>>     2 files changed, 20 insertions(+), 10 deletions(-)
> >>>>>
> >>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> >>>>> index c21b92f18463..d3e5ab2736a6 100644
> >>>>> --- a/fs/f2fs/data.c
> >>>>> +++ b/fs/f2fs/data.c
> >>>>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
> >>>>>
> >>>>>         /* use out-place-update for direct IO under LFS mode */
> >>>>>         if (map->m_may_create &&
> >>>>> -         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
> >>>>> -             if (unlikely(f2fs_cp_error(sbi))) {
> >>>>> +         (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
> >>>>> +                      !f2fs_is_pinned_file(inode)))) {
> >>>>> +             if (unlikely(f2fs_cp_error(sbi)) ||
> >>>>> +                 (f2fs_is_pinned_file(inode) && is_hole &&
> >>>>> +                  flag != F2FS_GET_BLOCK_PRE_DIO)) {
> >>>>>                         err = -EIO;
> >>>>>                         goto sync_out;
> >>>>>                 }
> >>>>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >>>>>                 f2fs_map_lock(sbi, flag);
> >>>>>                 locked = true;
> >>>>>         } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
> >>>>> +             if (f2fs_is_pinned_file(inode))
> >>>>> +                     return -EIO;
> >>>>>                 f2fs_map_lock(sbi, flag);
> >>>>>                 locked = true;
> >>>>>         }
> >>>>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
> >>>>>
> >>>>>         if (!f2fs_lookup_read_extent_cache_block(inode, index,
> >>>>>                                                  &dn.data_blkaddr)) {
> >>>>> +             if (f2fs_is_pinned_file(inode)) {
> >>>>> +                     err = -EIO;
> >>>>> +                     goto out;
> >>>>> +             }
> >>>>> +
> >>>>>                 if (locked) {
> >>>>>                         err = f2fs_reserve_block(&dn, index);
> >>>>>                         goto out;
> >>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> >>>>> index 82277e95c88f..4db3b21c804b 100644
> >>>>> --- a/fs/f2fs/file.c
> >>>>> +++ b/fs/f2fs/file.c
> >>>>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >>>>>         struct inode *inode = file_inode(vmf->vma->vm_file);
> >>>>>         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> >>>>>         struct dnode_of_data dn;
> >>>>> -     bool need_alloc = true;
> >>>>> +     bool need_alloc = !f2fs_is_pinned_file(inode);
> >>>>
> >>>> Will this check races w/ pinfile get|set?
> >>>
> >>> Do you mean "set/clear" case? I believe "set" case is okay, since we
> >>
> >> Yup,
> >>
> >>> can't set if the inode already has a data block. For "clear" case, I
> >>
> >> However, we can set pinfile on written inode in regular block device:
> >
> > You're right. I missed it. Maybe I think we should keep the concept
> > consistent across devices regardless of zoned storage support. How
> > about preventing file pinning for already written inodes across all
> > device types? I am changing the pinfile concept by allowing the users
>
> I'm okay with that, or we can tries to migrate data block of target file
> from seq-zone to conv-zone or regular-device before setting it w/ pin flag...

I can't see lots of benefits by supporting file pinning for
pre-written inodes, while the design can become complicated. Since we
consolidate the way to support file pinning as fallocate() before
using it, we might as well not support pre-written inodes.

>
> Thanks,
>
> > to write on only fallocate()-ed space.
> >
> >>
> >>          if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) {
> >>                  ret = -EFBIG;
> >>                  goto out;
> >>          }
> >>
> >> Should we add the logic only if blkzoned feture is enabled?
> >>
> >>> believe mkwrite failure is okay in racy conditions caused by clearing
> >>> the pin flag. What do you think?
> >>
> >> Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to
> >> avoid the race condition?
> >>
> >> Thanks,
> >>
> >>>
> >>>>
> >>>> Thanks,
> >>>>
> >>>>>         int err = 0;
> >>>>>         vm_fault_t ret;
> >>>>>
> >>>>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
> >>>>>                 goto out_sem;
> >>>>>         }
> >>>>>
> >>>>> +     set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>>>>         if (need_alloc) {
> >>>>>                 /* block allocation */
> >>>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>>>>                 err = f2fs_get_block_locked(&dn, page->index);
> >>>>> -     }
> >>>>> -
> >>>>> -#ifdef CONFIG_F2FS_FS_COMPRESSION
> >>>>> -     if (!need_alloc) {
> >>>>> -             set_new_dnode(&dn, inode, NULL, NULL, 0);
> >>>>> +     } else {
> >>>>>                 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
> >>>>>                 f2fs_put_dnode(&dn);
> >>>>>         }
> >>>>> -#endif
> >>>>> +
> >>>>>         if (err) {
> >>>>>                 unlock_page(page);
> >>>>>                 goto out_sem;
> >>>>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
> >>>>>                         return ret;
> >>>>>         }
> >>>>>
> >>>>> +     /* For pinned files, it should be fallocate()-ed in advance. */
> >>>>> +     if (f2fs_is_pinned_file(inode))
> >>>>> +             return 0;
> >>>>> +
> >>>>>         /* Do not preallocate blocks that will be written partially in 4KB. */
> >>>>>         map.m_lblk = F2FS_BLK_ALIGN(pos);
> >>>>>         map.m_len = F2FS_BYTES_TO_BLK(pos + count);
diff mbox series

Patch

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index c21b92f18463..d3e5ab2736a6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1584,8 +1584,11 @@  int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
 
 	/* use out-place-update for direct IO under LFS mode */
 	if (map->m_may_create &&
-	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
-		if (unlikely(f2fs_cp_error(sbi))) {
+	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
+			 !f2fs_is_pinned_file(inode)))) {
+		if (unlikely(f2fs_cp_error(sbi)) ||
+		    (f2fs_is_pinned_file(inode) && is_hole &&
+		     flag != F2FS_GET_BLOCK_PRE_DIO)) {
 			err = -EIO;
 			goto sync_out;
 		}
@@ -3378,6 +3381,8 @@  static int prepare_write_begin(struct f2fs_sb_info *sbi,
 		f2fs_map_lock(sbi, flag);
 		locked = true;
 	} else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+		if (f2fs_is_pinned_file(inode))
+			return -EIO;
 		f2fs_map_lock(sbi, flag);
 		locked = true;
 	}
@@ -3407,6 +3412,11 @@  static int prepare_write_begin(struct f2fs_sb_info *sbi,
 
 	if (!f2fs_lookup_read_extent_cache_block(inode, index,
 						 &dn.data_blkaddr)) {
+		if (f2fs_is_pinned_file(inode)) {
+			err = -EIO;
+			goto out;
+		}
+
 		if (locked) {
 			err = f2fs_reserve_block(&dn, index);
 			goto out;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 82277e95c88f..4db3b21c804b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -57,7 +57,7 @@  static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 	struct inode *inode = file_inode(vmf->vma->vm_file);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct dnode_of_data dn;
-	bool need_alloc = true;
+	bool need_alloc = !f2fs_is_pinned_file(inode);
 	int err = 0;
 	vm_fault_t ret;
 
@@ -114,19 +114,15 @@  static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
 		goto out_sem;
 	}
 
+	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	if (need_alloc) {
 		/* block allocation */
-		set_new_dnode(&dn, inode, NULL, NULL, 0);
 		err = f2fs_get_block_locked(&dn, page->index);
-	}
-
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	if (!need_alloc) {
-		set_new_dnode(&dn, inode, NULL, NULL, 0);
+	} else {
 		err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
 		f2fs_put_dnode(&dn);
 	}
-#endif
+
 	if (err) {
 		unlock_page(page);
 		goto out_sem;
@@ -4611,6 +4607,10 @@  static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter,
 			return ret;
 	}
 
+	/* For pinned files, it should be fallocate()-ed in advance. */
+	if (f2fs_is_pinned_file(inode))
+		return 0;
+
 	/* Do not preallocate blocks that will be written partially in 4KB. */
 	map.m_lblk = F2FS_BLK_ALIGN(pos);
 	map.m_len = F2FS_BYTES_TO_BLK(pos + count);