Message ID | 20240319212316.4193790-1-daeho43@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | [f2fs-dev,v3] f2fs: prevent writing without fallocate() for pinned files | expand |
On 2024/3/20 5:23, Daeho Jeong wrote: > From: Daeho Jeong <daehojeong@google.com> > > In a case writing without fallocate(), we can't guarantee it's allocated > in the conventional area for zoned stroage. > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > --- > v2: covered the direct io case > v3: covered the mkwrite case > --- > fs/f2fs/data.c | 14 ++++++++++++-- > fs/f2fs/file.c | 16 ++++++++-------- > 2 files changed, 20 insertions(+), 10 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index c21b92f18463..d3e5ab2736a6 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) > > /* use out-place-update for direct IO under LFS mode */ > if (map->m_may_create && > - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { > - if (unlikely(f2fs_cp_error(sbi))) { > + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && > + !f2fs_is_pinned_file(inode)))) { > + if (unlikely(f2fs_cp_error(sbi)) || > + (f2fs_is_pinned_file(inode) && is_hole && > + flag != F2FS_GET_BLOCK_PRE_DIO)) { > err = -EIO; > goto sync_out; > } > @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > f2fs_map_lock(sbi, flag); > locked = true; > } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { > + if (f2fs_is_pinned_file(inode)) > + return -EIO; > f2fs_map_lock(sbi, flag); > locked = true; > } > @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > &dn.data_blkaddr)) { > + if (f2fs_is_pinned_file(inode)) { > + err = -EIO; > + goto out; > + } > + > if (locked) { > err = f2fs_reserve_block(&dn, index); > goto out; > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 82277e95c88f..4db3b21c804b 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > struct inode *inode = file_inode(vmf->vma->vm_file); > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > struct dnode_of_data dn; > - bool need_alloc = true; > + bool need_alloc = !f2fs_is_pinned_file(inode); Will this check races w/ pinfile get|set? Thanks, > int err = 0; > vm_fault_t ret; > > @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > goto out_sem; > } > > + set_new_dnode(&dn, inode, NULL, NULL, 0); > if (need_alloc) { > /* block allocation */ > - set_new_dnode(&dn, inode, NULL, NULL, 0); > err = f2fs_get_block_locked(&dn, page->index); > - } > - > -#ifdef CONFIG_F2FS_FS_COMPRESSION > - if (!need_alloc) { > - set_new_dnode(&dn, inode, NULL, NULL, 0); > + } else { > err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); > f2fs_put_dnode(&dn); > } > -#endif > + > if (err) { > unlock_page(page); > goto out_sem; > @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > return ret; > } > > + /* For pinned files, it should be fallocate()-ed in advance. */ > + if (f2fs_is_pinned_file(inode)) > + return 0; > + > /* Do not preallocate blocks that will be written partially in 4KB. */ > map.m_lblk = F2FS_BLK_ALIGN(pos); > map.m_len = F2FS_BYTES_TO_BLK(pos + count);
On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote: > > On 2024/3/20 5:23, Daeho Jeong wrote: > > From: Daeho Jeong <daehojeong@google.com> > > > > In a case writing without fallocate(), we can't guarantee it's allocated > > in the conventional area for zoned stroage. > > > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > > --- > > v2: covered the direct io case > > v3: covered the mkwrite case > > --- > > fs/f2fs/data.c | 14 ++++++++++++-- > > fs/f2fs/file.c | 16 ++++++++-------- > > 2 files changed, 20 insertions(+), 10 deletions(-) > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index c21b92f18463..d3e5ab2736a6 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) > > > > /* use out-place-update for direct IO under LFS mode */ > > if (map->m_may_create && > > - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { > > - if (unlikely(f2fs_cp_error(sbi))) { > > + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && > > + !f2fs_is_pinned_file(inode)))) { > > + if (unlikely(f2fs_cp_error(sbi)) || > > + (f2fs_is_pinned_file(inode) && is_hole && > > + flag != F2FS_GET_BLOCK_PRE_DIO)) { > > err = -EIO; > > goto sync_out; > > } > > @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > f2fs_map_lock(sbi, flag); > > locked = true; > > } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { > > + if (f2fs_is_pinned_file(inode)) > > + return -EIO; > > f2fs_map_lock(sbi, flag); > > locked = true; > > } > > @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > > > > if (!f2fs_lookup_read_extent_cache_block(inode, index, > > &dn.data_blkaddr)) { > > + if (f2fs_is_pinned_file(inode)) { > > + err = -EIO; > > + goto out; > > + } > > + > > if (locked) { > > err = f2fs_reserve_block(&dn, index); > > goto out; > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 82277e95c88f..4db3b21c804b 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > > struct inode *inode = file_inode(vmf->vma->vm_file); > > struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > struct dnode_of_data dn; > > - bool need_alloc = true; > > + bool need_alloc = !f2fs_is_pinned_file(inode); > > Will this check races w/ pinfile get|set? Do you mean "set/clear" case? I believe "set" case is okay, since we can't set if the inode already has a data block. For "clear" case, I believe mkwrite failure is okay in racy conditions caused by clearing the pin flag. What do you think? > > Thanks, > > > int err = 0; > > vm_fault_t ret; > > > > @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > > goto out_sem; > > } > > > > + set_new_dnode(&dn, inode, NULL, NULL, 0); > > if (need_alloc) { > > /* block allocation */ > > - set_new_dnode(&dn, inode, NULL, NULL, 0); > > err = f2fs_get_block_locked(&dn, page->index); > > - } > > - > > -#ifdef CONFIG_F2FS_FS_COMPRESSION > > - if (!need_alloc) { > > - set_new_dnode(&dn, inode, NULL, NULL, 0); > > + } else { > > err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); > > f2fs_put_dnode(&dn); > > } > > -#endif > > + > > if (err) { > > unlock_page(page); > > goto out_sem; > > @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > > return ret; > > } > > > > + /* For pinned files, it should be fallocate()-ed in advance. */ > > + if (f2fs_is_pinned_file(inode)) > > + return 0; > > + > > /* Do not preallocate blocks that will be written partially in 4KB. */ > > map.m_lblk = F2FS_BLK_ALIGN(pos); > > map.m_len = F2FS_BYTES_TO_BLK(pos + count);
On 2024/3/21 1:42, Daeho Jeong wrote: > On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote: >> >> On 2024/3/20 5:23, Daeho Jeong wrote: >>> From: Daeho Jeong <daehojeong@google.com> >>> >>> In a case writing without fallocate(), we can't guarantee it's allocated >>> in the conventional area for zoned stroage. >>> >>> Signed-off-by: Daeho Jeong <daehojeong@google.com> >>> --- >>> v2: covered the direct io case >>> v3: covered the mkwrite case >>> --- >>> fs/f2fs/data.c | 14 ++++++++++++-- >>> fs/f2fs/file.c | 16 ++++++++-------- >>> 2 files changed, 20 insertions(+), 10 deletions(-) >>> >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c >>> index c21b92f18463..d3e5ab2736a6 100644 >>> --- a/fs/f2fs/data.c >>> +++ b/fs/f2fs/data.c >>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) >>> >>> /* use out-place-update for direct IO under LFS mode */ >>> if (map->m_may_create && >>> - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { >>> - if (unlikely(f2fs_cp_error(sbi))) { >>> + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && >>> + !f2fs_is_pinned_file(inode)))) { >>> + if (unlikely(f2fs_cp_error(sbi)) || >>> + (f2fs_is_pinned_file(inode) && is_hole && >>> + flag != F2FS_GET_BLOCK_PRE_DIO)) { >>> err = -EIO; >>> goto sync_out; >>> } >>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, >>> f2fs_map_lock(sbi, flag); >>> locked = true; >>> } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { >>> + if (f2fs_is_pinned_file(inode)) >>> + return -EIO; >>> f2fs_map_lock(sbi, flag); >>> locked = true; >>> } >>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, >>> >>> if (!f2fs_lookup_read_extent_cache_block(inode, index, >>> &dn.data_blkaddr)) { >>> + if (f2fs_is_pinned_file(inode)) { >>> + err = -EIO; >>> + goto out; >>> + } >>> + >>> if (locked) { >>> err = f2fs_reserve_block(&dn, index); >>> goto out; >>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c >>> index 82277e95c88f..4db3b21c804b 100644 >>> --- a/fs/f2fs/file.c >>> +++ b/fs/f2fs/file.c >>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) >>> struct inode *inode = file_inode(vmf->vma->vm_file); >>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>> struct dnode_of_data dn; >>> - bool need_alloc = true; >>> + bool need_alloc = !f2fs_is_pinned_file(inode); >> >> Will this check races w/ pinfile get|set? > > Do you mean "set/clear" case? I believe "set" case is okay, since we Yup, > can't set if the inode already has a data block. For "clear" case, I However, we can set pinfile on written inode in regular block device: if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { ret = -EFBIG; goto out; } Should we add the logic only if blkzoned feture is enabled? > believe mkwrite failure is okay in racy conditions caused by clearing > the pin flag. What do you think? Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to avoid the race condition? Thanks, > >> >> Thanks, >> >>> int err = 0; >>> vm_fault_t ret; >>> >>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) >>> goto out_sem; >>> } >>> >>> + set_new_dnode(&dn, inode, NULL, NULL, 0); >>> if (need_alloc) { >>> /* block allocation */ >>> - set_new_dnode(&dn, inode, NULL, NULL, 0); >>> err = f2fs_get_block_locked(&dn, page->index); >>> - } >>> - >>> -#ifdef CONFIG_F2FS_FS_COMPRESSION >>> - if (!need_alloc) { >>> - set_new_dnode(&dn, inode, NULL, NULL, 0); >>> + } else { >>> err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); >>> f2fs_put_dnode(&dn); >>> } >>> -#endif >>> + >>> if (err) { >>> unlock_page(page); >>> goto out_sem; >>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, >>> return ret; >>> } >>> >>> + /* For pinned files, it should be fallocate()-ed in advance. */ >>> + if (f2fs_is_pinned_file(inode)) >>> + return 0; >>> + >>> /* Do not preallocate blocks that will be written partially in 4KB. */ >>> map.m_lblk = F2FS_BLK_ALIGN(pos); >>> map.m_len = F2FS_BYTES_TO_BLK(pos + count);
On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote: > > On 2024/3/21 1:42, Daeho Jeong wrote: > > On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote: > >> > >> On 2024/3/20 5:23, Daeho Jeong wrote: > >>> From: Daeho Jeong <daehojeong@google.com> > >>> > >>> In a case writing without fallocate(), we can't guarantee it's allocated > >>> in the conventional area for zoned stroage. > >>> > >>> Signed-off-by: Daeho Jeong <daehojeong@google.com> > >>> --- > >>> v2: covered the direct io case > >>> v3: covered the mkwrite case > >>> --- > >>> fs/f2fs/data.c | 14 ++++++++++++-- > >>> fs/f2fs/file.c | 16 ++++++++-------- > >>> 2 files changed, 20 insertions(+), 10 deletions(-) > >>> > >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > >>> index c21b92f18463..d3e5ab2736a6 100644 > >>> --- a/fs/f2fs/data.c > >>> +++ b/fs/f2fs/data.c > >>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) > >>> > >>> /* use out-place-update for direct IO under LFS mode */ > >>> if (map->m_may_create && > >>> - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { > >>> - if (unlikely(f2fs_cp_error(sbi))) { > >>> + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && > >>> + !f2fs_is_pinned_file(inode)))) { > >>> + if (unlikely(f2fs_cp_error(sbi)) || > >>> + (f2fs_is_pinned_file(inode) && is_hole && > >>> + flag != F2FS_GET_BLOCK_PRE_DIO)) { > >>> err = -EIO; > >>> goto sync_out; > >>> } > >>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > >>> f2fs_map_lock(sbi, flag); > >>> locked = true; > >>> } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { > >>> + if (f2fs_is_pinned_file(inode)) > >>> + return -EIO; > >>> f2fs_map_lock(sbi, flag); > >>> locked = true; > >>> } > >>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > >>> > >>> if (!f2fs_lookup_read_extent_cache_block(inode, index, > >>> &dn.data_blkaddr)) { > >>> + if (f2fs_is_pinned_file(inode)) { > >>> + err = -EIO; > >>> + goto out; > >>> + } > >>> + > >>> if (locked) { > >>> err = f2fs_reserve_block(&dn, index); > >>> goto out; > >>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > >>> index 82277e95c88f..4db3b21c804b 100644 > >>> --- a/fs/f2fs/file.c > >>> +++ b/fs/f2fs/file.c > >>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > >>> struct inode *inode = file_inode(vmf->vma->vm_file); > >>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > >>> struct dnode_of_data dn; > >>> - bool need_alloc = true; > >>> + bool need_alloc = !f2fs_is_pinned_file(inode); > >> > >> Will this check races w/ pinfile get|set? > > > > Do you mean "set/clear" case? I believe "set" case is okay, since we > > Yup, > > > can't set if the inode already has a data block. For "clear" case, I > > However, we can set pinfile on written inode in regular block device: You're right. I missed it. Maybe I think we should keep the concept consistent across devices regardless of zoned storage support. How about preventing file pinning for already written inodes across all device types? I am changing the pinfile concept by allowing the users to write on only fallocate()-ed space. > > if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > ret = -EFBIG; > goto out; > } > > Should we add the logic only if blkzoned feture is enabled? > > > believe mkwrite failure is okay in racy conditions caused by clearing > > the pin flag. What do you think? > > Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to > avoid the race condition? > > Thanks, > > > > >> > >> Thanks, > >> > >>> int err = 0; > >>> vm_fault_t ret; > >>> > >>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > >>> goto out_sem; > >>> } > >>> > >>> + set_new_dnode(&dn, inode, NULL, NULL, 0); > >>> if (need_alloc) { > >>> /* block allocation */ > >>> - set_new_dnode(&dn, inode, NULL, NULL, 0); > >>> err = f2fs_get_block_locked(&dn, page->index); > >>> - } > >>> - > >>> -#ifdef CONFIG_F2FS_FS_COMPRESSION > >>> - if (!need_alloc) { > >>> - set_new_dnode(&dn, inode, NULL, NULL, 0); > >>> + } else { > >>> err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); > >>> f2fs_put_dnode(&dn); > >>> } > >>> -#endif > >>> + > >>> if (err) { > >>> unlock_page(page); > >>> goto out_sem; > >>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > >>> return ret; > >>> } > >>> > >>> + /* For pinned files, it should be fallocate()-ed in advance. */ > >>> + if (f2fs_is_pinned_file(inode)) > >>> + return 0; > >>> + > >>> /* Do not preallocate blocks that will be written partially in 4KB. */ > >>> map.m_lblk = F2FS_BLK_ALIGN(pos); > >>> map.m_len = F2FS_BYTES_TO_BLK(pos + count);
On 2024/3/25 23:02, Daeho Jeong wrote: > On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote: >> >> On 2024/3/21 1:42, Daeho Jeong wrote: >>> On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote: >>>> >>>> On 2024/3/20 5:23, Daeho Jeong wrote: >>>>> From: Daeho Jeong <daehojeong@google.com> >>>>> >>>>> In a case writing without fallocate(), we can't guarantee it's allocated >>>>> in the conventional area for zoned stroage. >>>>> >>>>> Signed-off-by: Daeho Jeong <daehojeong@google.com> >>>>> --- >>>>> v2: covered the direct io case >>>>> v3: covered the mkwrite case >>>>> --- >>>>> fs/f2fs/data.c | 14 ++++++++++++-- >>>>> fs/f2fs/file.c | 16 ++++++++-------- >>>>> 2 files changed, 20 insertions(+), 10 deletions(-) >>>>> >>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c >>>>> index c21b92f18463..d3e5ab2736a6 100644 >>>>> --- a/fs/f2fs/data.c >>>>> +++ b/fs/f2fs/data.c >>>>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) >>>>> >>>>> /* use out-place-update for direct IO under LFS mode */ >>>>> if (map->m_may_create && >>>>> - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { >>>>> - if (unlikely(f2fs_cp_error(sbi))) { >>>>> + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && >>>>> + !f2fs_is_pinned_file(inode)))) { >>>>> + if (unlikely(f2fs_cp_error(sbi)) || >>>>> + (f2fs_is_pinned_file(inode) && is_hole && >>>>> + flag != F2FS_GET_BLOCK_PRE_DIO)) { >>>>> err = -EIO; >>>>> goto sync_out; >>>>> } >>>>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, >>>>> f2fs_map_lock(sbi, flag); >>>>> locked = true; >>>>> } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { >>>>> + if (f2fs_is_pinned_file(inode)) >>>>> + return -EIO; >>>>> f2fs_map_lock(sbi, flag); >>>>> locked = true; >>>>> } >>>>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, >>>>> >>>>> if (!f2fs_lookup_read_extent_cache_block(inode, index, >>>>> &dn.data_blkaddr)) { >>>>> + if (f2fs_is_pinned_file(inode)) { >>>>> + err = -EIO; >>>>> + goto out; >>>>> + } >>>>> + >>>>> if (locked) { >>>>> err = f2fs_reserve_block(&dn, index); >>>>> goto out; >>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c >>>>> index 82277e95c88f..4db3b21c804b 100644 >>>>> --- a/fs/f2fs/file.c >>>>> +++ b/fs/f2fs/file.c >>>>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) >>>>> struct inode *inode = file_inode(vmf->vma->vm_file); >>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>>>> struct dnode_of_data dn; >>>>> - bool need_alloc = true; >>>>> + bool need_alloc = !f2fs_is_pinned_file(inode); >>>> >>>> Will this check races w/ pinfile get|set? >>> >>> Do you mean "set/clear" case? I believe "set" case is okay, since we >> >> Yup, >> >>> can't set if the inode already has a data block. For "clear" case, I >> >> However, we can set pinfile on written inode in regular block device: > > You're right. I missed it. Maybe I think we should keep the concept > consistent across devices regardless of zoned storage support. How > about preventing file pinning for already written inodes across all > device types? I am changing the pinfile concept by allowing the users I'm okay with that, or we can tries to migrate data block of target file from seq-zone to conv-zone or regular-device before setting it w/ pin flag... Thanks, > to write on only fallocate()-ed space. > >> >> if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { >> ret = -EFBIG; >> goto out; >> } >> >> Should we add the logic only if blkzoned feture is enabled? >> >>> believe mkwrite failure is okay in racy conditions caused by clearing >>> the pin flag. What do you think? >> >> Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to >> avoid the race condition? >> >> Thanks, >> >>> >>>> >>>> Thanks, >>>> >>>>> int err = 0; >>>>> vm_fault_t ret; >>>>> >>>>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) >>>>> goto out_sem; >>>>> } >>>>> >>>>> + set_new_dnode(&dn, inode, NULL, NULL, 0); >>>>> if (need_alloc) { >>>>> /* block allocation */ >>>>> - set_new_dnode(&dn, inode, NULL, NULL, 0); >>>>> err = f2fs_get_block_locked(&dn, page->index); >>>>> - } >>>>> - >>>>> -#ifdef CONFIG_F2FS_FS_COMPRESSION >>>>> - if (!need_alloc) { >>>>> - set_new_dnode(&dn, inode, NULL, NULL, 0); >>>>> + } else { >>>>> err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); >>>>> f2fs_put_dnode(&dn); >>>>> } >>>>> -#endif >>>>> + >>>>> if (err) { >>>>> unlock_page(page); >>>>> goto out_sem; >>>>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, >>>>> return ret; >>>>> } >>>>> >>>>> + /* For pinned files, it should be fallocate()-ed in advance. */ >>>>> + if (f2fs_is_pinned_file(inode)) >>>>> + return 0; >>>>> + >>>>> /* Do not preallocate blocks that will be written partially in 4KB. */ >>>>> map.m_lblk = F2FS_BLK_ALIGN(pos); >>>>> map.m_len = F2FS_BYTES_TO_BLK(pos + count);
On Mon, Mar 25, 2024 at 8:39 PM Chao Yu <chao@kernel.org> wrote: > > On 2024/3/25 23:02, Daeho Jeong wrote: > > On Fri, Mar 22, 2024 at 9:26 PM Chao Yu <chao@kernel.org> wrote: > >> > >> On 2024/3/21 1:42, Daeho Jeong wrote: > >>> On Wed, Mar 20, 2024 at 2:38 AM Chao Yu <chao@kernel.org> wrote: > >>>> > >>>> On 2024/3/20 5:23, Daeho Jeong wrote: > >>>>> From: Daeho Jeong <daehojeong@google.com> > >>>>> > >>>>> In a case writing without fallocate(), we can't guarantee it's allocated > >>>>> in the conventional area for zoned stroage. > >>>>> > >>>>> Signed-off-by: Daeho Jeong <daehojeong@google.com> > >>>>> --- > >>>>> v2: covered the direct io case > >>>>> v3: covered the mkwrite case > >>>>> --- > >>>>> fs/f2fs/data.c | 14 ++++++++++++-- > >>>>> fs/f2fs/file.c | 16 ++++++++-------- > >>>>> 2 files changed, 20 insertions(+), 10 deletions(-) > >>>>> > >>>>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > >>>>> index c21b92f18463..d3e5ab2736a6 100644 > >>>>> --- a/fs/f2fs/data.c > >>>>> +++ b/fs/f2fs/data.c > >>>>> @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) > >>>>> > >>>>> /* use out-place-update for direct IO under LFS mode */ > >>>>> if (map->m_may_create && > >>>>> - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { > >>>>> - if (unlikely(f2fs_cp_error(sbi))) { > >>>>> + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && > >>>>> + !f2fs_is_pinned_file(inode)))) { > >>>>> + if (unlikely(f2fs_cp_error(sbi)) || > >>>>> + (f2fs_is_pinned_file(inode) && is_hole && > >>>>> + flag != F2FS_GET_BLOCK_PRE_DIO)) { > >>>>> err = -EIO; > >>>>> goto sync_out; > >>>>> } > >>>>> @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > >>>>> f2fs_map_lock(sbi, flag); > >>>>> locked = true; > >>>>> } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { > >>>>> + if (f2fs_is_pinned_file(inode)) > >>>>> + return -EIO; > >>>>> f2fs_map_lock(sbi, flag); > >>>>> locked = true; > >>>>> } > >>>>> @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, > >>>>> > >>>>> if (!f2fs_lookup_read_extent_cache_block(inode, index, > >>>>> &dn.data_blkaddr)) { > >>>>> + if (f2fs_is_pinned_file(inode)) { > >>>>> + err = -EIO; > >>>>> + goto out; > >>>>> + } > >>>>> + > >>>>> if (locked) { > >>>>> err = f2fs_reserve_block(&dn, index); > >>>>> goto out; > >>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > >>>>> index 82277e95c88f..4db3b21c804b 100644 > >>>>> --- a/fs/f2fs/file.c > >>>>> +++ b/fs/f2fs/file.c > >>>>> @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > >>>>> struct inode *inode = file_inode(vmf->vma->vm_file); > >>>>> struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > >>>>> struct dnode_of_data dn; > >>>>> - bool need_alloc = true; > >>>>> + bool need_alloc = !f2fs_is_pinned_file(inode); > >>>> > >>>> Will this check races w/ pinfile get|set? > >>> > >>> Do you mean "set/clear" case? I believe "set" case is okay, since we > >> > >> Yup, > >> > >>> can't set if the inode already has a data block. For "clear" case, I > >> > >> However, we can set pinfile on written inode in regular block device: > > > > You're right. I missed it. Maybe I think we should keep the concept > > consistent across devices regardless of zoned storage support. How > > about preventing file pinning for already written inodes across all > > device types? I am changing the pinfile concept by allowing the users > > I'm okay with that, or we can tries to migrate data block of target file > from seq-zone to conv-zone or regular-device before setting it w/ pin flag... I can't see lots of benefits by supporting file pinning for pre-written inodes, while the design can become complicated. Since we consolidate the way to support file pinning as fallocate() before using it, we might as well not support pre-written inodes. > > Thanks, > > > to write on only fallocate()-ed space. > > > >> > >> if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > >> ret = -EFBIG; > >> goto out; > >> } > >> > >> Should we add the logic only if blkzoned feture is enabled? > >> > >>> believe mkwrite failure is okay in racy conditions caused by clearing > >>> the pin flag. What do you think? > >> > >> Or we can use filemap_invalidate_lock() in f2fs_ioc_set_pin_file() to > >> avoid the race condition? > >> > >> Thanks, > >> > >>> > >>>> > >>>> Thanks, > >>>> > >>>>> int err = 0; > >>>>> vm_fault_t ret; > >>>>> > >>>>> @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) > >>>>> goto out_sem; > >>>>> } > >>>>> > >>>>> + set_new_dnode(&dn, inode, NULL, NULL, 0); > >>>>> if (need_alloc) { > >>>>> /* block allocation */ > >>>>> - set_new_dnode(&dn, inode, NULL, NULL, 0); > >>>>> err = f2fs_get_block_locked(&dn, page->index); > >>>>> - } > >>>>> - > >>>>> -#ifdef CONFIG_F2FS_FS_COMPRESSION > >>>>> - if (!need_alloc) { > >>>>> - set_new_dnode(&dn, inode, NULL, NULL, 0); > >>>>> + } else { > >>>>> err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); > >>>>> f2fs_put_dnode(&dn); > >>>>> } > >>>>> -#endif > >>>>> + > >>>>> if (err) { > >>>>> unlock_page(page); > >>>>> goto out_sem; > >>>>> @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, > >>>>> return ret; > >>>>> } > >>>>> > >>>>> + /* For pinned files, it should be fallocate()-ed in advance. */ > >>>>> + if (f2fs_is_pinned_file(inode)) > >>>>> + return 0; > >>>>> + > >>>>> /* Do not preallocate blocks that will be written partially in 4KB. */ > >>>>> map.m_lblk = F2FS_BLK_ALIGN(pos); > >>>>> map.m_len = F2FS_BYTES_TO_BLK(pos + count);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c21b92f18463..d3e5ab2736a6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1584,8 +1584,11 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag) /* use out-place-update for direct IO under LFS mode */ if (map->m_may_create && - (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) { - if (unlikely(f2fs_cp_error(sbi))) { + (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO && + !f2fs_is_pinned_file(inode)))) { + if (unlikely(f2fs_cp_error(sbi)) || + (f2fs_is_pinned_file(inode) && is_hole && + flag != F2FS_GET_BLOCK_PRE_DIO)) { err = -EIO; goto sync_out; } @@ -3378,6 +3381,8 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, f2fs_map_lock(sbi, flag); locked = true; } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { + if (f2fs_is_pinned_file(inode)) + return -EIO; f2fs_map_lock(sbi, flag); locked = true; } @@ -3407,6 +3412,11 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (!f2fs_lookup_read_extent_cache_block(inode, index, &dn.data_blkaddr)) { + if (f2fs_is_pinned_file(inode)) { + err = -EIO; + goto out; + } + if (locked) { err = f2fs_reserve_block(&dn, index); goto out; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 82277e95c88f..4db3b21c804b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -57,7 +57,7 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) struct inode *inode = file_inode(vmf->vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - bool need_alloc = true; + bool need_alloc = !f2fs_is_pinned_file(inode); int err = 0; vm_fault_t ret; @@ -114,19 +114,15 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) goto out_sem; } + set_new_dnode(&dn, inode, NULL, NULL, 0); if (need_alloc) { /* block allocation */ - set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_get_block_locked(&dn, page->index); - } - -#ifdef CONFIG_F2FS_FS_COMPRESSION - if (!need_alloc) { - set_new_dnode(&dn, inode, NULL, NULL, 0); + } else { err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); f2fs_put_dnode(&dn); } -#endif + if (err) { unlock_page(page); goto out_sem; @@ -4611,6 +4607,10 @@ static int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *iter, return ret; } + /* For pinned files, it should be fallocate()-ed in advance. */ + if (f2fs_is_pinned_file(inode)) + return 0; + /* Do not preallocate blocks that will be written partially in 4KB. */ map.m_lblk = F2FS_BLK_ALIGN(pos); map.m_len = F2FS_BYTES_TO_BLK(pos + count);