Message ID | 20240213173812.1432663-2-daeho43@gmail.com (mailing list archive) |
---|---|
State | Accepted |
Commit | 2fded30d8dce44599da8b7e07f500faaa5c0cd53 |
Headers | show |
Series | [f2fs-dev,v3,1/2] f2fs: separate f2fs_gc_range() to use GC for a range | expand |
Hi Daeho, On 2024/2/14 1:38, Daeho Jeong wrote: > From: Daeho Jeong <daehojeong@google.com> > > Support file pinning with conventional storage area for zoned devices > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > --- > v3: check the hole when migrating blocks for swap. > do not use the remainder of cold pin section. > v2: flush previous dirty pages before swapon. > do not re-check for the last extent of swap area. > merge this patch with swap file pinning support patch. > --- > fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- > fs/f2fs/f2fs.h | 17 +++++++++++- > fs/f2fs/file.c | 24 ++++++++++++----- > fs/f2fs/gc.c | 14 +++++++--- > fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ > fs/f2fs/segment.h | 10 +++++++ > 6 files changed, 154 insertions(+), 38 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 828c797cd47c..0c9aa3082fcf 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, > unsigned int blkofs; > unsigned int blk_per_sec = BLKS_PER_SEC(sbi); > unsigned int secidx = start_blk / blk_per_sec; > - unsigned int end_sec = secidx + blkcnt / blk_per_sec; > + unsigned int end_sec; > int ret = 0; > > + if (!blkcnt) > + return 0; > + end_sec = secidx + (blkcnt - 1) / blk_per_sec; > + > f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > filemap_invalidate_lock(inode->i_mapping); > > set_inode_flag(inode, FI_ALIGNED_WRITE); > set_inode_flag(inode, FI_OPU_WRITE); > > - for (; secidx < end_sec; secidx++) { > + for (; secidx <= end_sec; secidx++) { > + unsigned int blkofs_end = secidx == end_sec ? > + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; (start_blk + blkcnt - 1) % blk_per_sec ? > + > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + ret = f2fs_allocate_pinning_section(sbi); > + if (ret) { > + f2fs_up_write(&sbi->pin_sem); > + break; > + } > > set_inode_flag(inode, FI_SKIP_WRITES); > > - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { > + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { > struct page *page; > unsigned int blkidx = secidx * blk_per_sec + blkofs; > > @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, > nr_pblocks = map.m_len; > > if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || > - nr_pblocks & sec_blks_mask) { > + nr_pblocks & sec_blks_mask || > + !f2fs_valid_pinned_area(sbi, pblock)) { > + bool last_extent = false; > + > not_aligned++; > > nr_pblocks = roundup(nr_pblocks, blks_per_sec); > if (cur_lblock + nr_pblocks > sis->max) > nr_pblocks -= blks_per_sec; > > + /* this extent is last one */ > if (!nr_pblocks) { > - /* this extent is last one */ > - nr_pblocks = map.m_len; > - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); > - goto next; > + nr_pblocks = last_lblock - cur_lblock; > + last_extent = true; > } > > ret = f2fs_migrate_blocks(inode, cur_lblock, > nr_pblocks); > - if (ret) > + if (ret) { > + if (ret == -ENOENT) > + ret = -EINVAL; > goto out; > - goto retry; > + } > + > + if (!last_extent) > + goto retry; > } > -next: > + > if (cur_lblock + nr_pblocks >= sis->max) > nr_pblocks = sis->max - cur_lblock; > > @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > sector_t *span) > { > struct inode *inode = file_inode(file); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > int ret; > > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { > - f2fs_err(F2FS_I_SB(inode), > - "Swapfile not supported in LFS mode"); > + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { > + f2fs_err(sbi, "Swapfile not supported in LFS mode"); > return -EINVAL; > } > > @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > > f2fs_precache_extents(inode); > > + ret = filemap_fdatawrite(inode->i_mapping); > + if (ret < 0) > + return ret; What do you think of exchanging position of f2fs_precache_extents() and filemap_fdatawrite()? so that f2fs_precache_extents() can load extent info after physical addresses of all data are fixed. Thanks, > + > ret = check_swap_activate(sis, file, span); > if (ret < 0) > return ret; > > stat_inc_swapfile_inode(inode); > set_inode_flag(inode, FI_PIN_FILE); > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > return ret; > } > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 40eb590ed646..351133a11518 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, > unsigned int *newseg, bool new_sec, int dir); > void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > unsigned int start, unsigned int end); > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); > int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); > bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, > @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); > block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); > int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); > void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections); > int f2fs_resize_fs(struct file *filp, __u64 block_count); > int __init f2fs_create_garbage_collection_cache(void); > void f2fs_destroy_garbage_collection_cache(void); > @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) > return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; > } > > +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, > + block_t blkaddr) > +{ > + if (f2fs_sb_has_blkzoned(sbi)) { > + int devi = f2fs_target_device_index(sbi, blkaddr); > + > + return !bdev_is_zoned(FDEV(devi).bdev); > + } > + return true; > +} > + > static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) > { > return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 2c13b340c8a0..21c3aa93a8db 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, > > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + err = f2fs_allocate_pinning_section(sbi); > + if (err) { > + f2fs_up_write(&sbi->pin_sem); > + goto out_err; > + } > > map.m_seg_type = CURSEG_COLD_DATA_PINNED; > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); > @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > { > struct inode *inode = file_inode(filp); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > __u32 pin; > int ret = 0; > > @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > ret = mnt_want_write_file(filp); > @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > clear_inode_flag(inode, FI_PIN_FILE); > f2fs_i_gc_failures_write(inode, 0); > goto done; > + } else if (f2fs_is_pinned_file(inode)) { > + goto done; > } > > - if (f2fs_should_update_outplace(inode, NULL)) { > + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > + ret = -EFBIG; > + goto out; > + } > + > + /* Let's allow file pinning on zoned device. */ > + if (!f2fs_sb_has_blkzoned(sbi) && > + f2fs_should_update_outplace(inode, NULL)) { > ret = -EINVAL; > goto out; > } > @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > set_inode_flag(inode, FI_PIN_FILE); > ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; > done: > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > out: > inode_unlock(inode); > mnt_drop_write_file(filp); > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index a089a938355b..3ff126316d42 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) > init_atgc_management(sbi); > } > > -static int f2fs_gc_range(struct f2fs_sb_info *sbi, > - unsigned int start_seg, unsigned int end_seg, bool dry_run) > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections) > { > unsigned int segno; > + unsigned int gc_secs = dry_run_sections; > > for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { > struct gc_inode_list gc_list = { > @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, > .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), > }; > > - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); > + do_garbage_collect(sbi, segno, &gc_list, FG_GC, > + dry_run_sections == 0); > put_gc_inode(&gc_list); > > if (!dry_run && get_valid_blocks(sbi, segno, true)) > return -EAGAIN; > + if (dry_run && dry_run_sections && > + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) > + break; > > if (fatal_signal_pending(current)) > return -ERESTARTSYS; > @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, > f2fs_allocate_segment_for_resize(sbi, type, start, end); > > /* do GC to move out valid blocks in the range */ > - err = f2fs_gc_range(sbi, start, end, dry_run); > + err = f2fs_gc_range(sbi, start, end, dry_run, 0); > if (err || dry_run) > goto out; > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 4e985750c938..0b72c8536ccf 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, > * This function should be returned with success, otherwise BUG > */ > static void get_new_segment(struct f2fs_sb_info *sbi, > - unsigned int *newseg, bool new_sec) > + unsigned int *newseg, bool new_sec, bool pinning) > { > struct free_segmap_info *free_i = FREE_I(sbi); > unsigned int segno, secno, zoneno; > @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, > if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) > goto got_it; > } > + > + /* > + * If we format f2fs on zoned storage, let's try to get pinned sections > + * from beginning of the storage, which should be a conventional one. > + */ > + if (f2fs_sb_has_blkzoned(sbi)) { > + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); > + hint = GET_SEC_FROM_SEG(sbi, segno); > + } > + > find_other_zone: > secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); > if (secno >= MAIN_SECS(sbi)) { > @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) > * Allocate a current working segment. > * This function always allocates a free segment in LFS manner. > */ > -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > unsigned int segno = curseg->segno; > + bool pinning = type == CURSEG_COLD_DATA_PINNED; > > if (curseg->inited) > write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); > + > segno = __get_next_segno(sbi, type); > - get_new_segment(sbi, &segno, new_sec); > + get_new_segment(sbi, &segno, new_sec, pinning); > + if (new_sec && pinning && > + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { > + __set_free(sbi, segno); > + return -EAGAIN; > + } > + > curseg->next_segno = segno; > reset_curseg(sbi, type, 1); > curseg->alloc_type = LFS; > if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) > curseg->fragment_remained_chunk = > get_random_u32_inclusive(1, sbi->max_fragment_chunk); > + return 0; > } > > static int __next_free_blkoff(struct f2fs_sb_info *sbi, > @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > f2fs_up_read(&SM_I(sbi)->curseg_lock); > } > > -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > bool new_sec, bool force) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > !curseg->next_blkoff && > !get_valid_blocks(sbi, curseg->segno, new_sec) && > !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) > - return; > + return 0; > > old_segno = curseg->segno; > - new_curseg(sbi, type, true); > + if (new_curseg(sbi, type, true)) > + return -EAGAIN; > stat_inc_seg_type(sbi, curseg); > locate_dirty_segment(sbi, old_segno); > + return 0; > } > > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > { > + int ret; > + > f2fs_down_read(&SM_I(sbi)->curseg_lock); > down_write(&SIT_I(sbi)->sentry_lock); > - __allocate_new_segment(sbi, type, true, force); > + ret = __allocate_new_segment(sbi, type, true, force); > up_write(&SIT_I(sbi)->sentry_lock); > f2fs_up_read(&SM_I(sbi)->curseg_lock); > + > + return ret; > +} > + > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) > +{ > + int err; > + bool gc_required = true; > + > +retry: > + f2fs_lock_op(sbi); > + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > + f2fs_unlock_op(sbi); > + > + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { > + f2fs_down_write(&sbi->gc_lock); > + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); > + f2fs_up_write(&sbi->gc_lock); > + > + gc_required = false; > + goto retry; > + } > + > + return err; > } > > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) > @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > * new segment. > */ > if (segment_full) { > + if (type == CURSEG_COLD_DATA_PINNED && > + !((curseg->segno + 1) % sbi->segs_per_sec)) > + goto skip_new_segment; > + > if (from_gc) { > get_atssr_segment(sbi, type, se->type, > AT_SSR, se->mtime); > @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > stat_inc_seg_type(sbi, curseg); > } > } > + > +skip_new_segment: > /* > * segment dirty status should be updated after segment allocation, > * so we just need to update status only one time after previous > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h > index 60d93a16f2ac..953af072915f 100644 > --- a/fs/f2fs/segment.h > +++ b/fs/f2fs/segment.h > @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) > dcc->discard_wake = true; > wake_up_interruptible_all(&dcc->discard_wait_queue); > } > + > +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) > +{ > + int devi; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) > + if (bdev_is_zoned(FDEV(devi).bdev)) > + return GET_SEGNO(sbi, FDEV(devi).start_blk); > + return 0; > +}
On 2024/2/14 1:38, Daeho Jeong wrote: > From: Daeho Jeong <daehojeong@google.com> > > Support file pinning with conventional storage area for zoned devices > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > --- > v3: check the hole when migrating blocks for swap. > do not use the remainder of cold pin section. > v2: flush previous dirty pages before swapon. > do not re-check for the last extent of swap area. > merge this patch with swap file pinning support patch. > --- > fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- > fs/f2fs/f2fs.h | 17 +++++++++++- > fs/f2fs/file.c | 24 ++++++++++++----- > fs/f2fs/gc.c | 14 +++++++--- > fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ > fs/f2fs/segment.h | 10 +++++++ > 6 files changed, 154 insertions(+), 38 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 828c797cd47c..0c9aa3082fcf 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, > unsigned int blkofs; > unsigned int blk_per_sec = BLKS_PER_SEC(sbi); > unsigned int secidx = start_blk / blk_per_sec; > - unsigned int end_sec = secidx + blkcnt / blk_per_sec; > + unsigned int end_sec; > int ret = 0; > > + if (!blkcnt) > + return 0; > + end_sec = secidx + (blkcnt - 1) / blk_per_sec; > + > f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > filemap_invalidate_lock(inode->i_mapping); > > set_inode_flag(inode, FI_ALIGNED_WRITE); > set_inode_flag(inode, FI_OPU_WRITE); > > - for (; secidx < end_sec; secidx++) { > + for (; secidx <= end_sec; secidx++) { > + unsigned int blkofs_end = secidx == end_sec ? > + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; > + > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + ret = f2fs_allocate_pinning_section(sbi); > + if (ret) { > + f2fs_up_write(&sbi->pin_sem); > + break; > + } > > set_inode_flag(inode, FI_SKIP_WRITES); > > - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { > + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { > struct page *page; > unsigned int blkidx = secidx * blk_per_sec + blkofs; > > @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, > nr_pblocks = map.m_len; > > if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || > - nr_pblocks & sec_blks_mask) { > + nr_pblocks & sec_blks_mask || > + !f2fs_valid_pinned_area(sbi, pblock)) { > + bool last_extent = false; > + > not_aligned++; > > nr_pblocks = roundup(nr_pblocks, blks_per_sec); > if (cur_lblock + nr_pblocks > sis->max) > nr_pblocks -= blks_per_sec; > > + /* this extent is last one */ > if (!nr_pblocks) { > - /* this extent is last one */ > - nr_pblocks = map.m_len; > - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); > - goto next; > + nr_pblocks = last_lblock - cur_lblock; > + last_extent = true; > } > > ret = f2fs_migrate_blocks(inode, cur_lblock, > nr_pblocks); > - if (ret) > + if (ret) { > + if (ret == -ENOENT) > + ret = -EINVAL; > goto out; > - goto retry; > + } > + > + if (!last_extent) > + goto retry; > } > -next: > + > if (cur_lblock + nr_pblocks >= sis->max) > nr_pblocks = sis->max - cur_lblock; > > @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > sector_t *span) > { > struct inode *inode = file_inode(file); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > int ret; > > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { > - f2fs_err(F2FS_I_SB(inode), > - "Swapfile not supported in LFS mode"); > + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { > + f2fs_err(sbi, "Swapfile not supported in LFS mode"); > return -EINVAL; > } > > @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > > f2fs_precache_extents(inode); > > + ret = filemap_fdatawrite(inode->i_mapping); > + if (ret < 0) > + return ret; > + > ret = check_swap_activate(sis, file, span); > if (ret < 0) > return ret; > > stat_inc_swapfile_inode(inode); > set_inode_flag(inode, FI_PIN_FILE); > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > return ret; > } > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 40eb590ed646..351133a11518 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, > unsigned int *newseg, bool new_sec, int dir); > void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > unsigned int start, unsigned int end); > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); > int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); > bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, > @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); > block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); > int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); > void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections); > int f2fs_resize_fs(struct file *filp, __u64 block_count); > int __init f2fs_create_garbage_collection_cache(void); > void f2fs_destroy_garbage_collection_cache(void); > @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) > return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; > } > > +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, > + block_t blkaddr) > +{ > + if (f2fs_sb_has_blkzoned(sbi)) { > + int devi = f2fs_target_device_index(sbi, blkaddr); > + > + return !bdev_is_zoned(FDEV(devi).bdev); > + } > + return true; > +} > + > static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) > { > return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 2c13b340c8a0..21c3aa93a8db 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, > > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + err = f2fs_allocate_pinning_section(sbi); > + if (err) { > + f2fs_up_write(&sbi->pin_sem); > + goto out_err; > + } > > map.m_seg_type = CURSEG_COLD_DATA_PINNED; > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); > @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > { > struct inode *inode = file_inode(filp); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > __u32 pin; > int ret = 0; > > @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > ret = mnt_want_write_file(filp); > @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > clear_inode_flag(inode, FI_PIN_FILE); > f2fs_i_gc_failures_write(inode, 0); > goto done; > + } else if (f2fs_is_pinned_file(inode)) { > + goto done; > } > > - if (f2fs_should_update_outplace(inode, NULL)) { > + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > + ret = -EFBIG; > + goto out; > + } > + > + /* Let's allow file pinning on zoned device. */ > + if (!f2fs_sb_has_blkzoned(sbi) && > + f2fs_should_update_outplace(inode, NULL)) { > ret = -EINVAL; > goto out; > } > @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > set_inode_flag(inode, FI_PIN_FILE); > ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; > done: > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > out: > inode_unlock(inode); > mnt_drop_write_file(filp); > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index a089a938355b..3ff126316d42 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) > init_atgc_management(sbi); > } > > -static int f2fs_gc_range(struct f2fs_sb_info *sbi, > - unsigned int start_seg, unsigned int end_seg, bool dry_run) > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections) > { > unsigned int segno; > + unsigned int gc_secs = dry_run_sections; > > for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { > struct gc_inode_list gc_list = { > @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, > .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), > }; > > - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); > + do_garbage_collect(sbi, segno, &gc_list, FG_GC, > + dry_run_sections == 0); > put_gc_inode(&gc_list); > > if (!dry_run && get_valid_blocks(sbi, segno, true)) > return -EAGAIN; > + if (dry_run && dry_run_sections && > + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) > + break; > > if (fatal_signal_pending(current)) > return -ERESTARTSYS; > @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, > f2fs_allocate_segment_for_resize(sbi, type, start, end); > > /* do GC to move out valid blocks in the range */ > - err = f2fs_gc_range(sbi, start, end, dry_run); > + err = f2fs_gc_range(sbi, start, end, dry_run, 0); > if (err || dry_run) > goto out; > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 4e985750c938..0b72c8536ccf 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, > * This function should be returned with success, otherwise BUG > */ > static void get_new_segment(struct f2fs_sb_info *sbi, > - unsigned int *newseg, bool new_sec) > + unsigned int *newseg, bool new_sec, bool pinning) > { > struct free_segmap_info *free_i = FREE_I(sbi); > unsigned int segno, secno, zoneno; > @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, > if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) > goto got_it; > } > + > + /* > + * If we format f2fs on zoned storage, let's try to get pinned sections > + * from beginning of the storage, which should be a conventional one. > + */ > + if (f2fs_sb_has_blkzoned(sbi)) { > + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); > + hint = GET_SEC_FROM_SEG(sbi, segno); > + } > + > find_other_zone: > secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); > if (secno >= MAIN_SECS(sbi)) { > @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) > * Allocate a current working segment. > * This function always allocates a free segment in LFS manner. > */ > -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > unsigned int segno = curseg->segno; > + bool pinning = type == CURSEG_COLD_DATA_PINNED; > > if (curseg->inited) > write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); > + > segno = __get_next_segno(sbi, type); If type is CURSEG_COLD_DATA_PINNED, can we let __get_next_segno() return 0? then we can allocate free segment from conventional zone in priority. Thanks, > - get_new_segment(sbi, &segno, new_sec); > + get_new_segment(sbi, &segno, new_sec, pinning); > + if (new_sec && pinning && > + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { > + __set_free(sbi, segno); > + return -EAGAIN; > + } > + > curseg->next_segno = segno; > reset_curseg(sbi, type, 1); > curseg->alloc_type = LFS; > if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) > curseg->fragment_remained_chunk = > get_random_u32_inclusive(1, sbi->max_fragment_chunk); > + return 0; > } > > static int __next_free_blkoff(struct f2fs_sb_info *sbi, > @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > f2fs_up_read(&SM_I(sbi)->curseg_lock); > } > > -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > bool new_sec, bool force) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > !curseg->next_blkoff && > !get_valid_blocks(sbi, curseg->segno, new_sec) && > !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) > - return; > + return 0; > > old_segno = curseg->segno; > - new_curseg(sbi, type, true); > + if (new_curseg(sbi, type, true)) > + return -EAGAIN; > stat_inc_seg_type(sbi, curseg); > locate_dirty_segment(sbi, old_segno); > + return 0; > } > > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > { > + int ret; > + > f2fs_down_read(&SM_I(sbi)->curseg_lock); > down_write(&SIT_I(sbi)->sentry_lock); > - __allocate_new_segment(sbi, type, true, force); > + ret = __allocate_new_segment(sbi, type, true, force); > up_write(&SIT_I(sbi)->sentry_lock); > f2fs_up_read(&SM_I(sbi)->curseg_lock); > + > + return ret; > +} > + > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) > +{ > + int err; > + bool gc_required = true; > + > +retry: > + f2fs_lock_op(sbi); > + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > + f2fs_unlock_op(sbi); > + > + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { > + f2fs_down_write(&sbi->gc_lock); > + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); > + f2fs_up_write(&sbi->gc_lock); > + > + gc_required = false; > + goto retry; > + } > + > + return err; > } > > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) > @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > * new segment. > */ > if (segment_full) { > + if (type == CURSEG_COLD_DATA_PINNED && > + !((curseg->segno + 1) % sbi->segs_per_sec)) > + goto skip_new_segment; > + > if (from_gc) { > get_atssr_segment(sbi, type, se->type, > AT_SSR, se->mtime); > @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > stat_inc_seg_type(sbi, curseg); > } > } > + > +skip_new_segment: > /* > * segment dirty status should be updated after segment allocation, > * so we just need to update status only one time after previous > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h > index 60d93a16f2ac..953af072915f 100644 > --- a/fs/f2fs/segment.h > +++ b/fs/f2fs/segment.h > @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) > dcc->discard_wake = true; > wake_up_interruptible_all(&dcc->discard_wait_queue); > } > + > +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) > +{ > + int devi; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) > + if (bdev_is_zoned(FDEV(devi).bdev)) > + return GET_SEGNO(sbi, FDEV(devi).start_blk); > + return 0; > +}
On 2024/2/14 1:38, Daeho Jeong wrote: > From: Daeho Jeong <daehojeong@google.com> > > Support file pinning with conventional storage area for zoned devices > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > --- > v3: check the hole when migrating blocks for swap. > do not use the remainder of cold pin section. > v2: flush previous dirty pages before swapon. > do not re-check for the last extent of swap area. > merge this patch with swap file pinning support patch. > --- > fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- > fs/f2fs/f2fs.h | 17 +++++++++++- > fs/f2fs/file.c | 24 ++++++++++++----- > fs/f2fs/gc.c | 14 +++++++--- > fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ > fs/f2fs/segment.h | 10 +++++++ > 6 files changed, 154 insertions(+), 38 deletions(-) > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > index 828c797cd47c..0c9aa3082fcf 100644 > --- a/fs/f2fs/data.c > +++ b/fs/f2fs/data.c > @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, > unsigned int blkofs; > unsigned int blk_per_sec = BLKS_PER_SEC(sbi); > unsigned int secidx = start_blk / blk_per_sec; > - unsigned int end_sec = secidx + blkcnt / blk_per_sec; > + unsigned int end_sec; > int ret = 0; > > + if (!blkcnt) > + return 0; > + end_sec = secidx + (blkcnt - 1) / blk_per_sec; > + > f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > filemap_invalidate_lock(inode->i_mapping); > > set_inode_flag(inode, FI_ALIGNED_WRITE); > set_inode_flag(inode, FI_OPU_WRITE); > > - for (; secidx < end_sec; secidx++) { > + for (; secidx <= end_sec; secidx++) { > + unsigned int blkofs_end = secidx == end_sec ? > + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; > + > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + ret = f2fs_allocate_pinning_section(sbi); > + if (ret) { > + f2fs_up_write(&sbi->pin_sem); > + break; > + } > > set_inode_flag(inode, FI_SKIP_WRITES); > > - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { > + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { > struct page *page; > unsigned int blkidx = secidx * blk_per_sec + blkofs; > > @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, > nr_pblocks = map.m_len; > > if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || > - nr_pblocks & sec_blks_mask) { > + nr_pblocks & sec_blks_mask || > + !f2fs_valid_pinned_area(sbi, pblock)) { > + bool last_extent = false; > + > not_aligned++; > > nr_pblocks = roundup(nr_pblocks, blks_per_sec); > if (cur_lblock + nr_pblocks > sis->max) > nr_pblocks -= blks_per_sec; > > + /* this extent is last one */ > if (!nr_pblocks) { > - /* this extent is last one */ > - nr_pblocks = map.m_len; > - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); > - goto next; > + nr_pblocks = last_lblock - cur_lblock; > + last_extent = true; > } > > ret = f2fs_migrate_blocks(inode, cur_lblock, > nr_pblocks); > - if (ret) > + if (ret) { > + if (ret == -ENOENT) > + ret = -EINVAL; > goto out; > - goto retry; > + } > + > + if (!last_extent) > + goto retry; > } > -next: > + > if (cur_lblock + nr_pblocks >= sis->max) > nr_pblocks = sis->max - cur_lblock; > > @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > sector_t *span) > { > struct inode *inode = file_inode(file); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > int ret; > > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { > - f2fs_err(F2FS_I_SB(inode), > - "Swapfile not supported in LFS mode"); > + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { > + f2fs_err(sbi, "Swapfile not supported in LFS mode"); > return -EINVAL; > } > > @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > > f2fs_precache_extents(inode); > > + ret = filemap_fdatawrite(inode->i_mapping); > + if (ret < 0) > + return ret; > + > ret = check_swap_activate(sis, file, span); > if (ret < 0) > return ret; > > stat_inc_swapfile_inode(inode); > set_inode_flag(inode, FI_PIN_FILE); > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > return ret; > } > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > index 40eb590ed646..351133a11518 100644 > --- a/fs/f2fs/f2fs.h > +++ b/fs/f2fs/f2fs.h > @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, > unsigned int *newseg, bool new_sec, int dir); > void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > unsigned int start, unsigned int end); > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); > int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); > bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, > @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); > block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); > int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); > void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections); > int f2fs_resize_fs(struct file *filp, __u64 block_count); > int __init f2fs_create_garbage_collection_cache(void); > void f2fs_destroy_garbage_collection_cache(void); > @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) > return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; > } > > +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, > + block_t blkaddr) > +{ > + if (f2fs_sb_has_blkzoned(sbi)) { > + int devi = f2fs_target_device_index(sbi, blkaddr); > + > + return !bdev_is_zoned(FDEV(devi).bdev); > + } > + return true; > +} > + > static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) > { > return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > index 2c13b340c8a0..21c3aa93a8db 100644 > --- a/fs/f2fs/file.c > +++ b/fs/f2fs/file.c > @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, > > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > - f2fs_unlock_op(sbi); > + err = f2fs_allocate_pinning_section(sbi); > + if (err) { > + f2fs_up_write(&sbi->pin_sem); > + goto out_err; > + } > > map.m_seg_type = CURSEG_COLD_DATA_PINNED; > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); > @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > { > struct inode *inode = file_inode(filp); > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > __u32 pin; > int ret = 0; > > @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > if (!S_ISREG(inode->i_mode)) > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > + if (f2fs_readonly(sbi->sb)) > return -EROFS; > > ret = mnt_want_write_file(filp); > @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > clear_inode_flag(inode, FI_PIN_FILE); > f2fs_i_gc_failures_write(inode, 0); > goto done; > + } else if (f2fs_is_pinned_file(inode)) { > + goto done; > } > > - if (f2fs_should_update_outplace(inode, NULL)) { > + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > + ret = -EFBIG; > + goto out; > + } > + > + /* Let's allow file pinning on zoned device. */ > + if (!f2fs_sb_has_blkzoned(sbi) && > + f2fs_should_update_outplace(inode, NULL)) { > ret = -EINVAL; > goto out; > } > @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > set_inode_flag(inode, FI_PIN_FILE); > ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; > done: > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > + f2fs_update_time(sbi, REQ_TIME); > out: > inode_unlock(inode); > mnt_drop_write_file(filp); > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > index a089a938355b..3ff126316d42 100644 > --- a/fs/f2fs/gc.c > +++ b/fs/f2fs/gc.c > @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) > init_atgc_management(sbi); > } > > -static int f2fs_gc_range(struct f2fs_sb_info *sbi, > - unsigned int start_seg, unsigned int end_seg, bool dry_run) > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > + unsigned int start_seg, unsigned int end_seg, > + bool dry_run, unsigned int dry_run_sections) > { > unsigned int segno; > + unsigned int gc_secs = dry_run_sections; > > for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { > struct gc_inode_list gc_list = { > @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, > .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), > }; > > - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); > + do_garbage_collect(sbi, segno, &gc_list, FG_GC, > + dry_run_sections == 0); > put_gc_inode(&gc_list); > > if (!dry_run && get_valid_blocks(sbi, segno, true)) > return -EAGAIN; > + if (dry_run && dry_run_sections && > + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) > + break; > > if (fatal_signal_pending(current)) > return -ERESTARTSYS; > @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, > f2fs_allocate_segment_for_resize(sbi, type, start, end); > > /* do GC to move out valid blocks in the range */ > - err = f2fs_gc_range(sbi, start, end, dry_run); > + err = f2fs_gc_range(sbi, start, end, dry_run, 0); > if (err || dry_run) > goto out; > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > index 4e985750c938..0b72c8536ccf 100644 > --- a/fs/f2fs/segment.c > +++ b/fs/f2fs/segment.c > @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, > * This function should be returned with success, otherwise BUG > */ > static void get_new_segment(struct f2fs_sb_info *sbi, > - unsigned int *newseg, bool new_sec) > + unsigned int *newseg, bool new_sec, bool pinning) > { > struct free_segmap_info *free_i = FREE_I(sbi); > unsigned int segno, secno, zoneno; > @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, > if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) > goto got_it; > } > + > + /* > + * If we format f2fs on zoned storage, let's try to get pinned sections > + * from beginning of the storage, which should be a conventional one. > + */ > + if (f2fs_sb_has_blkzoned(sbi)) { > + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); > + hint = GET_SEC_FROM_SEG(sbi, segno); > + } > + > find_other_zone: > secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); > if (secno >= MAIN_SECS(sbi)) { > @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) > * Allocate a current working segment. > * This function always allocates a free segment in LFS manner. > */ > -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > unsigned int segno = curseg->segno; > + bool pinning = type == CURSEG_COLD_DATA_PINNED; > > if (curseg->inited) > write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); > + > segno = __get_next_segno(sbi, type); > - get_new_segment(sbi, &segno, new_sec); > + get_new_segment(sbi, &segno, new_sec, pinning); > + if (new_sec && pinning && > + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { > + __set_free(sbi, segno); > + return -EAGAIN; > + } > + > curseg->next_segno = segno; > reset_curseg(sbi, type, 1); > curseg->alloc_type = LFS; > if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) > curseg->fragment_remained_chunk = > get_random_u32_inclusive(1, sbi->max_fragment_chunk); > + return 0; > } > > static int __next_free_blkoff(struct f2fs_sb_info *sbi, > @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > f2fs_up_read(&SM_I(sbi)->curseg_lock); > } > > -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > bool new_sec, bool force) > { > struct curseg_info *curseg = CURSEG_I(sbi, type); > @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > !curseg->next_blkoff && > !get_valid_blocks(sbi, curseg->segno, new_sec) && > !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) > - return; > + return 0; > > old_segno = curseg->segno; > - new_curseg(sbi, type, true); > + if (new_curseg(sbi, type, true)) > + return -EAGAIN; > stat_inc_seg_type(sbi, curseg); > locate_dirty_segment(sbi, old_segno); > + return 0; > } > > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > { > + int ret; > + > f2fs_down_read(&SM_I(sbi)->curseg_lock); > down_write(&SIT_I(sbi)->sentry_lock); > - __allocate_new_segment(sbi, type, true, force); > + ret = __allocate_new_segment(sbi, type, true, force); > up_write(&SIT_I(sbi)->sentry_lock); > f2fs_up_read(&SM_I(sbi)->curseg_lock); > + > + return ret; > +} > + > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) > +{ > + int err; > + bool gc_required = true; > + > +retry: > + f2fs_lock_op(sbi); > + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > + f2fs_unlock_op(sbi); > + > + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { > + f2fs_down_write(&sbi->gc_lock); > + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); > + f2fs_up_write(&sbi->gc_lock); > + > + gc_required = false; > + goto retry; > + } > + > + return err; > } > > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) > @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > * new segment. > */ > if (segment_full) { > + if (type == CURSEG_COLD_DATA_PINNED && > + !((curseg->segno + 1) % sbi->segs_per_sec)) > + goto skip_new_segment; Before we skip allocate new segment for pinned log, how about tagging curseg as uninitialized one via curseg->inited = false, and curseg->segno = NULL_SEGNO? so that we can avoid __f2fs_save_inmem_curseg() to touch this log, and not show incorrect segno of pinned log in /sys/kernel/debug/f2fs/status. Thanks, > + > if (from_gc) { > get_atssr_segment(sbi, type, se->type, > AT_SSR, se->mtime); > @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > stat_inc_seg_type(sbi, curseg); > } > } > + > +skip_new_segment: > /* > * segment dirty status should be updated after segment allocation, > * so we just need to update status only one time after previous > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h > index 60d93a16f2ac..953af072915f 100644 > --- a/fs/f2fs/segment.h > +++ b/fs/f2fs/segment.h > @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) > dcc->discard_wake = true; > wake_up_interruptible_all(&dcc->discard_wait_queue); > } > + > +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) > +{ > + int devi; > + > + for (devi = 0; devi < sbi->s_ndevs; devi++) > + if (bdev_is_zoned(FDEV(devi).bdev)) > + return GET_SEGNO(sbi, FDEV(devi).start_blk); > + return 0; > +}
Hi Chao, I've tested the patch and queued in -dev, so can you take a look at it and propose any change on top of it? Then, we can discuss further on it. On 02/23, Chao Yu wrote: > On 2024/2/14 1:38, Daeho Jeong wrote: > > From: Daeho Jeong <daehojeong@google.com> > > > > Support file pinning with conventional storage area for zoned devices > > > > Signed-off-by: Daeho Jeong <daehojeong@google.com> > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > > --- > > v3: check the hole when migrating blocks for swap. > > do not use the remainder of cold pin section. > > v2: flush previous dirty pages before swapon. > > do not re-check for the last extent of swap area. > > merge this patch with swap file pinning support patch. > > --- > > fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- > > fs/f2fs/f2fs.h | 17 +++++++++++- > > fs/f2fs/file.c | 24 ++++++++++++----- > > fs/f2fs/gc.c | 14 +++++++--- > > fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ > > fs/f2fs/segment.h | 10 +++++++ > > 6 files changed, 154 insertions(+), 38 deletions(-) > > > > diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c > > index 828c797cd47c..0c9aa3082fcf 100644 > > --- a/fs/f2fs/data.c > > +++ b/fs/f2fs/data.c > > @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, > > unsigned int blkofs; > > unsigned int blk_per_sec = BLKS_PER_SEC(sbi); > > unsigned int secidx = start_blk / blk_per_sec; > > - unsigned int end_sec = secidx + blkcnt / blk_per_sec; > > + unsigned int end_sec; > > int ret = 0; > > + if (!blkcnt) > > + return 0; > > + end_sec = secidx + (blkcnt - 1) / blk_per_sec; > > + > > f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); > > filemap_invalidate_lock(inode->i_mapping); > > set_inode_flag(inode, FI_ALIGNED_WRITE); > > set_inode_flag(inode, FI_OPU_WRITE); > > - for (; secidx < end_sec; secidx++) { > > + for (; secidx <= end_sec; secidx++) { > > + unsigned int blkofs_end = secidx == end_sec ? > > + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; > > + > > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > > - f2fs_unlock_op(sbi); > > + ret = f2fs_allocate_pinning_section(sbi); > > + if (ret) { > > + f2fs_up_write(&sbi->pin_sem); > > + break; > > + } > > set_inode_flag(inode, FI_SKIP_WRITES); > > - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { > > + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { > > struct page *page; > > unsigned int blkidx = secidx * blk_per_sec + blkofs; > > @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, > > nr_pblocks = map.m_len; > > if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || > > - nr_pblocks & sec_blks_mask) { > > + nr_pblocks & sec_blks_mask || > > + !f2fs_valid_pinned_area(sbi, pblock)) { > > + bool last_extent = false; > > + > > not_aligned++; > > nr_pblocks = roundup(nr_pblocks, blks_per_sec); > > if (cur_lblock + nr_pblocks > sis->max) > > nr_pblocks -= blks_per_sec; > > + /* this extent is last one */ > > if (!nr_pblocks) { > > - /* this extent is last one */ > > - nr_pblocks = map.m_len; > > - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); > > - goto next; > > + nr_pblocks = last_lblock - cur_lblock; > > + last_extent = true; > > } > > ret = f2fs_migrate_blocks(inode, cur_lblock, > > nr_pblocks); > > - if (ret) > > + if (ret) { > > + if (ret == -ENOENT) > > + ret = -EINVAL; > > goto out; > > - goto retry; > > + } > > + > > + if (!last_extent) > > + goto retry; > > } > > -next: > > + > > if (cur_lblock + nr_pblocks >= sis->max) > > nr_pblocks = sis->max - cur_lblock; > > @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > > sector_t *span) > > { > > struct inode *inode = file_inode(file); > > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > int ret; > > if (!S_ISREG(inode->i_mode)) > > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > > + if (f2fs_readonly(sbi->sb)) > > return -EROFS; > > - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { > > - f2fs_err(F2FS_I_SB(inode), > > - "Swapfile not supported in LFS mode"); > > + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { > > + f2fs_err(sbi, "Swapfile not supported in LFS mode"); > > return -EINVAL; > > } > > @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, > > f2fs_precache_extents(inode); > > + ret = filemap_fdatawrite(inode->i_mapping); > > + if (ret < 0) > > + return ret; > > + > > ret = check_swap_activate(sis, file, span); > > if (ret < 0) > > return ret; > > stat_inc_swapfile_inode(inode); > > set_inode_flag(inode, FI_PIN_FILE); > > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > > + f2fs_update_time(sbi, REQ_TIME); > > return ret; > > } > > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h > > index 40eb590ed646..351133a11518 100644 > > --- a/fs/f2fs/f2fs.h > > +++ b/fs/f2fs/f2fs.h > > @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, > > unsigned int *newseg, bool new_sec, int dir); > > void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > > unsigned int start, unsigned int end); > > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); > > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); > > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); > > int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); > > bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, > > @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); > > block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); > > int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); > > void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); > > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > > + unsigned int start_seg, unsigned int end_seg, > > + bool dry_run, unsigned int dry_run_sections); > > int f2fs_resize_fs(struct file *filp, __u64 block_count); > > int __init f2fs_create_garbage_collection_cache(void); > > void f2fs_destroy_garbage_collection_cache(void); > > @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) > > return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; > > } > > +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, > > + block_t blkaddr) > > +{ > > + if (f2fs_sb_has_blkzoned(sbi)) { > > + int devi = f2fs_target_device_index(sbi, blkaddr); > > + > > + return !bdev_is_zoned(FDEV(devi).bdev); > > + } > > + return true; > > +} > > + > > static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) > > { > > return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; > > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c > > index 2c13b340c8a0..21c3aa93a8db 100644 > > --- a/fs/f2fs/file.c > > +++ b/fs/f2fs/file.c > > @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, > > f2fs_down_write(&sbi->pin_sem); > > - f2fs_lock_op(sbi); > > - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > > - f2fs_unlock_op(sbi); > > + err = f2fs_allocate_pinning_section(sbi); > > + if (err) { > > + f2fs_up_write(&sbi->pin_sem); > > + goto out_err; > > + } > > map.m_seg_type = CURSEG_COLD_DATA_PINNED; > > err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); > > @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) > > static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > { > > struct inode *inode = file_inode(filp); > > + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); > > __u32 pin; > > int ret = 0; > > @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > if (!S_ISREG(inode->i_mode)) > > return -EINVAL; > > - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) > > + if (f2fs_readonly(sbi->sb)) > > return -EROFS; > > ret = mnt_want_write_file(filp); > > @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > clear_inode_flag(inode, FI_PIN_FILE); > > f2fs_i_gc_failures_write(inode, 0); > > goto done; > > + } else if (f2fs_is_pinned_file(inode)) { > > + goto done; > > } > > - if (f2fs_should_update_outplace(inode, NULL)) { > > + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { > > + ret = -EFBIG; > > + goto out; > > + } > > + > > + /* Let's allow file pinning on zoned device. */ > > + if (!f2fs_sb_has_blkzoned(sbi) && > > + f2fs_should_update_outplace(inode, NULL)) { > > ret = -EINVAL; > > goto out; > > } > > @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) > > set_inode_flag(inode, FI_PIN_FILE); > > ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; > > done: > > - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); > > + f2fs_update_time(sbi, REQ_TIME); > > out: > > inode_unlock(inode); > > mnt_drop_write_file(filp); > > diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c > > index a089a938355b..3ff126316d42 100644 > > --- a/fs/f2fs/gc.c > > +++ b/fs/f2fs/gc.c > > @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) > > init_atgc_management(sbi); > > } > > -static int f2fs_gc_range(struct f2fs_sb_info *sbi, > > - unsigned int start_seg, unsigned int end_seg, bool dry_run) > > +int f2fs_gc_range(struct f2fs_sb_info *sbi, > > + unsigned int start_seg, unsigned int end_seg, > > + bool dry_run, unsigned int dry_run_sections) > > { > > unsigned int segno; > > + unsigned int gc_secs = dry_run_sections; > > for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { > > struct gc_inode_list gc_list = { > > @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, > > .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), > > }; > > - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); > > + do_garbage_collect(sbi, segno, &gc_list, FG_GC, > > + dry_run_sections == 0); > > put_gc_inode(&gc_list); > > if (!dry_run && get_valid_blocks(sbi, segno, true)) > > return -EAGAIN; > > + if (dry_run && dry_run_sections && > > + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) > > + break; > > if (fatal_signal_pending(current)) > > return -ERESTARTSYS; > > @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, > > f2fs_allocate_segment_for_resize(sbi, type, start, end); > > /* do GC to move out valid blocks in the range */ > > - err = f2fs_gc_range(sbi, start, end, dry_run); > > + err = f2fs_gc_range(sbi, start, end, dry_run, 0); > > if (err || dry_run) > > goto out; > > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c > > index 4e985750c938..0b72c8536ccf 100644 > > --- a/fs/f2fs/segment.c > > +++ b/fs/f2fs/segment.c > > @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, > > * This function should be returned with success, otherwise BUG > > */ > > static void get_new_segment(struct f2fs_sb_info *sbi, > > - unsigned int *newseg, bool new_sec) > > + unsigned int *newseg, bool new_sec, bool pinning) > > { > > struct free_segmap_info *free_i = FREE_I(sbi); > > unsigned int segno, secno, zoneno; > > @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, > > if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) > > goto got_it; > > } > > + > > + /* > > + * If we format f2fs on zoned storage, let's try to get pinned sections > > + * from beginning of the storage, which should be a conventional one. > > + */ > > + if (f2fs_sb_has_blkzoned(sbi)) { > > + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); > > + hint = GET_SEC_FROM_SEG(sbi, segno); > > + } > > + > > find_other_zone: > > secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); > > if (secno >= MAIN_SECS(sbi)) { > > @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) > > * Allocate a current working segment. > > * This function always allocates a free segment in LFS manner. > > */ > > -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > > +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) > > { > > struct curseg_info *curseg = CURSEG_I(sbi, type); > > unsigned int segno = curseg->segno; > > + bool pinning = type == CURSEG_COLD_DATA_PINNED; > > if (curseg->inited) > > write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); > > + > > segno = __get_next_segno(sbi, type); > > - get_new_segment(sbi, &segno, new_sec); > > + get_new_segment(sbi, &segno, new_sec, pinning); > > + if (new_sec && pinning && > > + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { > > + __set_free(sbi, segno); > > + return -EAGAIN; > > + } > > + > > curseg->next_segno = segno; > > reset_curseg(sbi, type, 1); > > curseg->alloc_type = LFS; > > if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) > > curseg->fragment_remained_chunk = > > get_random_u32_inclusive(1, sbi->max_fragment_chunk); > > + return 0; > > } > > static int __next_free_blkoff(struct f2fs_sb_info *sbi, > > @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, > > f2fs_up_read(&SM_I(sbi)->curseg_lock); > > } > > -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > > +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > > bool new_sec, bool force) > > { > > struct curseg_info *curseg = CURSEG_I(sbi, type); > > @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, > > !curseg->next_blkoff && > > !get_valid_blocks(sbi, curseg->segno, new_sec) && > > !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) > > - return; > > + return 0; > > old_segno = curseg->segno; > > - new_curseg(sbi, type, true); > > + if (new_curseg(sbi, type, true)) > > + return -EAGAIN; > > stat_inc_seg_type(sbi, curseg); > > locate_dirty_segment(sbi, old_segno); > > + return 0; > > } > > -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > > +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) > > { > > + int ret; > > + > > f2fs_down_read(&SM_I(sbi)->curseg_lock); > > down_write(&SIT_I(sbi)->sentry_lock); > > - __allocate_new_segment(sbi, type, true, force); > > + ret = __allocate_new_segment(sbi, type, true, force); > > up_write(&SIT_I(sbi)->sentry_lock); > > f2fs_up_read(&SM_I(sbi)->curseg_lock); > > + > > + return ret; > > +} > > + > > +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) > > +{ > > + int err; > > + bool gc_required = true; > > + > > +retry: > > + f2fs_lock_op(sbi); > > + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); > > + f2fs_unlock_op(sbi); > > + > > + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { > > + f2fs_down_write(&sbi->gc_lock); > > + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); > > + f2fs_up_write(&sbi->gc_lock); > > + > > + gc_required = false; > > + goto retry; > > + } > > + > > + return err; > > } > > void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) > > @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > > * new segment. > > */ > > if (segment_full) { > > + if (type == CURSEG_COLD_DATA_PINNED && > > + !((curseg->segno + 1) % sbi->segs_per_sec)) > > + goto skip_new_segment; > > Before we skip allocate new segment for pinned log, how about > tagging curseg as uninitialized one via curseg->inited = false, and > curseg->segno = NULL_SEGNO? so that we can avoid > __f2fs_save_inmem_curseg() to touch this log, and not show incorrect > segno of pinned log in /sys/kernel/debug/f2fs/status. > > Thanks, > > > + > > if (from_gc) { > > get_atssr_segment(sbi, type, se->type, > > AT_SSR, se->mtime); > > @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, > > stat_inc_seg_type(sbi, curseg); > > } > > } > > + > > +skip_new_segment: > > /* > > * segment dirty status should be updated after segment allocation, > > * so we just need to update status only one time after previous > > diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h > > index 60d93a16f2ac..953af072915f 100644 > > --- a/fs/f2fs/segment.h > > +++ b/fs/f2fs/segment.h > > @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) > > dcc->discard_wake = true; > > wake_up_interruptible_all(&dcc->discard_wait_queue); > > } > > + > > +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) > > +{ > > + int devi; > > + > > + for (devi = 0; devi < sbi->s_ndevs; devi++) > > + if (bdev_is_zoned(FDEV(devi).bdev)) > > + return GET_SEGNO(sbi, FDEV(devi).start_blk); > > + return 0; > > +}
On 2024/2/24 1:31, Jaegeuk Kim wrote: > Hi Chao, > > I've tested the patch and queued in -dev, so can you take a look at it and > propose any change on top of it? Then, we can discuss further on it. Okay, let me send patches for comments. Thanks, > > On 02/23, Chao Yu wrote: >> On 2024/2/14 1:38, Daeho Jeong wrote: >>> From: Daeho Jeong <daehojeong@google.com> >>> >>> Support file pinning with conventional storage area for zoned devices >>> >>> Signed-off-by: Daeho Jeong <daehojeong@google.com> >>> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> >>> --- >>> v3: check the hole when migrating blocks for swap. >>> do not use the remainder of cold pin section. >>> v2: flush previous dirty pages before swapon. >>> do not re-check for the last extent of swap area. >>> merge this patch with swap file pinning support patch. >>> --- >>> fs/f2fs/data.c | 58 ++++++++++++++++++++++++++------------- >>> fs/f2fs/f2fs.h | 17 +++++++++++- >>> fs/f2fs/file.c | 24 ++++++++++++----- >>> fs/f2fs/gc.c | 14 +++++++--- >>> fs/f2fs/segment.c | 69 +++++++++++++++++++++++++++++++++++++++++------ >>> fs/f2fs/segment.h | 10 +++++++ >>> 6 files changed, 154 insertions(+), 38 deletions(-) >>> >>> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c >>> index 828c797cd47c..0c9aa3082fcf 100644 >>> --- a/fs/f2fs/data.c >>> +++ b/fs/f2fs/data.c >>> @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, >>> unsigned int blkofs; >>> unsigned int blk_per_sec = BLKS_PER_SEC(sbi); >>> unsigned int secidx = start_blk / blk_per_sec; >>> - unsigned int end_sec = secidx + blkcnt / blk_per_sec; >>> + unsigned int end_sec; >>> int ret = 0; >>> + if (!blkcnt) >>> + return 0; >>> + end_sec = secidx + (blkcnt - 1) / blk_per_sec; >>> + >>> f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); >>> filemap_invalidate_lock(inode->i_mapping); >>> set_inode_flag(inode, FI_ALIGNED_WRITE); >>> set_inode_flag(inode, FI_OPU_WRITE); >>> - for (; secidx < end_sec; secidx++) { >>> + for (; secidx <= end_sec; secidx++) { >>> + unsigned int blkofs_end = secidx == end_sec ? >>> + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; >>> + >>> f2fs_down_write(&sbi->pin_sem); >>> - f2fs_lock_op(sbi); >>> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); >>> - f2fs_unlock_op(sbi); >>> + ret = f2fs_allocate_pinning_section(sbi); >>> + if (ret) { >>> + f2fs_up_write(&sbi->pin_sem); >>> + break; >>> + } >>> set_inode_flag(inode, FI_SKIP_WRITES); >>> - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { >>> + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { >>> struct page *page; >>> unsigned int blkidx = secidx * blk_per_sec + blkofs; >>> @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, >>> nr_pblocks = map.m_len; >>> if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || >>> - nr_pblocks & sec_blks_mask) { >>> + nr_pblocks & sec_blks_mask || >>> + !f2fs_valid_pinned_area(sbi, pblock)) { >>> + bool last_extent = false; >>> + >>> not_aligned++; >>> nr_pblocks = roundup(nr_pblocks, blks_per_sec); >>> if (cur_lblock + nr_pblocks > sis->max) >>> nr_pblocks -= blks_per_sec; >>> + /* this extent is last one */ >>> if (!nr_pblocks) { >>> - /* this extent is last one */ >>> - nr_pblocks = map.m_len; >>> - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); >>> - goto next; >>> + nr_pblocks = last_lblock - cur_lblock; >>> + last_extent = true; >>> } >>> ret = f2fs_migrate_blocks(inode, cur_lblock, >>> nr_pblocks); >>> - if (ret) >>> + if (ret) { >>> + if (ret == -ENOENT) >>> + ret = -EINVAL; >>> goto out; >>> - goto retry; >>> + } >>> + >>> + if (!last_extent) >>> + goto retry; >>> } >>> -next: >>> + >>> if (cur_lblock + nr_pblocks >= sis->max) >>> nr_pblocks = sis->max - cur_lblock; >>> @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, >>> sector_t *span) >>> { >>> struct inode *inode = file_inode(file); >>> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>> int ret; >>> if (!S_ISREG(inode->i_mode)) >>> return -EINVAL; >>> - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) >>> + if (f2fs_readonly(sbi->sb)) >>> return -EROFS; >>> - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { >>> - f2fs_err(F2FS_I_SB(inode), >>> - "Swapfile not supported in LFS mode"); >>> + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { >>> + f2fs_err(sbi, "Swapfile not supported in LFS mode"); >>> return -EINVAL; >>> } >>> @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, >>> f2fs_precache_extents(inode); >>> + ret = filemap_fdatawrite(inode->i_mapping); >>> + if (ret < 0) >>> + return ret; >>> + >>> ret = check_swap_activate(sis, file, span); >>> if (ret < 0) >>> return ret; >>> stat_inc_swapfile_inode(inode); >>> set_inode_flag(inode, FI_PIN_FILE); >>> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); >>> + f2fs_update_time(sbi, REQ_TIME); >>> return ret; >>> } >>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h >>> index 40eb590ed646..351133a11518 100644 >>> --- a/fs/f2fs/f2fs.h >>> +++ b/fs/f2fs/f2fs.h >>> @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, >>> unsigned int *newseg, bool new_sec, int dir); >>> void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, >>> unsigned int start, unsigned int end); >>> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); >>> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); >>> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); >>> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); >>> int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); >>> bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, >>> @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); >>> block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); >>> int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); >>> void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); >>> +int f2fs_gc_range(struct f2fs_sb_info *sbi, >>> + unsigned int start_seg, unsigned int end_seg, >>> + bool dry_run, unsigned int dry_run_sections); >>> int f2fs_resize_fs(struct file *filp, __u64 block_count); >>> int __init f2fs_create_garbage_collection_cache(void); >>> void f2fs_destroy_garbage_collection_cache(void); >>> @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) >>> return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; >>> } >>> +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, >>> + block_t blkaddr) >>> +{ >>> + if (f2fs_sb_has_blkzoned(sbi)) { >>> + int devi = f2fs_target_device_index(sbi, blkaddr); >>> + >>> + return !bdev_is_zoned(FDEV(devi).bdev); >>> + } >>> + return true; >>> +} >>> + >>> static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) >>> { >>> return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; >>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c >>> index 2c13b340c8a0..21c3aa93a8db 100644 >>> --- a/fs/f2fs/file.c >>> +++ b/fs/f2fs/file.c >>> @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, >>> f2fs_down_write(&sbi->pin_sem); >>> - f2fs_lock_op(sbi); >>> - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); >>> - f2fs_unlock_op(sbi); >>> + err = f2fs_allocate_pinning_section(sbi); >>> + if (err) { >>> + f2fs_up_write(&sbi->pin_sem); >>> + goto out_err; >>> + } >>> map.m_seg_type = CURSEG_COLD_DATA_PINNED; >>> err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); >>> @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) >>> static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) >>> { >>> struct inode *inode = file_inode(filp); >>> + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); >>> __u32 pin; >>> int ret = 0; >>> @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) >>> if (!S_ISREG(inode->i_mode)) >>> return -EINVAL; >>> - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) >>> + if (f2fs_readonly(sbi->sb)) >>> return -EROFS; >>> ret = mnt_want_write_file(filp); >>> @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) >>> clear_inode_flag(inode, FI_PIN_FILE); >>> f2fs_i_gc_failures_write(inode, 0); >>> goto done; >>> + } else if (f2fs_is_pinned_file(inode)) { >>> + goto done; >>> } >>> - if (f2fs_should_update_outplace(inode, NULL)) { >>> + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { >>> + ret = -EFBIG; >>> + goto out; >>> + } >>> + >>> + /* Let's allow file pinning on zoned device. */ >>> + if (!f2fs_sb_has_blkzoned(sbi) && >>> + f2fs_should_update_outplace(inode, NULL)) { >>> ret = -EINVAL; >>> goto out; >>> } >>> @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) >>> set_inode_flag(inode, FI_PIN_FILE); >>> ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; >>> done: >>> - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); >>> + f2fs_update_time(sbi, REQ_TIME); >>> out: >>> inode_unlock(inode); >>> mnt_drop_write_file(filp); >>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c >>> index a089a938355b..3ff126316d42 100644 >>> --- a/fs/f2fs/gc.c >>> +++ b/fs/f2fs/gc.c >>> @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) >>> init_atgc_management(sbi); >>> } >>> -static int f2fs_gc_range(struct f2fs_sb_info *sbi, >>> - unsigned int start_seg, unsigned int end_seg, bool dry_run) >>> +int f2fs_gc_range(struct f2fs_sb_info *sbi, >>> + unsigned int start_seg, unsigned int end_seg, >>> + bool dry_run, unsigned int dry_run_sections) >>> { >>> unsigned int segno; >>> + unsigned int gc_secs = dry_run_sections; >>> for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { >>> struct gc_inode_list gc_list = { >>> @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, >>> .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), >>> }; >>> - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); >>> + do_garbage_collect(sbi, segno, &gc_list, FG_GC, >>> + dry_run_sections == 0); >>> put_gc_inode(&gc_list); >>> if (!dry_run && get_valid_blocks(sbi, segno, true)) >>> return -EAGAIN; >>> + if (dry_run && dry_run_sections && >>> + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) >>> + break; >>> if (fatal_signal_pending(current)) >>> return -ERESTARTSYS; >>> @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, >>> f2fs_allocate_segment_for_resize(sbi, type, start, end); >>> /* do GC to move out valid blocks in the range */ >>> - err = f2fs_gc_range(sbi, start, end, dry_run); >>> + err = f2fs_gc_range(sbi, start, end, dry_run, 0); >>> if (err || dry_run) >>> goto out; >>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c >>> index 4e985750c938..0b72c8536ccf 100644 >>> --- a/fs/f2fs/segment.c >>> +++ b/fs/f2fs/segment.c >>> @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, >>> * This function should be returned with success, otherwise BUG >>> */ >>> static void get_new_segment(struct f2fs_sb_info *sbi, >>> - unsigned int *newseg, bool new_sec) >>> + unsigned int *newseg, bool new_sec, bool pinning) >>> { >>> struct free_segmap_info *free_i = FREE_I(sbi); >>> unsigned int segno, secno, zoneno; >>> @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, >>> if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) >>> goto got_it; >>> } >>> + >>> + /* >>> + * If we format f2fs on zoned storage, let's try to get pinned sections >>> + * from beginning of the storage, which should be a conventional one. >>> + */ >>> + if (f2fs_sb_has_blkzoned(sbi)) { >>> + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); >>> + hint = GET_SEC_FROM_SEG(sbi, segno); >>> + } >>> + >>> find_other_zone: >>> secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); >>> if (secno >= MAIN_SECS(sbi)) { >>> @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) >>> * Allocate a current working segment. >>> * This function always allocates a free segment in LFS manner. >>> */ >>> -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) >>> +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) >>> { >>> struct curseg_info *curseg = CURSEG_I(sbi, type); >>> unsigned int segno = curseg->segno; >>> + bool pinning = type == CURSEG_COLD_DATA_PINNED; >>> if (curseg->inited) >>> write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); >>> + >>> segno = __get_next_segno(sbi, type); >>> - get_new_segment(sbi, &segno, new_sec); >>> + get_new_segment(sbi, &segno, new_sec, pinning); >>> + if (new_sec && pinning && >>> + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { >>> + __set_free(sbi, segno); >>> + return -EAGAIN; >>> + } >>> + >>> curseg->next_segno = segno; >>> reset_curseg(sbi, type, 1); >>> curseg->alloc_type = LFS; >>> if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) >>> curseg->fragment_remained_chunk = >>> get_random_u32_inclusive(1, sbi->max_fragment_chunk); >>> + return 0; >>> } >>> static int __next_free_blkoff(struct f2fs_sb_info *sbi, >>> @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, >>> f2fs_up_read(&SM_I(sbi)->curseg_lock); >>> } >>> -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, >>> +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, >>> bool new_sec, bool force) >>> { >>> struct curseg_info *curseg = CURSEG_I(sbi, type); >>> @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, >>> !curseg->next_blkoff && >>> !get_valid_blocks(sbi, curseg->segno, new_sec) && >>> !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) >>> - return; >>> + return 0; >>> old_segno = curseg->segno; >>> - new_curseg(sbi, type, true); >>> + if (new_curseg(sbi, type, true)) >>> + return -EAGAIN; >>> stat_inc_seg_type(sbi, curseg); >>> locate_dirty_segment(sbi, old_segno); >>> + return 0; >>> } >>> -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) >>> +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) >>> { >>> + int ret; >>> + >>> f2fs_down_read(&SM_I(sbi)->curseg_lock); >>> down_write(&SIT_I(sbi)->sentry_lock); >>> - __allocate_new_segment(sbi, type, true, force); >>> + ret = __allocate_new_segment(sbi, type, true, force); >>> up_write(&SIT_I(sbi)->sentry_lock); >>> f2fs_up_read(&SM_I(sbi)->curseg_lock); >>> + >>> + return ret; >>> +} >>> + >>> +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) >>> +{ >>> + int err; >>> + bool gc_required = true; >>> + >>> +retry: >>> + f2fs_lock_op(sbi); >>> + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); >>> + f2fs_unlock_op(sbi); >>> + >>> + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { >>> + f2fs_down_write(&sbi->gc_lock); >>> + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); >>> + f2fs_up_write(&sbi->gc_lock); >>> + >>> + gc_required = false; >>> + goto retry; >>> + } >>> + >>> + return err; >>> } >>> void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) >>> @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, >>> * new segment. >>> */ >>> if (segment_full) { >>> + if (type == CURSEG_COLD_DATA_PINNED && >>> + !((curseg->segno + 1) % sbi->segs_per_sec)) >>> + goto skip_new_segment; >> >> Before we skip allocate new segment for pinned log, how about >> tagging curseg as uninitialized one via curseg->inited = false, and >> curseg->segno = NULL_SEGNO? so that we can avoid >> __f2fs_save_inmem_curseg() to touch this log, and not show incorrect >> segno of pinned log in /sys/kernel/debug/f2fs/status. >> >> Thanks, >> >>> + >>> if (from_gc) { >>> get_atssr_segment(sbi, type, se->type, >>> AT_SSR, se->mtime); >>> @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, >>> stat_inc_seg_type(sbi, curseg); >>> } >>> } >>> + >>> +skip_new_segment: >>> /* >>> * segment dirty status should be updated after segment allocation, >>> * so we just need to update status only one time after previous >>> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h >>> index 60d93a16f2ac..953af072915f 100644 >>> --- a/fs/f2fs/segment.h >>> +++ b/fs/f2fs/segment.h >>> @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) >>> dcc->discard_wake = true; >>> wake_up_interruptible_all(&dcc->discard_wait_queue); >>> } >>> + >>> +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) >>> +{ >>> + int devi; >>> + >>> + for (devi = 0; devi < sbi->s_ndevs; devi++) >>> + if (bdev_is_zoned(FDEV(devi).bdev)) >>> + return GET_SEGNO(sbi, FDEV(devi).start_blk); >>> + return 0; >>> +}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 828c797cd47c..0c9aa3082fcf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3839,25 +3839,34 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, unsigned int blkofs; unsigned int blk_per_sec = BLKS_PER_SEC(sbi); unsigned int secidx = start_blk / blk_per_sec; - unsigned int end_sec = secidx + blkcnt / blk_per_sec; + unsigned int end_sec; int ret = 0; + if (!blkcnt) + return 0; + end_sec = secidx + (blkcnt - 1) / blk_per_sec; + f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); filemap_invalidate_lock(inode->i_mapping); set_inode_flag(inode, FI_ALIGNED_WRITE); set_inode_flag(inode, FI_OPU_WRITE); - for (; secidx < end_sec; secidx++) { + for (; secidx <= end_sec; secidx++) { + unsigned int blkofs_end = secidx == end_sec ? + (blkcnt - 1) % blk_per_sec : blk_per_sec - 1; + f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + ret = f2fs_allocate_pinning_section(sbi); + if (ret) { + f2fs_up_write(&sbi->pin_sem); + break; + } set_inode_flag(inode, FI_SKIP_WRITES); - for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { + for (blkofs = 0; blkofs <= blkofs_end; blkofs++) { struct page *page; unsigned int blkidx = secidx * blk_per_sec + blkofs; @@ -3946,27 +3955,34 @@ static int check_swap_activate(struct swap_info_struct *sis, nr_pblocks = map.m_len; if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask) { + nr_pblocks & sec_blks_mask || + !f2fs_valid_pinned_area(sbi, pblock)) { + bool last_extent = false; + not_aligned++; nr_pblocks = roundup(nr_pblocks, blks_per_sec); if (cur_lblock + nr_pblocks > sis->max) nr_pblocks -= blks_per_sec; + /* this extent is last one */ if (!nr_pblocks) { - /* this extent is last one */ - nr_pblocks = map.m_len; - f2fs_warn(sbi, "Swapfile: last extent is not aligned to section"); - goto next; + nr_pblocks = last_lblock - cur_lblock; + last_extent = true; } ret = f2fs_migrate_blocks(inode, cur_lblock, nr_pblocks); - if (ret) + if (ret) { + if (ret == -ENOENT) + ret = -EINVAL; goto out; - goto retry; + } + + if (!last_extent) + goto retry; } -next: + if (cur_lblock + nr_pblocks >= sis->max) nr_pblocks = sis->max - cur_lblock; @@ -4004,17 +4020,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); int ret; if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; - if (f2fs_lfs_mode(F2FS_I_SB(inode))) { - f2fs_err(F2FS_I_SB(inode), - "Swapfile not supported in LFS mode"); + if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) { + f2fs_err(sbi, "Swapfile not supported in LFS mode"); return -EINVAL; } @@ -4027,13 +4043,17 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file, f2fs_precache_extents(inode); + ret = filemap_fdatawrite(inode->i_mapping); + if (ret < 0) + return ret; + ret = check_swap_activate(sis, file, span); if (ret < 0) return ret; stat_inc_swapfile_inode(inode); set_inode_flag(inode, FI_PIN_FILE); - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); return ret; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 40eb590ed646..351133a11518 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3696,7 +3696,8 @@ void f2fs_get_new_segment(struct f2fs_sb_info *sbi, unsigned int *newseg, bool new_sec, int dir); void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, unsigned int start, unsigned int end); -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force); +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi); void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi); int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range); bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi, @@ -3870,6 +3871,9 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi); block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode); int f2fs_gc(struct f2fs_sb_info *sbi, struct f2fs_gc_control *gc_control); void f2fs_build_gc_manager(struct f2fs_sb_info *sbi); +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections); int f2fs_resize_fs(struct file *filp, __u64 block_count); int __init f2fs_create_garbage_collection_cache(void); void f2fs_destroy_garbage_collection_cache(void); @@ -4524,6 +4528,17 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) return F2FS_OPTION(sbi).fs_mode == FS_MODE_LFS; } +static inline bool f2fs_valid_pinned_area(struct f2fs_sb_info *sbi, + block_t blkaddr) +{ + if (f2fs_sb_has_blkzoned(sbi)) { + int devi = f2fs_target_device_index(sbi, blkaddr); + + return !bdev_is_zoned(FDEV(devi).bdev); + } + return true; +} + static inline bool f2fs_low_mem_mode(struct f2fs_sb_info *sbi) { return F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2c13b340c8a0..21c3aa93a8db 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1733,9 +1733,11 @@ static int f2fs_expand_inode_data(struct inode *inode, loff_t offset, f2fs_down_write(&sbi->pin_sem); - f2fs_lock_op(sbi); - f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); - f2fs_unlock_op(sbi); + err = f2fs_allocate_pinning_section(sbi); + if (err) { + f2fs_up_write(&sbi->pin_sem); + goto out_err; + } map.m_seg_type = CURSEG_COLD_DATA_PINNED; err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_PRE_DIO); @@ -3185,6 +3187,7 @@ int f2fs_pin_file_control(struct inode *inode, bool inc) static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); __u32 pin; int ret = 0; @@ -3194,7 +3197,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) if (!S_ISREG(inode->i_mode)) return -EINVAL; - if (f2fs_readonly(F2FS_I_SB(inode)->sb)) + if (f2fs_readonly(sbi->sb)) return -EROFS; ret = mnt_want_write_file(filp); @@ -3207,9 +3210,18 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) clear_inode_flag(inode, FI_PIN_FILE); f2fs_i_gc_failures_write(inode, 0); goto done; + } else if (f2fs_is_pinned_file(inode)) { + goto done; } - if (f2fs_should_update_outplace(inode, NULL)) { + if (f2fs_sb_has_blkzoned(sbi) && F2FS_HAS_BLOCKS(inode)) { + ret = -EFBIG; + goto out; + } + + /* Let's allow file pinning on zoned device. */ + if (!f2fs_sb_has_blkzoned(sbi) && + f2fs_should_update_outplace(inode, NULL)) { ret = -EINVAL; goto out; } @@ -3231,7 +3243,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg) set_inode_flag(inode, FI_PIN_FILE); ret = F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN]; done: - f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); + f2fs_update_time(sbi, REQ_TIME); out: inode_unlock(inode); mnt_drop_write_file(filp); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a089a938355b..3ff126316d42 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1961,10 +1961,12 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi) init_atgc_management(sbi); } -static int f2fs_gc_range(struct f2fs_sb_info *sbi, - unsigned int start_seg, unsigned int end_seg, bool dry_run) +int f2fs_gc_range(struct f2fs_sb_info *sbi, + unsigned int start_seg, unsigned int end_seg, + bool dry_run, unsigned int dry_run_sections) { unsigned int segno; + unsigned int gc_secs = dry_run_sections; for (segno = start_seg; segno <= end_seg; segno += SEGS_PER_SEC(sbi)) { struct gc_inode_list gc_list = { @@ -1972,11 +1974,15 @@ static int f2fs_gc_range(struct f2fs_sb_info *sbi, .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS), }; - do_garbage_collect(sbi, segno, &gc_list, FG_GC, true); + do_garbage_collect(sbi, segno, &gc_list, FG_GC, + dry_run_sections == 0); put_gc_inode(&gc_list); if (!dry_run && get_valid_blocks(sbi, segno, true)) return -EAGAIN; + if (dry_run && dry_run_sections && + !get_valid_blocks(sbi, segno, true) && --gc_secs == 0) + break; if (fatal_signal_pending(current)) return -ERESTARTSYS; @@ -2014,7 +2020,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi, f2fs_allocate_segment_for_resize(sbi, type, start, end); /* do GC to move out valid blocks in the range */ - err = f2fs_gc_range(sbi, start, end, dry_run); + err = f2fs_gc_range(sbi, start, end, dry_run, 0); if (err || dry_run) goto out; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4e985750c938..0b72c8536ccf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2632,7 +2632,7 @@ static int is_next_segment_free(struct f2fs_sb_info *sbi, * This function should be returned with success, otherwise BUG */ static void get_new_segment(struct f2fs_sb_info *sbi, - unsigned int *newseg, bool new_sec) + unsigned int *newseg, bool new_sec, bool pinning) { struct free_segmap_info *free_i = FREE_I(sbi); unsigned int segno, secno, zoneno; @@ -2650,6 +2650,16 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } + + /* + * If we format f2fs on zoned storage, let's try to get pinned sections + * from beginning of the storage, which should be a conventional one. + */ + if (f2fs_sb_has_blkzoned(sbi)) { + segno = pinning ? 0 : max(first_zoned_segno(sbi), *newseg); + hint = GET_SEC_FROM_SEG(sbi, segno); + } + find_other_zone: secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint); if (secno >= MAIN_SECS(sbi)) { @@ -2749,21 +2759,30 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) * Allocate a current working segment. * This function always allocates a free segment in LFS manner. */ -static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) +static int new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) { struct curseg_info *curseg = CURSEG_I(sbi, type); unsigned int segno = curseg->segno; + bool pinning = type == CURSEG_COLD_DATA_PINNED; if (curseg->inited) write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, segno)); + segno = __get_next_segno(sbi, type); - get_new_segment(sbi, &segno, new_sec); + get_new_segment(sbi, &segno, new_sec, pinning); + if (new_sec && pinning && + !f2fs_valid_pinned_area(sbi, START_BLOCK(sbi, segno))) { + __set_free(sbi, segno); + return -EAGAIN; + } + curseg->next_segno = segno; reset_curseg(sbi, type, 1); curseg->alloc_type = LFS; if (F2FS_OPTION(sbi).fs_mode == FS_MODE_FRAGMENT_BLK) curseg->fragment_remained_chunk = get_random_u32_inclusive(1, sbi->max_fragment_chunk); + return 0; } static int __next_free_blkoff(struct f2fs_sb_info *sbi, @@ -3036,7 +3055,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type, f2fs_up_read(&SM_I(sbi)->curseg_lock); } -static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, +static int __allocate_new_segment(struct f2fs_sb_info *sbi, int type, bool new_sec, bool force) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -3046,21 +3065,49 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type, !curseg->next_blkoff && !get_valid_blocks(sbi, curseg->segno, new_sec) && !get_ckpt_valid_blocks(sbi, curseg->segno, new_sec)) - return; + return 0; old_segno = curseg->segno; - new_curseg(sbi, type, true); + if (new_curseg(sbi, type, true)) + return -EAGAIN; stat_inc_seg_type(sbi, curseg); locate_dirty_segment(sbi, old_segno); + return 0; } -void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) +int f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force) { + int ret; + f2fs_down_read(&SM_I(sbi)->curseg_lock); down_write(&SIT_I(sbi)->sentry_lock); - __allocate_new_segment(sbi, type, true, force); + ret = __allocate_new_segment(sbi, type, true, force); up_write(&SIT_I(sbi)->sentry_lock); f2fs_up_read(&SM_I(sbi)->curseg_lock); + + return ret; +} + +int f2fs_allocate_pinning_section(struct f2fs_sb_info *sbi) +{ + int err; + bool gc_required = true; + +retry: + f2fs_lock_op(sbi); + err = f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + + if (f2fs_sb_has_blkzoned(sbi) && err && gc_required) { + f2fs_down_write(&sbi->gc_lock); + f2fs_gc_range(sbi, 0, GET_SEGNO(sbi, FDEV(0).end_blk), true, 1); + f2fs_up_write(&sbi->gc_lock); + + gc_required = false; + goto retry; + } + + return err; } void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi) @@ -3426,6 +3473,10 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, * new segment. */ if (segment_full) { + if (type == CURSEG_COLD_DATA_PINNED && + !((curseg->segno + 1) % sbi->segs_per_sec)) + goto skip_new_segment; + if (from_gc) { get_atssr_segment(sbi, type, se->type, AT_SSR, se->mtime); @@ -3437,6 +3488,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_seg_type(sbi, curseg); } } + +skip_new_segment: /* * segment dirty status should be updated after segment allocation, * so we just need to update status only one time after previous diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 60d93a16f2ac..953af072915f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -942,3 +942,13 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force) dcc->discard_wake = true; wake_up_interruptible_all(&dcc->discard_wait_queue); } + +static inline unsigned int first_zoned_segno(struct f2fs_sb_info *sbi) +{ + int devi; + + for (devi = 0; devi < sbi->s_ndevs; devi++) + if (bdev_is_zoned(FDEV(devi).bdev)) + return GET_SEGNO(sbi, FDEV(devi).start_blk); + return 0; +}