Message ID: 1487381301-865-3-git-send-email-bo.li.liu@oracle.com (mailing list archive)
State: New, archived
At 02/18/2017 09:28 AM, Liu Bo wrote: > Since DISCARD is not as important as an operation like write, we don't > copy it to target device during replace, and it makes __btrfs_map_block > less complex. Makes sense to me. > > Signed-off-by: Liu Bo <bo.li.liu@oracle.com> > --- > fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++-------------------- > 1 file changed, 192 insertions(+), 114 deletions(-) > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > index c52b0fe..96228f3 100644 > --- a/fs/btrfs/volumes.c > +++ b/fs/btrfs/volumes.c > @@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) > kfree(bbio); > } > > +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ > +/* > + * Please note that, discard won't be sent to target device of device > + * replace. > + */ > +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, > + u64 logical, u64 length, > + struct btrfs_bio **bbio_ret) > +{ > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > + struct extent_map *em; > + struct map_lookup *map; > + struct btrfs_bio *bbio; > + u64 offset; > + u64 stripe_nr; > + u64 stripe_nr_end; > + u64 stripe_end_offset; > + u64 stripe_cnt; > + u64 stripe_len; > + u64 stripe_offset; > + u64 num_stripes; > + u32 stripe_index; > + u32 factor = 0; > + u32 sub_stripes = 0; > + u64 stripes_per_dev = 0; > + u32 remaining_stripes = 0; > + u32 last_stripe = 0; > + int ret = 0; > + int i; > + > + /* discard always return a bbio */ > + ASSERT(bbio_ret); > + > + read_lock(&em_tree->lock); > + em = lookup_extent_mapping(em_tree, logical, length); > + read_unlock(&em_tree->lock); It seems that get_chunk_map() in previous patch can replace such searching and error message. > + > + if (!em) { > + btrfs_crit(fs_info, "unable to find logical %llu len %llu", > + logical, length); > + return -EINVAL; > + } > + > + if (em->start > logical || em->start + em->len < logical) { > + btrfs_crit(fs_info, > + "found a bad mapping, wanted %Lu, found %Lu-%Lu", > + logical, em->start, em->start + em->len); > + free_extent_map(em); > + return -EINVAL; > + } > + > + map = em->map_lookup; > + /* we don't discard raid56 yet */ > + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { > + ret = -EOPNOTSUPP; > + goto out; > + } > + > + offset = logical - em->start; > + length = min_t(u64, em->len - offset, length); > + > + stripe_len = map->stripe_len; > + /* > + * stripe_nr counts the total number of stripes we have to stride > + * to get to this block > + */ > + stripe_nr = div64_u64(offset, stripe_len); > + stripe_offset = stripe_nr * stripe_len; > + ASSERT(offset >= stripe_offset); What about a DIV_ROUND_DOWN helper? Surprisingly we only have DIR_ROUND_UP, not not DIV_ROUND_DOWN. And if we're only going to support 64K stripe len, then round_down() is good for current usage. > + > + /* stripe_offset is the offset of this block in its stripe */ > + stripe_offset = offset - stripe_offset; This is a little confusing. What about using another variable called @stripe_start instead of using the same variable @stripe_offset to temporarily store stripe start bytenr. I prefer to do it in one run without resuing @stripe_offset variable to avoid confusion. > + > + stripe_nr_end = ALIGN(offset + length, map->stripe_len); round_up() causes less confusion. And IIRC, ALIGN/round_up can only handle power of 2, this implies the stripe_len must be power of 2, which is OK for now. If using ALIGN here, we can also use round_down() in previous stripe_nr. 
Thanks, Qu > + stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); > + stripe_cnt = stripe_nr_end - stripe_nr; > + stripe_end_offset = stripe_nr_end * map->stripe_len - > + (offset + length); > + /* > + * after this, stripe_nr is the number of stripes on this > + * device we have to walk to find the data, and stripe_index is > + * the number of our device in the stripe array > + */ > + num_stripes = 1; > + stripe_index = 0; > + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | > + BTRFS_BLOCK_GROUP_RAID10)) { > + if (map->type & BTRFS_BLOCK_GROUP_RAID0) > + sub_stripes = 1; > + else > + sub_stripes = map->sub_stripes; > + > + factor = map->num_stripes / sub_stripes; > + num_stripes = min_t(u64, map->num_stripes, > + sub_stripes * stripe_cnt); > + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); > + stripe_index *= sub_stripes; > + stripes_per_dev = div_u64_rem(stripe_cnt, factor, > + &remaining_stripes); > + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); > + last_stripe *= sub_stripes; > + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | > + BTRFS_BLOCK_GROUP_DUP)) { > + num_stripes = map->num_stripes; > + } else { > + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, > + &stripe_index); > + } > + > + bbio = alloc_btrfs_bio(num_stripes, 0); > + if (!bbio) { > + ret = -ENOMEM; > + goto out; > + } > + > + for (i = 0; i < num_stripes; i++) { > + bbio->stripes[i].physical = > + map->stripes[stripe_index].physical + > + stripe_offset + stripe_nr * map->stripe_len; > + bbio->stripes[i].dev = map->stripes[stripe_index].dev; > + > + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | > + BTRFS_BLOCK_GROUP_RAID10)) { > + bbio->stripes[i].length = stripes_per_dev * > + map->stripe_len; > + > + if (i / sub_stripes < remaining_stripes) > + bbio->stripes[i].length += > + map->stripe_len; > + > + /* > + * Special for the first stripe and > + * the last stripe: > + * > + * |-------|...|-------| > + * |----------| > + * off end_off > + */ > + if (i < sub_stripes) > + bbio->stripes[i].length -= > + stripe_offset; > + > + if (stripe_index >= last_stripe && > + stripe_index <= (last_stripe + > + sub_stripes - 1)) > + bbio->stripes[i].length -= > + stripe_end_offset; > + > + if (i == sub_stripes - 1) > + stripe_offset = 0; > + } else { > + bbio->stripes[i].length = length; > + } > + > + stripe_index++; > + if (stripe_index == map->num_stripes) { > + stripe_index = 0; > + stripe_nr++; > + } > + } > + > + *bbio_ret = bbio; > + bbio->map_type = map->type; > + bbio->num_stripes = num_stripes; > +out: > + free_extent_map(em); > + return ret; > +} > + > static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > enum btrfs_map_op op, > u64 logical, u64 *length, > @@ -5304,10 +5473,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > struct map_lookup *map; > u64 offset; > u64 stripe_offset; > - u64 stripe_end_offset; > u64 stripe_nr; > - u64 stripe_nr_orig; > - u64 stripe_nr_end; > u64 stripe_len; > u32 stripe_index; > int i; > @@ -5323,6 +5489,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > u64 physical_to_patch_in_first_stripe = 0; > u64 raid56_full_stripe_start = (u64)-1; > > + if (op == BTRFS_MAP_DISCARD) > + return __btrfs_map_block_for_discard(fs_info, logical, > + *length, bbio_ret); > + > em = get_chunk_map(fs_info, logical, *length); > if (IS_ERR(em)) > return PTR_ERR(em); > @@ -5364,14 +5534,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > raid56_full_stripe_start *= full_stripe_len; > } > > - if (op == BTRFS_MAP_DISCARD) { > - /* we don't 
discard raid56 yet */ > - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { > - ret = -EOPNOTSUPP; > - goto out; > - } > - *length = min_t(u64, em->len - offset, *length); > - } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { > + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { > u64 max_len; > /* For writes to RAID[56], allow a full stripeset across all disks. > For other RAID types and for RAID[56] reads, just allow a single > @@ -5402,8 +5565,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > btrfs_dev_replace_set_lock_blocking(dev_replace); > > if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && > - op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && > - op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { > + op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS && > + dev_replace->tgtdev != NULL) { > /* > * in dev-replace case, for repair case (that's the only > * case where the mirror is selected explicitly when > @@ -5483,24 +5646,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > > num_stripes = 1; > stripe_index = 0; > - stripe_nr_orig = stripe_nr; > - stripe_nr_end = ALIGN(offset + *length, map->stripe_len); > - stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); > - stripe_end_offset = stripe_nr_end * map->stripe_len - > - (offset + *length); > - > if (map->type & BTRFS_BLOCK_GROUP_RAID0) { > - if (op == BTRFS_MAP_DISCARD) > - num_stripes = min_t(u64, map->num_stripes, > - stripe_nr_end - stripe_nr_orig); > stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, > &stripe_index); > - if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && > - op != BTRFS_MAP_GET_READ_MIRRORS) > + if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS) > mirror_num = 1; > } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { > - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || > - op == BTRFS_MAP_GET_READ_MIRRORS) > + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) > num_stripes = map->num_stripes; > else if (mirror_num) > stripe_index = mirror_num - 1; > @@ -5513,8 +5665,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > } > > } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { > - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || > - op == BTRFS_MAP_GET_READ_MIRRORS) { > + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) { > num_stripes = map->num_stripes; > } else if (mirror_num) { > stripe_index = mirror_num - 1; > @@ -5530,10 +5681,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > > if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) > num_stripes = map->sub_stripes; > - else if (op == BTRFS_MAP_DISCARD) > - num_stripes = min_t(u64, map->sub_stripes * > - (stripe_nr_end - stripe_nr_orig), > - map->num_stripes); > else if (mirror_num) > stripe_index += mirror_num - 1; > else { > @@ -5576,8 +5723,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > /* We distribute the parity blocks across stripes */ > div_u64_rem(stripe_nr + stripe_index, map->num_stripes, > &stripe_index); > - if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && > - op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1) > + if ((op != BTRFS_MAP_WRITE && > + op != BTRFS_MAP_GET_READ_MIRRORS) && > + mirror_num <= 1) > mirror_num = 1; > } > } else { > @@ -5600,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > > num_alloc_stripes = num_stripes; > if (dev_replace_is_ongoing) { > - if (op == BTRFS_MAP_WRITE || op == 
BTRFS_MAP_DISCARD) > + if (op == BTRFS_MAP_WRITE) > num_alloc_stripes <<= 1; > if (op == BTRFS_MAP_GET_READ_MIRRORS) > num_alloc_stripes++; > @@ -5643,84 +5791,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > RAID6_Q_STRIPE; > } > > - if (op == BTRFS_MAP_DISCARD) { > - u32 factor = 0; > - u32 sub_stripes = 0; > - u64 stripes_per_dev = 0; > - u32 remaining_stripes = 0; > - u32 last_stripe = 0; > > - if (map->type & > - (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { > - if (map->type & BTRFS_BLOCK_GROUP_RAID0) > - sub_stripes = 1; > - else > - sub_stripes = map->sub_stripes; > - > - factor = map->num_stripes / sub_stripes; > - stripes_per_dev = div_u64_rem(stripe_nr_end - > - stripe_nr_orig, > - factor, > - &remaining_stripes); > - div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); > - last_stripe *= sub_stripes; > - } > - > - for (i = 0; i < num_stripes; i++) { > - bbio->stripes[i].physical = > - map->stripes[stripe_index].physical + > - stripe_offset + stripe_nr * map->stripe_len; > - bbio->stripes[i].dev = map->stripes[stripe_index].dev; > - > - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | > - BTRFS_BLOCK_GROUP_RAID10)) { > - bbio->stripes[i].length = stripes_per_dev * > - map->stripe_len; > - > - if (i / sub_stripes < remaining_stripes) > - bbio->stripes[i].length += > - map->stripe_len; > - > - /* > - * Special for the first stripe and > - * the last stripe: > - * > - * |-------|...|-------| > - * |----------| > - * off end_off > - */ > - if (i < sub_stripes) > - bbio->stripes[i].length -= > - stripe_offset; > - > - if (stripe_index >= last_stripe && > - stripe_index <= (last_stripe + > - sub_stripes - 1)) > - bbio->stripes[i].length -= > - stripe_end_offset; > - > - if (i == sub_stripes - 1) > - stripe_offset = 0; > - } else > - bbio->stripes[i].length = *length; > - > - stripe_index++; > - if (stripe_index == map->num_stripes) { > - /* This could only happen for RAID0/10 */ > - stripe_index = 0; > - stripe_nr++; > - } > - } > - } else { > - for (i = 0; i < num_stripes; i++) { > - bbio->stripes[i].physical = > - map->stripes[stripe_index].physical + > - stripe_offset + > - stripe_nr * map->stripe_len; > - bbio->stripes[i].dev = > - map->stripes[stripe_index].dev; > - stripe_index++; > - } > + for (i = 0; i < num_stripes; i++) { > + bbio->stripes[i].physical = > + map->stripes[stripe_index].physical + > + stripe_offset + > + stripe_nr * map->stripe_len; > + bbio->stripes[i].dev = > + map->stripes[stripe_index].dev; > + stripe_index++; > } > > if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) > @@ -5730,8 +5809,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, > sort_parity_stripes(bbio, num_stripes); > > tgtdev_indexes = 0; > - if (dev_replace_is_ongoing && > - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) && > + if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE && > dev_replace->tgtdev != NULL) { > int index_where_to_add; > u64 srcdev_devid = dev_replace->srcdev->devid; > -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
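Qu's round_down()/round_up() suggestions, plus a dedicated variable for the stripe start, would make the arithmetic read roughly as below. This is only a sketch against the posted patch, assuming stripe_len stays a power of two (64K today); the stripe_start local is hypothetical and does not appear in the patch.

```c
/*
 * Sketch only, not the posted code: the offset/stripe math rewritten with
 * round_down()/round_up().  Both helpers assume a power-of-two stripe_len,
 * and stripe_start is a hypothetical local used here to avoid reusing
 * stripe_offset for two different meanings.
 */
u64 stripe_start;

/* stripe_nr counts the stripes we stride over to reach this block */
stripe_nr = div64_u64(offset, stripe_len);

/* start of the stripe that contains offset, and the offset within it */
stripe_start = round_down(offset, stripe_len);
stripe_offset = offset - stripe_start;

/* first stripe past the end of the discard range */
stripe_nr_end = div_u64(round_up(offset + length, stripe_len), stripe_len);
stripe_cnt = stripe_nr_end - stripe_nr;
```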
On Mon, Feb 20, 2017 at 11:54:31AM +0800, Qu Wenruo wrote: > > > At 02/18/2017 09:28 AM, Liu Bo wrote: > > Since DISCARD is not as important as an operation like write, we don't > > copy it to target device during replace, and it makes __btrfs_map_block > > less complex. > > Makes sense to me. > > > > > Signed-off-by: Liu Bo <bo.li.liu@oracle.com> > > --- > > fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++-------------------- > > 1 file changed, 192 insertions(+), 114 deletions(-) > > > > diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c > > index c52b0fe..96228f3 100644 > > --- a/fs/btrfs/volumes.c > > +++ b/fs/btrfs/volumes.c > > @@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) > > kfree(bbio); > > } > > > > +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ > > +/* > > + * Please note that, discard won't be sent to target device of device > > + * replace. > > + */ > > +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, > > + u64 logical, u64 length, > > + struct btrfs_bio **bbio_ret) > > +{ > > + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; > > + struct extent_map *em; > > + struct map_lookup *map; > > + struct btrfs_bio *bbio; > > + u64 offset; > > + u64 stripe_nr; > > + u64 stripe_nr_end; > > + u64 stripe_end_offset; > > + u64 stripe_cnt; > > + u64 stripe_len; > > + u64 stripe_offset; > > + u64 num_stripes; > > + u32 stripe_index; > > + u32 factor = 0; > > + u32 sub_stripes = 0; > > + u64 stripes_per_dev = 0; > > + u32 remaining_stripes = 0; > > + u32 last_stripe = 0; > > + int ret = 0; > > + int i; > > + > > + /* discard always return a bbio */ > > + ASSERT(bbio_ret); > > + > > + read_lock(&em_tree->lock); > > + em = lookup_extent_mapping(em_tree, logical, length); > > + read_unlock(&em_tree->lock); > > It seems that get_chunk_map() in previous patch can replace such searching > and error message. > Yeah, I forgot to update with it. > > + > > + if (!em) { > > + btrfs_crit(fs_info, "unable to find logical %llu len %llu", > > + logical, length); > > + return -EINVAL; > > + } > > + > > + if (em->start > logical || em->start + em->len < logical) { > > + btrfs_crit(fs_info, > > + "found a bad mapping, wanted %Lu, found %Lu-%Lu", > > + logical, em->start, em->start + em->len); > > + free_extent_map(em); > > + return -EINVAL; > > + } > > + > > + map = em->map_lookup; > > + /* we don't discard raid56 yet */ > > + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { > > + ret = -EOPNOTSUPP; > > + goto out; > > + } > > + > > + offset = logical - em->start; > > + length = min_t(u64, em->len - offset, length); > > + > > + stripe_len = map->stripe_len; > > + /* > > + * stripe_nr counts the total number of stripes we have to stride > > + * to get to this block > > + */ > > + stripe_nr = div64_u64(offset, stripe_len); > > + stripe_offset = stripe_nr * stripe_len; > > + ASSERT(offset >= stripe_offset); > > What about a DIV_ROUND_DOWN helper? > Surprisingly we only have DIR_ROUND_UP, not not DIV_ROUND_DOWN. > > And if we're only going to support 64K stripe len, then round_down() is good > for current usage. > > > + > > + /* stripe_offset is the offset of this block in its stripe */ > > + stripe_offset = offset - stripe_offset; > > This is a little confusing. > What about using another variable called @stripe_start instead of using the > same variable @stripe_offset to temporarily store stripe start bytenr. > > I prefer to do it in one run without resuing @stripe_offset variable to > avoid confusion. 
Right, I was trying to keep the check of (offset >= stripe_offset), but it's not necessary. > > > + > > + stripe_nr_end = ALIGN(offset + length, map->stripe_len); > > round_up() causes less confusion. > > And IIRC, ALIGN/round_up can only handle power of 2, this implies the > stripe_len must be power of 2, which is OK for now. > If using ALIGN here, we can also use round_down() in previous stripe_nr. > Good point. Thanks, -liubo -- To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
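The power-of-two caveat both of them mention is easy to see with a tiny userspace re-creation of the masking trick; DEMO_ALIGN below is a simplified stand-in for the kernel's ALIGN()/round_up(), used purely for illustration.

```c
#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for the kernel's ALIGN()/round_up(): add (a - 1) and
 * mask the low bits away.  The masking step is only meaningful when the
 * alignment is a power of two. */
#define DEMO_ALIGN(x, a)	(((x) + ((a) - 1)) & ~((uint64_t)(a) - 1))

int main(void)
{
	/* Power-of-two alignment, i.e. the 64K stripe_len case: works. */
	printf("align 70000 up to 65536 -> %llu\n",
	       (unsigned long long)DEMO_ALIGN(70000ULL, 65536ULL));	/* 131072 */

	/* Non-power-of-two alignment: silently wrong (10, not 12), which is
	 * why ALIGN()/round_up() imply stripe_len must stay a power of two. */
	printf("align 10 up to 6       -> %llu\n",
	       (unsigned long long)DEMO_ALIGN(10ULL, 6ULL));
	return 0;
}
```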
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index c52b0fe..96228f3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5294,6 +5294,175 @@ void btrfs_put_bbio(struct btrfs_bio *bbio) kfree(bbio); } +/* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */ +/* + * Please note that, discard won't be sent to target device of device + * replace. + */ +static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info, + u64 logical, u64 length, + struct btrfs_bio **bbio_ret) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + struct btrfs_bio *bbio; + u64 offset; + u64 stripe_nr; + u64 stripe_nr_end; + u64 stripe_end_offset; + u64 stripe_cnt; + u64 stripe_len; + u64 stripe_offset; + u64 num_stripes; + u32 stripe_index; + u32 factor = 0; + u32 sub_stripes = 0; + u64 stripes_per_dev = 0; + u32 remaining_stripes = 0; + u32 last_stripe = 0; + int ret = 0; + int i; + + /* discard always return a bbio */ + ASSERT(bbio_ret); + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, logical, length); + read_unlock(&em_tree->lock); + + if (!em) { + btrfs_crit(fs_info, "unable to find logical %llu len %llu", + logical, length); + return -EINVAL; + } + + if (em->start > logical || em->start + em->len < logical) { + btrfs_crit(fs_info, + "found a bad mapping, wanted %Lu, found %Lu-%Lu", + logical, em->start, em->start + em->len); + free_extent_map(em); + return -EINVAL; + } + + map = em->map_lookup; + /* we don't discard raid56 yet */ + if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + ret = -EOPNOTSUPP; + goto out; + } + + offset = logical - em->start; + length = min_t(u64, em->len - offset, length); + + stripe_len = map->stripe_len; + /* + * stripe_nr counts the total number of stripes we have to stride + * to get to this block + */ + stripe_nr = div64_u64(offset, stripe_len); + stripe_offset = stripe_nr * stripe_len; + ASSERT(offset >= stripe_offset); + + /* stripe_offset is the offset of this block in its stripe */ + stripe_offset = offset - stripe_offset; + + stripe_nr_end = ALIGN(offset + length, map->stripe_len); + stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); + stripe_cnt = stripe_nr_end - stripe_nr; + stripe_end_offset = stripe_nr_end * map->stripe_len - + (offset + length); + /* + * after this, stripe_nr is the number of stripes on this + * device we have to walk to find the data, and stripe_index is + * the number of our device in the stripe array + */ + num_stripes = 1; + stripe_index = 0; + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + if (map->type & BTRFS_BLOCK_GROUP_RAID0) + sub_stripes = 1; + else + sub_stripes = map->sub_stripes; + + factor = map->num_stripes / sub_stripes; + num_stripes = min_t(u64, map->num_stripes, + sub_stripes * stripe_cnt); + stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); + stripe_index *= sub_stripes; + stripes_per_dev = div_u64_rem(stripe_cnt, factor, + &remaining_stripes); + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); + last_stripe *= sub_stripes; + } else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | + BTRFS_BLOCK_GROUP_DUP)) { + num_stripes = map->num_stripes; + } else { + stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, + &stripe_index); + } + + bbio = alloc_btrfs_bio(num_stripes, 0); + if (!bbio) { + ret = -ENOMEM; + goto out; + } + + for (i = 0; i < num_stripes; i++) { + bbio->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + stripe_nr * 
map->stripe_len; + bbio->stripes[i].dev = map->stripes[stripe_index].dev; + + if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | + BTRFS_BLOCK_GROUP_RAID10)) { + bbio->stripes[i].length = stripes_per_dev * + map->stripe_len; + + if (i / sub_stripes < remaining_stripes) + bbio->stripes[i].length += + map->stripe_len; + + /* + * Special for the first stripe and + * the last stripe: + * + * |-------|...|-------| + * |----------| + * off end_off + */ + if (i < sub_stripes) + bbio->stripes[i].length -= + stripe_offset; + + if (stripe_index >= last_stripe && + stripe_index <= (last_stripe + + sub_stripes - 1)) + bbio->stripes[i].length -= + stripe_end_offset; + + if (i == sub_stripes - 1) + stripe_offset = 0; + } else { + bbio->stripes[i].length = length; + } + + stripe_index++; + if (stripe_index == map->num_stripes) { + stripe_index = 0; + stripe_nr++; + } + } + + *bbio_ret = bbio; + bbio->map_type = map->type; + bbio->num_stripes = num_stripes; +out: + free_extent_map(em); + return ret; +} + static int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 logical, u64 *length, @@ -5304,10 +5473,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, struct map_lookup *map; u64 offset; u64 stripe_offset; - u64 stripe_end_offset; u64 stripe_nr; - u64 stripe_nr_orig; - u64 stripe_nr_end; u64 stripe_len; u32 stripe_index; int i; @@ -5323,6 +5489,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, u64 physical_to_patch_in_first_stripe = 0; u64 raid56_full_stripe_start = (u64)-1; + if (op == BTRFS_MAP_DISCARD) + return __btrfs_map_block_for_discard(fs_info, logical, + *length, bbio_ret); + em = get_chunk_map(fs_info, logical, *length); if (IS_ERR(em)) return PTR_ERR(em); @@ -5364,14 +5534,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, raid56_full_stripe_start *= full_stripe_len; } - if (op == BTRFS_MAP_DISCARD) { - /* we don't discard raid56 yet */ - if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { - ret = -EOPNOTSUPP; - goto out; - } - *length = min_t(u64, em->len - offset, *length); - } else if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { + if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { u64 max_len; /* For writes to RAID[56], allow a full stripeset across all disks. 
For other RAID types and for RAID[56] reads, just allow a single @@ -5402,8 +5565,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, btrfs_dev_replace_set_lock_blocking(dev_replace); if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && - op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS && dev_replace->tgtdev != NULL) { + op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS && + dev_replace->tgtdev != NULL) { /* * in dev-replace case, for repair case (that's the only * case where the mirror is selected explicitly when @@ -5483,24 +5646,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, num_stripes = 1; stripe_index = 0; - stripe_nr_orig = stripe_nr; - stripe_nr_end = ALIGN(offset + *length, map->stripe_len); - stripe_nr_end = div_u64(stripe_nr_end, map->stripe_len); - stripe_end_offset = stripe_nr_end * map->stripe_len - - (offset + *length); - if (map->type & BTRFS_BLOCK_GROUP_RAID0) { - if (op == BTRFS_MAP_DISCARD) - num_stripes = min_t(u64, map->num_stripes, - stripe_nr_end - stripe_nr_orig); stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); - if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS) + if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS) mirror_num = 1; } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || - op == BTRFS_MAP_GET_READ_MIRRORS) + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->num_stripes; else if (mirror_num) stripe_index = mirror_num - 1; @@ -5513,8 +5665,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, } } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD || - op == BTRFS_MAP_GET_READ_MIRRORS) { + if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) { num_stripes = map->num_stripes; } else if (mirror_num) { stripe_index = mirror_num - 1; @@ -5530,10 +5681,6 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) num_stripes = map->sub_stripes; - else if (op == BTRFS_MAP_DISCARD) - num_stripes = min_t(u64, map->sub_stripes * - (stripe_nr_end - stripe_nr_orig), - map->num_stripes); else if (mirror_num) stripe_index += mirror_num - 1; else { @@ -5576,8 +5723,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, /* We distribute the parity blocks across stripes */ div_u64_rem(stripe_nr + stripe_index, map->num_stripes, &stripe_index); - if ((op != BTRFS_MAP_WRITE && op != BTRFS_MAP_DISCARD && - op != BTRFS_MAP_GET_READ_MIRRORS) && mirror_num <= 1) + if ((op != BTRFS_MAP_WRITE && + op != BTRFS_MAP_GET_READ_MIRRORS) && + mirror_num <= 1) mirror_num = 1; } } else { @@ -5600,7 +5748,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, num_alloc_stripes = num_stripes; if (dev_replace_is_ongoing) { - if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) + if (op == BTRFS_MAP_WRITE) num_alloc_stripes <<= 1; if (op == BTRFS_MAP_GET_READ_MIRRORS) num_alloc_stripes++; @@ -5643,84 +5791,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, RAID6_Q_STRIPE; } - if (op == BTRFS_MAP_DISCARD) { - u32 factor = 0; - u32 sub_stripes = 0; - u64 stripes_per_dev = 0; - u32 remaining_stripes = 0; - u32 last_stripe = 0; - if (map->type & - (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { - if (map->type & BTRFS_BLOCK_GROUP_RAID0) - sub_stripes = 1; - else - 
sub_stripes = map->sub_stripes; - - factor = map->num_stripes / sub_stripes; - stripes_per_dev = div_u64_rem(stripe_nr_end - - stripe_nr_orig, - factor, - &remaining_stripes); - div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); - last_stripe *= sub_stripes; - } - - for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = - map->stripes[stripe_index].physical + - stripe_offset + stripe_nr * map->stripe_len; - bbio->stripes[i].dev = map->stripes[stripe_index].dev; - - if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | - BTRFS_BLOCK_GROUP_RAID10)) { - bbio->stripes[i].length = stripes_per_dev * - map->stripe_len; - - if (i / sub_stripes < remaining_stripes) - bbio->stripes[i].length += - map->stripe_len; - - /* - * Special for the first stripe and - * the last stripe: - * - * |-------|...|-------| - * |----------| - * off end_off - */ - if (i < sub_stripes) - bbio->stripes[i].length -= - stripe_offset; - - if (stripe_index >= last_stripe && - stripe_index <= (last_stripe + - sub_stripes - 1)) - bbio->stripes[i].length -= - stripe_end_offset; - - if (i == sub_stripes - 1) - stripe_offset = 0; - } else - bbio->stripes[i].length = *length; - - stripe_index++; - if (stripe_index == map->num_stripes) { - /* This could only happen for RAID0/10 */ - stripe_index = 0; - stripe_nr++; - } - } - } else { - for (i = 0; i < num_stripes; i++) { - bbio->stripes[i].physical = - map->stripes[stripe_index].physical + - stripe_offset + - stripe_nr * map->stripe_len; - bbio->stripes[i].dev = - map->stripes[stripe_index].dev; - stripe_index++; - } + for (i = 0; i < num_stripes; i++) { + bbio->stripes[i].physical = + map->stripes[stripe_index].physical + + stripe_offset + + stripe_nr * map->stripe_len; + bbio->stripes[i].dev = + map->stripes[stripe_index].dev; + stripe_index++; } if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) @@ -5730,8 +5809,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, sort_parity_stripes(bbio, num_stripes); tgtdev_indexes = 0; - if (dev_replace_is_ongoing && - (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_DISCARD) && + if (dev_replace_is_ongoing && op == BTRFS_MAP_WRITE && dev_replace->tgtdev != NULL) { int index_where_to_add; u64 srcdev_devid = dev_replace->srcdev->devid;
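To make the "first stripe / last stripe" trimming in the loop above concrete, here is a small userspace walk-through of the per-device length math for the simplest case, plain RAID0 (sub_stripes == 1) across three devices. The numbers and names are illustrative only, and the code mirrors, rather than reuses, the kernel helpers.

```c
#include <stdio.h>
#include <stdint.h>

#define STRIPE_LEN	(64ULL * 1024)	/* map->stripe_len */
#define NUM_DEVS	3		/* map->num_stripes */

int main(void)
{
	/* Discard [32K, 224K) inside the chunk: half of stripe 0, all of
	 * stripes 1 and 2, and half of stripe 3 (which lives on device 0). */
	uint64_t offset = 32ULL * 1024;
	uint64_t length = 192ULL * 1024;

	uint64_t stripe_nr = offset / STRIPE_LEN;
	uint64_t stripe_offset = offset - stripe_nr * STRIPE_LEN;
	uint64_t stripe_nr_end = (offset + length + STRIPE_LEN - 1) / STRIPE_LEN;
	uint64_t stripe_cnt = stripe_nr_end - stripe_nr;
	uint64_t stripe_end_offset = stripe_nr_end * STRIPE_LEN - (offset + length);

	uint64_t num_stripes = stripe_cnt < NUM_DEVS ? stripe_cnt : NUM_DEVS;
	unsigned stripe_index = stripe_nr % NUM_DEVS;
	uint64_t stripes_per_dev = stripe_cnt / NUM_DEVS;
	unsigned remaining_stripes = stripe_cnt % NUM_DEVS;
	unsigned last_stripe = (stripe_nr_end - 1) % NUM_DEVS;
	uint64_t i;

	stripe_nr /= NUM_DEVS;

	for (i = 0; i < num_stripes; i++) {
		uint64_t dev_off = stripe_offset + stripe_nr * STRIPE_LEN;
		uint64_t len = stripes_per_dev * STRIPE_LEN;

		if (i < remaining_stripes)	/* devices that see one extra stripe */
			len += STRIPE_LEN;
		if (i == 0)			/* first stripe starts mid-way */
			len -= stripe_offset;
		if (stripe_index == last_stripe)/* last stripe ends mid-way */
			len -= stripe_end_offset;
		if (i == 0)
			stripe_offset = 0;

		printf("dev %u: discard %llu bytes at offset %llu into its slice\n",
		       stripe_index, (unsigned long long)len,
		       (unsigned long long)dev_off);

		if (++stripe_index == NUM_DEVS) {
			stripe_index = 0;
			stripe_nr++;
		}
	}
	return 0;
}
```

For the example range this prints 64K for each of the three devices, with device 0's discard starting 32K into its slice of the chunk, matching the picture of half a stripe at the front and half a stripe at the back.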
Since DISCARD is not as important as an operation like write, we don't copy it to the target device during replace, and this makes __btrfs_map_block less complex.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
---
 fs/btrfs/volumes.c | 306 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 192 insertions(+), 114 deletions(-)
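For context on how the result is consumed, the existing discard caller roughly does the following with the returned bbio. This is paraphrased from memory of btrfs_discard_extent() in extent-tree.c of that era rather than quoted from the patch, so treat the surrounding declarations and error handling as approximate.

```c
/*
 * Approximate sketch of the caller side (modeled on btrfs_discard_extent());
 * fs_info, logical and length come from the surrounding function, and the
 * error handling here is abbreviated.
 */
struct btrfs_bio *bbio = NULL;
u64 num_bytes = length;
u64 discarded = 0;
int ret, i;

ret = btrfs_map_block(fs_info, BTRFS_MAP_DISCARD, logical, &num_bytes, &bbio, 0);
if (!ret) {
	struct btrfs_bio_stripe *stripe = bbio->stripes;

	for (i = 0; i < bbio->num_stripes; i++, stripe++) {
		/* each stripe now carries its own per-device length */
		if (stripe->dev->bdev)
			btrfs_issue_discard(stripe->dev->bdev, stripe->physical,
					    stripe->length, &discarded);
	}
	btrfs_put_bbio(bbio);
}
```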