@@ -452,6 +452,225 @@ static inline int is_io_in_chunk_boundary(struct mddev *mddev,
}
}
+
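+/* bio_record: per-rdev start sector and length (in sectors) of
+ * the split discard bio that will be issued to ->rdev.
+ */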
+struct bio_record {
+ sector_t bi_sector;
+ unsigned long sectors;
+ struct md_rdev *rdev;
+};
+
+static void handle_discard_request(struct mddev *mddev, struct bio *bio)
+{
+ struct bio_record *recs = NULL;
+ struct bio *split;
+ struct r0conf *conf = mddev->private;
+ sector_t sectors, sector;
+ struct strip_zone *first_zone;
+ int zone_idx;
+ sector_t zone_start, zone_end;
+ int nr_strip_zones = conf->nr_strip_zones;
+ int disks;
+ int first_rdev_idx = -1, rdev_idx;
+ struct md_rdev *first_rdev;
+ unsigned int chunk_sects = mddev->chunk_sectors;
+
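+ /* find_zone() rewrites 'sector' to a zone-relative offset;
+ * map_sector() then rewrites it to an rdev-local offset within
+ * that zone.
+ */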
+ sector = bio->bi_iter.bi_sector;
+ first_zone = find_zone(conf, &sector);
+ first_rdev = map_sector(mddev, first_zone, sector, &sector);
+
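+ /* Sectors from 'sector' up to the next chunk boundary; use a
+ * scratch copy in the non-power-of-2 case so sector_div() does
+ * not clobber the mapped 'sector'.
+ */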
+ if (likely(is_power_of_2(chunk_sects))) {
+ sectors = chunk_sects - (sector & (chunk_sects - 1));
+ } else {
+ sector_t sect = sector;
+
+ sectors = chunk_sects - sector_div(sect, chunk_sects);
+ }
+
+ /* If the bio does not cross a chunk boundary,
+ * simply handle it here.
+ */
+ if (sectors >= bio_sectors(bio)) {
+ bio->bi_bdev = first_rdev->bdev;
+ bio->bi_iter.bi_sector = sector + first_zone->dev_start +
+ first_rdev->data_offset;
+ if (unlikely(!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(bio);
+ else
+ generic_make_request(bio);
+ return;
+ }
+
+ /* The bio is large enough to be split; allocate recs first */
+ disks = mddev->raid_disks;
+ recs = kcalloc(disks, sizeof(struct bio_record), GFP_NOIO);
+ if (recs == NULL) {
+ printk(KERN_ERR "md/raid0:%s: failed to allocate memory for bio_record\n",
+ mdname(mddev));
+ bio->bi_error = -ENOMEM;
+ bio_endio(bio);
+ return;
+ }
+
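+ /* conf->devlist is an nr_strip_zones x raid_disks array; row
+ * zone_idx lists the member rdevs of the first zone. Record
+ * them all and remember which one the bio starts on.
+ */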
+ zone_idx = first_zone - conf->strip_zone;
+ for (rdev_idx = 0; rdev_idx < first_zone->nb_dev; rdev_idx++) {
+ struct md_rdev *rdev;
+
+ rdev = conf->devlist[zone_idx * disks + rdev_idx];
+ recs[rdev_idx].rdev = rdev;
+ if (rdev == first_rdev)
+ first_rdev_idx = rdev_idx;
+ }
+
+ /* Continue the layout from the bio's original logical sector */
+ sector = bio->bi_iter.bi_sector;
+ recs[first_rdev_idx].bi_sector = sector + first_zone->dev_start;
+ recs[first_rdev_idx].sectors = sectors;
+ BUG_ON(recs[first_rdev_idx].rdev != first_rdev);
+
+ /* recs[first_rdev_idx] is initialized with 'sectors'; now handle
+ * the remaining sectors, which are stored in 'sectors' as well.
+ */
+ sectors = bio_sectors(bio) - sectors;
+
+ /* The bio may not be chunk size aligned, so the split bio on the
+ * first rdev may not be chunk size aligned either. But the
+ * remaining split bios on the remaining rdevs must be chunk size
+ * aligned, starting at the round-down chunk boundary.
+ */
+ zone_end = first_zone->zone_end;
+ rdev_idx = first_rdev_idx + 1;
+ sector = likely(is_power_of_2(chunk_sects))
+ ? sector & (~(chunk_sects - 1))
+ : chunk_sects * (sector/chunk_sects);
+
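+ /* Hand out up to one chunk to each following rdev in the first
+ * zone; if the remaining sectors fit in a single chunk, all
+ * records are complete and the bios can be issued.
+ */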
+ while (rdev_idx < first_zone->nb_dev) {
+ recs[rdev_idx].bi_sector = sector + first_zone->dev_start;
+ if (sectors <= chunk_sects) {
+ recs[rdev_idx].sectors = sectors;
+ goto issue;
+ }
+ recs[rdev_idx].sectors = chunk_sects;
+ sectors -= chunk_sects;
+ rdev_idx++;
+ }
+
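+ /* Every rdev in the first zone received a chunk; advance to
+ * the next stripe and, if that reaches the zone end, into the
+ * next zone.
+ */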
+ sector += chunk_sects;
+ zone_start = sector + first_zone->dev_start;
+ if (zone_start == zone_end) {
+ zone_idx++;
+ zone_start = conf->strip_zone[zone_idx].dev_start;
+ }
+
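+ /* Distribute the remaining sectors zone by zone. Within a
+ * zone they decompose as
+ * sectors = chunks_per_rdev * (chunk_sects * rdevs_in_zone)
+ * + rested_chunks * chunk_sects + rested_sectors
+ * For example, sectors = 52, chunk_sects = 8, rdevs_in_zone = 3
+ * gives chunks_per_rdev = 2, rested_chunks = 0 and
+ * rested_sectors = 4.
+ */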
+ while (zone_idx < nr_strip_zones) {
+ int rdevs_in_zone = conf->strip_zone[zone_idx].nb_dev;
+ sector_t chunks_per_rdev, rested_chunks;
+ unsigned int rested_sectors;
+ sector_t zone_sectors, grow_sectors;
+ int add_rested_sectors = 0;
+
+ zone_end = conf->strip_zone[zone_idx].zone_end;
+ zone_sectors = zone_end - zone_start;
+ chunks_per_rdev = sectors;
+ rested_sectors =
+ sector_div(chunks_per_rdev, chunk_sects * rdevs_in_zone);
+ rested_chunks = rested_sectors;
+ rested_sectors = sector_div(rested_chunks, chunk_sects);
+
+ if ((chunks_per_rdev * chunk_sects) > zone_sectors) {
+ /* avoid an open-coded 64-bit division */
+ chunks_per_rdev = zone_sectors;
+ sector_div(chunks_per_rdev, chunk_sects);
+ }
+
+ /* rested_chunks and rested_sectors belong to the next zone and
+ * won't be handled in this zone, so set them to 0.
+ */
+ if ((chunks_per_rdev * chunk_sects) == zone_sectors &&
+ (rested_chunks != 0 || rested_sectors != 0)) {
+ rested_chunks = 0;
+ rested_sectors = 0;
+ }
+
+ if (rested_chunks == 0 && rested_sectors != 0)
+ add_rested_sectors = 1;
+
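+ /* Grow each record: every rdev gets chunks_per_rdev whole
+ * chunks, the first rested_chunks rdevs get one extra chunk,
+ * and the next rdev in turn absorbs the trailing
+ * rested_sectors.
+ */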
+ for (rdev_idx = 0; rdev_idx < rdevs_in_zone; rdev_idx++) {
+ /* If .sectors is not initialized (== 0), .bi_sector is not
+ * initialized either. Initialize .bi_sector first, then set
+ * .sectors via grow_sectors.
+ */
+ if (recs[rdev_idx].sectors == 0)
+ recs[rdev_idx].bi_sector = zone_start;
+ grow_sectors = chunks_per_rdev * chunk_sects;
+ if (rested_chunks) {
+ grow_sectors += chunk_sects;
+ rested_chunks--;
+ if (rested_chunks == 0 &&
+ rested_sectors != 0) {
+ recs[rdev_idx].sectors += grow_sectors;
+ sectors -= grow_sectors;
+ add_rested_sectors = 1;
+ continue;
+ }
+ }
+
+ /* a non-zero add_rested_sectors implies
+ * rested_sectors != 0
+ */
+ if (add_rested_sectors)
+ grow_sectors += rested_sectors;
+ recs[rdev_idx].sectors += grow_sectors;
+ sectors -= grow_sectors;
+ if (add_rested_sectors)
+ break;
+ }
+
+ if (sectors == 0)
+ break;
+ zone_start = zone_end;
+ zone_idx++;
+ /* don't dereference past the last zone */
+ BUG_ON(zone_idx < nr_strip_zones &&
+ zone_start != conf->strip_zone[zone_idx].dev_start);
+ }
+
+issue:
+ /* recs now holds the reordered request layout; chain and issue
+ * the split bios it describes.
+ */
+ for (rdev_idx = 0; rdev_idx < disks; rdev_idx++) {
+ if (rdev_idx == first_rdev_idx ||
+ recs[rdev_idx].sectors == 0)
+ continue;
+ split = bio_split(bio,
+ recs[rdev_idx].sectors,
+ GFP_NOIO,
+ fs_bio_set);
+ if (split == NULL)
+ break;
+ bio_chain(split, bio);
+ BUG_ON(split->bi_iter.bi_size != recs[rdev_idx].sectors << 9);
+ split->bi_bdev = recs[rdev_idx].rdev->bdev;
+ split->bi_iter.bi_sector = recs[rdev_idx].bi_sector +
+ recs[rdev_idx].rdev->data_offset;
+
+ if (unlikely(!blk_queue_discard(
+ bdev_get_queue(split->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(split);
+ else
+ generic_make_request(split);
+ }
+ BUG_ON(bio->bi_iter.bi_size != recs[first_rdev_idx].sectors << 9);
+ bio->bi_iter.bi_sector = recs[first_rdev_idx].bi_sector +
+ recs[first_rdev_idx].rdev->data_offset;
+ bio->bi_bdev = recs[first_rdev_idx].rdev->bdev;
+
+ if (unlikely(!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
+ /* Just ignore it */
+ bio_endio(bio);
+ else
+ generic_make_request(bio);
+
+ kfree(recs);
+}
+
static void raid0_make_request(struct mddev *mddev, struct bio *bio)
{
struct strip_zone *zone;
@@ -463,6 +682,11 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
return;
}
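+ /* Discard bios can span many chunks and zones; hand them to
+ * the dedicated split path in handle_discard_request().
+ */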
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD)) {
+ handle_discard_request(mddev, bio);
+ return;
+ }
+
do {
sector_t sector = bio->bi_iter.bi_sector;
unsigned chunk_sects = mddev->chunk_sectors;