Message ID | 1343072962.20599.2.camel@f16 (mailing list archive) |
---|---|
State | Deferred, archived |
On Mon, 23 Jul 2012 14:49:22 -0500 Jonathan Brassow <jbrassow@redhat.com>
wrote:

> Neil,
>
> Updated 'sectors_per_dev' calculation and integrated your other
> suggestions.  Mostly ...
>
>  brassow
>
> dm raid: add md raid10 support
>
> Support the MD RAID10 personality through dm-raid.c
>
> Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>
>
> Index: linux-upstream/drivers/md/dm-raid.c
> ===================================================================
> --- linux-upstream.orig/drivers/md/dm-raid.c
> +++ linux-upstream/drivers/md/dm-raid.c
> @@ -11,6 +11,7 @@
>  #include "md.h"
>  #include "raid1.h"
>  #include "raid5.h"
> +#include "raid10.h"
>  #include "bitmap.h"
>
>  #include <linux/device-mapper.h>
> @@ -52,7 +53,10 @@ struct raid_dev {
>  #define DMPF_MAX_RECOVERY_RATE 0x20
>  #define DMPF_MAX_WRITE_BEHIND  0x40
>  #define DMPF_STRIPE_CACHE      0x80
> -#define DMPF_REGION_SIZE       0X100
> +#define DMPF_REGION_SIZE       0x100
> +#define DMPF_RAID10_COPIES     0x200
> +#define DMPF_RAID10_FORMAT     0x400
> +
>  struct raid_set {
>          struct dm_target *ti;
>
> @@ -76,6 +80,7 @@ static struct raid_type {
>          const unsigned algorithm;       /* RAID algorithm. */
>  } raid_types[] = {
>          {"raid1",    "RAID1 (mirroring)",              0, 2, 1, 0 /* NONE */},
> +        {"raid10",   "RAID10 (striped mirrors)",       0, 2, 10, UINT_MAX /* Varies */},
>          {"raid4",    "RAID4 (dedicated parity disk)",  1, 2, 5, ALGORITHM_PARITY_0},
>          {"raid5_la", "RAID5 (left asymmetric)",        1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
>          {"raid5_ra", "RAID5 (right asymmetric)",       1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
> @@ -86,6 +91,17 @@ static struct raid_type {
>          {"raid6_nc", "RAID6 (N continue)",             2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
>  };
>
> +static unsigned raid10_md_layout_to_copies(int layout)
> +{
> +        return layout & 0xFF;
> +}
> +
> +static int raid10_format_to_md_layout(char *format, unsigned copies)
> +{
> +        /* 1 "far" copy, and 'copies' "near" copies */
> +        return (1 << 8) | (copies & 0xFF);
> +}
> +
>  static struct raid_type *get_raid_type(char *name)
>  {
>          int i;
> @@ -339,10 +355,16 @@ static int validate_region_size(struct r
>   *    [max_write_behind <sectors>]     See '-write-behind=' (man mdadm)
>   *    [stripe_cache <sectors>]         Stripe cache size for higher RAIDs
>   *    [region_size <sectors>]          Defines granularity of bitmap
> + *
> + * RAID10-only options:
> + *    [raid10_copies <# copies>]       Number of copies.  (Default: 2)
> + *    [raid10_format <near>]           Layout algorithm.  (Default: near)
>   */
>  static int parse_raid_params(struct raid_set *rs, char **argv,
>                               unsigned num_raid_params)
>  {
> +        char *raid10_format = "near";
> +        unsigned raid10_copies = 2;
>          unsigned i, rebuild_cnt = 0;
>          unsigned long value, region_size = 0;
>          sector_t sectors_per_dev = rs->ti->len;
> @@ -416,11 +438,28 @@ static int parse_raid_params(struct raid
>          }
>
>          key = argv[i++];
> +
> +        /* Parameters that take a string value are checked here. */
> +        if (!strcasecmp(key, "raid10_format")) {
> +                if (rs->raid_type->level != 10) {
> +                        rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
> +                        return -EINVAL;
> +                }
> +                if (strcmp("near", argv[i])) {
> +                        rs->ti->error = "Invalid 'raid10_format' value given";
> +                        return -EINVAL;
> +                }
> +                raid10_format = argv[i];
> +                rs->print_flags |= DMPF_RAID10_FORMAT;
> +                continue;
> +        }
> +
>          if (strict_strtoul(argv[i], 10, &value) < 0) {
>                  rs->ti->error = "Bad numerical argument given in raid params";
>                  return -EINVAL;
>          }
>
> +        /* Parameters that take a numeric value are checked here */
>          if (!strcasecmp(key, "rebuild")) {
>                  rebuild_cnt++;
>                  rs->ti->error = NULL;
> @@ -436,6 +475,7 @@ static int parse_raid_params(struct raid
>                  if (rebuild_cnt > rs->raid_type->parity_devs)
>                          rs->ti->error = "Too many rebuild devices specified for given RAID type";
>                  break;
> +        case 10:
>          default:
>                  DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
>                  rs->ti->error = "Rebuild not supported for this RAID type";

This hunk doesn't apply for me, or against 3.5.  Is there some patch I'm
missing?  I do vaguely recall you changing this to a switch statement I
think, but I still have an if statement here.
If I'm missing a patch - could you resend it please?

> @@ -536,8 +585,30 @@ static int parse_raid_params(struct raid
>          if (dm_set_target_max_io_len(rs->ti, max_io_len))
>                  return -EINVAL;
>
> -        if ((rs->raid_type->level > 1) &&
> -            sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
> +        if (rs->raid_type->level == 10) {
> +                if (raid10_copies > rs->md.raid_disks) {
> +                        rs->ti->error = "Not enough devices to satisfy specification";
> +                        return -EINVAL;
> +                }
> +
> +                /* (Len * #mirrors) / #devices */
> +                sectors_per_dev = rs->ti->len * raid10_copies;
> +                if (sector_div(sectors_per_dev, rs->md.raid_disks)) {
> +                        rs->ti->error = "Target length not evenly divisible by number of stripes";
> +                        return -EINVAL;
> +                }

This test is still completely pointless, and putting an extra test for chunk
alignment after it doesn't make it any less pointless.
And putting an important division inside the condition of an if(), hides it
a bit more than I like.
But it probably isn't worth arguing about it any more so once I can get a
patch to apply I'll take it.

Thanks,
NeilBrown

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
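For context on the helpers being reviewed: MD's raid10 packs its geometry
into a single layout word - the low byte holds the number of "near" copies
and the next byte the number of "far" copies, which is why
raid10_format_to_md_layout() returns (1 << 8) | copies for a plain
striped-mirror set.  A minimal standalone C sketch of that decoding
(illustration only, not the kernel code):

    /*
     * Illustration only: decoding the MD raid10 layout word.
     * Low byte = "near" copies, next byte = "far" copies.
     */
    #include <stdio.h>

    static unsigned near_copies(int layout) { return layout & 0xFF; }
    static unsigned far_copies(int layout)  { return (layout >> 8) & 0xFF; }

    int main(void)
    {
            int layout = (1 << 8) | 2;  /* raid10_format_to_md_layout("near", 2) */

            printf("near=%u far=%u\n", near_copies(layout), far_copies(layout));
            /* prints: near=2 far=1 */
            return 0;
    }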
On Jul 23, 2012, at 5:26 PM, NeilBrown wrote:

> On Mon, 23 Jul 2012 14:49:22 -0500 Jonathan Brassow <jbrassow@redhat.com>

</snip>

>> @@ -436,6 +475,7 @@ static int parse_raid_params(struct raid
>>                  if (rebuild_cnt > rs->raid_type->parity_devs)
>>                          rs->ti->error = "Too many rebuild devices specified for given RAID type";
>>                  break;
>> +        case 10:
>>          default:
>>                  DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
>>                  rs->ti->error = "Rebuild not supported for this RAID type";
>
> This hunk doesn't apply for me, or against 3.5.  Is there some patch I'm
> missing?
> I do vaguely recall you changing this to a switch statement I think, but I
> still have an if statement here.
> If I'm missing a patch - could you resend it please?

My fault.  I sent these two patches to dm-devel and failed to CC linux-raid
or you.

https://www.redhat.com/archives/dm-devel/2012-July/msg00041.html
https://www.redhat.com/archives/dm-devel/2012-July/msg00042.html

Agk hasn't pushed these two yet, and we should probably wait for that to
happen.  The above two patches are necessary for this patch, but are
dependent upon other patches that agk has staged at the moment.  The timing
is not working out well, and we'll have to wait.

>>> @@ -536,8 +585,30 @@ static int parse_raid_params(struct raid
>>          if (dm_set_target_max_io_len(rs->ti, max_io_len))
>>                  return -EINVAL;
>>
>> -        if ((rs->raid_type->level > 1) &&
>> -            sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
>> +        if (rs->raid_type->level == 10) {
>> +                if (raid10_copies > rs->md.raid_disks) {
>> +                        rs->ti->error = "Not enough devices to satisfy specification";
>> +                        return -EINVAL;
>> +                }
>> +
>> +                /* (Len * #mirrors) / #devices */
>> +                sectors_per_dev = rs->ti->len * raid10_copies;
>> +                if (sector_div(sectors_per_dev, rs->md.raid_disks)) {
>> +                        rs->ti->error = "Target length not evenly divisible by number of stripes";
>> +                        return -EINVAL;
>> +                }
>
> This test is still completely pointless, and putting an extra test for
> chunk alignment after it doesn't make it any less pointless.
> And putting an important division inside the condition of an if(), hides
> it a bit more than I like.
> But it probably isn't worth arguing about it any more so once I can get a
> patch to apply I'll take it.

I'll pull the division out of the conditional so that it's a little more
visible.  Once agk has pushed the aforementioned patches, I'll repost this
patch with that change as 'v5'.

I don't want to belabor the issue - especially since you are kind enough to
be accommodating - but I don't know how I can get by without calculating and
setting 'mddev->dev_sectors'.  MD can't get that information from anywhere
else (when setting up RAID through DM).  The main point is to compute
sectors_per_dev, but secondarily I am checking for other conditions - like
not aligning on chunk boundaries or not being evenly divisible.  Failure to
set sectors_per_dev - or to set it correctly - results in an ill-sized array
(mddev->array_sectors).  The per-device value is not passed in via the DM
table either - it must be computed.  So, I don't understand why it is
pointless.

 brassow

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
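To make the arithmetic being defended here concrete: for a "near" layout,
each member device must hold target length × copies ÷ devices sectors.  A
standalone C sketch with assumed example sizes (in the kernel, sector_div()
divides in place and returns the remainder):

    /*
     * Standalone sketch of the sectors_per_dev arithmetic; the
     * sizes below are assumed examples, not from the thread.
     */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long ti_len = 41943040ULL;  /* 20 GiB target, in sectors */
            unsigned raid10_copies = 2, raid_disks = 4;
            unsigned long long sectors_per_dev = ti_len * raid10_copies;
            unsigned long long rem = sectors_per_dev % raid_disks;

            sectors_per_dev /= raid_disks;  /* the division sector_div() performs */
            printf("sectors_per_dev=%llu remainder=%llu\n", sectors_per_dev, rem);
            /* prints: sectors_per_dev=20971520 remainder=0 (10 GiB per device) */
            return 0;
    }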
On Mon, 23 Jul 2012 20:18:03 -0500 Brassow Jonathan <jbrassow@redhat.com>
wrote:

> On Jul 23, 2012, at 5:26 PM, NeilBrown wrote:
>
> > On Mon, 23 Jul 2012 14:49:22 -0500 Jonathan Brassow <jbrassow@redhat.com>
>
> </snip>
>
> >> @@ -436,6 +475,7 @@ static int parse_raid_params(struct raid
> >>                  if (rebuild_cnt > rs->raid_type->parity_devs)
> >>                          rs->ti->error = "Too many rebuild devices specified for given RAID type";
> >>                  break;
> >> +        case 10:
> >>          default:
> >>                  DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
> >>                  rs->ti->error = "Rebuild not supported for this RAID type";
> >
> > This hunk doesn't apply for me, or against 3.5.  Is there some patch I'm
> > missing?
> > I do vaguely recall you changing this to a switch statement I think, but
> > I still have an if statement here.
> > If I'm missing a patch - could you resend it please?
>
> My fault.  I sent these two patches to dm-devel and failed to CC
> linux-raid or you.
>
> https://www.redhat.com/archives/dm-devel/2012-July/msg00041.html
> https://www.redhat.com/archives/dm-devel/2012-July/msg00042.html

Maybe that is where I saw it before - I thought I had.

> Agk hasn't pushed these two yet, and we should probably wait for that to
> happen.  The above two patches are necessary for this patch, but are
> dependent upon other patches that agk has staged at the moment.  The
> timing is not working out well, and we'll have to wait.

I can do "waiting" (as long as you can help with the waking up when the time
comes).

> >>> @@ -536,8 +585,30 @@ static int parse_raid_params(struct raid
> >>          if (dm_set_target_max_io_len(rs->ti, max_io_len))
> >>                  return -EINVAL;
> >>
> >> -        if ((rs->raid_type->level > 1) &&
> >> -            sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
> >> +        if (rs->raid_type->level == 10) {
> >> +                if (raid10_copies > rs->md.raid_disks) {
> >> +                        rs->ti->error = "Not enough devices to satisfy specification";
> >> +                        return -EINVAL;
> >> +                }
> >> +
> >> +                /* (Len * #mirrors) / #devices */
> >> +                sectors_per_dev = rs->ti->len * raid10_copies;
> >> +                if (sector_div(sectors_per_dev, rs->md.raid_disks)) {
> >> +                        rs->ti->error = "Target length not evenly divisible by number of stripes";
> >> +                        return -EINVAL;
> >> +                }
> >
> > This test is still completely pointless, and putting an extra test for
> > chunk alignment after it doesn't make it any less pointless.
> > And putting an important division inside the condition of an if(), hides
> > it a bit more than I like.
> > But it probably isn't worth arguing about it any more so once I can get
> > a patch to apply I'll take it.
>
> I'll pull the division out of the conditional so that it's a little more
> visible.  Once agk has pushed the aforementioned patches, I'll repost this
> patch with that change as 'v5'.
>
> I don't want to belabor the issue - especially since you are kind enough
> to be accommodating - but I don't know how I can get by without
> calculating and setting 'mddev->dev_sectors'.  MD can't get that
> information from anywhere else (when setting up RAID through DM).  The
> main point is to compute sectors_per_dev, but secondarily I am checking
> for other conditions - like not aligning on chunk boundaries or not being
> evenly divisible.  Failure to set sectors_per_dev - or to set it correctly
> - results in an ill-sized array (mddev->array_sectors).  The per-device
> value is not passed in via the DM table either - it must be computed.  So,
> I don't understand why it is pointless.

The division is certainly needed.  Getting the correct "sectors_per_dev" is
obviously required.  It is the testing of the remainder that is pointless.
A non-zero remainder is not necessarily bad, and a zero remainder is not a
guarantee that everything is good.
The test that ti->len == rs->md.array_sectors (which you included, thanks)
is the important test, and it makes all the other tests on sizes redundant.

Thanks,
NeilBrown

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
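Neil's preferred ordering, restated: derive the per-device size, let MD size
the array from it, and trust only the final comparison against the requested
length.  A standalone C sketch under the simplifying assumption of a pure
"near" layout, where array size = per-device size × disks ÷ copies (values
assumed):

    /*
     * Standalone sketch of the check Neil calls decisive; assumes a
     * pure "near" raid10 layout and example sizes.
     */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long ti_len = 41943040ULL;  /* requested target length */
            unsigned copies = 2, disks = 4;
            unsigned long long sectors_per_dev = ti_len * copies / disks;
            unsigned long long array_sectors = sectors_per_dev * disks / copies;

            if (ti_len != array_sectors)  /* the one test that matters */
                    printf("mismatch: asked %llu, array holds %llu\n",
                           ti_len, array_sectors);
            else
                    printf("sizes agree: %llu sectors\n", ti_len);
            return 0;
    }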
Index: linux-upstream/drivers/md/dm-raid.c
===================================================================
--- linux-upstream.orig/drivers/md/dm-raid.c
+++ linux-upstream/drivers/md/dm-raid.c
@@ -11,6 +11,7 @@
 #include "md.h"
 #include "raid1.h"
 #include "raid5.h"
+#include "raid10.h"
 #include "bitmap.h"

 #include <linux/device-mapper.h>
@@ -52,7 +53,10 @@ struct raid_dev {
 #define DMPF_MAX_RECOVERY_RATE 0x20
 #define DMPF_MAX_WRITE_BEHIND  0x40
 #define DMPF_STRIPE_CACHE      0x80
-#define DMPF_REGION_SIZE       0X100
+#define DMPF_REGION_SIZE       0x100
+#define DMPF_RAID10_COPIES     0x200
+#define DMPF_RAID10_FORMAT     0x400
+
 struct raid_set {
         struct dm_target *ti;

@@ -76,6 +80,7 @@ static struct raid_type {
         const unsigned algorithm;       /* RAID algorithm. */
 } raid_types[] = {
         {"raid1",    "RAID1 (mirroring)",              0, 2, 1, 0 /* NONE */},
+        {"raid10",   "RAID10 (striped mirrors)",       0, 2, 10, UINT_MAX /* Varies */},
         {"raid4",    "RAID4 (dedicated parity disk)",  1, 2, 5, ALGORITHM_PARITY_0},
         {"raid5_la", "RAID5 (left asymmetric)",        1, 2, 5, ALGORITHM_LEFT_ASYMMETRIC},
         {"raid5_ra", "RAID5 (right asymmetric)",       1, 2, 5, ALGORITHM_RIGHT_ASYMMETRIC},
@@ -86,6 +91,17 @@ static struct raid_type {
         {"raid6_nc", "RAID6 (N continue)",             2, 4, 6, ALGORITHM_ROTATING_N_CONTINUE}
 };

+static unsigned raid10_md_layout_to_copies(int layout)
+{
+        return layout & 0xFF;
+}
+
+static int raid10_format_to_md_layout(char *format, unsigned copies)
+{
+        /* 1 "far" copy, and 'copies' "near" copies */
+        return (1 << 8) | (copies & 0xFF);
+}
+
 static struct raid_type *get_raid_type(char *name)
 {
         int i;
@@ -339,10 +355,16 @@ static int validate_region_size(struct r
  *    [max_write_behind <sectors>]     See '-write-behind=' (man mdadm)
  *    [stripe_cache <sectors>]         Stripe cache size for higher RAIDs
  *    [region_size <sectors>]          Defines granularity of bitmap
+ *
+ * RAID10-only options:
+ *    [raid10_copies <# copies>]       Number of copies.  (Default: 2)
+ *    [raid10_format <near>]           Layout algorithm.  (Default: near)
  */
 static int parse_raid_params(struct raid_set *rs, char **argv,
                              unsigned num_raid_params)
 {
+        char *raid10_format = "near";
+        unsigned raid10_copies = 2;
         unsigned i, rebuild_cnt = 0;
         unsigned long value, region_size = 0;
         sector_t sectors_per_dev = rs->ti->len;
@@ -416,11 +438,28 @@ static int parse_raid_params(struct raid
         }

         key = argv[i++];
+
+        /* Parameters that take a string value are checked here. */
+        if (!strcasecmp(key, "raid10_format")) {
+                if (rs->raid_type->level != 10) {
+                        rs->ti->error = "'raid10_format' is an invalid parameter for this RAID type";
+                        return -EINVAL;
+                }
+                if (strcmp("near", argv[i])) {
+                        rs->ti->error = "Invalid 'raid10_format' value given";
+                        return -EINVAL;
+                }
+                raid10_format = argv[i];
+                rs->print_flags |= DMPF_RAID10_FORMAT;
+                continue;
+        }
+
         if (strict_strtoul(argv[i], 10, &value) < 0) {
                 rs->ti->error = "Bad numerical argument given in raid params";
                 return -EINVAL;
         }

+        /* Parameters that take a numeric value are checked here */
         if (!strcasecmp(key, "rebuild")) {
                 rebuild_cnt++;
                 rs->ti->error = NULL;
@@ -436,6 +475,7 @@ static int parse_raid_params(struct raid
                 if (rebuild_cnt > rs->raid_type->parity_devs)
                         rs->ti->error = "Too many rebuild devices specified for given RAID type";
                 break;
+        case 10:
         default:
                 DMERR("The rebuild parameter is not supported for %s", rs->raid_type->name);
                 rs->ti->error = "Rebuild not supported for this RAID type";
@@ -493,7 +533,8 @@ static int parse_raid_params(struct raid
                  */
                 value /= 2;

-                if (rs->raid_type->level < 5) {
+                if ((rs->raid_type->level != 5) &&
+                    (rs->raid_type->level != 6)) {
                         rs->ti->error = "Inappropriate argument: stripe_cache";
                         return -EINVAL;
                 }
@@ -518,6 +559,14 @@ static int parse_raid_params(struct raid
         } else if (!strcasecmp(key, "region_size")) {
                 rs->print_flags |= DMPF_REGION_SIZE;
                 region_size = value;
+        } else if (!strcasecmp(key, "raid10_copies") &&
+                   (rs->raid_type->level == 10)) {
+                if ((value < 2) || (value > 0xFF)) {
+                        rs->ti->error = "Bad value for 'raid10_copies'";
+                        return -EINVAL;
+                }
+                rs->print_flags |= DMPF_RAID10_COPIES;
+                raid10_copies = value;
         } else {
                 DMERR("Unable to parse RAID parameter: %s", key);
                 rs->ti->error = "Unable to parse RAID parameters";
@@ -536,8 +585,30 @@ static int parse_raid_params(struct raid
         if (dm_set_target_max_io_len(rs->ti, max_io_len))
                 return -EINVAL;

-        if ((rs->raid_type->level > 1) &&
-            sector_div(sectors_per_dev, (rs->md.raid_disks - rs->raid_type->parity_devs))) {
+        if (rs->raid_type->level == 10) {
+                if (raid10_copies > rs->md.raid_disks) {
+                        rs->ti->error = "Not enough devices to satisfy specification";
+                        return -EINVAL;
+                }
+
+                /* (Len * #mirrors) / #devices */
+                sectors_per_dev = rs->ti->len * raid10_copies;
+                if (sector_div(sectors_per_dev, rs->md.raid_disks)) {
+                        rs->ti->error = "Target length not evenly divisible by number of stripes";
+                        return -EINVAL;
+                }
+
+                if (sectors_per_dev & (rs->md.chunk_sectors - 1)) {
+                        rs->ti->error = "Device size not aligned on chunk boundary";
+                        return -EINVAL;
+                }
+
+                rs->md.layout = raid10_format_to_md_layout(raid10_format,
+                                                           raid10_copies);
+                rs->md.new_layout = rs->md.layout;
+        } else if ((rs->raid_type->level > 1) &&
+                   sector_div(sectors_per_dev,
+                              (rs->md.raid_disks - rs->raid_type->parity_devs))) {
                 rs->ti->error = "Target length not divisible by number of data devices";
                 return -EINVAL;
         }
@@ -564,6 +635,9 @@ static int raid_is_congested(struct dm_t
         if (rs->raid_type->level == 1)
                 return md_raid1_congested(&rs->md, bits);

+        if (rs->raid_type->level == 10)
+                return md_raid10_congested(&rs->md, bits);
+
         return md_raid5_congested(&rs->md, bits);
 }

@@ -882,6 +956,9 @@ static int analyse_superblocks(struct dm
         case 6:
                 redundancy = rs->raid_type->parity_devs;
                 break;
+        case 10:
+                redundancy = raid10_md_layout_to_copies(mddev->layout) - 1;
+                break;
         default:
                 ti->error = "Unknown RAID type";
                 return -EINVAL;
@@ -1047,12 +1124,21 @@ static int raid_ctr(struct dm_target *ti
                 goto bad;
         }

+        if (ti->len != rs->md.array_sectors) {
+                ti->error = "Array size does not match requested target length";
+                goto size_mismatch;
+        }
+
         rs->callbacks.congested_fn = raid_is_congested;
         dm_table_add_target_callbacks(ti->table, &rs->callbacks);

         mddev_suspend(&rs->md);
         return 0;

+size_mismatch:
+        DMERR("Array size (%llu) does not match requested target length (%llu)",
+              rs->md.array_sectors, ti->len);
+        mddev_suspend(&rs->md);
+        md_stop(&rs->md);
 bad:
         context_free(rs);

@@ -1201,6 +1287,13 @@ static int raid_status(struct dm_target
                 DMEMIT(" region_size %lu",
                        rs->md.bitmap_info.chunksize >> 9);

+        if (rs->print_flags & DMPF_RAID10_COPIES)
+                DMEMIT(" raid10_copies %u",
+                       raid10_md_layout_to_copies(rs->md.layout));
+
+        if (rs->print_flags & DMPF_RAID10_FORMAT)
+                DMEMIT(" raid10_format near");
+
         DMEMIT(" %d", rs->md.raid_disks);
         for (i = 0; i < rs->md.raid_disks; i++) {
                 if (rs->dev[i].meta_dev)
@@ -1275,7 +1368,7 @@ static void raid_resume(struct dm_target

 static struct target_type raid_target = {
         .name = "raid",
-        .version = {1, 2, 0},
+        .version = {1, 3, 0},
         .module = THIS_MODULE,
         .ctr = raid_ctr,
         .dtr = raid_dtr,
@@ -1302,6 +1395,8 @@ module_init(dm_raid_init);
 module_exit(dm_raid_exit);

 MODULE_DESCRIPTION(DM_NAME " raid4/5/6 target");
+MODULE_ALIAS("dm-raid1");
+MODULE_ALIAS("dm-raid10");
 MODULE_ALIAS("dm-raid4");
 MODULE_ALIAS("dm-raid5");
 MODULE_ALIAS("dm-raid6");
Index: linux-upstream/Documentation/device-mapper/dm-raid.txt
===================================================================
--- linux-upstream.orig/Documentation/device-mapper/dm-raid.txt
+++ linux-upstream/Documentation/device-mapper/dm-raid.txt
@@ -27,6 +27,10 @@ The target is named "raid" and it accept
                 - rotating parity N (right-to-left) with data restart
   raid6_nc      RAID6 N continue
                 - rotating parity N (right-to-left) with data continuation
+  raid10        Various RAID10 inspired algorithms chosen by additional params
+                - RAID10: Striped Mirrors (aka 'Striping on top of mirrors')
+                - RAID1E: Integrated Adjacent Stripe Mirroring
+                - and other similar RAID10 variants

   Reference: Chapter 4 of
   http://www.snia.org/sites/default/files/SNIA_DDF_Technical_Position_v2.0.pdf
@@ -59,6 +63,28 @@ The target is named "raid" and it accept
                 logical size of the array.  The bitmap records the device
                 synchronisation state for each region.

+        [raid10_copies   <# copies>]
+        [raid10_format   near]
+                These two options are used to alter the default layout of
+                a RAID10 configuration.  The number of copies can be
+                specified, but the default is 2.  There are other variations
+                to how the copies are laid down - the default and only
+                current option is "near".  Near copies are what most people
+                think of with respect to mirroring.  If these options are
+                left unspecified, or 'raid10_copies 2' and/or
+                'raid10_format near' are given, then the layouts for 2, 3
+                and 4 devices are:
+                2 drives         3 drives          4 drives
+                --------         ----------        --------------
+                A1  A1           A1  A1  A2        A1  A1  A2  A2
+                A2  A2           A2  A3  A3        A3  A3  A4  A4
+                A3  A3           A4  A4  A5        A5  A5  A6  A6
+                A4  A4           A5  A6  A6        A7  A7  A8  A8
+                ..  ..           ..  ..  ..        ..  ..  ..  ..
+                The 2-device layout is equivalent to 2-way RAID1.  The
+                4-device layout is what a traditional RAID10 would look
+                like.  The 3-device layout is what might be called a
+                'RAID1E - Integrated Adjacent Stripe Mirroring'.
+
 <#raid_devs>: The number of devices composing the array.
         Each device consists of two entries.  The first is the device
         containing the metadata (if any); the second is the one containing the
Neil,

Updated 'sectors_per_dev' calculation and integrated your other
suggestions.

 brassow

dm raid: add md raid10 support

Support the MD RAID10 personality through dm-raid.c

Signed-off-by: Jonathan Brassow <jbrassow@redhat.com>

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel