Message ID | 20231204070327.3150356-2-linan666@huaweicloud.com (mailing list archive) |
---|---|
State | Superseded, archived |
Delegated to: | Song Liu |
Headers | show |
Series | support read error check in raid1 | expand |
On Sun, Dec 3, 2023 at 11:04 PM <linan666@huaweicloud.com> wrote:
>
> From: Li Nan <linan122@huawei.com>
>
> Move check_decay_read_errors() to raid1-10.c and factor out a helper
> exceed_read_errors() to check if read_errors exceeds the limit, so that
> raid1 can also use it. There are no functional changes.
>
> Signed-off-by: Li Nan <linan122@huawei.com>
[...]
> +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
> +{
> +	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
> +	int read_errors;
> +
> +	check_decay_read_errors(mddev, rdev);
> +	read_errors = atomic_inc_return(&rdev->read_errors);
> +	if (read_errors > max_read_errors) {
> +		pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
> +			  mdname(mddev), rdev->bdev, read_errors, max_read_errors);
> +		pr_notice("md:%s: %pg: Failing raid device\n",
> +			  mdname(mddev), rdev->bdev);

This changed the print message from "md/raid10:" to "md:". We should
try to avoid such changes. How about we do something like the following?

Thanks,
Song

diff --git i/drivers/md/raid1-10.c w/drivers/md/raid1-10.c
index 3f22edec70e7..6c0ef0fe6ba7 100644
--- i/drivers/md/raid1-10.c
+++ w/drivers/md/raid1-10.c
@@ -173,3 +173,10 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
 	else
 		md_bitmap_unplug(bitmap);
 }
+
+static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+	pr_notice("md/" RAID_1_10_NAME ":%s: %pg: Raid device ...\n",
+		...);
+	...
+}
diff --git i/drivers/md/raid1.c w/drivers/md/raid1.c
index 9348f1709512..412e98d02a05 100644
--- i/drivers/md/raid1.c
+++ w/drivers/md/raid1.c
@@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
 #define raid1_log(md, fmt, args...)				\
 	do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
 
+#define RAID_1_10_NAME "raid1"
 #include "raid1-10.c"
 
 #define START(node) ((node)->start)
diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
index 375c11d6159f..a1531b5f15e3 100644
--- i/drivers/md/raid10.c
+++ w/drivers/md/raid10.c
@@ -77,6 +77,8 @@ static void end_reshape(struct r10conf *conf);
 #define raid10_log(md, fmt, args...)				\
 	do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
 
+#define RAID_1_10_NAME "raid10"
+
 #include "raid1-10.c"
 
 #define NULL_CMD

[...]
在 2023/12/9 2:38, Song Liu 写道:
> On Sun, Dec 3, 2023 at 11:04 PM <linan666@huaweicloud.com> wrote:
>>
>> From: Li Nan <linan122@huawei.com>
>>
>> Move check_decay_read_errors() to raid1-10.c and factor out a helper
>> exceed_read_errors() to check if read_errors exceeds the limit, so that
>> raid1 can also use it. There are no functional changes.
>>
>> Signed-off-by: Li Nan <linan122@huawei.com>
> [...]
>> +static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
>> +{
>> +	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
>> +	int read_errors;
>> +
>> +	check_decay_read_errors(mddev, rdev);
>> +	read_errors = atomic_inc_return(&rdev->read_errors);
>> +	if (read_errors > max_read_errors) {
>> +		pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
>> +			  mdname(mddev), rdev->bdev, read_errors, max_read_errors);
>> +		pr_notice("md:%s: %pg: Failing raid device\n",
>> +			  mdname(mddev), rdev->bdev);
>
> This changed the print message from "md/raid10:" to "md:". We should
> try to avoid
> such changes. How about we do something like the following?
>
> Thanks,
> Song
>
> diff --git i/drivers/md/raid1-10.c w/drivers/md/raid1-10.c
> index 3f22edec70e7..6c0ef0fe6ba7 100644
> --- i/drivers/md/raid1-10.c
> +++ w/drivers/md/raid1-10.c
> @@ -173,3 +173,10 @@ static inline void
> raid1_prepare_flush_writes(struct bitmap *bitmap)
>  	else
>  		md_bitmap_unplug(bitmap);
>  }
> +
> +static inline bool exceed_read_errors(struct mddev *mddev, struct
> md_rdev *rdev)
> +{
> +	pr_notice("md/" RAID_1_10_NAME ":%s: %pg: Raid device ...\n",
> +		...);
> +	...
> +}
> diff --git i/drivers/md/raid1.c w/drivers/md/raid1.c
> index 9348f1709512..412e98d02a05 100644
> --- i/drivers/md/raid1.c
> +++ w/drivers/md/raid1.c
> @@ -49,6 +49,7 @@ static void lower_barrier(struct r1conf *conf,
> sector_t sector_nr);
>  #define raid1_log(md, fmt, args...)				\
>  	do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 "
> fmt, ##args); } while (0)
>
> +#define RAID_1_10_NAME "raid1"
>  #include "raid1-10.c"
>
>  #define START(node) ((node)->start)
> diff --git i/drivers/md/raid10.c w/drivers/md/raid10.c
> index 375c11d6159f..a1531b5f15e3 100644
> --- i/drivers/md/raid10.c
> +++ w/drivers/md/raid10.c
> @@ -77,6 +77,8 @@ static void end_reshape(struct r10conf *conf);
>  #define raid10_log(md, fmt, args...)				\
>  	do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 "
> fmt, ##args); } while (0)
>
> +#define RAID_1_10_NAME "raid10"
> +
>  #include "raid1-10.c"
>
>  #define NULL_CMD
>
> [...]
>
> .

I agree. Let me improve this in v2.
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 3f22edec70e7..9e17eab915f5 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -173,3 +173,57 @@ static inline void raid1_prepare_flush_writes(struct bitmap *bitmap)
 	else
 		md_bitmap_unplug(bitmap);
 }
+
+/*
+ * Used by fix_read_error() to decay the per rdev read_errors.
+ * We halve the read error count for every hour that has elapsed
+ * since the last recorded read error.
+ */
+static inline void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+	long cur_time_mon;
+	unsigned long hours_since_last;
+	unsigned int read_errors = atomic_read(&rdev->read_errors);
+
+	cur_time_mon = ktime_get_seconds();
+
+	if (rdev->last_read_error == 0) {
+		/* first time we've seen a read error */
+		rdev->last_read_error = cur_time_mon;
+		return;
+	}
+
+	hours_since_last = (long)(cur_time_mon -
+			    rdev->last_read_error) / 3600;
+
+	rdev->last_read_error = cur_time_mon;
+
+	/*
+	 * if hours_since_last is > the number of bits in read_errors
+	 * just set read errors to 0. We do this to avoid
+	 * overflowing the shift of read_errors by hours_since_last.
+	 */
+	if (hours_since_last >= 8 * sizeof(read_errors))
+		atomic_set(&rdev->read_errors, 0);
+	else
+		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
+}
+
+static inline bool exceed_read_errors(struct mddev *mddev, struct md_rdev *rdev)
+{
+	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
+	int read_errors;
+
+	check_decay_read_errors(mddev, rdev);
+	read_errors = atomic_inc_return(&rdev->read_errors);
+	if (read_errors > max_read_errors) {
+		pr_notice("md:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
+			  mdname(mddev), rdev->bdev, read_errors, max_read_errors);
+		pr_notice("md:%s: %pg: Failing raid device\n",
+			  mdname(mddev), rdev->bdev);
+		md_error(mddev, rdev);
+		return true;
+	}
+
+	return false;
+}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 375c11d6159f..8ea4974fb91c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2592,42 +2592,6 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 	}
 }
 
-/*
- * Used by fix_read_error() to decay the per rdev read_errors.
- * We halve the read error count for every hour that has elapsed
- * since the last recorded read error.
- *
- */
-static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
-{
-	long cur_time_mon;
-	unsigned long hours_since_last;
-	unsigned int read_errors = atomic_read(&rdev->read_errors);
-
-	cur_time_mon = ktime_get_seconds();
-
-	if (rdev->last_read_error == 0) {
-		/* first time we've seen a read error */
-		rdev->last_read_error = cur_time_mon;
-		return;
-	}
-
-	hours_since_last = (long)(cur_time_mon -
-			    rdev->last_read_error) / 3600;
-
-	rdev->last_read_error = cur_time_mon;
-
-	/*
-	 * if hours_since_last is > the number of bits in read_errors
-	 * just set read errors to 0. We do this to avoid
-	 * overflowing the shift of read_errors by hours_since_last.
-	 */
-	if (hours_since_last >= 8 * sizeof(read_errors))
-		atomic_set(&rdev->read_errors, 0);
-	else
-		atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
-}
-
 static int r10_sync_page_io(struct md_rdev *rdev, sector_t sector,
 			    int sectors, struct page *page, enum req_op op)
 {
@@ -2665,7 +2629,6 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 	int sect = 0; /* Offset from r10_bio->sector */
 	int sectors = r10_bio->sectors, slot = r10_bio->read_slot;
 	struct md_rdev *rdev;
-	int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
 	int d = r10_bio->devs[slot].devnum;
 
 	/* still own a reference to this rdev, so it cannot
@@ -2678,15 +2641,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 		   more fix_read_error() attempts */
 		return;
 
-	check_decay_read_errors(mddev, rdev);
-	atomic_inc(&rdev->read_errors);
-	if (atomic_read(&rdev->read_errors) > max_read_errors) {
-		pr_notice("md/raid10:%s: %pg: Raid device exceeded read_error threshold [cur %d:max %d]\n",
-			  mdname(mddev), rdev->bdev,
-			  atomic_read(&rdev->read_errors), max_read_errors);
-		pr_notice("md/raid10:%s: %pg: Failing raid device\n",
-			  mdname(mddev), rdev->bdev);
-		md_error(mddev, rdev);
+	if (exceed_read_errors(mddev, rdev)) {
 		r10_bio->devs[slot].bio = IO_BLOCKED;
 		return;
 	}