@@ -367,6 +367,7 @@ struct cached_dev {
unsigned sequential_cutoff;
unsigned readahead;
+ unsigned io_disable:1;
unsigned verify:1;
unsigned bypass_torture_test:1;
@@ -388,6 +389,9 @@ struct cached_dev {
unsigned writeback_rate_minimum;
enum stop_on_failure stop_when_cache_set_failed;
+#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
+ atomic_t io_errors;
+ unsigned error_limit;
};
enum alloc_reserve {
@@ -911,6 +915,7 @@ static inline void wait_for_kthread_stop(void)
/* Forward declarations */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
blk_status_t, const char *);
@@ -938,6 +943,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
struct bkey *, int, bool);
bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
unsigned, unsigned, bool);
+bool bch_cached_dev_error(struct cached_dev *dc);
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *, const char *, ...);
@@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
}
/* IO errors */
+void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
+{
+ char buf[BDEVNAME_SIZE];
+ unsigned errors;
+
+ WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
+
+ errors = atomic_add_return(1, &dc->io_errors);
+ if (errors < dc->error_limit)
+ pr_err("%s: IO error on backing device, unrecoverable",
+ bio_devname(bio, buf));
+ else
+ bch_cached_dev_error(dc);
+}
void bch_count_io_errors(struct cache *ca,
blk_status_t error,
@@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
if (bio->bi_status) {
struct search *s = container_of(cl, struct search, cl);
+ struct cached_dev *dc = container_of(s->d,
+ struct cached_dev, disk);
/*
* If a bio has REQ_PREFLUSH for writeback mode, it is
* speically assembled in cached_dev_write() for a non-zero
@@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
}
s->recoverable = false;
/* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
}
bio_put(bio);
@@ -1065,8 +1068,14 @@ static void detached_dev_end_io(struct bio *bio)
bio_data_dir(bio),
&ddip->d->disk->part0, ddip->start_time);
+ if (bio->bi_status) {
+ struct cached_dev *dc = container_of(ddip->d,
+ struct cached_dev, disk);
+ /* should count I/O error for backing device here */
+ bch_count_backing_io_errors(dc, bio);
+ }
+
kfree(ddip);
-
bio->bi_end_io(bio);
}
@@ -1105,7 +1114,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
struct cached_dev *dc = container_of(d, struct cached_dev, disk);
int rw = bio_data_dir(bio);
- if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
+ if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
+ dc->io_disable)) {
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -1208,6 +1208,9 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
max(dc->disk.disk->queue->backing_dev_info->ra_pages,
q->backing_dev_info->ra_pages);
+ atomic_set(&dc->io_errors, 0);
+ dc->io_disable = false;
+ dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
/* default to auto */
dc->stop_when_cache_set_failed = BCH_CACHED_DEV_STOP_AUTO;
@@ -1362,6 +1365,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
return flash_dev_run(c, u);
}
+bool bch_cached_dev_error(struct cached_dev *dc)
+{
+ char name[BDEVNAME_SIZE];
+
+ if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
+ return false;
+
+ dc->io_disable = true;
+ /* make others know io_disable is true earlier */
+ smp_mb();
+
+ pr_err("stop %s: too many IO errors on backing device %s\n",
+ dc->disk.disk->disk_name, bdevname(dc->bdev, name));
+
+ bcache_device_stop(&dc->disk);
+ return true;
+}
+
/* Cache set */
__printf(2, 3)
@@ -141,7 +141,9 @@ SHOW(__bch_cached_dev)
var_print(writeback_delay);
var_print(writeback_percent);
sysfs_hprint(writeback_rate, dc->writeback_rate.rate << 9);
-
+ sysfs_hprint(io_errors, atomic_read(&dc->io_errors));
+ sysfs_printf(io_error_limit, "%i", dc->error_limit);
+ sysfs_printf(io_disable, "%i", dc->io_disable);
var_print(writeback_rate_update_seconds);
var_print(writeback_rate_i_term_inverse);
var_print(writeback_rate_p_term_inverse);
@@ -232,6 +234,14 @@ STORE(__cached_dev)
d_strtoul(writeback_rate_i_term_inverse);
d_strtoul_nonzero(writeback_rate_p_term_inverse);
+ sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
+
+ if (attr == &sysfs_io_disable) {
+ int v = strtoul_or_return(buf);
+
+ dc->io_disable = v ? 1 : 0;
+ }
+
d_strtoi_h(sequential_cutoff);
d_strtoi_h(readahead);
@@ -352,6 +362,9 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_writeback_rate_i_term_inverse,
&sysfs_writeback_rate_p_term_inverse,
&sysfs_writeback_rate_debug,
+ &sysfs_errors,
+ &sysfs_io_error_limit,
+ &sysfs_io_disable,
&sysfs_dirty_data,
&sysfs_stripe_size,
&sysfs_partial_stripes_expensive,