From patchwork Wed May 19 11:46:38 2010 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Nikanth Karthikesan X-Patchwork-Id: 100862 Received: from mx02.colomx.prod.int.phx2.redhat.com (mx4-phx2.redhat.com [209.132.183.25]) by demeter.kernel.org (8.14.3/8.14.3) with ESMTP id o4JBlu7X006642 for ; Wed, 19 May 2010 11:48:32 GMT Received: from lists01.pubmisc.prod.ext.phx2.redhat.com (lists01.pubmisc.prod.ext.phx2.redhat.com [10.5.19.33]) by mx02.colomx.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o4JBjIPQ028067; Wed, 19 May 2010 07:45:21 -0400 Received: from int-mx05.intmail.prod.int.phx2.redhat.com (int-mx05.intmail.prod.int.phx2.redhat.com [10.5.11.18]) by lists01.pubmisc.prod.ext.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o4JBjGKl005390 for ; Wed, 19 May 2010 07:45:16 -0400 Received: from mx1.redhat.com (ext-mx04.extmail.prod.ext.phx2.redhat.com [10.5.110.8]) by int-mx05.intmail.prod.int.phx2.redhat.com (8.13.8/8.13.8) with ESMTP id o4JBjAlJ026064; Wed, 19 May 2010 07:45:10 -0400 Received: from mx2.suse.de (cantor2.suse.de [195.135.220.15]) by mx1.redhat.com (8.13.8/8.13.8) with ESMTP id o4JBiv3v032454; Wed, 19 May 2010 07:44:58 -0400 Received: from relay1.suse.de (charybdis-ext.suse.de [195.135.221.2]) by mx2.suse.de (Postfix) with ESMTP id 60D2688B6C; Wed, 19 May 2010 13:44:56 +0200 (CEST) From: Nikanth Karthikesan Organization: suse.de To: "device-mapper development" Date: Wed, 19 May 2010 17:16:38 +0530 User-Agent: KMail/1.12.4 (Linux/2.6.31.12-0.2-desktop; KDE/4.3.5; x86_64; ; ) MIME-Version: 1.0 Message-Id: <201005191716.38617.knikanth@suse.de> X-RedHat-Spam-Score: -2.31 (RCVD_IN_DNSWL_MED,T_RP_MATCHES_RCVD) X-Scanned-By: MIMEDefang 2.67 on 10.5.11.18 X-Scanned-By: MIMEDefang 2.67 on 10.5.110.8 X-loop: dm-devel@redhat.com Cc: Kiyoshi Ueda , Mike Snitzer , Alasdair G Kergon , Jens Axboe , "Jun'ichi Nomura" , Vivek Goyal Subject: [dm-devel] Questions regarding request based dm-multipath and blk-layer/elevator X-BeenThere: dm-devel@redhat.com X-Mailman-Version: 2.1.12 Precedence: junk Reply-To: device-mapper development List-Id: device-mapper development List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: dm-devel-bounces@redhat.com Errors-To: dm-devel-bounces@redhat.com X-Greylist: IP, sender and recipient auto-whitelisted, not delayed by milter-greylist-4.2.3 (demeter.kernel.org [140.211.167.41]); Wed, 19 May 2010 11:48:32 +0000 (UTC) diff --git a/block/blk-core.c b/block/blk-core.c index 9fe174d..fc33c2d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -91,6 +91,44 @@ void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } +unsigned long +__blk_queue_set_nr_requests(struct request_queue *q, unsigned long nr) +{ + struct request_list *rl = &q->rq; + + if (nr < BLKDEV_MIN_RQ) + nr = BLKDEV_MIN_RQ; + + q->nr_requests = nr; + blk_queue_congestion_threshold(q); + + if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_SYNC); + else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_SYNC); + + if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_ASYNC); + else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_ASYNC); + + if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_SYNC); + } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_SYNC); + wake_up(&rl->wait[BLK_RW_SYNC]); + } + + if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_ASYNC); + } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_ASYNC); + wake_up(&rl->wait[BLK_RW_ASYNC]); + } + return nr; +} +EXPORT_SYMBOL(__blk_queue_set_nr_requests); + /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @bdev: device diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 306759b..615198c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -39,7 +39,6 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) static ssize_t queue_requests_store(struct request_queue *q, const char *page, size_t count) { - struct request_list *rl = &q->rq; unsigned long nr; int ret; @@ -47,37 +46,11 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) return -EINVAL; ret = queue_var_store(&nr, page, count); - if (nr < BLKDEV_MIN_RQ) - nr = BLKDEV_MIN_RQ; spin_lock_irq(q->queue_lock); - q->nr_requests = nr; - blk_queue_congestion_threshold(q); - - if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, BLK_RW_SYNC); - else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, BLK_RW_SYNC); - - if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, BLK_RW_ASYNC); - else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, BLK_RW_ASYNC); - - if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { - blk_set_queue_full(q, BLK_RW_SYNC); - } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { - blk_clear_queue_full(q, BLK_RW_SYNC); - wake_up(&rl->wait[BLK_RW_SYNC]); - } - - if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { - blk_set_queue_full(q, BLK_RW_ASYNC); - } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { - blk_clear_queue_full(q, BLK_RW_ASYNC); - wake_up(&rl->wait[BLK_RW_ASYNC]); - } + __blk_queue_set_nr_requests(q, nr); spin_unlock_irq(q->queue_lock); + return ret; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 9924ea2..c6deff9 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -526,6 +526,18 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } EXPORT_SYMBOL_GPL(dm_set_device_limits); +int dm_set_device_nr_requests(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + unsigned long *nr_requests = data; + struct block_device *bdev = dev->bdev; + struct request_queue *q = bdev_get_queue(bdev); + + *nr_requests += q->nr_requests; + return 0; +} +EXPORT_SYMBOL_GPL(dm_set_device_nr_requests); + int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, struct dm_dev **result) { @@ -983,12 +995,13 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) * Establish the new table's queue_limits and validate them. */ int dm_calculate_queue_limits(struct dm_table *table, - struct queue_limits *limits) + struct queue_limits *limits, unsigned long *nr_requests) { struct dm_target *uninitialized_var(ti); struct queue_limits ti_limits; unsigned i = 0; + *nr_requests = 0; blk_set_default_limits(limits); while (i < dm_table_get_num_targets(table)) { @@ -1005,6 +1018,13 @@ int dm_calculate_queue_limits(struct dm_table *table, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); + /* + * Combine queue nr_requests limit of all the devices this + * target uses. + */ + ti->type->iterate_devices(ti, dm_set_device_nr_requests, + nr_requests); + /* Set I/O hints portion of queue limits */ if (ti->type->io_hints) ti->type->io_hints(ti, &ti_limits); @@ -1074,7 +1094,7 @@ no_integrity: } void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, - struct queue_limits *limits) + struct queue_limits *limits, unsigned long nr_requests) { /* * Copy table's limits to the DM device's request_queue @@ -1098,8 +1118,10 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * Those bios are passed to request-based dm at the resume time. */ smp_mb(); - if (dm_table_request_based(t)) + if (dm_table_request_based(t)) { queue_flag_set_unlocked(QUEUE_FLAG_STACKABLE, q); + __blk_queue_set_nr_requests(q, nr_requests); + } } unsigned int dm_table_get_num_targets(struct dm_table *t) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d21e128..2d45689 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2052,7 +2052,7 @@ static void __set_size(struct mapped_device *md, sector_t size) * Returns old map, which caller must destroy. */ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, - struct queue_limits *limits) + struct queue_limits *limits, unsigned long nr_requests) { struct dm_table *old_map; struct request_queue *q = md->queue; @@ -2083,11 +2083,13 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, __bind_mempools(md, t); - write_lock_irqsave(&md->map_lock, flags); + spin_lock_irqsave(q->queue_lock, flags); + write_lock(&md->map_lock); old_map = md->map; md->map = t; - dm_table_set_restrictions(t, q, limits); - write_unlock_irqrestore(&md->map_lock, flags); + dm_table_set_restrictions(t, q, limits, nr_requests); + write_unlock(&md->map_lock); + spin_unlock_irqrestore(q->queue_lock, flags); return old_map; } @@ -2390,6 +2392,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) struct dm_table *map = ERR_PTR(-EINVAL); struct queue_limits limits; int r; + unsigned long nr_requests; mutex_lock(&md->suspend_lock); @@ -2397,7 +2400,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) if (!dm_suspended_md(md)) goto out; - r = dm_calculate_queue_limits(table, &limits); + r = dm_calculate_queue_limits(table, &limits, &nr_requests); if (r) { map = ERR_PTR(r); goto out; @@ -2410,7 +2413,7 @@ struct dm_table *dm_swap_table(struct mapped_device *md, struct dm_table *table) goto out; } - map = __bind(md, table, &limits); + map = __bind(md, table, &limits, nr_requests); out: mutex_unlock(&md->suspend_lock); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index bad1724..9e25c48 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -50,9 +50,9 @@ void dm_table_event_callback(struct dm_table *t, struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index); struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector); int dm_calculate_queue_limits(struct dm_table *table, - struct queue_limits *limits); + struct queue_limits *limits, unsigned long *nr_requests); void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, - struct queue_limits *limits); + struct queue_limits *limits, unsigned long nr_requests); struct list_head *dm_table_get_devices(struct dm_table *t); void dm_table_presuspend_targets(struct dm_table *t); void dm_table_postsuspend_targets(struct dm_table *t); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6690e8b..366bba5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -934,6 +934,9 @@ extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); +extern unsigned long __blk_queue_set_nr_requests(struct request_queue *q, + unsigned long nr); + extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 1381cd9..b0f9443 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -108,6 +108,11 @@ void dm_error(const char *message); */ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data); +/* + * Combine device nr_requests. + */ +int dm_set_device_nr_requests(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data); struct dm_dev { struct block_device *bdev;