From patchwork Fri Feb 11 22:27:51 2011
X-Patchwork-Submitter: Andrei Warkentin
X-Patchwork-Id: 551001
Date: Fri, 11 Feb 2011 16:27:51 -0600
Subject: Re: MMC quirks relating to performance/lifetime.
From: Andrei Warkentin
To: Linus Walleij
Cc: linux-mmc@vger.kernel.org, linux-arm-kernel@lists.infradead.org
X-Mailing-List: linux-mmc@vger.kernel.org

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 7054fd5..3b32329 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -60,6 +60,7 @@ struct mmc_blk_data {
 	spinlock_t	lock;
 	struct gendisk	*disk;
 	struct mmc_queue queue;
+	char		*bounce;
 
 	unsigned int	usage;
 	unsigned int	read_only;
@@ -93,6 +94,9 @@ static void mmc_blk_put(struct mmc_blk_data *md)
 
 		__clear_bit(devidx, dev_use);
 
+		if (md->bounce)
+			kfree(md->bounce);
+
 		put_disk(md->disk);
 		kfree(md);
 	}
@@ -312,6 +316,157 @@ out:
 	return err ? 0 : 1;
 }
 
+/*
+ * Workaround for Toshiba eMMC performance. If the request is less than two
+ * flash pages in size, then we want to split the write into one or two
+ * page-aligned writes to take advantage of faster buffering. Here we can
+ * adjust the size of the MMC request and let the block layer request handler
+ * deal with generating another MMC request.
+ */
+#define TOSHIBA_MANFID		0x11
+#define TOSHIBA_PAGE_SIZE	16	/* sectors */
+#define TOSHIBA_ADJUST_THRESHOLD 24	/* sectors */
+static bool mmc_adjust_toshiba_write(struct mmc_card *card,
+				     struct mmc_request *mrq)
+{
+	if (mmc_card_mmc(card) && card->cid.manfid == TOSHIBA_MANFID &&
+	    mrq->data->blocks <= TOSHIBA_ADJUST_THRESHOLD) {
+		int sectors_in_page = TOSHIBA_PAGE_SIZE -
+				      (mrq->cmd->arg % TOSHIBA_PAGE_SIZE);
+		if (mrq->data->blocks > sectors_in_page) {
+			mrq->data->blocks = sectors_in_page;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * This is another strange workaround to try to close the gap on Toshiba eMMC
+ * performance when compared to other vendors. In order to take advantage
+ * of certain optimizations and assumptions in those cards, we look for
+ * multiblock write transfers below a certain size and do the following:
+ *
+ * - Break them up into separate page-aligned (8k flash pages) transfers.
+ * - Execute the transfers in reverse order.
+ * - Use "reliable write" transfer mode.
+ *
+ * Neither the block I/O layer nor the scatterlist design lends itself well
+ * to executing a block request out of order. So instead we let
+ * mmc_blk_issue_rq() set up the MMC request for the entire transfer and
+ * then break it up and reorder it here. This also requires that we put the
+ * data into a bounce buffer and send it as individual sg's.
+ */
+#define TOSHIBA_LOW_THRESHOLD	48	/* sectors */
+#define TOSHIBA_HIGH_THRESHOLD	64	/* sectors */
+static bool mmc_handle_toshiba_write(struct mmc_queue *mq,
+				     struct mmc_card *card,
+				     struct mmc_request *mrq)
+{
+	struct mmc_blk_data *md = mq->data;
+	unsigned int first_page, last_page, page;
+	unsigned long flags;
+
+	if (!md->bounce ||
+	    mrq->data->blocks > TOSHIBA_HIGH_THRESHOLD ||
+	    mrq->data->blocks < TOSHIBA_LOW_THRESHOLD)
+		return false;
+
+	first_page = mrq->cmd->arg / TOSHIBA_PAGE_SIZE;
+	last_page = (mrq->cmd->arg + mrq->data->blocks - 1) / TOSHIBA_PAGE_SIZE;
+
+	/* Single page write: just do it the normal way */
+	if (first_page == last_page)
+		return false;
+
+	local_irq_save(flags);
+	sg_copy_to_buffer(mrq->data->sg, mrq->data->sg_len,
+			  md->bounce, mrq->data->blocks * 512);
+	local_irq_restore(flags);
+
+	for (page = last_page; page >= first_page; page--) {
+		unsigned long offset, length;
+		struct mmc_blk_request brq;
+		struct mmc_command cmd;
+		struct scatterlist sg;
+
+		memset(&brq, 0, sizeof(struct mmc_blk_request));
+		brq.mrq.cmd = &brq.cmd;
+		brq.mrq.data = &brq.data;
+
+		brq.cmd.arg = page * TOSHIBA_PAGE_SIZE;
+		brq.data.blksz = 512;
+		if (page == first_page) {
+			brq.cmd.arg = mrq->cmd->arg;
+			brq.data.blocks = TOSHIBA_PAGE_SIZE -
+					  (mrq->cmd->arg % TOSHIBA_PAGE_SIZE);
+		} else if (page == last_page)
+			brq.data.blocks = (mrq->cmd->arg + mrq->data->blocks) %
+					  TOSHIBA_PAGE_SIZE;
+		if (brq.data.blocks == 0)
+			brq.data.blocks = TOSHIBA_PAGE_SIZE;
+
+		if (!mmc_card_blockaddr(card))
+			brq.cmd.arg <<= 9;
+		brq.cmd.flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_ADTC;
+		brq.stop.opcode = MMC_STOP_TRANSMISSION;
+		brq.stop.arg = 0;
+		brq.stop.flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+
+		brq.data.flags |= MMC_DATA_WRITE;
+		if (brq.data.blocks > 1) {
+			if (!mmc_host_is_spi(card->host))
+				brq.mrq.stop = &brq.stop;
+			brq.cmd.opcode = MMC_WRITE_MULTIPLE_BLOCK;
+		} else {
+			brq.mrq.stop = NULL;
+			brq.cmd.opcode = MMC_WRITE_BLOCK;
+		}
+
+		if (brq.cmd.opcode == MMC_WRITE_MULTIPLE_BLOCK &&
+		    brq.data.blocks <= card->ext_csd.rel_wr_sec_c) {
+			int err;
+
+			cmd.opcode = MMC_SET_BLOCK_COUNT;
+			cmd.arg = brq.data.blocks | (1 << 31);
+			cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
+			err = mmc_wait_for_cmd(card->host, &cmd, 0);
+			if (!err)
+				brq.mrq.stop = NULL;
+		}
+
+		mmc_set_data_timeout(&brq.data, card);
+
+		offset = (brq.cmd.arg - mrq->cmd->arg) * 512;
+		length = brq.data.blocks * 512;
+		sg_init_one(&sg, md->bounce + offset, length);
+		brq.data.sg = &sg;
+		brq.data.sg_len = 1;
+
+		mmc_wait_for_req(card->host, &brq.mrq);
+
+		mrq->data->bytes_xfered += brq.data.bytes_xfered;
+
+		if (brq.cmd.error || brq.data.error || brq.stop.error) {
+			mrq->cmd->error = brq.cmd.error;
+			mrq->data->error = brq.data.error;
+			mrq->stop->error = brq.stop.error;
+
+			/*
+			 * We're executing the request backwards, so don't let
+			 * the block layer think some part of it has succeeded.
+			 * It will get it wrong. Since the failure will cause
+			 * us to fall back on single block writes, we're better
+			 * off reporting that none of the data was written.
+			 */
+			mrq->data->bytes_xfered = 0;
+			break;
+		}
+	}
+
+	return true;
+}
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -378,6 +533,9 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 			brq.data.flags |= MMC_DATA_WRITE;
 	}
 
+	if (rq_data_dir(req) == WRITE)
+		mmc_adjust_toshiba_write(card, &brq.mrq);
+
 	mmc_set_data_timeout(&brq.data, card);
 
 	brq.data.sg = mq->sg;
@@ -402,9 +560,14 @@ static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *req)
 			brq.data.sg_len = i;
 		}
 
-		mmc_queue_bounce_pre(mq);
-
-		mmc_wait_for_req(card->host, &brq.mrq);
+		mmc_queue_bounce_pre(mq);
+
+		/*
+		 * Try the workaround first for writes, then fall back.
+		 */
+		if (rq_data_dir(req) != WRITE || disable_multi ||
+		    !mmc_handle_toshiba_write(mq, card, &brq.mrq))
+			mmc_wait_for_req(card->host, &brq.mrq);
 
 		mmc_queue_bounce_post(mq);
 
@@ -589,6 +752,15 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 		goto out;
 	}
 
+	if (card->cid.manfid == TOSHIBA_MANFID && mmc_card_mmc(card)) {
+		pr_info("%s: enable Toshiba workaround\n",
+			mmc_hostname(card->host));
+		md->bounce = kmalloc(TOSHIBA_HIGH_THRESHOLD * 512, GFP_KERNEL);
+		if (!md->bounce) {
+			ret = -ENOMEM;
+			goto err_kfree;
+		}
+	}
 
 	/*
 	 * Set the read-only status based on the supported commands
@@ -655,6 +827,8 @@ static struct mmc_blk_data *mmc_blk_alloc(struct mmc_card *card)
 err_putdisk:
 	put_disk(md->disk);
 err_kfree:
+	if (md->bounce)
+		kfree(md->bounce);
 	kfree(md);
 out:
 	return ERR_PTR(ret);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 45055c4..17eef89 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -307,6 +307,9 @@ static int mmc_read_ext_csd(struct mmc_card *card)
 	else
 		card->erased_byte = 0x0;
 
+	if (card->ext_csd.rev >= 5)
+		card->ext_csd.rel_wr_sec_c = ext_csd[EXT_CSD_REL_WR_SEC_C];
+
 out:
 	kfree(ext_csd);
 
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index 6b75250..fea7ecb 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -43,6 +43,7 @@ struct mmc_csd {
 
 struct mmc_ext_csd {
 	u8			rev;
+	u8			rel_wr_sec_c;
 	u8			erase_group_def;
 	u8			sec_feature_support;
 	unsigned int		sa_timeout;		/* Units: 100ns */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index a5d765c..1e87020 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -260,6 +260,7 @@ struct _mmc_csd {
 #define EXT_CSD_CARD_TYPE	196	/* RO */
 #define EXT_CSD_SEC_CNT		212	/* RO, 4 bytes */
 #define EXT_CSD_S_A_TIMEOUT	217	/* RO */
+#define EXT_CSD_REL_WR_SEC_C	222
 #define EXT_CSD_ERASE_TIMEOUT_MULT	223	/* RO */
 #define EXT_CSD_HC_ERASE_GRP_SIZE	224	/* RO */
 #define EXT_CSD_BOOT_SIZE_MULTI	226
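
As an aside, and purely as an illustration (this is not part of the patch), the chunking arithmetic in mmc_handle_toshiba_write() can be sketched as a stand-alone program, assuming the same 16-sector (8k) flash page size and replacing the MMC structures with plain integers:

/*
 * Illustration only: how a write of "blocks" sectors starting at sector
 * "start" is carved into page-aligned chunks and issued last page first.
 * The loop is written with an explicit break so the sketch stays correct
 * even when first_page happens to be zero.
 */
#include <stdio.h>

#define PAGE_SECTORS 16

static void show_split(unsigned int start, unsigned int blocks)
{
	unsigned int first_page = start / PAGE_SECTORS;
	unsigned int last_page = (start + blocks - 1) / PAGE_SECTORS;
	unsigned int page;

	printf("%u sectors at %u ->\n", blocks, start);
	for (page = last_page; ; page--) {
		unsigned int chunk_start = page * PAGE_SECTORS;
		unsigned int chunk_blocks = 0;

		if (page == first_page) {
			chunk_start = start;
			chunk_blocks = PAGE_SECTORS - (start % PAGE_SECTORS);
		} else if (page == last_page)
			chunk_blocks = (start + blocks) % PAGE_SECTORS;
		if (chunk_blocks == 0)	/* middle page, or page-aligned edge */
			chunk_blocks = PAGE_SECTORS;

		printf("  %u sectors at %u\n", chunk_blocks, chunk_start);
		if (page == first_page)
			break;
	}
}

int main(void)
{
	show_split(100, 56);	/* 56-sector write, 4 sectors into a page */
	return 0;
}

Run as-is this prints 12 sectors at 144, 16 at 128, 16 at 112 and 12 at 100, which is the reverse-order, page-aligned sequence the workaround issues to the card.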