
[RFC,6/7] NVMe: Use genalloc to allocate CMB regions

Message ID 1470034653-9097-7-git-send-email-haggaie@mellanox.com (mailing list archive)
State RFC

Commit Message

Haggai Eran Aug. 1, 2016, 6:57 a.m. UTC
Register the CMB in a gen_pool dedicated to manage CMB regions. Use the
pool to allocate the SQs to make sure they are registered.

Signed-off-by: Haggai Eran <haggaie@mellanox.com>
---
 drivers/nvme/host/nvme-pci.h | 24 ++++++++++++++++++++
 drivers/nvme/host/pci.c      | 54 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 74 insertions(+), 4 deletions(-)
 create mode 100644 drivers/nvme/host/nvme-pci.h
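
For readers who have not used genalloc before, the sketch below shows the pattern the patch applies, outside the driver: create a pool with page-sized granularity, seed it with the CMB's CPU and DMA addresses, and carve allocations that return both. Only the gen_pool_* calls are real API; cmb_pool, cmb_pool_init(), cmb_alloc() and cmb_free() are names invented for this illustration (the patch itself keeps the pool in struct nvme_dev and wraps it in nvme_alloc_cmb()/nvme_free_cmb()).

/*
 * Illustrative sketch only; not part of the patch. Mirrors the pool
 * setup done in nvme_map_cmb() and the allocation done for SQs.
 */
#include <linux/genalloc.h>
#include <linux/io.h>
#include <linux/mm.h>

static struct gen_pool *cmb_pool;

static int cmb_pool_init(void __iomem *cmb, dma_addr_t cmb_dma, size_t size)
{
	int ret;

	/* Page-sized minimum allocation granularity, no NUMA preference. */
	cmb_pool = gen_pool_create(PAGE_SHIFT, -1);
	if (!cmb_pool)
		return -ENOMEM;

	/* Seed the pool with the whole CMB: CPU address plus bus address. */
	ret = gen_pool_add_virt(cmb_pool, (unsigned long)(uintptr_t)cmb,
				cmb_dma, size, -1);
	if (ret) {
		gen_pool_destroy(cmb_pool);
		cmb_pool = NULL;
		return ret;
	}
	return 0;
}

/* Carve one region out of the CMB; returns both CPU and DMA addresses. */
static void *cmb_alloc(size_t size, dma_addr_t *dma)
{
	return cmb_pool ? gen_pool_dma_alloc(cmb_pool, size, dma) : NULL;
}

static void cmb_free(void *addr, size_t size)
{
	gen_pool_free(cmb_pool, (unsigned long)(uintptr_t)addr, size);
}

gen_pool_dma_alloc() is what makes genalloc a natural fit here: a single call yields both the kernel address the driver writes commands through and the bus address placed in the create-SQ command, and the pool guarantees that two callers are never handed overlapping ranges.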

Comments

Jon Derrick Aug. 1, 2016, 4:20 p.m. UTC | #1
On Mon, Aug 01, 2016 at 08:53:49AM -0700, Christoph Hellwig wrote:
> On Mon, Aug 01, 2016 at 09:57:32AM +0300, Haggai Eran wrote:
> > Register the CMB in a gen_pool dedicated to manage CMB regions. Use the
> > pool to allocate the SQs to make sure they are registered.
> 
> And why would the NVMe driver care if "they are registered"?
> 
> Once we start allowing diverse CMB uses (what happened to Jon's patches
> btw?) genalloc might be a good backend allocator, but without that
> it's entirely pointless.  Also please don't introduce useless header
> files.
My concern is that using the CMB as a generic memory space could lead to invalid uses and untested paths in broken firmware (leading to bricked drives...). The spec defines what the CMB is allowed to be used for, but doesn't really leave it open for general-purpose usage outside of that. Because of that, I think it needs more hand-holding by the kernel to prevent invalid usage.

What I would like to see is a set of knobs controlling the usage of the 'chunks' of memory (controlled through sysfs/configfs?), with the kernel taking care of allocation. My set was going to expose a resource file for WDS/RDS, but everything else was unhandled except for its current sqes usage. I think genalloc could fit into this scheme quite well.

Thoughts?
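
To make the knob idea a bit more concrete, here is a purely hypothetical sketch (not code from Jon's set or from this series) of what a per-use sysfs attribute on the controller's device could look like. Only the device-attribute machinery and kstrtou64()/scnprintf() are real API; nvme_dev_from_device() and the cmb_sqes_bytes field are invented for the example.

/*
 * Hypothetical: one attribute per CMB use ("sqes" here) caps how many
 * bytes of the CMB the kernel may carve from the gen_pool for that use.
 */
#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t cmb_sqes_bytes_show(struct device *dev,
				   struct device_attribute *attr, char *buf)
{
	struct nvme_dev *ndev = nvme_dev_from_device(dev);	/* invented helper */

	return scnprintf(buf, PAGE_SIZE, "%llu\n",
			 (unsigned long long)ndev->cmb_sqes_bytes);
}

static ssize_t cmb_sqes_bytes_store(struct device *dev,
				    struct device_attribute *attr,
				    const char *buf, size_t count)
{
	struct nvme_dev *ndev = nvme_dev_from_device(dev);
	u64 bytes;
	int ret;

	ret = kstrtou64(buf, 0, &bytes);
	if (ret)
		return ret;
	if (bytes > ndev->cmb_size)
		return -EINVAL;

	/* Consulted before nvme_alloc_cmb() hands out space for SQs. */
	ndev->cmb_sqes_bytes = bytes;
	return count;
}
static DEVICE_ATTR_RW(cmb_sqes_bytes);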
Haggai Eran Aug. 2, 2016, 6:48 a.m. UTC | #2
On Mon, 2016-08-01 at 08:53 -0700, Christoph Hellwig wrote:
> On Mon, Aug 01, 2016 at 09:57:32AM +0300, Haggai Eran wrote:
> > Register the CMB in a gen_pool dedicated to manage CMB regions. Use
> > the pool to allocate the SQs to make sure they are registered.
> And why would the NVMe driver care if "they are registered"?
I just meant that they are allocated through genalloc so that it
doesn't allocate the same buffer again.

> Once we start allowing diverse CMB uses (what happened to Jon's
> patches btw?) genalloc might be a good backend allocator, but without
> that it's entirely pointless.
I missed Jon's patches. I'll have a look. The point in this patch was
to allow this diverse use in the next patch.

> Also please don't introduce useless header files.
Fair enough.

Thanks,
Haggai
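
To illustrate the double-allocation point above, a minimal sketch using the helpers added by this patch: because both allocations come from the same gen_pool, they can never overlap the way independently hand-computed offsets into dev->cmb could. nvme_cmb_demo() and the 64 KiB sizes are invented for the example.

#include <linux/errno.h>
#include <linux/sizes.h>
#include "nvme-pci.h"

static int nvme_cmb_demo(struct nvme_dev *dev)
{
	dma_addr_t sq_dma, buf_dma;
	void *sq, *buf;

	sq = nvme_alloc_cmb(dev, SZ_64K, &sq_dma);
	if (!sq)
		return -ENOMEM;

	/* A second consumer gets a disjoint region (or NULL if the CMB is full). */
	buf = nvme_alloc_cmb(dev, SZ_64K, &buf_dma);
	if (!buf) {
		nvme_free_cmb(dev, sq, SZ_64K);
		return -ENOMEM;
	}

	/* ... use sq and buf via their CPU and DMA addresses ... */

	nvme_free_cmb(dev, buf, SZ_64K);
	nvme_free_cmb(dev, sq, SZ_64K);
	return 0;
}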
Haggai Eran Aug. 2, 2016, 7:15 a.m. UTC | #3
On Mon, 2016-08-01 at 10:20 -0600, Jon Derrick wrote:
> On Mon, Aug 01, 2016 at 08:53:49AM -0700, Christoph Hellwig wrote:
> > On Mon, Aug 01, 2016 at 09:57:32AM +0300, Haggai Eran wrote:
> > > Register the CMB in a gen_pool dedicated to manage CMB regions.
> > > Use the pool to allocate the SQs to make sure they are registered.
> > And why would the NVMe driver care if "they are registered"?
> > 
> > Once we start allowing diverse CMB uses (what happened to Jon's
> > patches btw?) genalloc might be a good backend allocator, but without
> > that it's entirely pointless.  Also please don't introduce useless
> > header files.
> My concern is that using the CMB as a generic memory space could lead
> to invalid uses and untested paths in broken firmware (leading to
> bricked drives...). The spec defines what the CMB is allowed to be
> used for, but doesn't really leave it open for general-purpose usage
> outside of that. Because of that, I think it needs more hand-holding
> by the kernel to prevent invalid usage.

I understand. Did you happen to think of how the kernel would expose
it?

> What I would like to see is a set of knobs controlling the usage of
> the 'chunks' of memory (controlled through sysfs/configfs?), with the
> kernel taking care of allocation. My set was going to expose a
> resource file for WDS/RDS, but everything else was unhandled except
> for its current sqes usage. I think genalloc could fit into this
> scheme quite well.

What do you mean by a resource file? Are you referring to the
sysfs/configfs knob or to a file that exposes the resource to
user-space?

Thanks,
Haggai

Patch

diff --git a/drivers/nvme/host/nvme-pci.h b/drivers/nvme/host/nvme-pci.h
new file mode 100644
index 000000000000..5b29508dc182
--- /dev/null
+++ b/drivers/nvme/host/nvme-pci.h
@@ -0,0 +1,24 @@ 
+/*
+ * Copyright © 2016 Mellanox Technologies. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#ifndef _NVME_PCI_H
+#define _NVME_PCI_H
+
+#include "nvme.h"
+
+struct nvme_dev;
+
+void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr);
+void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size);
+
+#endif
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index befac5b19490..d3da5d9552dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -42,8 +42,10 @@ 
 #include <linux/types.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <asm/unaligned.h>
+#include <linux/genalloc.h>
 
 #include "nvme.h"
+#include "nvme-pci.h"
 
 #define NVME_Q_DEPTH		1024
 #define NVME_AQ_DEPTH		256
@@ -99,6 +101,7 @@  struct nvme_dev {
 	dma_addr_t cmb_dma_addr;
 	u64 cmb_size;
 	u32 cmbsz;
+	struct gen_pool *cmb_pool;
 	struct nvme_ctrl ctrl;
 	struct completion ioq_wait;
 };
@@ -937,11 +940,17 @@  static void nvme_cancel_io(struct request *req, void *data, bool reserved)
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
+	struct nvme_dev *dev = nvmeq->dev;
+
 	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
 	if (nvmeq->sq_cmds)
 		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
 					nvmeq->sq_cmds, nvmeq->sq_dma_addr);
+	if (nvmeq->sq_cmds_io)
+		nvme_free_cmb(dev, nvmeq->sq_cmds_io,
+			      roundup(SQ_SIZE(nvmeq->q_depth),
+				      dev->ctrl.page_size));
 	kfree(nvmeq);
 }
 
@@ -1032,10 +1041,12 @@  static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
 				int qid, int depth)
 {
 	if (qid && dev->cmb && use_cmb_sqes && NVME_CMB_SQS(dev->cmbsz)) {
-		unsigned offset = (qid - 1) * roundup(SQ_SIZE(depth),
-						      dev->ctrl.page_size);
-		nvmeq->sq_dma_addr = dev->cmb_dma_addr + offset;
-		nvmeq->sq_cmds_io = dev->cmb + offset;
+		nvmeq->sq_cmds_io =
+			nvme_alloc_cmb(dev, roundup(SQ_SIZE(depth),
+						    dev->ctrl.page_size),
+				       &nvmeq->sq_dma_addr);
+		if (!nvmeq->sq_cmds_io)
+			return -ENOMEM;
 	} else {
 		nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(depth),
 					&nvmeq->sq_dma_addr, GFP_KERNEL);
@@ -1339,6 +1350,7 @@  static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	void __iomem *cmb;
 	dma_addr_t dma_addr;
+	int ret;
 
 	if (!use_cmb_sqes)
 		return NULL;
@@ -1372,17 +1384,51 @@  static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
 
 	dev->cmb_dma_addr = dma_addr;
 	dev->cmb_size = size;
+
+	dev->cmb_pool = gen_pool_create(PAGE_SHIFT, -1);
+	if (!dev->cmb_pool)
+		goto unmap;
+
+	ret = gen_pool_add_virt(dev->cmb_pool, (unsigned long)(uintptr_t)cmb,
+				dma_addr, size, -1);
+	if (ret)
+		goto destroy_pool;
+
 	return cmb;
+
+destroy_pool:
+	gen_pool_destroy(dev->cmb_pool);
+	dev->cmb_pool = NULL;
+unmap:
+	iounmap(cmb);
+	return NULL;
 }
 
 static inline void nvme_release_cmb(struct nvme_dev *dev)
 {
 	if (dev->cmb) {
+		gen_pool_destroy(dev->cmb_pool);
 		iounmap(dev->cmb);
 		dev->cmb = NULL;
 	}
 }
 
+void *nvme_alloc_cmb(struct nvme_dev *dev, size_t size, dma_addr_t *dma_addr)
+{
+	if (!dev->cmb_pool)
+		return NULL;
+
+	return gen_pool_dma_alloc(dev->cmb_pool, size, dma_addr);
+}
+
+void nvme_free_cmb(struct nvme_dev *dev, void *addr, size_t size)
+{
+	if (WARN_ON(!dev->cmb_pool))
+		return;
+
+	gen_pool_free(dev->cmb_pool, (unsigned long)(uintptr_t)addr, size);
+}
+
 static size_t db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
 {
 	return 4096 + ((nr_io_queues + 1) * 8 * dev->db_stride);