diff mbox series

[v13,04/12] bcache: bch_nvmpg_alloc_pages() of the buddy

Message ID 20211212170552.2812-5-colyli@suse.de (mailing list archive)
State New, archived
Headers show
Series bcache for 5.17: enable NVDIMM for bcache journal | expand

Commit Message

Coly Li Dec. 12, 2021, 5:05 p.m. UTC
From: Jianpeng Ma <jianpeng.ma@intel.com>

This patch implements bch_nvmpg_alloc_pages() of the nvm pages buddy
allocator. Functionally, it is similar to the current page buddy
allocator, but with the following differences:
a: it needs owner_uuid as a parameter, which records the owner info, and
it makes that info persistent.
b: it doesn't need flags like GFP_*. All allocations are equal.
c: it doesn't trigger other operations such as swap/recycle.

Signed-off-by: Jianpeng Ma <jianpeng.ma@intel.com>
Co-developed-by: Qiaowei Ren <qiaowei.ren@intel.com>
Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/nvmpg.c | 221 ++++++++++++++++++++++++++++++++++++++
 drivers/md/bcache/nvmpg.h |   9 ++
 2 files changed, 230 insertions(+)

Comments

Jens Axboe Dec. 12, 2021, 8:14 p.m. UTC | #1
On 12/12/21 10:05 AM, Coly Li wrote:
> +/* If not found, it will create if create == true */
> +static struct bch_nvmpg_head *find_nvmpg_head(const char *uuid, bool create)
> +{
> +	struct bch_nvmpg_set_header *set_header = global_nvmpg_set->set_header;
> +	struct bch_nvmpg_head *head = NULL;
> +	int i;
> +
> +	if (set_header == NULL)
> +		goto out;
> +
> +	for (i = 0; i < set_header->size; i++) {
> +		struct bch_nvmpg_head *h = &set_header->heads[i];
> +
> +		if (h->state != BCH_NVMPG_HD_STAT_ALLOC)
> +			continue;
> +
> +		if (!memcmp(uuid, h->uuid, 16)) {
> +			head = h;
> +			break;
> +		}
> +	}
> +
> +	if (!head && create) {
> +		u32 used = set_header->used;
> +
> +		if (set_header->size > used) {
> +			head = &set_header->heads[used];
> +			memset(head, 0, sizeof(struct bch_nvmpg_head));
> +			head->state = BCH_NVMPG_HD_STAT_ALLOC;
> +			memcpy(head->uuid, uuid, 16);
> +			global_nvmpg_set->heads_used++;
> +			set_header->used++;
> +		} else
> +			pr_info("No free bch_nvmpg_head\n");
> +	}

Use {} consistently. Again probably just some printk that should go
away.

> +static struct bch_nvmpg_recs *find_nvmpg_recs(struct bch_nvmpg_ns *ns,
> +					      struct bch_nvmpg_head *head,
> +					      bool create)
> +{
> +	int ns_id = ns->sb->this_ns;
> +	struct bch_nvmpg_recs *prev_recs = NULL, *recs = NULL;
> +
> +	recs = bch_nvmpg_offset_to_ptr(head->recs_offset[ns_id]);
> +
> +	/* If create=false, we return recs[nr] */
> +	if (!create)
> +		return recs;

Would this be cleaner to handle in the caller?

> +static void add_nvmpg_rec(struct bch_nvmpg_ns *ns,
> +			  struct bch_nvmpg_recs *recs,
> +			  unsigned long nvmpg_offset,
> +			  int order)
> +{
> +	int i, ns_id;
> +	unsigned long pgoff;
> +
> +	pgoff = bch_nvmpg_offset_to_pgoff(nvmpg_offset);
> +	ns_id = ns->sb->this_ns;
> +
> +	for (i = 0; i < recs->size; i++) {
> +		if (recs->recs[i].pgoff == 0) {
> +			recs->recs[i].pgoff = pgoff;
> +			recs->recs[i].order = order;
> +			recs->recs[i].ns_id = ns_id;
> +			recs->used++;
> +			break;
> +		}
> +	}
> +	BUG_ON(i == recs->size);

No BUG_ON's, please. It only truly belongs in core code for cases where
error handling isn't possible, does not apply here.

> diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
> index 55778d4db7da..d03f3241b45a 100644
> --- a/drivers/md/bcache/nvmpg.h
> +++ b/drivers/md/bcache/nvmpg.h
> @@ -76,6 +76,9 @@ struct bch_nvmpg_set {
>  /* Indicate which field in bch_nvmpg_sb to be updated */
>  #define BCH_NVMPG_TOTAL_NS	0	/* total_ns */
>  
> +#define BCH_PGOFF_TO_KVADDR(pgoff)					\
> +	((void *)((unsigned long)(pgoff) << PAGE_SHIFT))

Pretty sure we have a general kernel helper for this, better to use that
rather than duplicate it.
Coly Li Dec. 28, 2021, 5:29 a.m. UTC | #2
On 12/13/21 4:14 AM, Jens Axboe wrote:
> On 12/12/21 10:05 AM, Coly Li wrote:
>> +/* If not found, it will create if create == true */
>> +static struct bch_nvmpg_head *find_nvmpg_head(const char *uuid, bool create)
>> +{
>> +	struct bch_nvmpg_set_header *set_header = global_nvmpg_set->set_header;
>> +	struct bch_nvmpg_head *head = NULL;
>> +	int i;
>> +
>> +	if (set_header == NULL)
>> +		goto out;
>> +
>> +	for (i = 0; i < set_header->size; i++) {
>> +		struct bch_nvmpg_head *h = &set_header->heads[i];
>> +
>> +		if (h->state != BCH_NVMPG_HD_STAT_ALLOC)
>> +			continue;
>> +
>> +		if (!memcmp(uuid, h->uuid, 16)) {
>> +			head = h;
>> +			break;
>> +		}
>> +	}
>> +
>> +	if (!head && create) {
>> +		u32 used = set_header->used;
>> +
>> +		if (set_header->size > used) {
>> +			head = &set_header->heads[used];
>> +			memset(head, 0, sizeof(struct bch_nvmpg_head));
>> +			head->state = BCH_NVMPG_HD_STAT_ALLOC;
>> +			memcpy(head->uuid, uuid, 16);
>> +			global_nvmpg_set->heads_used++;
>> +			set_header->used++;
>> +		} else
>> +			pr_info("No free bch_nvmpg_head\n");
>> +	}
> Use {} consistently. Again probably just some printk that should go
> away.

Copied.

>> +static struct bch_nvmpg_recs *find_nvmpg_recs(struct bch_nvmpg_ns *ns,
>> +					      struct bch_nvmpg_head *head,
>> +					      bool create)
>> +{
>> +	int ns_id = ns->sb->this_ns;
>> +	struct bch_nvmpg_recs *prev_recs = NULL, *recs = NULL;
>> +
>> +	recs = bch_nvmpg_offset_to_ptr(head->recs_offset[ns_id]);
>> +
>> +	/* If create=false, we return recs[nr] */
>> +	if (!create)
>> +		return recs;
> Would this be cleaner to handle in the caller?

Sure, I will suggest that Jianpeng and Qiaowei change this.

>> +static void add_nvmpg_rec(struct bch_nvmpg_ns *ns,
>> +			  struct bch_nvmpg_recs *recs,
>> +			  unsigned long nvmpg_offset,
>> +			  int order)
>> +{
>> +	int i, ns_id;
>> +	unsigned long pgoff;
>> +
>> +	pgoff = bch_nvmpg_offset_to_pgoff(nvmpg_offset);
>> +	ns_id = ns->sb->this_ns;
>> +
>> +	for (i = 0; i < recs->size; i++) {
>> +		if (recs->recs[i].pgoff == 0) {
>> +			recs->recs[i].pgoff = pgoff;
>> +			recs->recs[i].order = order;
>> +			recs->recs[i].ns_id = ns_id;
>> +			recs->used++;
>> +			break;
>> +		}
>> +	}
>> +	BUG_ON(i == recs->size);
> No BUG_ON's, please. It only truly belongs in core code for cases where
> error handling isn't possible, does not apply here.

This is because currently only a single record is allocated for the bcache 
journal, so if i == recs->size happens it means the on-NVDIMM struct 
bch_nvmpg_recs is corrupted.

We are currently working on storing Btree nodes on NVDIMM, and in that 
work such a BUG_ON() is dropped.


>> diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
>> index 55778d4db7da..d03f3241b45a 100644
>> --- a/drivers/md/bcache/nvmpg.h
>> +++ b/drivers/md/bcache/nvmpg.h
>> @@ -76,6 +76,9 @@ struct bch_nvmpg_set {
>>   /* Indicate which field in bch_nvmpg_sb to be updated */
>>   #define BCH_NVMPG_TOTAL_NS	0	/* total_ns */
>>   
>> +#define BCH_PGOFF_TO_KVADDR(pgoff)					\
>> +	((void *)((unsigned long)(pgoff) << PAGE_SHIFT))
> Pretty sure we have a general kernel helper for this, better to use that
> rather than duplicate it.
>
>
Copied. Thanks for pointing this out.

Coly Li
diff mbox series

Patch

diff --git a/drivers/md/bcache/nvmpg.c b/drivers/md/bcache/nvmpg.c
index 2b70ee4a6028..a920779eb548 100644
--- a/drivers/md/bcache/nvmpg.c
+++ b/drivers/md/bcache/nvmpg.c
@@ -42,6 +42,11 @@  void *bch_nvmpg_offset_to_ptr(unsigned long offset)
 	return NULL;
 }
 
+/*
+ * Convert an nvmpg offset into a page offset: the value is first passed
+ * through BCH_NVMPG_GET_OFFSET() (presumably dropping the namespace id
+ * bits -- confirm against the macro definition) and then scaled down from
+ * bytes to pages.
+ */
+static unsigned long bch_nvmpg_offset_to_pgoff(unsigned long nvmpg_offset)
+{
+	unsigned long byte_offset = BCH_NVMPG_GET_OFFSET(nvmpg_offset);
+
+	return byte_offset >> PAGE_SHIFT;
+}
+
 unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr)
 {
 	int ns_id = ns->ns_id;
@@ -60,6 +65,15 @@  static void *bch_nvmpg_pgoff_to_ptr(struct bch_nvmpg_ns *ns, pgoff_t pgoff)
 	return ns->base_addr + (pgoff << PAGE_SHIFT);
 }
 
+/*
+ * Build the nvmpg offset of page @pgoff inside @ns: the page offset is
+ * scaled up to a byte offset and combined with ns->ns_id through
+ * BCH_NVMPG_OFFSET().
+ */
+static unsigned long bch_nvmpg_pgoff_to_offset(struct bch_nvmpg_ns *ns,
+					       pgoff_t pgoff)
+{
+	unsigned long byte_offset = pgoff << PAGE_SHIFT;
+	int id = ns->ns_id;
+
+	return BCH_NVMPG_OFFSET(id, byte_offset);
+}
+
 static void *bch_nvmpg_rec_to_ptr(struct bch_nvmpg_rec *r)
 {
 	struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[r->ns_id];
@@ -269,6 +283,213 @@  static void bch_nvmpg_init_free_space(struct bch_nvmpg_ns *ns)
 	}
 }
 
+
+/*
+ * Look up the bch_nvmpg_head owned by @uuid in the global set header.
+ *
+ * Only heads in state BCH_NVMPG_HD_STAT_ALLOC are compared; the first one
+ * whose 16-byte uuid equals @uuid is returned.  If nothing matches and
+ * @create is true, the slot at index set_header->used is zeroed, marked
+ * allocated, tagged with @uuid, and both usage counters are incremented.
+ *
+ * Returns the head, or NULL when the set header is missing, when no head
+ * matches and @create is false, or when no free slot is left.
+ */
+static struct bch_nvmpg_head *find_nvmpg_head(const char *uuid, bool create)
+{
+	struct bch_nvmpg_set_header *set_header = global_nvmpg_set->set_header;
+	struct bch_nvmpg_head *head;
+	u32 used;
+	int i;
+
+	if (!set_header)
+		return NULL;
+
+	for (i = 0; i < set_header->size; i++) {
+		head = &set_header->heads[i];
+
+		if (head->state == BCH_NVMPG_HD_STAT_ALLOC &&
+		    !memcmp(uuid, head->uuid, 16))
+			return head;
+	}
+
+	if (!create)
+		return NULL;
+
+	used = set_header->used;
+	if (used >= set_header->size) {
+		pr_info("No free bch_nvmpg_head\n");
+		return NULL;
+	}
+
+	/*
+	 * NOTE(review): this takes heads[used] as the next free slot, i.e. it
+	 * assumes allocated heads are packed at the front of the array --
+	 * confirm against the code that releases heads.
+	 */
+	head = &set_header->heads[used];
+	memset(head, 0, sizeof(*head));
+	head->state = BCH_NVMPG_HD_STAT_ALLOC;
+	memcpy(head->uuid, uuid, 16);
+	global_nvmpg_set->heads_used++;
+	set_header->used++;
+
+	return head;
+}
+
+/*
+ * Claim one unused struct bch_nvmpg_recs from the array that starts at
+ * BCH_NVMPG_SYSRECS_OFFSET.
+ *
+ * Usage is tracked in the recs_bitmap of the first namespace
+ * (global_nvmpg_set->ns_tbl[0]).  The claimed entry is marked in that
+ * bitmap and zero-filled before it is returned.
+ *
+ * Returns the entry, or NULL if the first namespace is absent, the array
+ * base cannot be resolved, or all BCH_MAX_PGALLOC_RECS entries are taken.
+ */
+static struct bch_nvmpg_recs *find_empty_nvmpg_recs(void)
+{
+	struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[0];
+	struct bch_nvmpg_recs *base, *recs;
+	unsigned int start;
+
+	if (!ns)
+		return NULL;
+
+	/*
+	 * Resolve the array base first: bch_nvmpg_offset_to_ptr() can return
+	 * NULL, and no bitmap bit must be consumed in that case.
+	 */
+	base = bch_nvmpg_offset_to_ptr(BCH_NVMPG_SYSRECS_OFFSET);
+	if (!base)
+		return NULL;
+
+	start = bitmap_find_next_zero_area(ns->recs_bitmap,
+					   BCH_MAX_PGALLOC_RECS, 0, 1, 0);
+	/* Any index at or beyond the bitmap size means "nothing free" */
+	if (start >= BCH_MAX_PGALLOC_RECS) {
+		pr_info("No free struct bch_nvmpg_recs\n");
+		return NULL;
+	}
+
+	bitmap_set(ns->recs_bitmap, start, 1);
+	recs = base + start;
+
+	memset(recs, 0, sizeof(*recs));
+	return recs;
+}
+
+
+/*
+ * Find the struct bch_nvmpg_recs that @head uses for namespace @ns.
+ *
+ * With @create == false this is a plain lookup: whatever
+ * head->recs_offset[] resolves to for this namespace is returned (this
+ * may be NULL).
+ *
+ * With @create == true the caller wants room for one more record: the
+ * chain is followed through next_offset until a recs with used != size
+ * is found.  If every recs in the chain is full (or the chain is empty),
+ * a new one is taken from find_empty_nvmpg_recs(), initialised, and
+ * linked either after the last full recs or directly into @head.
+ *
+ * Returns the recs, or NULL if a new one was needed but none is free.
+ */
+static struct bch_nvmpg_recs *find_nvmpg_recs(struct bch_nvmpg_ns *ns,
+					      struct bch_nvmpg_head *head,
+					      bool create)
+{
+	struct bch_nvmpg_recs *cur, *tail = NULL;
+	unsigned long new_offset;
+	int ns_id = ns->sb->this_ns;
+
+	cur = bch_nvmpg_offset_to_ptr(head->recs_offset[ns_id]);
+
+	/* Lookup only: hand back the first recs of the chain as-is */
+	if (!create)
+		return cur;
+
+	/* Walk past full recs, remembering the last one visited */
+	for (; cur && (cur->used == cur->size);
+	     cur = bch_nvmpg_offset_to_ptr(cur->next_offset))
+		tail = cur;
+
+	/* A recs with spare room already exists */
+	if (cur)
+		return cur;
+
+	/* Chain exhausted: take a fresh recs and append it */
+	cur = find_empty_nvmpg_recs();
+	if (!cur)
+		return NULL;
+
+	cur->next_offset = 0;
+	cur->head_offset = bch_nvmpg_ptr_to_offset(ns, head);
+	memcpy(cur->magic, bch_nvmpg_recs_magic, 16);
+	memcpy(cur->uuid, head->uuid, 16);
+	cur->size = BCH_NVMPG_MAX_RECS;
+	cur->used = 0;
+
+	new_offset = bch_nvmpg_ptr_to_offset(ns, cur);
+	if (tail)
+		tail->next_offset = new_offset;
+	else
+		head->recs_offset[ns_id] = new_offset;
+
+	return cur;
+}
+
+/*
+ * Record an allocation in @recs.
+ *
+ * The first entry of recs->recs[] whose pgoff is 0 is treated as free and
+ * is filled with the page offset derived from @nvmpg_offset, @order and
+ * the namespace id taken from ns->sb->this_ns; recs->used is incremented.
+ *
+ * A NULL @recs, or a @recs without any free entry, indicates a caller bug
+ * or a corrupted on-media structure.  Both cases emit a one-time warning
+ * and leave @recs untouched instead of crashing the machine.
+ */
+static void add_nvmpg_rec(struct bch_nvmpg_ns *ns,
+			  struct bch_nvmpg_recs *recs,
+			  unsigned long nvmpg_offset,
+			  int order)
+{
+	unsigned long pgoff = bch_nvmpg_offset_to_pgoff(nvmpg_offset);
+	int ns_id = ns->sb->this_ns;
+	int i;
+
+	if (WARN_ON_ONCE(!recs))
+		return;
+
+	for (i = 0; i < recs->size; i++) {
+		if (recs->recs[i].pgoff != 0)
+			continue;
+
+		recs->recs[i].pgoff = pgoff;
+		recs->recs[i].order = order;
+		recs->recs[i].ns_id = ns_id;
+		recs->used++;
+		return;
+	}
+
+	/* Every entry is occupied although the caller expected a free one */
+	WARN_ONCE(1, "bch_nvmpg_recs has no free record slot\n");
+}
+
+
+/*
+ * Allocate a block of 2^@order pages on behalf of the owner @uuid.
+ *
+ * Runs under global_nvmpg_set->lock.  The owner's head is looked up (and
+ * created if it does not exist yet), then the namespaces are tried in
+ * table order.  In the first namespace that has enough free pages, a
+ * non-empty free list of order >= @order and room for one more allocation
+ * record, a block is removed from that free list, split down to @order
+ * (the upper halves go back onto the lower-order free lists), and the
+ * allocation is recorded for the owner.
+ *
+ * Returns the nvmpg offset of the block, or 0 if @order is out of range,
+ * no head is available, or no namespace can satisfy the request.
+ */
+unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+{
+	unsigned long nvmpg_offset = 0;
+	struct bch_nvmpg_head *head;
+	int n, o;
+
+	/* 1L << order below is undefined for an out-of-range order */
+	if (order < 0 || order >= BCH_MAX_ORDER)
+		return 0;
+
+	mutex_lock(&global_nvmpg_set->lock);
+	head = find_nvmpg_head(uuid, true);
+
+	if (!head) {
+		pr_err("Cannot find bch_nvmpg_recs by uuid.\n");
+		goto unlock;
+	}
+
+	for (n = 0; n < global_nvmpg_set->total_ns; n++) {
+		struct bch_nvmpg_ns *ns = global_nvmpg_set->ns_tbl[n];
+		struct bch_nvmpg_recs *recs;
+		struct list_head *list;
+		struct page *page, *buddy_page;
+
+		if (!ns || (ns->free < (1L << order)))
+			continue;
+
+		/* Smallest order >= @order that has a free block */
+		for (o = order; o < BCH_MAX_ORDER; o++) {
+			if (!list_empty(&ns->free_area[o]))
+				break;
+		}
+		if (o == BCH_MAX_ORDER)
+			continue;
+
+		/*
+		 * Secure the record slot before touching the free lists, so
+		 * that a failure here cannot leave pages removed from the
+		 * allocator without an owner record.
+		 */
+		recs = find_nvmpg_recs(ns, head, true);
+		if (!recs)
+			continue;
+
+		list = ns->free_area[o].next;
+		page = container_of((void *)list, struct page,
+				    zone_device_data);
+
+		list_del(list);
+
+		/* Split: give the upper half back at each step down */
+		while (o != order) {
+			void *addr;
+			pgoff_t pgoff;
+
+			pgoff = page->index + (1L << (o - 1));
+			addr = bch_nvmpg_pgoff_to_ptr(ns, pgoff);
+			buddy_page = bch_nvmpg_va_to_pg(addr);
+			set_page_private(buddy_page, o - 1);
+			buddy_page->index = pgoff;
+			__SetPageBuddy(buddy_page);
+			list_add((struct list_head *)&buddy_page->zone_device_data,
+				 &ns->free_area[o - 1]);
+			o--;
+		}
+
+		set_page_private(page, order);
+		__ClearPageBuddy(page);
+		ns->free -= 1L << order;
+		nvmpg_offset = bch_nvmpg_pgoff_to_offset(ns, page->index);
+
+		add_nvmpg_rec(ns, recs, nvmpg_offset, order);
+		break;
+	}
+
+unlock:
+	mutex_unlock(&global_nvmpg_set->lock);
+	return nvmpg_offset;
+}
+
 static int attach_nvmpg_set(struct bch_nvmpg_ns *ns)
 {
 	struct bch_nvmpg_sb *sb = ns->sb;
diff --git a/drivers/md/bcache/nvmpg.h b/drivers/md/bcache/nvmpg.h
index 55778d4db7da..d03f3241b45a 100644
--- a/drivers/md/bcache/nvmpg.h
+++ b/drivers/md/bcache/nvmpg.h
@@ -76,6 +76,9 @@  struct bch_nvmpg_set {
 /* Indicate which field in bch_nvmpg_sb to be updated */
 #define BCH_NVMPG_TOTAL_NS	0	/* total_ns */
 
+#define BCH_PGOFF_TO_KVADDR(pgoff)	/* pgoff << PAGE_SHIFT, as void * */	\
+	((void *)((unsigned long)(pgoff) << PAGE_SHIFT))
+
 #define BCH_MAX_PGALLOC_RECS						\
 	(min_t(unsigned int, 64,					\
 	       (BCH_NVMPG_START - BCH_NVMPG_SYSRECS_OFFSET) /		\
@@ -89,6 +92,7 @@  unsigned long bch_nvmpg_ptr_to_offset(struct bch_nvmpg_ns *ns, void *ptr);
 struct bch_nvmpg_ns *bch_register_namespace(const char *dev_path);
 int bch_nvmpg_init(void);
 void bch_nvmpg_exit(void);
+unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid);
 
 #else
 
@@ -104,6 +108,11 @@  static inline int bch_nvmpg_init(void)
 
 static inline void bch_nvmpg_exit(void) { }
 
+static inline unsigned long bch_nvmpg_alloc_pages(int order, const char *uuid)
+{
+	return 0;	/* stub without CONFIG_BCACHE_NVM_PAGES */
+}
+
 #endif /* CONFIG_BCACHE_NVM_PAGES */
 
 #endif /* _BCACHE_NVM_PAGES_H */