@@ -140,8 +140,9 @@ ssize_t part_inflight_show(struct device *dev,
{
	struct hd_struct *p = dev_to_part(dev);
-	return sprintf(buf, "%8u %8u\n", atomic_read(&p->in_flight[0]),
-		atomic_read(&p->in_flight[1]));
+	return sprintf(buf, "%8u %8u\n",	/* cast: read_all returns long, %u expects unsigned int */
+		(unsigned int)pnode_counter_read_all(&p->in_flight[0]),
+		(unsigned int)pnode_counter_read_all(&p->in_flight[1]));
}
#ifdef CONFIG_FAIL_MAKE_REQUEST
#ifdef CONFIG_FAIL_MAKE_REQUEST
@@ -521,7 +521,7 @@ static void start_io_acct(struct dm_io *io)
cpu = part_stat_lock();
part_round_stats(cpu, &dm_disk(md)->part0);
part_stat_unlock();
- atomic_set(&dm_disk(md)->part0.in_flight[rw],
+ pnode_counter_set(&dm_disk(md)->part0.in_flight[rw],
atomic_inc_return(&md->pending[rw]));
if (unlikely(dm_stats_used(&md->stats)))
@@ -550,7 +550,7 @@ static void end_io_acct(struct dm_io *io)
* a flush.
*/
pending = atomic_dec_return(&md->pending[rw]);
- atomic_set(&dm_disk(md)->part0.in_flight[rw], pending);
+ pnode_counter_set(&dm_disk(md)->part0.in_flight[rw], pending);
pending += atomic_read(&md->pending[rw^0x1]);
/* nudge anyone waiting on suspend queue */
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/uuid.h>
+#include <linux/pernode_counter.h>
#ifdef CONFIG_BLOCK
@@ -120,7 +121,7 @@ struct hd_struct {
int make_it_fail;
#endif
unsigned long stamp;
- atomic_t in_flight[2];
+ struct pnode_counter in_flight[2];
#ifdef CONFIG_SMP
struct disk_stats __percpu *dkstats;
#else
@@ -364,21 +365,22 @@ static inline void free_part_stats(struct hd_struct *part)
static inline void part_inc_in_flight(struct hd_struct *part, int rw)
{
-	atomic_inc(&part->in_flight[rw]);
+	pnode_counter_inc(&part->in_flight[rw]);	/* bump this cpu's node counter */
	if (part->partno)
-		atomic_inc(&part_to_disk(part)->part0.in_flight[rw]);
+		pnode_counter_inc(&part_to_disk(part)->part0.in_flight[rw]);	/* mirror into whole-disk part0 stats */
}
static inline void part_dec_in_flight(struct hd_struct *part, int rw)
{
-	atomic_dec(&part->in_flight[rw]);
+	pnode_counter_dec(&part->in_flight[rw]);	/* drop this cpu's node counter */
	if (part->partno)
-		atomic_dec(&part_to_disk(part)->part0.in_flight[rw]);
+		pnode_counter_dec(&part_to_disk(part)->part0.in_flight[rw]);	/* mirror into whole-disk part0 stats */
}
static inline int part_in_flight(struct hd_struct *part)
{
-	return atomic_read(&part->in_flight[0]) + atomic_read(&part->in_flight[1]);
+	return pnode_counter_read_all(&part->in_flight[0]) +	/* reads + writes; long sum narrowed to int (counts are small) */
+	       pnode_counter_read_all(&part->in_flight[1]);
}
static inline struct partition_meta_info *alloc_part_info(struct gendisk *disk)
@@ -627,11 +629,34 @@ extern ssize_t part_fail_store(struct device *dev,
const char *buf, size_t count);
#endif /* CONFIG_FAIL_MAKE_REQUEST */
+static inline int hd_counter_init(struct hd_struct *part)
+{
+	if (pnode_counter_init(&part->in_flight[0], GFP_KERNEL))	/* read counter */
+		return -ENOMEM;
+	if (pnode_counter_init(&part->in_flight[1], GFP_KERNEL)) {	/* write counter */
+		pnode_counter_deinit(&part->in_flight[0]);	/* unwind on partial failure */
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static inline void hd_counter_deinit(struct hd_struct *part)
+{
+	pnode_counter_deinit(&part->in_flight[0]);	/* read counter */
+	pnode_counter_deinit(&part->in_flight[1]);	/* write counter */
+}
+
static inline int hd_ref_init(struct hd_struct *part)
{
+	if (hd_counter_init(part))	/* allocate per-node in_flight counters first */
+		return -ENOMEM;
+
	if (percpu_ref_init(&part->ref, __delete_partition, 0,
-		GFP_KERNEL))
+		GFP_KERNEL)) {
+		hd_counter_deinit(part);	/* don't leak the counters if ref init fails */
		return -ENOMEM;
+	}
	return 0;
}
@@ -659,6 +684,7 @@ static inline void hd_free_part(struct hd_struct *part)
{
	free_part_stats(part);
	free_part_info(part);
+	hd_counter_deinit(part);	/* free per-node in_flight counters */
	percpu_ref_exit(&part->ref);
}
new file mode 100644
@@ -0,0 +1,118 @@
+#ifndef _LINUX_PERNODE_COUNTER_H
+#define _LINUX_PERNODE_COUNTER_H
+/*
+ * A simple per node atomic counter for use in block io accounting.
+ */
+
+#include <linux/smp.h>
+#include <linux/percpu.h>
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+struct node_counter {
+	atomic_t counter_in_node;	/* aggregate count for one NUMA node */
+};
+
+struct pnode_counter {
+	struct node_counter * __percpu *counter;	/* per-cpu pointer to the owning node's counter */
+	struct node_counter **nodes;	/* nr_node_ids-sized array, indexed by node id */
+};
+
+/*
+ * Allocate one node_counter per possible node plus a per-cpu pointer
+ * array mapping each possible cpu to its node's counter.
+ * Returns 0 on success, -ENOMEM on allocation failure.
+ */
+static inline int pnode_counter_init(struct pnode_counter *pnc, gfp_t gfp)
+{
+	struct node_counter **nodes;
+	int i, cpu;
+
+	nodes = kcalloc(nr_node_ids, sizeof(struct node_counter *), gfp);
+	if (!nodes)
+		goto err_nodes;
+
+	for_each_node(i) {
+		nodes[i] = kzalloc_node(sizeof(struct node_counter), gfp, i);
+		if (!nodes[i])
+			goto err_node_counter;
+	}
+
+	pnc->counter = alloc_percpu_gfp(struct node_counter *, gfp);
+	if (!pnc->counter)
+		goto err_node_counter;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(pnc->counter, cpu) = nodes[cpu_to_node(cpu)];
+
+	pnc->nodes = nodes;
+
+	return 0;
+
+ err_node_counter:
+	for_each_node(i)	/* zero-filled array: kfree(NULL) skips unallocated slots */
+		kfree(nodes[i]);
+	kfree(nodes);
+ err_nodes:
+	return -ENOMEM;
+}
+
+static inline void pnode_counter_deinit(struct pnode_counter *pnc)
+{
+	int i;
+
+	/*
+	 * Free each node's counter exactly once via pnc->nodes. Freeing
+	 * through the per-cpu slots would double-free counters shared by
+	 * CPUs on the same node, and leak counters of CPU-less nodes.
+	 */
+	for_each_node(i)
+		kfree(pnc->nodes[i]);
+	free_percpu(pnc->counter);
+	kfree(pnc->nodes);
+}
+
+static inline void pnode_counter_inc(struct pnode_counter *pnc)
+{
+	struct node_counter *node = this_cpu_read(*pnc->counter);	/* this cpu's node counter */
+
+	atomic_inc(&node->counter_in_node);	/* atomic, so safe even if migrated after the read */
+}
+
+static inline void pnode_counter_inc_cpu(struct pnode_counter *pnc, int cpu)
+{
+	struct node_counter *node = *per_cpu_ptr(pnc->counter, cpu);	/* counter of @cpu's node */
+
+	atomic_inc(&node->counter_in_node);
+}
+
+static inline void pnode_counter_dec(struct pnode_counter *pnc)
+{
+	struct node_counter *node = this_cpu_read(*pnc->counter);	/* this cpu's node counter */
+
+	atomic_dec(&node->counter_in_node);	/* atomic, so safe even if migrated after the read */
+}
+
+static inline void pnode_counter_dec_cpu(struct pnode_counter *pnc, int cpu)
+{
+	struct node_counter *node = *per_cpu_ptr(pnc->counter, cpu);	/* counter of @cpu's node */
+
+	atomic_dec(&node->counter_in_node);
+}
+
+static inline void pnode_counter_set(struct pnode_counter *pnc, int val)
+{
+	int i;
+	struct node_counter *node = this_cpu_read(*pnc->counter);	/* node that receives @val */
+
+	for_each_node(i)	/* NOTE(review): reset+set is not atomic — concurrent inc/dec can be lost; confirm callers tolerate this (the old atomic_set had similar races) */
+		atomic_set(&pnc->nodes[i]->counter_in_node, 0);
+	atomic_set(&node->counter_in_node, val);
+}
+
+static inline long pnode_counter_read_all(struct pnode_counter *pnc)
+{
+	int i;
+	long val = 0;
+
+	for_each_node(i)
+		val += atomic_read(&pnc->nodes[i]->counter_in_node);	/* unsynchronized snapshot */
+
+	return val;	/* may be transiently inaccurate under concurrent updates */
+}
+
+#endif /* _LINUX_PERNODE_COUNTER_H */