@@ -95,9 +95,6 @@ static void __blkg_release(struct rcu_head *rcu)
css_put(&blkg->blkcg->css);
if (blkg->parent)
blkg_put(blkg->parent);
-
- wb_congested_put(blkg->wb_congested);
-
blkg_free(blkg);
}
@@ -227,7 +224,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
struct blkcg_gq *new_blkg)
{
struct blkcg_gq *blkg;
- struct bdi_writeback_congested *wb_congested;
int i, ret;
WARN_ON_ONCE(!rcu_read_lock_held());
@@ -245,31 +241,22 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
goto err_free_blkg;
}
- wb_congested = wb_congested_get_create(q->backing_dev_info,
- blkcg->css.id,
- GFP_NOWAIT | __GFP_NOWARN);
- if (!wb_congested) {
- ret = -ENOMEM;
- goto err_put_css;
- }
-
/* allocate */
if (!new_blkg) {
new_blkg = blkg_alloc(blkcg, q, GFP_NOWAIT | __GFP_NOWARN);
if (unlikely(!new_blkg)) {
ret = -ENOMEM;
- goto err_put_congested;
+ goto err_put_css;
}
}
blkg = new_blkg;
- blkg->wb_congested = wb_congested;
/* link parent */
if (blkcg_parent(blkcg)) {
blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
if (WARN_ON_ONCE(!blkg->parent)) {
ret = -ENODEV;
- goto err_put_congested;
+ goto err_put_css;
}
blkg_get(blkg->parent);
}
@@ -306,8 +293,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg_put(blkg);
return ERR_PTR(ret);
-err_put_congested:
- wb_congested_put(wb_congested);
err_put_css:
css_put(&blkcg->css);
err_free_blkg:
@@ -1838,7 +1838,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
* top-level queue for congestion.
*/
struct backing_dev_info *bdi = md->queue->backing_dev_info;
- r = bdi->wb.congested->state & bdi_bits;
+ r = bdi->wb.congested & bdi_bits;
} else {
map = dm_get_live_table_fast(md);
if (map)
@@ -87,26 +87,6 @@ struct wb_completion {
#define DEFINE_WB_COMPLETION(cmpl, bdi) \
struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
-/*
- * For cgroup writeback, multiple wb's may map to the same blkcg. Those
- * wb's can operate mostly independently but should share the congested
- * state. To facilitate such sharing, the congested state is tracked using
- * the following struct which is created on demand, indexed by blkcg ID on
- * its bdi, and refcounted.
- */
-struct bdi_writeback_congested {
- unsigned long state; /* WB_[a]sync_congested flags */
- refcount_t refcnt; /* nr of attached wb's and blkg */
-
-#ifdef CONFIG_CGROUP_WRITEBACK
- struct backing_dev_info *__bdi; /* the associated bdi, set to NULL
- * on bdi unregistration. For memcg-wb
- * internal use only! */
- int blkcg_id; /* ID of the associated blkcg */
- struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */
-#endif
-};
-
/*
* Each wb (bdi_writeback) can perform writeback operations, is measured
* and throttled, independently. Without cgroup writeback, each bdi
@@ -140,7 +120,7 @@ struct bdi_writeback {
struct percpu_counter stat[NR_WB_STAT_ITEMS];
- struct bdi_writeback_congested *congested;
+ unsigned long congested; /* WB_[a]sync_congested flags */
unsigned long bw_time_stamp; /* last time write bw is updated */
unsigned long dirtied_stamp;
@@ -208,11 +188,8 @@ struct backing_dev_info {
struct list_head wb_list; /* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
- struct rb_root cgwb_congested_tree; /* their congested states */
struct mutex cgwb_release_mutex; /* protect shutdown of wb structs */
struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
-#else
- struct bdi_writeback_congested *wb_congested;
#endif
wait_queue_head_t wb_waitq;
@@ -173,7 +173,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
if (bdi->congested_fn)
return bdi->congested_fn(bdi->congested_data, cong_bits);
- return wb->congested->state & cong_bits;
+ return wb->congested & cong_bits;
}
long congestion_wait(int sync, long timeout);
@@ -224,9 +224,6 @@ static inline int bdi_sched_wait(void *word)
#ifdef CONFIG_CGROUP_WRITEBACK
-struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp);
-void wb_congested_put(struct bdi_writeback_congested *congested);
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi,
struct cgroup_subsys_state *memcg_css);
struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
@@ -404,19 +401,6 @@ static inline bool inode_cgwb_enabled(struct inode *inode)
return false;
}
-static inline struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
-{
- refcount_inc(&bdi->wb_congested->refcnt);
- return bdi->wb_congested;
-}
-
-static inline void wb_congested_put(struct bdi_writeback_congested *congested)
-{
- if (refcount_dec_and_test(&congested->refcnt))
- kfree(congested);
-}
-
static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi)
{
return &bdi->wb;
@@ -109,12 +109,6 @@ struct blkcg_gq {
struct hlist_node blkcg_node;
struct blkcg *blkcg;
- /*
- * Each blkg gets congested separately and the congestion state is
- * propagated to the matching bdi_writeback_congested.
- */
- struct bdi_writeback_congested *wb_congested;
-
/* all non-root blkcg_gq's are guaranteed to have access to parent */
struct blkcg_gq *parent;
@@ -281,7 +281,7 @@ void wb_wakeup_delayed(struct bdi_writeback *wb)
#define INIT_BW (100 << (20 - PAGE_SHIFT))
static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
- int blkcg_id, gfp_t gfp)
+ gfp_t gfp)
{
int i, err;
@@ -308,15 +308,9 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
wb->dirty_sleep = jiffies;
- wb->congested = wb_congested_get_create(bdi, blkcg_id, gfp);
- if (!wb->congested) {
- err = -ENOMEM;
- goto out_put_bdi;
- }
-
err = fprop_local_init_percpu(&wb->completions, gfp);
if (err)
- goto out_put_cong;
+ goto out_put_bdi;
for (i = 0; i < NR_WB_STAT_ITEMS; i++) {
err = percpu_counter_init(&wb->stat[i], 0, gfp);
@@ -330,8 +324,6 @@ static int wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi,
while (i--)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
-out_put_cong:
- wb_congested_put(wb->congested);
out_put_bdi:
if (wb != &bdi->wb)
bdi_put(bdi);
@@ -374,7 +366,6 @@ static void wb_exit(struct bdi_writeback *wb)
percpu_counter_destroy(&wb->stat[i]);
fprop_local_destroy_percpu(&wb->completions);
- wb_congested_put(wb->congested);
if (wb != &wb->bdi->wb)
bdi_put(wb->bdi);
}
@@ -384,99 +375,12 @@ static void wb_exit(struct bdi_writeback *wb)
#include <linux/memcontrol.h>
/*
- * cgwb_lock protects bdi->cgwb_tree, bdi->cgwb_congested_tree,
- * blkcg->cgwb_list, and memcg->cgwb_list. bdi->cgwb_tree is also RCU
- * protected.
+ * cgwb_lock protects bdi->cgwb_tree, blkcg->cgwb_list, and memcg->cgwb_list.
+ * bdi->cgwb_tree is also RCU protected.
*/
static DEFINE_SPINLOCK(cgwb_lock);
static struct workqueue_struct *cgwb_release_wq;
-/**
- * wb_congested_get_create - get or create a wb_congested
- * @bdi: associated bdi
- * @blkcg_id: ID of the associated blkcg
- * @gfp: allocation mask
- *
- * Look up the wb_congested for @blkcg_id on @bdi. If missing, create one.
- * The returned wb_congested has its reference count incremented. Returns
- * NULL on failure.
- */
-struct bdi_writeback_congested *
-wb_congested_get_create(struct backing_dev_info *bdi, int blkcg_id, gfp_t gfp)
-{
- struct bdi_writeback_congested *new_congested = NULL, *congested;
- struct rb_node **node, *parent;
- unsigned long flags;
-retry:
- spin_lock_irqsave(&cgwb_lock, flags);
-
- node = &bdi->cgwb_congested_tree.rb_node;
- parent = NULL;
-
- while (*node != NULL) {
- parent = *node;
- congested = rb_entry(parent, struct bdi_writeback_congested,
- rb_node);
- if (congested->blkcg_id < blkcg_id)
- node = &parent->rb_left;
- else if (congested->blkcg_id > blkcg_id)
- node = &parent->rb_right;
- else
- goto found;
- }
-
- if (new_congested) {
- /* !found and storage for new one already allocated, insert */
- congested = new_congested;
- rb_link_node(&congested->rb_node, parent, node);
- rb_insert_color(&congested->rb_node, &bdi->cgwb_congested_tree);
- spin_unlock_irqrestore(&cgwb_lock, flags);
- return congested;
- }
-
- spin_unlock_irqrestore(&cgwb_lock, flags);
-
- /* allocate storage for new one and retry */
- new_congested = kzalloc(sizeof(*new_congested), gfp);
- if (!new_congested)
- return NULL;
-
- refcount_set(&new_congested->refcnt, 1);
- new_congested->__bdi = bdi;
- new_congested->blkcg_id = blkcg_id;
- goto retry;
-
-found:
- refcount_inc(&congested->refcnt);
- spin_unlock_irqrestore(&cgwb_lock, flags);
- kfree(new_congested);
- return congested;
-}
-
-/**
- * wb_congested_put - put a wb_congested
- * @congested: wb_congested to put
- *
- * Put @congested and destroy it if the refcnt reaches zero.
- */
-void wb_congested_put(struct bdi_writeback_congested *congested)
-{
- unsigned long flags;
-
- if (!refcount_dec_and_lock_irqsave(&congested->refcnt, &cgwb_lock, &flags))
- return;
-
- /* bdi might already have been destroyed leaving @congested unlinked */
- if (congested->__bdi) {
- rb_erase(&congested->rb_node,
- &congested->__bdi->cgwb_congested_tree);
- congested->__bdi = NULL;
- }
-
- spin_unlock_irqrestore(&cgwb_lock, flags);
- kfree(congested);
-}
-
static void cgwb_release_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
@@ -558,7 +462,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
goto out_put;
}
- ret = wb_init(wb, bdi, blkcg_css->id, gfp);
+ ret = wb_init(wb, bdi, gfp);
if (ret)
goto err_free;
@@ -696,11 +600,10 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
int ret;
INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
- bdi->cgwb_congested_tree = RB_ROOT;
mutex_init(&bdi->cgwb_release_mutex);
init_rwsem(&bdi->wb_switch_rwsem);
- ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
+ ret = wb_init(&bdi->wb, bdi, GFP_KERNEL);
if (!ret) {
bdi->wb.memcg_css = &root_mem_cgroup->css;
bdi->wb.blkcg_css = blkcg_root_css;
@@ -769,21 +672,6 @@ void wb_blkcg_offline(struct blkcg *blkcg)
spin_unlock_irq(&cgwb_lock);
}
-static void cgwb_bdi_exit(struct backing_dev_info *bdi)
-{
- struct rb_node *rbn;
-
- spin_lock_irq(&cgwb_lock);
- while ((rbn = rb_first(&bdi->cgwb_congested_tree))) {
- struct bdi_writeback_congested *congested =
- rb_entry(rbn, struct bdi_writeback_congested, rb_node);
-
- rb_erase(rbn, &bdi->cgwb_congested_tree);
- congested->__bdi = NULL; /* mark @congested unlinked */
- }
- spin_unlock_irq(&cgwb_lock);
-}
-
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
spin_lock_irq(&cgwb_lock);
@@ -810,29 +698,11 @@ subsys_initcall(cgwb_init);
static int cgwb_bdi_init(struct backing_dev_info *bdi)
{
- int err;
-
- bdi->wb_congested = kzalloc(sizeof(*bdi->wb_congested), GFP_KERNEL);
- if (!bdi->wb_congested)
- return -ENOMEM;
-
- refcount_set(&bdi->wb_congested->refcnt, 1);
-
- err = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
- if (err) {
- wb_congested_put(bdi->wb_congested);
- return err;
- }
- return 0;
+ return wb_init(&bdi->wb, bdi, GFP_KERNEL);
}
static void cgwb_bdi_unregister(struct backing_dev_info *bdi) { }
-static void cgwb_bdi_exit(struct backing_dev_info *bdi)
-{
- wb_congested_put(bdi->wb_congested);
-}
-
static void cgwb_bdi_register(struct backing_dev_info *bdi)
{
list_add_tail_rcu(&bdi->wb.bdi_node, &bdi->wb_list);
@@ -1023,7 +893,6 @@ static void release_bdi(struct kref *ref)
bdi_unregister(bdi);
WARN_ON_ONCE(bdi->dev);
wb_exit(&bdi->wb);
- cgwb_bdi_exit(bdi);
kfree(bdi);
}
@@ -1053,7 +922,7 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync)
enum wb_congested_state bit;
bit = sync ? WB_sync_congested : WB_async_congested;
- if (test_and_clear_bit(bit, &bdi->wb.congested->state))
+ if (test_and_clear_bit(bit, &bdi->wb.congested))
atomic_dec(&nr_wb_congested[sync]);
smp_mb__after_atomic();
if (waitqueue_active(wqh))
@@ -1066,7 +935,7 @@ void set_bdi_congested(struct backing_dev_info *bdi, int sync)
enum wb_congested_state bit;
bit = sync ? WB_sync_congested : WB_async_congested;
- if (!test_and_set_bit(bit, &bdi->wb.congested->state))
+ if (!test_and_set_bit(bit, &bdi->wb.congested))
atomic_inc(&nr_wb_congested[sync]);
}
EXPORT_SYMBOL(set_bdi_congested);
We never set any congested bits in the group writeback instances of it. And for the simpler bdi-wide case a simple scalar field is all that that is needed. Signed-off-by: Christoph Hellwig <hch@lst.de> --- block/blk-cgroup.c | 19 +--- drivers/md/dm.c | 2 +- include/linux/backing-dev-defs.h | 25 +----- include/linux/backing-dev.h | 18 +--- include/linux/blk-cgroup.h | 6 -- mm/backing-dev.c | 149 ++----------------------------- 6 files changed, 14 insertions(+), 205 deletions(-)