@@ -205,7 +205,6 @@ struct mem_cgroup {
int oom_kill_disable;
/* memory.events */
- atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
struct cgroup_file events_file;
/* protect arrays of thresholds */
@@ -238,6 +237,7 @@ struct mem_cgroup {
struct mem_cgroup_stat_cpu __percpu *stat_cpu;
atomic_long_t stat[MEMCG_NR_STAT];
atomic_long_t events[NR_VM_EVENT_ITEMS];
+ atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
unsigned long socket_pressure;
And performance restored.
Later investigation found that as long as the following 3 fields
moving_account, move_lock_task and stat_cpu are in the same cacheline,
performance will be good. To avoid future performance surprise by
other commits changing the layout of 'struct mem_cgroup', this patch
makes sure the 3 fields stay in the same cacheline.
One concern of this approach is, moving_account and move_lock_task
could be modified when a process changes memory cgroup while stat_cpu
is a always read field, it might hurt to place them in the same
cacheline. I assume it is rare for a process to change memory cgroup
so this should be OK.
LINK: https://lkml.kernel.org/r/20180528114019.GF9904@yexl-desktop
Reported-by: kernel test robot <xiaolong.ye@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Aaron Lu <aaron.lu@intel.com>
---
include/linux/memcontrol.h | 21 ++++++++++++++++++---
1 file changed, 18 insertions(+), 3 deletions(-)
@@ -158,6 +158,15 @@ enum memcg_kmem_state {
KMEM_ONLINE,
};
+#if defined(CONFIG_SMP)
+struct memcg_padding {
+ char x[0];
+} ____cacheline_internodealigned_in_smp;
+#define MEMCG_PADDING(name) struct memcg_padding name;
+#else
+#define MEMCG_PADDING(name)
+#endif
+
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
@@ -225,17 +234,23 @@ struct mem_cgroup {
* mem_cgroup ? And what type of charges should we move ?
*/
unsigned long move_charge_at_immigrate;
+ /* taken only while moving_account > 0 */
+ spinlock_t move_lock;
+ unsigned long move_lock_flags;
+
+ MEMCG_PADDING(_pad1_);
+
/*
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
atomic_t moving_account;
- /* taken only while moving_account > 0 */
- spinlock_t move_lock;
struct task_struct *move_lock_task;
- unsigned long move_lock_flags;
/* memory.stat */
struct mem_cgroup_stat_cpu __percpu *stat_cpu;
+
+ MEMCG_PADDING(_pad2_);
+
atomic_long_t stat[MEMCG_NR_STAT];
atomic_long_t events[NR_VM_EVENT_ITEMS];