Message ID | 20190228021839.55779-6-dennis@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Series | introduce percpu block scan_hint |
Hi Dennis,

> -----Original Message-----
> From: owner-linux-mm@kvack.org [mailto:owner-linux-mm@kvack.org] On
> Behalf Of Dennis Zhou
> Sent: February 28, 2019 10:19
> To: Dennis Zhou <dennis@kernel.org>; Tejun Heo <tj@kernel.org>; Christoph
> Lameter <cl@linux.com>
> Cc: Vlad Buslov <vladbu@mellanox.com>; kernel-team@fb.com;
> linux-mm@kvack.org; linux-kernel@vger.kernel.org
> Subject: [PATCH 05/12] percpu: relegate chunks unusable when failing small
> allocations
>
> In certain cases, requestors of percpu memory may want specific alignments.
> However, it is possible to end up in situations where the contig_hint matches,
> but the alignment does not. This causes excess scanning of chunks that will fail.
> To prevent this, if a small allocation fails (< 32B), the chunk is moved to the
> empty list. Once an allocation is freed from that chunk, it is placed back into
> rotation.
>
> Signed-off-by: Dennis Zhou <dennis@kernel.org>
> ---
>  mm/percpu.c | 35 ++++++++++++++++++++++++++---------
>  1 file changed, 26 insertions(+), 9 deletions(-)
>
> diff --git a/mm/percpu.c b/mm/percpu.c
> index c996bcffbb2a..3d7deece9556 100644
> --- a/mm/percpu.c
> +++ b/mm/percpu.c
> @@ -94,6 +94,8 @@
>
>  /* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
>  #define PCPU_SLOT_BASE_SHIFT		5
> +/* chunks in slots below this are subject to being sidelined on failed alloc */
> +#define PCPU_SLOT_FAIL_THRESHOLD	3
>
>  #define PCPU_EMPTY_POP_PAGES_LOW	2
>  #define PCPU_EMPTY_POP_PAGES_HIGH	4
> @@ -488,6 +490,22 @@ static void pcpu_mem_free(void *ptr)
>  	kvfree(ptr);
>  }
>
> +static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
> +			      bool move_front)
> +{
> +	if (chunk != pcpu_reserved_chunk) {
> +		if (move_front)
> +			list_move(&chunk->list, &pcpu_slot[slot]);
> +		else
> +			list_move_tail(&chunk->list, &pcpu_slot[slot]);
> +	}
> +}
> +
> +static void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot)
> +{
> +	__pcpu_chunk_move(chunk, slot, true);
> +}
> +
>  /**
>   * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
>   * @chunk: chunk of interest
> @@ -505,12 +523,8 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
>  {
>  	int nslot = pcpu_chunk_slot(chunk);
>
> -	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
> -		if (oslot < nslot)
> -			list_move(&chunk->list, &pcpu_slot[nslot]);
> -		else
> -			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
> -	}
> +	if (oslot != nslot)
> +		__pcpu_chunk_move(chunk, nslot, oslot < nslot);
>  }
>
>  /**
> @@ -1381,7 +1395,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
>  	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
>  	bool do_warn = !(gfp & __GFP_NOWARN);
>  	static int warn_limit = 10;
> -	struct pcpu_chunk *chunk;
> +	struct pcpu_chunk *chunk, *next;
>  	const char *err;
>  	int slot, off, cpu, ret;
>  	unsigned long flags;
> @@ -1443,11 +1457,14 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
>  restart:
>  	/* search through normal chunks */
>  	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
> -		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
> +		list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) {
>  			off = pcpu_find_block_fit(chunk, bits, bit_align,
>  						  is_atomic);
> -			if (off < 0)
> +			if (off < 0) {
> +				if (slot < PCPU_SLOT_FAIL_THRESHOLD)
> +					pcpu_chunk_move(chunk, 0);
>  				continue;
> +			}
>
>  			off = pcpu_alloc_area(chunk, bits, bit_align, off);
>  			if (off >= 0)

For the code: Reviewed-by: Peng Fan <peng.fan@nxp.com>

But I did not understand well why choose 32B? If there is more
information, better put it in the commit log.

Thanks,
Peng.

> --
> 2.17.1
On Sat, Mar 02, 2019 at 01:55:54PM +0000, Peng Fan wrote:
> Hi Dennis,
>
> > -----Original Message-----
> > From: owner-linux-mm@kvack.org [mailto:owner-linux-mm@kvack.org] On
> > Behalf Of Dennis Zhou
> > Sent: February 28, 2019 10:19
> > Subject: [PATCH 05/12] percpu: relegate chunks unusable when failing small
> > allocations
> >
> > [...]
>
> For the code: Reviewed-by: Peng Fan <peng.fan@nxp.com>
>
> But I did not understand well why choose 32B? If there is more
> information, better put it in the commit log.
>

There isn't; I just picked a small allocation size.

Thanks,
Dennis
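For context on where the "< 32B" figure comes from: the relegation check only fires while the allocator is scanning a slot below PCPU_SLOT_FAIL_THRESHOLD (3), and with PCPU_SLOT_BASE_SHIFT == 5 only requests smaller than 32 bytes start their scan in slots 1 or 2. Below is a rough paraphrase of the slot mapping from mm/percpu.c around this series; it is not part of the patch and is reproduced from memory, so treat it as a sketch.

```c
/*
 * Sketch of __pcpu_size_to_slot() as it looks in mm/percpu.c of this era
 * (paraphrased, not taken from the patch).  fls() returns the position of
 * the highest set bit, so with PCPU_SLOT_BASE_SHIFT == 5:
 *
 *   size  1..15 bytes -> slot 1
 *   size 16..31 bytes -> slot 2
 *   size 32..63 bytes -> slot 3
 *
 * Only sub-32-byte requests ever scan a slot below
 * PCPU_SLOT_FAIL_THRESHOLD (3), so only they can sideline a chunk.
 */
static int __pcpu_size_to_slot(int size)
{
	int highbit = fls(size);	/* size is in bytes */

	return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1);
}
```

The chunk being scanned must itself sit on one of those low slots, meaning it has little free space left, which is exactly the case where repeated failed fit scans are most wasteful.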
diff --git a/mm/percpu.c b/mm/percpu.c
index c996bcffbb2a..3d7deece9556 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -94,6 +94,8 @@
 
 /* the slots are sorted by free bytes left, 1-31 bytes share the same slot */
 #define PCPU_SLOT_BASE_SHIFT		5
+/* chunks in slots below this are subject to being sidelined on failed alloc */
+#define PCPU_SLOT_FAIL_THRESHOLD	3
 
 #define PCPU_EMPTY_POP_PAGES_LOW	2
 #define PCPU_EMPTY_POP_PAGES_HIGH	4
@@ -488,6 +490,22 @@ static void pcpu_mem_free(void *ptr)
 	kvfree(ptr);
 }
 
+static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot,
+			      bool move_front)
+{
+	if (chunk != pcpu_reserved_chunk) {
+		if (move_front)
+			list_move(&chunk->list, &pcpu_slot[slot]);
+		else
+			list_move_tail(&chunk->list, &pcpu_slot[slot]);
+	}
+}
+
+static void pcpu_chunk_move(struct pcpu_chunk *chunk, int slot)
+{
+	__pcpu_chunk_move(chunk, slot, true);
+}
+
 /**
  * pcpu_chunk_relocate - put chunk in the appropriate chunk slot
  * @chunk: chunk of interest
@@ -505,12 +523,8 @@ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot)
 {
 	int nslot = pcpu_chunk_slot(chunk);
 
-	if (chunk != pcpu_reserved_chunk && oslot != nslot) {
-		if (oslot < nslot)
-			list_move(&chunk->list, &pcpu_slot[nslot]);
-		else
-			list_move_tail(&chunk->list, &pcpu_slot[nslot]);
-	}
+	if (oslot != nslot)
+		__pcpu_chunk_move(chunk, nslot, oslot < nslot);
 }
 
 /**
@@ -1381,7 +1395,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
-	struct pcpu_chunk *chunk;
+	struct pcpu_chunk *chunk, *next;
 	const char *err;
 	int slot, off, cpu, ret;
 	unsigned long flags;
@@ -1443,11 +1457,14 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 restart:
 	/* search through normal chunks */
 	for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) {
-		list_for_each_entry(chunk, &pcpu_slot[slot], list) {
+		list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) {
 			off = pcpu_find_block_fit(chunk, bits, bit_align,
 						  is_atomic);
-			if (off < 0)
+			if (off < 0) {
+				if (slot < PCPU_SLOT_FAIL_THRESHOLD)
+					pcpu_chunk_move(chunk, 0);
 				continue;
+			}
 
 			off = pcpu_alloc_area(chunk, bits, bit_align, off);
 			if (off >= 0)
In certain cases, requestors of percpu memory may want specific alignments.
However, it is possible to end up in situations where the contig_hint
matches, but the alignment does not. This causes excess scanning of chunks
that will fail. To prevent this, if a small allocation fails (< 32B), the
chunk is moved to the empty list. Once an allocation is freed from that
chunk, it is placed back into rotation.

Signed-off-by: Dennis Zhou <dennis@kernel.org>
---
 mm/percpu.c | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)
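To make the lifecycle described in the commit message concrete, here is a small userspace toy model of one chunk being sidelined after a failed small allocation and returning to rotation when something is freed from it. This is not kernel code; the struct, sizes, and slot numbers are invented for illustration.

```c
#include <stdio.h>

#define SLOT_FAIL_THRESHOLD	3	/* mirrors PCPU_SLOT_FAIL_THRESHOLD */

struct chunk {
	const char *name;
	int contig_hint;	/* largest contiguous free run, in bytes */
	int slot;		/* free list the chunk currently sits on */
};

/*
 * A small request whose contig_hint looks big enough can still fail once
 * the requested alignment is applied.  When that happens while scanning a
 * low slot, the chunk is parked on slot 0 so later small requests skip it.
 */
static void small_alloc_failed(struct chunk *c)
{
	if (c->slot < SLOT_FAIL_THRESHOLD) {
		printf("%s: %dB hint but alignment failed, sidelined to slot 0\n",
		       c->name, c->contig_hint);
		c->slot = 0;
	}
}

/*
 * Freeing anything from the chunk recomputes its slot, the way the real
 * free path ends in pcpu_chunk_relocate(), pulling the chunk off slot 0.
 */
static void freed_from_chunk(struct chunk *c, int new_slot)
{
	c->slot = new_slot;
	printf("%s: free path relocated it to slot %d, back in rotation\n",
	       c->name, c->slot);
}

int main(void)
{
	/* ~24 free bytes would put a chunk on slot 2 (see the mapping above) */
	struct chunk c = { "chunk A", 24, 2 };

	small_alloc_failed(&c);		/* e.g. a 16B request with 16B alignment */
	freed_from_chunk(&c, 4);	/* a later free grows the contiguous run */
	return 0;
}
```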