
fs/buffer.c: Revoke LRU when trying to drop buffers

Message ID 1fe5d53722407a2651eeeada3a422c117041bf1d.1606194703.git.cgoldswo@codeaurora.org (mailing list archive)
State New, archived

Commit Message

Chris Goldsworthy Nov. 24, 2020, 6:49 a.m. UTC
From: Laura Abbott <lauraa@codeaurora.org>

When a buffer is added to the LRU list, a reference is taken which is
not dropped until the buffer is evicted from the LRU list. This is the
correct behavior; however, the LRU reference keeps the buffer busy, so
it cannot actually be dropped until it is selected for eviction. There
is no bound on the time spent on the LRU list, which means that the
buffer may be undroppable for very long periods of time. Given that
migration involves dropping buffers, the associated page is
unmigratable for long periods of time as well. CMA relies on being
able to migrate a specific range of pages, so these types of failures
make CMA significantly less reliable, especially under high
filesystem usage.
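
For reference, the mechanism in question is bh_lru_install() in
fs/buffer.c, which looks roughly like this (simplified sketch, locking
omitted):

	static void bh_lru_install(struct buffer_head *bh)
	{
		struct buffer_head *evictee = bh;
		struct bh_lru *b = this_cpu_ptr(&bh_lrus);
		int i;

		/* Shift entries down one slot; the oldest falls off the end */
		for (i = 0; i < BH_LRU_SIZE; i++) {
			swap(evictee, b->bhs[i]);
			if (evictee == bh)	/* bh was already in this LRU */
				return;
		}

		get_bh(bh);		/* the reference that pins bh until eviction */
		brelse(evictee);	/* drop the reference on the evicted entry */
	}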

Rather than waiting for the LRU algorithm to eventually kick out the
buffer, explicitly remove the buffer from the LRU list when trying to
drop it. There is still the possibility that the buffer could be added
back onto the list, but that indicates the buffer is still in use and
would likely have other 'in use' indicators preventing it from being
dropped.

Signed-off-by: Laura Abbott <lauraa@codeaurora.org>
Signed-off-by: Chris Goldsworthy <cgoldswo@codeaurora.org>
---
 fs/buffer.c | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 45 insertions(+), 2 deletions(-)

Comments

Matthew Wilcox Nov. 24, 2020, 3:39 p.m. UTC | #1
On Mon, Nov 23, 2020 at 10:49:38PM -0800, Chris Goldsworthy wrote:
> +static void __evict_bh_lru(void *arg)
> +{
> +	struct bh_lru *b = &get_cpu_var(bh_lrus);
> +	struct buffer_head *bh = arg;
> +	int i;
> +
> +	for (i = 0; i < BH_LRU_SIZE; i++) {
> +		if (b->bhs[i] == bh) {
> +			brelse(b->bhs[i]);
> +			b->bhs[i] = NULL;
> +			goto out;

That's an odd way to spell 'break' ...

> +		}
> +	}
> +out:
> +	put_cpu_var(bh_lrus);
> +}
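
That is, since the label immediately precedes put_cpu_var(), the loop
could simply break instead:

	for (i = 0; i < BH_LRU_SIZE; i++) {
		if (b->bhs[i] == bh) {
			brelse(b->bhs[i]);
			b->bhs[i] = NULL;
			break;	/* no 'out' label needed */
		}
	}
	put_cpu_var(bh_lrus);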

...

> @@ -3245,8 +3281,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
>  
>  	bh = head;
>  	do {
> -		if (buffer_busy(bh))
> -			goto failed;
> +		if (buffer_busy(bh)) {
> +			/*
> +			 * Check if the busy failure was due to an
> +			 * outstanding LRU reference
> +			 */
> +			evict_bh_lrus(bh);
> +			if (buffer_busy(bh))
> +				goto failed;

Do you see any performance problems with this?  I'm concerned that we
need to call all CPUs for each buffer on a page, so with a 4kB page
and 512-byte blocks, we'd call each CPU eight times (with a 64kB page
and a 4kB block size, we'd call each CPU 16 times!).  We might be
better off just calling invalidate_bh_lrus() -- we'd flush the entire
LRU, but we'd only need to do it once, not once per buffer head.
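
A sketch of that alternative (hypothetical, not the patch as posted):
flush every CPU's LRU at most once per page, e.g.

	bool flushed = false;

	bh = head;
	do {
		if (buffer_busy(bh)) {
			if (!flushed) {
				/* one flush for the whole page */
				invalidate_bh_lrus();
				flushed = true;
			}
			if (buffer_busy(bh))
				goto failed;
		}
		bh = bh->b_this_page;
	} while (bh != head);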

We could have a more complex 'evict' that iterates each busy buffer on a
page so transforming:

for_each_buffer
	for_each_cpu
		for_each_lru_entry

to:

for_each_cpu
	for_each_buffer
		for_each_lru_entry

(and I suggest that ordering because it's more expensive to iterate the
buffers than it is to iterate the LRU entries)
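
A sketch of that restructuring (function name hypothetical): one
cross-CPU call per page, with each CPU walking the page's buffers
against its own LRU entries:

	static void __evict_bh_lrus_page(void *arg)
	{
		struct bh_lru *b = &get_cpu_var(bh_lrus);
		struct page *page = arg;
		struct buffer_head *head = page_buffers(page);
		struct buffer_head *bh = head;
		int i;

		do {
			for (i = 0; i < BH_LRU_SIZE; i++) {
				if (b->bhs[i] == bh) {
					brelse(b->bhs[i]);
					b->bhs[i] = NULL;
					break;
				}
			}
			bh = bh->b_this_page;
		} while (bh != head);
		put_cpu_var(bh_lrus);
	}
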
Chris Goldsworthy Jan. 5, 2021, 2:57 p.m. UTC | #2
On 2020-11-24 07:39, Matthew Wilcox wrote:
> On Mon, Nov 23, 2020 at 10:49:38PM -0800, Chris Goldsworthy wrote:
>> +static void __evict_bh_lru(void *arg)
>> +{
>> +	struct bh_lru *b = &get_cpu_var(bh_lrus);
>> +	struct buffer_head *bh = arg;
>> +	int i;
>> +
>> +	for (i = 0; i < BH_LRU_SIZE; i++) {
>> +		if (b->bhs[i] == bh) {
>> +			brelse(b->bhs[i]);
>> +			b->bhs[i] = NULL;
>> +			goto out;
> 
> That's an odd way to spell 'break' ...
> 
>> +		}
>> +	}
>> +out:
>> +	put_cpu_var(bh_lrus);
>> +}
> 
> ...
> 
>> @@ -3245,8 +3281,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
>> 
>>  	bh = head;
>>  	do {
>> -		if (buffer_busy(bh))
>> -			goto failed;
>> +		if (buffer_busy(bh)) {
>> +			/*
>> +			 * Check if the busy failure was due to an
>> +			 * outstanding LRU reference
>> +			 */
>> +			evict_bh_lrus(bh);
>> +			if (buffer_busy(bh))
>> +				goto failed;

Hi Matthew,

Apologies for the delayed response.

> We might be better off just calling invalidate_bh_lrus() -- we'd flush
> the entire LRU, but we'd only need to do it once, not once per buffer 
> head.

I'm concerned about emptying the cache entirely, since other users who
might benefit from it would be adversely affected.

> We could have a more complex 'evict' that iterates each busy buffer on a
> page so transforming:
> 
> for_each_buffer
> 	for_each_cpu
> 		for_each_lru_entry
> 
> to:
> 
> for_each_cpu
> 	for_each_buffer
> 		for_each_lru_entry
> 
> (and I suggest that ordering because it's more expensive to iterate the
> buffers than it is to iterate the LRU entries)

I've gone ahead and done this in a follow-up patch:

https://lore.kernel.org/lkml/cover.1609829465.git.cgoldswo@codeaurora.org/

There might be room for improvement in the data structure used to
track the used entries - using an xarray gives the cleanest code, but
pre-allocating an array to hold up to page_size(page) / bh->b_size
entries might be faster, although it would be a bit uglier to do in a
way that doesn't reduce performance in the case where evict_bh_lrus()
doesn't need to be called.
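
For illustration, the pre-allocated array idea might look like the
following sketch (names hypothetical); MAX_BUF_PER_PAGE bounds the
number of buffers a page can carry:

	struct bh_evict_batch {
		int count;
		struct buffer_head *bhs[MAX_BUF_PER_PAGE];
	};

	static void __evict_bh_batch(void *arg)
	{
		struct bh_lru *b = &get_cpu_var(bh_lrus);
		struct bh_evict_batch *batch = arg;
		int i, j;

		for (j = 0; j < batch->count; j++) {
			for (i = 0; i < BH_LRU_SIZE; i++) {
				if (b->bhs[i] == batch->bhs[j]) {
					brelse(b->bhs[i]);
					b->bhs[i] = NULL;
					break;
				}
			}
		}
		put_cpu_var(bh_lrus);
	}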

Regards,

Chris.

Patch

diff --git a/fs/buffer.c b/fs/buffer.c
index 64564ac..1751f0b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1471,12 +1471,48 @@ static bool has_bh_in_lru(int cpu, void *dummy)
 	return false;
 }
 
+static void __evict_bh_lru(void *arg)
+{
+	struct bh_lru *b = &get_cpu_var(bh_lrus);
+	struct buffer_head *bh = arg;
+	int i;
+
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		if (b->bhs[i] == bh) {
+			brelse(b->bhs[i]);
+			b->bhs[i] = NULL;
+			goto out;
+		}
+	}
+out:
+	put_cpu_var(bh_lrus);
+}
+
+static bool bh_exists_in_lru(int cpu, void *arg)
+{
+	struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
+	struct buffer_head *bh = arg;
+	int i;
+
+	for (i = 0; i < BH_LRU_SIZE; i++) {
+		if (b->bhs[i] == bh)
+			return true;
+	}
+
+	return false;
+
+}
 void invalidate_bh_lrus(void)
 {
 	on_each_cpu_cond(has_bh_in_lru, invalidate_bh_lru, NULL, 1);
 }
 EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
 
+static void evict_bh_lrus(struct buffer_head *bh)
+{
+	on_each_cpu_cond(bh_exists_in_lru, __evict_bh_lru, bh, 1);
+}
+
 void set_bh_page(struct buffer_head *bh,
 		struct page *page, unsigned long offset)
 {
@@ -3245,8 +3281,15 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 
 	bh = head;
 	do {
-		if (buffer_busy(bh))
-			goto failed;
+		if (buffer_busy(bh)) {
+			/*
+			 * Check if the busy failure was due to an
+			 * outstanding LRU reference
+			 */
+			evict_bh_lrus(bh);
+			if (buffer_busy(bh))
+				goto failed;
+		}
 		bh = bh->b_this_page;
 	} while (bh != head);