Message ID | 20241208224629.697448-7-neilb@suse.de (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | nfsd: allocate/free session-based DRC slots on demand | expand |
On 12/8/24 5:43 PM, NeilBrown wrote: > Add a shrinker which frees unused slots and may ask the clients to use > fewer slots on each session. > > We keep a global count of the number of freeable slots, which is the sum > of one less than the current "target" slots in all sessions in all > clients in all net-namespaces. This number is reported by the shrinker. > > When the shrinker is asked to free some, we call xxx on each session in > a round-robin asking each to reduce the slot count by 1. This will > reduce the "target" so the number reported by the shrinker will reduce > immediately. The memory will only be freed later when the client > confirmed that it is no longer needed. > > We use a global list of sessions and move the "head" to after the last > session that we asked to reduce, so the next callback from the shrinker > will move on to the next session. This pressure should be applied > "evenly" across all sessions over time. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++--- > fs/nfsd/state.h | 1 + > 2 files changed, 68 insertions(+), 4 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index a2d1f97b8a0e..311f67418759 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses) > */ > #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) > > +static struct shrinker *nfsd_slot_shrinker; > +static DEFINE_SPINLOCK(nfsd_session_list_lock); > +static LIST_HEAD(nfsd_session_list); > +/* The sum of "target_slots-1" on every session. The shrinker can push this > + * down, though it can take a little while for the memory to actually > + * be freed. The "-1" is because we can never free slot 0 while the > + * session is active. 
> + */ > +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); > + > static void > free_session_slots(struct nfsd4_session *ses, int from) > { > @@ -1930,8 +1940,11 @@ free_session_slots(struct nfsd4_session *ses, int from) > kfree(slot); > } > ses->se_fchannel.maxreqs = from; > - if (ses->se_target_maxslots > from) > - ses->se_target_maxslots = from; > + if (ses->se_target_maxslots > from) { > + int new_target = from ?: 1; > + atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); > + ses->se_target_maxslots = new_target; > + } > } > > /** > @@ -1949,7 +1962,7 @@ free_session_slots(struct nfsd4_session *ses, int from) > * Return value: > * The number of slots that the target was reduced by. > */ > -static int __maybe_unused > +static int > reduce_session_slots(struct nfsd4_session *ses, int dec) > { > struct nfsd_net *nn = net_generic(ses->se_client->net, > @@ -1962,6 +1975,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec) > return ret; > ret = min(dec, ses->se_target_maxslots-1); > ses->se_target_maxslots -= ret; > + atomic_sub(ret, &nfsd_total_target_slots); > ses->se_slot_gen += 1; > if (ses->se_slot_gen == 0) { > int i; > @@ -2021,6 +2035,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, > fattrs->maxreqs = i; > memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); > new->se_target_maxslots = i; > + atomic_add(i - 1, &nfsd_total_target_slots); > new->se_cb_slot_avail = ~0U; > new->se_cb_highest_slot = min(battrs->maxreqs - 1, > NFSD_BC_SLOT_TABLE_SIZE - 1); > @@ -2145,6 +2160,36 @@ static void free_session(struct nfsd4_session *ses) > __free_session(ses); > } > > +static unsigned long > +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc) > +{ > + unsigned long cnt = atomic_read(&nfsd_total_target_slots); > + > + return cnt ? 
cnt : SHRINK_EMPTY; > +} > + > +static unsigned long > +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) > +{ > + struct nfsd4_session *ses; > + unsigned long scanned = 0; > + unsigned long freed = 0; > + > + spin_lock(&nfsd_session_list_lock); > + list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { > + freed += reduce_session_slots(ses, 1); > + scanned += 1; > + if (scanned >= sc->nr_to_scan) { > + /* Move starting point for next scan */ > + list_move(&nfsd_session_list, &ses->se_all_sessions); > + break; > + } > + } > + spin_unlock(&nfsd_session_list_lock); > + sc->nr_scanned = scanned; > + return freed; > +} > + > static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) > { > int idx; > @@ -2169,6 +2214,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru > list_add(&new->se_perclnt, &clp->cl_sessions); > spin_unlock(&clp->cl_lock); > > + spin_lock(&nfsd_session_list_lock); > + list_add_tail(&new->se_all_sessions, &nfsd_session_list); > + spin_unlock(&nfsd_session_list_lock); > + > { > struct sockaddr *sa = svc_addr(rqstp); > /* > @@ -2238,6 +2287,9 @@ unhash_session(struct nfsd4_session *ses) > spin_lock(&ses->se_client->cl_lock); > list_del(&ses->se_perclnt); > spin_unlock(&ses->se_client->cl_lock); > + spin_lock(&nfsd_session_list_lock); > + list_del(&ses->se_all_sessions); > + spin_unlock(&nfsd_session_list_lock); > } > > /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ > @@ -4380,6 +4432,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > GFP_NOWAIT))) { > s += 1; > session->se_fchannel.maxreqs = s; > + atomic_add(s - session->se_target_maxslots, > + &nfsd_total_target_slots); > session->se_target_maxslots = s; > } else { > kfree(slot); > @@ -8776,7 +8830,6 @@ nfs4_state_start_net(struct net *net) > } > > /* initialization to perform when the nfsd service is started: */ > - > int > 
nfs4_state_start(void) > { > @@ -8786,6 +8839,15 @@ nfs4_state_start(void) > if (ret) > return ret; > > + nfsd_slot_shrinker = shrinker_alloc(0, "nfsd-DRC-slot"); > + if (!nfsd_slot_shrinker) { > + rhltable_destroy(&nfs4_file_rhltable); > + return -ENOMEM; > + } > + nfsd_slot_shrinker->count_objects = nfsd_slot_count; > + nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; > + shrinker_register(nfsd_slot_shrinker); > + > set_max_delegations(); > return 0; > } > @@ -8827,6 +8889,7 @@ void > nfs4_state_shutdown(void) > { > rhltable_destroy(&nfs4_file_rhltable); > + shrinker_free(nfsd_slot_shrinker); > } > > static void > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 4251ff3c5ad1..f45aee751a10 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -325,6 +325,7 @@ struct nfsd4_session { > u32 se_cb_prog; > struct list_head se_hash; /* hash by sessionid */ > struct list_head se_perclnt; > + struct list_head se_all_sessions;/* global list of sessions */ > struct nfs4_client *se_client; > struct nfs4_sessionid se_sessionid; > struct nfsd4_channel_attrs se_fchannel; Bisected to this patch. Sometime during the pynfs NFSv4.1 server tests, this list_del corruption splat is triggered: [ 87.768277] list_del corruption. prev->next should be ff388b4606369638, but was 0000000000000000. (prev=ff388b4606368038) [ 87.771492] ------------[ cut here ]------------ [ 87.772862] kernel BUG at lib/list_debug.c:62! 
[ 87.775029] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI [ 87.777179] CPU: 2 UID: 0 PID: 940 Comm: nfsd Not tainted 6.13.0-rc2-g6139eb164177 #1 [ 87.780065] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-2.fc40 04/01/2014 [ 87.783143] RIP: 0010:__list_del_entry_valid_or_report.cold+0x4f/0x9f [ 87.785336] Code: c2 48 83 05 43 a7 13 04 01 e8 5e ba f9 ff 0f 0b 48 89 f2 48 89 fe 48 c7 c7 00 07 84 ae 48 83 05 0f a7 13 04 01 e8 42 ba f9 ff <0f> 0b 48 89 fe 48 89 ca 48 c7 c7 c8 06 84 ae 48 83 05 db a6 13 04 [ 87.791467] RSP: 0018:ff4e1b1302de3d08 EFLAGS: 00010246 [ 87.793251] RAX: 000000000000006d RBX: ff388b4606369600 RCX: 0000000000000000 [ 87.795660] RDX: 0000000000000000 RSI: ff388b496fd21900 RDI: ff388b496fd21900 [ 87.798066] RBP: ff4e1b1302de3d08 R08: 0000000000000000 R09: 656e3e2d76657270 [ 87.800485] R10: 0000000000000029 R11: ff4e1b1302de3aa0 R12: ffffffffb0495580 [ 87.802884] R13: ff388b460dcee128 R14: 0000000000000001 R15: ffffffffb0495580 [ 87.805301] FS: 0000000000000000(0000) GS:ff388b496fd00000(0000) knlGS:0000000000000000 [ 87.807992] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 87.809952] CR2: 00007f7424c42008 CR3: 0000000100f30001 CR4: 0000000000771ef0 [ 87.811961] PKRU: 55555554 [ 87.812699] Call Trace: [ 87.813380] <TASK> [ 87.813966] ? show_regs.cold+0x21/0x36 [ 87.814990] ? __die_body+0x2b/0xa0 [ 87.815934] ? __die+0x3c/0x4e [ 87.816669] ? die+0x43/0x80 [ 87.817297] ? do_trap+0x11c/0x150 [ 87.818008] ? do_error_trap+0xbc/0x110 [ 87.818797] ? __list_del_entry_valid_or_report.cold+0x4f/0x9f [ 87.819955] ? exc_invalid_op+0x6e/0x90 [ 87.820747] ? __list_del_entry_valid_or_report.cold+0x4f/0x9f [ 87.821904] ? asm_exc_invalid_op+0x1f/0x30 [ 87.822761] ? __list_del_entry_valid_or_report.cold+0x4f/0x9f [ 87.823915] ? __list_del_entry_valid_or_report.cold+0x4f/0x9f [ 87.825069] nfsd4_destroy_session+0x280/0x430 [nfsd] [ 87.826230] nfsd4_proc_compound+0x64d/0xcf0 [nfsd] [ 87.827141] ? 
nfs4svc_decode_compoundargs+0x367/0x6c0 [nfsd] [ 87.827989] nfsd_dispatch+0x16b/0x3d0 [nfsd] [ 87.828671] svc_process_common+0x903/0xc80 [sunrpc] [ 87.829440] ? __pfx_nfsd_dispatch+0x10/0x10 [nfsd] [ 87.830178] svc_process+0x166/0x2e0 [sunrpc] [ 87.830868] svc_recv+0xd65/0x12c0 [sunrpc] [ 87.831529] ? __pfx_nfsd+0x10/0x10 [nfsd] [ 87.832160] nfsd+0x10a/0x1b0 [nfsd] [ 87.832734] kthread+0x149/0x1c0 [ 87.833201] ? __pfx_kthread+0x10/0x10 [ 87.833737] ret_from_fork+0x5e/0x80 [ 87.834248] ? __pfx_kthread+0x10/0x10 [ 87.834786] ret_from_fork_asm+0x1a/0x30 [ 87.835349] </TASK>
On Wed, 11 Dec 2024, Chuck Lever wrote: > On 12/8/24 5:43 PM, NeilBrown wrote: > > Add a shrinker which frees unused slots and may ask the clients to use > > fewer slots on each session. > > Bisected to this patch. Sometime during the pynfs NFSv4.1 server tests, > this list_del corruption splat is triggered: Thanks. This fixes it. Do you want to squash it in, or should I resend? Having two places that detach a session from a client seems less than ideal. I wonder if I should fix that. Thanks, NeilBrown diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 311f67418759..3b76cfe44b45 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -2425,8 +2425,12 @@ unhash_client_locked(struct nfs4_client *clp) } list_del_init(&clp->cl_lru); spin_lock(&clp->cl_lock); - list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { list_del_init(&ses->se_hash); + list_del_init(&ses->se_all_sessions); + } + spin_unlock(&nfsd_session_list_lock); spin_unlock(&clp->cl_lock); } Process Finished
On 12/10/24 10:32 PM, NeilBrown wrote: > On Wed, 11 Dec 2024, Chuck Lever wrote: >> On 12/8/24 5:43 PM, NeilBrown wrote: >>> Add a shrinker which frees unused slots and may ask the clients to use >>> fewer slots on each session. > >> >> Bisected to this patch. Sometime during the pynfs NFSv4.1 server tests, >> this list_del corruption splat is triggered: > > Thanks. > This fixes it. Do you want to squash it in, or should I resend? Resend, thanks! > Having two places that detach a session from a client seems less than > ideal. I wonder if I should fix that. > > Thanks, > NeilBrown > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 311f67418759..3b76cfe44b45 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -2425,8 +2425,12 @@ unhash_client_locked(struct nfs4_client *clp) > } > list_del_init(&clp->cl_lru); > spin_lock(&clp->cl_lock); > - list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) > + spin_lock(&nfsd_session_list_lock); > + list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) { > list_del_init(&ses->se_hash); > + list_del_init(&ses->se_all_sessions); > + } > + spin_unlock(&nfsd_session_list_lock); > spin_unlock(&clp->cl_lock); > } > > > Process Finished
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index a2d1f97b8a0e..311f67418759 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1909,6 +1909,16 @@ gen_sessionid(struct nfsd4_session *ses) */ #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) +static struct shrinker *nfsd_slot_shrinker; +static DEFINE_SPINLOCK(nfsd_session_list_lock); +static LIST_HEAD(nfsd_session_list); +/* The sum of "target_slots-1" on every session. The shrinker can push this + * down, though it can take a little while for the memory to actually + * be freed. The "-1" is because we can never free slot 0 while the + * session is active. + */ +static atomic_t nfsd_total_target_slots = ATOMIC_INIT(0); + static void free_session_slots(struct nfsd4_session *ses, int from) { @@ -1930,8 +1940,11 @@ free_session_slots(struct nfsd4_session *ses, int from) kfree(slot); } ses->se_fchannel.maxreqs = from; - if (ses->se_target_maxslots > from) - ses->se_target_maxslots = from; + if (ses->se_target_maxslots > from) { + int new_target = from ?: 1; + atomic_sub(ses->se_target_maxslots - new_target, &nfsd_total_target_slots); + ses->se_target_maxslots = new_target; + } } /** @@ -1949,7 +1962,7 @@ free_session_slots(struct nfsd4_session *ses, int from) * Return value: * The number of slots that the target was reduced by. 
*/ -static int __maybe_unused +static int reduce_session_slots(struct nfsd4_session *ses, int dec) { struct nfsd_net *nn = net_generic(ses->se_client->net, @@ -1962,6 +1975,7 @@ reduce_session_slots(struct nfsd4_session *ses, int dec) return ret; ret = min(dec, ses->se_target_maxslots-1); ses->se_target_maxslots -= ret; + atomic_sub(ret, &nfsd_total_target_slots); ses->se_slot_gen += 1; if (ses->se_slot_gen == 0) { int i; @@ -2021,6 +2035,7 @@ static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, fattrs->maxreqs = i; memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs)); new->se_target_maxslots = i; + atomic_add(i - 1, &nfsd_total_target_slots); new->se_cb_slot_avail = ~0U; new->se_cb_highest_slot = min(battrs->maxreqs - 1, NFSD_BC_SLOT_TABLE_SIZE - 1); @@ -2145,6 +2160,36 @@ static void free_session(struct nfsd4_session *ses) __free_session(ses); } +static unsigned long +nfsd_slot_count(struct shrinker *s, struct shrink_control *sc) +{ + unsigned long cnt = atomic_read(&nfsd_total_target_slots); + + return cnt ? 
cnt : SHRINK_EMPTY; +} + +static unsigned long +nfsd_slot_scan(struct shrinker *s, struct shrink_control *sc) +{ + struct nfsd4_session *ses; + unsigned long scanned = 0; + unsigned long freed = 0; + + spin_lock(&nfsd_session_list_lock); + list_for_each_entry(ses, &nfsd_session_list, se_all_sessions) { + freed += reduce_session_slots(ses, 1); + scanned += 1; + if (scanned >= sc->nr_to_scan) { + /* Move starting point for next scan */ + list_move(&nfsd_session_list, &ses->se_all_sessions); + break; + } + } + spin_unlock(&nfsd_session_list_lock); + sc->nr_scanned = scanned; + return freed; +} + static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses) { int idx; @@ -2169,6 +2214,10 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru list_add(&new->se_perclnt, &clp->cl_sessions); spin_unlock(&clp->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_add_tail(&new->se_all_sessions, &nfsd_session_list); + spin_unlock(&nfsd_session_list_lock); + { struct sockaddr *sa = svc_addr(rqstp); /* @@ -2238,6 +2287,9 @@ unhash_session(struct nfsd4_session *ses) spin_lock(&ses->se_client->cl_lock); list_del(&ses->se_perclnt); spin_unlock(&ses->se_client->cl_lock); + spin_lock(&nfsd_session_list_lock); + list_del(&ses->se_all_sessions); + spin_unlock(&nfsd_session_list_lock); } /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */ @@ -4380,6 +4432,8 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, GFP_NOWAIT))) { s += 1; session->se_fchannel.maxreqs = s; + atomic_add(s - session->se_target_maxslots, + &nfsd_total_target_slots); session->se_target_maxslots = s; } else { kfree(slot); @@ -8776,7 +8830,6 @@ nfs4_state_start_net(struct net *net) } /* initialization to perform when the nfsd service is started: */ - int nfs4_state_start(void) { @@ -8786,6 +8839,15 @@ nfs4_state_start(void) if (ret) return ret; + nfsd_slot_shrinker = 
shrinker_alloc(0, "nfsd-DRC-slot"); + if (!nfsd_slot_shrinker) { + rhltable_destroy(&nfs4_file_rhltable); + return -ENOMEM; + } + nfsd_slot_shrinker->count_objects = nfsd_slot_count; + nfsd_slot_shrinker->scan_objects = nfsd_slot_scan; + shrinker_register(nfsd_slot_shrinker); + set_max_delegations(); return 0; } @@ -8827,6 +8889,7 @@ void nfs4_state_shutdown(void) { rhltable_destroy(&nfs4_file_rhltable); + shrinker_free(nfsd_slot_shrinker); } static void diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4251ff3c5ad1..f45aee751a10 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -325,6 +325,7 @@ struct nfsd4_session { u32 se_cb_prog; struct list_head se_hash; /* hash by sessionid */ struct list_head se_perclnt; + struct list_head se_all_sessions;/* global list of sessions */ struct nfs4_client *se_client; struct nfs4_sessionid se_sessionid; struct nfsd4_channel_attrs se_fchannel;
Add a shrinker which frees unused slots and may ask the clients to use fewer slots on each session. We keep a global count of the number of freeable slots, which is the sum of one less than the current "target" slots in all sessions in all clients in all net-namespaces. This number is reported by the shrinker. When the shrinker is asked to free some slots, we call reduce_session_slots() on each session in round-robin order, asking each to reduce its slot count by 1. This will reduce the "target" so the number reported by the shrinker will reduce immediately. The memory will only be freed later when the client has confirmed that it is no longer needed. We use a global list of sessions and move the "head" to after the last session that we asked to reduce, so the next callback from the shrinker will move on to the next session. This pressure should be applied "evenly" across all sessions over time. Signed-off-by: NeilBrown <neilb@suse.de> --- fs/nfsd/nfs4state.c | 71 ++++++++++++++++++++++++++++++++++++++++++--- fs/nfsd/state.h | 1 + 2 files changed, 68 insertions(+), 4 deletions(-)