Message ID | 172972088601.81717.17711026200394256822@noble.neil.brown.name (mailing list archive) |
---|---|
State | New |
Headers | show |
Series | [-,for,-next,V2] nfsd: dynamically adjust per-client DRC slot limits. | expand |
On Thu, Oct 24, 2024 at 09:01:26AM +1100, NeilBrown wrote: > > From 6e071e346134e5b21db01f042039ab0159bb23a3 Mon Sep 17 00:00:00 2001 > From: NeilBrown <neilb@suse.de> > Date: Wed, 17 Apr 2024 11:50:53 +1000 > Subject: [PATCH] nfsd: dynamically adjust per-client DRC slot limits. > > Currently per-client DRC slot limits (for v4.1+) are calculated when the > client connects, and they are left unchanged. So earlier clients can > get a larger share when memory is tight. > > The heuristic for choosing a number includes the number of configured > server threads. This is problematic for 2 reasons. > 1/ sv_nrthreads is different in different net namespaces, but the > memory allocation is global across all namespaces. So different > namespaces get treated differently without good reason. > 2/ a future patch will auto-configure number of threads based on > load so that there will be no need to preconfigure a number. This will > make the current heuristic even more arbitrary. > > NFSv4.1 allows the number of slots to be varied dynamically - in the > reply to each SEQUENCE op. With this patch we provide a provisional > upper limit in the EXCHANGE_ID reply which might end up being too big, > and adjust it with each SEQUENCE reply. > > The goal for when memory is tight is to allow each client to have a > similar number of slots. So clients that ask for larger slots get more > memory. This may not be ideal. It could be changed later. > > So we track the sum of the slot sizes of all active clients, and share > memory out based on the ratio of the slot size for a given client with > the sum of slot sizes. We never allow more in a SEQUENCE reply than we > did in the EXCHANGE_ID reply. > > Signed-off-by: NeilBrown <neilb@suse.de> > --- > > This time actually with the comment change. > > > fs/nfsd/nfs4state.c | 82 +++++++++++++++++++++++++-------------------- > fs/nfsd/nfs4xdr.c | 2 +- > fs/nfsd/nfsd.h | 6 +++- > fs/nfsd/nfssvc.c | 7 ++-- > fs/nfsd/state.h | 2 +- > fs/nfsd/xdr4.h | 2 -- > 6 files changed, 57 insertions(+), 44 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 56b261608af4..d585c267731b 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -1909,44 +1909,26 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) > } > > /* > - * XXX: If we run out of reserved DRC memory we could (up to a point) > - * re-negotiate active sessions and reduce their slot usage to make > - * room for new connections. For now we just fail the create session. > + * When a client connects it gets a max_requests number that would allow > + * it to use 1/8 of the memory we think can reasonably be used for the DRC. > + * Later in response to SEQUENCE operations we further limit that when there > + * are more than 8 concurrent clients. > */ > -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) > +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) > { > u32 slotsize = slot_bytes(ca); > u32 num = ca->maxreqs; > - unsigned long avail, total_avail; > - unsigned int scale_factor; > + unsigned long avail; > > spin_lock(&nfsd_drc_lock); > - if (nfsd_drc_max_mem > nfsd_drc_mem_used) > - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; > - else > - /* We have handed out more space than we chose in > - * set_max_drc() to allow. That isn't really a > - * problem as long as that doesn't make us think we > - * have lots more due to integer overflow. > - */ > - total_avail = 0; > - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); > - /* > - * Never use more than a fraction of the remaining memory, > - * unless it's the only way to give this client a slot. > - * The chosen fraction is either 1/8 or 1/number of threads, > - * whichever is smaller. This ensures there are adequate > - * slots to support multiple clients per thread. > - * Give the client one slot even if that would require > - * over-allocation--it is better than failure. > - */ > - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); > > - avail = clamp_t(unsigned long, avail, slotsize, > - total_avail/scale_factor); > - num = min_t(int, num, avail / slotsize); > - num = max_t(int, num, 1); > - nfsd_drc_mem_used += num * slotsize; > + avail = min(NFSD_MAX_MEM_PER_SESSION, > + nfsd_drc_max_mem / 8); > + > + num = clamp_t(int, num, 1, avail / slotsize); > + > + nfsd_drc_slotsize_sum += slotsize; > + > spin_unlock(&nfsd_drc_lock); > > return num; > @@ -1957,10 +1939,34 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) > int slotsize = slot_bytes(ca); > > spin_lock(&nfsd_drc_lock); > - nfsd_drc_mem_used -= slotsize * ca->maxreqs; > + nfsd_drc_slotsize_sum -= slotsize; > spin_unlock(&nfsd_drc_lock); > } > > +/* > + * Report the number of slots that we would like the client to limit > + * itself to. > + */ > +static unsigned int nfsd4_get_drc_slots(struct nfsd4_session *session) > +{ > + u32 slotsize = slot_bytes(&session->se_fchannel); > + unsigned long avail; > + unsigned long slotsize_sum = READ_ONCE(nfsd_drc_slotsize_sum); > + > + if (slotsize_sum < slotsize) > + slotsize_sum = slotsize; > + > + /* > + * We share memory so all clients get the same number of slots. > + * Find our share of avail mem across all active clients, > + * then limit to 1/8 of total, and configured max > + */ > + avail = min3(nfsd_drc_max_mem * slotsize / slotsize_sum, > + nfsd_drc_max_mem / 8, > + NFSD_MAX_MEM_PER_SESSION); > + return max3(1UL, avail / slotsize, session->se_fchannel.maxreqs); > +} > + > static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, > struct nfsd4_channel_attrs *battrs) > { > @@ -3731,7 +3737,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs > * Note that we always allow at least one slot, because our > * accounting is soft and provides no guarantees either way. > */ > - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); > + ca->maxreqs = nfsd4_get_drc_mem(ca); > > return nfs_ok; > } > @@ -4225,10 +4231,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > slot = session->se_slots[seq->slotid]; > dprintk("%s: slotid %d\n", __func__, seq->slotid); > > - /* We do not negotiate the number of slots yet, so set the > - * maxslots to the session maxreqs which is used to encode > - * sr_highest_slotid and the sr_target_slot id to maxslots */ > - seq->maxslots = session->se_fchannel.maxreqs; > + /* Negotiate number of slots: set the target, and use the > + * same for max as long as it doesn't decrease the max > + * (that isn't allowed). > + */ > + seq->target_maxslots = nfsd4_get_drc_slots(session); > + seq->maxslots = max(seq->maxslots, seq->target_maxslots); > > trace_nfsd_slot_seqid_sequence(clp, seq, slot); > status = check_slot_seqid(seq->seqid, slot->sl_seqid, > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index f118921250c3..e4e706872e54 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4955,7 +4955,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, > if (nfserr != nfs_ok) > return nfserr; > /* sr_target_highest_slotid */ > - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); > + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); > if (nfserr != nfs_ok) > return nfserr; > /* sr_status_flags */ > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 4b56ba1e8e48..33c9db3ee8b6 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -90,7 +90,11 @@ extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; > extern struct mutex nfsd_mutex; > extern spinlock_t nfsd_drc_lock; > extern unsigned long nfsd_drc_max_mem; > -extern unsigned long nfsd_drc_mem_used; > +/* Storing the sum of the slot sizes for all active clients (across > + * all net-namespaces) allows a share of total available memory to > + * be allocaed to each client based on its slot size. > + */ > +extern unsigned long nfsd_drc_slotsize_sum; > extern atomic_t nfsd_th_cnt; /* number of available threads */ > > extern const struct seq_operations nfs_exports_op; > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 49e2f32102ab..e596eb5d10db 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -78,7 +78,7 @@ DEFINE_MUTEX(nfsd_mutex); > */ > DEFINE_SPINLOCK(nfsd_drc_lock); > unsigned long nfsd_drc_max_mem; > -unsigned long nfsd_drc_mem_used; > +unsigned long nfsd_drc_slotsize_sum; > > #if IS_ENABLED(CONFIG_NFS_LOCALIO) > static const struct svc_version *localio_versions[] = { > @@ -589,10 +589,13 @@ void nfsd_reset_versions(struct nfsd_net *nn) > */ > static void set_max_drc(void) > { > + if (nfsd_drc_max_mem) > + return; > + > #define NFSD_DRC_SIZE_SHIFT 7 > nfsd_drc_max_mem = (nr_free_buffer_pages() > >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; > - nfsd_drc_mem_used = 0; > + nfsd_drc_slotsize_sum = 0; > dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); > } > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 79c743c01a47..fe71ae3c577b 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -214,7 +214,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) > /* Maximum number of slots per session. 160 is useful for long haul TCP */ > #define NFSD_MAX_SLOTS_PER_SESSION 160 > /* Maximum session per slot cache size */ > -#define NFSD_SLOT_CACHE_SIZE 2048 > +#define NFSD_SLOT_CACHE_SIZE 2048UL > /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ > #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 > #define NFSD_MAX_MEM_PER_SESSION \ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index 2a21a7662e03..71b87190a4a6 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -575,9 +575,7 @@ struct nfsd4_sequence { > u32 slotid; /* request/response */ > u32 maxslots; /* request/response */ > u32 cachethis; /* request */ > -#if 0 > u32 target_maxslots; /* response */ > -#endif /* not yet */ > u32 status_flags; /* response */ > }; > > -- > 2.46.0 > I've applied this to nfsd-next while we review, to enable broad testing.
On Thu, Oct 24, 2024 at 09:01:26AM +1100, NeilBrown wrote: > > From 6e071e346134e5b21db01f042039ab0159bb23a3 Mon Sep 17 00:00:00 2001 > From: NeilBrown <neilb@suse.de> > Date: Wed, 17 Apr 2024 11:50:53 +1000 > Subject: [PATCH] nfsd: dynamically adjust per-client DRC slot limits. > > Currently per-client DRC slot limits (for v4.1+) are calculated when the > client connects, and they are left unchanged. So earlier clients can > get a larger share when memory is tight. > > The heuristic for choosing a number includes the number of configured > server threads. This is problematic for 2 reasons. > 1/ sv_nrthreads is different in different net namespaces, but the > memory allocation is global across all namespaces. So different > namespaces get treated differently without good reason. > 2/ a future patch will auto-configure number of threads based on > load so that there will be no need to preconfigure a number. This will > make the current heuristic even more arbitrary. > > NFSv4.1 allows the number of slots to be varied dynamically - in the > reply to each SEQUENCE op. With this patch we provide a provisional > upper limit in the EXCHANGE_ID reply which might end up being too big, > and adjust it with each SEQUENCE reply. > > The goal for when memory is tight is to allow each client to have a > similar number of slots. So clients that ask for larger slots get more > memory. This may not be ideal. It could be changed later. > > So we track the sum of the slot sizes of all active clients, and share > memory out based on the ratio of the slot size for a given client with > the sum of slot sizes. We never allow more in a SEQUENCE reply than we > did in the EXCHANGE_ID reply. IIUC: s/EXCHANGE_ID/CREATE_SESSION It would be great if nfsd4_create_session() could report the negotiated session parameters. dprintk would be fine since CREATE_SESSION is an infrequent operation, and administrators might want this information if they notice unexplained slowness. It might be nicer if these heuristics were eventually replaced by a shrinker. Maybe for another day. One or two more thoughts below. > Signed-off-by: NeilBrown <neilb@suse.de> > --- > > This time actually with the comment change. > > > fs/nfsd/nfs4state.c | 82 +++++++++++++++++++++++++-------------------- > fs/nfsd/nfs4xdr.c | 2 +- > fs/nfsd/nfsd.h | 6 +++- > fs/nfsd/nfssvc.c | 7 ++-- > fs/nfsd/state.h | 2 +- > fs/nfsd/xdr4.h | 2 -- > 6 files changed, 57 insertions(+), 44 deletions(-) > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > index 56b261608af4..d585c267731b 100644 > --- a/fs/nfsd/nfs4state.c > +++ b/fs/nfsd/nfs4state.c > @@ -1909,44 +1909,26 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) > } > > /* > - * XXX: If we run out of reserved DRC memory we could (up to a point) > - * re-negotiate active sessions and reduce their slot usage to make > - * room for new connections. For now we just fail the create session. > + * When a client connects it gets a max_requests number that would allow > + * it to use 1/8 of the memory we think can reasonably be used for the DRC. > + * Later in response to SEQUENCE operations we further limit that when there > + * are more than 8 concurrent clients. Eight is a bit of a magic number. Other constants appear to get nice symbolic names. > */ > -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) > +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) > { > u32 slotsize = slot_bytes(ca); > u32 num = ca->maxreqs; > - unsigned long avail, total_avail; > - unsigned int scale_factor; > + unsigned long avail; > > spin_lock(&nfsd_drc_lock); > - if (nfsd_drc_max_mem > nfsd_drc_mem_used) > - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; > - else > - /* We have handed out more space than we chose in > - * set_max_drc() to allow. That isn't really a > - * problem as long as that doesn't make us think we > - * have lots more due to integer overflow. > - */ > - total_avail = 0; > - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); > - /* > - * Never use more than a fraction of the remaining memory, > - * unless it's the only way to give this client a slot. > - * The chosen fraction is either 1/8 or 1/number of threads, > - * whichever is smaller. This ensures there are adequate > - * slots to support multiple clients per thread. > - * Give the client one slot even if that would require > - * over-allocation--it is better than failure. > - */ > - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); > > - avail = clamp_t(unsigned long, avail, slotsize, > - total_avail/scale_factor); > - num = min_t(int, num, avail / slotsize); > - num = max_t(int, num, 1); > - nfsd_drc_mem_used += num * slotsize; > + avail = min(NFSD_MAX_MEM_PER_SESSION, > + nfsd_drc_max_mem / 8); > + > + num = clamp_t(int, num, 1, avail / slotsize); The variables involved in this computation are all unsigned. Why is clamp_t called with an "int" first argument? > + > + nfsd_drc_slotsize_sum += slotsize; > + > spin_unlock(&nfsd_drc_lock); > > return num; > @@ -1957,10 +1939,34 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) > int slotsize = slot_bytes(ca); > > spin_lock(&nfsd_drc_lock); > - nfsd_drc_mem_used -= slotsize * ca->maxreqs; > + nfsd_drc_slotsize_sum -= slotsize; > spin_unlock(&nfsd_drc_lock); > } > > +/* > + * Report the number of slots that we would like the client to limit > + * itself to. > + */ > +static unsigned int nfsd4_get_drc_slots(struct nfsd4_session *session) > +{ > + u32 slotsize = slot_bytes(&session->se_fchannel); > + unsigned long avail; > + unsigned long slotsize_sum = READ_ONCE(nfsd_drc_slotsize_sum); > + > + if (slotsize_sum < slotsize) > + slotsize_sum = slotsize; > + > + /* > + * We share memory so all clients get the same number of slots. > + * Find our share of avail mem across all active clients, > + * then limit to 1/8 of total, and configured max > + */ > + avail = min3(nfsd_drc_max_mem * slotsize / slotsize_sum, > + nfsd_drc_max_mem / 8, > + NFSD_MAX_MEM_PER_SESSION); > + return max3(1UL, avail / slotsize, session->se_fchannel.maxreqs); > +} > + > static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, > struct nfsd4_channel_attrs *battrs) > { > @@ -3731,7 +3737,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs > * Note that we always allow at least one slot, because our > * accounting is soft and provides no guarantees either way. > */ > - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); > + ca->maxreqs = nfsd4_get_drc_mem(ca); > > return nfs_ok; > } > @@ -4225,10 +4231,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > slot = session->se_slots[seq->slotid]; > dprintk("%s: slotid %d\n", __func__, seq->slotid); > > - /* We do not negotiate the number of slots yet, so set the > - * maxslots to the session maxreqs which is used to encode > - * sr_highest_slotid and the sr_target_slot id to maxslots */ > - seq->maxslots = session->se_fchannel.maxreqs; > + /* Negotiate number of slots: set the target, and use the > + * same for max as long as it doesn't decrease the max > + * (that isn't allowed). > + */ > + seq->target_maxslots = nfsd4_get_drc_slots(session); > + seq->maxslots = max(seq->maxslots, seq->target_maxslots); > > trace_nfsd_slot_seqid_sequence(clp, seq, slot); > status = check_slot_seqid(seq->seqid, slot->sl_seqid, > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index f118921250c3..e4e706872e54 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -4955,7 +4955,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, > if (nfserr != nfs_ok) > return nfserr; > /* sr_target_highest_slotid */ > - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); > + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); > if (nfserr != nfs_ok) > return nfserr; > /* sr_status_flags */ > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > index 4b56ba1e8e48..33c9db3ee8b6 100644 > --- a/fs/nfsd/nfsd.h > +++ b/fs/nfsd/nfsd.h > @@ -90,7 +90,11 @@ extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; > extern struct mutex nfsd_mutex; > extern spinlock_t nfsd_drc_lock; > extern unsigned long nfsd_drc_max_mem; > -extern unsigned long nfsd_drc_mem_used; > +/* Storing the sum of the slot sizes for all active clients (across > + * all net-namespaces) allows a share of total available memory to > + * be allocaed to each client based on its slot size. > + */ > +extern unsigned long nfsd_drc_slotsize_sum; > extern atomic_t nfsd_th_cnt; /* number of available threads */ > > extern const struct seq_operations nfs_exports_op; > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > index 49e2f32102ab..e596eb5d10db 100644 > --- a/fs/nfsd/nfssvc.c > +++ b/fs/nfsd/nfssvc.c > @@ -78,7 +78,7 @@ DEFINE_MUTEX(nfsd_mutex); > */ > DEFINE_SPINLOCK(nfsd_drc_lock); > unsigned long nfsd_drc_max_mem; > -unsigned long nfsd_drc_mem_used; > +unsigned long nfsd_drc_slotsize_sum; > > #if IS_ENABLED(CONFIG_NFS_LOCALIO) > static const struct svc_version *localio_versions[] = { > @@ -589,10 +589,13 @@ void nfsd_reset_versions(struct nfsd_net *nn) > */ > static void set_max_drc(void) > { > + if (nfsd_drc_max_mem) > + return; > + > #define NFSD_DRC_SIZE_SHIFT 7 > nfsd_drc_max_mem = (nr_free_buffer_pages() > >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; > - nfsd_drc_mem_used = 0; > + nfsd_drc_slotsize_sum = 0; > dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); > } > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > index 79c743c01a47..fe71ae3c577b 100644 > --- a/fs/nfsd/state.h > +++ b/fs/nfsd/state.h > @@ -214,7 +214,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) > /* Maximum number of slots per session. 160 is useful for long haul TCP */ > #define NFSD_MAX_SLOTS_PER_SESSION 160 > /* Maximum session per slot cache size */ > -#define NFSD_SLOT_CACHE_SIZE 2048 > +#define NFSD_SLOT_CACHE_SIZE 2048UL > /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ > #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 > #define NFSD_MAX_MEM_PER_SESSION \ > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index 2a21a7662e03..71b87190a4a6 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -575,9 +575,7 @@ struct nfsd4_sequence { > u32 slotid; /* request/response */ > u32 maxslots; /* request/response */ > u32 cachethis; /* request */ > -#if 0 > u32 target_maxslots; /* response */ > -#endif /* not yet */ > u32 status_flags; /* response */ > }; > > -- > 2.46.0 >
On Mon, 28 Oct 2024, Chuck Lever wrote: > On Thu, Oct 24, 2024 at 09:01:26AM +1100, NeilBrown wrote: > > > > From 6e071e346134e5b21db01f042039ab0159bb23a3 Mon Sep 17 00:00:00 2001 > > From: NeilBrown <neilb@suse.de> > > Date: Wed, 17 Apr 2024 11:50:53 +1000 > > Subject: [PATCH] nfsd: dynamically adjust per-client DRC slot limits. > > > > Currently per-client DRC slot limits (for v4.1+) are calculated when the > > client connects, and they are left unchanged. So earlier clients can > > get a larger share when memory is tight. > > > > The heuristic for choosing a number includes the number of configured > > server threads. This is problematic for 2 reasons. > > 1/ sv_nrthreads is different in different net namespaces, but the > > memory allocation is global across all namespaces. So different > > namespaces get treated differently without good reason. > > 2/ a future patch will auto-configure number of threads based on > > load so that there will be no need to preconfigure a number. This will > > make the current heuristic even more arbitrary. > > > > NFSv4.1 allows the number of slots to be varied dynamically - in the > > reply to each SEQUENCE op. With this patch we provide a provisional > > upper limit in the EXCHANGE_ID reply which might end up being too big, > > and adjust it with each SEQUENCE reply. > > > > The goal for when memory is tight is to allow each client to have a > > similar number of slots. So clients that ask for larger slots get more > > memory. This may not be ideal. It could be changed later. > > > > So we track the sum of the slot sizes of all active clients, and share > > memory out based on the ratio of the slot size for a given client with > > the sum of slot sizes. We never allow more in a SEQUENCE reply than we > > did in the EXCHANGE_ID reply. > > IIUC: > > s/EXCHANGE_ID/CREATE_SESSION Yes, thanks. > > > It would be great if nfsd4_create_session() could report the > negotiated session parameters. dprintk would be fine since > CREATE_SESSION is an infrequent operation, and administrators might > want this information if they notice unexplained slowness. Maybe. But as the setting can change in response to SEQUENCE replies, maybe it would be better in /proc/fs/nfsd/clients/*/info, or a new file there? > > It might be nicer if these heuristics were eventually replaced by a > shrinker. Maybe for another day. I don't think so. A shrinker is for freeing things that have already been allocated. Here we want to predict whether future allocations will succeed easily. If we pre-allocated space for all slots in the reply cache, then a shrinker would be appropriate to reduce the pre-allocation, but we don't do that and I don't think we want to. > > One or two more thoughts below. > > > > Signed-off-by: NeilBrown <neilb@suse.de> > > --- > > > > This time actually with the comment change. > > > > > > fs/nfsd/nfs4state.c | 82 +++++++++++++++++++++++++-------------------- > > fs/nfsd/nfs4xdr.c | 2 +- > > fs/nfsd/nfsd.h | 6 +++- > > fs/nfsd/nfssvc.c | 7 ++-- > > fs/nfsd/state.h | 2 +- > > fs/nfsd/xdr4.h | 2 -- > > 6 files changed, 57 insertions(+), 44 deletions(-) > > > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index 56b261608af4..d585c267731b 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -1909,44 +1909,26 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) > > } > > > > /* > > - * XXX: If we run out of reserved DRC memory we could (up to a point) > > - * re-negotiate active sessions and reduce their slot usage to make > > - * room for new connections. For now we just fail the create session. > > + * When a client connects it gets a max_requests number that would allow > > + * it to use 1/8 of the memory we think can reasonably be used for the DRC. > > + * Later in response to SEQUENCE operations we further limit that when there > > + * are more than 8 concurrent clients. > > Eight is a bit of a magic number. Other constants appear to get nice > symbolic names. I wonder what name would make sense.... "min_clients" ??? Not ideal. Maybe we should just use "1" - then we wouldn't need a name. > > > > */ > > -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) > > +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) > > { > > u32 slotsize = slot_bytes(ca); > > u32 num = ca->maxreqs; > > - unsigned long avail, total_avail; > > - unsigned int scale_factor; > > + unsigned long avail; > > > > spin_lock(&nfsd_drc_lock); > > - if (nfsd_drc_max_mem > nfsd_drc_mem_used) > > - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; > > - else > > - /* We have handed out more space than we chose in > > - * set_max_drc() to allow. That isn't really a > > - * problem as long as that doesn't make us think we > > - * have lots more due to integer overflow. > > - */ > > - total_avail = 0; > > - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); > > - /* > > - * Never use more than a fraction of the remaining memory, > > - * unless it's the only way to give this client a slot. > > - * The chosen fraction is either 1/8 or 1/number of threads, > > - * whichever is smaller. This ensures there are adequate > > - * slots to support multiple clients per thread. > > - * Give the client one slot even if that would require > > - * over-allocation--it is better than failure. > > - */ > > - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); > > > > - avail = clamp_t(unsigned long, avail, slotsize, > > - total_avail/scale_factor); > > - num = min_t(int, num, avail / slotsize); > > - num = max_t(int, num, 1); > > - nfsd_drc_mem_used += num * slotsize; > > + avail = min(NFSD_MAX_MEM_PER_SESSION, > > + nfsd_drc_max_mem / 8); > > + > > + num = clamp_t(int, num, 1, avail / slotsize); > > The variables involved in this computation are all unsigned. Why is > clamp_t called with an "int" first argument? The code being replaced has a "unsigned" for max_t and clamp_t, then "int" for min_t and max_t. I guess I copied the wrong one :-( Do you want to make some tweeks to the patch, or should I resend it? Thanks, NeilBrown > > > > + > > + nfsd_drc_slotsize_sum += slotsize; > > + > > spin_unlock(&nfsd_drc_lock); > > > > return num; > > @@ -1957,10 +1939,34 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) > > int slotsize = slot_bytes(ca); > > > > spin_lock(&nfsd_drc_lock); > > - nfsd_drc_mem_used -= slotsize * ca->maxreqs; > > + nfsd_drc_slotsize_sum -= slotsize; > > spin_unlock(&nfsd_drc_lock); > > } > > > > +/* > > + * Report the number of slots that we would like the client to limit > > + * itself to. > > + */ > > +static unsigned int nfsd4_get_drc_slots(struct nfsd4_session *session) > > +{ > > + u32 slotsize = slot_bytes(&session->se_fchannel); > > + unsigned long avail; > > + unsigned long slotsize_sum = READ_ONCE(nfsd_drc_slotsize_sum); > > + > > + if (slotsize_sum < slotsize) > > + slotsize_sum = slotsize; > > + > > + /* > > + * We share memory so all clients get the same number of slots. > > + * Find our share of avail mem across all active clients, > > + * then limit to 1/8 of total, and configured max > > + */ > > + avail = min3(nfsd_drc_max_mem * slotsize / slotsize_sum, > > + nfsd_drc_max_mem / 8, > > + NFSD_MAX_MEM_PER_SESSION); > > + return max3(1UL, avail / slotsize, session->se_fchannel.maxreqs); > > +} > > + > > static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, > > struct nfsd4_channel_attrs *battrs) > > { > > @@ -3731,7 +3737,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs > > * Note that we always allow at least one slot, because our > > * accounting is soft and provides no guarantees either way. > > */ > > - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); > > + ca->maxreqs = nfsd4_get_drc_mem(ca); > > > > return nfs_ok; > > } > > @@ -4225,10 +4231,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > slot = session->se_slots[seq->slotid]; > > dprintk("%s: slotid %d\n", __func__, seq->slotid); > > > > - /* We do not negotiate the number of slots yet, so set the > > - * maxslots to the session maxreqs which is used to encode > > - * sr_highest_slotid and the sr_target_slot id to maxslots */ > > - seq->maxslots = session->se_fchannel.maxreqs; > > + /* Negotiate number of slots: set the target, and use the > > + * same for max as long as it doesn't decrease the max > > + * (that isn't allowed). > > + */ > > + seq->target_maxslots = nfsd4_get_drc_slots(session); > > + seq->maxslots = max(seq->maxslots, seq->target_maxslots); > > > > trace_nfsd_slot_seqid_sequence(clp, seq, slot); > > status = check_slot_seqid(seq->seqid, slot->sl_seqid, > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > > index f118921250c3..e4e706872e54 100644 > > --- a/fs/nfsd/nfs4xdr.c > > +++ b/fs/nfsd/nfs4xdr.c > > @@ -4955,7 +4955,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, > > if (nfserr != nfs_ok) > > return nfserr; > > /* sr_target_highest_slotid */ > > - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); > > + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); > > if (nfserr != nfs_ok) > > return nfserr; > > /* sr_status_flags */ > > diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h > > index 4b56ba1e8e48..33c9db3ee8b6 100644 > > --- a/fs/nfsd/nfsd.h > > +++ b/fs/nfsd/nfsd.h > > @@ -90,7 +90,11 @@ extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; > > extern struct mutex nfsd_mutex; > > extern spinlock_t nfsd_drc_lock; > > extern unsigned long nfsd_drc_max_mem; > > -extern unsigned long nfsd_drc_mem_used; > > +/* Storing the sum of the slot sizes for all active clients (across > > + * all net-namespaces) allows a share of total available memory to > > + * be allocaed to each client based on its slot size. > > + */ > > +extern unsigned long nfsd_drc_slotsize_sum; > > extern atomic_t nfsd_th_cnt; /* number of available threads */ > > > > extern const struct seq_operations nfs_exports_op; > > diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c > > index 49e2f32102ab..e596eb5d10db 100644 > > --- a/fs/nfsd/nfssvc.c > > +++ b/fs/nfsd/nfssvc.c > > @@ -78,7 +78,7 @@ DEFINE_MUTEX(nfsd_mutex); > > */ > > DEFINE_SPINLOCK(nfsd_drc_lock); > > unsigned long nfsd_drc_max_mem; > > -unsigned long nfsd_drc_mem_used; > > +unsigned long nfsd_drc_slotsize_sum; > > > > #if IS_ENABLED(CONFIG_NFS_LOCALIO) > > static const struct svc_version *localio_versions[] = { > > @@ -589,10 +589,13 @@ void nfsd_reset_versions(struct nfsd_net *nn) > > */ > > static void set_max_drc(void) > > { > > + if (nfsd_drc_max_mem) > > + return; > > + > > #define NFSD_DRC_SIZE_SHIFT 7 > > nfsd_drc_max_mem = (nr_free_buffer_pages() > > >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; > > - nfsd_drc_mem_used = 0; > > + nfsd_drc_slotsize_sum = 0; > > dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); > > } > > > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > index 79c743c01a47..fe71ae3c577b 100644 > > --- a/fs/nfsd/state.h > > +++ b/fs/nfsd/state.h > > @@ -214,7 +214,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) > > /* Maximum number of slots per session. 160 is useful for long haul TCP */ > > #define NFSD_MAX_SLOTS_PER_SESSION 160 > > /* Maximum session per slot cache size */ > > -#define NFSD_SLOT_CACHE_SIZE 2048 > > +#define NFSD_SLOT_CACHE_SIZE 2048UL > > /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ > > #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 > > #define NFSD_MAX_MEM_PER_SESSION \ > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index 2a21a7662e03..71b87190a4a6 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -575,9 +575,7 @@ struct nfsd4_sequence { > > u32 slotid; /* request/response */ > > u32 maxslots; /* request/response */ > > u32 cachethis; /* request */ > > -#if 0 > > u32 target_maxslots; /* response */ > > -#endif /* not yet */ > > u32 status_flags; /* response */ > > }; > > > > -- > > 2.46.0 > > > > -- > Chuck Lever >
On Mon, 28 Oct 2024, NeilBrown wrote: > On Mon, 28 Oct 2024, Chuck Lever wrote: > > > > > It might be nicer if these heuristics were eventually replaced by a > > shrinker. Maybe for another day. > > I don't think so. A shrinker is for freeing things that have already > been allocated. Here we want to predict whether future allocations will > succeed easily. If we pre-allocated space for all slots in the reply > cache, then a shrinker would be appropriate to reduce the > pre-allocation, but we don't do that and I don't think we want to. > Oh wait - we do pre-allocate. In alloc_session(). So we really need to be freeing some of those when we push the max_slot limit down. I need to revisit this patch. Thanks, NeilBrown
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 56b261608af4..d585c267731b 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1909,44 +1909,26 @@ static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca) } /* - * XXX: If we run out of reserved DRC memory we could (up to a point) - * re-negotiate active sessions and reduce their slot usage to make - * room for new connections. For now we just fail the create session. + * When a client connects it gets a max_requests number that would allow + * it to use 1/8 of the memory we think can reasonably be used for the DRC. + * Later in response to SEQUENCE operations we further limit that when there + * are more than 8 concurrent clients. */ -static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn) +static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca) { u32 slotsize = slot_bytes(ca); u32 num = ca->maxreqs; - unsigned long avail, total_avail; - unsigned int scale_factor; + unsigned long avail; spin_lock(&nfsd_drc_lock); - if (nfsd_drc_max_mem > nfsd_drc_mem_used) - total_avail = nfsd_drc_max_mem - nfsd_drc_mem_used; - else - /* We have handed out more space than we chose in - * set_max_drc() to allow. That isn't really a - * problem as long as that doesn't make us think we - * have lots more due to integer overflow. - */ - total_avail = 0; - avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION, total_avail); - /* - * Never use more than a fraction of the remaining memory, - * unless it's the only way to give this client a slot. - * The chosen fraction is either 1/8 or 1/number of threads, - * whichever is smaller. This ensures there are adequate - * slots to support multiple clients per thread. - * Give the client one slot even if that would require - * over-allocation--it is better than failure. - */ - scale_factor = max_t(unsigned int, 8, nn->nfsd_serv->sv_nrthreads); - avail = clamp_t(unsigned long, avail, slotsize, - total_avail/scale_factor); - num = min_t(int, num, avail / slotsize); - num = max_t(int, num, 1); - nfsd_drc_mem_used += num * slotsize; + avail = min(NFSD_MAX_MEM_PER_SESSION, + nfsd_drc_max_mem / 8); + + num = clamp_t(int, num, 1, avail / slotsize); + + nfsd_drc_slotsize_sum += slotsize; + spin_unlock(&nfsd_drc_lock); return num; @@ -1957,10 +1939,34 @@ static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca) int slotsize = slot_bytes(ca); spin_lock(&nfsd_drc_lock); - nfsd_drc_mem_used -= slotsize * ca->maxreqs; + nfsd_drc_slotsize_sum -= slotsize; spin_unlock(&nfsd_drc_lock); } +/* + * Report the number of slots that we would like the client to limit + * itself to. + */ +static unsigned int nfsd4_get_drc_slots(struct nfsd4_session *session) +{ + u32 slotsize = slot_bytes(&session->se_fchannel); + unsigned long avail; + unsigned long slotsize_sum = READ_ONCE(nfsd_drc_slotsize_sum); + + if (slotsize_sum < slotsize) + slotsize_sum = slotsize; + + /* + * We share memory so all clients get the same number of slots. + * Find our share of avail mem across all active clients, + * then limit to 1/8 of total, and configured max + */ + avail = min3(nfsd_drc_max_mem * slotsize / slotsize_sum, + nfsd_drc_max_mem / 8, + NFSD_MAX_MEM_PER_SESSION); + return max3(1UL, avail / slotsize, session->se_fchannel.maxreqs); +} + static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs, struct nfsd4_channel_attrs *battrs) { @@ -3731,7 +3737,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs * Note that we always allow at least one slot, because our * accounting is soft and provides no guarantees either way. */ - ca->maxreqs = nfsd4_get_drc_mem(ca, nn); + ca->maxreqs = nfsd4_get_drc_mem(ca); return nfs_ok; } @@ -4225,10 +4231,12 @@ nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, slot = session->se_slots[seq->slotid]; dprintk("%s: slotid %d\n", __func__, seq->slotid); - /* We do not negotiate the number of slots yet, so set the - * maxslots to the session maxreqs which is used to encode - * sr_highest_slotid and the sr_target_slot id to maxslots */ - seq->maxslots = session->se_fchannel.maxreqs; + /* Negotiate number of slots: set the target, and use the + * same for max as long as it doesn't decrease the max + * (that isn't allowed). + */ + seq->target_maxslots = nfsd4_get_drc_slots(session); + seq->maxslots = max(seq->maxslots, seq->target_maxslots); trace_nfsd_slot_seqid_sequence(clp, seq, slot); status = check_slot_seqid(seq->seqid, slot->sl_seqid, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index f118921250c3..e4e706872e54 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4955,7 +4955,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr != nfs_ok) return nfserr; /* sr_target_highest_slotid */ - nfserr = nfsd4_encode_slotid4(xdr, seq->maxslots - 1); + nfserr = nfsd4_encode_slotid4(xdr, seq->target_maxslots - 1); if (nfserr != nfs_ok) return nfserr; /* sr_status_flags */ diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 4b56ba1e8e48..33c9db3ee8b6 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -90,7 +90,11 @@ extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; extern spinlock_t nfsd_drc_lock; extern unsigned long nfsd_drc_max_mem; -extern unsigned long nfsd_drc_mem_used; +/* Storing the sum of the slot sizes for all active clients (across + * all net-namespaces) allows a share of total available memory to + * be allocaed to each client based on its slot size. + */ +extern unsigned long nfsd_drc_slotsize_sum; extern atomic_t nfsd_th_cnt; /* number of available threads */ extern const struct seq_operations nfs_exports_op; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 49e2f32102ab..e596eb5d10db 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -78,7 +78,7 @@ DEFINE_MUTEX(nfsd_mutex); */ DEFINE_SPINLOCK(nfsd_drc_lock); unsigned long nfsd_drc_max_mem; -unsigned long nfsd_drc_mem_used; +unsigned long nfsd_drc_slotsize_sum; #if IS_ENABLED(CONFIG_NFS_LOCALIO) static const struct svc_version *localio_versions[] = { @@ -589,10 +589,13 @@ void nfsd_reset_versions(struct nfsd_net *nn) */ static void set_max_drc(void) { + if (nfsd_drc_max_mem) + return; + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_drc_max_mem = (nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE; - nfsd_drc_mem_used = 0; + nfsd_drc_slotsize_sum = 0; dprintk("%s nfsd_drc_max_mem %lu \n", __func__, nfsd_drc_max_mem); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 79c743c01a47..fe71ae3c577b 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -214,7 +214,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s) /* Maximum number of slots per session. 160 is useful for long haul TCP */ #define NFSD_MAX_SLOTS_PER_SESSION 160 /* Maximum session per slot cache size */ -#define NFSD_SLOT_CACHE_SIZE 2048 +#define NFSD_SLOT_CACHE_SIZE 2048UL /* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */ #define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32 #define NFSD_MAX_MEM_PER_SESSION \ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 2a21a7662e03..71b87190a4a6 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -575,9 +575,7 @@ struct nfsd4_sequence { u32 slotid; /* request/response */ u32 maxslots; /* request/response */ u32 cachethis; /* request */ -#if 0 u32 target_maxslots; /* response */ -#endif /* not yet */ u32 status_flags; /* response */ };