Message ID | 20220824002756.3659568-7-lsahlber@redhat.com (mailing list archive) |
---|---|
State | New, archived |
Series | [1/6] cifs: Make tcon contain a wrapper structure cached_fids instead of cached_fid |
On 8/23/2022 8:27 PM, Ronnie Sahlberg wrote:
> Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
> ---
>  fs/cifs/cached_dir.c | 32 ++++++++++++++++++++++++++++++--
>  fs/cifs/cached_dir.h |  2 ++
>  2 files changed, 32 insertions(+), 2 deletions(-)
>
> [...]
>
> @@ -321,6 +322,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
>  	cfid->tcon = tcon;
>  	cfid->time = jiffies;
>  	cfid->is_open = true;
> +	queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);

Wouldn't it be more efficient to implement a laundromat? There will be
MAX_CACHED_FIDS of these delayed workers, and the timing does not need
to be precise (right?).

Is it worth considering making HZ*30 into a tunable?

Tom.
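For reference, the "laundromat" Tom suggests amounts to one per-tcon sweeper that periodically walks the cached handles, rather than one delayed_work per cached_fid (the per-cfid lease_timeout and has_timeout in this patch would then go away). A minimal sketch in the context of fs/cifs/cached_dir.c, assuming struct cached_fids gains a laundromat_work member and keeps its handles on a list named entries; cfids_laundromat_worker and DIR_CACHE_TIMEOUT are illustrative names, not part of this patch:

/* Illustrative only: a single per-tcon "laundromat" sweep. */
#define DIR_CACHE_TIMEOUT	(30 * HZ)

static void cfids_laundromat_worker(struct work_struct *work)
{
	struct cached_fids *cfids = container_of(work, struct cached_fids,
						 laundromat_work.work);
	struct cached_fid *cfid;

	spin_lock(&cfids->cfid_list_lock);
	list_for_each_entry(cfid, &cfids->entries, entry) {
		/* drop any directory lease that has been cached too long */
		if (cfid->has_lease &&
		    time_after(jiffies, cfid->time + DIR_CACHE_TIMEOUT))
			queue_work(cifsiod_wq, &cfid->lease_break);
	}
	spin_unlock(&cfids->cfid_list_lock);

	/* re-arm; the sweep period does not need to be precise */
	queue_delayed_work(cifsiod_wq, &cfids->laundromat_work,
			   DIR_CACHE_TIMEOUT);
}

/* armed once, when the per-tcon cached_fids structure is created */
INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker);
queue_delayed_work(cifsiod_wq, &cfids->laundromat_work, DIR_CACHE_TIMEOUT);

This replaces up to MAX_CACHED_FIDS delayed workers with a single one, and the existing lease_break worker still performs the actual teardown, so the reference counting stays where the patch already puts it.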
On Wed, 24 Aug 2022 at 23:58, Tom Talpey <tom@talpey.com> wrote:
>
> On 8/23/2022 8:27 PM, Ronnie Sahlberg wrote:
> > [...]
> > +	queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);
>
> Wouldn't it be more efficient to implement a laundromat? There
> will be MAX_CACHED_FIDS of these delayed workers, and the
> timing does not need to be precise (right?).

I was thinking that at first, but thought it would be easier to just use
delayed events on the pre-existing work queue. However, having two
potentially racing work items turned out to be as complex as just having
a separate thread for the tcon.

I will drop this patch for now and re-do it with a laundromat instead.
It will be more efficient and I think it will make the logic a bit
simpler too.

> Is it worth considering making HZ*30 into a tunable?

Maybe. I can add that when I re-spin this patch.

Rescinding leases is super hard to get right. Rescind them too quickly
and you miss out on potential performance. Rescind them too late and you
hurt performance, at least for other clients.

Right now the caching is binary: it is either enabled or fully disabled.
I would like timeouts like this to be auto-adjusted by the module itself,
because setting it "correctly" or "optimally" will probably be super
hard, to impossible, for the average sysadmin. Heck, I don't think I
could even set a parameter like this correctly.

And that is not even taking into account that workloads change
dynamically over time, so any fixed value will only be "correct" for
some part of the time anyway. Sometimes these changes occur over just a
few minutes or hours, so a fixed value is impossible to come up with.

I think it would be possible to have this adjusted automatically and
dynamically. I want to measure things like "how often do we re-open a
directory while holding a lease" and "how often is the lease broken by
the server", and try to keep the first as high as possible while keeping
the second as low as possible, or zero, and have this adjust
automatically at runtime.

> Tom.
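The measurement Ronnie describes could start out as nothing more than two per-tcon counters that the periodic sweep samples to stretch or shrink the timeout. A rough, purely hypothetical sketch; none of these names exist in the patch or in cifs, and the thresholds are placeholders:

/*
 * Hypothetical per-tcon statistics, bumped from the hot paths:
 *   dir_lease_reuses - open_cached_dir() found a still-valid lease
 *   dir_lease_breaks - the server broke a cached directory lease
 */
static atomic_t dir_lease_reuses = ATOMIC_INIT(0);
static atomic_t dir_lease_breaks = ATOMIC_INIT(0);

/*
 * Sampled periodically (e.g. from the laundromat sweep): grow the
 * timeout while caching pays off, shrink it when the server keeps
 * breaking our leases, i.e. when we are hurting other clients.
 */
static unsigned long adjust_dir_cache_timeout(unsigned long timeout)
{
	unsigned int reuses = atomic_xchg(&dir_lease_reuses, 0);
	unsigned int breaks = atomic_xchg(&dir_lease_breaks, 0);

	if (breaks == 0 && reuses > 0)
		timeout = min(timeout * 2, 300UL * HZ);
	else if (breaks > reuses)
		timeout = max(timeout / 2, 5UL * HZ);
	return timeout;
}

Whether that particular heuristic is the right one is exactly the open question in the follow-up below, but the plumbing itself is cheap.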
On 8/25/2022 12:39 AM, ronnie sahlberg wrote:
> On Wed, 24 Aug 2022 at 23:58, Tom Talpey <tom@talpey.com> wrote:
>> Is it worth considering making HZ*30 into a tunable?
>
> Maybe. I can add that when I re-spin this patch.
>
> Rescinding leases is super hard to get right. Rescind them too quickly
> and you miss out on potential performance. Rescind them too late and
> you hurt performance, at least for other clients.
>
> Right now the caching is binary: it is either enabled or fully
> disabled. I would like timeouts like this to be auto-adjusted by the
> module itself, because setting it "correctly" or "optimally" will
> probably be super hard, to impossible, for the average sysadmin.
> Heck, I don't think I could even set a parameter like this correctly.
>
> And that is not even taking into account that workloads change
> dynamically over time, so any fixed value will only be "correct" for
> some part of the time anyway. Sometimes these changes occur over just
> a few minutes or hours, so a fixed value is impossible to come up with.
>
> I think it would be possible to have this adjusted automatically and
> dynamically. I want to measure things like "how often do we re-open a
> directory while holding a lease" and "how often is the lease broken by
> the server", and try to keep the first as high as possible while
> keeping the second as low as possible, or zero, and have this adjust
> automatically at runtime.

Unfortunately I think the problem is that the lease behavior is
dependent on the workload, and on whether the app is sharing files. If
the lease is never recalled, obviously the caching can be "forever".
But for apps banging heads over a set of shared files in a shared
directory, the caching will be nearly pointless. I'm not sure how you
can sanely automate that detection.

However, it seems to me that 30 seconds is pretty arbitrary. It might
help speed up a "tar x" or "tar c", but is it broadly worthwhile? Would
5 seconds do the same? Dunno.

Tom.
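Whichever value wins, exposing it as a module parameter would at least make experiments like "5 vs 30 seconds" cheap to run. A minimal sketch of such a tunable; the parameter name is made up for illustration and is not something this patch adds:

/* Illustrative module parameter replacing the hard-coded HZ * 30. */
static unsigned int dir_cache_timeout = 30;
module_param(dir_cache_timeout, uint, 0644);
MODULE_PARM_DESC(dir_cache_timeout,
		 "Seconds to keep a cached directory lease before dropping it (default: 30)");

/* open_cached_dir() would then arm the timer with the tunable value */
queue_delayed_work(cifsiod_wq, &cfid->lease_timeout,
		   dir_cache_timeout * HZ);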
diff --git a/fs/cifs/cached_dir.c b/fs/cifs/cached_dir.c
index f4d7700827b3..1c4a409bcb67 100644
--- a/fs/cifs/cached_dir.c
+++ b/fs/cifs/cached_dir.c
@@ -14,6 +14,7 @@
 
 static struct cached_fid *init_cached_dir(const char *path);
 static void free_cached_dir(struct cached_fid *cfid);
+static void smb2_cached_lease_timeout(struct work_struct *work);
 
 /*
  * Locking and reference count for cached directory handles:
@@ -321,6 +322,8 @@ int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon,
 	cfid->tcon = tcon;
 	cfid->time = jiffies;
 	cfid->is_open = true;
+	queue_delayed_work(cifsiod_wq, &cfid->lease_timeout, HZ * 30);
+	cfid->has_timeout = true;
 	cfid->has_lease = true;
 
 oshr_free:
@@ -447,6 +450,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 		list_add(&cfid->entry, &entry);
 		cfids->num_entries--;
 		cfid->is_open = false;
+		cfid->has_timeout = false;
 		/* To prevent race with smb2_cached_lease_break() */
 		kref_get(&cfid->refcount);
 	}
@@ -455,6 +459,7 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon)
 	list_for_each_entry_safe(cfid, q, &entry, entry) {
 		cfid->on_list = false;
 		list_del(&cfid->entry);
+		cancel_delayed_work_sync(&cfid->lease_timeout);
 		cancel_work_sync(&cfid->lease_break);
 		if (cfid->has_lease) {
 			/*
@@ -477,12 +482,34 @@ smb2_cached_lease_break(struct work_struct *work)
 	struct cached_fid *cfid = container_of(work,
 				struct cached_fid, lease_break);
 
+	cancel_delayed_work_sync(&cfid->lease_timeout);
 	spin_lock(&cfid->cfids->cfid_list_lock);
+	if (!cfid->has_lease) {
+		spin_unlock(&cfid->cfids->cfid_list_lock);
+		return;
+	}
 	cfid->has_lease = false;
 	spin_unlock(&cfid->cfids->cfid_list_lock);
 	kref_put(&cfid->refcount, smb2_close_cached_fid);
 }
 
+static void
+smb2_cached_lease_timeout(struct work_struct *work)
+{
+	struct cached_fid *cfid = container_of(work,
+				struct cached_fid, lease_timeout.work);
+
+	spin_lock(&cfid->cfids->cfid_list_lock);
+	if (!cfid->has_timeout || !cfid->on_list) {
+		spin_unlock(&cfid->cfids->cfid_list_lock);
+		return;
+	}
+	cfid->has_timeout = false;
+	spin_unlock(&cfid->cfids->cfid_list_lock);
+
+	queue_work(cifsiod_wq, &cfid->lease_break);
+}
+
 int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 {
 	struct cached_fids *cfids = tcon->cfids;
@@ -506,9 +533,9 @@ int cached_dir_lease_break(struct cifs_tcon *tcon, __u8 lease_key[16])
 			cfid->on_list = false;
 			cfids->num_entries--;
 
-			queue_work(cifsiod_wq,
-				   &cfid->lease_break);
+			cfid->has_timeout = false;
 			spin_unlock(&cfids->cfid_list_lock);
+			queue_work(cifsiod_wq, &cfid->lease_break);
 			return true;
 		}
 	}
@@ -530,6 +557,7 @@ static struct cached_fid *init_cached_dir(const char *path)
 	}
 
 	INIT_WORK(&cfid->lease_break, smb2_cached_lease_break);
+	INIT_DELAYED_WORK(&cfid->lease_timeout, smb2_cached_lease_timeout);
 	INIT_LIST_HEAD(&cfid->entry);
 	INIT_LIST_HEAD(&cfid->dirents.entries);
 	mutex_init(&cfid->dirents.de_mutex);
diff --git a/fs/cifs/cached_dir.h b/fs/cifs/cached_dir.h
index e536304ca2ce..813cd30f7ca3 100644
--- a/fs/cifs/cached_dir.h
+++ b/fs/cifs/cached_dir.h
@@ -35,6 +35,7 @@ struct cached_fid {
 	struct cached_fids *cfids;
 	const char *path;
 	bool has_lease:1;
+	bool has_timeout:1;
 	bool is_open:1;
 	bool on_list:1;
 	bool file_all_info_is_valid:1;
@@ -45,6 +46,7 @@ struct cached_fid {
 	struct cifs_tcon *tcon;
 	struct dentry *dentry;
 	struct work_struct lease_break;
+	struct delayed_work lease_timeout;
 	struct smb2_file_all_info file_all_info;
 	struct cached_dirents dirents;
 };
Signed-off-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/cached_dir.c | 32 ++++++++++++++++++++++++++++++--
 fs/cifs/cached_dir.h |  2 ++
 2 files changed, 32 insertions(+), 2 deletions(-)