Message ID | 87r2s7ql5m.fsf@notabene.neil.brown.name (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Thu, Dec 7, 2017 at 5:20 AM, NeilBrown <neilb@suse.com> wrote: > On Wed, Dec 06 2017, Linus Torvalds wrote: > >> On Thu, Nov 30, 2017 at 12:56 PM, NeilBrown <neilb@suse.com> wrote: >>> >>> -/* limit the handle size to NFSv4 handle size now */ >>> -#define MAX_HANDLE_SZ 128 >>> +/* Must be larger than NFSv4 file handle, but small >>> + * enough for an on-stack allocation. overlayfs doesn't >>> + * want this too close to 255. >>> + */ >>> +#define MAX_HANDLE_SZ 200 >> >> This really smells for so many reasons. >> >> Also, that really is starting to be a fairly big stack allocation, and >> it seems to be used in exactly one place (show_mark_fhandle), which >> makes me go "why is that on the stack anyway?". >> >> Could we just allocate a buffer at open time or something? >> >> Linus > > "open time" would be when /proc/X/fdinfo/Y was opened in > seq_fdinfo_open(), and allocating a file_handle there seems a bit odd. > > We can allocate in fs/notify/fdinfo.c:show_fdinfo() which is > the earliest 'notify' specific code to run. There is no > opportunity to return an error but GFP_KERNEL allocations under 1 page > never fail.. > > This patch allocates a single buffer for all inodes reported for a given > inotify fdinfo, and if the allocation files, the filehandle is silently > left blank. More surgery would be needed to be able to return an error. > > Is that at all suitable? > > Thanks, > NeilBrown > > From: NeilBrown <neilb@suse.com> > Subject: fs/notify: don't put file handle buffer on stack. > > A file handle buffer is not tiny, and could need to be larger in future, > so it isn't safe to allocate one on the stack. Instead, we need to > kmalloc(). > > There is no way to return an error status from a ->show_fdinfo() > function, so if the kmalloc fails, we silently exclude the filehandle > from the output. As it is at the end of line, this shouldn't > upset parsing too much. It shouldn't upset parsing because that would be the same out output as without CONFIG_EXPORTFS. AFAIK this information is used by CRUI. > > Signed-off-by: NeilBrown <neilb@suse.com> > > diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c > index d478629c728b..20d863b9ae16 100644 > --- a/fs/notify/fdinfo.c > +++ b/fs/notify/fdinfo.c > @@ -23,56 +23,58 @@ > > static void show_fdinfo(struct seq_file *m, struct file *f, > void (*show)(struct seq_file *m, > - struct fsnotify_mark *mark)) > + struct fsnotify_mark *mark, > + struct fid *fh)) > { > struct fsnotify_group *group = f->private_data; > struct fsnotify_mark *mark; > + struct fid *fh = kmalloc(MAX_HANDLE_SZ, GFP_KERNEL); > > mutex_lock(&group->mark_mutex); > list_for_each_entry(mark, &group->marks_list, g_list) { > - show(m, mark); > + show(m, mark, fh); > if (seq_has_overflowed(m)) > break; > } > mutex_unlock(&group->mark_mutex); > + kfree(fh); > } > > #if defined(CONFIG_EXPORTFS) > -static void show_mark_fhandle(struct seq_file *m, struct inode *inode) > +static void show_mark_fhandle(struct seq_file *m, struct inode *inode, > + struct fid *fhbuf) > { > - struct { > - struct file_handle handle; > - u8 pad[MAX_HANDLE_SZ]; > - } f; > int size, ret, i; > + unsigned char *bytes; > > - f.handle.handle_bytes = sizeof(f.pad); > - size = f.handle.handle_bytes >> 2; > + if (!fhbuf) > + return; > + size = MAX_HANDLE_SZ >> 2; > > - ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); > + ret = exportfs_encode_inode_fh(inode, fhbuf, &size, 0); > if ((ret == FILEID_INVALID) || (ret < 0)) { > WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); This WARN_ONCE is out of order. It is perfectly valid for inotify/fanotify to watch over fs that doesn't support exportfs. Care to clean it up? Perhaps a pr_warn_ratelimited() for either !fhbuf or can't encode? Cheers, Amir. -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 07, 2017 at 02:20:05PM +1100, NeilBrown wrote: > We can allocate in fs/notify/fdinfo.c:show_fdinfo() which is > the earliest 'notify' specific code to run. There is no > opportunity to return an error but GFP_KERNEL allocations under 1 page > never fail.. "never" * The default allocator behavior depends on the request size. We have a concept * of so called costly allocations (with order > PAGE_ALLOC_COSTLY_ORDER). * !costly allocations are too essential to fail so they are implicitly * non-failing by default (with some exceptions like OOM victims might fail so * the caller still has to check for failures) while costly requests try to be * not disruptive and back off even without invoking the OOM killer. * The following three modifiers might be used to override some of these * implicit rules -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Thu, Dec 07 2017, Amir Goldstein wrote: > On Thu, Dec 7, 2017 at 5:20 AM, NeilBrown <neilb@suse.com> wrote: >> On Wed, Dec 06 2017, Linus Torvalds wrote: >> >>> On Thu, Nov 30, 2017 at 12:56 PM, NeilBrown <neilb@suse.com> wrote: >>>> >>>> -/* limit the handle size to NFSv4 handle size now */ >>>> -#define MAX_HANDLE_SZ 128 >>>> +/* Must be larger than NFSv4 file handle, but small >>>> + * enough for an on-stack allocation. overlayfs doesn't >>>> + * want this too close to 255. >>>> + */ >>>> +#define MAX_HANDLE_SZ 200 >>> >>> This really smells for so many reasons. >>> >>> Also, that really is starting to be a fairly big stack allocation, and >>> it seems to be used in exactly one place (show_mark_fhandle), which >>> makes me go "why is that on the stack anyway?". >>> >>> Could we just allocate a buffer at open time or something? >>> >>> Linus >> >> "open time" would be when /proc/X/fdinfo/Y was opened in >> seq_fdinfo_open(), and allocating a file_handle there seems a bit odd. >> >> We can allocate in fs/notify/fdinfo.c:show_fdinfo() which is >> the earliest 'notify' specific code to run. There is no >> opportunity to return an error but GFP_KERNEL allocations under 1 page >> never fail.. >> >> This patch allocates a single buffer for all inodes reported for a given >> inotify fdinfo, and if the allocation files, the filehandle is silently >> left blank. More surgery would be needed to be able to return an error. >> >> Is that at all suitable? >> >> Thanks, >> NeilBrown >> >> From: NeilBrown <neilb@suse.com> >> Subject: fs/notify: don't put file handle buffer on stack. >> >> A file handle buffer is not tiny, and could need to be larger in future, >> so it isn't safe to allocate one on the stack. Instead, we need to >> kmalloc(). >> >> There is no way to return an error status from a ->show_fdinfo() >> function, so if the kmalloc fails, we silently exclude the filehandle >> from the output. As it is at the end of line, this shouldn't >> upset parsing too much. > > It shouldn't upset parsing because that would be the same out > output as without CONFIG_EXPORTFS. AFAIK this information > is used by CRUI. > >> >> Signed-off-by: NeilBrown <neilb@suse.com> >> >> diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c >> index d478629c728b..20d863b9ae16 100644 >> --- a/fs/notify/fdinfo.c >> +++ b/fs/notify/fdinfo.c >> @@ -23,56 +23,58 @@ >> >> static void show_fdinfo(struct seq_file *m, struct file *f, >> void (*show)(struct seq_file *m, >> - struct fsnotify_mark *mark)) >> + struct fsnotify_mark *mark, >> + struct fid *fh)) >> { >> struct fsnotify_group *group = f->private_data; >> struct fsnotify_mark *mark; >> + struct fid *fh = kmalloc(MAX_HANDLE_SZ, GFP_KERNEL); >> >> mutex_lock(&group->mark_mutex); >> list_for_each_entry(mark, &group->marks_list, g_list) { >> - show(m, mark); >> + show(m, mark, fh); >> if (seq_has_overflowed(m)) >> break; >> } >> mutex_unlock(&group->mark_mutex); >> + kfree(fh); >> } >> >> #if defined(CONFIG_EXPORTFS) >> -static void show_mark_fhandle(struct seq_file *m, struct inode *inode) >> +static void show_mark_fhandle(struct seq_file *m, struct inode *inode, >> + struct fid *fhbuf) >> { >> - struct { >> - struct file_handle handle; >> - u8 pad[MAX_HANDLE_SZ]; >> - } f; >> int size, ret, i; >> + unsigned char *bytes; >> >> - f.handle.handle_bytes = sizeof(f.pad); >> - size = f.handle.handle_bytes >> 2; >> + if (!fhbuf) >> + return; >> + size = MAX_HANDLE_SZ >> 2; >> >> - ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); >> + ret = exportfs_encode_inode_fh(inode, fhbuf, &size, 0); >> if ((ret == FILEID_INVALID) || (ret < 0)) { >> WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); > > This WARN_ONCE is out of order. It is perfectly valid for inotify/fanotify > to watch over fs that doesn't support exportfs. Care to clean it up? > Perhaps a pr_warn_ratelimited() for either !fhbuf or can't encode? If I were going to clean it up, I would need to do more than remove the WARN_ONCE(), which almost certainly never fires. exportfs_encode_inode_fh() should only be called if sb->s_export_op is not NULL. When it is NULL, it means that the filesystem doesn't support file handles. do_sys_name_to_handle() tests this, as does nfsd. But this inotify code doesn't. So it can report a "file handle" for a file for which file handles aren't supported. It will use the default export_encode_fh which reports i_ino and i_generation, which may or may not be stable or meaningful. So yes, this code does need a bit of cleaning up.... Thanks, NeilBrown
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index d478629c728b..20d863b9ae16 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -23,56 +23,58 @@ static void show_fdinfo(struct seq_file *m, struct file *f, void (*show)(struct seq_file *m, - struct fsnotify_mark *mark)) + struct fsnotify_mark *mark, + struct fid *fh)) { struct fsnotify_group *group = f->private_data; struct fsnotify_mark *mark; + struct fid *fh = kmalloc(MAX_HANDLE_SZ, GFP_KERNEL); mutex_lock(&group->mark_mutex); list_for_each_entry(mark, &group->marks_list, g_list) { - show(m, mark); + show(m, mark, fh); if (seq_has_overflowed(m)) break; } mutex_unlock(&group->mark_mutex); + kfree(fh); } #if defined(CONFIG_EXPORTFS) -static void show_mark_fhandle(struct seq_file *m, struct inode *inode) +static void show_mark_fhandle(struct seq_file *m, struct inode *inode, + struct fid *fhbuf) { - struct { - struct file_handle handle; - u8 pad[MAX_HANDLE_SZ]; - } f; int size, ret, i; + unsigned char *bytes; - f.handle.handle_bytes = sizeof(f.pad); - size = f.handle.handle_bytes >> 2; + if (!fhbuf) + return; + size = MAX_HANDLE_SZ >> 2; - ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0); + ret = exportfs_encode_inode_fh(inode, fhbuf, &size, 0); if ((ret == FILEID_INVALID) || (ret < 0)) { WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret); return; } - f.handle.handle_type = ret; - f.handle.handle_bytes = size * sizeof(u32); - - seq_printf(m, "fhandle-bytes:%x fhandle-type:%x f_handle:", - f.handle.handle_bytes, f.handle.handle_type); + seq_printf(m, "fhandle-bytes:%zx fhandle-type:%x f_handle:", + size * sizeof(u32), ret); - for (i = 0; i < f.handle.handle_bytes; i++) - seq_printf(m, "%02x", (int)f.handle.f_handle[i]); + bytes = (unsigned char *)(fhbuf->raw); + for (i = 0; i < size * sizeof(u32); i++) + seq_printf(m, "%02x", bytes[i]); } #else -static void show_mark_fhandle(struct seq_file *m, struct inode *inode) +static void show_mark_fhandle(struct seq_file *m, struct inode *inode, + struct fid *fhbuf) { } #endif #ifdef CONFIG_INOTIFY_USER -static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) +static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark, + struct fid *fhbuf) { struct inotify_inode_mark *inode_mark; struct inode *inode; @@ -93,7 +95,7 @@ static void inotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "inotify wd:%x ino:%lx sdev:%x mask:%x ignored_mask:%x ", inode_mark->wd, inode->i_ino, inode->i_sb->s_dev, mask, mark->ignored_mask); - show_mark_fhandle(m, inode); + show_mark_fhandle(m, inode, fhbuf); seq_putc(m, '\n'); iput(inode); } @@ -108,7 +110,8 @@ void inotify_show_fdinfo(struct seq_file *m, struct file *f) #ifdef CONFIG_FANOTIFY -static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) +static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark, + struct fid *fhbuf) { unsigned int mflags = 0; struct inode *inode; @@ -123,7 +126,7 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify ino:%lx sdev:%x mflags:%x mask:%x ignored_mask:%x ", inode->i_ino, inode->i_sb->s_dev, mflags, mark->mask, mark->ignored_mask); - show_mark_fhandle(m, inode); + show_mark_fhandle(m, inode, fhbuf); seq_putc(m, '\n'); iput(inode); } else if (mark->connector->flags & FSNOTIFY_OBJ_TYPE_VFSMOUNT) {