diff mbox series

[v4,2/6] fsmonitor: determine if filesystem is local or remote

Message ID e53fc07754094aa5ba8080ec7761869c6429a8af.1669230044.git.gitgitgadget@gmail.com (mailing list archive)
State New, archived
Headers show
Series fsmonitor: Implement fsmonitor for Linux | expand

Commit Message

Eric DeCosta Nov. 23, 2022, 7 p.m. UTC
From: Eric DeCosta <edecosta@mathworks.com>

Compare the given path to the mounted filesystems. Find the mount that is
the longest prefix of the path (if any) and determine if that mount is on a
local or remote filesystem.

Signed-off-by: Eric DeCosta <edecosta@mathworks.com>
---
 compat/fsmonitor/fsm-path-utils-linux.c | 186 ++++++++++++++++++++++++
 1 file changed, 186 insertions(+)
 create mode 100644 compat/fsmonitor/fsm-path-utils-linux.c

Comments

Junio C Hamano Nov. 25, 2022, 7:31 a.m. UTC | #1
"Eric DeCosta via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +static int is_remote_fs(const char* path) {

Style (asterisk sticks to the variable not to the type).

> +	switch (fs.f_type) {
> +		case 0x61636673:  /* ACFS */
> ...
> +		case 0xA501FCF5:  /* VXFS */
> +			return 1;
> +		default:
> +			break;
> +	}

Align case/default to switch by de-denting one level, just like you
did the switch() statement in find_mount().

> +static int find_mount(const char *path, const struct statvfs *fs,
> +	struct mntent *ent)
> +{
> +	const char *const mounts = "/proc/mounts";
> +	const char *rp = real_pathdup(path, 1);

Nobody seems to free() this once we are done with this function.

> +	struct mntent *ment = NULL;
> +	struct statvfs mntfs;
> +	FILE *fp;
> +	int found = 0;
> +	int dlen, plen, flen = 0;
> +
> +	ent->mnt_fsname = NULL;
> +	ent->mnt_dir = NULL;
> +	ent->mnt_type = NULL;

More on this later.

> +	fp = setmntent(mounts, "r");
> +	if (!fp) {
> +		error_errno(_("setmntent('%s') failed"), mounts);
> +		return -1;
> +	}
> +
> +	plen = strlen(rp);
> +
> +	/* read all the mount information and compare to path */
> +	while ((ment = getmntent(fp)) != NULL) {
> +		if (statvfs(ment->mnt_dir, &mntfs)) {
> +			switch (errno) {
> +			case EPERM:
> +			case ESRCH:
> +			case EACCES:
> +				continue;
> +			default:
> +				error_errno(_("statvfs('%s') failed"), ment->mnt_dir);
> +				endmntent(fp);
> +				return -1;
> +			}
> +		}
> +
> +		/* is mount on the same filesystem and is a prefix of the path */
> +		if ((fs->f_fsid == mntfs.f_fsid) &&
> +			!strncmp(ment->mnt_dir, rp, strlen(ment->mnt_dir))) {
> +			dlen = strlen(ment->mnt_dir);
> +			if (dlen > plen)
> +				continue;
> +			/*
> +			 * root is always a potential match; otherwise look for
> +			 * directory prefix
> +			 */
> +			if ((dlen == 1 && ment->mnt_dir[0] == '/') ||
> +				(dlen > flen && (!rp[dlen] || rp[dlen] == '/'))) {
> +				flen = dlen;
> +				/*
> +				 * https://man7.org/linux/man-pages/man3/getmntent.3.html
> +				 *
> +				 * The pointer points to a static area of memory which is
> +				 * overwritten by subsequent calls to getmntent().
> +				 */
> +				found = 1;
> +				free(ent->mnt_fsname);
> +				free(ent->mnt_dir);
> +				free(ent->mnt_type);
> +				ent->mnt_fsname = xstrdup(ment->mnt_fsname);
> +				ent->mnt_dir = xstrdup(ment->mnt_dir);
> +				ent->mnt_type = xstrdup(ment->mnt_type);
> +			}

So more than one mount entries could match the given path.  This
loop implements "the last one wins", but is that a sensible thing to
do?  Shouldn't it be more like "the longest one, being the most
specific, wins"?  If /usr mounts on / and /usr/local mounts on /usr,
asking for /usr/local/me would want to discover that it is on
the filesystem mounted at /usr/local regardless of the order in
which getmntent() returns the entries, no?

> +		}
> +	}
> +	endmntent(fp);
> +
> +	if (!found)
> +		return -1;
> +
> +	return 0;
> +}


> +
> +int fsmonitor__get_fs_info(const char *path, struct fs_info *fs_info)
> +{
> +	struct mntent ment;
> +	struct statvfs fs;
> +
> +	if (statvfs(path, &fs))
> +		return error_errno(_("statvfs('%s') failed"), path);
> +
> +
> +	if (find_mount(path, &fs, &ment) < 0) {
> +		free(ment.mnt_fsname);
> +		free(ment.mnt_dir);
> +		free(ment.mnt_type);

It is a good idea to free, but I _think_ the code is easier to
follow if you _clear_ ment before calling find_mount(), not in
find_mount().

> +		return -1;
> +	}
> +
> +	trace_printf_key(&trace_fsmonitor,
> +			 "statvfs('%s') [flags 0x%08lx] '%s' '%s'",
> +			 path, fs.f_flag, ment.mnt_type, ment.mnt_fsname);
> +
> +	fs_info->is_remote = is_remote_fs(ment.mnt_dir);
> +	fs_info->typename = ment.mnt_fsname;
> +	free(ment.mnt_dir);
> +	free(ment.mnt_type);
> +
> +	if (fs_info->is_remote < 0) {
> +		free(ment.mnt_fsname);
> +		return -1;

fs_info->typename just started to point into an already freed memory
at this point, which is a very safe thing to do to the caller of
this function.

Rather, perhaps delay the setting of typename after this statement,
which would be more friendly to the caller than telling them that
they are not allowed to touch the member when the function returns
negative.

> +	}
> +
> +	trace_printf_key(&trace_fsmonitor,
> +				"'%s' is_remote: %d",
> +				path, fs_info->is_remote);
> +
> +	return 0;
> +}
> +
> +int fsmonitor__is_fs_remote(const char *path)
> +{
> +	struct fs_info fs;
> +
> +	if (fsmonitor__get_fs_info(path, &fs))
> +		return -1;
> +
> +	free(fs.typename);
> +
> +	return fs.is_remote;
> +}
> +
> +/*
> + * No-op for now.
> + */
> +int fsmonitor__get_alias(const char *path, struct alias_info *info)
> +{
> +	return 0;
> +}
> +
> +/*
> + * No-op for now.
> + */
> +char *fsmonitor__resolve_alias(const char *path,
> +	const struct alias_info *info)
> +{
> +	return NULL;
> +}
Ævar Arnfjörð Bjarmason Dec. 12, 2022, 10:24 a.m. UTC | #2
On Wed, Nov 23 2022, Eric DeCosta via GitGitGadget wrote:

> From: Eric DeCosta <edecosta@mathworks.com>
>
> Compare the given path to the mounted filesystems. Find the mount that is
> the longest prefix of the path (if any) and determine if that mount is on a
> local or remote filesystem.
>
> Signed-off-by: Eric DeCosta <edecosta@mathworks.com>
> ---
>  compat/fsmonitor/fsm-path-utils-linux.c | 186 ++++++++++++++++++++++++
>  1 file changed, 186 insertions(+)
>  create mode 100644 compat/fsmonitor/fsm-path-utils-linux.c
>
> diff --git a/compat/fsmonitor/fsm-path-utils-linux.c b/compat/fsmonitor/fsm-path-utils-linux.c
> new file mode 100644
> index 00000000000..d3281422ebc
> --- /dev/null
> +++ b/compat/fsmonitor/fsm-path-utils-linux.c
> @@ -0,0 +1,186 @@
> +#include "fsmonitor.h"
> +#include "fsmonitor-path-utils.h"
> +#include <errno.h>
> +#include <mntent.h>
> +#include <sys/mount.h>
> +#include <sys/vfs.h>
> +#include <sys/statvfs.h>
> +
> +static int is_remote_fs(const char* path) {
> +	struct statfs fs;
> +
> +	if (statfs(path, &fs)) {
> +		error_errno(_("statfs('%s') failed"), path);
> +		return -1;
> +	}

Nit: Drop the braces and do:

	if (statfs(...) == -1)
		return error_errno(...)

> +	switch (fs.f_type) {
> +		case 0x61636673:  /* ACFS */
> +		case 0x5346414F:  /* AFS */
> +		case 0x00C36400:  /* CEPH */
> +		case 0xFF534D42:  /* CIFS */
> +		case 0x73757245:  /* CODA */
> +		case 0x19830326:  /* FHGFS */
> +		case 0x1161970:   /* GFS */
> +		case 0x47504653:  /* GPFS */
> +		case 0x013111A8:  /* IBRIX */
> +		case 0x6B414653:  /* KAFS */
> +		case 0x0BD00BD0:  /* LUSTRE */
> +		case 0x564C:      /* NCP */
> +		case 0x6969:      /* NFS */
> +		case 0x6E667364:  /* NFSD */
> +		case 0x7461636f:  /* OCFS2 */
> +		case 0xAAD7AAEA:  /* PANFS */
> +		case 0x517B:      /* SMB */
> +		case 0xBEEFDEAD:  /* SNFS */
> +		case 0xFE534D42:  /* SMB2 */
> +		case 0xBACBACBC:  /* VMHGFS */
> +		case 0xA501FCF5:  /* VXFS */

So, before we'd compare against the name, but to avoid the GPLv3
copy/pasting we're now comparing against the fs.f_type.

If we are hardcoding them, our usual convention is to lower-case
hexdigits, so 0xbacbacbc not 0xBACBACBC.

But at least my statfs() manpage documents the named defines in
linux/magic.h for most of these. Why not use those?

> +			return 1;
> +		default:
> +			break;

You could just "return 0" here, and...

> +	}
> +
> +	return 0;

...drop this "return 0".

> +}
> +
> +static int find_mount(const char *path, const struct statvfs *fs,
> +	struct mntent *ent)

Misindentation.

> +{
> +	const char *const mounts = "/proc/mounts";
> +	const char *rp = real_pathdup(path, 1);
> +	struct mntent *ment = NULL;
> +	struct statvfs mntfs;
> +	FILE *fp;
> +	int found = 0;
> +	int dlen, plen, flen = 0;
> +
> +	ent->mnt_fsname = NULL;
> +	ent->mnt_dir = NULL;
> +	ent->mnt_type = NULL;
> +
> +	fp = setmntent(mounts, "r");
> +	if (!fp) {
> +		error_errno(_("setmntent('%s') failed"), mounts);
> +		return -1;

Ditto "return error_errno()"


> +	}
> +
> +	plen = strlen(rp);

Let's make "plen", "dlen" and "flen" a "size_t", not "int"
> +
> +	/* read all the mount information and compare to path */
> +	while ((ment = getmntent(fp)) != NULL) {

Drop the "!= NULL"

> +		if (statvfs(ment->mnt_dir, &mntfs)) {
> +			switch (errno) {
> +			case EPERM:
> +			case ESRCH:
> +			case EACCES:
> +				continue;
> +			default:
> +				error_errno(_("statvfs('%s') failed"), ment->mnt_dir);
> +				endmntent(fp);

Shouldn't we check the endmntent() error too? Now, from the manpage the
interface is funny, and always returns 1.

But since this is linux-specific code it seems safe enough to go with it
& glibc assumptions and:

	errno = 0;
        endmntent(fp);
        if (errno)
        	return error_errno(....);

I.e. it'll just call fclose(), which might set errno() on failure.

Maybe it's not worth it...

> +	if (statvfs(path, &fs))
> +		return error_errno(_("statvfs('%s') failed"), path);

Here you do use that "return error_errno(...)" pattern...", yay!


> +
> +
> +	if (find_mount(path, &fs, &ment) < 0) {
> +		free(ment.mnt_fsname);
> +		free(ment.mnt_dir);
> +		free(ment.mnt_type);
> +		return -1;
> +	}
> +
> +	trace_printf_key(&trace_fsmonitor,
> +			 "statvfs('%s') [flags 0x%08lx] '%s' '%s'",
> +			 path, fs.f_flag, ment.mnt_type, ment.mnt_fsname);
> +
> +	fs_info->is_remote = is_remote_fs(ment.mnt_dir);
> +	fs_info->typename = ment.mnt_fsname;
> +	free(ment.mnt_dir);
> +	free(ment.mnt_type);

If you're going to \n\n-seperate this and the trace_printf_key() above I
think moving the second free() here to that "block" would make sense,
sinec here is the last time we use mnt_dir, but the last time we used
mnt_type was in the trace_printf_key().

But...

> +
> +	if (fs_info->is_remote < 0) {
> +		free(ment.mnt_fsname);

...aren't you NULL init-ing these, why not just for all of these:

	goto error;

Then....
> +		return -1;
> +	}
> +
> +	trace_printf_key(&trace_fsmonitor,
> +				"'%s' is_remote: %d",
> +				path, fs_info->is_remote);
> +
> +	return 0;

Have this be:

	int ret = -1; /* earlier */

	ret = 0;
cleanup:
	free(...);
	free(...);
	return ret;

> +}
> +
> +int fsmonitor__is_fs_remote(const char *path)
> +{
> +	struct fs_info fs;
> +
> +	if (fsmonitor__get_fs_info(path, &fs))
> +		return -1;
> +
> +	free(fs.typename);

This will segfault if you take the part through fsmonitor__get_fs_info()
where we don't have the fs.typename yet, i.e. if statfs() fails.

There's the trivial NULL-init way to work around it, but I think this
suggests a leaky abstraction. If we fail to get the fs info, then the
function itself should have free'd that, shouldn't it?

> +/*
> + * No-op for now.
> + */
> +char *fsmonitor__resolve_alias(const char *path,
> +	const struct alias_info *info)

Ditto misindentatione
diff mbox series

Patch

diff --git a/compat/fsmonitor/fsm-path-utils-linux.c b/compat/fsmonitor/fsm-path-utils-linux.c
new file mode 100644
index 00000000000..d3281422ebc
--- /dev/null
+++ b/compat/fsmonitor/fsm-path-utils-linux.c
@@ -0,0 +1,186 @@ 
+#include "fsmonitor.h"
+#include "fsmonitor-path-utils.h"
+#include <errno.h>
+#include <mntent.h>
+#include <sys/mount.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+
+static int is_remote_fs(const char* path) {
+	struct statfs fs;
+
+	if (statfs(path, &fs)) {
+		error_errno(_("statfs('%s') failed"), path);
+		return -1;
+	}
+
+	switch (fs.f_type) {
+		case 0x61636673:  /* ACFS */
+		case 0x5346414F:  /* AFS */
+		case 0x00C36400:  /* CEPH */
+		case 0xFF534D42:  /* CIFS */
+		case 0x73757245:  /* CODA */
+		case 0x19830326:  /* FHGFS */
+		case 0x1161970:   /* GFS */
+		case 0x47504653:  /* GPFS */
+		case 0x013111A8:  /* IBRIX */
+		case 0x6B414653:  /* KAFS */
+		case 0x0BD00BD0:  /* LUSTRE */
+		case 0x564C:      /* NCP */
+		case 0x6969:      /* NFS */
+		case 0x6E667364:  /* NFSD */
+		case 0x7461636f:  /* OCFS2 */
+		case 0xAAD7AAEA:  /* PANFS */
+		case 0x517B:      /* SMB */
+		case 0xBEEFDEAD:  /* SNFS */
+		case 0xFE534D42:  /* SMB2 */
+		case 0xBACBACBC:  /* VMHGFS */
+		case 0xA501FCF5:  /* VXFS */
+			return 1;
+		default:
+			break;
+	}
+
+	return 0;
+}
+
+static int find_mount(const char *path, const struct statvfs *fs,
+	struct mntent *ent)
+{
+	const char *const mounts = "/proc/mounts";
+	const char *rp = real_pathdup(path, 1);
+	struct mntent *ment = NULL;
+	struct statvfs mntfs;
+	FILE *fp;
+	int found = 0;
+	int dlen, plen, flen = 0;
+
+	ent->mnt_fsname = NULL;
+	ent->mnt_dir = NULL;
+	ent->mnt_type = NULL;
+
+	fp = setmntent(mounts, "r");
+	if (!fp) {
+		error_errno(_("setmntent('%s') failed"), mounts);
+		return -1;
+	}
+
+	plen = strlen(rp);
+
+	/* read all the mount information and compare to path */
+	while ((ment = getmntent(fp)) != NULL) {
+		if (statvfs(ment->mnt_dir, &mntfs)) {
+			switch (errno) {
+			case EPERM:
+			case ESRCH:
+			case EACCES:
+				continue;
+			default:
+				error_errno(_("statvfs('%s') failed"), ment->mnt_dir);
+				endmntent(fp);
+				return -1;
+			}
+		}
+
+		/* is mount on the same filesystem and is a prefix of the path */
+		if ((fs->f_fsid == mntfs.f_fsid) &&
+			!strncmp(ment->mnt_dir, rp, strlen(ment->mnt_dir))) {
+			dlen = strlen(ment->mnt_dir);
+			if (dlen > plen)
+				continue;
+			/*
+			 * root is always a potential match; otherwise look for
+			 * directory prefix
+			 */
+			if ((dlen == 1 && ment->mnt_dir[0] == '/') ||
+				(dlen > flen && (!rp[dlen] || rp[dlen] == '/'))) {
+				flen = dlen;
+				/*
+				 * https://man7.org/linux/man-pages/man3/getmntent.3.html
+				 *
+				 * The pointer points to a static area of memory which is
+				 * overwritten by subsequent calls to getmntent().
+				 */
+				found = 1;
+				free(ent->mnt_fsname);
+				free(ent->mnt_dir);
+				free(ent->mnt_type);
+				ent->mnt_fsname = xstrdup(ment->mnt_fsname);
+				ent->mnt_dir = xstrdup(ment->mnt_dir);
+				ent->mnt_type = xstrdup(ment->mnt_type);
+			}
+		}
+	}
+	endmntent(fp);
+
+	if (!found)
+		return -1;
+
+	return 0;
+}
+
+int fsmonitor__get_fs_info(const char *path, struct fs_info *fs_info)
+{
+	struct mntent ment;
+	struct statvfs fs;
+
+	if (statvfs(path, &fs))
+		return error_errno(_("statvfs('%s') failed"), path);
+
+
+	if (find_mount(path, &fs, &ment) < 0) {
+		free(ment.mnt_fsname);
+		free(ment.mnt_dir);
+		free(ment.mnt_type);
+		return -1;
+	}
+
+	trace_printf_key(&trace_fsmonitor,
+			 "statvfs('%s') [flags 0x%08lx] '%s' '%s'",
+			 path, fs.f_flag, ment.mnt_type, ment.mnt_fsname);
+
+	fs_info->is_remote = is_remote_fs(ment.mnt_dir);
+	fs_info->typename = ment.mnt_fsname;
+	free(ment.mnt_dir);
+	free(ment.mnt_type);
+
+	if (fs_info->is_remote < 0) {
+		free(ment.mnt_fsname);
+		return -1;
+	}
+
+	trace_printf_key(&trace_fsmonitor,
+				"'%s' is_remote: %d",
+				path, fs_info->is_remote);
+
+	return 0;
+}
+
+int fsmonitor__is_fs_remote(const char *path)
+{
+	struct fs_info fs;
+
+	if (fsmonitor__get_fs_info(path, &fs))
+		return -1;
+
+	free(fs.typename);
+
+	return fs.is_remote;
+}
+
+/*
+ * No-op for now.
+ */
+int fsmonitor__get_alias(const char *path, struct alias_info *info)
+{
+	return 0;
+}
+
+/*
+ * No-op for now.
+ */
+char *fsmonitor__resolve_alias(const char *path,
+	const struct alias_info *info)
+{
+	return NULL;
+}