diff mbox

[v3,1/2] xfstests: add fssum tool

Message ID 1375949833-1104-2-git-send-email-list.xfs@jan-o-sch.net (mailing list archive)
State Not Applicable
Headers show

Commit Message

Jan Schmidt Aug. 8, 2013, 8:17 a.m. UTC
fssum is a tool to build a recursive checksum for a file system. The home
repository of fssum is

    git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git

It is added as an optional target, because it depends on glibc >= 2.15 for
SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped
if fssum wasn't built.

Signed-off-by: Jan Schmidt <list.xfs@jan-o-sch.net>
---
 .gitignore    |    1 +
 common/config |    2 +
 src/Makefile  |   11 +-
 src/fssum.c   |  819 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 832 insertions(+), 1 deletions(-)
 create mode 100644 src/fssum.c

Comments

Eric Sandeen Aug. 8, 2013, 5:40 p.m. UTC | #1
On 8/8/13 3:17 AM, Jan Schmidt wrote:
> fssum is a tool to build a recursive checksum for a file system. The home
> repository of fssum is
> 
>     git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git
> 
> It is added as an optional target, because it depends on glibc >= 2.15 for
> SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped
> if fssum wasn't built.
> 
> Signed-off-by: Jan Schmidt <list.xfs@jan-o-sch.net>
> ---
>  .gitignore    |    1 +
>  common/config |    2 +
>  src/Makefile  |   11 +-
>  src/fssum.c   |  819 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 832 insertions(+), 1 deletions(-)
>  create mode 100644 src/fssum.c
> 
> diff --git a/.gitignore b/.gitignore
> index 11594aa..c2fc6e3 100644
> --- a/.gitignore
> +++ b/.gitignore
> @@ -45,6 +45,7 @@
>  /src/fill
>  /src/fill2
>  /src/fs_perms
> +/src/fssum
>  /src/fstest
>  /src/fsync-tester
>  /src/ftrunc
> diff --git a/common/config b/common/config
> index 67c1498..c8bee29 100644
> --- a/common/config
> +++ b/common/config
> @@ -146,6 +146,8 @@ export SED_PROG="`set_prog_path sed`"
>  export BC_PROG="`set_prog_path bc`"
>  [ "$BC_PROG" = "" ] && _fatal "bc not found"
>  
> +export FSSUM_PROG="`set_prog_path fssum $here/src/fssum`"

So this will pick up a local copy of fssum if it exists;
is that really desired?  (If there's any difference in
behavior, then the one in src/ presumably would need to
be fixed...)

> +
>  export PS_ALL_FLAGS="-ef"
>  
>  export DF_PROG="`set_prog_path df`"
> diff --git a/src/Makefile b/src/Makefile
> index cc679e8..10a4d3c 100644
> --- a/src/Makefile
> +++ b/src/Makefile
> @@ -20,10 +20,14 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
>  	stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
>  	seek_copy_test t_readdir_1 t_readdir_2 fsync-tester
>  
> +OPT_TARGETS = fssum
> +

I'm not sure how this helps . . .

>  SUBDIRS =
>  
>  LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
>  
> +OPT_LDLIBS = -lssl -lcrypto

Hm, new deps.  I guess it's not a huge problem, these should always
be available, right?

>  ifeq ($(HAVE_XLOG_ASSIGN_LSN), true)
>  LINUX_TARGETS += loggen
>  endif
> @@ -60,7 +64,7 @@ CFILES = $(TARGETS:=.c)
>  LDIRT = $(TARGETS)
>  
>  
> -default: depend $(TARGETS) $(SUBDIRS)
> +default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS)

Anyway, OPT_TARGETS isn't optional, because you still build it by default.  :)

>  depend: .dep
>  
> @@ -70,11 +74,16 @@ $(TARGETS): $(LIBTEST)
>  	@echo "    [CC]    $@"
>  	$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST)
>  
> +$(OPT_TARGETS): $(LIBTEST)
> +	@echo "    [CC]    $@"
> +	-$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) $(LIBTEST)

Oh, I see, you ignore the error.  Well, that's still pretty ugly.
I'd really rather you just add the #defines as I suggested in my
reply to [PATCH 0/2], so it'll build for everyone.

Thanks,
-Eric

> +
>  LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS)
>  
>  install: default $(addsuffix -install,$(SUBDIRS))
>  	$(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src
>  	$(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src
> +	-$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src
>  	$(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh $(PKG_LIB_DIR)/src
>  	$(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src
>  

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Dave Chinner Aug. 12, 2013, 1:15 a.m. UTC | #2
On Thu, Aug 08, 2013 at 12:40:55PM -0500, Eric Sandeen wrote:
> On 8/8/13 3:17 AM, Jan Schmidt wrote:
> >  SUBDIRS =
> >  
> >  LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
> >  
> > +OPT_LDLIBS = -lssl -lcrypto
> 
> Hm, new deps.  I guess it's not a huge problem, these should always
> be available, right?

Please, let's keep crypto out of xfstests if we can. That's just
going to add a nightmare of US export compliance garbage to any
distro that wants to package and ship this....

Cheers,

Dave.
Eric Sandeen Aug. 12, 2013, 2:54 a.m. UTC | #3
On Aug 11, 2013, at 8:15 PM, Dave Chinner <david@fromorbit.com> wrote:

> On Thu, Aug 08, 2013 at 12:40:55PM -0500, Eric Sandeen wrote:
>> On 8/8/13 3:17 AM, Jan Schmidt wrote:
>>> SUBDIRS =
>>> 
>>> LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
>>> 
>>> +OPT_LDLIBS = -lssl -lcrypto
>> 
>> Hm, new deps.  I guess it's not a huge problem, these should always
>> be available, right?
> 
> Please, let's keep crypto out of xfstests if we can. That's just
> going to add a nightmare of US export compliance garbage to any
> distro that wants to package and ship this....
> 
It's ssl that's (optionally) required for check summing I believe.  But it's optional if done properly...

-Eric

> Cheers,
> 
> Dave.
> -- 
> Dave Chinner
> david@fromorbit.com
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Josef Bacik Aug. 13, 2013, 3:28 p.m. UTC | #4
On Sun, Aug 11, 2013 at 09:54:41PM -0500, Eric Sandeen wrote:
> On Aug 11, 2013, at 8:15 PM, Dave Chinner <david@fromorbit.com> wrote:
> 
> > On Thu, Aug 08, 2013 at 12:40:55PM -0500, Eric Sandeen wrote:
> >> On 8/8/13 3:17 AM, Jan Schmidt wrote:
> >>> SUBDIRS =
> >>> 
> >>> LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
> >>> 
> >>> +OPT_LDLIBS = -lssl -lcrypto
> >> 
> >> Hm, new deps.  I guess it's not a huge problem, these should always
> >> be available, right?
> > 
> > Please, let's keep crypto out of xfstests if we can. That's just
> > going to add a nightmare of US export compliance garbage to any
> > distro that wants to package and ship this....
> > 
> It's ssl that's (optionally) required for check summing I believe.  But it's optional if done properly...
> 

It doesn't build without -lcrypto for me, so either we have an external
dependancy (which I'm fine with), or we add it to xfstests (which is what you
wanted).  Does a config option alleviate your concerns?  And I it's just linking
it in at build time, I don't think we have to work about export compliance as
long as we aren't shipping actual crytpo stuff right?  Anybody who is going to
be running this stuff is going to have libssl installed, which installs
libcrypto (at least on my box) so I don't think it's a huge deal to have us
linking it.  Thanks,

Josef
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Eric Sandeen Aug. 13, 2013, 4:28 p.m. UTC | #5
On 8/13/13 10:28 AM, Josef Bacik wrote:
> On Sun, Aug 11, 2013 at 09:54:41PM -0500, Eric Sandeen wrote:
>> On Aug 11, 2013, at 8:15 PM, Dave Chinner <david@fromorbit.com> wrote:
>>
>>> On Thu, Aug 08, 2013 at 12:40:55PM -0500, Eric Sandeen wrote:
>>>> On 8/8/13 3:17 AM, Jan Schmidt wrote:
>>>>> SUBDIRS =
>>>>>
>>>>> LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
>>>>>
>>>>> +OPT_LDLIBS = -lssl -lcrypto
>>>>
>>>> Hm, new deps.  I guess it's not a huge problem, these should always
>>>> be available, right?
>>>
>>> Please, let's keep crypto out of xfstests if we can. That's just
>>> going to add a nightmare of US export compliance garbage to any
>>> distro that wants to package and ship this....
>>>
>> It's ssl that's (optionally) required for check summing I believe.  But it's optional if done properly...
>>
> 
> It doesn't build without -lcrypto for me, so either we have an external
> dependancy (which I'm fine with), or we add it to xfstests (which is what you
> wanted).  Does a config option alleviate your concerns?  And I it's just linking
> it in at build time, I don't think we have to work about export compliance as
> long as we aren't shipping actual crytpo stuff right?  Anybody who is going to
> be running this stuff is going to have libssl installed, which installs
> libcrypto (at least on my box) so I don't think it's a huge deal to have us
> linking it.  Thanks,

Yeah I think linking in a consumer is not at all the same as exporting the functionality.

Looks like my Makefile patch needs to add both -lssl & -lcrypto after all,
but again, yeah, just linking against it _if_ it's there.

Oh, and here's my m4/package_ssldev.m4, sorry about that:

AC_DEFUN([AC_PACKAGE_WANT_SSL],
  [ AC_CHECK_HEADERS(openssl/md5.h, [ have_ssl=true ], [ have_ssl=false ])
    AC_SUBST(have_ssl)
  ])


-Eric

> Josef
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/.gitignore b/.gitignore
index 11594aa..c2fc6e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@ 
 /src/fill
 /src/fill2
 /src/fs_perms
+/src/fssum
 /src/fstest
 /src/fsync-tester
 /src/ftrunc
diff --git a/common/config b/common/config
index 67c1498..c8bee29 100644
--- a/common/config
+++ b/common/config
@@ -146,6 +146,8 @@  export SED_PROG="`set_prog_path sed`"
 export BC_PROG="`set_prog_path bc`"
 [ "$BC_PROG" = "" ] && _fatal "bc not found"
 
+export FSSUM_PROG="`set_prog_path fssum $here/src/fssum`"
+
 export PS_ALL_FLAGS="-ef"
 
 export DF_PROG="`set_prog_path df`"
diff --git a/src/Makefile b/src/Makefile
index cc679e8..10a4d3c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -20,10 +20,14 @@  LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
 	stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
 	seek_copy_test t_readdir_1 t_readdir_2 fsync-tester
 
+OPT_TARGETS = fssum
+
 SUBDIRS =
 
 LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
 
+OPT_LDLIBS = -lssl -lcrypto
+
 ifeq ($(HAVE_XLOG_ASSIGN_LSN), true)
 LINUX_TARGETS += loggen
 endif
@@ -60,7 +64,7 @@  CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS)
 
 
-default: depend $(TARGETS) $(SUBDIRS)
+default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS)
 
 depend: .dep
 
@@ -70,11 +74,16 @@  $(TARGETS): $(LIBTEST)
 	@echo "    [CC]    $@"
 	$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST)
 
+$(OPT_TARGETS): $(LIBTEST)
+	@echo "    [CC]    $@"
+	-$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) $(LIBTEST)
+
 LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS)
 
 install: default $(addsuffix -install,$(SUBDIRS))
 	$(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src
 	$(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src
+	-$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src
 	$(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh $(PKG_LIB_DIR)/src
 	$(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src
 
diff --git a/src/fssum.c b/src/fssum.c
new file mode 100644
index 0000000..ecddb6a
--- /dev/null
+++ b/src/fssum.c
@@ -0,0 +1,819 @@ 
+/*
+ * Copyright (C) 2012 STRATO AG.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#define _BSD_SOURCE
+#define _LARGEFILE64_SOURCE
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#ifdef __SOLARIS__
+#include <sys/mkdev.h>
+#endif
+#include <openssl/md5.h>
+#include <netinet/in.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#define CS_SIZE 16
+#define CHUNKS	128
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x)     __bswap_64 (x)
+#endif
+
+/* TODO: add hardlink recognition */
+/* TODO: add xattr/acl */
+
+struct excludes {
+	char *path;
+	int len;
+};
+
+typedef struct _sum {
+	MD5_CTX 	md5;
+	unsigned char	out[16];
+} sum_t;
+
+typedef int (*sum_file_data_t)(int fd, sum_t *dst);
+
+int gen_manifest = 0;
+int in_manifest = 0;
+char *checksum = NULL;
+struct excludes *excludes;
+int n_excludes = 0;
+int verbose = 0;
+FILE *out_fp;
+FILE *in_fp;
+
+enum _flags {
+	FLAG_UID,
+	FLAG_GID,
+	FLAG_MODE,
+	FLAG_ATIME,
+	FLAG_MTIME,
+	FLAG_CTIME,
+	FLAG_DATA,
+	FLAG_OPEN_ERROR,
+	FLAG_STRUCTURE,
+	NUM_FLAGS
+};
+
+const char flchar[] = "ugoamcdes";
+char line[65536];
+
+int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0};
+
+char *
+getln(char *buf, int size, FILE *fp)
+{
+	char *p;
+	int l;
+
+	p = fgets(buf, size, fp);
+	if (!p)
+		return NULL;
+
+	l = strlen(p);
+	while(l > 0  && (p[l - 1] == '\n' || p[l - 1] == '\r'))
+		p[--l] = 0;
+
+	return p;
+}
+
+void
+parse_flag(int c)
+{
+	int i;
+	int is_upper = 0;
+
+	if (c >= 'A' && c <= 'Z') {
+		is_upper = 1;
+		c += 'a' - 'A';
+	}
+	for (i = 0; flchar[i]; ++i) {
+		if (flchar[i] == c) {
+			flags[i] = is_upper ? 0 : 1;
+			return;
+		}
+	}
+	fprintf(stderr, "unrecognized flag %c\n", c);
+	exit(-1);
+}
+
+void
+parse_flags(char *p)
+{
+	while (*p)
+		parse_flag(*p++);
+}
+
+void
+usage(void)
+{
+	fprintf(stderr, "usage: fssum <options> <path>\n");
+	fprintf(stderr, "  options:\n");
+	fprintf(stderr, "    -f          : write out a full manifest file\n");
+	fprintf(stderr, "    -w <file>   : send output to file\n");
+	fprintf(stderr, "    -v          : verbose mode (debugging only)\n");
+	fprintf(stderr,
+		"    -r <file>   : read checksum or manifest from file\n");
+	fprintf(stderr, "    -[ugoamcde] : specify which fields to include in checksum calculation.\n");
+	fprintf(stderr, "         u      : include uid\n");
+	fprintf(stderr, "         g      : include gid\n");
+	fprintf(stderr, "         o      : include mode\n");
+	fprintf(stderr, "         m      : include mtime\n");
+	fprintf(stderr, "         a      : include atime\n");
+	fprintf(stderr, "         c      : include ctime\n");
+	fprintf(stderr, "         d      : include file data\n");
+	fprintf(stderr, "         e      : include open errors (aborts otherwise)\n");
+	fprintf(stderr, "         s      : include block structure (holes)\n");
+	fprintf(stderr, "    -[UGOAMCDES]: exclude respective field from calculation\n");
+	fprintf(stderr, "    -n          : reset all flags\n");
+	fprintf(stderr, "    -N          : set all flags\n");
+	fprintf(stderr, "    -x path     : exclude path when building checksum (multiple ok)\n");
+	fprintf(stderr, "    -h          : this help\n\n");
+	fprintf(stderr, "The default field mask is ugoamCdES. If the checksum/manifest is read from a\n");
+	fprintf(stderr, "file, the mask is taken from there and the values given on the command line\n");
+	fprintf(stderr, "are ignored.\n");
+	exit(-1);
+}
+
+static char buf[65536];
+
+void *
+alloc(size_t sz)
+{
+	void *p = malloc(sz);
+
+	if (!p) {
+		fprintf(stderr, "malloc failed\n");
+		exit(-1);
+	}
+
+	return p;
+}
+
+void
+sum_init(sum_t *cs)
+{
+	MD5_Init(&cs->md5);
+}
+
+void
+sum_fini(sum_t *cs)
+{
+	MD5_Final(cs->out, &cs->md5);
+}
+
+void
+sum_add(sum_t *cs, void *buf, int size)
+{
+	MD5_Update(&cs->md5, buf, size);
+}
+
+void
+sum_add_sum(sum_t *dst, sum_t *src)
+{
+	sum_add(dst, src->out, sizeof(src->out));
+}
+
+void
+sum_add_u64(sum_t *dst, uint64_t val)
+{
+	uint64_t v = htonll(val);
+	sum_add(dst, &v, sizeof(v));
+}
+
+void
+sum_add_time(sum_t *dst, time_t t)
+{
+	sum_add_u64(dst, t);
+}
+
+char *
+sum_to_string(sum_t *dst)
+{
+	int i;
+	char *s = alloc(CS_SIZE * 2 + 1);
+
+	for (i = 0; i < CS_SIZE; ++i)
+		sprintf(s + i * 2, "%02x", dst->out[i]);
+
+	return s;
+}
+
+int
+sum_file_data_permissive(int fd, sum_t *dst)
+{
+	int ret;
+	off_t pos;
+	off_t old;
+	int i;
+	uint64_t zeros = 0;
+
+	pos = lseek(fd, 0, SEEK_CUR);
+	if (pos == (off_t)-1)
+		return errno == ENXIO ? 0 : -2;
+
+	while (1) {
+		old = pos;
+		pos = lseek(fd, pos, SEEK_DATA);
+		if (pos == (off_t)-1) {
+			if (errno == ENXIO) {
+				ret = 0;
+				pos = lseek(fd, 0, SEEK_END);
+				if (pos != (off_t)-1)
+					zeros += pos - old;
+			} else {
+				ret = -2;
+			}
+			break;
+		}
+		ret = read(fd, buf, sizeof(buf));
+		assert(ret); /* eof found by lseek */
+		if (ret <= 0)
+			break;
+		if (old < pos) /* hole */
+			zeros += pos - old;
+		for (i = 0; i < ret; ++i) {
+			for (old = i; buf[i] == 0 && i < ret; ++i)
+				;
+			if (old < i) /* code like a hole */
+				zeros += i - old;
+			if (i == ret)
+				break;
+			if (zeros) {
+				if (verbose >= 2)
+					fprintf(stderr,
+						"adding %llu zeros to sum\n",
+						(unsigned long long)zeros);
+				sum_add_u64(dst, 0);
+				sum_add_u64(dst, zeros);
+				zeros = 0;
+			}
+			for (old = i; buf[i] != 0 && i < ret; ++i)
+				;
+			if (verbose >= 2)
+				fprintf(stderr, "adding %u non-zeros to sum\n",
+					i - (int)old);
+			sum_add(dst, buf + old, i - old);
+		}
+		pos += ret;
+	}
+
+	if (zeros) {
+		if (verbose >= 2)
+			fprintf(stderr,
+				"adding %llu zeros to sum (finishing)\n",
+				(unsigned long long)zeros);
+		sum_add_u64(dst, 0);
+		sum_add_u64(dst, zeros);
+	}
+
+	return ret;
+}
+
+int
+sum_file_data_strict(int fd, sum_t *dst)
+{
+	int ret;
+	off_t pos;
+
+	pos = lseek(fd, 0, SEEK_CUR);
+	if (pos == (off_t)-1)
+		return errno == ENXIO ? 0 : -2;
+
+	while (1) {
+		pos = lseek(fd, pos, SEEK_DATA);
+		if (pos == (off_t)-1)
+			return errno == ENXIO ? 0 : -2;
+		ret = read(fd, buf, sizeof(buf));
+		assert(ret); /* eof found by lseek */
+		if (ret <= 0)
+			return ret;
+		if (verbose >= 2)
+			fprintf(stderr,
+				"adding to sum at file offset %llu, %d bytes\n",
+				(unsigned long long)pos, ret);
+		sum_add_u64(dst, (uint64_t)pos);
+		sum_add(dst, buf, ret);
+		pos += ret;
+	}
+}
+
+char *
+escape(char *in)
+{
+	char *out = alloc(strlen(in) * 3 + 1);
+	char *src = in;
+	char *dst = out;
+
+	for (; *src; ++src) {
+		if (*src >= 32 && *src < 127 && *src != '\\') {
+			*dst++ = *src;
+		} else {
+			sprintf(dst, "\\%02x", (unsigned char)*src);
+			dst += 3;
+		}
+	}
+	*dst = 0;
+
+	return out;
+}
+
+void
+excess_file(const char *fn)
+{
+	printf("only in local fs: %s\n", fn);
+}
+
+void
+missing_file(const char *fn)
+{
+	printf("only in remote fs: %s\n", fn);
+}
+
+int
+pathcmp(const char *a, const char *b)
+{
+	int len_a = strlen(a);
+	int len_b = strlen(b);
+
+	/*
+	 * as the containing directory is sent after the files, it has to
+	 * come out bigger in the comparison.
+	 */
+	if (len_a < len_b && a[len_a - 1] == '/' && strncmp(a, b, len_a) == 0)
+		return 1;
+	if (len_a > len_b && b[len_b - 1] == '/' && strncmp(a, b, len_b) == 0)
+		return -1;
+
+	return strcmp(a, b);
+}
+
+void
+check_match(char *fn, char *local_m, char *remote_m,
+	    char *local_c, char *remote_c)
+{
+	int match_m = !strcmp(local_m, remote_m);
+	int match_c = !strcmp(local_c, remote_c);
+
+	if (match_m && !match_c) {
+		printf("data mismatch in %s\n", fn);
+	} else if (!match_m && match_c) {
+		printf("metadata mismatch in %s\n", fn);
+	} else if (!match_m && !match_c) {
+		printf("metadata and data mismatch in %s\n", fn);
+	}
+}
+
+char *prev_fn;
+char *prev_m;
+char *prev_c;
+void
+check_manifest(char *fn, char *m, char *c, int last_call)
+{
+	char *rem_m;
+	char *rem_c;
+	char *l;
+	int cmp;
+
+	if (prev_fn) {
+		if (last_call)
+			cmp = -1;
+		else
+			cmp = pathcmp(prev_fn, fn);
+		if (cmp > 0) {
+			excess_file(fn);
+			return;
+		} else if (cmp < 0) {
+			missing_file(prev_fn);
+		} else {
+			check_match(fn, m, prev_m, c, prev_c);
+		}
+		free(prev_fn);
+		free(prev_m);
+		free(prev_c);
+		prev_fn = NULL;
+		prev_m = NULL;
+		prev_c = NULL;
+		if (cmp == 0)
+			return;
+	}
+	while ((l = getln(line, sizeof(line), in_fp))) {
+		rem_c = strrchr(l, ' ');
+		if (!rem_c) {
+			/* final cs */
+			checksum = strdup(l);
+			break;
+		}
+		if (rem_c == l) {
+malformed:
+			fprintf(stderr, "malformed input\n");
+			exit(-1);
+		}
+		*rem_c++ = 0;
+		rem_m = strrchr(l, ' ');
+		if (!rem_m)
+			goto malformed;
+		*rem_m++ = 0;
+
+		if (last_call)
+			cmp = -1;
+		else
+			cmp = pathcmp(l, fn);
+		if (cmp == 0) {
+			check_match(fn, m, rem_m, c, rem_c);
+			return;
+		} else if (cmp > 0) {
+			excess_file(fn);
+			prev_fn = strdup(l);
+			prev_m = strdup(rem_m);
+			prev_c = strdup(rem_c); 
+			return;
+		}
+		missing_file(l);
+	}
+	if (!last_call)
+		excess_file(fn);
+}
+
+int
+namecmp(const void *aa, const void *bb)
+{
+	char * const *a = aa;
+	char * const *b = bb;
+
+	return strcmp(*a, *b);
+}
+
+void
+sum(int dirfd, int level, sum_t *dircs, char *path_prefix, char *path_in)
+{
+	DIR *d;
+	struct dirent *de;
+	char **namelist = NULL;
+	int alloclen = 0;
+	int entries = 0;
+	int i;
+	int ret;
+	int fd;
+	int excl;
+	sum_file_data_t sum_file_data = flags[FLAG_STRUCTURE] ?
+			sum_file_data_strict : sum_file_data_permissive;
+
+	d = fdopendir(dirfd);
+	if (!d) {
+		perror("opendir");
+		exit(-1);
+	}
+	while((de = readdir(d))) {
+		if (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))
+			continue;
+		if (entries == alloclen) {
+			alloclen += CHUNKS;
+			namelist = realloc(namelist,
+					   alloclen * sizeof(*namelist));
+			if (!namelist) {
+				fprintf(stderr, "malloc failed\n");
+				exit(-1);
+			}
+		}
+		namelist[entries] = strdup(de->d_name);
+		if (!namelist[entries]) {
+			fprintf(stderr, "malloc failed\n");
+			exit(-1);
+		}
+		++entries;
+	}
+	qsort(namelist, entries, sizeof(*namelist), namecmp);
+	for (i = 0; i < entries; ++i) {
+		struct stat64 st;
+		sum_t cs;
+		sum_t meta;
+		char *path;
+
+		sum_init(&cs);
+		sum_init(&meta);
+		path = alloc(strlen(path_in) + strlen(namelist[i]) + 3);
+		sprintf(path, "%s/%s", path_in, namelist[i]);
+		for (excl = 0; excl < n_excludes; ++excl) {
+			if (strncmp(excludes[excl].path, path,
+			    excludes[excl].len) == 0)
+				goto next;
+		}
+
+		ret = fchdir(dirfd);
+		if (ret == -1) {
+			perror("fchdir");
+			exit(-1);
+		}
+		ret = lstat64(namelist[i], &st);
+		if (ret) {
+			fprintf(stderr, "stat failed for %s/%s: %s\n",
+				path_prefix, path, strerror(errno));
+			exit(-1);
+		}
+		sum_add_u64(&meta, level);
+		sum_add(&meta, namelist[i], strlen(namelist[i]));
+		if (!S_ISDIR(st.st_mode))
+			sum_add_u64(&meta, st.st_nlink);
+		if (flags[FLAG_UID])
+			sum_add_u64(&meta, st.st_uid);
+		if (flags[FLAG_GID])
+			sum_add_u64(&meta, st.st_gid);
+		if (flags[FLAG_MODE])
+			sum_add_u64(&meta, st.st_mode);
+		if (flags[FLAG_ATIME])
+			sum_add_time(&meta, st.st_atime);
+		if (flags[FLAG_MTIME])
+			sum_add_time(&meta, st.st_mtime);
+		if (flags[FLAG_CTIME])
+			sum_add_time(&meta, st.st_ctime);
+		if (S_ISDIR(st.st_mode)) {
+			fd = openat(dirfd, namelist[i], 0);
+			if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
+				sum_add_u64(&meta, errno);
+			} else if (fd == -1) {
+				fprintf(stderr, "open failed for %s/%s: %s\n",
+					path_prefix, path, strerror(errno));
+				exit(-1);
+			} else {
+				sum(fd, level + 1, &cs, path_prefix, path);
+				close(fd);
+			}
+		} else if (S_ISREG(st.st_mode)) {
+			sum_add_u64(&meta, st.st_size);
+			if (flags[FLAG_DATA]) {
+				if (verbose)
+					fprintf(stderr, "file %s\n",
+						namelist[i]);
+				fd = openat(dirfd, namelist[i], 0);
+				if (fd == -1 && flags[FLAG_OPEN_ERROR]) {
+					sum_add_u64(&meta, errno);
+				} else if (fd == -1) {
+					fprintf(stderr,
+						"open failed for %s/%s: %s\n",
+						path_prefix, path,
+						strerror(errno));
+					exit(-1);
+				}
+				if (fd != -1) {
+					ret = sum_file_data(fd, &cs);
+					if (ret < 0) {
+						fprintf(stderr,
+							"read failed for "
+							"%s/%s: %s\n",
+							path_prefix, path,
+							strerror(errno));
+						exit(-1);
+					}
+					close(fd);
+				}
+			}
+		} else if (S_ISLNK(st.st_mode)) {
+			ret = readlink(namelist[i], buf, sizeof(buf));
+			if (ret == -1) {
+				perror("readlink");
+				exit(-1);
+			}
+			sum_add(&cs, buf, ret);
+		} else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) {
+			sum_add_u64(&cs, major(st.st_rdev));
+			sum_add_u64(&cs, minor(st.st_rdev));
+		}
+		sum_fini(&cs);
+		sum_fini(&meta);
+		if (gen_manifest || in_manifest) {
+			char *fn;
+			char *m;
+			char *c;
+
+			if (S_ISDIR(st.st_mode))
+				strcat(path, "/");
+			fn = escape(path);
+			m = sum_to_string(&meta);
+			c = sum_to_string(&cs);
+
+			if (gen_manifest)
+				fprintf(out_fp, "%s %s %s\n", fn, m, c);
+			if (in_manifest)
+				check_manifest(fn, m, c, 0);
+			free(c);
+			free(m);
+			free(fn);
+		}
+		sum_add_sum(dircs, &cs);
+		sum_add_sum(dircs, &meta);
+next:
+		free(path);
+	}
+}
+
+int
+main(int argc, char *argv[])
+{
+	extern char *optarg;
+	extern int optind;
+	int	c;
+	char *path;
+	int fd;
+	sum_t cs;
+	char flagstring[sizeof(flchar)];
+	int i;
+	int plen;
+	int elen;
+	int n_flags = 0;
+	const char *allopts = "heEfuUgGoOaAmMcCdDsSnNw:r:vx:";
+
+	out_fp = stdout;
+	while ((c = getopt(argc, argv, allopts)) != EOF) {
+		switch(c) {
+		case 'f':
+			gen_manifest = 1;
+			break;
+		case 'u':
+		case 'U':
+		case 'g':
+		case 'G':
+		case 'o':
+		case 'O':
+		case 'a':
+		case 'A':
+		case 'm':
+		case 'M':
+		case 'c':
+		case 'C':
+		case 'd':
+		case 'D':
+		case 'e':
+		case 'E':
+		case 's':
+		case 'S':
+			++n_flags;
+			parse_flag(c);
+			break;
+		case 'n':
+			for (i = 0; i < NUM_FLAGS; ++i)
+				flags[i] = 0;
+			break;
+		case 'N':
+			for (i = 0; i < NUM_FLAGS; ++i)
+				flags[i] = 1;
+			break;
+		case 'w':
+			out_fp = fopen(optarg, "w");
+			if (!out_fp) {
+				fprintf(stderr,
+					"failed to open output file: %s\n",
+					strerror(errno));
+				exit(-1);
+			}
+			break;
+		case 'r':
+			in_fp = fopen(optarg, "r");
+			if (!in_fp) {
+				fprintf(stderr,
+					"failed to open input file: %s\n",
+					strerror(errno));
+				exit(-1);
+			}
+			break;
+		case 'x':
+			++n_excludes;
+			excludes = realloc(excludes,
+					   sizeof(*excludes) * n_excludes);
+			if (!excludes) {
+				fprintf(stderr,
+					"failed to alloc exclude space\n");
+				exit(-1);
+			}
+			excludes[n_excludes - 1].path = optarg;
+			break;
+		case 'v':
+			++verbose;
+			break;
+		case 'h':
+		case '?':
+			usage();
+		}
+	}
+
+	if (optind + 1 != argc) {
+		fprintf(stderr, "missing path\n");
+		usage();
+	}
+
+	if (in_fp) {
+		char *l = getln(line, sizeof(line), in_fp);
+		char *p;
+
+		if (l == NULL) {
+			fprintf(stderr, "failed to read line from input\n");
+			exit(-1);
+		}
+		if (strncmp(l, "Flags: ", 7) == 0) {
+			l += 7;
+			in_manifest = 1;
+			parse_flags(l);
+		} else if ((p = strchr(l, ':'))) {
+			*p++ = 0;
+			parse_flags(l);
+			checksum = strdup(p);
+		} else {
+			fprintf(stderr, "invalid input file format\n");
+			exit(-1);
+		}
+		if (n_flags)
+			fprintf(stderr, "warning: "
+				"command line flags ignored in -r mode\n");
+	}
+	strcpy(flagstring, flchar);
+	for (i = 0; i < NUM_FLAGS; ++i) {
+		if (flags[i] == 0)
+			flagstring[i] -= 'a' - 'A';
+	}
+
+	path = argv[optind];
+	plen = strlen(path);
+	if (path[plen - 1] == '/') {
+		--plen;
+		path[plen] = '\0';
+	}
+
+	for (i = 0; i < n_excludes; ++i) {
+		if (strncmp(path, excludes[i].path, plen) != 0)
+			fprintf(stderr,
+				"warning: exclude %s outside of path %s\n",
+				excludes[i].path, path);
+		else
+			excludes[i].path += plen;
+		elen = strlen(excludes[i].path);
+		if (excludes[i].path[elen - 1] == '/')
+			--elen;
+		excludes[i].path[elen] = '\0';
+		excludes[i].len = elen;
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
+		fprintf(stderr, "failed to open %s: %s\n", path,
+			strerror(errno));
+		exit(-1);
+	}
+
+	if (gen_manifest)
+		fprintf(out_fp, "Flags: %s\n", flagstring);
+
+	sum_init(&cs);
+	sum(fd, 1, &cs, path, "");
+	sum_fini(&cs);
+
+	close(fd);
+	if (in_manifest)
+		check_manifest("", "", "", 1);
+
+	if (!checksum) {
+		if (in_manifest) {
+			fprintf(stderr, "malformed input\n");
+			exit(-1);
+		}
+		if (!gen_manifest)
+			fprintf(out_fp, "%s:", flagstring);
+
+		fprintf(out_fp, "%s\n", sum_to_string(&cs));
+	} else {
+		if (strcmp(checksum, sum_to_string(&cs)) == 0) {
+			printf("OK\n");
+			exit(0);
+		} else {
+			printf("FAIL\n");
+			exit(1);
+		}
+	}
+
+	exit(0);
+}