Message ID | 20110826185117.GA19365@sir.fritz.box (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On Fri, Aug 26, 2011 at 11:51, Christian Brunner <chb@muc.de> wrote: > + if (!buf) { > + len = ofs-lastofs; > + tempbuf = (byte *) malloc(len); > + if (!tempbuf) > + return -ENOMEM; > + hashbuf = tempbuf; > + } > + Hash->Update((const byte *) hashbuf, len); That'll still try to allocate 100GB of RAM for a 100GB hole. It needs to loop through big holes in smaller chunks, feeding them to the hash e.g. 8kB at a time. And at that point you might as well just use read and not read_iterate, that'll do the memsetting etc for you, and then you can use a static buffer and avoid malloc/free every round. There's no shortcut to be had from "skipping" holes when you need to feed bytes to a hash function. -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 26, 2011 at 11:51 AM, Christian Brunner <chb@muc.de> wrote: > +static int hash_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg) > +{ > + ceph::crypto::Digest *Hash = (ceph::crypto::Digest *)arg; > + byte *hashbuf = (byte *) buf; Looking at it again, hashbuf is pretty much useless, you can use buf directly. > + byte *tempbuf = NULL; > + > + if (!buf) { > + len = ofs-lastofs; Why setting len here? len was already passed in. > + tempbuf = (byte *) malloc(len); > + if (!tempbuf) > + return -ENOMEM; > + hashbuf = tempbuf; buf = tempbuf; > + } > + Hash->Update((const byte *) hashbuf, len); > + > + lastofs = ofs + len; you don't need lastofs either. > + > + free(tempbuf); > + > + return 0; > +} > + -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 26, 2011 at 12:10 PM, Tommi Virtanen <tommi.virtanen@dreamhost.com> wrote: > On Fri, Aug 26, 2011 at 11:51, Christian Brunner <chb@muc.de> wrote: >> + if (!buf) { >> + len = ofs-lastofs; >> + tempbuf = (byte *) malloc(len); >> + if (!tempbuf) >> + return -ENOMEM; >> + hashbuf = tempbuf; >> + } >> + Hash->Update((const byte *) hashbuf, len); > > That'll still try to allocate 100GB of RAM for a 100GB hole. It needs > to loop through big holes in smaller chunks, feeding them to the hash > e.g. 8kB at a time. And at that point you might as well just use read > and not read_iterate, that'll do the memsetting etc for you, and then > you can use a static buffer and avoid malloc/free every round. There's > no shortcut to be had from "skipping" holes when you need to feed > bytes to a hash function. Well, when using read_iterate you avoid reading extra data over the network when the object (chunk) exists (and is sparse). We can probably have some optimization here, and only allocate and memset a buffer once for the case where len == objsize and reuse it later. Yehuda -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/8/26 Yehuda Sadeh Weinraub <yehudasa@gmail.com>: > On Fri, Aug 26, 2011 at 11:51 AM, Christian Brunner <chb@muc.de> wrote: >> +static int hash_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg) >> +{ >> + ceph::crypto::Digest *Hash = (ceph::crypto::Digest *)arg; >> + byte *hashbuf = (byte *) buf; > > Looking at it again, hashbuf is pretty much useless, you can use buf directly. > >> + byte *tempbuf = NULL; >> + >> + if (!buf) { >> + len = ofs-lastofs; > > Why setting len here? len was already passed in. Ah - then I didn't understand what read_iterate is doing. I was thinking, that the callback is not called for missing objects. > >> + tempbuf = (byte *) malloc(len); Do we need a memset here? >> + if (!tempbuf) >> + return -ENOMEM; >> + hashbuf = tempbuf; > > buf = tempbuf; > >> + } >> + Hash->Update((const byte *) hashbuf, len); >> + >> + lastofs = ofs + len; > > you don't need lastofs either. > >> + >> + free(tempbuf); >> + >> + return 0; >> +} >> + Christian -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 26, 2011 at 12:52 PM, Christian Brunner <chb@muc.de> wrote: > 2011/8/26 Yehuda Sadeh Weinraub <yehudasa@gmail.com>: >> On Fri, Aug 26, 2011 at 11:51 AM, Christian Brunner <chb@muc.de> wrote: >>> +static int hash_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg) >>> +{ >>> + ceph::crypto::Digest *Hash = (ceph::crypto::Digest *)arg; >>> + byte *hashbuf = (byte *) buf; >> >> Looking at it again, hashbuf is pretty much useless, you can use buf directly. >> >>> + byte *tempbuf = NULL; >>> + >>> + if (!buf) { >>> + len = ofs-lastofs; >> >> Why setting len here? len was already passed in. > > Ah - then I didn't understand what read_iterate is doing. I was > thinking, that the callback is not called for missing objects. > >> >>> + tempbuf = (byte *) malloc(len); > > Do we need a memset here? > You can memset, or calloc instead. Yehuda -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 26, 2011 at 12:25, Yehuda Sadeh Weinraub <yehudasa@gmail.com> wrote: > On Fri, Aug 26, 2011 at 12:10 PM, Tommi Virtanen > <tommi.virtanen@dreamhost.com> wrote: >> e.g. 8kB at a time. And at that point you might as well just use read >> and not read_iterate, that'll do the memsetting etc for you, and then >> you can use a static buffer and avoid malloc/free every round. There's >> no shortcut to be had from "skipping" holes when you need to feed >> bytes to a hash function. > > Well, when using read_iterate you avoid reading extra data over the > network when the object (chunk) exists (and is sparse). We can > probably have some optimization here, and only allocate and memset a > buffer once for the case where len == objsize and reuse it later. Reading src/librbd.cc, I don't see the holes going over the wire in either case. read() is just a simple wrapper on top of read_iterate(), that memsets to 0 in case of a hole. Which in this case he was doing manually, so why not just use read() in the first place. -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
2011/8/26 Tommi Virtanen <tommi.virtanen@dreamhost.com>: > On Fri, Aug 26, 2011 at 12:25, Yehuda Sadeh Weinraub <yehudasa@gmail.com> wrote: >> On Fri, Aug 26, 2011 at 12:10 PM, Tommi Virtanen >> <tommi.virtanen@dreamhost.com> wrote: >>> e.g. 8kB at a time. And at that point you might as well just use read >>> and not read_iterate, that'll do the memsetting etc for you, and then >>> you can use a static buffer and avoid malloc/free every round. There's >>> no shortcut to be had from "skipping" holes when you need to feed >>> bytes to a hash function. >> >> Well, when using read_iterate you avoid reading extra data over the >> network when the object (chunk) exists (and is sparse). We can >> probably have some optimization here, and only allocate and memset a >> buffer once for the case where len == objsize and reuse it later. > > Reading src/librbd.cc, I don't see the holes going over the wire in > either case. read() is just a simple wrapper on top of read_iterate(), > that memsets to 0 in case of a hole. Which in this case he was doing > manually, so why not just use read() in the first place. I think Yehuda meant that we could reuse the buffer to avoid the malloc/memset for every hole. But I think that the need for optimization in this case isn't really that big and the code is simpler to read without having a global buffer of zeros. Christian -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
On Fri, Aug 26, 2011 at 1:03 PM, Tommi Virtanen <tommi.virtanen@dreamhost.com> wrote: > On Fri, Aug 26, 2011 at 12:25, Yehuda Sadeh Weinraub <yehudasa@gmail.com> wrote: >> On Fri, Aug 26, 2011 at 12:10 PM, Tommi Virtanen >> <tommi.virtanen@dreamhost.com> wrote: >>> e.g. 8kB at a time. And at that point you might as well just use read >>> and not read_iterate, that'll do the memsetting etc for you, and then >>> you can use a static buffer and avoid malloc/free every round. There's >>> no shortcut to be had from "skipping" holes when you need to feed >>> bytes to a hash function. >> >> Well, when using read_iterate you avoid reading extra data over the >> network when the object (chunk) exists (and is sparse). We can >> probably have some optimization here, and only allocate and memset a >> buffer once for the case where len == objsize and reuse it later. > > Reading src/librbd.cc, I don't see the holes going over the wire in > either case. read() is just a simple wrapper on top of read_iterate(), > that memsets to 0 in case of a hole. Which in this case he was doing > manually, so why not just use read() in the first place. > Yeah, you're right.. the librbd::read() already does everything. -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
diff --git a/src/rbd.cc b/src/rbd.cc index a18c029..9a40941 100644 --- a/src/rbd.cc +++ b/src/rbd.cc @@ -38,6 +38,8 @@ #include <time.h> #include <sys/ioctl.h> +#include "common/ceph_crypto.h" + #include "include/rbd_types.h" #include <linux/fs.h> @@ -49,6 +51,30 @@ static string dir_oid = RBD_DIRECTORY; static string dir_info_oid = RBD_INFO; +static size_t lastofs; + +enum { + OPT_NO_CMD = 0, + OPT_LIST, + OPT_INFO, + OPT_CREATE, + OPT_RESIZE, + OPT_RM, + OPT_EXPORT, + OPT_IMPORT, + OPT_COPY, + OPT_RENAME, + OPT_SNAP_CREATE, + OPT_SNAP_ROLLBACK, + OPT_SNAP_REMOVE, + OPT_SNAP_LIST, + OPT_WATCH, + OPT_MAP, + OPT_UNMAP, + OPT_SHOWMAPPED, + OPT_MD5, + OPT_SHA1, +}; void usage() { @@ -65,6 +91,8 @@ void usage() << " export [image-name] [dest-path] export image to file\n" << " import [path] [dst-image] import image from file (dest defaults\n" << " as the filename part of file)\n" + << " md5 [image-name] print md5 checksum for image\n" + << " sha1 [image-name] print sha1 checksum for image\n" << " <cp | copy> [src-image] [dest-image] copy image to dest\n" << " <mv | rename> [src-image] [dest-image] copy image to dest\n" << " snap ls [image-name] dump list of image snapshots\n" @@ -262,6 +290,78 @@ static int do_export(librbd::Image& image, const char *path) return 0; } +static int hash_read_cb(uint64_t ofs, size_t len, const char *buf, void *arg) +{ + ceph::crypto::Digest *Hash = (ceph::crypto::Digest *)arg; + byte *hashbuf = (byte *) buf; + byte *tempbuf = NULL; + + if (!buf) { + len = ofs-lastofs; + tempbuf = (byte *) malloc(len); + if (!tempbuf) + return -ENOMEM; + hashbuf = tempbuf; + } + Hash->Update((const byte *) hashbuf, len); + + lastofs = ofs + len; + + free(tempbuf); + + return 0; +} + +static int do_hash(librbd::Image& image, const char *imgname, int opt_cmd) +{ + int64_t r, i, digest_size; + byte *digest; + char hexval[] = "0123456789abcdef"; + char *hexdigest; + librbd::image_info_t info; + ceph::crypto::Digest *Hash; + + lastofs = 0; + + if (opt_cmd == OPT_MD5) { + Hash = (ceph::crypto::Digest *) new ceph::crypto::MD5; + } else if (opt_cmd == OPT_SHA1) { + Hash = (ceph::crypto::Digest *) new ceph::crypto::SHA1; + } else { + return -1; + } + + r = image.stat(info, sizeof(info)); + if (r < 0) + return r; + + r = image.read_iterate(0, info.size, hash_read_cb, (void *)Hash); + if (r < 0) + return r; + + if (lastofs < info.size) { + hash_read_cb(info.size, 0, NULL, (void *)Hash); + } + + digest_size = Hash->DigestSize(); + digest = (byte *) malloc(digest_size); + hexdigest = (char *) malloc((digest_size * 2 + 1) * sizeof(char)); + Hash->Final(digest); + + for(i = 0; i < digest_size; i++){ + hexdigest[i*2] = hexval[((digest[i] >> 4) & 0xF)]; + hexdigest[(i*2) + 1] = hexval[(digest[i]) & 0x0F]; + } + hexdigest[(digest_size*2)] = '\0'; + + cout << hexdigest << " " << imgname << std::endl; + + free(hexdigest); + free(digest); + + return 0; +} + static const char *imgname_from_path(const char *path) { const char *imgname; @@ -720,27 +820,6 @@ static int do_kernel_rm(const char *dev) return r; } -enum { - OPT_NO_CMD = 0, - OPT_LIST, - OPT_INFO, - OPT_CREATE, - OPT_RESIZE, - OPT_RM, - OPT_EXPORT, - OPT_IMPORT, - OPT_COPY, - OPT_RENAME, - OPT_SNAP_CREATE, - OPT_SNAP_ROLLBACK, - OPT_SNAP_REMOVE, - OPT_SNAP_LIST, - OPT_WATCH, - OPT_MAP, - OPT_UNMAP, - OPT_SHOWMAPPED, -}; - static int get_cmd(const char *cmd, bool *snapcmd) { if (strcmp(cmd, "snap") == 0) { @@ -766,6 +845,10 @@ static int get_cmd(const char *cmd, bool *snapcmd) return OPT_EXPORT; if (strcmp(cmd, "import") == 0) return OPT_IMPORT; + if (strcmp(cmd, "md5") == 0) + return OPT_MD5; + if (strcmp(cmd, "sha1") == 0) + return OPT_SHA1; if (strcmp(cmd, "copy") == 0 || strcmp(cmd, "cp") == 0) return OPT_COPY; @@ -888,6 +971,10 @@ int main(int argc, const char **argv) case OPT_IMPORT: set_conf_param(CEPH_ARGPARSE_VAL, &path, &destname); break; + case OPT_MD5: + case OPT_SHA1: + set_conf_param(CEPH_ARGPARSE_VAL, &imgname, NULL); + break; case OPT_COPY: case OPT_RENAME: set_conf_param(CEPH_ARGPARSE_VAL, &imgname, &destname); @@ -966,7 +1053,7 @@ int main(int argc, const char **argv) (opt_cmd == OPT_RESIZE || opt_cmd == OPT_INFO || opt_cmd == OPT_SNAP_LIST || opt_cmd == OPT_SNAP_CREATE || opt_cmd == OPT_SNAP_ROLLBACK || opt_cmd == OPT_SNAP_REMOVE || opt_cmd == OPT_EXPORT || opt_cmd == OPT_WATCH || - opt_cmd == OPT_COPY)) { + opt_cmd == OPT_COPY || opt_cmd == OPT_MD5 || opt_cmd == OPT_SHA1 )) { r = rbd.open(io_ctx, image, imgname); if (r < 0) { cerr << "error opening image " << imgname << ": " << strerror(r) << std::endl; @@ -1127,6 +1214,15 @@ int main(int argc, const char **argv) } break; + case OPT_MD5: + case OPT_SHA1: + r = do_hash(image, imgname, opt_cmd); + if (r < 0) { + cerr << "hashing failed: " << strerror(-r) << std::endl; + exit(1); + } + break; + case OPT_COPY: r = do_copy(image, dest_io_ctx, destname); if (r < 0) {