diff --git a/.gitignore b/.gitignore index 251a0d7f0f..da5bf8e6c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +# IDEs. +/.vscode +/.idea + # Bazel. /.user.bazelrc /bazel-* diff --git a/pkg/c3/defs.h b/pkg/c3/defs.h index 38b39263a9..bbf8d91ef7 100644 --- a/pkg/c3/defs.h +++ b/pkg/c3/defs.h @@ -149,6 +149,8 @@ mkdir(a, b);}) # define c3_rmdir(a) ({ \ rmdir(a);}) +# define c3_link(a, b) ({ \ + link(a, b);}) # define c3_unlink(a) ({ \ unlink(a);}) # define c3_fopen(a, b) ({ \ diff --git a/pkg/noun/events.c b/pkg/noun/events.c index dc1ba965d0..0294f522ab 100644 --- a/pkg/noun/events.c +++ b/pkg/noun/events.c @@ -398,23 +398,14 @@ _ce_ephemeral_open(c3_i* eph_i) /* _ce_image_open(): open or create image. */ static c3_o -_ce_image_open(u3e_image* img_u) +_ce_image_open(u3e_image* img_u, c3_c* ful_c) { c3_i mod_i = O_RDWR | O_CREAT; - c3_c ful_c[8193]; - - snprintf(ful_c, 8192, "%s", u3P.dir_c); - c3_mkdir(ful_c, 0700); - - snprintf(ful_c, 8192, "%s/.urb", u3P.dir_c); - c3_mkdir(ful_c, 0700); - - snprintf(ful_c, 8192, "%s/.urb/chk", u3P.dir_c); - c3_mkdir(ful_c, 0700); - snprintf(ful_c, 8192, "%s/.urb/chk/%s.bin", u3P.dir_c, img_u->nam_c); - if ( -1 == (img_u->fid_i = c3_open(ful_c, mod_i, 0666)) ) { - fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno)); + c3_c pax_c[8192]; + snprintf(pax_c, 8192, "%s/%s.bin", ful_c, img_u->nam_c); + if ( -1 == (img_u->fid_i = c3_open(pax_c, mod_i, 0666)) ) { + fprintf(stderr, "loom: c3_open %s: %s\r\n", pax_c, strerror(errno)); return c3n; } else if ( c3n == _ce_image_stat(img_u, &img_u->pgs_w) ) { @@ -1345,54 +1336,75 @@ _ce_image_copy(u3e_image* fom_u, u3e_image* tou_u) return c3y; } -/* u3e_backup(): copy snapshot to .urb/bhk (if it doesn't exist yet). +/* u3e_backup(): copy snapshot from [pux_c] to [pax_c], + * overwriting optionally. note that image files must + * be named "north" and "south". 
*/ c3_o -u3e_backup(c3_o ovw_o) +u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o) { - u3e_image nop_u = { .nam_c = "north", .pgs_w = 0 }; - u3e_image sop_u = { .nam_c = "south", .pgs_w = 0 }; - c3_i mod_i = O_RDWR | O_CREAT; // XX O_TRUNC ? - c3_c ful_c[8193]; + // source image files from [pux_c] + u3e_image nux_u = { .nam_c = "north", .pgs_w = 0 }; + u3e_image sux_u = { .nam_c = "south", .pgs_w = 0 }; + + // destination image files to [pax_c] + u3e_image nax_u = { .nam_c = "north", .pgs_w = 0 }; + u3e_image sax_u = { .nam_c = "south", .pgs_w = 0 }; + + c3_i mod_i = O_RDWR | O_CREAT; - snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c); + if ( !pux_c || !pax_c ) { + fprintf(stderr, "loom: image backup: bad path\r\n"); + return c3n; + } - if ( (c3n == ovw_o) && c3_mkdir(ful_c, 0700) ) { + if ( (c3n == ovw_o) && c3_mkdir(pax_c, 0700) ) { if ( EEXIST != errno ) { fprintf(stderr, "loom: image backup: %s\r\n", strerror(errno)); } return c3n; } - snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, nop_u.nam_c); - - if ( -1 == (nop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) { - fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno)); + // open source image files if they exist + // + c3_c nux_c[8193]; + snprintf(nux_c, 8192, "%s/%s.bin", pux_c, nux_u.nam_c); + if ( (0 != access(nux_c, F_OK)) || (c3n == _ce_image_open(&nux_u, pux_c)) ) { + fprintf(stderr, "loom: couldn't open north image at %s\r\n", pux_c); + return c3n; + } + c3_c sux_c[8193]; + snprintf(sux_c, 8192, "%s/%s.bin", pux_c, sux_u.nam_c); + if ( (0 != access(sux_c, F_OK)) || (c3n == _ce_image_open(&sux_u, pux_c)) ) { + fprintf(stderr, "loom: couldn't open south image at %s\r\n", pux_c); return c3n; } - snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, sop_u.nam_c); - - if ( -1 == (sop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) { - fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno)); + // open destination image files + c3_c nax_c[8193]; + snprintf(nax_c, 8192, "%s/%s.bin", pax_c, 
nax_u.nam_c); + if ( -1 == (nax_u.fid_i = c3_open(nax_c, mod_i, 0666)) ) { + fprintf(stderr, "loom: c3_open %s: %s\r\n", nax_c, strerror(errno)); + return c3n; + } + c3_c sax_c[8193]; + snprintf(sax_c, 8192, "%s/%s.bin", pax_c, sax_u.nam_c); + if ( -1 == (sax_u.fid_i = c3_open(sax_c, mod_i, 0666)) ) { + fprintf(stderr, "loom: c3_open %s: %s\r\n", sax_c, strerror(errno)); return c3n; } - if ( (c3n == _ce_image_copy(&u3P.nor_u, &nop_u)) - || (c3n == _ce_image_copy(&u3P.sou_u, &sop_u)) ) + if ( (c3n == _ce_image_copy(&nux_u, &nax_u)) + || (c3n == _ce_image_copy(&sux_u, &sax_u)) ) { - - c3_unlink(ful_c); - snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, nop_u.nam_c); - c3_unlink(ful_c); - snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c); - c3_rmdir(ful_c); + c3_unlink(nax_c); + c3_unlink(sax_c); fprintf(stderr, "loom: image backup failed\r\n"); return c3n; } - close(nop_u.fid_i); - close(sop_u.fid_i); + close(nax_u.fid_i); + close(sax_u.fid_i); fprintf(stderr, "loom: image backup complete\r\n"); return c3y; } @@ -1502,8 +1514,6 @@ u3e_save(u3_post low_p, u3_post hig_p) } u3e_toss(low_p, hig_p); - - u3e_backup(c3n); } /* _ce_toss_pages(): discard ephemeral pages. @@ -1573,8 +1583,10 @@ u3e_live(c3_o nuu_o, c3_c* dir_c) // Open image files. // - if ( (c3n == _ce_image_open(&u3P.nor_u)) || - (c3n == _ce_image_open(&u3P.sou_u)) ) + c3_c chk_c[8193]; + snprintf(chk_c, 8193, "%s/.urb/chk", u3P.dir_c); + if ( (c3n == _ce_image_open(&u3P.nor_u, chk_c)) || + (c3n == _ce_image_open(&u3P.sou_u, chk_c)) ) { fprintf(stderr, "boot: image failed\r\n"); exit(1); diff --git a/pkg/noun/events.h b/pkg/noun/events.h index b17dcba65d..7442c9df94 100644 --- a/pkg/noun/events.h +++ b/pkg/noun/events.h @@ -74,10 +74,11 @@ /** Functions. **/ - /* u3e_backup(): copy the snapshot from chk to bhk. + /* u3e_backup(): copy the snapshot from [pux_c] to [pax_c], + * overwriting optional. 
*/ - c3_o - u3e_backup(c3_o ovw_o); + c3_o + u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o); /* u3e_fault(): handle a memory fault. */ diff --git a/pkg/noun/manage.c b/pkg/noun/manage.c index 5ff7d7b129..9b6d47fcab 100644 --- a/pkg/noun/manage.c +++ b/pkg/noun/manage.c @@ -1762,14 +1762,6 @@ _cm_limits(void) # endif } -/* u3m_backup(): copy snapshot to .urb/bhk (if it doesn't exist yet). -*/ -c3_o -u3m_backup(c3_o ovw_o) -{ - return u3e_backup(ovw_o); -} - /* u3m_fault(): handle a memory event with libsigsegv protocol. */ c3_i @@ -2090,6 +2082,42 @@ u3m_stop() u3je_secp_stop(); } +/* u3m_pier(): make a pier. +*/ +c3_c* +u3m_pier(c3_c* dir_c) +{ + c3_c ful_c[8193]; + + u3C.dir_c = dir_c; + + snprintf(ful_c, 8192, "%s", dir_c); + if ( c3_mkdir(ful_c, 0700) ) { + if ( EEXIST != errno ) { + fprintf(stderr, "loom: pier create: %s\r\n", strerror(errno)); + exit(1); + } + } + + snprintf(ful_c, 8192, "%s/.urb", dir_c); + if ( c3_mkdir(ful_c, 0700) ) { + if ( EEXIST != errno ) { + fprintf(stderr, "loom: .urb create: %s\r\n", strerror(errno)); + exit(1); + } + } + + snprintf(ful_c, 8192, "%s/.urb/chk", dir_c); + if ( c3_mkdir(ful_c, 0700) ) { + if ( EEXIST != errno ) { + fprintf(stderr, "loom: .urb/chk create: %s\r\n", strerror(errno)); + exit(1); + } + } + + return strdup(dir_c); +} + /* u3m_boot(): start the u3 system. return next event, starting from 1. */ c3_d @@ -2105,7 +2133,7 @@ u3m_boot(c3_c* dir_c, size_t len_i) /* Activate the storage system. */ - nuu_o = u3e_live(c3n, dir_c); + nuu_o = u3e_live(c3n, u3m_pier(dir_c)); /* Activate tracing. */ diff --git a/pkg/noun/manage.h b/pkg/noun/manage.h index 297585c486..56d4ca7901 100644 --- a/pkg/noun/manage.h +++ b/pkg/noun/manage.h @@ -14,6 +14,11 @@ c3_d u3m_boot(c3_c* dir_c, size_t len_i); + /* u3m_pier(): make a pier. + */ + c3_c* + u3m_pier(c3_c* dir_c); + /* u3m_boot_lite(): start without checkpointing. 
*/ c3_d diff --git a/pkg/noun/version.h b/pkg/noun/version.h index f64d0a398e..125abd684c 100644 --- a/pkg/noun/version.h +++ b/pkg/noun/version.h @@ -18,4 +18,10 @@ typedef c3_w u3e_version; #define U3E_VER1 1 #define U3E_VERLAT U3E_VER1 +/* DISK FORMAT + */ + +#define U3D_VER1 1 +#define U3D_VERLAT U3D_VER1 + #endif /* ifndef U3_VERSION_H */ diff --git a/pkg/vere/disk.c b/pkg/vere/disk.c index 2cc270a368..f24c11e11b 100644 --- a/pkg/vere/disk.c +++ b/pkg/vere/disk.c @@ -1,8 +1,11 @@ /// @file #include "noun.h" +#include "events.h" #include "vere.h" +#include "version.h" #include "db/lmdb.h" +#include struct _cd_read { uv_timer_t tim_u; @@ -28,6 +31,15 @@ struct _u3_disk_walk { c3_o liv_o; }; +// for u3_lmdb_init() calls +static const size_t siz_i = +#if (defined(U3_CPU_aarch64) && defined(U3_OS_linux)) + // 500 GiB is as large as musl on aarch64 wants to allow + 0x7d00000000; +#else + 0x10000000000; +#endif + #undef VERBOSE_DISK #undef DISK_TRACE_JAM #undef DISK_TRACE_CUE @@ -963,7 +975,7 @@ u3_disk_slog(u3_disk* log_u) /* u3_disk_init(): load or create pier directories and event log. */ u3_disk* -u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u) +u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u, c3_o mig_o) { u3_disk* log_u = c3_calloc(sizeof(*log_u)); log_u->liv_o = c3n; @@ -1019,62 +1031,83 @@ u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u) c3_free(dir_c); } - // create/load $pier/.urb/log, initialize db + // create/load $pier/.urb/log // { - c3_c* log_c = c3_malloc(10 + strlen(pax_c)); - - strcpy(log_c, pax_c); - strcat(log_c, "/.urb/log"); + c3_c log_c[8193]; + snprintf(log_c, sizeof(log_c), "%s/.urb/log", pax_c); if ( 0 == (log_u->com_u = u3_foil_folder(log_c)) ) { fprintf(stderr, "disk: failed to load /.urb/log in %s\r\n", pax_c); - c3_free(log_c); c3_free(log_u); return 0; } - // Arbitrarily choosing 1TB as a "large enough" mapsize - // - // per the LMDB docs: - // "[..] on 64-bit there is no penalty for making this huge (say 1TB)." 
- // - { - const size_t siz_i = - // 500 GiB is as large as musl on aarch64 wants to allow - #if (defined(U3_CPU_aarch64) && defined(U3_OS_linux)) - 0x7d00000000; - #else - 0x10000000000; - #endif - - if ( 0 == (log_u->mdb_u = u3_lmdb_init(log_c, siz_i)) ) { - fprintf(stderr, "disk: failed to initialize database\r\n"); - c3_free(log_c); + if ( c3y == u3_disk_need_migrate(log_u) ) { + if ( (c3y == mig_o) && (c3n == u3_disk_migrate(log_u)) ) { + fprintf(stderr, "disk: failed to migrate log\r\n"); c3_free(log_u); return 0; } + else { + fprintf(stderr, "disk: loading old format\r\n"); + + if ( 0 == (log_u->mdb_u = u3_lmdb_init(log_c, siz_i)) ) { + fprintf(stderr, "disk: failed to initialize lmdb\r\n"); + c3_free(log_u); + return 0; + } + c3_d fir_d; + if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &log_u->dun_d) ) { + fprintf(stderr, "disk: failed to load latest event from lmdb\r\n"); + c3_free(log_u); + return 0; + } + + log_u->sen_d = log_u->dun_d; + + return log_u; + } } - c3_free(log_c); - } + // get latest epoch number + c3_d lat_d; + if ( c3n == u3_disk_epoc_last(log_u, &lat_d) ) { + fprintf(stderr, "disk: failed to load epoch number\r\n"); + c3_free(log_u); + return 0; + } - // get the latest event number from the db - // - { - log_u->dun_d = 0; - c3_d fir_d; + // set path to latest epoch + c3_c epo_c[8193]; + snprintf(epo_c, 8192, "%s/0i%" PRIc3_d, log_c, lat_d); - if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &log_u->dun_d) ) { - fprintf(stderr, "disk: failed to load latest event from database\r\n"); + // initialize latest epoch's db + if ( 0 == (log_u->mdb_u = u3_lmdb_init(epo_c, siz_i)) ) { + fprintf(stderr, "disk: failed to initialize database\r\n"); c3_free(log_u); return 0; } + fprintf(stderr, "disk: loaded epoch 0i%" PRIc3_d "\r\n", lat_d); + + // get first/last event numbers from lmdb + c3_d fir_d, las_d; + if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &las_d) ) { + fprintf(stderr, "disk: failed to get first/last event numbers\r\n"); + c3_free(log_u); + return 0; + } + // 
initialize dun_d/sen_d values + log_u->dun_d = ( 0 != las_d ) ? las_d : lat_d; log_u->sen_d = log_u->dun_d; + + // mark the latest epoch directory + log_u->epo_d = lat_d; + + // mark the log as live + log_u->liv_o = c3y; } - log_u->liv_o = c3y; #if defined(DISK_TRACE_JAM) || defined(DISK_TRACE_CUE) u3t_trace_open(pax_c); @@ -1082,3 +1115,383 @@ u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u) return log_u; } + +/* u3_disk_epoc_init: create new epoch [epo_d] in the log directory. + * returns c3y on success and c3n on failure; [log_u] stays + * owned by the caller either way. +*/ +c3_o +u3_disk_epoc_init(u3_disk* log_u, c3_d epo_d) +{ + // check if any epoch directories exist + c3_d lat_d; + c3_o eps_o = u3_disk_epoc_last(log_u, &lat_d); + + // create new epoch directory if it doesn't exist + c3_c epo_c[8193]; + snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); + // NB: mkdir()'s result must stay signed to be comparable with 0 + c3_i ret_i = c3_mkdir(epo_c, 0700); + if ( ( ret_i < 0 ) && ( errno != EEXIST ) ) { + fprintf(stderr, "disk: failed to create epoch directory %" PRIc3_d "\r\n", epo_d); + return c3n; + } + + // name both version files up front so [fail] can unlink either + c3_c epv_c[8193], biv_c[8193]; + snprintf(epv_c, sizeof(epv_c), "%s/epoc.txt", epo_c); + snprintf(biv_c, sizeof(biv_c), "%s/vere.txt", epo_c); + + // create epoch version file, overwriting any existing file + FILE* epv_f = fopen(epv_c, "w"); + if ( !epv_f ) { + fprintf(stderr, "disk: failed to create %s: %s\r\n", epv_c, strerror(errno)); + goto fail; + } + fprintf(epv_f, "%d\n", U3D_VER1); + fclose(epv_f); + + // create binary version file, overwriting any existing file + FILE* biv_f = fopen(biv_c, "w"); + if ( !biv_f ) { + fprintf(stderr, "disk: failed to create %s: %s\r\n", biv_c, strerror(errno)); + goto fail; + } + fprintf(biv_f, "%s", URBIT_VERSION); + fclose(biv_f); + + // copy snapshot files (skip if first epoch) + if ( epo_d > 0 ) { + c3_c chk_c[8193]; + snprintf(chk_c, 8192, "%s/.urb/chk", u3_Host.dir_c); + if ( c3n == u3e_backup(chk_c, epo_c, c3y) ) { + fprintf(stderr, "disk: failed to copy snapshot to new epoch\r\n"); + goto fail; + } + } + + // get metadata from old epoch or unmigrated event log's db + c3_d who_d[2]; + c3_o fak_o; + c3_w lif_w; + if ( c3y == eps_o ) { // skip if no epochs yet + if ( c3y != u3_disk_read_meta(log_u->mdb_u, who_d, &fak_o, &lif_w) ) { + fprintf(stderr, "disk: failed to read metadata\r\n"); + goto fail; + } + 
} + + // initialize db of new epoch + if ( c3y == u3_Host.ops_u.nuu || epo_d > 0 ) { + c3_c dat_c[8193]; + snprintf(dat_c, sizeof(dat_c), "%s/data.mdb", epo_c); + if ( 0 == (log_u->mdb_u = u3_lmdb_init(epo_c, siz_i)) ) { + fprintf(stderr, "disk: failed to initialize database\r\n"); + // NB: log_u is owned by the caller; freeing it here would leave a dangling handle + goto fail; + } + } + + // write the metadata to the database + if ( c3y == eps_o ) { + if ( c3n == u3_disk_save_meta(log_u->mdb_u, who_d, fak_o, lif_w) ) { + fprintf(stderr, "disk: failed to save metadata\r\n"); + goto fail; + } + } + + // load new epoch directory and set it in log_u + log_u->epo_d = epo_d; + + // success + return c3y; + +fail: + c3_unlink(epv_c); + c3_unlink(biv_c); + c3_rmdir(epo_c); + return c3n; +} + +/* u3_disk_epoc_kill: delete an epoch. +*/ +c3_o +u3_disk_epoc_kill(u3_disk* log_u, c3_d epo_d) +{ + // get epoch directory + c3_c epo_c[8193]; + snprintf(epo_c, sizeof(epo_c), "%s/0i%" PRIc3_d, log_u->com_u->pax_c, epo_d); + + // delete files in epoch directory + u3_dire* dir_u = u3_foil_folder(epo_c); + if ( !dir_u ) { + fprintf(stderr, "disk: failed to load epoch directory\r\n"); + return c3n; + } + u3_dent* den_u = dir_u->all_u; + while ( den_u ) { + c3_c fil_c[8193]; + snprintf(fil_c, sizeof(fil_c), "%s/%s", epo_c, den_u->nam_c); + if ( 0 != c3_unlink(fil_c) ) { + fprintf(stderr, "disk: failed to delete file in epoch directory\r\n"); + u3_dire_free(dir_u); + return c3n; + } + den_u = den_u->nex_u; + } + + // delete epoch directory + if ( 0 != c3_rmdir(epo_c) ) { + fprintf(stderr, "disk: failed to delete epoch directory\r\n"); + u3_dire_free(dir_u); + return c3n; + } + + // cleanup + u3_dire_free(dir_u); + + // success + return c3y; +} + +/* u3_disk_epoc_last: get latest epoch number. 
+*/ +c3_o +u3_disk_epoc_last(u3_disk* log_u, c3_d* lat_d) +{ + c3_o ret_o = c3n; // return c3n if no epoch directories exist + *lat_d = 0; // initialize lat_d to 0 + u3_dire* die_u = u3_foil_folder(log_u->com_u->pax_c); + u3_dent* den_u = die_u->dil_u; + while ( den_u ) { + c3_d epo_d = 0; + if ( 1 == sscanf(den_u->nam_c, "0i%" PRIc3_d, &epo_d) ) { + ret_o = c3y; // NB: returns yes if the directory merely exists + *lat_d = c3_max(epo_d, *lat_d); // update the latest epoch number + } + den_u = den_u->nex_u; + } + + u3_dire_free(die_u); + + return ret_o; +} + +/* u3_disk_epoc_vere: get binary version from epoch. +*/ +c3_o +u3_disk_epoc_vere(u3_disk* log_u, c3_d epo_d, c3_c* ver_w) +{ + struct stat buf_u; + c3_c* ver_c; + c3_w red_w, len_w; + c3_i ret_i, fid_i; + ret_i = asprintf(&ver_c, "%s/0i%" PRIc3_d "/vere.txt", + log_u->com_u->pax_c, epo_d); + u3_assert( ret_i > 0 ); + + fid_i = c3_open(ver_c, O_RDONLY, 0644); + c3_free(ver_c); // asprintf() buffer, only needed for the open + if ( (fid_i < 0) || (fstat(fid_i, &buf_u) < 0) ) { + fprintf(stderr, "disk: failed to open vere.txt in epoch 0i%" PRIc3_d + "\r\n", epo_d); + return c3n; + } + + len_w = buf_u.st_size; + red_w = read(fid_i, ver_w, len_w); + close(fid_i); + + if ( len_w != red_w ) { + fprintf(stderr, "disk: failed to read vere.txt in epoch 0i%" PRIc3_d + "\r\n", epo_d); + return c3n; + } + + // trim trailing whitespace + ver_w[len_w] = 0; + while ( len_w-- && isspace((unsigned char)ver_w[len_w]) ) { + ver_w[len_w] = 0; + } + + return c3y; +} + +/* u3_disk_need_migrate: does the disk need to be migrated? +*/ +c3_o +u3_disk_need_migrate(u3_disk* log_u) +{ + // check if data.mdb is readable in log directory + c3_c dut_c[8193]; + snprintf(dut_c, sizeof(dut_c), "%s/data.mdb", log_u->com_u->pax_c); + if ( !_(u3_Host.ops_u.nuu) + && 0 != access(dut_c, F_OK) ) { + // if .urb/log/data.mdb does not exist, skip migration + return c3n; + } + + return c3y; +} + +/* u3_disk_migrate: migrates disk format. + */ +c3_o +u3_disk_migrate(u3_disk* log_u) +{ + /* migration steps: + * 0. 
detect whether we need to migrate or not + * a. if it's a fresh boot via u3_Host.ops_u.nuu -> skip migration + * b. if data.mdb is readable in log directory -> execute migration + * if not -> skip migration (returns yes) + * 1. initialize epoch 0i0 (first call to u3_disk_epoc_init()) + * a. creates epoch directory + * b. creates epoch version file + * c. creates binary version file + * d. initializes database + * e. reads metadata from old database + * f. writes metadata to new database + * g. loads new epoch directory and sets it in log_u + * 2. create hard links to data.mdb and lock.mdb in 0i0/ + * 3. rollover to new epoch (second call to u3_disk_epoc_init()) + * a. same as 1a-g but also copies current snapshot between c/d steps + * 4. delete backup snapshot (c3_unlink() and c3_rmdir() calls) + * 5. delete old data.mdb and lock.mdb files (c3_unlink() calls) + */ + + fprintf(stderr, "disk: migrating disk to v%d format\r\n", U3D_VER1); + + // check if lock.mdb is readable in log directory + c3_o luk_o = c3n; + c3_c luk_c[8193]; + snprintf(luk_c, sizeof(luk_c), "%s/lock.mdb", log_u->com_u->pax_c); + if ( 0 == access(luk_c, R_OK) ) { + luk_o = c3y; + } + + // if fresh boot, initialize disk v1 + if ( c3y == u3_Host.ops_u.nuu ) { + // initialize first epoch "0i0" + if ( c3n == u3_disk_epoc_init(log_u, 0) ) { + fprintf(stderr, "disk: failed to initialize first epoch\r\n"); + return c3n; + } + + return c3y; + } + + // migrate existing pier which has either: + // - not started the migration, or + // - crashed before completing the migration + + // initialize pre-migrated lmdb + { + if ( 0 == (log_u->mdb_u = u3_lmdb_init(log_u->com_u->pax_c, siz_i)) ) { + fprintf(stderr, "disk: failed to initialize database\r\n"); + return c3n; + } + } + + // get first/last event numbers from pre-migrated lmdb + c3_d fir_d, las_d; + if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &las_d) ) { + fprintf(stderr, "disk: failed to get first/last event numbers\r\n"); + return c3n; + } + + // 
ensure there's a current snapshot + if ( u3_Host.eve_d != las_d ) { + fprintf(stderr, "disk: snapshot is out of date, please " + "start/shutdown your pier gracefully first\r\n"); + fprintf(stderr, "disk: eve_d (%" PRIc3_d ") != las_d (%" PRIc3_d ")\r\n", + u3_Host.eve_d, las_d); + return c3n; + } + + // initialize first epoch "0i0" + if ( c3n == u3_disk_epoc_init(log_u, 0) ) { + fprintf(stderr, "disk: failed to initialize first epoch\r\n"); + return c3n; + } + + // create hard links to data.mdb and lock.mdb in 0i0/ (EEXIST: left by an interrupted migration) + c3_c epo_c[8193], dut_c[8193], dat_c[8193], lok_c[8193]; + snprintf(epo_c, sizeof(epo_c), "%s/0i0", log_u->com_u->pax_c); + snprintf(dut_c, sizeof(dut_c), "%s/data.mdb", log_u->com_u->pax_c); + snprintf(dat_c, sizeof(dat_c), "%s/data.mdb", epo_c); + snprintf(lok_c, sizeof(lok_c), "%s/lock.mdb", epo_c); + + if ( (0 != c3_link(dut_c, dat_c)) && (EEXIST != errno) ) { + fprintf(stderr, "disk: failed to create data.mdb hard link\r\n"); + return c3n; + } + if ( c3y == luk_o ) { // only link lock.mdb if it exists + if ( (0 != c3_link(luk_c, lok_c)) && (EEXIST != errno) ) { + fprintf(stderr, "disk: failed to create lock.mdb hard link\r\n"); + return c3n; + } + } + + // rollover to new epoch + if ( c3n == u3_disk_epoc_init(log_u, las_d) ) { + fprintf(stderr, "disk: failed to initialize new epoch\r\n"); + return c3n; + } + + // delete backup snapshot, if any (best-effort; a missing file is not an error) + c3_c bhk_c[8193], nop_c[8193], sop_c[8193]; + snprintf(bhk_c, sizeof(bhk_c), "%s/.urb/bhk", u3_Host.dir_c); + snprintf(nop_c, sizeof(nop_c), "%s/north.bin", bhk_c); + snprintf(sop_c, sizeof(sop_c), "%s/south.bin", bhk_c); + if ( (0 != c3_unlink(nop_c)) && (ENOENT != errno) ) { + fprintf(stderr, "disk: failed to delete bhk/north.bin\r\n"); + } + else if ( (0 != c3_unlink(sop_c)) && (ENOENT != errno) ) { + fprintf(stderr, "disk: failed to delete bhk/south.bin\r\n"); + } + else { + if ( (0 != c3_rmdir(bhk_c)) && (ENOENT != errno) ) { + fprintf(stderr, "disk: failed to delete bhk/\r\n"); + } + } + + // delete old lock.mdb and data.mdb files + if ( 0 != c3_unlink(luk_c) ) { + fprintf(stderr, "disk: failed to unlink 
lock.mdb\r\n"); + } + if ( 0 != c3_unlink(dut_c) ) { + fprintf(stderr, "disk: failed to unlink data.mdb\r\n"); + return c3n; // migration succeeds only if we can unlink data.mdb + } + + // success + fprintf(stderr, "disk: migrated disk to v%d format\r\n", U3D_VER1); + + return c3y; +} + + +/* u3_disk_vere_diff(): checks if vere version mismatches latest epoch's. +*/ +c3_o +u3_disk_vere_diff(u3_disk* log_u) +{ + c3_d lat_d; + if ( c3n == u3_disk_epoc_last(log_u, &lat_d) ) { + fprintf(stderr, "disk: failed to load last epoch\r\n"); + // NB: log_u is owned by the caller; an unknown version is reported as a mismatch + return c3y; + } + + c3_c ver_c[8193]; + if ( c3n == u3_disk_epoc_vere(log_u, lat_d, ver_c) ) { + fprintf(stderr, "disk: failed to load epoch version\r\n"); + // NB: log_u is owned by the caller; an unknown version is reported as a mismatch + return c3y; + } + if ( 0 != strcmp(ver_c, URBIT_VERSION) ) { + return c3y; + } + + return c3n; +} \ No newline at end of file diff --git a/pkg/vere/foil.c b/pkg/vere/foil.c index 26ff8b3d04..9fae86973f 100644 --- a/pkg/vere/foil.c +++ b/pkg/vere/foil.c @@ -106,13 +106,17 @@ u3_foil_folder(const c3_c* pax_c) dir_u = u3_dire_init(pax_c); } - /* create entries for all files + /* create entries for all files and directories */ while ( UV_EOF != uv_fs_scandir_next(&ruq_u, &den_u) ) { if ( UV_DIRENT_FILE == den_u.type ) { u3_dent* det_u = u3_dent_init(den_u.name); det_u->nex_u = dir_u->all_u; dir_u->all_u = det_u; + } else if ( UV_DIRENT_DIR == den_u.type ) { + u3_dent* det_u = u3_dent_init(den_u.name); + det_u->nex_u = dir_u->dil_u; + dir_u->dil_u = det_u; } } diff --git a/pkg/vere/main.c b/pkg/vere/main.c index f83d656e98..cca3081a9b 100644 --- a/pkg/vere/main.c +++ b/pkg/vere/main.c @@ -342,7 +342,7 @@ _main_getopt(c3_i argc, c3_c** argv) return c3n; } else { u3_Host.ops_u.sap_w = arg_w * 60; - if ( 0 == u3_Host.ops_u.sap_w) + if ( 0 == u3_Host.ops_u.sap_w ) return c3n; } break; @@ -712,6 +712,7 @@ _cw_usage(c3_c* bin_c) " %s next %.*s request upgrade:\n", " %s queu %.*s cue state:\n", " %s chop %.*s truncate event log:\n", + " %s roll %.*s rollover to 
new epoch:\n", " %s vere ARGS download binary:\n", "\n run as a 'serf':\n", " %s serf " @@ -1193,7 +1194,7 @@ static u3_disk* _cw_disk_init(c3_c* dir_c) { u3_disk_cb cb_u = {0}; - u3_disk* log_u = u3_disk_init(dir_c, cb_u); + u3_disk* log_u = u3_disk_init(dir_c, cb_u, c3y); if ( !log_u ) { fprintf(stderr, "unable to open event log\n"); @@ -1560,11 +1561,11 @@ _cw_info(c3_i argc, c3_c* argv[]) exit(1); } - c3_d eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); fprintf(stderr, "\r\nurbit: %s at event %" PRIu64 "\r\n", - u3_Host.dir_c, eve_d); + u3_Host.dir_c, u3_Host.eve_d); u3_disk_slog(log_u); printf("\n"); @@ -1719,17 +1720,17 @@ _cw_cram(c3_i argc, c3_c* argv[]) exit(1); } - c3_d eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock c3_o ret_o; fprintf(stderr, "urbit: cram: preparing\r\n"); - if ( c3n == (ret_o = u3u_cram(u3_Host.dir_c, eve_d)) ) { + if ( c3n == (ret_o = u3u_cram(u3_Host.dir_c, u3_Host.eve_d)) ) { fprintf(stderr, "urbit: cram: unable to jam state\r\n"); } else { - fprintf(stderr, "urbit: cram: rock saved at event %" PRIu64 "\r\n", eve_d); + fprintf(stderr, "urbit: cram: rock saved at event %" PRIu64 "\r\n", u3_Host.eve_d); } // save even on failure, as we just did all the work of deduplication @@ -1831,12 +1832,11 @@ _cw_queu(c3_i argc, c3_c* argv[]) exit(1); } else { + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock fprintf(stderr, "urbit: queu: preparing\r\n"); - u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); - // XX can spuriously fail do to corrupt memory-image checkpoint, // need a u3m_half_boot equivalent // workaround is to 
delete/move the checkpoint in case of corruption @@ -1924,10 +1924,10 @@ _cw_meld(c3_i argc, c3_c* argv[]) exit(1); } - u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock - u3C.wag_w |= u3o_hashless; - u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock u3a_print_memory(stderr, "urbit: meld: gained", u3u_meld()); @@ -2086,9 +2086,9 @@ _cw_pack(c3_i argc, c3_c* argv[]) exit(1); } + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock - u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); u3a_print_memory(stderr, "urbit: pack: gained", u3m_pack()); u3m_save(); @@ -2105,6 +2105,38 @@ _cw_play_slog(u3_noun hod) u3z(hod); } +/* _cw_play_snap(): prepare snapshot for full replay. +*/ +static void +_cw_play_snap(u3_disk* log_u) +{ + c3_c chk_c[8193], epo_c[8193]; + snprintf(chk_c, 8193, "%s/.urb/chk", u3_Host.dir_c); + snprintf(epo_c, 8192, "%s/0i%" PRIc3_d, log_u->com_u->pax_c, log_u->epo_d); + + if ( 0 == log_u->epo_d ) { + // if epoch 0 is the latest, delete the snapshot files in chk/ + c3_c nor_c[8193], sop_c[8193]; + snprintf(nor_c, 8193, "%s/.urb/chk/north.bin", u3_Host.dir_c); + snprintf(sop_c, 8193, "%s/.urb/chk/south.bin", u3_Host.dir_c); + if ( c3_unlink(nor_c) && (ENOENT != errno) ) { + fprintf(stderr, "mars: failed to unlink %s: %s\r\n", + nor_c, strerror(errno)); + exit(1); + } + if ( c3_unlink(sop_c) && (ENOENT != errno) ) { + fprintf(stderr, "mars: failed to unlink %s: %s\r\n", + sop_c, strerror(errno)); + exit(1); + } + } + else if ( 0 != u3e_backup(epo_c, chk_c, c3y) ) { + // copy the latest epoch's snapshot files into chk/ + fprintf(stderr, "mars: failed to copy snapshot\r\n"); + exit(1); + } +} + /* _cw_play_exit(): exit immediately. 
*/ static void @@ -2116,6 +2148,66 @@ _cw_play_exit(c3_i int_i) raise(SIGINT); } +/* _cw_play_impl(): replay events, but better. +*/ +static void +_cw_play_impl(c3_d eve_d, c3_d sap_d, c3_o mel_o, c3_o sof_o, c3_o ful_o) +{ + // XX handle SIGTSTP so that the lockfile is not orphaned? + // + u3_disk* log_u; + if ( 0 == (log_u = u3_disk_init(u3_Host.dir_c, (u3_disk_cb){0}, c3n)) ) { + fprintf(stderr, "mars: failed to load event log\r\n"); + exit(1); + } + + // Handle SIGTSTP as if it was SIGINT. + // + // Configured here using signal() so as to be immediately available. + // + signal(SIGTSTP, _cw_play_exit); + + // XX source these from a shared struct ops_u + if ( c3y == mel_o ) { + u3C.wag_w |= u3o_auto_meld; + } + + if ( c3y == sof_o ) { + u3C.wag_w |= u3o_soft_mugs; + } + + u3C.wag_w |= u3o_hashless; + + if ( c3y == ful_o ) { + u3l_log("mars: preparing for full replay"); + _cw_play_snap(log_u); + } + + u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + + u3C.slog_f = _cw_play_slog; + + { + u3_mars mar_u = { + .log_u = log_u, + .dir_c = u3_Host.dir_c, + .sen_d = u3A->eve_d, + .dun_d = u3A->eve_d, + }; + + u3_mars_play(&mar_u, eve_d, sap_d); + + // migrate after replay, if necessary + u3_Host.eve_d = mar_u.dun_d; + if ( c3y == u3_disk_need_migrate(log_u) ) { + u3_disk_migrate(log_u); + } + } + + u3_disk_exit(log_u); + u3m_stop(); +} + /* _cw_play(): replay events, but better. */ static void @@ -2208,55 +2300,7 @@ _cw_play(c3_i argc, c3_c* argv[]) exit(1); } - // XX handle SIGTSTP so that the lockfile is not orphaned? - // - u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); // XX s/b try_aquire lock - - // Handle SIGTSTP as if it was SIGINT. - // - // Configured here using signal() so as to be immediately available. 
- // - signal(SIGTSTP, _cw_play_exit); - - if ( c3y == mel_o ) { - u3C.wag_w |= u3o_auto_meld; - } - - if ( c3y == sof_o ) { - u3C.wag_w |= u3o_soft_mugs; - } - - u3C.wag_w |= u3o_hashless; - - // XX this should restore the epoch snapshot and replay that - // - if ( c3y == ful_o ) { - u3l_log("mars: preparing for full replay"); - u3m_init((size_t)1 << u3_Host.ops_u.lom_y); - u3e_live(c3n, u3_Host.dir_c); - u3m_foul(); - u3m_pave(c3y); - u3j_boot(c3y); - } - else { - u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); - } - - u3C.slog_f = _cw_play_slog; - - { - u3_mars mar_u = { - .log_u = log_u, - .dir_c = u3_Host.dir_c, - .sen_d = u3A->eve_d, - .dun_d = u3A->eve_d, - }; - - u3_mars_play(&mar_u, eve_d, sap_d); - } - - u3_disk_exit(log_u); - u3m_stop(); + _cw_play_impl(eve_d, sap_d, mel_o, sof_o, ful_o); } /* _cw_prep(): prepare for upgrade @@ -2264,6 +2308,8 @@ _cw_play(c3_i argc, c3_c* argv[]) static void _cw_prep(c3_i argc, c3_c* argv[]) { + // XX roll with old binary + // check that new epoch is empty, migrate snapshot in-place c3_i ch_i, lid_i; c3_w arg_w; @@ -2404,123 +2450,171 @@ _cw_chop(c3_i argc, c3_c* argv[]) } // gracefully shutdown the pier if it's running - u3_disk* old_u = _cw_disk_init(u3_Host.dir_c); - - // note: this include patch applications (if any) - u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); - // check if there's a *current* snapshot - if ( old_u->dun_d != u3A->eve_d ) { - fprintf(stderr, "chop: error: snapshot is out of date, please " - "start/shutdown your pier gracefully first\r\n"); - fprintf(stderr, "chop: eve_d: %" PRIu64 ", dun_d: %" PRIu64 "\r\n", u3A->eve_d, old_u->dun_d); + // get latest epoch number prior to creating a new one + c3_d pre_d; + if ( c3n == u3_disk_epoc_last(log_u, &pre_d) ) { + fprintf(stderr, "chop: failed to find last epoch\r\n"); exit(1); } - if ( c3n == 
u3m_backup(c3y)) { // backup current snapshot - fprintf(stderr, "chop: error: failed to backup snapshot\r\n"); + // create new epoch + c3_d fir_d, las_d; + if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &las_d) ) { + fprintf(stderr, "chop: failed to get first/last events\r\n"); exit(1); } - // initialize the lmdb environment - // see disk.c:885 - const size_t siz_i = - // 500 GiB is as large as musl on aarch64 wants to allow - #if (defined(U3_CPU_aarch64) && defined(U3_OS_linux)) - 0x7d00000000; - #else - 0x10000000000; - #endif - c3_c log_c[8193]; - snprintf(log_c, sizeof(log_c), "%s/.urb/log", u3_Host.dir_c); - - // get the first/last event numbers from the event log - c3_d fir_d, las_d; - if ( c3n == u3_lmdb_gulf(old_u->mdb_u, &fir_d, &las_d) ) { - fprintf(stderr, "chop: failed to load latest event from database\r\n"); + // create new epoch if latest isn't empty + if ( (fir_d != las_d) && (c3n == u3_disk_epoc_init(log_u, las_d)) ) { + fprintf(stderr, "chop: failed to create new epoch\r\n"); exit(1); } - // get the metadata - c3_d who_d[2]; - c3_o fak_o; - c3_w lif_w; - if ( c3y != u3_disk_read_meta(old_u->mdb_u, who_d, &fak_o, &lif_w) ) { - fprintf(stderr, "chop: failed to read metadata\r\n"); - exit(1); + // sort epoch directories in descending order + u3_dire* ned_u = u3_foil_folder(log_u->com_u->pax_c); + u3_dent* den_u = ned_u->dil_u; + c3_z len_z = 0; + while ( den_u ) { // count epochs + len_z++; + den_u = den_u->nex_u; + } + c3_d* sot_d = c3_malloc(len_z * sizeof(c3_d)); + len_z = 0; + den_u = ned_u->dil_u; + while ( den_u ) { + if ( 1 == sscanf(den_u->nam_c, "0i%" PRIc3_d, (sot_d + len_z)) ) { + len_z++; + } + den_u = den_u->nex_u; } - // get the last event - u3_lmdb_walk itr_u; - size_t len_i; - void* buf_v[1]; - if ( c3n == u3_lmdb_walk_init(old_u->mdb_u, &itr_u, las_d, las_d) ) { - fprintf(stderr, "chop: failed to initialize iterator\r\n"); - exit(1); + if ( len_z <= 2 ) { + fprintf(stderr, "chop: nothing to do, have a great day\r\n"); + exit(0); // 
enjoy + } + + // sort sot_d naively in descending order + c3_d tmp_d; + for ( c3_z i_z = 0; i_z < len_z; i_z++ ) { + for ( c3_z j_z = i_z + 1; j_z < len_z; j_z++ ) { + if ( sot_d[i_z] < sot_d[j_z] ) { + tmp_d = sot_d[i_z]; + sot_d[i_z] = sot_d[j_z]; + sot_d[j_z] = tmp_d; + } + } } - if ( c3n == u3_lmdb_walk_next(&itr_u, &len_i, buf_v) ) { - fprintf(stderr, "chop: failed to read event\r\n"); + + // get latest epoch number prior to creating a new one + c3_d pos_d; + if ( c3n == u3_disk_epoc_last(log_u, &pos_d) ) { + fprintf(stderr, "chop: failed to find last epoch\r\n"); exit(1); } - u3_lmdb_walk_done(&itr_u); - // initialize a fresh lmdb environment in the "chop" subdir - c3_c cho_c[8193]; - snprintf(cho_c, sizeof(cho_c), "%s/chop", log_c); - if ( 0 != access(cho_c, F_OK) ) { - if ( 0 != c3_mkdir(cho_c, 0700) ) { - fprintf(stderr, "chop: failed to create chop directory\r\n"); + // delete all but the last two epochs + // XX parameterize the number of epochs to chop + for ( c3_z i_z = 2; i_z < len_z; i_z++ ) { + fprintf(stderr, "chop: deleting epoch 0i%" PRIc3_d "\r\n", sot_d[i_z]); + if ( c3y != u3_disk_epoc_kill(log_u, sot_d[i_z]) ) { + fprintf(stderr, "chop: failed to delete epoch 0i%" PRIu64 "\r\n", sot_d[i_z]); exit(1); } } - MDB_env* new_u = u3_lmdb_init(cho_c, siz_i); - if ( !new_u ) { - fprintf(stderr, "chop: failed to initialize new database\r\n"); - exit(1); + + // cleanup + u3_dire_free(ned_u); + u3_disk_exit(log_u); + + // success + fprintf(stderr, "chop: event log truncation complete\r\n"); +} + +/* _cw_roll(): rollover to new epoch + */ +static void +_cw_roll(c3_i argc, c3_c* argv[]) +{ + c3_i ch_i, lid_i; + c3_w arg_w; + + static struct option lop_u[] = { + { "loom", required_argument, NULL, c3__loom }, + { NULL, 0, NULL, 0 } + }; + + u3_Host.dir_c = _main_pier_run(argv[0]); + + while ( -1 != (ch_i=getopt_long(argc, argv, "", lop_u, &lid_i)) ) { + switch ( ch_i ) { + case c3__loom: { + if (_main_readw_loom("loom", &u3_Host.ops_u.lom_y)) { + exit(1); + } 
+ } break; + + case '?': { + fprintf(stderr, "invalid argument\r\n"); + exit(1); + } break; + } } - // write the metadata to the database - if ( c3n == u3_disk_save_meta(new_u, who_d, fak_o, lif_w) ) { - fprintf(stderr, "chop: failed to save metadata\r\n"); - exit(1); + // argv[optind] is always "roll" + // + + if ( !u3_Host.dir_c ) { + if ( optind + 1 < argc ) { + u3_Host.dir_c = argv[optind + 1]; + } + else { + fprintf(stderr, "invalid command, pier required\r\n"); + exit(1); + } + + optind++; } - // write the last event to the database - // warning: this relies on the old database still being open - if ( c3n == u3_lmdb_save(new_u, las_d, 1, buf_v, &len_i) ) { - fprintf(stderr, "chop: failed to write last event\r\n"); + if ( optind + 1 != argc ) { + fprintf(stderr, "invalid command\r\n"); exit(1); } - // backup the original database file - c3_c dat_c[8193], bak_c[8193]; - snprintf(dat_c, sizeof(dat_c), "%s/data.mdb", log_c); - // "data_-.mdb.bak" - snprintf(bak_c, sizeof(bak_c), "%s/data_%" PRIu64 "-%" PRIu64 ".mdb.bak", cho_c, fir_d, las_d); - if ( 0 != c3_rename(dat_c, bak_c) ) { - fprintf(stderr, "chop: failed to backup original database file\r\n"); + // gracefully shutdown the pier if it's running + u3_Host.eve_d = u3m_boot(u3_Host.dir_c, (size_t)1 << u3_Host.ops_u.lom_y); + u3_disk* log_u = _cw_disk_init(u3_Host.dir_c); + + // check if there's a *current* snapshot + if ( log_u->dun_d != u3A->eve_d ) { + fprintf(stderr, "roll: error: snapshot is out of date, please " + "start/shutdown your pier gracefully first\r\n"); + fprintf(stderr, "roll: eve_d: %" PRIc3_d ", dun_d: %" PRIc3_d "\r\n", \ + u3A->eve_d, log_u->dun_d); exit(1); } - // rename new database file to be official - c3_c new_c[8193]; - snprintf(new_c, sizeof(new_c), "%s/data.mdb", cho_c); - if ( 0 != c3_rename(new_c, dat_c) ) { - fprintf(stderr, "chop: failed to rename new database file\r\n"); + // create new epoch + c3_d fir_d, las_d; + if ( c3n == u3_lmdb_gulf(log_u->mdb_u, &fir_d, &las_d) ) { + 
fprintf(stderr, "roll: failed to get first/last events\r\n"); exit(1); } - // cleanup - u3_disk_exit(old_u); - u3_lmdb_exit(new_u); - u3m_stop(); + if ( fir_d == las_d ) { + fprintf(stderr, "roll: latest epoch already empty\r\n"); + exit(0); + } + else if ( c3n == u3_disk_epoc_init(log_u, las_d) ) { + fprintf(stderr, "roll: failed to create new epoch\r\n"); + exit(1); + } - // success - fprintf(stderr, "chop: event log truncation complete\r\n"); - fprintf(stderr, " event log backup written to %s\r\n", bak_c); - fprintf(stderr, " WARNING: ENSURE YOU CAN RESTART YOUR SHIP BEFORE DELETING YOUR EVENT LOG BACKUP FILE!\r\n"); - fprintf(stderr, " if you can't, restore your log by running:\r\n"); - fprintf(stderr, " `mv %s %s` then try again\r\n", bak_c, dat_c); + // success + c3_d epo_d = log_u->dun_d + 1; + fprintf(stderr, "roll: epoch rollover complete\r\n"); } /* _cw_vere(): download vere @@ -2806,6 +2900,7 @@ _cw_utils(c3_i argc, c3_c* argv[]) case c3__prep: _cw_prep(argc, argv); return 2; // continue on case c3__queu: _cw_queu(argc, argv); return 1; case c3__chop: _cw_chop(argc, argv); return 1; + case c3__roll: _cw_roll(argc, argv); return 1; case c3__vere: _cw_vere(argc, argv); return 1; case c3__vile: _cw_vile(argc, argv); return 1; @@ -2995,6 +3090,13 @@ main(c3_i argc, } } + // we need the current snapshot's latest event number to + // validate whether we can execute disk migration + if ( u3_Host.ops_u.nuu == c3n ) { + _cw_play_impl(0, 0, c3n, c3n, c3n); + // XX unmap loom, else parts of the snapshot could be left in memory + } + // starting u3m configures OpenSSL memory functions, so we must do it // before any OpenSSL allocations // diff --git a/pkg/vere/pier.c b/pkg/vere/pier.c index 1e39d14ef4..970f9b4eb8 100644 --- a/pkg/vere/pier.c +++ b/pkg/vere/pier.c @@ -783,7 +783,7 @@ _pier_on_lord_wyrd_bail(void* ptr_v, u3_ovum* egg_u, u3_noun lud) #endif } -/* _pier_wyrd_init(): construct %wyrd. +/* _pier_wyrd_card(): construct %wyrd. 
*/ static u3_noun _pier_wyrd_card(u3_pier* pir_u) @@ -827,6 +827,14 @@ _pier_wyrd_card(u3_pier* pir_u) static void _pier_wyrd_init(u3_pier* pir_u) { + // create a new epoch if current version mismatches the latest epoch's + if ( c3y == u3_disk_vere_diff(pir_u->log_u) ) { + if ( c3n == u3_disk_epoc_init(pir_u->log_u, pir_u->log_u->dun_d) ) { + fprintf(stderr, "disk: failed to initialize epoch\r\n"); + exit(1); + } + } + u3_noun cad = _pier_wyrd_card(pir_u); u3_noun wir = u3nc(c3__arvo, u3_nul); @@ -1402,6 +1410,8 @@ _pier_on_lord_live(void* ptr_v) // XX print bootstrap commit complete // XX s/b boot_complete_cb // + // XX this codepath should never be hit due to sync replay + u3l_log("pier: warning: async replay"); _pier_play_init(pir_u, log_u->dun_d); } } @@ -1637,7 +1647,7 @@ _pier_init(c3_w wag_w, c3_c* pax_c) .write_bail_f = _pier_on_disk_write_bail }; - if ( !(pir_u->log_u = u3_disk_init(pax_c, cb_u)) ) { + if ( !(pir_u->log_u = u3_disk_init(pax_c, cb_u, c3y)) ) { c3_free(pir_u); return 0; } diff --git a/pkg/vere/vere.h b/pkg/vere/vere.h index cef16810b6..cc3d71f6ed 100644 --- a/pkg/vere/vere.h +++ b/pkg/vere/vere.h @@ -8,6 +8,7 @@ #include "noun.h" #include "serf.h" #include "uv.h" +#include /** Quasi-tunable parameters. **/ @@ -130,6 +131,7 @@ c3_c* pax_c; // path of directory uv_file fil_u; // file, opened read-only to fsync u3_dent* all_u; // file list + u3_dent* dil_u; // directory list } u3_dire; /* u3_save: checkpoint control. @@ -313,6 +315,7 @@ typedef struct _u3_host { c3_w kno_w; // current executing stage c3_c* dir_c; // pier path (no trailing /) + c3_d eve_d; // initial current snapshot c3_c* dem_c; // daemon executable path c3_c* wrk_c; // worker executable path c3_d now_d; // event tick @@ -538,9 +541,10 @@ u3_dire* urb_u; // urbit system data u3_dire* com_u; // log directory c3_o liv_o; // live - void* mdb_u; // lmdb environment. 
+ void* mdb_u; // lmdb env of current epoch c3_d sen_d; // commit requested c3_d dun_d; // committed + c3_d epo_d; // current epoch number u3_disk_cb cb_u; // callbacks u3_read* red_u; // read requests union { // write thread/request @@ -932,7 +936,7 @@ /* u3_disk_init(): load or create pier directories and event log. */ u3_disk* - u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u); + u3_disk_init(c3_c* pax_c, u3_disk_cb cb_u, c3_o mig_o); /* u3_disk_etch(): serialize an event for persistence. RETAIN [eve] */ @@ -1002,6 +1006,40 @@ void u3_disk_plan(u3_disk* log_u, u3_fact* tac_u); + /* u3_disk_epoc_init(): create new epoch. + */ + c3_o + u3_disk_epoc_init(u3_disk* log_u, c3_d epo_d); + + /* u3_disk_epoc_kill(): delete an epoch. + */ + c3_o + u3_disk_epoc_kill(u3_disk* log_u, c3_d epo_d); + + /* u3_disk_epoc_last(): get latest epoch number. + */ + c3_o + u3_disk_epoc_last(u3_disk* log_u, c3_d* lat_d); + + /* u3_disk_epoc_vere(): get binary version from epoch. + */ + c3_o + u3_disk_epoc_vere(u3_disk* log_u, c3_d epo_d, c3_c* ver_w); + + /* u3_disk_vere_diff(): checks if vere version mismatches latest epoch's. + */ + c3_o + u3_disk_vere_diff(u3_disk* log_u); + + /* u3_disk_need_migrate(): does the disk need migration? + */ + c3_o + u3_disk_need_migrate(u3_disk* log_u); + + /* u3_disk_migrate(): migrates disk format. + */ + c3_o + u3_disk_migrate(u3_disk* log_u); /* u3_disk_read_list(): synchronously read a cons list of events. 
*/ u3_weak diff --git a/pkg/vere/ward.c b/pkg/vere/ward.c index 4c90be7179..9d6c8cbed3 100644 --- a/pkg/vere/ward.c +++ b/pkg/vere/ward.c @@ -39,6 +39,7 @@ u3_dire_init(const c3_c* pax_c) { u3_dire *dir_u = c3_malloc(sizeof *dir_u); dir_u->all_u = 0; + dir_u->dil_u = 0; dir_u->pax_c = c3_malloc(1 + strlen(pax_c)); strcpy(dir_u->pax_c, pax_c); @@ -61,6 +62,17 @@ u3_dire_free(u3_dire *dir_u) } } + { + u3_dent *det_u = dir_u->dil_u; + u3_dent *nex_u; + + while ( det_u ) { + nex_u = det_u->nex_u; + u3_dent_free(det_u); + det_u = nex_u; + } + } + c3_free(dir_u->pax_c); c3_free(dir_u); }