Skip to content

Commit

Permalink
Epoch System (#459)
Browse files Browse the repository at this point in the history
This PR implements a new format for how piers store their event logs on
disk.

Resolves #313.

### Design

Existing format:
```
./zod/.urb/log
├── data.mdb
└── lock.mdb
```

New format:
```
./zod/.urb/log
├── 0i0             # epoch dirnames specify the last event of the previous epoch
│   ├── data.mdb    # lmdb file containing events 1-132
│   ├── epoc.txt    # disk format version (this PR starts versioning at 1)
│   ├── lock.mdb    # lmdb lock file
│   └── vere.txt    # binary version this set of events was originally run with
└── 0i132
    ├── data.mdb
    ├── epoc.txt
    ├── lock.mdb
    ├── north.bin   #
    ├── south.bin   # snapshot files (state as of event 132), strictly read-only
    └── vere.txt
```

The new format introduces *epochs*, which are simply "slices" or
"chunks" of a ship's complete event log. Above, you can see the ship's
event log chunked into two epochs: `0i0` and `0i132`.

New ships booted with the code in this PR instantiate their `log`
directories with the new format. Existing piers are automatically
migrated on boot.

Epoch "rollovers" (when the current epoch is ended and a new, empty
epoch is created) occur under three conditions:
1. The pilot uses the new `roll` subcommand to manually roll over.
2. The pilot runs the `chop` subcommand.
3. We detect a different running binary version than the one pinned in
the current epoch.

Both migrations and epoch rollovers ensure there's a current snapshot
before running.

A few TODOs left:
- [x] Iron out small kink in migration behavior for previously chopped
piers
- [x] Make sure correct binary version gets pinned to first epoch of
migrated piers
- [x] Rollover to new epoch when a new binary version is detected
- [x] Make sure manual migration logic is idempotent
- [x] ~~Update `prep` command~~
- [x] Fix `chop` so it works when there are 3 epochs starting with `0i0`
- [x] ~~Reproduce and fix partially-deleted epoch `0i0` after `chop`~~
- [x] Pair with someone to run manual GDB testing for migration
idempotency and rollover logic
- [x] Take a look at @joemfb's replay code and compare/find overlaps
- [x] Document final system design in this PR
- [x] Correct epoch naming scheme
- [x] Make `chop` leave the latest two epochs
- [x] Better error handling
- [x] Better cleanup
- [x] Test migration with real ships running on local-networking mode
- [x] Test epoch rollover idempotency
- [x] Test fresh boot
- [x] Handle case where snapshot has been deleted from `chk/`
- [x] Ensure `u3_disk_epoc_good()` is implemented and used how we want
- [x] Ensure `u3_disk_epoc_init()` is implemented and used how we want
- [x] Replay works with `urbit play` and `urbit`
- [x] Replay works in edge case where only epoch 0 and no valid snapshot
exist
- [x] Move new-epoch-on-vere-version-mismatch logic to
`_pier_wyrd_init()`
- [x] Make subcommands which call `u3_disk_init()` auto-migrate
  - [x] `info`
  - [x] `cram`
  - [x] `queu`
  - [x] `meld`
  - [x] `pack`
  - [x] `play`
  - [x] `chop`
  - [x] `roll`
- [x] Make replay on boot use `u3_mars_play()`
- [x] Test migration from an old pier (again)
- [x] Test migration from an old pier that needs a full replay (i.e.,
from beginning of its event log) first works
- [x] Test that running `./urbit roll zod` with an updated binary version
*and* an empty latest epoch does not roll over, but instead just updates
the `vere.txt` file
  • Loading branch information
pkova authored Sep 18, 2023
2 parents 930cc58 + ea6f278 commit 7ba4bca
Show file tree
Hide file tree
Showing 13 changed files with 860 additions and 241 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# IDEs.
/.vscode
/.idea

# Bazel.
/.user.bazelrc
/bazel-*
Expand Down
2 changes: 2 additions & 0 deletions pkg/c3/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@
mkdir(a, b);})
/* c3_rmdir(): GNU statement-expression wrapper; evaluates to rmdir(a).
*/
# define c3_rmdir(a) ({ \
rmdir(a);})
/* c3_link(): GNU statement-expression wrapper; evaluates to link(a, b),
 * with the arguments passed through in the same order.
 */
# define c3_link(a, b) ({ \
link(a, b);})
/* c3_unlink(): GNU statement-expression wrapper; evaluates to unlink(a).
*/
# define c3_unlink(a) ({ \
unlink(a);})
# define c3_fopen(a, b) ({ \
Expand Down
100 changes: 56 additions & 44 deletions pkg/noun/events.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,23 +398,14 @@ _ce_ephemeral_open(c3_i* eph_i)
/* _ce_image_open(): open or create image.
*/
static c3_o
_ce_image_open(u3e_image* img_u)
_ce_image_open(u3e_image* img_u, c3_c* ful_c)
{
c3_i mod_i = O_RDWR | O_CREAT;
c3_c ful_c[8193];

snprintf(ful_c, 8192, "%s", u3P.dir_c);
c3_mkdir(ful_c, 0700);

snprintf(ful_c, 8192, "%s/.urb", u3P.dir_c);
c3_mkdir(ful_c, 0700);

snprintf(ful_c, 8192, "%s/.urb/chk", u3P.dir_c);
c3_mkdir(ful_c, 0700);

snprintf(ful_c, 8192, "%s/.urb/chk/%s.bin", u3P.dir_c, img_u->nam_c);
if ( -1 == (img_u->fid_i = c3_open(ful_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno));
c3_c pax_c[8192];
snprintf(pax_c, 8192, "%s/%s.bin", ful_c, img_u->nam_c);
if ( -1 == (img_u->fid_i = c3_open(pax_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", pax_c, strerror(errno));
return c3n;
}
else if ( c3n == _ce_image_stat(img_u, &img_u->pgs_w) ) {
Expand Down Expand Up @@ -1345,54 +1336,75 @@ _ce_image_copy(u3e_image* fom_u, u3e_image* tou_u)
return c3y;
}

/* u3e_backup(): copy snapshot to .urb/bhk (if it doesn't exist yet).
/* u3e_backup(): copy snapshot from [pux_c] to [pax_c],
* overwriting optionally. note that image files must
* be named "north" and "south".
*/
c3_o
u3e_backup(c3_o ovw_o)
u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o)
{
u3e_image nop_u = { .nam_c = "north", .pgs_w = 0 };
u3e_image sop_u = { .nam_c = "south", .pgs_w = 0 };
c3_i mod_i = O_RDWR | O_CREAT; // XX O_TRUNC ?
c3_c ful_c[8193];
// source image files from [pux_c]
u3e_image nux_u = { .nam_c = "north", .pgs_w = 0 };
u3e_image sux_u = { .nam_c = "south", .pgs_w = 0 };

// destination image files to [pax_c]
u3e_image nax_u = { .nam_c = "north", .pgs_w = 0 };
u3e_image sax_u = { .nam_c = "south", .pgs_w = 0 };

c3_i mod_i = O_RDWR | O_CREAT;

snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c);
if ( !pux_c || !pax_c ) {
fprintf(stderr, "loom: image backup: bad path\r\n");
return c3n;
}

if ( (c3n == ovw_o) && c3_mkdir(ful_c, 0700) ) {
if ( (c3n == ovw_o) && c3_mkdir(pax_c, 0700) ) {
if ( EEXIST != errno ) {
fprintf(stderr, "loom: image backup: %s\r\n", strerror(errno));
}
return c3n;
}

snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, nop_u.nam_c);

if ( -1 == (nop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno));
// open source image files if they exist
//
c3_c nux_c[8193];
snprintf(nux_c, 8192, "%s/%s.bin", pux_c, nux_u.nam_c);
if ( (0 != access(nux_c, F_OK)) || (c3n == _ce_image_open(&nux_u, pux_c)) ) {
fprintf(stderr, "loom: couldn't open north image at %s\r\n", pux_c);
return c3n;
}
c3_c sux_c[8193];
snprintf(sux_c, 8192, "%s/%s.bin", pux_c, sux_u.nam_c);
if ( (0 != access(sux_c, F_OK)) || (c3n == _ce_image_open(&sux_u, pux_c)) ) {
fprintf(stderr, "loom: couldn't open south image at %s\r\n", pux_c);
return c3n;
}

snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, sop_u.nam_c);

if ( -1 == (sop_u.fid_i = c3_open(ful_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", ful_c, strerror(errno));
// open destination image files
c3_c nax_c[8193];
snprintf(nax_c, 8192, "%s/%s.bin", pax_c, nax_u.nam_c);
if ( -1 == (nax_u.fid_i = c3_open(nax_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", nax_c, strerror(errno));
return c3n;
}
c3_c sax_c[8193];
snprintf(sax_c, 8192, "%s/%s.bin", pax_c, sax_u.nam_c);
if ( -1 == (sax_u.fid_i = c3_open(sax_c, mod_i, 0666)) ) {
fprintf(stderr, "loom: c3_open %s: %s\r\n", sax_c, strerror(errno));
return c3n;
}

if ( (c3n == _ce_image_copy(&u3P.nor_u, &nop_u))
|| (c3n == _ce_image_copy(&u3P.sou_u, &sop_u)) )
if ( (c3n == _ce_image_copy(&nux_u, &nax_u))
|| (c3n == _ce_image_copy(&sux_u, &sax_u)) )
{

c3_unlink(ful_c);
snprintf(ful_c, 8192, "%s/.urb/bhk/%s.bin", u3P.dir_c, nop_u.nam_c);
c3_unlink(ful_c);
snprintf(ful_c, 8192, "%s/.urb/bhk", u3P.dir_c);
c3_rmdir(ful_c);
c3_unlink(nax_c);
c3_unlink(sax_c);
fprintf(stderr, "loom: image backup failed\r\n");
return c3n;
}

close(nop_u.fid_i);
close(sop_u.fid_i);
close(nax_u.fid_i);
close(sax_u.fid_i);
fprintf(stderr, "loom: image backup complete\r\n");
return c3y;
}
Expand Down Expand Up @@ -1502,8 +1514,6 @@ u3e_save(u3_post low_p, u3_post hig_p)
}

u3e_toss(low_p, hig_p);

u3e_backup(c3n);
}

/* _ce_toss_pages(): discard ephemeral pages.
Expand Down Expand Up @@ -1573,8 +1583,10 @@ u3e_live(c3_o nuu_o, c3_c* dir_c)

// Open image files.
//
if ( (c3n == _ce_image_open(&u3P.nor_u)) ||
(c3n == _ce_image_open(&u3P.sou_u)) )
c3_c chk_c[8193];
snprintf(chk_c, 8193, "%s/.urb/chk", u3P.dir_c);
if ( (c3n == _ce_image_open(&u3P.nor_u, chk_c)) ||
(c3n == _ce_image_open(&u3P.sou_u, chk_c)) )
{
fprintf(stderr, "boot: image failed\r\n");
exit(1);
Expand Down
7 changes: 4 additions & 3 deletions pkg/noun/events.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,11 @@

/** Functions.
**/
/* u3e_backup(): copy the snapshot from chk to bhk.
/* u3e_backup(): copy the snapshot from [pux_c] to [pax_c],
* overwriting optional.
*/
c3_o
u3e_backup(c3_o ovw_o);
c3_o
u3e_backup(c3_c* pux_c, c3_c* pax_c, c3_o ovw_o);

/* u3e_fault(): handle a memory fault.
*/
Expand Down
46 changes: 37 additions & 9 deletions pkg/noun/manage.c
Original file line number Diff line number Diff line change
Expand Up @@ -1762,14 +1762,6 @@ _cm_limits(void)
# endif
}

/* u3m_backup(): copy snapshot to .urb/bhk (if it doesn't exist yet).
*/
c3_o
u3m_backup(c3_o ovw_o)
{
return u3e_backup(ovw_o);
}

/* u3m_fault(): handle a memory event with libsigsegv protocol.
*/
c3_i
Expand Down Expand Up @@ -2090,6 +2082,42 @@ u3m_stop()
u3je_secp_stop();
}

/* u3m_pier(): make a pier.
*/
c3_c*
u3m_pier(c3_c* dir_c)
{
c3_c ful_c[8193];

u3C.dir_c = dir_c;

snprintf(ful_c, 8192, "%s", dir_c);
if ( c3_mkdir(ful_c, 0700) ) {
if ( EEXIST != errno ) {
fprintf(stderr, "loom: pier create: %s\r\n", strerror(errno));
exit(1);
}
}

snprintf(ful_c, 8192, "%s/.urb", dir_c);
if ( c3_mkdir(ful_c, 0700) ) {
if ( EEXIST != errno ) {
fprintf(stderr, "loom: .urb create: %s\r\n", strerror(errno));
exit(1);
}
}

snprintf(ful_c, 8192, "%s/.urb/chk", dir_c);
if ( c3_mkdir(ful_c, 0700) ) {
if ( EEXIST != errno ) {
fprintf(stderr, "loom: .urb/chk create: %s\r\n", strerror(errno));
exit(1);
}
}

return strdup(dir_c);
}

/* u3m_boot(): start the u3 system. return next event, starting from 1.
*/
c3_d
Expand All @@ -2105,7 +2133,7 @@ u3m_boot(c3_c* dir_c, size_t len_i)

/* Activate the storage system.
*/
nuu_o = u3e_live(c3n, dir_c);
nuu_o = u3e_live(c3n, u3m_pier(dir_c));

/* Activate tracing.
*/
Expand Down
5 changes: 5 additions & 0 deletions pkg/noun/manage.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
c3_d
u3m_boot(c3_c* dir_c, size_t len_i);

/* u3m_pier(): make a pier.
**
**   sets u3C.dir_c to [dir_c], then creates [dir_c], [dir_c]/.urb
**   and [dir_c]/.urb/chk (mode 0700) when they do not already exist;
**   any other mkdir failure prints an error and exits. returns the
**   result of strdup([dir_c]).
*/
c3_c*
u3m_pier(c3_c* dir_c);

/* u3m_boot_lite(): start without checkpointing.
*/
c3_d
Expand Down
6 changes: 6 additions & 0 deletions pkg/noun/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,10 @@ typedef c3_w u3e_version;
#define U3E_VER1 1
#define U3E_VERLAT U3E_VER1

/* DISK FORMAT
**
**   version numbers for the on-disk event log format.
**   U3D_VERLAT names the latest version.
*/

#define U3D_VER1 1
#define U3D_VERLAT U3D_VER1

#endif /* ifndef U3_VERSION_H */
Loading

0 comments on commit 7ba4bca

Please sign in to comment.