diff --git a/Cargo.lock b/Cargo.lock index f45ac02690..0c996de0ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -330,20 +336,6 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -499,6 +491,17 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +[[package]] +name = "clipboard-win" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +dependencies = [ + "error-code", + "str-buf", + "winapi", +] + [[package]] name = "cobs" version = "0.2.3" @@ -619,6 +622,64 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset 0.9.0", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.16" @@ -842,6 +903,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -852,6 +922,18 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = 
"0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -988,6 +1070,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + [[package]] name = "enum-as-inner" version = "0.5.1" @@ -1055,6 +1143,16 @@ dependencies = [ "libc", ] +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" +dependencies = [ + "libc", + "str-buf", +] + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -1067,6 +1165,17 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +[[package]] +name = "fd-lock" +version = "3.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "ff" version = "0.13.0" @@ -1401,6 +1510,15 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3688e69b38018fec1557254f64c8dc2cc8ec502890182f395dbb0aa997aa5735" +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "hostname" version = "0.3.1" @@ -1646,18 +1764,22 @@ dependencies = [ "derive_more", "dirs-next", "duct", + "ed25519-dalek", "flume", "futures", "genawaiter", "hex", "indicatif", "iroh-bytes", + "iroh-gossip", "iroh-io", "iroh-metrics", "iroh-net", + "iroh-sync", "multibase", "nix", "num_cpus", + "once_cell", "portable-atomic", "postcard", "proptest", @@ -1666,7 +1788,10 @@ dependencies = [ "rand", "range-collections", "regex", + "rustyline", "serde", + "shell-words", + "shellexpand", "tempfile", "testdir", "thiserror", @@ -1729,10 +1854,9 @@ dependencies = [ [[package]] name = "iroh-gossip" -version = "0.4.1" +version = "0.5.1" dependencies = [ "anyhow", - "blake3", "bytes", "clap", "data-encoding", @@ -1741,6 +1865,7 @@ dependencies = [ "futures", "genawaiter", "indexmap 2.0.0", + "iroh-blake3", "iroh-metrics", "iroh-net", "once_cell", @@ -1857,6 +1982,31 @@ dependencies = [ "zeroize", ] +[[package]] +name = "iroh-sync" +version = "0.5.1" +dependencies = [ + "anyhow", + "bytes", + "crossbeam", + "derive_more", + "ed25519-dalek", + "hex", + "iroh-blake3", + "iroh-bytes", + "once_cell", + "ouroboros", + "parking_lot", + "postcard", + "rand", + "rand_core", + "redb", + "serde", + "tempfile", + "tokio", + "url", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -1998,6 +2148,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -2115,6 +2274,15 @@ dependencies = [ "tokio", 
] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "nix" version = "0.26.2" @@ -2124,7 +2292,7 @@ dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", - "memoffset", + "memoffset 0.7.1", "pin-utils", "static_assertions", ] @@ -2341,6 +2509,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "os_info" version = "3.7.0" @@ -2362,6 +2536,30 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "ouroboros" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2ba07320d39dfea882faa70554b4bd342a5f273ed59ba7c1c6b4c840492c954" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec4c6225c69b4ca778c0aea097321a64c421cf4577b331c61b229267edabb6f8" +dependencies = [ + "heck", + "proc-macro-error 1.0.4", + "proc-macro2", + "quote", + "syn 2.0.27", +] + [[package]] name = "overload" version = "0.1.1" @@ -2825,6 +3023,16 @@ dependencies = [ "unarray", ] +[[package]] +name = "pyo3-build-config" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "554db24f0b3c180a9c0b1268f91287ab3f17c162e15b54caaae5a6b3773396b0" +dependencies = [ + "once_cell", + "target-lexicon", +] + [[package]] name = "quanta" version = "0.11.1" @@ -2933,6 +3141,16 @@ dependencies = [ "pest_derive", ] +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -3005,6 +3223,16 @@ dependencies = [ "yasna", ] +[[package]] +name = "redb" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "717a806693d0e1ed6cc55b392066bf13e703dd835acf5c5888c74740f924d355" +dependencies = [ + "libc", + "pyo3-build-config", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -3333,6 +3561,29 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rustyline" +version = "12.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +dependencies = [ + "bitflags 2.3.3", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "scopeguard", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.15" @@ -3542,6 +3793,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + +[[package]] +name = "shellexpand" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "da03fa3b94cc19e3ebfc88c4229c49d8f08cdbd1228870a45f0ffdf84988e14b" +dependencies = [ + "dirs", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -3677,6 +3943,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "str-buf" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" + [[package]] name = "strsim" version = "0.10.0" @@ -3836,6 +4108,12 @@ dependencies = [ "libc", ] +[[package]] +name = "target-lexicon" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e" + [[package]] name = "tempfile" version = "3.7.0" @@ -4277,6 +4555,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/Cargo.toml b/Cargo.toml index be650b1911..694f2af3b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "iroh-bytes", "iroh-gossip", "iroh-metrics", + "iroh-sync", ] [profile.optimized-release] diff --git a/deny.toml b/deny.toml index 1488465e15..023b15271a 100644 --- a/deny.toml +++ b/deny.toml @@ -4,8 +4,10 @@ multiple-versions = "allow" [licenses] allow = [ "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", "BSD-2-Clause", "BSD-3-Clause", + "BSL-1.0", # BOSL license "ISC", "MIT", "OpenSSL", diff --git a/iroh-bytes/src/protocol.rs b/iroh-bytes/src/protocol.rs index 8ace63a8c8..5b1f8444d1 100644 --- a/iroh-bytes/src/protocol.rs +++ b/iroh-bytes/src/protocol.rs @@ -172,7 +172,7 @@ impl GetRequest { } /// Write the given data to the provider sink, with a unsigned varint length prefix. -pub(crate) async fn write_lp(writer: &mut W, data: &[u8]) -> Result<()> { +pub async fn write_lp(writer: &mut W, data: &[u8]) -> Result<()> { ensure!( data.len() < MAX_MESSAGE_SIZE, "sending message is too large" @@ -193,7 +193,7 @@ pub(crate) async fn write_lp(writer: &mut W, data: &[u8]) /// /// The message as raw bytes. If the end of the stream is reached and there is no partial /// message, returns `None`. 
-pub(crate) async fn read_lp( +pub async fn read_lp( mut reader: impl AsyncRead + Unpin, buffer: &mut BytesMut, ) -> Result> { diff --git a/iroh-bytes/src/util.rs b/iroh-bytes/src/util.rs index 1950e6e837..0696d8e904 100644 --- a/iroh-bytes/src/util.rs +++ b/iroh-bytes/src/util.rs @@ -82,6 +82,18 @@ impl From<[u8; 32]> for Hash { } } +impl From for [u8; 32] { + fn from(value: Hash) -> Self { + *value.as_bytes() + } +} + +impl From<&[u8; 32]> for Hash { + fn from(value: &[u8; 32]) -> Self { + Hash(blake3::Hash::from(*value)) + } +} + impl PartialOrd for Hash { fn partial_cmp(&self, other: &Self) -> Option { Some(self.0.as_bytes().cmp(other.0.as_bytes())) diff --git a/iroh-gossip/Cargo.toml b/iroh-gossip/Cargo.toml index bf2254095b..326254a209 100644 --- a/iroh-gossip/Cargo.toml +++ b/iroh-gossip/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "iroh-gossip" -version = "0.4.1" +version = "0.5.1" edition = "2021" readme = "README.md" description = "gossip messages over broadcast trees" @@ -11,7 +11,7 @@ repository = "https://github.com/n0-computer/iroh-sync" [dependencies] # proto dependencies (required) anyhow = { version = "1", features = ["backtrace"] } -blake3 = "1.3.3" +blake3 = { package = "iroh-blake3", version = "1.4.3"} bytes = { version = "1.4.0", features = ["serde"] } data-encoding = "2.4.0" derive_more = { version = "1.0.0-beta.1", features = ["add", "debug", "display", "from", "try_into"] } diff --git a/iroh-gossip/src/proto/util.rs b/iroh-gossip/src/proto/util.rs index 64c7a92c9a..03a759ad01 100644 --- a/iroh-gossip/src/proto/util.rs +++ b/iroh-gossip/src/proto/util.rs @@ -56,7 +56,7 @@ macro_rules! idbytes_impls { } } - impl> From for $ty { + impl> std::convert::From for $ty { fn from(value: T) -> Self { Self::from_bytes(value.into()) } diff --git a/iroh-net/src/tls.rs b/iroh-net/src/tls.rs index 4f92c9106a..83e1dde697 100644 --- a/iroh-net/src/tls.rs +++ b/iroh-net/src/tls.rs @@ -106,6 +106,23 @@ impl PeerId { pub fn as_bytes(&self) -> &[u8; 32] { self.0.as_bytes() } + + /// Try to create a peer id from a byte array. + /// + /// # Warning + /// + /// The caller is responsible for ensuring that the bytes passed into this + /// method actually represent a `curve25519_dalek::curve::CompressedEdwardsY` + /// and that said compressed point is actually a point on the curve. + pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + let key = PublicKey::from_bytes(bytes)?; + Ok(PeerId(key)) + } + + /// Get the peer id as a byte array. 
+ pub fn to_bytes(&self) -> [u8; 32] { + self.0.to_bytes() + } } impl From for PeerId { diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml new file mode 100644 index 0000000000..8ab9a1de20 --- /dev/null +++ b/iroh-sync/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "iroh-sync" +version = "0.5.1" +edition = "2021" +readme = "README.md" +description = "Iroh sync" +license = "MIT/Apache-2.0" +authors = ["n0 team"] +repository = "https://github.com/n0-computer/iroh" + +[dependencies] +anyhow = "1.0.71" +blake3 = { package = "iroh-blake3", version = "1.4.3"} +crossbeam = "0.8.2" +derive_more = { version = "1.0.0-beta.1", features = ["debug", "display", "from", "try_into"] } +ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } +iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } +once_cell = "1.18.0" +postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } +rand = "0.8.5" +rand_core = "0.6.4" +serde = { version = "1.0.164", features = ["derive"] } +url = "2.4.0" +bytes = "1.4.0" +parking_lot = "0.12.1" +hex = "0.4" + +# fs-store +redb = { version = "1.0.5", optional = true } +ouroboros = { version = "0.17", optional = true } + +[dev-dependencies] +tokio = { version = "1.28.2", features = ["sync", "macros"] } +tempfile = "3.4" + +[features] +default = ["fs-store"] +fs-store = ["redb", "ouroboros"] diff --git a/iroh-sync/LICENSE-APACHE b/iroh-sync/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/iroh-sync/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/iroh-sync/LICENSE-MIT b/iroh-sync/LICENSE-MIT new file mode 100644 index 0000000000..dfd85baf84 --- /dev/null +++ b/iroh-sync/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2023 + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/iroh-sync/README.md b/iroh-sync/README.md new file mode 100644 index 0000000000..7c79e368f2 --- /dev/null +++ b/iroh-sync/README.md @@ -0,0 +1,19 @@ +# iroh-sync + + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/iroh-sync/src/lib.rs b/iroh-sync/src/lib.rs new file mode 100644 index 0000000000..4c73579e2d --- /dev/null +++ b/iroh-sync/src/lib.rs @@ -0,0 +1,3 @@ +pub mod ranger; +pub mod store; +pub mod sync; diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs new file mode 100644 index 0000000000..aff7f66578 --- /dev/null +++ b/iroh-sync/src/ranger.rs @@ -0,0 +1,1317 @@ +//! Implementation of Set Reconcilliation based on +//! "Range-Based Set Reconciliation" by Aljoscha Meyer. +//! + +use std::collections::BTreeMap; +use std::fmt::Debug; +use std::marker::PhantomData; +use std::{cmp::Ordering, convert::Infallible}; + +use serde::{Deserialize, Serialize}; + +/// Stores a range. +/// +/// There are three possibilities +/// - x, x: All elements in a set, denoted with +/// - [x, y): x < y: Includes x, but not y +/// - S \ [y, x) y < x: Includes x, but not y. +/// This means that ranges are "wrap around" conceptually. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)] +pub struct Range { + x: K, + y: K, +} + +impl Range { + pub fn x(&self) -> &K { + &self.x + } + + pub fn y(&self) -> &K { + &self.y + } + + pub fn new(x: K, y: K) -> Self { + Range { x, y } + } + + pub fn map(self, f: impl FnOnce(K, K) -> (X, X)) -> Range { + let (x, y) = f(self.x, self.y); + Range { x, y } + } +} + +impl From<(K, K)> for Range { + fn from((x, y): (K, K)) -> Self { + Range { x, y } + } +} + +pub trait RangeKey: Sized + Ord + Debug { + /// Is this key inside the range? + fn contains(&self, range: &Range) -> bool { + contains(self, range) + } +} + +/// Default implementation of `contains` for `Ord` types. 
+pub fn contains(t: &T, range: &Range) -> bool { + match range.x().cmp(range.y()) { + Ordering::Equal => true, + Ordering::Less => range.x() <= t && t < range.y(), + Ordering::Greater => range.x() <= t || t < range.y(), + } +} + +impl RangeKey for &str {} +impl RangeKey for &[u8] {} + +#[derive(Copy, Clone, PartialEq, Serialize, Deserialize)] +pub struct Fingerprint(pub [u8; 32]); + +impl Debug for Fingerprint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Fp({})", blake3::Hash::from(self.0).to_hex()) + } +} + +impl Fingerprint { + /// The fingerprint of the empty set + pub fn empty() -> Self { + Fingerprint::new(&[][..]) + } + + pub fn new(val: T) -> Self { + val.as_fingerprint() + } +} + +pub trait AsFingerprint { + fn as_fingerprint(&self) -> Fingerprint; +} + +impl> AsFingerprint for T { + fn as_fingerprint(&self) -> Fingerprint { + Fingerprint(blake3::hash(self.as_ref()).into()) + } +} + +impl std::ops::BitXorAssign for Fingerprint { + fn bitxor_assign(&mut self, rhs: Self) { + for (a, b) in self.0.iter_mut().zip(rhs.0.iter()) { + *a ^= b; + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RangeFingerprint { + #[serde(bound( + serialize = "Range: Serialize", + deserialize = "Range: Deserialize<'de>" + ))] + pub range: Range, + /// The fingerprint of `range`. + pub fingerprint: Fingerprint, +} + +/// Transfers items inside a range to the other participant. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RangeItem { + /// The range out of which the elements are. + #[serde(bound( + serialize = "Range: Serialize", + deserialize = "Range: Deserialize<'de>" + ))] + pub range: Range, + #[serde(bound( + serialize = "K: Serialize, V: Serialize", + deserialize = "K: Deserialize<'de>, V: Deserialize<'de>" + ))] + pub values: Vec<(K, V)>, + /// If false, requests to send local items in the range. + /// Otherwise not. + pub have_local: bool, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum MessagePart { + #[serde(bound( + serialize = "RangeFingerprint: Serialize", + deserialize = "RangeFingerprint: Deserialize<'de>" + ))] + RangeFingerprint(RangeFingerprint), + #[serde(bound( + serialize = "RangeItem: Serialize", + deserialize = "RangeItem: Deserialize<'de>" + ))] + RangeItem(RangeItem), +} + +impl MessagePart { + pub fn is_range_fingerprint(&self) -> bool { + matches!(self, MessagePart::RangeFingerprint(_)) + } + + pub fn is_range_item(&self) -> bool { + matches!(self, MessagePart::RangeItem(_)) + } + + pub fn values(&self) -> Option<&[(K, V)]> { + match self { + MessagePart::RangeFingerprint(_) => None, + MessagePart::RangeItem(RangeItem { values, .. }) => Some(values), + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Message { + #[serde(bound( + serialize = "MessagePart: Serialize", + deserialize = "MessagePart: Deserialize<'de>" + ))] + parts: Vec>, +} + +impl Message +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + /// Construct the initial message. 
+ fn init>(store: &S, limit: Option<&Range>) -> Result { + let x = store.get_first()?; + let range = Range::new(x.clone(), x); + let fingerprint = store.get_fingerprint(&range, limit)?; + let part = MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }); + Ok(Message { parts: vec![part] }) + } + + pub fn parts(&self) -> &[MessagePart] { + &self.parts + } +} + +pub trait Store: Sized +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + type Error: Debug + Send + Sync + Into; + + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> Result; + fn get(&self, key: &K) -> Result, Self::Error>; + fn len(&self) -> Result; + fn is_empty(&self) -> Result; + /// Calculate the fingerprint of the given range. + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result; + + /// Insert the given key value pair. + fn put(&mut self, k: K, v: V) -> Result<(), Self::Error>; + + type RangeIterator<'a>: Iterator> + where + Self: 'a, + K: 'a, + V: 'a; + + /// Returns all items in the given range + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error>; + fn remove(&mut self, key: &K) -> Result, Self::Error>; + + type AllIterator<'a>: Iterator> + where + Self: 'a, + K: 'a, + V: 'a; + fn all(&self) -> Result, Self::Error>; +} + +#[derive(Debug)] +pub struct SimpleStore { + data: BTreeMap, +} + +impl Default for SimpleStore { + fn default() -> Self { + SimpleStore { + data: BTreeMap::default(), + } + } +} + +impl Store for SimpleStore +where + K: RangeKey + Clone + Default + AsFingerprint, + V: Clone, +{ + type Error = Infallible; + + fn get_first(&self) -> Result { + if let Some((k, _)) = self.data.first_key_value() { + Ok(k.clone()) + } else { + Ok(Default::default()) + } + } + + fn get(&self, key: &K) -> Result, Self::Error> { + Ok(self.data.get(key).cloned()) + } + + fn len(&self) -> Result { + Ok(self.data.len()) + } + + fn is_empty(&self) -> Result { + Ok(self.data.is_empty()) + } + + /// Calculate the fingerprint of the given range. + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + let elements = self.get_range(range.clone(), limit.cloned())?; + let mut fp = Fingerprint::empty(); + for el in elements { + let el = el?; + fp ^= el.0.as_fingerprint(); + } + + Ok(fp) + } + + /// Insert the given key value pair. 
+ fn put(&mut self, k: K, v: V) -> Result<(), Self::Error> { + self.data.insert(k, v); + Ok(()) + } + + type RangeIterator<'a> = SimpleRangeIterator<'a, K, V> + where K: 'a, V: 'a; + /// Returns all items in the given range + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error> { + // TODO: this is not very efficient, optimize depending on data structure + let iter = self.data.iter(); + + Ok(SimpleRangeIterator { + iter, + range: Some(range), + limit, + }) + } + + fn remove(&mut self, key: &K) -> Result, Self::Error> { + // No versions stored + + let res = self.data.remove(key).into_iter().collect(); + Ok(res) + } + + type AllIterator<'a> = SimpleRangeIterator<'a, K, V> + where K: 'a, + V: 'a; + + fn all(&self) -> Result, Self::Error> { + let iter = self.data.iter(); + + Ok(SimpleRangeIterator { + iter, + range: None, + limit: None, + }) + } +} + +#[derive(Debug)] +pub struct SimpleRangeIterator<'a, K: 'a, V: 'a> { + iter: std::collections::btree_map::Iter<'a, K, V>, + range: Option>, + limit: Option>, +} + +impl<'a, K, V> Iterator for SimpleRangeIterator<'a, K, V> +where + K: RangeKey + Clone, + V: Clone, +{ + type Item = Result<(K, V), Infallible>; + + fn next(&mut self) -> Option { + let mut next = self.iter.next()?; + + let filter = |x: &K| match (&self.range, &self.limit) { + (None, None) => true, + (Some(ref range), Some(ref limit)) => x.contains(range) && x.contains(limit), + (Some(ref range), None) => x.contains(range), + (None, Some(ref limit)) => x.contains(limit), + }; + + loop { + if filter(next.0) { + return Some(Ok((next.0.clone(), next.1.clone()))); + } + + next = self.iter.next()?; + } + } +} + +#[derive(Debug)] +pub struct Peer = SimpleStore> +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + store: S, + /// Up to how many values to send immediately, before sending only a fingerprint. + max_set_size: usize, + /// `k` in the protocol, how many splits to generate. at least 2 + split_factor: usize, + limit: Option>, + + _phantom: PhantomData, // why??? +} + +impl Default for Peer +where + K: RangeKey + Clone + Default + AsFingerprint, + S: Store + Default, +{ + fn default() -> Self { + Peer { + store: S::default(), + max_set_size: 1, + split_factor: 2, + limit: None, + _phantom: Default::default(), + } + } +} + +impl Peer +where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug, + S: Store + Default, +{ + pub fn with_limit(limit: Range) -> Self { + Peer { + store: S::default(), + max_set_size: 1, + split_factor: 2, + limit: Some(limit), + _phantom: Default::default(), + } + } +} +impl Peer +where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug, + S: Store, +{ + pub fn from_store(store: S) -> Self { + Peer { + store, + max_set_size: 1, + split_factor: 2, + limit: None, + _phantom: Default::default(), + } + } + + /// Generates the initial message. + pub fn initial_message(&self) -> Result, S::Error> { + Message::init(&self.store, self.limit.as_ref()) + } + + /// Processes an incoming message and produces a response. + /// If terminated, returns `None` + pub fn process_message( + &mut self, + message: Message, + cb: F, + ) -> Result>, S::Error> + where + F: Fn(K, V), + { + let mut out = Vec::new(); + + // TODO: can these allocs be avoided? 
+ let mut items = Vec::new(); + let mut fingerprints = Vec::new(); + for part in message.parts { + match part { + MessagePart::RangeItem(item) => { + items.push(item); + } + MessagePart::RangeFingerprint(fp) => { + fingerprints.push(fp); + } + } + } + + // Process item messages + for RangeItem { + range, + values, + have_local, + } in items + { + let diff: Option> = if have_local { + None + } else { + Some( + self.store + .get_range(range.clone(), self.limit.clone())? + .filter_map(|el| match el { + Ok((k, v)) => { + if !values.iter().any(|(vk, _)| vk == &k) { + Some(Ok((k, v))) + } else { + None + } + } + Err(err) => Some(Err(err)), + }) + .collect::>()?, + ) + }; + + // Store incoming values + for (k, v) in values { + cb(k.clone(), v.clone()); + self.store.put(k, v)?; + } + + if let Some(diff) = diff { + if !diff.is_empty() { + out.push(MessagePart::RangeItem(RangeItem { + range, + values: diff, + have_local: true, + })); + } + } + } + + // Process fingerprint messages + for RangeFingerprint { range, fingerprint } in fingerprints { + let local_fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref())?; + + // Case1 Match, nothing to do + if local_fingerprint == fingerprint { + continue; + } + + // Case2 Recursion Anchor + let local_values: Vec<_> = self + .store + .get_range(range.clone(), self.limit.clone())? + .collect::>()?; + if local_values.len() <= 1 || fingerprint == Fingerprint::empty() { + let values = local_values.into_iter().map(|(k, v)| (k, v)).collect(); + out.push(MessagePart::RangeItem(RangeItem { + range, + values, + have_local: false, + })); + } else { + // Case3 Recurse + // Create partition + // m0 = x < m1 < .. < mk = y, with k>= 2 + // such that [ml, ml+1) is nonempty + let mut ranges = Vec::with_capacity(self.split_factor); + let chunk_len = div_ceil(local_values.len(), self.split_factor); + + // Select the first index, for which the key is larger than the x of the range. + let mut start_index = local_values + .iter() + .position(|(k, _)| range.x() <= k) + .unwrap_or(0); + let max_len = local_values.len(); + for i in 0..self.split_factor { + let s_index = start_index; + let start = (s_index * chunk_len) % max_len; + let e_index = s_index + 1; + let end = (e_index * chunk_len) % max_len; + + let (x, y) = if i == 0 { + // first + (range.x(), &local_values[end].0) + } else if i == self.split_factor - 1 { + // last + (&local_values[start].0, range.y()) + } else { + // regular + (&local_values[start].0, &local_values[end].0) + }; + let range = Range::new(x.clone(), y.clone()); + ranges.push(range); + start_index += 1; + } + + for range in ranges.into_iter() { + let chunk: Vec<_> = self + .store + .get_range(range.clone(), self.limit.clone())? + .collect(); + // Add either the fingerprint or the item set + let fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref())?; + if chunk.len() > self.max_set_size { + out.push(MessagePart::RangeFingerprint(RangeFingerprint { + range, + fingerprint, + })); + } else { + let values = chunk + .into_iter() + .map(|el| match el { + Ok((k, v)) => { + let k: K = k; + let v: V = v; + Ok((k, v)) + } + Err(err) => Err(err), + }) + .collect::>()?; + out.push(MessagePart::RangeItem(RangeItem { + range, + values, + have_local: false, + })); + } + } + } + } + + // If we have any parts, return a message + if !out.is_empty() { + Ok(Some(Message { parts: out })) + } else { + Ok(None) + } + } + + /// Insert a key value pair. 
+ pub fn put(&mut self, k: K, v: V) -> Result<(), S::Error> { + self.store.put(k, v) + } + + pub fn get(&self, k: &K) -> Result, S::Error> { + self.store.get(k) + } + + /// Remove the given key. + pub fn remove(&mut self, k: &K) -> Result, S::Error> { + self.store.remove(k) + } + + /// List all existing key value pairs. + pub fn all(&self) -> Result> + '_, S::Error> { + self.store.all() + } + + /// Returns a refernce to the underlying store. + pub fn store(&self) -> &S { + &self.store + } +} + +/// Sadly is still unstable.. +fn div_ceil(a: usize, b: usize) -> usize { + debug_assert!(a != 0); + debug_assert!(b != 0); + + a / b + (a % b != 0) as usize +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + use super::*; + + #[test] + fn test_paper_1() { + let alice_set = [("ape", 1), ("eel", 1), ("fox", 1), ("gnu", 1)]; + let bob_set = [ + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("hog", 1), + ]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // Initial message + assert_eq!(res.alice_to_bob[0].parts.len(), 1); + assert!(res.alice_to_bob[0].parts[0].is_range_fingerprint()); + + // Response from Bob - recurse once + assert_eq!(res.bob_to_alice[0].parts.len(), 2); + assert!(res.bob_to_alice[0].parts[0].is_range_fingerprint()); + assert!(res.bob_to_alice[0].parts[1].is_range_fingerprint()); + + // Last response from Alice + assert_eq!(res.alice_to_bob[1].parts.len(), 3); + assert!(res.alice_to_bob[1].parts[0].is_range_item()); + assert!(res.alice_to_bob[1].parts[1].is_range_fingerprint()); + assert!(res.alice_to_bob[1].parts[2].is_range_item()); + + // Last response from Bob + assert_eq!(res.bob_to_alice[1].parts.len(), 2); + assert!(res.bob_to_alice[1].parts[0].is_range_item()); + assert!(res.bob_to_alice[1].parts[1].is_range_item()); + } + + #[test] + fn test_paper_2() { + let alice_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), // the only value being sent + ("gnu", 1), + ("hog", 1), + ]; + let bob_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("gnu", 1), + ("hog", 1), + ]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + } + + #[test] + fn test_paper_3() { + let alice_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("gnu", 1), + ("hog", 1), + ]; + let bob_set = [("ape", 1), ("cat", 1), ("eel", 1), ("gnu", 1)]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + } + + #[test] + fn test_limits() { + let alice_set = [("ape", 1), ("bee", 1), ("cat", 1)]; + let bob_set = [("ape", 1), ("cat", 1), ("doe", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // With Limit: just ape + let limit = ("ape", "bee").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 0, "B -> A message count"); + + // With Limit: just bee, cat + let limit = ("bee", "doe").into(); + let res = sync(Some(limit), &alice_set, 
&bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + #[test] + fn test_prefixes_simple() { + let alice_set = [("/foo/bar", 1), ("/foo/baz", 1), ("/foo/cat", 1)]; + let bob_set = [("/foo/bar", 1), ("/alice/bar", 1), ("/alice/baz", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + #[test] + fn test_prefixes_empty_alice() { + let alice_set = []; + let bob_set = [("/foo/bar", 1), ("/alice/bar", 1), ("/alice/baz", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + #[test] + fn test_prefixes_empty_bob() { + let alice_set = [("/foo/bar", 1), ("/foo/baz", 1), ("/foo/cat", 1)]; + let bob_set = []; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 0, "B -> A message count"); + } + + #[test] + fn test_multikey() { + #[derive(Default, Clone, PartialEq, Eq, PartialOrd, Ord)] + struct Multikey { + author: [u8; 4], + key: Vec, + } + + impl RangeKey for Multikey { + fn contains(&self, range: &Range) -> bool { + let author = range.x().author.cmp(&range.y().author); + let key = range.x().key.cmp(&range.y().key); + + match (author, key) { + (Ordering::Equal, Ordering::Equal) => { + // All + true + } + (Ordering::Equal, Ordering::Less) => { + // Regular, based on key + range.x().key <= self.key && self.key < range.y().key + } + (Ordering::Equal, Ordering::Greater) => { + // Reverse, based on key + range.x().key <= self.key || self.key < range.y().key + } + (Ordering::Less, Ordering::Equal) => { + // Regular, based on author + range.x().author <= self.author && self.author < range.y().author + } + (Ordering::Greater, Ordering::Equal) => { + // Reverse, based on key + range.x().author <= self.author || self.author < range.y().author + } + (Ordering::Less, Ordering::Less) => { + // Regular, key and author + range.x().key <= self.key + && self.key < range.y().key + && range.x().author <= self.author + && self.author < range.y().author + } + (Ordering::Greater, Ordering::Greater) => { + // Reverse, key and author + (range.x().key <= self.key || self.key < range.y().key) + && (range.x().author <= self.author || self.author < range.y().author) + } + (Ordering::Less, Ordering::Greater) => { + // Regular author, Reverse key + (range.x().key <= self.key || self.key < range.y().key) + && (range.x().author <= self.author && self.author < range.y().author) + } + 
(Ordering::Greater, Ordering::Less) => { + // Regular key, Reverse author + (range.x().key <= self.key && self.key < range.y().key) + && (range.x().author <= self.author || self.author < range.y().author) + } + } + } + } + + impl Debug for Multikey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let key = if let Ok(key) = std::str::from_utf8(&self.key) { + key.to_string() + } else { + hex::encode(&self.key) + }; + f.debug_struct("Multikey") + .field("author", &hex::encode(self.author)) + .field("key", &key) + .finish() + } + } + impl AsFingerprint for Multikey { + fn as_fingerprint(&self) -> Fingerprint { + let mut hasher = blake3::Hasher::new(); + hasher.update(&self.author); + hasher.update(&self.key); + Fingerprint(hasher.finalize().into()) + } + } + + impl Multikey { + fn new(author: [u8; 4], key: impl AsRef<[u8]>) -> Self { + Multikey { + author, + key: key.as_ref().to_vec(), + } + } + } + let author_a = [1u8; 4]; + let author_b = [2u8; 4]; + let alice_set = [ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + ]; + let bob_set = [ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ]; + + // No limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + res.assert_alice_set( + "no limit", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + res.assert_bob_set( + "no limit", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + // Only author_a + let limit = Range::new(Multikey::new(author_a, ""), Multikey::new(author_b, "")); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + res.assert_alice_set( + "only author_a", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + ], + ); + + res.assert_bob_set( + "only author_a", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + (Multikey::new(author_a, "doe"), 1), + ], + ); + + // All authors, but only cat + let limit = Range::new( + Multikey::new(author_a, "cat"), + Multikey::new(author_a, "doe"), + ); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + res.assert_alice_set( + "only cat", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + res.assert_bob_set( + "only cat", + &[ 
+ (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + } + + struct SyncResult + where + K: RangeKey + Clone + Default + AsFingerprint, + V: Clone, + { + alice: Peer, + bob: Peer, + alice_to_bob: Vec>, + bob_to_alice: Vec>, + } + + impl SyncResult + where + K: RangeKey + Clone + Default + AsFingerprint + Debug, + V: Clone + Debug, + { + fn print_messages(&self) { + let len = std::cmp::max(self.alice_to_bob.len(), self.bob_to_alice.len()); + for i in 0..len { + if let Some(msg) = self.alice_to_bob.get(i) { + println!("A -> B:"); + print_message(msg); + } + if let Some(msg) = self.bob_to_alice.get(i) { + println!("B -> A:"); + print_message(msg); + } + } + } + } + + impl SyncResult + where + K: Debug + RangeKey + Clone + Default + AsFingerprint, + V: Debug + Clone + PartialEq, + { + fn assert_alice_set(&self, ctx: &str, expected: &[(K, V)]) { + dbg!(self.alice.all().unwrap().collect::>()); + for (k, v) in expected { + assert_eq!( + self.alice.store.get(k).unwrap().as_ref(), + Some(v), + "{}: (alice) missing key {:?}", + ctx, + k + ); + } + assert_eq!( + expected.len(), + self.alice.store.len().unwrap(), + "{}: (alice)", + ctx + ); + } + + fn assert_bob_set(&self, ctx: &str, expected: &[(K, V)]) { + dbg!(self.bob.all().unwrap().collect::>()); + + for (k, v) in expected { + assert_eq!( + self.bob.store.get(k).unwrap().as_ref(), + Some(v), + "{}: (bob) missing key {:?}", + ctx, + k + ); + } + assert_eq!( + expected.len(), + self.bob.store.len().unwrap(), + "{}: (bob)", + ctx + ); + } + } + + fn print_message(msg: &Message) + where + K: Debug, + V: Debug, + { + for part in &msg.parts { + match part { + MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }) => { + println!( + " RangeFingerprint({:?}, {:?}, {:?})", + range.x(), + range.y(), + fingerprint + ); + } + MessagePart::RangeItem(RangeItem { + range, + values, + have_local, + }) => { + println!( + " RangeItem({:?} | {:?}) (local?: {})\n {:?}", + range.x(), + range.y(), + have_local, + values, + ); + } + } + } + } + + fn sync( + limit: Option>, + alice_set: &[(K, V)], + bob_set: &[(K, V)], + ) -> SyncResult + where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug + PartialEq, + { + println!("Using Limit: {:?}", limit); + let mut expected_set_alice = BTreeMap::new(); + let mut expected_set_bob = BTreeMap::new(); + + let mut alice = if let Some(limit) = limit.clone() { + Peer::::with_limit(limit) + } else { + Peer::::default() + }; + for (k, v) in alice_set { + alice.put(k.clone(), v.clone()).unwrap(); + + let include = if let Some(ref limit) = limit { + k.contains(limit) + } else { + true + }; + if include { + expected_set_bob.insert(k.clone(), v.clone()); + } + // alices things are always in alices store + expected_set_alice.insert(k.clone(), v.clone()); + } + + let mut bob = if let Some(limit) = limit.clone() { + Peer::::with_limit(limit) + } else { + Peer::::default() + }; + for (k, v) in bob_set { + bob.put(k.clone(), v.clone()).unwrap(); + let include = if let Some(ref limit) = limit { + k.contains(limit) + } else { + true + }; + if include { + expected_set_alice.insert(k.clone(), v.clone()); + } + // bobs things are always in bobs store + expected_set_bob.insert(k.clone(), v.clone()); + } + + let mut alice_to_bob = Vec::new(); + let mut bob_to_alice = Vec::new(); + let initial_message = alice.initial_message().unwrap(); + + let mut next_to_bob = Some(initial_message); + 
let mut rounds = 0; + while let Some(msg) = next_to_bob.take() { + assert!(rounds < 100, "too many rounds"); + rounds += 1; + alice_to_bob.push(msg.clone()); + + if let Some(msg) = bob.process_message(msg, |_, _| {}).unwrap() { + bob_to_alice.push(msg.clone()); + next_to_bob = alice.process_message(msg, |_, _| {}).unwrap(); + } + } + let res = SyncResult { + alice, + bob, + alice_to_bob, + bob_to_alice, + }; + res.print_messages(); + + let alice_now: Vec<_> = res.alice.all().unwrap().collect::>().unwrap(); + assert_eq!( + expected_set_alice.into_iter().collect::>(), + alice_now, + "alice" + ); + + let bob_now: Vec<_> = res.bob.all().unwrap().collect::>().unwrap(); + assert_eq!( + expected_set_bob.into_iter().collect::>(), + bob_now, + "bob" + ); + + // Check that values were never sent twice + let mut alice_sent = BTreeMap::new(); + for msg in &res.alice_to_bob { + for part in &msg.parts { + if let Some(values) = part.values() { + for (key, value) in values { + assert!( + alice_sent.insert(key.clone(), value.clone()).is_none(), + "alice: duplicate {:?} - {:?}", + key, + value + ); + } + } + } + } + + let mut bob_sent = BTreeMap::new(); + for msg in &res.bob_to_alice { + for part in &msg.parts { + if let Some(values) = part.values() { + for (key, value) in values { + assert!( + bob_sent.insert(key.clone(), value.clone()).is_none(), + "bob: duplicate {:?} - {:?}", + key, + value + ); + } + } + } + } + + res + } + + #[test] + fn store_get_range() { + let mut store = SimpleStore::<&'static str, usize>::default(); + let set = [ + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("hog", 1), + ]; + for (k, v) in &set { + store.put(*k, *v).unwrap(); + } + + let all: Vec<_> = store + .get_range(Range::new("", ""), None) + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(&all, &set[..]); + + let regular: Vec<_> = store + .get_range(("bee", "eel").into(), None) + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(®ular, &set[..3]); + + // empty start + let regular: Vec<_> = store + .get_range(("", "eel").into(), None) + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(®ular, &set[..3]); + + let regular: Vec<_> = store + .get_range(("cat", "hog").into(), None) + .unwrap() + .collect::>() + .unwrap(); + + assert_eq!(®ular, &set[1..5]); + + let excluded: Vec<_> = store + .get_range(("fox", "bee").into(), None) + .unwrap() + .collect::>() + .unwrap(); + + assert_eq!(excluded[0].0, "fox"); + assert_eq!(excluded[1].0, "hog"); + assert_eq!(excluded.len(), 2); + + let excluded: Vec<_> = store + .get_range(("fox", "doe").into(), None) + .unwrap() + .collect::>() + .unwrap(); + + assert_eq!(excluded.len(), 4); + assert_eq!(excluded[0].0, "bee"); + assert_eq!(excluded[1].0, "cat"); + assert_eq!(excluded[2].0, "fox"); + assert_eq!(excluded[3].0, "hog"); + + // Limit + let all: Vec<_> = store + .get_range(("", "").into(), Some(("bee", "doe").into())) + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(&all, &set[..2]); + } + + #[test] + fn test_div_ceil() { + assert_eq!(div_ceil(1, 1), 1); + assert_eq!(div_ceil(2, 1), 2); + assert_eq!(div_ceil(4, 2), 4 / 2); + + assert_eq!(div_ceil(3, 2), 2); + assert_eq!(div_ceil(5, 3), 2); + } +} diff --git a/iroh-sync/src/store.rs b/iroh-sync/src/store.rs new file mode 100644 index 0000000000..a28b9a904a --- /dev/null +++ b/iroh-sync/src/store.rs @@ -0,0 +1,79 @@ +use anyhow::Result; +use rand_core::CryptoRngCore; + +use crate::{ + ranger, + sync::{Author, AuthorId, Namespace, NamespaceId, RecordIdentifier, Replica, SignedEntry}, +}; + 
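+// Two implementations are provided: an in-memory store, and (behind the
+// `fs-store` feature) an on-disk store backed by `redb`.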
+#[cfg(feature = "fs-store")] +pub mod fs; +pub mod memory; + +/// Abstraction over the different available storage solutions. +pub trait Store: std::fmt::Debug + Clone + Send + Sync + 'static { + /// The specialized instance scoped to a `Namespace`. + type Instance: ranger::Store + Send + Sync + 'static + Clone; + + type GetLatestIter<'a>: Iterator> + where + Self: 'a; + type GetAllIter<'a>: Iterator> + where + Self: 'a; + + fn get_replica(&self, namespace: &NamespaceId) -> Result>>; + fn get_author(&self, author: &AuthorId) -> Result>; + fn new_author(&self, rng: &mut R) -> Result; + fn new_replica(&self, namespace: Namespace) -> Result>; + + /// Gets all entries matching this key and author. + fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest version of the matching documents by key. + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest version of the matching documents by prefix. + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest versions of all documents. + fn get_latest(&self, namespace: NamespaceId) -> Result>; + + /// Returns all versions of the matching documents by author. + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]> + 'b, + ) -> Result>; + + /// Returns all versions of the matching documents by key. + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns all versions of the matching documents by prefix. + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns all versions of all documents. + fn get_all(&self, namespace: NamespaceId) -> Result>; +} diff --git a/iroh-sync/src/store/fs.rs b/iroh-sync/src/store/fs.rs new file mode 100644 index 0000000000..e490c177a2 --- /dev/null +++ b/iroh-sync/src/store/fs.rs @@ -0,0 +1,751 @@ +//! On disk storage for replicas. + +use std::{path::Path, sync::Arc}; + +use anyhow::Result; +use ouroboros::self_referencing; +use rand_core::CryptoRngCore; +use redb::{ + AccessGuard, Database, MultimapRange, MultimapTableDefinition, MultimapValue, + ReadOnlyMultimapTable, ReadTransaction, ReadableMultimapTable, ReadableTable, TableDefinition, +}; + +use crate::{ + ranger::{AsFingerprint, Fingerprint, Range, RangeKey}, + store::Store as _, + sync::{ + Author, AuthorId, Entry, EntrySignature, Namespace, NamespaceId, Record, RecordIdentifier, + Replica, SignedEntry, + }, +}; + +use self::ouroboros_impl_range_all_iterator::BorrowedMutFields; + +/// Manages the replicas and authors for an instance. 
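+///
+/// A minimal usage sketch (the file name and RNG are illustrative; the calls are
+/// the ones introduced in this change):
+///
+/// ```ignore
+/// use iroh_sync::store::{fs::Store, Store as _};
+///
+/// let store = Store::new("replicas.redb")?;
+/// let author = store.new_author(&mut rand::thread_rng())?;
+/// let namespace = iroh_sync::sync::Namespace::new(&mut rand::thread_rng());
+/// let replica = store.new_replica(namespace)?;
+/// ```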
+#[derive(Debug, Clone)] +pub struct Store { + db: Arc, +} + +// Table Definitions + +// Authors +// Table +// Key: [u8; 32] # AuthorId +// Value: #[u8; 32] # Author +const AUTHORS_TABLE: TableDefinition<&[u8; 32], &[u8; 32]> = TableDefinition::new("authors-1"); + +// Namespaces +// Table +// Key: [u8; 32] # NamespaceId +// Value: #[u8; 32] # Namespace +const NAMESPACES_TABLE: TableDefinition<&[u8; 32], &[u8; 32]> = + TableDefinition::new("namespaces-1"); + +// Records +// Multimap +// Key: ([u8; 32], [u8; 32], Vec) # (NamespaceId, AuthorId, Key) +// Values: +// (u64, [u8; 32], [u8; 32], u64, [u8; 32]) +// # (timestamp, signature_namespace, signature_author, len, hash) + +type RecordsId<'a> = (&'a [u8; 32], &'a [u8; 32], &'a [u8]); +type RecordsValue<'a> = (u64, &'a [u8; 64], &'a [u8; 64], u64, &'a [u8; 32]); + +const RECORDS_TABLE: MultimapTableDefinition = + MultimapTableDefinition::new("records-1"); + +impl Store { + pub fn new(path: impl AsRef) -> Result { + let db = Database::create(path)?; + + // Setup all tables + let write_tx = db.begin_write()?; + { + let _table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let _table = write_tx.open_table(NAMESPACES_TABLE)?; + let _table = write_tx.open_table(AUTHORS_TABLE)?; + } + write_tx.commit()?; + + Ok(Store { db: Arc::new(db) }) + } + /// Stores a new namespace + fn insert_namespace(&self, namespace: Namespace) -> Result<()> { + let write_tx = self.db.begin_write()?; + { + let mut namespace_table = write_tx.open_table(NAMESPACES_TABLE)?; + namespace_table.insert(&namespace.id_bytes(), &namespace.to_bytes())?; + } + write_tx.commit()?; + + Ok(()) + } + + fn insert_author(&self, author: Author) -> Result<()> { + let write_tx = self.db.begin_write()?; + { + let mut author_table = write_tx.open_table(AUTHORS_TABLE)?; + author_table.insert(&author.id_bytes(), &author.to_bytes())?; + } + write_tx.commit()?; + + Ok(()) + } +} + +impl super::Store for Store { + type Instance = StoreInstance; + type GetAllIter<'a> = RangeAllIterator<'a>; + type GetLatestIter<'a> = RangeLatestIterator<'a>; + + fn get_replica(&self, namespace_id: &NamespaceId) -> Result>> { + let read_tx = self.db.begin_read()?; + let namespace_table = read_tx.open_table(NAMESPACES_TABLE)?; + let Some(namespace) = namespace_table.get(namespace_id.as_bytes())? else { + return Ok(None); + }; + let namespace = Namespace::from_bytes(namespace.value()); + let replica = Replica::new(namespace, StoreInstance::new(*namespace_id, self.clone())); + Ok(Some(replica)) + } + + fn get_author(&self, author_id: &AuthorId) -> Result> { + let read_tx = self.db.begin_read()?; + let author_table = read_tx.open_table(AUTHORS_TABLE)?; + let Some(author) = author_table.get(author_id.as_bytes())? else { + return Ok(None); + }; + + let author = Author::from_bytes(author.value()); + Ok(Some(author)) + } + + /// Generates a new author, using the passed in randomness. + fn new_author(&self, rng: &mut R) -> Result { + let author = Author::new(rng); + self.insert_author(author.clone())?; + Ok(author) + } + + fn new_replica(&self, namespace: Namespace) -> Result> { + let id = namespace.id(); + self.insert_namespace(namespace.clone())?; + + let replica = Replica::new(namespace, StoreInstance::new(id, self.clone())); + + Ok(replica) + } + + /// Gets all entries matching this key and author. 
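+    /// Only the newest stored entry (the one with the highest timestamp) is returned.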
+ fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result> { + let read_tx = self.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + let db_key = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let records = record_table.get(db_key)?; + let Some(record) = records.last() else { + return Ok(None); + }; + let record = record?; + let (timestamp, namespace_sig, author_sig, len, hash) = record.value(); + let record = Record::new(timestamp, len, hash.into()); + let id = RecordIdentifier::new(key, namespace, author); + let entry = Entry::new(id, record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + + Ok(Some(signed_entry)) + } + + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::Key(key.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::Prefix(prefix.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_latest(&self, namespace: NamespaceId) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]> + 'b, + ) -> Result> { + let start = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let end = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + 
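+            // Restrict the full (namespace, any author) range to entries whose
+            // key matches exactly.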
RangeFilter::Key(key.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::Prefix(prefix.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_all(&self, namespace: NamespaceId) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::None, + )?; + + Ok(iter) + } +} + +/// [`Namespace`] specific wrapper around the [`Store`]. +#[derive(Debug, Clone)] +pub struct StoreInstance { + namespace: NamespaceId, + store: Store, +} + +impl StoreInstance { + fn new(namespace: NamespaceId, store: Store) -> Self { + StoreInstance { namespace, store } + } +} + +impl crate::ranger::Store for StoreInstance { + type Error = anyhow::Error; + + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> Result { + let read_tx = self.store.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + // TODO: verify this fetches all keys with this namespace + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let mut records = record_table.range(start..=end)?; + + let Some(record) = records.next() else { + return Ok(RecordIdentifier::default()); + }; + let (compound_key, _) = record?; + let (namespace_id, author_id, key) = compound_key.value(); + + let id = RecordIdentifier::from_parts(key, namespace_id, author_id)?; + Ok(id) + } + + fn get(&self, id: &RecordIdentifier) -> Result> { + self.store + .get_latest_by_key_and_author(id.namespace(), id.author(), id.key()) + } + + fn len(&self) -> Result { + let read_tx = self.store.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + // TODO: verify this fetches all keys with this namespace + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let records = record_table.range(start..=end)?; + Ok(records.count()) + } + + fn is_empty(&self) -> Result { + Ok(self.len()? == 0) + } + + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + // TODO: optimize? + + let elements = self.get_range(range.clone(), limit.cloned())?; + let mut fp = Fingerprint::empty(); + for el in elements { + let el = el?; + fp ^= el.0.as_fingerprint(); + } + + Ok(fp) + } + + fn put(&mut self, k: RecordIdentifier, v: SignedEntry) -> Result<()> { + // TODO: propagate error/not insertion? 
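+        // Only entries with valid namespace and author signatures are stored;
+        // entries that fail verification are currently dropped silently.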
+ if v.verify().is_ok() { + let timestamp = v.entry().record().timestamp(); + // TODO: verify timestamp is "reasonable" + + let write_tx = self.store.db.begin_write()?; + { + let mut record_table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let key = (k.namespace_bytes(), k.author_bytes(), k.key()); + let record = v.entry().record(); + let value = ( + timestamp, + &v.signature().namespace_signature().to_bytes(), + &v.signature().author_signature().to_bytes(), + record.content_len(), + record.content_hash().as_bytes(), + ); + record_table.insert(key, value)?; + } + write_tx.commit()?; + } + Ok(()) + } + + type RangeIterator<'a> = RangeLatestIterator<'a>; + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result> { + // TODO: implement inverted range + let range_start = range.x(); + let range_end = range.y(); + + let start = ( + range_start.namespace_bytes(), + range_start.author_bytes(), + range_start.key(), + ); + let end = ( + range_end.namespace_bytes(), + range_end.author_bytes(), + range_end.key(), + ); + let iter = RangeLatestIterator::try_new( + self.store.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + limit, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn remove(&mut self, k: &RecordIdentifier) -> Result> { + let write_tx = self.store.db.begin_write()?; + let res = { + let mut records_table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let key = (k.namespace_bytes(), k.author_bytes(), k.key()); + let records = records_table.remove_all(key)?; + let mut res = Vec::new(); + for record in records.into_iter() { + let record = record?; + let (timestamp, namespace_sig, author_sig, len, hash) = record.value(); + let record = Record::new(timestamp, len, hash.into()); + let entry = Entry::new(k.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + res.push(signed_entry); + } + res + }; + write_tx.commit()?; + Ok(res) + } + + type AllIterator<'a> = RangeLatestIterator<'a>; + + fn all(&self) -> Result> { + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.store.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::None, + )?; + + Ok(iter) + } +} + +fn matches(limit: &Option>, x: &RecordIdentifier) -> bool { + limit.as_ref().map(|r| x.contains(r)).unwrap_or(true) +} + +#[self_referencing] +pub struct RangeLatestIterator<'a> { + read_tx: ReadTransaction<'a>, + #[borrows(read_tx)] + #[covariant] + record_table: ReadOnlyMultimapTable<'this, RecordsId<'static>, RecordsValue<'static>>, + #[covariant] + #[borrows(record_table)] + records: MultimapRange<'this, RecordsId<'static>, RecordsValue<'static>>, + limit: Option>, + filter: RangeFilter, +} + +impl std::fmt::Debug for RangeLatestIterator<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RangeLatestIterator") + .finish_non_exhaustive() + } +} + +impl Iterator for RangeLatestIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + self.with_mut(|fields| { + for next in 
fields.records.by_ref() { + let next = match next { + Ok(next) => next, + Err(err) => return Some(Err(err.into())), + }; + + let (namespace, author, key) = next.0.value(); + let id = match RecordIdentifier::from_parts(key, namespace, author) { + Ok(id) => id, + Err(err) => return Some(Err(err)), + }; + if fields.filter.matches(&id) && matches(fields.limit, &id) { + let last = next.1.last(); + let value = match last? { + Ok(value) => value, + Err(err) => return Some(Err(err.into())), + }; + let (timestamp, namespace_sig, author_sig, len, hash) = value.value(); + let record = Record::new(timestamp, len, hash.into()); + let entry = Entry::new(id.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + + return Some(Ok((id, signed_entry))); + } + } + None + }) + } +} + +#[self_referencing] +pub struct RangeAllIterator<'a> { + read_tx: ReadTransaction<'a>, + #[borrows(read_tx)] + #[covariant] + record_table: ReadOnlyMultimapTable<'this, RecordsId<'static>, RecordsValue<'static>>, + #[covariant] + #[borrows(record_table)] + records: ( + MultimapRange<'this, RecordsId<'static>, RecordsValue<'static>>, + Option<( + AccessGuard<'this, RecordsId<'static>>, + MultimapValue<'this, RecordsValue<'static>>, + RecordIdentifier, + )>, + ), + filter: RangeFilter, +} + +#[derive(Debug)] +enum RangeFilter { + None, + Prefix(Vec), + Key(Vec), +} + +impl RangeFilter { + fn matches(&self, id: &RecordIdentifier) -> bool { + match self { + RangeFilter::None => true, + RangeFilter::Prefix(ref prefix) => id.key().starts_with(prefix), + RangeFilter::Key(ref key) => id.key() == key, + } + } +} + +impl std::fmt::Debug for RangeAllIterator<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RangeAllIterator").finish_non_exhaustive() + } +} + +/// Advance the internal iterator to the next set of multimap values +fn next_iter(fields: &mut BorrowedMutFields) -> Result<()> { + for next_iter in fields.records.0.by_ref() { + let (id_guard, values_guard) = next_iter?; + let (namespace, author, key) = id_guard.value(); + let id = RecordIdentifier::from_parts(key, namespace, author)?; + if fields.filter.matches(&id) { + fields.records.1 = Some((id_guard, values_guard, id)); + return Ok(()); + } + } + Ok(()) +} + +impl Iterator for RangeAllIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + self.with_mut(|mut fields| { + loop { + if fields.records.1.is_none() { + if let Err(err) = next_iter(&mut fields) { + return Some(Err(err)); + } + } + // If this is None, nothing is available anymore + let (_id_guard, values_guard, id) = fields.records.1.as_mut()?; + + match values_guard.next() { + Some(Ok(value)) => { + let (timestamp, namespace_sig, author_sig, len, hash) = value.value(); + let record = Record::new(timestamp, len, hash.into()); + let entry = Entry::new(id.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + return Some(Ok((id.clone(), signed_entry))); + } + Some(Err(err)) => return Some(Err(err.into())), + None => { + // clear the current + fields.records.1 = None; + } + } + } + }) + } +} + +#[cfg(test)] +mod tests { + use crate::ranger::Store as _; + use crate::store::Store as _; + + use super::*; + + #[test] + fn test_basics() -> Result<()> { + let dbfile = tempfile::NamedTempFile::new()?; + let store = 
Store::new(dbfile.path())?; + + let author = store.new_author(&mut rand::thread_rng())?; + let namespace = Namespace::new(&mut rand::thread_rng()); + let replica = store.new_replica(namespace.clone())?; + + let replica_back = store.get_replica(&namespace.id())?.unwrap(); + assert_eq!( + replica.namespace().as_bytes(), + replica_back.namespace().as_bytes() + ); + + let author_back = store.get_author(&author.id())?.unwrap(); + assert_eq!(author.to_bytes(), author_back.to_bytes(),); + + let mut wrapper = StoreInstance::new(namespace.id(), store.clone()); + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let entry = Entry::new( + id.clone(), + Record::from_data(format!("world-{i}"), namespace.id()), + ); + let entry = SignedEntry::from_entry(entry, &namespace, &author); + wrapper.put(id, entry)?; + } + + // all + let all: Vec<_> = wrapper.all()?.collect(); + assert_eq!(all.len(), 5); + + // add a second version + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let entry = Entry::new( + id.clone(), + Record::from_data(format!("world-{i}-2"), namespace.id()), + ); + let entry = SignedEntry::from_entry(entry, &namespace, &author); + wrapper.put(id, entry)?; + } + + // get all + let entries = store.get_all(namespace.id())?.collect::>>()?; + assert_eq!(entries.len(), 10); + + // get all prefix + let entries = store + .get_all_by_prefix(namespace.id(), "hello-")? + .collect::>>()?; + assert_eq!(entries.len(), 10); + + // get latest + let entries = store + .get_latest(namespace.id())? + .collect::>>()?; + assert_eq!(entries.len(), 5); + + // get latest by prefix + let entries = store + .get_latest_by_prefix(namespace.id(), "hello-")? + .collect::>>()?; + assert_eq!(entries.len(), 5); + + // delete and get + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let res = wrapper.get(&id)?; + assert!(res.is_some()); + let out = wrapper.remove(&id)?; + assert_eq!(out.len(), 2); + for val in out { + assert_eq!(val.entry().id(), &id); + } + let res = wrapper.get(&id)?; + assert!(res.is_none()); + } + + // get latest + let entries = store + .get_latest(namespace.id())? + .collect::>>()?; + assert_eq!(entries.len(), 0); + + Ok(()) + } +} diff --git a/iroh-sync/src/store/memory.rs b/iroh-sync/src/store/memory.rs new file mode 100644 index 0000000000..b10213f9ce --- /dev/null +++ b/iroh-sync/src/store/memory.rs @@ -0,0 +1,502 @@ +//! In memory storage for replicas. + +use std::{ + collections::{BTreeMap, HashMap}, + convert::Infallible, + sync::Arc, +}; + +use anyhow::Result; +use parking_lot::{RwLock, RwLockReadGuard}; +use rand_core::CryptoRngCore; + +use crate::{ + ranger::{AsFingerprint, Fingerprint, Range, RangeKey}, + sync::{Author, AuthorId, Namespace, NamespaceId, RecordIdentifier, Replica, SignedEntry}, +}; + +/// Manages the replicas and authors for an instance. 
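+///
+/// All data lives in memory behind `Arc<RwLock<..>>` maps: clones are cheap and
+/// share the same underlying data, and nothing is persisted across restarts.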
+#[derive(Debug, Clone, Default)] +pub struct Store { + replicas: Arc>>>, + authors: Arc>>, + /// Stores records by namespace -> identifier + timestamp + replica_records: Arc>, +} + +type ReplicaRecordsOwned = + HashMap>>; + +impl super::Store for Store { + type Instance = ReplicaStoreInstance; + type GetLatestIter<'a> = GetLatestIter<'a>; + type GetAllIter<'a> = GetAllIter<'a>; + + fn get_replica(&self, namespace: &NamespaceId) -> Result>> { + let replicas = &*self.replicas.read(); + Ok(replicas.get(namespace).cloned()) + } + + fn get_author(&self, author: &AuthorId) -> Result> { + let authors = &*self.authors.read(); + Ok(authors.get(author).cloned()) + } + + fn new_author(&self, rng: &mut R) -> Result { + let author = Author::new(rng); + self.authors.write().insert(author.id(), author.clone()); + Ok(author) + } + + fn new_replica(&self, namespace: Namespace) -> Result> { + let id = namespace.id(); + let replica = Replica::new(namespace, ReplicaStoreInstance::new(id, self.clone())); + self.replicas + .write() + .insert(replica.namespace(), replica.clone()); + Ok(replica) + } + + fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result> { + let inner = self.replica_records.read(); + + let value = inner + .get(&namespace) + .and_then(|records| records.get(&RecordIdentifier::new(key, namespace, author))) + .and_then(|values| values.last_key_value()); + + Ok(value.map(|(_, v)| v.clone())) + } + + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let key = key.as_ref().to_vec(); + let filter = GetFilter::Key { namespace, key }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let prefix = prefix.as_ref().to_vec(); + let filter = GetFilter::Prefix { namespace, prefix }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_latest(&self, namespace: NamespaceId) -> Result> { + let records = self.replica_records.read(); + let filter = GetFilter::All { namespace }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]> + 'b, + ) -> Result> { + let records = self.replica_records.read(); + let record_id = RecordIdentifier::new(key, namespace, author); + let filter = GetFilter::KeyAuthor(record_id); + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let key = key.as_ref().to_vec(); + let filter = GetFilter::Key { namespace, key }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let prefix = prefix.as_ref().to_vec(); + let filter = GetFilter::Prefix { namespace, prefix }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all(&self, namespace: NamespaceId) -> Result> { + let records = self.replica_records.read(); + let filter = GetFilter::All { namespace }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } +} +#[derive(Debug)] +enum GetFilter { + /// All 
entries. + All { namespace: NamespaceId }, + /// Filter by key and author. + KeyAuthor(RecordIdentifier), + /// Filter by key only. + Key { + namespace: NamespaceId, + key: Vec, + }, + /// Filter by prefix only. + Prefix { + namespace: NamespaceId, + prefix: Vec, + }, +} + +impl GetFilter { + fn namespace(&self) -> NamespaceId { + match self { + GetFilter::All { namespace } => *namespace, + GetFilter::KeyAuthor(ref r) => r.namespace(), + GetFilter::Key { namespace, .. } => *namespace, + GetFilter::Prefix { namespace, .. } => *namespace, + } + } +} + +#[derive(Debug)] +pub struct GetLatestIter<'a> { + records: ReplicaRecords<'a>, + filter: GetFilter, + /// Current iteration index. + index: usize, +} + +impl<'a> Iterator for GetLatestIter<'a> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + let records = self.records.get(&self.filter.namespace())?; + let res = match self.filter { + GetFilter::All { namespace } => records + .iter() + .filter(|(k, _)| k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + GetFilter::KeyAuthor(ref record_id) => { + let values = records.get(record_id)?; + let (_, res) = values.iter().nth(self.index)?; + (record_id.clone(), res.clone()) + } + GetFilter::Key { namespace, ref key } => records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + GetFilter::Prefix { + namespace, + ref prefix, + } => records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + }; + self.index += 1; + Some(Ok(res)) + } +} + +#[derive(Debug)] +pub struct GetAllIter<'a> { + records: ReplicaRecords<'a>, + filter: GetFilter, + /// Current iteration index. 
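+    /// Each call to `next` rescans the matching records and yields the
+    /// `index`-th hit, then advances the index by one.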
+ index: usize, +} + +impl<'a> Iterator for GetAllIter<'a> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + let records = self.records.get(&self.filter.namespace())?; + let res = match self.filter { + GetFilter::All { namespace } => records + .iter() + .filter(|(k, _)| k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + GetFilter::KeyAuthor(ref record_id) => { + let values = records.get(record_id)?; + let (_, value) = values.iter().nth(self.index)?; + (record_id.clone(), value.clone()) + } + GetFilter::Key { namespace, ref key } => records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + GetFilter::Prefix { + namespace, + ref prefix, + } => records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + }; + self.index += 1; + Some(Ok(res)) + } +} + +#[derive(Debug, Clone)] +pub struct ReplicaStoreInstance { + namespace: NamespaceId, + store: Store, +} + +impl ReplicaStoreInstance { + fn new(namespace: NamespaceId, store: Store) -> Self { + ReplicaStoreInstance { namespace, store } + } + + fn with_records(&self, f: F) -> T + where + F: FnOnce(Option<&BTreeMap>>) -> T, + { + let guard = self.store.replica_records.read(); + let value = guard.get(&self.namespace); + f(value) + } + + fn with_records_mut(&self, f: F) -> T + where + F: FnOnce(Option<&mut BTreeMap>>) -> T, + { + let mut guard = self.store.replica_records.write(); + let value = guard.get_mut(&self.namespace); + f(value) + } + + fn with_records_mut_with_default(&self, f: F) -> T + where + F: FnOnce(&mut BTreeMap>) -> T, + { + let mut guard = self.store.replica_records.write(); + let value = guard.entry(self.namespace).or_default(); + f(value) + } + + fn records_iter(&self) -> RecordsIter<'_> { + RecordsIter { + namespace: self.namespace, + replica_records: self.store.replica_records.read(), + i: 0, + } + } +} + +type ReplicaRecords<'a> = RwLockReadGuard< + 'a, + HashMap>>, +>; + +#[derive(Debug)] +struct RecordsIter<'a> { + namespace: NamespaceId, + replica_records: ReplicaRecords<'a>, + i: usize, +} + +impl Iterator for RecordsIter<'_> { + type Item = (RecordIdentifier, BTreeMap); + + fn next(&mut self) -> Option { + let records = self.replica_records.get(&self.namespace)?; + let (key, value) = records.iter().nth(self.i)?; + self.i += 1; + Some((key.clone(), value.clone())) + } +} + +impl crate::ranger::Store for ReplicaStoreInstance { + type Error = Infallible; + + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> Result { + Ok(self.with_records(|records| { + records + .and_then(|r| r.first_key_value().map(|(k, _)| k.clone())) + .unwrap_or_default() + })) + } + + fn get(&self, key: &RecordIdentifier) -> Result, Self::Error> { + Ok(self.with_records(|records| { + records + .and_then(|r| r.get(key)) + .and_then(|values| values.last_key_value()) + .map(|(_, v)| v.clone()) + })) + } + + fn len(&self) -> Result { + Ok(self.with_records(|records| records.map(|v| v.len()).unwrap_or_default())) + } + + fn is_empty(&self) -> Result { + Ok(self.len()? 
== 0) + } + + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + let elements = self.get_range(range.clone(), limit.cloned())?; + let mut fp = Fingerprint::empty(); + for el in elements { + let el = el?; + fp ^= el.0.as_fingerprint(); + } + + Ok(fp) + } + + fn put(&mut self, k: RecordIdentifier, v: SignedEntry) -> Result<(), Self::Error> { + // TODO: propagate error/not insertion? + if v.verify().is_ok() { + let timestamp = v.entry().record().timestamp(); + // TODO: verify timestamp is "reasonable" + + self.with_records_mut_with_default(|records| { + records.entry(k).or_default().insert(timestamp, v); + }); + } + Ok(()) + } + + type RangeIterator<'a> = RangeIterator<'a>; + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error> { + Ok(RangeIterator { + iter: self.records_iter(), + range: Some(range), + limit, + }) + } + + fn remove(&mut self, key: &RecordIdentifier) -> Result, Self::Error> { + let res = self.with_records_mut(|records| { + records + .and_then(|records| records.remove(key).map(|v| v.into_values().collect())) + .unwrap_or_default() + }); + Ok(res) + } + + type AllIterator<'a> = RangeIterator<'a>; + + fn all(&self) -> Result, Self::Error> { + Ok(RangeIterator { + iter: self.records_iter(), + range: None, + limit: None, + }) + } +} + +#[derive(Debug)] +pub struct RangeIterator<'a> { + iter: RecordsIter<'a>, + range: Option>, + limit: Option>, +} + +impl RangeIterator<'_> { + fn matches(&self, x: &RecordIdentifier) -> bool { + let range = self.range.as_ref().map(|r| x.contains(r)).unwrap_or(true); + let limit = self.limit.as_ref().map(|r| x.contains(r)).unwrap_or(true); + range && limit + } +} + +impl Iterator for RangeIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry), Infallible>; + + fn next(&mut self) -> Option { + let mut next = self.iter.next()?; + loop { + if self.matches(&next.0) { + let (k, mut values) = next; + let (_, v) = values.pop_last()?; + return Some(Ok((k, v))); + } + + next = self.iter.next()?; + } + } +} diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs new file mode 100644 index 0000000000..3cc7767df9 --- /dev/null +++ b/iroh-sync/src/sync.rs @@ -0,0 +1,943 @@ +// Names and concepts are roughly based on Willows design at the moment: +// +// https://hackmd.io/DTtck8QOQm6tZaQBBtTf7w +// +// This is going to change! + +use std::{ + cmp::Ordering, + fmt::{Debug, Display}, + str::FromStr, + sync::Arc, + time::SystemTime, +}; + +use parking_lot::RwLock; + +use ed25519_dalek::{Signature, SignatureError, Signer, SigningKey, VerifyingKey}; +use iroh_bytes::Hash; +use rand_core::CryptoRngCore; +use serde::{Deserialize, Serialize}; + +use crate::ranger::{self, AsFingerprint, Fingerprint, Peer, RangeKey}; + +pub type ProtocolMessage = crate::ranger::Message; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Author { + priv_key: SigningKey, +} + +impl Display for Author { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Author({})", hex::encode(self.priv_key.to_bytes())) + } +} + +impl Author { + pub fn new(rng: &mut R) -> Self { + let priv_key = SigningKey::generate(rng); + + Author { priv_key } + } + + pub fn from_bytes(bytes: &[u8; 32]) -> Self { + SigningKey::from_bytes(bytes).into() + } + + /// Returns the Author byte representation. + pub fn to_bytes(&self) -> [u8; 32] { + self.priv_key.to_bytes() + } + + /// Returns the AuthorId byte representation. 
+ pub fn id_bytes(&self) -> [u8; 32] { + self.priv_key.verifying_key().to_bytes() + } + + pub fn id(&self) -> AuthorId { + AuthorId(self.priv_key.verifying_key()) + } + + pub fn sign(&self, msg: &[u8]) -> Signature { + self.priv_key.sign(msg) + } + + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.priv_key.verify_strict(msg, signature) + } +} + +#[derive(Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)] +pub struct AuthorId(VerifyingKey); + +impl Debug for AuthorId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "AuthorId({})", hex::encode(self.0.as_bytes())) + } +} + +impl Display for AuthorId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", hex::encode(self.0.as_bytes())) + } +} + +impl AuthorId { + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.0.verify_strict(msg, signature) + } + + pub fn as_bytes(&self) -> &[u8; 32] { + self.0.as_bytes() + } + + pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + Ok(AuthorId(VerifyingKey::from_bytes(bytes)?)) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Namespace { + priv_key: SigningKey, +} + +impl Display for Namespace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Namespace({})", hex::encode(self.priv_key.to_bytes())) + } +} + +impl FromStr for Namespace { + type Err = (); + + fn from_str(s: &str) -> Result { + let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; + let priv_key = SigningKey::from_bytes(&priv_key); + + Ok(Namespace { priv_key }) + } +} + +impl FromStr for Author { + type Err = (); + + fn from_str(s: &str) -> Result { + let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; + let priv_key = SigningKey::from_bytes(&priv_key); + + Ok(Author { priv_key }) + } +} + +impl From for Author { + fn from(priv_key: SigningKey) -> Self { + Self { priv_key } + } +} + +impl From for Namespace { + fn from(priv_key: SigningKey) -> Self { + Self { priv_key } + } +} + +impl Namespace { + pub fn new(rng: &mut R) -> Self { + let priv_key = SigningKey::generate(rng); + + Namespace { priv_key } + } + + pub fn from_bytes(bytes: &[u8; 32]) -> Self { + SigningKey::from_bytes(bytes).into() + } + + /// Returns the Namespace byte representation. + pub fn to_bytes(&self) -> [u8; 32] { + self.priv_key.to_bytes() + } + + /// Returns the NamespaceId byte representation. 
+ pub fn id_bytes(&self) -> [u8; 32] { + self.priv_key.verifying_key().to_bytes() + } + + pub fn id(&self) -> NamespaceId { + NamespaceId(self.priv_key.verifying_key()) + } + + pub fn sign(&self, msg: &[u8]) -> Signature { + self.priv_key.sign(msg) + } + + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.priv_key.verify_strict(msg, signature) + } +} + +#[derive(Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] +pub struct NamespaceId(VerifyingKey); + +impl Display for NamespaceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NamespaceId({})", hex::encode(self.0.as_bytes())) + } +} + +impl Debug for NamespaceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NamespaceId({})", hex::encode(self.0.as_bytes())) + } +} + +impl NamespaceId { + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.0.verify_strict(msg, signature) + } + + pub fn as_bytes(&self) -> &[u8; 32] { + self.0.as_bytes() + } + + pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + Ok(NamespaceId(VerifyingKey::from_bytes(bytes)?)) + } +} + +/// TODO: Would potentially nice to pass a `&SignedEntry` reference, however that would make +/// everything `!Send`. +/// TODO: Not sure if the `Sync` requirement will be a problem for implementers. It comes from +/// [parking_lot::RwLock] requiring `Sync`. +pub type OnInsertCallback = Box; + +/// TODO: PeerId is in iroh-net which iroh-sync doesn't depend on. Add iroh-common crate with `PeerId`. +pub type PeerIdBytes = [u8; 32]; + +#[derive(Debug, Clone)] +pub enum InsertOrigin { + Local, + Sync(Option), +} + +#[derive(derive_more::Debug, Clone)] +pub struct Replica> { + inner: Arc>>, + #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] + on_insert: Arc>>, +} + +#[derive(derive_more::Debug)] +struct InnerReplica> { + namespace: Namespace, + peer: Peer, +} + +#[derive(Debug, Serialize, Deserialize)] +struct ReplicaData { + entries: Vec, + namespace: Namespace, +} + +impl> Replica { + // TODO: check that read only replicas are possible + pub fn new(namespace: Namespace, store: S) -> Self { + Replica { + inner: Arc::new(RwLock::new(InnerReplica { + namespace, + peer: Peer::from_store(store), + })), + on_insert: Default::default(), + } + } + + pub fn on_insert(&self, callback: OnInsertCallback) { + let mut on_insert = self.on_insert.write(); + on_insert.push(callback); + } + + /// Inserts a new record at the given key. + pub fn insert( + &self, + key: impl AsRef<[u8]>, + author: &Author, + hash: Hash, + len: u64, + ) -> Result<(), S::Error> { + let mut inner = self.inner.write(); + + let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); + let record = Record::from_hash(hash, len); + + // Store signed entries + let entry = Entry::new(id.clone(), record); + let signed_entry = entry.sign(&inner.namespace, author); + inner.peer.put(id, signed_entry.clone())?; + drop(inner); + let on_insert = self.on_insert.read(); + for cb in &*on_insert { + cb(InsertOrigin::Local, signed_entry.clone()); + } + Ok(()) + } + + /// Hashes the given data and inserts it. + /// This does not store the content, just the record of it. + /// + /// Returns the calculated hash. 
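+    ///
+    /// A usage sketch (key, author and payload are illustrative):
+    ///
+    /// ```ignore
+    /// let hash = replica.hash_and_insert("/greeting", &author, b"hello world")?;
+    /// ```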
+ pub fn hash_and_insert( + &self, + key: impl AsRef<[u8]>, + author: &Author, + data: impl AsRef<[u8]>, + ) -> Result { + let len = data.as_ref().len() as u64; + let hash = Hash::new(data); + self.insert(key, author, hash, len)?; + Ok(hash) + } + + pub fn id(&self, key: impl AsRef<[u8]>, author: &Author) -> RecordIdentifier { + let inner = self.inner.read(); + RecordIdentifier::new(key, inner.namespace.id(), author.id()) + } + + pub fn insert_remote_entry( + &self, + entry: SignedEntry, + received_from: Option, + ) -> anyhow::Result<()> { + entry.verify()?; + let mut inner = self.inner.write(); + let id = entry.entry.id.clone(); + inner.peer.put(id, entry.clone()).map_err(Into::into)?; + drop(inner); + let on_insert = self.on_insert.read(); + for cb in &*on_insert { + cb(InsertOrigin::Sync(received_from), entry.clone()); + } + Ok(()) + } + + pub fn sync_initial_message( + &self, + ) -> Result, S::Error> { + self.inner.read().peer.initial_message() + } + + pub fn sync_process_message( + &self, + message: crate::ranger::Message, + from_peer: Option, + ) -> Result>, S::Error> { + let reply = self + .inner + .write() + .peer + .process_message(message, |_key, entry| { + let on_insert = self.on_insert.read(); + for cb in &*on_insert { + cb(InsertOrigin::Sync(from_peer), entry.clone()); + } + })?; + + Ok(reply) + } + + pub fn namespace(&self) -> NamespaceId { + self.inner.read().namespace.id() + } +} + +/// A signed entry. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SignedEntry { + signature: EntrySignature, + entry: Entry, +} + +impl SignedEntry { + pub fn new(signature: EntrySignature, entry: Entry) -> Self { + SignedEntry { signature, entry } + } + + pub fn from_entry(entry: Entry, namespace: &Namespace, author: &Author) -> Self { + let signature = EntrySignature::from_entry(&entry, namespace, author); + SignedEntry { signature, entry } + } + + pub fn verify(&self) -> Result<(), SignatureError> { + self.signature + .verify(&self.entry, &self.entry.id.namespace, &self.entry.id.author) + } + + pub fn signature(&self) -> &EntrySignature { + &self.signature + } + + pub fn entry(&self) -> &Entry { + &self.entry + } + + pub fn content_hash(&self) -> &Hash { + self.entry().record().content_hash() + } +} + +/// Signature over an entry. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EntrySignature { + author_signature: Signature, + namespace_signature: Signature, +} + +impl EntrySignature { + pub fn from_entry(entry: &Entry, namespace: &Namespace, author: &Author) -> Self { + // TODO: this should probably include a namespace prefix + // namespace in the cryptographic sense. 
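+        // The canonical entry bytes are signed twice: with the namespace key
+        // (proving write access to the document) and with the author key
+        // (proving authorship). Verification later checks both signatures.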
+ let bytes = entry.to_vec(); + let namespace_signature = namespace.sign(&bytes); + let author_signature = author.sign(&bytes); + + EntrySignature { + author_signature, + namespace_signature, + } + } + + pub fn verify( + &self, + entry: &Entry, + namespace: &NamespaceId, + author: &AuthorId, + ) -> Result<(), SignatureError> { + let bytes = entry.to_vec(); + namespace.verify(&bytes, &self.namespace_signature)?; + author.verify(&bytes, &self.author_signature)?; + + Ok(()) + } + + pub fn from_parts(namespace_sig: &[u8; 64], author_sig: &[u8; 64]) -> Self { + let namespace_signature = Signature::from_bytes(namespace_sig); + let author_signature = Signature::from_bytes(author_sig); + + EntrySignature { + author_signature, + namespace_signature, + } + } + + pub fn author_signature(&self) -> &Signature { + &self.author_signature + } + + pub fn namespace_signature(&self) -> &Signature { + &self.namespace_signature + } +} + +/// A single entry in a replica. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Entry { + id: RecordIdentifier, + record: Record, +} + +impl Entry { + pub fn new(id: RecordIdentifier, record: Record) -> Self { + Entry { id, record } + } + + pub fn id(&self) -> &RecordIdentifier { + &self.id + } + + pub fn record(&self) -> &Record { + &self.record + } + + /// Serialize this entry into its canonical byte representation used for signing. + pub fn into_vec(&self, out: &mut Vec) { + self.id.as_bytes(out); + self.record.as_bytes(out); + } + + pub fn to_vec(&self) -> Vec { + let mut out = Vec::new(); + self.into_vec(&mut out); + out + } + + pub fn sign(self, namespace: &Namespace, author: &Author) -> SignedEntry { + SignedEntry::from_entry(self, namespace, author) + } +} + +/// The indentifier of a record. +#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct RecordIdentifier { + /// The key of the record. + key: Vec, + /// The namespace this record belongs to. + namespace: NamespaceId, + /// The author that wrote this record. 
+ author: AuthorId, +} + +impl AsFingerprint for RecordIdentifier { + fn as_fingerprint(&self) -> crate::ranger::Fingerprint { + let mut hasher = blake3::Hasher::new(); + hasher.update(self.namespace.as_bytes()); + hasher.update(self.author.as_bytes()); + hasher.update(&self.key); + Fingerprint(hasher.finalize().into()) + } +} + +impl PartialOrd for NamespaceId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for NamespaceId { + fn cmp(&self, other: &Self) -> Ordering { + self.0.as_bytes().cmp(other.0.as_bytes()) + } +} + +impl PartialOrd for AuthorId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for AuthorId { + fn cmp(&self, other: &Self) -> Ordering { + self.0.as_bytes().cmp(other.0.as_bytes()) + } +} + +impl RangeKey for RecordIdentifier { + fn contains(&self, range: &crate::ranger::Range) -> bool { + use crate::ranger::contains; + + let key_range = range.clone().map(|x, y| (x.key, y.key)); + let namespace_range = range.clone().map(|x, y| (x.namespace, y.namespace)); + let author_range = range.clone().map(|x, y| (x.author, y.author)); + + contains(&self.key, &key_range) + && contains(&self.namespace, &namespace_range) + && contains(&self.author, &author_range) + } +} + +impl RecordIdentifier { + pub fn new(key: impl AsRef<[u8]>, namespace: NamespaceId, author: AuthorId) -> Self { + RecordIdentifier { + key: key.as_ref().to_vec(), + namespace, + author, + } + } + + pub fn from_parts(key: &[u8], namespace: &[u8; 32], author: &[u8; 32]) -> anyhow::Result { + Ok(RecordIdentifier { + key: key.to_vec(), + namespace: NamespaceId::from_bytes(namespace)?, + author: AuthorId::from_bytes(author)?, + }) + } + + pub fn as_bytes(&self, out: &mut Vec) { + out.extend_from_slice(self.namespace.as_bytes()); + out.extend_from_slice(self.author.as_bytes()); + out.extend_from_slice(&self.key); + } + + pub fn key(&self) -> &[u8] { + &self.key + } + + pub fn namespace(&self) -> NamespaceId { + self.namespace + } + + pub fn namespace_bytes(&self) -> &[u8; 32] { + self.namespace.as_bytes() + } + + pub fn author(&self) -> AuthorId { + self.author + } + + pub fn author_bytes(&self) -> &[u8; 32] { + self.author.as_bytes() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Record { + /// Record creation timestamp. Counted as micros since the Unix epoch. + timestamp: u64, + /// Length of the data referenced by `hash`. + len: u64, + hash: Hash, +} + +impl Record { + pub fn new(timestamp: u64, len: u64, hash: Hash) -> Self { + Record { + timestamp, + len, + hash, + } + } + + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + pub fn content_len(&self) -> u64 { + self.len + } + + pub fn content_hash(&self) -> &Hash { + &self.hash + } + + pub fn from_hash(hash: Hash, len: u64) -> Self { + let timestamp = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("time drift") + .as_micros() as u64; + Self::new(timestamp, len, hash) + } + + // TODO: remove + pub fn from_data(data: impl AsRef<[u8]>, namespace: NamespaceId) -> Self { + // Salted hash + // TODO: do we actually want this? 
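+        // Salting the hash with the namespace id means identical payloads in
+        // different namespaces produce different content hashes.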
+ // TODO: this should probably use a namespace prefix if used + let mut hasher = blake3::Hasher::new(); + hasher.update(namespace.as_bytes()); + hasher.update(data.as_ref()); + let hash = hasher.finalize(); + Self::from_hash(hash.into(), data.as_ref().len() as u64) + } + + pub fn as_bytes(&self, out: &mut Vec) { + out.extend_from_slice(&self.timestamp.to_be_bytes()); + out.extend_from_slice(&self.len.to_be_bytes()); + out.extend_from_slice(self.hash.as_ref()); + } +} + +#[cfg(test)] +mod tests { + use anyhow::Result; + + use crate::{ranger::Range, store}; + + use super::*; + + #[test] + fn test_basics_memory() -> Result<()> { + let store = store::memory::Store::default(); + test_basics(store)?; + + Ok(()) + } + + #[cfg(feature = "fs-store")] + #[test] + fn test_basics_fs() -> Result<()> { + let dbfile = tempfile::NamedTempFile::new()?; + let store = store::fs::Store::new(dbfile.path())?; + test_basics(store)?; + Ok(()) + } + + fn test_basics(store: S) -> Result<()> { + let mut rng = rand::thread_rng(); + let alice = Author::new(&mut rng); + let bob = Author::new(&mut rng); + let myspace = Namespace::new(&mut rng); + + let record_id = RecordIdentifier::new("/my/key", myspace.id(), alice.id()); + let record = Record::from_data(b"this is my cool data", myspace.id()); + let entry = Entry::new(record_id, record); + let signed_entry = entry.sign(&myspace, &alice); + signed_entry.verify().expect("failed to verify"); + + let my_replica = store.new_replica(myspace)?; + for i in 0..10 { + my_replica + .hash_and_insert(format!("/{i}"), &alice, format!("{i}: hello from alice")) + .map_err(Into::into)?; + } + + for i in 0..10 { + let res = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), format!("/{i}"))? + .unwrap(); + let len = format!("{i}: hello from alice").as_bytes().len() as u64; + assert_eq!(res.entry().record().content_len(), len); + res.verify()?; + } + + // Test multiple records for the same key + my_replica + .hash_and_insert("/cool/path", &alice, "round 1") + .map_err(Into::into)?; + let _entry = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .unwrap(); + // Second + my_replica + .hash_and_insert("/cool/path", &alice, "round 2") + .map_err(Into::into)?; + let _entry = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .unwrap(); + + // Get All by author + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 2); + + // Get All by key + let entries: Vec<_> = store + .get_all_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 2); + + // Get latest by key + let entries: Vec<_> = store + .get_latest_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 1); + + // Get latest by prefix + let entries: Vec<_> = store + .get_latest_by_prefix(my_replica.namespace(), b"/cool")? + .collect::>()?; + assert_eq!(entries.len(), 1); + + // Get All + let entries: Vec<_> = store + .get_all(my_replica.namespace())? + .collect::>()?; + assert_eq!(entries.len(), 12); + + // Get All latest + let entries: Vec<_> = store + .get_latest(my_replica.namespace())? 
+ .collect::>()?; + assert_eq!(entries.len(), 11); + + // insert record from different author + let _entry = my_replica + .hash_and_insert("/cool/path", &bob, "bob round 1") + .map_err(Into::into)?; + + // Get All by author + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 2); + + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), bob.id(), "/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 1); + + // Get All by key + let entries: Vec<_> = store + .get_all_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 3); + + // Get latest by key + let entries: Vec<_> = store + .get_latest_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; + assert_eq!(entries.len(), 2); + + // Get latest by prefix + let entries: Vec<_> = store + .get_latest_by_prefix(my_replica.namespace(), b"/cool")? + .collect::>()?; + assert_eq!(entries.len(), 2); + + // Get all by prefix + let entries: Vec<_> = store + .get_all_by_prefix(my_replica.namespace(), b"/cool")? + .collect::>()?; + assert_eq!(entries.len(), 3); + + // Get All + let entries: Vec<_> = store + .get_all(my_replica.namespace())? + .collect::>()?; + assert_eq!(entries.len(), 13); + + // Get All latest + let entries: Vec<_> = store + .get_latest(my_replica.namespace())? + .collect::>()?; + assert_eq!(entries.len(), 12); + + Ok(()) + } + + #[test] + fn test_multikey() { + let mut rng = rand::thread_rng(); + + let k = vec!["a", "c", "z"]; + + let mut n: Vec<_> = (0..3).map(|_| Namespace::new(&mut rng)).collect(); + n.sort_by_key(|n| n.id()); + + let mut a: Vec<_> = (0..3).map(|_| Author::new(&mut rng)).collect(); + a.sort_by_key(|a| a.id()); + + // Just key + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[1], n[0].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[2], n[0].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just namespace + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[0], n[1].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[0], n[2].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just author + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[0], n[0].id(), a[1].id()); + let ri2 = RecordIdentifier::new(k[0], n[0].id(), a[2].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just key and namespace + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[1], n[1].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[2], n[2].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + } + + #[test] + fn test_replica_sync_memory() -> Result<()> { + let alice_store = store::memory::Store::default(); + let bob_store = 
store::memory::Store::default(); + + test_replica_sync(alice_store, bob_store)?; + Ok(()) + } + + #[cfg(feature = "fs-store")] + #[test] + fn test_replica_sync_fs() -> Result<()> { + let alice_dbfile = tempfile::NamedTempFile::new()?; + let alice_store = store::fs::Store::new(alice_dbfile.path())?; + let bob_dbfile = tempfile::NamedTempFile::new()?; + let bob_store = store::fs::Store::new(bob_dbfile.path())?; + test_replica_sync(alice_store, bob_store)?; + + Ok(()) + } + + fn test_replica_sync(alice_store: S, bob_store: S) -> Result<()> { + let alice_set = ["ape", "eel", "fox", "gnu"]; + let bob_set = ["bee", "cat", "doe", "eel", "fox", "hog"]; + + let mut rng = rand::thread_rng(); + let author = Author::new(&mut rng); + let myspace = Namespace::new(&mut rng); + let alice = alice_store.new_replica(myspace.clone())?; + for el in &alice_set { + alice + .hash_and_insert(el, &author, el.as_bytes()) + .map_err(Into::into)?; + } + + let bob = bob_store.new_replica(myspace)?; + for el in &bob_set { + bob.hash_and_insert(el, &author, el.as_bytes()) + .map_err(Into::into)?; + } + + sync( + &author, + &alice, + &alice_store, + &bob, + &bob_store, + &alice_set, + &bob_set, + )?; + Ok(()) + } + + fn sync( + author: &Author, + alice: &Replica, + alice_store: &S, + bob: &Replica, + bob_store: &S, + alice_set: &[&str], + bob_set: &[&str], + ) -> Result<()> { + // Sync alice - bob + let mut next_to_bob = Some(alice.sync_initial_message().map_err(Into::into)?); + let mut rounds = 0; + while let Some(msg) = next_to_bob.take() { + assert!(rounds < 100, "too many rounds"); + rounds += 1; + println!("round {}", rounds); + if let Some(msg) = bob.sync_process_message(msg, None).map_err(Into::into)? { + next_to_bob = alice.sync_process_message(msg, None).map_err(Into::into)?; + } + } + + // Check result + for el in alice_set { + alice_store.get_latest_by_key_and_author(alice.namespace(), author.id(), el)?; + bob_store.get_latest_by_key_and_author(bob.namespace(), author.id(), el)?; + } + + for el in bob_set { + alice_store.get_latest_by_key_and_author(alice.namespace(), author.id(), el)?; + bob_store.get_latest_by_key_and_author(bob.namespace(), author.id(), el)?; + } + Ok(()) + } +} diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 7e8ec2def8..afa611e111 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -26,10 +26,13 @@ iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" +iroh-sync = { version = "0.5.1", path = "../iroh-sync" } +iroh-gossip = { version = "0.5.1", path = "../iroh-gossip" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } quinn = "0.10" range-collections = { version = "0.4.0" } +rand = "0.8" serde = { version = "1", features = ["derive"] } thiserror = "1" tokio = { version = "1", features = ["io-util", "rt"] } @@ -50,22 +53,29 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = tr data-encoding = "2.4.0" url = { version = "2.4", features = ["serde"] } +# Examples +once_cell = { version = "1.18.0", optional = true } +ed25519-dalek = { version = "=2.0.0-rc.3", features = ["serde", "rand_core"], optional = true } +shell-words = { version = "1.1.0", optional = true } +shellexpand = { version = "3.1.0", optional = true } +rustyline = { version = "12.0.0", optional = true } 
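# Illustrative note: the optional dependencies above are only pulled in for the new `sync`
# example through the `example-sync` feature declared below. An assumed invocation for trying
# the example locally (the doc name is arbitrary):
#   cargo run --example sync --features example-sync -- open my-doc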
[features] -default = ["cli", "metrics"] +default = ["cli", "metrics", "sync"] +sync = ["metrics", "flat-db", "iroh-sync/fs-store"] cli = ["clap", "config", "console", "dirs-next", "indicatif", "multibase", "quic-rpc/quinn-transport", "tempfile", "tokio/rt-multi-thread", "tracing-subscriber"] metrics = ["iroh-metrics", "flat-db", "mem-db", "iroh-collection"] flat-db = [] mem-db = [] iroh-collection = [] test = [] +example-sync = ["cli", "ed25519-dalek", "once_cell", "shell-words", "shellexpand", "sync", "rustyline"] [dev-dependencies] anyhow = { version = "1", features = ["backtrace"] } bytes = "1" duct = "0.13.6" nix = "0.26.2" -rand = "0.8" regex = { version = "1.7.1", features = ["std"] } testdir = "0.8" tokio = { version = "1", features = ["macros", "io-util", "rt"] } @@ -84,6 +94,14 @@ required-features = ["cli"] name = "collection" required-features = ["mem-db", "iroh-collection"] +[[example]] +name = "dump-blob-stream" +required-features = ["mem-db", "iroh-collection"] + [[example]] name = "hello-world" required-features = ["mem-db"] + +[[example]] +name = "sync" +required-features = ["example-sync"] diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs new file mode 100644 index 0000000000..d27076a584 --- /dev/null +++ b/iroh/examples/sync.rs @@ -0,0 +1,997 @@ +//! Live edit a p2p document +//! +//! By default a new peer id is created when starting the example. To reuse your identity, +//! set the `--private-key` CLI flag with the private key printed on a previous invocation. +//! +//! You can use this with a local DERP server. To do so, run +//! `cargo run --bin derper -- --dev` +//! and then set the `-d http://localhost:3340` flag on this example. + +use std::{ + collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc, + time::Instant, +}; + +use anyhow::{anyhow, bail}; +use clap::{CommandFactory, FromArgMatches, Parser}; +use ed25519_dalek::SigningKey; +use indicatif::HumanBytes; +use iroh::sync::{ + BlobStore, Doc as SyncDoc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN, +}; +use iroh_bytes::util::runtime; +use iroh_gossip::{ + net::{Gossip, GOSSIP_ALPN}, + proto::TopicId, +}; +use iroh_metrics::{ + core::{Counter, Metric}, + struct_iterable::Iterable, +}; +use iroh_net::{ + defaults::default_derp_map, derp::DerpMap, magic_endpoint::get_alpn, tls::Keypair, + MagicEndpoint, +}; +use iroh_sync::{ + store::{self, Store as _}, + sync::{Author, Namespace, SignedEntry}, +}; +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; +use tokio::{ + io::AsyncWriteExt, + sync::{mpsc, oneshot}, + task::JoinHandle, +}; +use tracing::warn; +use tracing_subscriber::{EnvFilter, Registry}; +use url::Url; + +use iroh_bytes_handlers::IrohBytesHandlers; + +const MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; + +type Doc = SyncDoc; + +#[derive(Parser, Debug)] +struct Args { + /// Private key to derive our peer id from + #[clap(long)] + private_key: Option, + /// Path to a data directory where blobs will be persisted + #[clap(short, long)] + storage_path: Option, + /// Set a custom DERP server. By default, the DERP server hosted by n0 will be used. + #[clap(short, long)] + derp: Option, + /// Disable DERP completeley + #[clap(long)] + no_derp: bool, + /// Set your nickname + #[clap(short, long)] + name: Option, + /// Set the bind port for our socket. By default, a random port will be used. 
+ #[clap(short, long, default_value = "0")] + bind_port: u16, + /// Bind address on which to serve Prometheus metrics + #[clap(long)] + metrics_addr: Option, + #[clap(subcommand)] + command: Command, +} + +#[derive(Parser, Debug)] +enum Command { + Open { doc_name: String }, + Join { ticket: String }, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + let args = Args::parse(); + run(args).await +} + +pub fn init_metrics_collection( + metrics_addr: Option, +) -> Option> { + iroh_metrics::core::Core::init(|reg, metrics| { + metrics.insert(iroh::sync::metrics::Metrics::new(reg)); + metrics.insert(iroh_gossip::metrics::Metrics::new(reg)); + }); + + // doesn't start the server if the address is None + if let Some(metrics_addr) = metrics_addr { + return Some(tokio::spawn(async move { + if let Err(e) = iroh_metrics::metrics::start_metrics_server(metrics_addr).await { + eprintln!("Failed to start metrics server: {e}"); + } + })); + } + tracing::info!("Metrics server not started, no address provided"); + None +} + +async fn run(args: Args) -> anyhow::Result<()> { + // setup logging + let log_filter = init_logging(); + + let metrics_fut = init_metrics_collection(args.metrics_addr); + + // parse or generate our keypair + let keypair = match args.private_key { + None => Keypair::generate(), + Some(key) => parse_keypair(&key)?, + }; + println!("> our private key: {}", fmt_secret(&keypair)); + + // configure our derp map + let derp_map = match (args.no_derp, args.derp) { + (false, None) => Some(default_derp_map()), + (false, Some(url)) => Some(DerpMap::from_url(url, 0)), + (true, None) => None, + (true, Some(_)) => bail!("You cannot set --no-derp and --derp at the same time"), + }; + println!("> using DERP servers: {}", fmt_derp_map(&derp_map)); + + // build our magic endpoint and the gossip protocol + let (endpoint, gossip, initial_endpoints) = { + // init a cell that will hold our gossip handle to be used in endpoint callbacks + let gossip_cell: OnceCell = OnceCell::new(); + // init a channel that will emit once the initial endpoints of our local node are discovered + let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); + // build the magic endpoint + let endpoint = MagicEndpoint::builder() + .keypair(keypair.clone()) + .alpns(vec![ + GOSSIP_ALPN.to_vec(), + SYNC_ALPN.to_vec(), + iroh_bytes::protocol::ALPN.to_vec(), + ]) + .derp_map(derp_map) + .on_endpoints({ + let gossip_cell = gossip_cell.clone(); + Box::new(move |endpoints| { + // send our updated endpoints to the gossip protocol to be sent as PeerData to peers + if let Some(gossip) = gossip_cell.get() { + gossip.update_endpoints(endpoints).ok(); + } + // trigger oneshot on the first endpoint update + initial_endpoints_tx.try_send(endpoints.to_vec()).ok(); + }) + }) + .bind(args.bind_port) + .await?; + + // initialize the gossip protocol + let gossip = Gossip::from_endpoint(endpoint.clone(), Default::default()); + // insert into the gossip cell to be used in the endpoint callbacks above + gossip_cell.set(gossip.clone()).unwrap(); + + // wait for a first endpoint update so that we know about at least one of our addrs + let initial_endpoints = initial_endpoints_rx.recv().await.unwrap(); + // pass our initial endpoints to the gossip protocol so that they can be announced to peers + gossip.update_endpoints(&initial_endpoints)?; + (endpoint, gossip, initial_endpoints) + }; + println!("> our peer id: {}", endpoint.peer_id()); + + let (topic, peers) = match &args.command { + Command::Open { doc_name } => { + let topic: 
TopicId = iroh_bytes::Hash::new(doc_name.as_bytes()).into(); + println!( + "> opening document {doc_name} as namespace {} and waiting for peers to join us...", + fmt_hash(topic.as_bytes()) + ); + (topic, vec![]) + } + Command::Join { ticket } => { + let Ticket { topic, peers } = Ticket::from_str(ticket)?; + println!("> joining topic {topic} and connecting to {peers:?}",); + (topic, peers) + } + }; + + let our_ticket = { + // add our local endpoints to the ticket and print it for others to join + let addrs = initial_endpoints.iter().map(|ep| ep.addr).collect(); + let mut peers = peers.clone(); + peers.push(PeerSource { + peer_id: endpoint.peer_id(), + addrs, + derp_region: endpoint.my_derp().await, + }); + Ticket { peers, topic } + }; + println!("> ticket to join us: {our_ticket}"); + + // unwrap our storage path or default to temp + let storage_path = args.storage_path.unwrap_or_else(|| { + let name = format!("iroh-sync-{}", endpoint.peer_id()); + let dir = std::env::temp_dir().join(name); + if !dir.exists() { + std::fs::create_dir(&dir).expect("failed to create temp dir"); + } + dir + }); + println!("> storage directory: {storage_path:?}"); + + // create a runtime that can spawn tasks on a local-thread executors (to support !Send futures) + let rt = iroh_bytes::util::runtime::Handle::from_currrent(num_cpus::get())?; + + // create a blob store (with a iroh-bytes database inside) + let blobs = BlobStore::new(rt.clone(), storage_path.join("blobs"), endpoint.clone()).await?; + + // create a doc store for the iroh-sync docs + let author = Author::from(keypair.secret().clone()); + let docs_path = storage_path.join("docs"); + tokio::fs::create_dir_all(&docs_path).await?; + let docs = DocStore::new(blobs.clone(), author, docs_path)?; + + // create the live syncer + let live_sync = LiveSync::::spawn(endpoint.clone(), gossip.clone()); + + // construct the state that is passed to the endpoint loop and from there cloned + // into to the connection handler task for incoming connections. 
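// Illustrative standalone sketch of the same pattern in miniature (only std and tokio are
// assumed; `Shared` and the counter are hypothetical stand-ins for `State`): shared state is
// built once behind an `Arc`, and every spawned handler task works on its own cheap clone.
use std::sync::{
    atomic::{AtomicU64, Ordering},
    Arc,
};

#[derive(Debug, Default)]
struct Shared {
    connections_handled: AtomicU64,
}

#[tokio::main]
async fn main() {
    let shared = Arc::new(Shared::default());
    let mut tasks = Vec::new();
    for _ in 0..3 {
        // each handler task gets its own clone of the shared state
        let shared = shared.clone();
        tasks.push(tokio::spawn(async move {
            shared.connections_handled.fetch_add(1, Ordering::Relaxed);
        }));
    }
    for task in tasks {
        task.await.expect("task panicked");
    }
    assert_eq!(shared.connections_handled.load(Ordering::Relaxed), 3);
}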
+ let state = Arc::new(State { + gossip: gossip.clone(), + docs: docs.clone(), + bytes: IrohBytesHandlers::new(rt.clone(), blobs.db().clone()), + }); + + // spawn our endpoint loop that forwards incoming connections + rt.main().spawn(endpoint_loop(endpoint.clone(), state)); + + // open our document and add to the live syncer + let namespace = Namespace::from_bytes(topic.as_bytes()); + println!("> opening doc {}", fmt_hash(namespace.id().as_bytes())); + let doc: Doc = docs.create_or_open(namespace, DownloadMode::Always).await?; + live_sync.add(doc.replica().clone(), peers.clone()).await?; + + // spawn an repl thread that reads stdin and parses each line as a `Cmd` command + let (cmd_tx, mut cmd_rx) = mpsc::channel(1); + std::thread::spawn(move || repl_loop(cmd_tx).expect("input loop crashed")); + // process commands in a loop + println!("> ready to accept commands"); + println!("> type `help` for a list of commands"); + + let current_watch: Arc>> = + Arc::new(std::sync::Mutex::new(None)); + let watch = current_watch.clone(); + doc.on_insert(Box::new(move |_origin, entry| { + let matcher = watch.lock().unwrap(); + if let Some(matcher) = &*matcher { + let key = entry.entry().id().key(); + if key.starts_with(matcher.as_bytes()) { + println!("change: {}", fmt_entry(&entry)); + } + } + })); + + loop { + // wait for a command from the input repl thread + let Some((cmd, to_repl_tx)) = cmd_rx.recv().await else { + break; + }; + // exit command: break early + if let Cmd::Exit = cmd { + to_repl_tx.send(ToRepl::Exit).ok(); + break; + } + + // handle the command, but select against Ctrl-C signal so that commands can be aborted + tokio::select! { + biased; + _ = tokio::signal::ctrl_c() => { + println!("> aborted"); + } + res = handle_command(cmd, &rt, docs.store(), &doc, &our_ticket, &log_filter, ¤t_watch) => if let Err(err) = res { + println!("> error: {err}"); + }, + }; + // notify to the repl that we want to get the next command + to_repl_tx.send(ToRepl::Continue).ok(); + } + + // exit: cancel the sync and store blob database and document + if let Err(err) = live_sync.cancel().await { + println!("> syncer closed with error: {err:?}"); + } + println!("> persisting document and blob database at {storage_path:?}"); + blobs.save().await?; + + if let Some(metrics_fut) = metrics_fut { + metrics_fut.abort(); + drop(metrics_fut); + } + + Ok(()) +} + +async fn handle_command( + cmd: Cmd, + rt: &runtime::Handle, + store: &store::fs::Store, + doc: &Doc, + ticket: &Ticket, + log_filter: &LogLevelReload, + current_watch: &Arc>>, +) -> anyhow::Result<()> { + match cmd { + Cmd::Set { key, value } => { + doc.insert_bytes(&key, value.into_bytes().into()).await?; + } + Cmd::Get { + key, + print_content, + prefix, + } => { + let entries = if prefix { + store.get_all_by_prefix(doc.replica().namespace(), key.as_bytes())? + } else { + store.get_all_by_key(doc.replica().namespace(), key.as_bytes())? 
+ }; + for entry in entries { + let (_id, entry) = entry?; + println!("{}", fmt_entry(&entry)); + if print_content { + println!("{}", fmt_content(doc, &entry).await); + } + } + } + Cmd::Watch { key } => { + println!("watching key: '{key}'"); + current_watch.lock().unwrap().replace(key); + } + Cmd::WatchCancel => match current_watch.lock().unwrap().take() { + Some(key) => { + println!("canceled watching key: '{key}'"); + } + None => { + println!("no watch active"); + } + }, + Cmd::Ls { prefix } => { + let entries = match prefix { + None => store.get_all(doc.replica().namespace())?, + Some(prefix) => { + store.get_all_by_prefix(doc.replica().namespace(), prefix.as_bytes())? + } + }; + let mut count = 0; + for entry in entries { + let (_id, entry) = entry?; + count += 1; + println!("{}", fmt_entry(&entry),); + } + println!("> {} entries", count); + } + Cmd::Ticket => { + println!("Ticket: {ticket}"); + } + Cmd::Log { directive } => { + let next_filter = EnvFilter::from_str(&directive)?; + log_filter.modify(|layer| *layer = next_filter)?; + } + Cmd::Stats => get_stats(), + Cmd::Fs(cmd) => handle_fs_command(cmd, store, doc).await?, + Cmd::Hammer { + prefix, + threads, + count, + size, + mode, + } => { + println!( + "> Hammering with prefix \"{prefix}\" for {threads} x {count} messages of size {size} bytes in {mode} mode", + mode = format!("{mode:?}").to_lowercase() + ); + let start = Instant::now(); + let mut handles: Vec>> = Vec::new(); + match mode { + HammerMode::Set => { + let mut bytes = vec![0; size]; + // TODO: Add a flag to fill content differently per entry to be able to + // test downloading too + bytes.fill(97); + for t in 0..threads { + let prefix = prefix.clone(); + let doc = doc.clone(); + let bytes = bytes.clone(); + let handle = rt.main().spawn(async move { + for i in 0..count { + let value = String::from_utf8(bytes.clone()).unwrap(); + let key = format!("{}/{}/{}", prefix, t, i); + doc.insert_bytes(key, value.into_bytes().into()).await?; + } + Ok(count) + }); + handles.push(handle); + } + } + HammerMode::Get => { + for t in 0..threads { + let prefix = prefix.clone(); + let doc = doc.clone(); + let store = store.clone(); + let handle = rt.main().spawn(async move { + let mut read = 0; + for i in 0..count { + let key = format!("{}/{}/{}", prefix, t, i); + let entries = store + .get_all_by_key(doc.replica().namespace(), key.as_bytes())?; + for entry in entries { + let (_id, entry) = entry?; + let _content = fmt_content_simple(&doc, &entry); + read += 1; + } + } + Ok(read) + }); + handles.push(handle); + } + } + } + + let mut total_count = 0; + for result in futures::future::join_all(handles).await { + // Check that no errors ocurred and count rows inserted/read + total_count += result??; + } + + let diff = start.elapsed().as_secs_f64(); + println!( + "> Hammering done in {diff:.2}s for {total_count} messages with total of {size}", + size = HumanBytes(total_count as u64 * size as u64), + ); + } + Cmd::Exit => {} + } + Ok(()) +} + +async fn handle_fs_command(cmd: FsCmd, store: &store::fs::Store, doc: &Doc) -> anyhow::Result<()> { + match cmd { + FsCmd::ImportFile { file_path, key } => { + let file_path = canonicalize_path(&file_path)?.canonicalize()?; + let (hash, len) = doc.insert_from_file(&key, &file_path).await?; + println!( + "> imported {file_path:?}: {} ({})", + fmt_hash(hash), + HumanBytes(len) + ); + } + FsCmd::ImportDir { + dir_path, + mut key_prefix, + } => { + if key_prefix.ends_with('/') { + key_prefix.pop(); + } + let root = 
canonicalize_path(&dir_path)?.canonicalize()?; + let files = walkdir::WalkDir::new(&root).into_iter(); + // TODO: parallelize + for file in files { + let file = file?; + if file.file_type().is_file() { + let relative = file.path().strip_prefix(&root)?.to_string_lossy(); + if relative.is_empty() { + warn!("invalid file path: {:?}", file.path()); + continue; + } + let key = format!("{key_prefix}/{relative}"); + let (hash, len) = doc.insert_from_file(key, file.path()).await?; + println!( + "> imported {relative}: {} ({})", + fmt_hash(hash), + HumanBytes(len) + ); + } + } + } + FsCmd::ExportDir { + mut key_prefix, + dir_path, + } => { + if !key_prefix.ends_with('/') { + key_prefix.push('/'); + } + let root = canonicalize_path(&dir_path)?; + println!("> exporting {key_prefix} to {root:?}"); + let entries = + store.get_latest_by_prefix(doc.replica().namespace(), key_prefix.as_bytes())?; + let mut checked_dirs = HashSet::new(); + for entry in entries { + let (id, entry) = entry?; + let key = id.key(); + let relative = String::from_utf8(key[key_prefix.len()..].to_vec())?; + let len = entry.entry().record().content_len(); + if let Some(mut reader) = doc.get_content_reader(&entry).await { + let path = root.join(&relative); + let parent = path.parent().unwrap(); + if !checked_dirs.contains(parent) { + tokio::fs::create_dir_all(&parent).await?; + checked_dirs.insert(parent.to_owned()); + } + let mut file = tokio::fs::File::create(&path).await?; + copy(&mut reader, &mut file).await?; + println!( + "> exported {} to {path:?} ({})", + fmt_hash(entry.content_hash()), + HumanBytes(len) + ); + } + } + } + FsCmd::ExportFile { key, file_path } => { + let path = canonicalize_path(&file_path)?; + // TODO: Fix + let entry = store + .get_latest_by_key(doc.replica().namespace(), &key)? + .next(); + if let Some(entry) = entry { + let (_, entry) = entry?; + println!("> exporting {key} to {path:?}"); + let parent = path.parent().ok_or_else(|| anyhow!("Invalid path"))?; + tokio::fs::create_dir_all(&parent).await?; + let mut file = tokio::fs::File::create(&path).await?; + let mut reader = doc + .get_content_reader(&entry) + .await + .ok_or_else(|| anyhow!(format!("content for {key} is not available")))?; + copy(&mut reader, &mut file).await?; + } else { + println!("> key not found, abort"); + } + } + } + + Ok(()) +} + +#[derive(Parser, Debug)] +pub enum Cmd { + /// Set an entry + Set { + /// Key to the entry (parsed as UTF-8 string). + key: String, + /// Content to store for this entry (parsed as UTF-8 string) + value: String, + }, + /// Get entries by key + /// + /// Shows the author, content hash and content length for all entries for this key. + Get { + /// Key to the entry (parsed as UTF-8 string). + key: String, + /// Print the value (but only if it is valid UTF-8 and smaller than 1MB) + #[clap(short = 'c', long)] + print_content: bool, + /// Match the key as prefix, not an exact match. + #[clap(short = 'p', long)] + prefix: bool, + }, + /// List entries. + Ls { + /// Optionally list only entries whose key starts with PREFIX. + prefix: Option, + }, + + /// Import from and export to the local file system. + #[clap(subcommand)] + Fs(FsCmd), + + /// Print the ticket with which other peers can join our document. + Ticket, + /// Change the log level + Log { + /// The log level or log filtering directive + /// + /// Valid log levels are: "trace", "debug", "info", "warn", "error" + /// + /// You can also set one or more filtering directives to enable more fine-grained log + /// filtering. 
The supported filtering directives and their semantics are documented here: + /// https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives + /// + /// To disable logging completely, set to the empty string (via empty double quotes: ""). + #[clap(verbatim_doc_comment)] + directive: String, + }, + /// Watch for changes. + Watch { + /// The key to watch. + key: String, + }, + /// Cancels any running watch command. + WatchCancel, + /// Show stats about the current session + Stats, + /// Hammer time - stress test with the hammer + Hammer { + /// The hammer mode + #[clap(value_enum)] + mode: HammerMode, + /// The key prefix + prefix: String, + /// The number of threads to use (each thread will create it's own replica) + #[clap(long, short, default_value = "2")] + threads: usize, + /// The number of entries to create + #[clap(long, short, default_value = "1000")] + count: usize, + /// The size of each entry in Bytes + #[clap(long, short, default_value = "1024")] + size: usize, + }, + /// Quit + Exit, +} + +#[derive(Clone, Debug, clap::ValueEnum)] +pub enum HammerMode { + /// Create entries + Set, + /// Read entries + Get, +} + +#[derive(Parser, Debug)] +pub enum FsCmd { + /// Import a file system directory into the document. + ImportDir { + /// The file system path to import recursively + dir_path: String, + /// The key prefix to apply to the document keys + key_prefix: String, + }, + /// Import a file into the document. + ImportFile { + /// The path to the file + file_path: String, + /// The key in the document + key: String, + }, + /// Export a part of the document into a file system directory + ExportDir { + /// The key prefix to filter on + key_prefix: String, + /// The file system path to export to + dir_path: String, + }, + /// Import a file into the document. 
+ ExportFile { + /// The key in the document + key: String, + /// The path to the file + file_path: String, + }, +} + +impl FromStr for Cmd { + type Err = anyhow::Error; + fn from_str(s: &str) -> Result { + let args = shell_words::split(s)?; + let matches = Cmd::command() + .multicall(true) + .subcommand_required(true) + .try_get_matches_from(args)?; + let cmd = Cmd::from_arg_matches(&matches)?; + Ok(cmd) + } +} + +#[derive(Debug)] +struct State { + gossip: Gossip, + docs: DocStore, + bytes: IrohBytesHandlers, +} + +async fn endpoint_loop(endpoint: MagicEndpoint, state: Arc) -> anyhow::Result<()> { + while let Some(conn) = endpoint.accept().await { + let state = state.clone(); + tokio::spawn(async move { + if let Err(err) = handle_connection(conn, state).await { + println!("> connection closed, reason: {err}"); + } + }); + } + Ok(()) +} + +async fn handle_connection(mut conn: quinn::Connecting, state: Arc) -> anyhow::Result<()> { + let alpn = get_alpn(&mut conn).await?; + println!("> incoming connection with alpn {alpn}"); + match alpn.as_bytes() { + GOSSIP_ALPN => state.gossip.handle_connection(conn.await?).await, + SYNC_ALPN => state.docs.handle_connection(conn).await, + alpn if alpn == iroh_bytes::protocol::ALPN => state.bytes.handle_connection(conn).await, + _ => bail!("ignoring connection: unsupported ALPN protocol"), + } +} + +#[derive(Debug)] +enum ToRepl { + Continue, + Exit, +} + +fn repl_loop(cmd_tx: mpsc::Sender<(Cmd, oneshot::Sender)>) -> anyhow::Result<()> { + use rustyline::{error::ReadlineError, Config, DefaultEditor}; + let mut rl = DefaultEditor::with_config(Config::builder().check_cursor_position(true).build())?; + loop { + // prepare a channel to receive a signal from the main thread when a command completed + let (to_repl_tx, to_repl_rx) = oneshot::channel(); + let readline = rl.readline(">> "); + match readline { + Ok(line) if line.is_empty() => continue, + Ok(line) => { + rl.add_history_entry(line.as_str())?; + match Cmd::from_str(&line) { + Ok(cmd) => cmd_tx.blocking_send((cmd, to_repl_tx))?, + Err(err) => { + println!("{err}"); + continue; + } + }; + } + Err(ReadlineError::Interrupted | ReadlineError::Eof) => { + cmd_tx.blocking_send((Cmd::Exit, to_repl_tx))?; + } + Err(ReadlineError::WindowResized) => continue, + Err(err) => return Err(err.into()), + } + // wait for reply from main thread + match to_repl_rx.blocking_recv()? { + ToRepl::Continue => continue, + ToRepl::Exit => break, + } + } + Ok(()) +} + +fn get_stats() { + let core = iroh_metrics::core::Core::get().expect("Metrics core not initialized"); + println!("# sync"); + let metrics = core + .get_collector::() + .unwrap(); + fmt_metrics(metrics); + println!("# gossip"); + let metrics = core + .get_collector::() + .unwrap(); + fmt_metrics(metrics); +} + +fn fmt_metrics(metrics: &impl Iterable) { + for (name, counter) in metrics.iter() { + if let Some(counter) = counter.downcast_ref::() { + let value = counter.get(); + println!("{name:23} : {value:>6} ({})", counter.description); + } else { + println!("{name:23} : unsupported metric kind"); + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct Ticket { + topic: TopicId, + peers: Vec, +} +impl Ticket { + /// Deserializes from bytes. + fn from_bytes(bytes: &[u8]) -> anyhow::Result { + postcard::from_bytes(bytes).map_err(Into::into) + } + /// Serializes to bytes. + pub fn to_bytes(&self) -> Vec { + postcard::to_stdvec(self).expect("postcard::to_stdvec is infallible") + } +} + +/// Serializes to base32. 
+impl fmt::Display for Ticket { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let encoded = self.to_bytes(); + let mut text = data_encoding::BASE32_NOPAD.encode(&encoded); + text.make_ascii_lowercase(); + write!(f, "{text}") + } +} + +/// Deserializes from base32. +impl FromStr for Ticket { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let bytes = data_encoding::BASE32_NOPAD.decode(s.to_ascii_uppercase().as_bytes())?; + let slf = Self::from_bytes(&bytes)?; + Ok(slf) + } +} + +type LogLevelReload = tracing_subscriber::reload::Handle; +fn init_logging() -> LogLevelReload { + use tracing_subscriber::{filter, fmt, prelude::*, reload}; + let filter = filter::EnvFilter::from_default_env(); + let (filter, reload_handle) = reload::Layer::new(filter); + tracing_subscriber::registry() + .with(filter) + .with(fmt::Layer::default()) + .init(); + reload_handle +} + +// helpers + +fn fmt_entry(entry: &SignedEntry) -> String { + let id = entry.entry().id(); + let key = std::str::from_utf8(id.key()).unwrap_or(""); + let author = fmt_hash(id.author().as_bytes()); + let hash = entry.entry().record().content_hash(); + let hash = fmt_hash(hash.as_bytes()); + let len = HumanBytes(entry.entry().record().content_len()); + format!("@{author}: {key} = {hash} ({len})",) +} + +async fn fmt_content_simple(_doc: &Doc, entry: &SignedEntry) -> String { + let len = entry.entry().record().content_len(); + format!("<{}>", HumanBytes(len)) +} + +async fn fmt_content(doc: &Doc, entry: &SignedEntry) -> String { + let len = entry.entry().record().content_len(); + if len > MAX_DISPLAY_CONTENT_LEN { + format!("<{}>", HumanBytes(len)) + } else { + match doc.get_content_bytes(entry).await { + None => "".to_string(), + Some(content) => match String::from_utf8(content.into()) { + Ok(str) => str, + Err(_err) => format!("", HumanBytes(len)), + }, + } + } +} +fn fmt_hash(hash: impl AsRef<[u8]>) -> String { + let mut text = data_encoding::BASE32_NOPAD.encode(hash.as_ref()); + text.make_ascii_lowercase(); + format!("{}…{}", &text[..5], &text[(text.len() - 2)..]) +} +fn fmt_secret(keypair: &Keypair) -> String { + let mut text = data_encoding::BASE32_NOPAD.encode(&keypair.secret().to_bytes()); + text.make_ascii_lowercase(); + text +} +fn parse_keypair(secret: &str) -> anyhow::Result { + let bytes: [u8; 32] = data_encoding::BASE32_NOPAD + .decode(secret.to_ascii_uppercase().as_bytes())? + .try_into() + .map_err(|_| anyhow::anyhow!("Invalid secret"))?; + let key = SigningKey::from_bytes(&bytes); + Ok(key.into()) +} +fn fmt_derp_map(derp_map: &Option) -> String { + match derp_map { + None => "None".to_string(), + Some(map) => map + .regions() + .flat_map(|region| region.nodes.iter().map(|node| node.url.to_string())) + .collect::>() + .join(", "), + } +} +fn canonicalize_path(path: &str) -> anyhow::Result { + let path = PathBuf::from(shellexpand::tilde(&path).to_string()); + Ok(path) +} + +/// Copy from a [`iroh_io::AsyncSliceReader`] into a [`tokio::io::AsyncWrite`] +/// +/// TODO: move to iroh-io or iroh-bytes +async fn copy( + mut reader: impl iroh_io::AsyncSliceReader, + mut writer: impl tokio::io::AsyncWrite + Unpin, +) -> anyhow::Result<()> { + // this is the max chunk size. + // will only allocate this much if the resource behind the reader is at least this big. 
+ let chunk_size = 1024 * 16; + let mut pos = 0u64; + loop { + let chunk = reader.read_at(pos, chunk_size).await?; + if chunk.is_empty() { + break; + } + writer.write_all(&chunk).await?; + pos += chunk.len() as u64; + } + Ok(()) +} + +/// handlers for iroh_bytes connections +mod iroh_bytes_handlers { + use std::sync::Arc; + + use bytes::Bytes; + use futures::{future::BoxFuture, FutureExt}; + use iroh_bytes::{ + protocol::{GetRequest, RequestToken}, + provider::{CustomGetHandler, EventSender, RequestAuthorizationHandler}, + }; + + use iroh::{collection::IrohCollectionParser, database::flat::Database}; + + #[derive(Debug, Clone)] + pub struct IrohBytesHandlers { + db: Database, + rt: iroh_bytes::util::runtime::Handle, + event_sender: NoopEventSender, + get_handler: Arc, + auth_handler: Arc, + } + impl IrohBytesHandlers { + pub fn new(rt: iroh_bytes::util::runtime::Handle, db: Database) -> Self { + Self { + db, + rt, + event_sender: NoopEventSender, + get_handler: Arc::new(NoopCustomGetHandler), + auth_handler: Arc::new(NoopRequestAuthorizationHandler), + } + } + pub async fn handle_connection(&self, conn: quinn::Connecting) -> anyhow::Result<()> { + iroh_bytes::provider::handle_connection( + conn, + self.db.clone(), + self.event_sender.clone(), + IrohCollectionParser, + self.get_handler.clone(), + self.auth_handler.clone(), + self.rt.clone(), + ) + .await; + Ok(()) + } + } + + #[derive(Debug, Clone)] + struct NoopEventSender; + impl EventSender for NoopEventSender { + fn send(&self, _event: iroh_bytes::provider::Event) -> BoxFuture<()> { + async {}.boxed() + } + } + #[derive(Debug)] + struct NoopCustomGetHandler; + impl CustomGetHandler for NoopCustomGetHandler { + fn handle( + &self, + _token: Option, + _request: Bytes, + ) -> BoxFuture<'static, anyhow::Result> { + async move { Err(anyhow::anyhow!("no custom get handler defined")) }.boxed() + } + } + #[derive(Debug)] + struct NoopRequestAuthorizationHandler; + impl RequestAuthorizationHandler for NoopRequestAuthorizationHandler { + fn authorize( + &self, + token: Option, + _request: &iroh_bytes::protocol::Request, + ) -> BoxFuture<'static, anyhow::Result<()>> { + async move { + if let Some(token) = token { + anyhow::bail!( + "no authorization handler defined, but token was provided: {:?}", + token + ); + } + Ok(()) + } + .boxed() + } + } +} diff --git a/iroh/src/database/flat.rs b/iroh/src/database/flat.rs index acad820b5b..31a41f2d18 100644 --- a/iroh/src/database/flat.rs +++ b/iroh/src/database/flat.rs @@ -29,6 +29,8 @@ use crate::util::io::validate_bao; use crate::util::io::BaoValidationError; use crate::util::progress::{Progress, ProgressReader, ProgressReaderUpdate}; +pub mod writable; + /// File name of directory inside `IROH_DATA_DIR` where outboards are stored. const FNAME_OUTBOARDS: &str = "outboards"; diff --git a/iroh/src/database/flat/writable.rs b/iroh/src/database/flat/writable.rs new file mode 100644 index 0000000000..6973d6e4fe --- /dev/null +++ b/iroh/src/database/flat/writable.rs @@ -0,0 +1,206 @@ +#![allow(missing_docs)] +//! Quick-and-dirty writable database +//! +//! I wrote this while diving into iroh-bytes, wildly copying code around. This will be solved much +//! nicer with the upcoming generic writable database branch by @rklaehn. 
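// Illustrative usage sketch, assuming only the APIs defined below in this module (the data
// directory and the function name are made up): insert a blob from bytes, then persist the
// path index so the blob can be found again on restart.
#[allow(dead_code)]
async fn usage_sketch() -> anyhow::Result<()> {
    // create (or load) a writable database rooted at a data directory
    let db = WritableFileDatabase::new(std::path::PathBuf::from("/tmp/iroh-example-data")).await?;
    // hash and store a small blob; returns its hash and byte length
    let (hash, len) = db.put_bytes(bytes::Bytes::from_static(b"hello iroh")).await?;
    println!("inserted {len} bytes as {hash:?}");
    // write the path index to disk
    db.save().await?;
    Ok(())
}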
+ +use std::{ + collections::HashMap, + io, + path::{Path, PathBuf}, + sync::Arc, +}; + +use anyhow::Context; +use bytes::Bytes; +use iroh_io::{AsyncSliceWriter, File}; +use range_collections::RangeSet2; +use tokio::io::AsyncRead; + +use iroh_bytes::{ + get::fsm, + protocol::{GetRequest, RangeSpecSeq, Request}, + Hash, +}; + +use crate::database::flat::{create_collection, DataSource, Database, DbEntry, FNAME_PATHS}; + +/// A blob database into which new blobs can be inserted. +/// +/// Blobs can be inserted either from bytes or by downloading from open connections to peers. +/// New blobs will be saved as files with a filename based on their hash. +/// +/// TODO: Replace with the generic writable database. +#[derive(Debug, Clone)] +pub struct WritableFileDatabase { + db: Database, + storage: Arc, +} + +impl WritableFileDatabase { + pub async fn new(data_path: PathBuf) -> anyhow::Result { + let storage = Arc::new(StoragePaths::new(data_path).await?); + let db = if storage.db_path.join(FNAME_PATHS).exists() { + Database::load(&storage.db_path).await.with_context(|| { + format!( + "Failed to load iroh database from {}", + storage.db_path.display() + ) + })? + } else { + Database::default() + }; + Ok(Self { db, storage }) + } + + pub fn db(&self) -> &Database { + &self.db + } + + pub async fn save(&self) -> io::Result<()> { + self.db.save(&self.storage.db_path).await + } + + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.put_bytes(data).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + + pub async fn put_reader(&self, data: impl AsyncRead + Unpin) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.put_reader(data).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + + pub async fn put_from_temp_file(&self, temp_path: &PathBuf) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.move_to_blobs(temp_path).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + + pub async fn get_size(&self, hash: &Hash) -> Option { + Some(self.db.get(hash)?.size().await) + } + + pub fn has(&self, hash: &Hash) -> bool { + self.db.to_inner().contains_key(hash) + } + pub async fn download_single( + &self, + conn: quinn::Connection, + hash: Hash, + ) -> anyhow::Result> { + // 1. Download to temp file + let temp_path = { + let temp_path = self.storage.temp_path(); + let request = + Request::Get(GetRequest::new(hash, RangeSpecSeq::new([RangeSet2::all()]))); + let response = fsm::start(conn, request); + let connected = response.next().await?; + + let fsm::ConnectedNext::StartRoot(curr) = connected.next().await? else { + return Ok(None) + }; + let header = curr.next(); + + let path = temp_path.clone(); + let mut data_file = File::create(move || { + std::fs::OpenOptions::new() + .write(true) + .create(true) + .open(path) + }) + .await?; + + let (curr, _size) = header.next().await?; + let _curr = curr.write_all(&mut data_file).await?; + // Flush the data file first, it is the only thing that matters at this point + data_file.sync().await?; + temp_path + }; + + // 2. 
Insert into database + let (hash, size, entry) = self.storage.move_to_blobs(&temp_path).await?; + let entries = HashMap::from_iter([(hash, entry)]); + self.db.union_with(entries); + Ok(Some((hash, size))) + } +} + +#[derive(Debug)] +pub struct StoragePaths { + blob_path: PathBuf, + temp_path: PathBuf, + db_path: PathBuf, +} + +impl StoragePaths { + pub async fn new(data_path: PathBuf) -> anyhow::Result { + let blob_path = data_path.join("blobs"); + let temp_path = data_path.join("temp"); + let db_path = data_path.join("db"); + tokio::fs::create_dir_all(&blob_path).await?; + tokio::fs::create_dir_all(&temp_path).await?; + tokio::fs::create_dir_all(&db_path).await?; + Ok(Self { + blob_path, + temp_path, + db_path, + }) + } + + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64, DbEntry)> { + let temp_path = self.temp_path(); + tokio::fs::write(&temp_path, &data).await?; + let (hash, size, entry) = self.move_to_blobs(&temp_path).await?; + Ok((hash, size, entry)) + } + + pub async fn put_reader( + &self, + mut reader: impl AsyncRead + Unpin, + ) -> anyhow::Result<(Hash, u64, DbEntry)> { + let temp_path = self.temp_path(); + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create(true) + .open(&temp_path) + .await?; + tokio::io::copy(&mut reader, &mut file).await?; + let (hash, size, entry) = self.move_to_blobs(&temp_path).await?; + Ok((hash, size, entry)) + } + + async fn move_to_blobs(&self, path: &PathBuf) -> anyhow::Result<(Hash, u64, DbEntry)> { + let datasource = DataSource::new(path.clone()); + // TODO: this needlessly creates a collection, but that's what's pub atm in iroh-bytes + let (db, _collection_hash) = create_collection(vec![datasource]).await?; + // the actual blob is the first entry in the external entries in the created collection + let (hash, _path, _len) = db.external().next().unwrap(); + let Some(DbEntry::External { outboard, size, .. }) = db.get(&hash) else { + unreachable!("just inserted"); + }; + + let final_path = prepare_hash_dir(&self.blob_path, &hash).await?; + tokio::fs::rename(&path, &final_path).await?; + let entry = DbEntry::External { + outboard, + path: final_path, + size, + }; + Ok((hash, size, entry)) + } + + fn temp_path(&self) -> PathBuf { + let name = hex::encode(rand::random::().to_be_bytes()); + self.temp_path.join(name) + } +} + +async fn prepare_hash_dir(path: &Path, hash: &Hash) -> anyhow::Result { + let hash = hex::encode(hash.as_ref()); + let path = path.join(&hash[0..2]).join(&hash[2..4]).join(&hash[4..]); + tokio::fs::create_dir_all(path.parent().unwrap()).await?; + Ok(path) +} diff --git a/iroh/src/download.rs b/iroh/src/download.rs new file mode 100644 index 0000000000..60be7b3f92 --- /dev/null +++ b/iroh/src/download.rs @@ -0,0 +1,351 @@ +//! 
Download queue + +use std::{ + collections::{HashMap, VecDeque}, + sync::{Arc, Mutex}, + time::Instant, +}; + +use futures::{ + future::{BoxFuture, LocalBoxFuture, Shared}, + stream::FuturesUnordered, + FutureExt, +}; +use iroh_bytes::util::Hash; +use iroh_gossip::net::util::Dialer; +use iroh_metrics::{inc, inc_by}; +use iroh_net::{tls::PeerId, MagicEndpoint}; +use tokio::sync::oneshot; +use tokio_stream::StreamExt; +use tracing::{debug, error, warn}; + +#[cfg(feature = "metrics")] +use crate::metrics::Metrics; +// TODO: Will be replaced by proper persistent DB once +// https://github.com/n0-computer/iroh/pull/1320 is merged +use crate::database::flat::writable::WritableFileDatabase; + +/// Future for the completion of a download request +pub type DownloadFuture = Shared>>; + +/// A download queue for iroh-bytes +/// +/// Spawns a background task that handles connecting to peers and performing get requests. +/// +/// TODO: Move to iroh-bytes or replace with corresponding feature from iroh-bytes once available +/// TODO: Support retries and backoff - become a proper queue... +/// TODO: Download requests send via synchronous flume::Sender::send. Investigate if we want async +/// here. We currently use [`Downloader::push`] from [`iroh_sync::Replica::on_insert`] callbacks, +/// which are sync, thus we need a sync method on the Downloader to push new download requests. +#[derive(Debug, Clone)] +pub struct Downloader { + pending_downloads: Arc>>, + to_actor_tx: flume::Sender, +} + +impl Downloader { + /// Create a new downloader + pub fn new( + rt: iroh_bytes::util::runtime::Handle, + endpoint: MagicEndpoint, + db: WritableFileDatabase, + ) -> Self { + let (tx, rx) = flume::bounded(64); + // spawn the actor on a local pool + // the local pool is required because WritableFileDatabase::download_single + // returns a future that is !Send + rt.local_pool().spawn_pinned(move || async move { + let mut actor = DownloadActor::new(endpoint, db, rx); + if let Err(err) = actor.run().await { + error!("download actor failed with error {err:?}"); + } + }); + Self { + pending_downloads: Arc::new(Mutex::new(HashMap::new())), + to_actor_tx: tx, + } + } + + /// Add a new download request to the download queue. + /// + /// Note: This method takes only [`PeerId`]s and will attempt to connect to those peers. For + /// this to succeed, you need to add addresses for these peers to the magic endpoint's + /// addressbook yourself. See [`MagicEndpoint::add_known_addrs`]. + pub fn push(&self, hash: Hash, peers: Vec) { + let (reply, reply_rx) = oneshot::channel(); + let req = DownloadRequest { hash, peers, reply }; + + // TODO: this is potentially blocking inside an async call. figure out a better solution + if let Err(err) = self.to_actor_tx.send(req) { + warn!("download actor dropped: {err}"); + } + + if self.pending_downloads.lock().unwrap().get(&hash).is_none() { + let pending_downloads = self.pending_downloads.clone(); + let fut = async move { + let res = reply_rx.await; + pending_downloads.lock().unwrap().remove(&hash); + res.ok().flatten() + }; + self.pending_downloads + .lock() + .unwrap() + .insert(hash, fut.boxed().shared()); + } + } + + /// Returns a future that completes once the blob for `hash` has been downloaded, or all queued + /// requests for that blob have failed. + /// + /// NOTE: This does not start the download itself. Use [`Self::push`] for that. 
+ pub fn finished(&self, hash: &Hash) -> DownloadFuture { + match self.pending_downloads.lock().unwrap().get(hash) { + Some(fut) => fut.clone(), + None => futures::future::ready(None).boxed().shared(), + } + } +} + +type DownloadReply = oneshot::Sender>; +type PendingDownloadsFutures = + FuturesUnordered>)>>; + +#[derive(Debug)] +struct DownloadRequest { + hash: Hash, + peers: Vec, + reply: DownloadReply, +} + +#[derive(Debug)] +struct DownloadActor { + dialer: Dialer, + db: WritableFileDatabase, + conns: HashMap, + replies: HashMap>, + pending_download_futs: PendingDownloadsFutures, + queue: DownloadQueue, + rx: flume::Receiver, +} +impl DownloadActor { + fn new( + endpoint: MagicEndpoint, + db: WritableFileDatabase, + rx: flume::Receiver, + ) -> Self { + Self { + rx, + db, + dialer: Dialer::new(endpoint), + replies: Default::default(), + conns: Default::default(), + pending_download_futs: Default::default(), + queue: Default::default(), + } + } + pub async fn run(&mut self) -> anyhow::Result<()> { + loop { + tokio::select! { + req = self.rx.recv_async() => match req { + Err(_) => return Ok(()), + Ok(req) => self.on_download_request(req).await + }, + (peer, conn) = self.dialer.next() => match conn { + Ok(conn) => { + debug!("connection to {peer} established"); + self.conns.insert(peer, conn); + self.on_peer_ready(peer); + }, + Err(err) => self.on_peer_fail(&peer, err), + }, + Some((peer, hash, res)) = self.pending_download_futs.next() => match res { + Ok(Some((hash, size))) => { + self.queue.on_success(hash, peer); + self.reply(hash, Some((hash, size))); + self.on_peer_ready(peer); + } + Ok(None) => { + self.on_not_found(&peer, hash); + self.on_peer_ready(peer); + } + Err(err) => self.on_peer_fail(&peer, err), + } + } + } + } + + fn reply(&mut self, hash: Hash, res: Option<(Hash, u64)>) { + for reply in self.replies.remove(&hash).into_iter().flatten() { + reply.send(res).ok(); + } + } + + fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { + warn!("download from {peer} failed: {err}"); + for hash in self.queue.on_peer_fail(peer) { + self.reply(hash, None); + } + self.conns.remove(peer); + } + + fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { + self.queue.on_not_found(hash, *peer); + if self.queue.has_no_candidates(&hash) { + self.reply(hash, None); + } + } + + fn on_peer_ready(&mut self, peer: PeerId) { + if let Some(hash) = self.queue.try_next_for_peer(peer) { + self.start_download_unchecked(peer, hash); + } else { + self.conns.remove(&peer); + } + } + + fn start_download_unchecked(&mut self, peer: PeerId, hash: Hash) { + let conn = self.conns.get(&peer).unwrap().clone(); + let blobs = self.db.clone(); + let fut = async move { + #[cfg(feature = "metrics")] + let start = Instant::now(); + let res = blobs.download_single(conn, hash).await; + // record metrics + #[cfg(feature = "metrics")] + { + let elapsed = start.elapsed().as_millis(); + match &res { + Ok(Some((_hash, len))) => { + inc!(Metrics, downloads_success); + inc_by!(Metrics, download_bytes_total, *len); + inc_by!(Metrics, download_time_total, elapsed as u64); + } + Ok(None) => inc!(Metrics, downloads_notfound), + Err(_) => inc!(Metrics, downloads_error), + } + } + (peer, hash, res) + }; + self.pending_download_futs.push(fut.boxed_local()); + } + + async fn on_download_request(&mut self, req: DownloadRequest) { + let DownloadRequest { peers, hash, reply } = req; + if self.db.has(&hash) { + let size = self.db.get_size(&hash).await.unwrap(); + reply.send(Some((hash, size))).ok(); + return; + } + 
self.replies.entry(hash).or_default().push_back(reply); + for peer in peers { + self.queue.push_candidate(hash, peer); + // TODO: Don't dial all peers instantly. + if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { + self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); + } + } + } +} + +#[derive(Debug, Default)] +struct DownloadQueue { + candidates_by_hash: HashMap>, + candidates_by_peer: HashMap>, + running_by_hash: HashMap, + running_by_peer: HashMap, +} + +impl DownloadQueue { + pub fn push_candidate(&mut self, hash: Hash, peer: PeerId) { + self.candidates_by_hash + .entry(hash) + .or_default() + .push_back(peer); + self.candidates_by_peer + .entry(peer) + .or_default() + .push_back(hash); + } + + pub fn try_next_for_peer(&mut self, peer: PeerId) -> Option { + let mut next = None; + for (idx, hash) in self.candidates_by_peer.get(&peer)?.iter().enumerate() { + if !self.running_by_hash.contains_key(hash) { + next = Some((idx, *hash)); + break; + } + } + if let Some((idx, hash)) = next { + self.running_by_hash.insert(hash, peer); + self.running_by_peer.insert(peer, hash); + self.candidates_by_peer.get_mut(&peer).unwrap().remove(idx); + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != &peer); + } + self.ensure_no_empty(hash, peer); + Some(hash) + } else { + None + } + } + + pub fn has_no_candidates(&self, hash: &Hash) -> bool { + self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(hash).is_none() + } + + pub fn on_success(&mut self, hash: Hash, peer: PeerId) -> Option<(PeerId, Hash)> { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.try_next_for_peer(peer).map(|hash| (peer, hash)) + } + + pub fn on_peer_fail(&mut self, peer: &PeerId) -> Vec { + let mut failed = vec![]; + for hash in self + .candidates_by_peer + .remove(peer) + .map(|hashes| hashes.into_iter()) + .into_iter() + .flatten() + { + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != peer); + if peers.is_empty() && self.running_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + } + if let Some(hash) = self.running_by_peer.remove(peer) { + self.running_by_hash.remove(&hash); + if self.candidates_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + failed + } + + pub fn on_not_found(&mut self, hash: Hash, peer: PeerId) { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.ensure_no_empty(hash, peer); + } + + fn ensure_no_empty(&mut self, hash: Hash, peer: PeerId) { + if self + .candidates_by_peer + .get(&peer) + .map_or(false, |hashes| hashes.is_empty()) + { + self.candidates_by_peer.remove(&peer); + } + if self + .candidates_by_hash + .get(&hash) + .map_or(false, |peers| peers.is_empty()) + { + self.candidates_by_hash.remove(&hash); + } + } +} diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 1b152de112..6a829fffec 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -8,8 +8,14 @@ pub use iroh_net as net; pub mod collection; pub mod database; pub mod dial; +// TODO: Remove feature flag once https://github.com/n0-computer/iroh/pull/1320 is merged +#[cfg(feature = "flat-db")] +pub mod download; pub mod node; pub mod rpc_protocol; +#[allow(missing_docs)] +#[cfg(feature = "sync")] +pub mod sync; pub mod util; /// Expose metrics module diff --git a/iroh/src/metrics.rs b/iroh/src/metrics.rs index 3b3b7f46ef..74355f2a09 100644 --- 
a/iroh/src/metrics.rs +++ b/iroh/src/metrics.rs @@ -10,6 +10,11 @@ pub struct Metrics { pub requests_total: Counter, pub bytes_sent: Counter, pub bytes_received: Counter, + pub download_bytes_total: Counter, + pub download_time_total: Counter, + pub downloads_success: Counter, + pub downloads_error: Counter, + pub downloads_notfound: Counter, } impl Default for Metrics { @@ -18,6 +23,11 @@ impl Default for Metrics { requests_total: Counter::new("Total number of requests received"), bytes_sent: Counter::new("Number of bytes streamed"), bytes_received: Counter::new("Number of bytes received"), + download_bytes_total: Counter::new("Total number of content bytes downloaded"), + download_time_total: Counter::new("Total time in ms spent downloading content bytes"), + downloads_success: Counter::new("Total number of successfull downloads"), + downloads_error: Counter::new("Total number of downloads failed with error"), + downloads_notfound: Counter::new("Total number of downloads failed with not found"), } } } diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs new file mode 100644 index 0000000000..b710b70da5 --- /dev/null +++ b/iroh/src/sync.rs @@ -0,0 +1,290 @@ +//! Implementation of the iroh-sync protocol + +use std::net::SocketAddr; + +use anyhow::{bail, ensure, Context, Result}; +use bytes::BytesMut; +use iroh_net::{magic_endpoint::get_peer_id, tls::PeerId, MagicEndpoint}; +use iroh_sync::{ + store, + sync::{NamespaceId, Replica}, +}; +use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncRead, AsyncWrite}; +use tracing::debug; + +/// The ALPN identifier for the iroh-sync protocol +pub const SYNC_ALPN: &[u8] = b"/iroh-sync/1"; + +mod content; +mod live; +pub mod metrics; + +pub use content::*; +pub use live::*; + +/// Sync Protocol +/// +/// - Init message: signals which namespace is being synced +/// - N Sync messages +/// +/// On any error and on success the substream is closed. +#[derive(Debug, Clone, Serialize, Deserialize)] +enum Message { + Init { + /// Namespace to sync + namespace: NamespaceId, + /// Initial message + message: iroh_sync::sync::ProtocolMessage, + }, + Sync(iroh_sync::sync::ProtocolMessage), +} + +/// Connect to a peer and sync a replica +pub async fn connect_and_sync( + endpoint: &MagicEndpoint, + doc: &Replica, + peer_id: PeerId, + derp_region: Option, + addrs: &[SocketAddr], +) -> anyhow::Result<()> { + debug!("sync with peer {}: start", peer_id); + let connection = endpoint + .connect(peer_id, SYNC_ALPN, derp_region, addrs) + .await + .context("dial_and_sync")?; + let (mut send_stream, mut recv_stream) = connection.open_bi().await?; + let res = run_alice::(&mut send_stream, &mut recv_stream, doc, Some(peer_id)).await; + debug!("sync with peer {}: finish {:?}", peer_id, res); + res +} + +/// Runs the initiator side of the sync protocol. +pub async fn run_alice( + writer: &mut W, + reader: &mut R, + alice: &Replica, + peer: Option, +) -> Result<()> { + let peer = peer.map(|peer| peer.to_bytes()); + let mut buffer = BytesMut::with_capacity(1024); + + // Init message + + let init_message = Message::Init { + namespace: alice.namespace(), + message: alice.sync_initial_message().map_err(Into::into)?, + }; + let msg_bytes = postcard::to_stdvec(&init_message)?; + iroh_bytes::protocol::write_lp(writer, &msg_bytes).await?; + + // Sync message loop + + while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? { + debug!("read {}", read.len()); + let msg = postcard::from_bytes(&read)?; + match msg { + Message::Init { .. 
} => { + bail!("unexpected message: init"); + } + Message::Sync(msg) => { + if let Some(msg) = alice.sync_process_message(msg, peer).map_err(Into::into)? { + send_sync_message(writer, msg).await?; + } else { + break; + } + } + } + } + + Ok(()) +} + +/// Handle an iroh-sync connection and sync all shared documents in the replica store. +pub async fn handle_connection( + connecting: quinn::Connecting, + replica_store: S, +) -> Result<()> { + let connection = connecting.await?; + debug!("> connection established!"); + let peer_id = get_peer_id(&connection).await?; + let (mut send_stream, mut recv_stream) = connection.accept_bi().await?; + + run_bob( + &mut send_stream, + &mut recv_stream, + replica_store, + Some(peer_id), + ) + .await?; + send_stream.finish().await?; + + debug!("done"); + + Ok(()) +} + +/// Runs the receiver side of the sync protocol. +pub async fn run_bob( + writer: &mut W, + reader: &mut R, + replica_store: S, + peer: Option, +) -> Result<()> { + let peer = peer.map(|peer| peer.to_bytes()); + let mut buffer = BytesMut::with_capacity(1024); + + let mut replica = None; + while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? { + debug!("read {}", read.len()); + let msg = postcard::from_bytes(&read)?; + + match msg { + Message::Init { namespace, message } => { + ensure!(replica.is_none(), "double init message"); + + match replica_store.get_replica(&namespace)? { + Some(r) => { + debug!("starting sync for {}", namespace); + if let Some(msg) = + r.sync_process_message(message, peer).map_err(Into::into)? + { + send_sync_message(writer, msg).await?; + } else { + break; + } + replica = Some(r); + } + None => { + // TODO: this should be possible. + bail!("unable to synchronize unknown namespace: {}", namespace); + } + } + } + Message::Sync(msg) => match replica { + Some(ref replica) => { + if let Some(msg) = replica + .sync_process_message(msg, peer) + .map_err(Into::into)? + { + send_sync_message(writer, msg).await?; + } else { + break; + } + } + None => { + bail!("unexpected sync message without init"); + } + }, + } + } + + Ok(()) +} + +async fn send_sync_message( + stream: &mut W, + msg: iroh_sync::sync::ProtocolMessage, +) -> Result<()> { + let msg_bytes = postcard::to_stdvec(&Message::Sync(msg))?; + iroh_bytes::protocol::write_lp(stream, &msg_bytes).await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use iroh_sync::{store::Store as _, sync::Namespace}; + + use super::*; + + #[tokio::test] + async fn test_sync_simple() -> Result<()> { + let mut rng = rand::thread_rng(); + + let alice_replica_store = store::memory::Store::default(); + // For now uses same author on both sides. 
+        let author = alice_replica_store.new_author(&mut rng).unwrap();
+
+        let namespace = Namespace::new(&mut rng);
+
+        let alice_replica = alice_replica_store.new_replica(namespace.clone()).unwrap();
+        alice_replica
+            .hash_and_insert("hello bob", &author, "from alice")
+            .unwrap();
+
+        let bob_replica_store = store::memory::Store::default();
+        let bob_replica = bob_replica_store.new_replica(namespace.clone()).unwrap();
+        bob_replica
+            .hash_and_insert("hello alice", &author, "from bob")
+            .unwrap();
+
+        assert_eq!(
+            bob_replica_store
+                .get_all(bob_replica.namespace())
+                .unwrap()
+                .collect::<Result<Vec<_>>>()
+                .unwrap()
+                .len(),
+            1
+        );
+        assert_eq!(
+            alice_replica_store
+                .get_all(alice_replica.namespace())
+                .unwrap()
+                .collect::<Result<Vec<_>>>()
+                .unwrap()
+                .len(),
+            1
+        );
+
+        let (alice, bob) = tokio::io::duplex(64);
+
+        let (mut alice_reader, mut alice_writer) = tokio::io::split(alice);
+        let replica = alice_replica.clone();
+        let alice_task = tokio::task::spawn(async move {
+            run_alice::<store::memory::Store, _, _>(
+                &mut alice_writer,
+                &mut alice_reader,
+                &replica,
+                None,
+            )
+            .await
+        });
+
+        let (mut bob_reader, mut bob_writer) = tokio::io::split(bob);
+        let bob_replica_store_task = bob_replica_store.clone();
+        let bob_task = tokio::task::spawn(async move {
+            run_bob::<store::memory::Store, _, _>(
+                &mut bob_writer,
+                &mut bob_reader,
+                bob_replica_store_task,
+                None,
+            )
+            .await
+        });
+
+        alice_task.await??;
+        bob_task.await??;
+
+        assert_eq!(
+            bob_replica_store
+                .get_all(bob_replica.namespace())
+                .unwrap()
+                .collect::<Result<Vec<_>>>()
+                .unwrap()
+                .len(),
+            2
+        );
+        assert_eq!(
+            alice_replica_store
+                .get_all(alice_replica.namespace())
+                .unwrap()
+                .collect::<Result<Vec<_>>>()
+                .unwrap()
+                .len(),
+            2
+        );
+
+        Ok(())
+    }
+}
diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs
new file mode 100644
index 0000000000..7e04323807
--- /dev/null
+++ b/iroh/src/sync/content.rs
@@ -0,0 +1,278 @@
+use std::{
+    io,
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+
+use anyhow::Result;
+use bytes::Bytes;
+use iroh_bytes::util::Hash;
+use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt};
+use iroh_metrics::{inc, inc_by};
+use iroh_net::{tls::PeerId, MagicEndpoint};
+use iroh_sync::{
+    store::{self, Store as _},
+    sync::{Author, InsertOrigin, Namespace, OnInsertCallback, PeerIdBytes, Replica, SignedEntry},
+};
+use tokio::io::AsyncRead;
+
+use super::metrics::Metrics;
+use crate::{
+    database::flat::{writable::WritableFileDatabase, Database},
+    download::Downloader,
+};
+
+#[derive(Debug, Copy, Clone)]
+pub enum DownloadMode {
+    Always,
+    Manual,
+}
+
+#[derive(Debug, Clone)]
+pub struct DocStore {
+    replicas: store::fs::Store,
+    blobs: BlobStore,
+    local_author: Arc<Author>,
+}
+
+const REPLICA_DB_NAME: &str = "replica.db";
+
+impl DocStore {
+    pub fn new(blobs: BlobStore, author: Author, storage_path: PathBuf) -> Result<Self> {
+        let replicas = store::fs::Store::new(storage_path.join(REPLICA_DB_NAME))?;
+
+        Ok(Self {
+            replicas,
+            local_author: Arc::new(author),
+            blobs,
+        })
+    }
+
+    pub async fn create_or_open(
+        &self,
+        namespace: Namespace,
+        download_mode: DownloadMode,
+    ) -> Result<Doc<store::fs::Store>> {
+        let replica = match self.replicas.get_replica(&namespace.id())? {
+            Some(replica) => replica,
+            None => self.replicas.new_replica(namespace)?,
+        };
+
+        let doc = Doc::new(
+            replica,
+            self.blobs.clone(),
+            self.local_author.clone(),
+            download_mode,
+        );
+        Ok(doc)
+    }
+
+    pub async fn handle_connection(&self, conn: quinn::Connecting) -> anyhow::Result<()> {
+        crate::sync::handle_connection(conn, self.replicas.clone()).await
+    }
+
+    pub fn store(&self) -> &store::fs::Store {
+        &self.replicas
+    }
+}
+
+/// A replica with a [`BlobStore`] for contents.
+///
+/// This will also download missing content from peers.
+///
+/// TODO: Currently content is only downloaded from the author of an entry.
+/// We want to try other peers if the author is offline (or always).
+/// We'll need some heuristics for which peers to try.
+#[derive(Clone, Debug)]
+pub struct Doc<S: store::Store> {
+    replica: Replica<S::Instance>,
+    blobs: BlobStore,
+    local_author: Arc<Author>,
+}
+
+impl<S: store::Store> Doc<S> {
+    pub fn new(
+        replica: Replica<S::Instance>,
+        blobs: BlobStore,
+        local_author: Arc<Author>,
+        download_mode: DownloadMode,
+    ) -> Self {
+        let doc = Self {
+            replica,
+            blobs,
+            local_author,
+        };
+
+        // If download mode is set to always download:
+        // set up an on_insert callback to trigger a download on remote inserts
+        if let DownloadMode::Always = download_mode {
+            let doc_clone = doc.clone();
+            doc.replica
+                .on_insert(Box::new(move |origin, entry| match origin {
+                    InsertOrigin::Sync(peer) => {
+                        doc_clone.download_content_from_author_and_other_peer(&entry, peer);
+                    }
+                    InsertOrigin::Local => {}
+                }));
+        }
+
+        // Collect metrics
+        doc.replica.on_insert(Box::new(move |origin, entry| {
+            let size = entry.entry().record().content_len();
+            match origin {
+                InsertOrigin::Local => {
+                    inc!(Metrics, new_entries_local);
+                    inc_by!(Metrics, new_entries_local_size, size);
+                }
+                InsertOrigin::Sync(_) => {
+                    inc!(Metrics, new_entries_remote);
+                    inc_by!(Metrics, new_entries_remote_size, size);
+                }
+            }
+        }));
+
+        doc
+    }
+
+    pub fn on_insert(&self, callback: OnInsertCallback) {
+        self.replica.on_insert(callback);
+    }
+
+    pub fn replica(&self) -> &Replica<S::Instance> {
+        &self.replica
+    }
+
+    pub fn local_author(&self) -> &Author {
+        &self.local_author
+    }
+
+    pub async fn insert_bytes(
+        &self,
+        key: impl AsRef<[u8]>,
+        content: Bytes,
+    ) -> anyhow::Result<(Hash, u64)> {
+        let (hash, len) = self.blobs.put_bytes(content).await?;
+        self.replica
+            .insert(key, &self.local_author, hash, len)
+            .map_err(Into::into)?;
+        Ok((hash, len))
+    }
+
+    pub async fn insert_reader(
+        &self,
+        key: impl AsRef<[u8]>,
+        content: impl AsyncRead + Unpin,
+    ) -> anyhow::Result<(Hash, u64)> {
+        let (hash, len) = self.blobs.put_reader(content).await?;
+        self.replica
+            .insert(key, &self.local_author, hash, len)
+            .map_err(Into::into)?;
+        Ok((hash, len))
+    }
+
+    pub async fn insert_from_file(
+        &self,
+        key: impl AsRef<[u8]>,
+        file_path: impl AsRef<Path>,
+    ) -> anyhow::Result<(Hash, u64)> {
+        let reader = tokio::fs::File::open(&file_path).await?;
+        self.insert_reader(&key, reader).await
+    }
+
+    pub fn download_content_from_author_and_other_peer(
+        &self,
+        entry: &SignedEntry,
+        other_peer: Option<PeerIdBytes>,
+    ) {
+        let author_peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes())
+            .expect("failed to convert author to peer id");
+
+        let mut peers = vec![author_peer_id];
+
+        if let Some(other_peer) = other_peer {
+            let other_peer_id =
+                PeerId::from_bytes(&other_peer).expect("failed to convert peer id bytes to peer id");
+            if other_peer_id != peers[0] {
+                peers.push(other_peer_id);
+            }
+        }
+
+        let hash = *entry.entry().record().content_hash();
+        self.blobs.start_download(hash, peers);
+    }
+
+    pub async fn get_content_bytes(&self, entry: &SignedEntry) -> Option<Bytes> {
+        let hash = entry.entry().record().content_hash();
+        self.blobs.get_bytes(hash).await.ok().flatten()
+    }
+
+    pub async fn get_content_reader(&self, entry: &SignedEntry) -> Option<impl AsyncSliceReader> {
+        let hash = entry.entry().record().content_hash();
+        self.blobs.get_reader(hash).await.ok().flatten()
+    }
+}
+
+/// A blob database that can download missing blobs from peers.
+///
+/// Blobs can be inserted either from bytes or by downloading from peers.
+/// Downloads can be started and will be tracked in the blobstore.
+/// New blobs will be saved as files with a filename based on their hash.
+///
+/// TODO: This is similar to what is used in the iroh provider.
+/// Unify once we know what the APIs should look like.
+#[derive(Debug, Clone)]
+pub struct BlobStore {
+    db: WritableFileDatabase,
+    downloader: Downloader,
+}
+impl BlobStore {
+    pub async fn new(
+        rt: iroh_bytes::util::runtime::Handle,
+        data_path: PathBuf,
+        endpoint: MagicEndpoint,
+    ) -> anyhow::Result<Self> {
+        let db = WritableFileDatabase::new(data_path).await?;
+        let downloader = Downloader::new(rt, endpoint, db.clone());
+        Ok(Self { db, downloader })
+    }
+
+    pub async fn save(&self) -> io::Result<()> {
+        self.db.save().await
+    }
+
+    pub fn db(&self) -> &Database {
+        self.db.db()
+    }
+
+    pub fn start_download(&self, hash: Hash, peers: Vec<PeerId>) {
+        if !self.db.has(&hash) {
+            self.downloader.push(hash, peers);
+        }
+    }
+
+    pub async fn get_bytes(&self, hash: &Hash) -> anyhow::Result<Option<Bytes>> {
+        self.downloader.finished(hash).await;
+        let Some(entry) = self.db().get(hash) else {
+            return Ok(None)
+        };
+        let bytes = entry.data_reader().await?.read_to_end().await?;
+        Ok(Some(bytes))
+    }
+
+    pub async fn get_reader(&self, hash: &Hash) -> anyhow::Result<Option<impl AsyncSliceReader>> {
+        self.downloader.finished(hash).await;
+        let Some(entry) = self.db().get(hash) else {
+            return Ok(None)
+        };
+        let reader = entry.data_reader().await?;
+        Ok(Some(reader))
+    }
+
+    pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64)> {
+        self.db.put_bytes(data).await
+    }
+
+    pub async fn put_reader(&self, data: impl AsyncRead + Unpin) -> anyhow::Result<(Hash, u64)> {
+        self.db.put_reader(data).await
+    }
+}
diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs
new file mode 100644
index 0000000000..320c236385
--- /dev/null
+++ b/iroh/src/sync/live.rs
@@ -0,0 +1,313 @@
+use std::{collections::HashMap, net::SocketAddr, sync::Arc};
+
+use crate::sync::connect_and_sync;
+use anyhow::{anyhow, Result};
+use futures::{
+    future::{BoxFuture, Shared},
+    stream::{BoxStream, FuturesUnordered, StreamExt},
+    FutureExt, TryFutureExt,
+};
+use iroh_gossip::{
+    net::{Event, Gossip},
+    proto::TopicId,
+};
+use iroh_metrics::inc;
+use iroh_net::{tls::PeerId, MagicEndpoint};
+use iroh_sync::{
+    store,
+    sync::{InsertOrigin, Replica, SignedEntry},
+};
+use serde::{Deserialize, Serialize};
+use tokio::{sync::mpsc, task::JoinError};
+use tracing::{debug, error};
+
+use super::metrics::Metrics;
+
+const CHANNEL_CAP: usize = 8;
+
+/// The address to connect to a peer
+/// TODO: Move into iroh-net
+/// TODO: Make an enum and support DNS resolution
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct PeerSource {
+    pub peer_id: PeerId,
+    pub addrs: Vec<SocketAddr>,
+    pub derp_region: Option<u16>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub enum Op {
+    Put(SignedEntry),
+}
+
+#[derive(Debug)]
+enum SyncState {
+    Running,
+    Finished,
+    Failed(anyhow::Error),
+}
+
+#[derive(Debug)]
+pub enum ToActor<S: store::Store> {
+    SyncDoc {
+        doc: Replica<S::Instance>,
+        initial_peers: Vec<PeerSource>,
+    },
+    Shutdown,
+}
+
+/// Handle to a running live sync actor
+#[derive(Debug, Clone)]
+pub struct LiveSync<S: store::Store> {
+    to_actor_tx: mpsc::Sender<ToActor<S>>,
+    task: Shared<BoxFuture<'static, Result<(), Arc<JoinError>>>>,
+}
+
+impl<S: store::Store> LiveSync<S> {
+    pub fn spawn(endpoint: MagicEndpoint, gossip: Gossip) -> Self {
+        let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP);
+        let mut actor = Actor::new(endpoint, gossip, to_actor_rx);
+        let task = tokio::spawn(async move {
+            if let Err(err) = actor.run().await {
+                error!("live sync failed: {err:?}");
+            }
+        });
+        let handle = LiveSync {
+            to_actor_tx,
+            task: task.map_err(Arc::new).boxed().shared(),
+        };
+        handle
+    }
+
+    /// Cancel the live sync.
+    pub async fn cancel(&self) -> Result<()> {
+        self.to_actor_tx.send(ToActor::<S>::Shutdown).await?;
+        self.task.clone().await?;
+        Ok(())
+    }
+
+    pub async fn add(
+        &self,
+        doc: Replica<S::Instance>,
+        initial_peers: Vec<PeerSource>,
+    ) -> Result<()> {
+        self.to_actor_tx
+            .send(ToActor::<S>::SyncDoc { doc, initial_peers })
+            .await?;
+        Ok(())
+    }
+}
+
+// TODO: Also add `handle_connection` to the replica and track incoming sync requests here too.
+// Currently peers might double-sync in both directions.
+struct Actor<S: store::Store> {
+    endpoint: MagicEndpoint,
+    gossip: Gossip,
+
+    docs: HashMap<TopicId, Replica<S::Instance>>,
+    subscription: BoxStream<'static, Result<(TopicId, Event)>>,
+    sync_state: HashMap<(TopicId, PeerId), SyncState>,
+
+    to_actor_rx: mpsc::Receiver<ToActor<S>>,
+    insert_entry_tx: flume::Sender<(TopicId, SignedEntry)>,
+    insert_entry_rx: flume::Receiver<(TopicId, SignedEntry)>,
+
+    pending_syncs: FuturesUnordered<BoxFuture<'static, (TopicId, PeerId, Result<()>)>>,
+    pending_joins: FuturesUnordered<BoxFuture<'static, (TopicId, Result<()>)>>,
+}
+
+impl<S: store::Store> Actor<S> {
+    pub fn new(
+        endpoint: MagicEndpoint,
+        gossip: Gossip,
+        to_actor_rx: mpsc::Receiver<ToActor<S>>,
+    ) -> Self {
+        let (insert_tx, insert_rx) = flume::bounded(64);
+        let sub = gossip.clone().subscribe_all().boxed();
+
+        Self {
+            gossip,
+            endpoint,
+            insert_entry_rx: insert_rx,
+            insert_entry_tx: insert_tx,
+            to_actor_rx,
+            sync_state: Default::default(),
+            pending_syncs: Default::default(),
+            pending_joins: Default::default(),
+            docs: Default::default(),
+            subscription: sub,
+        }
+    }
+
+    async fn run(&mut self) -> Result<()> {
+        loop {
+            tokio::select! {
+                biased;
+                msg = self.to_actor_rx.recv() => {
+                    match msg {
+                        // received shutdown signal, or livesync handle was dropped:
+                        // break loop and exit
+                        Some(ToActor::Shutdown) | None => {
+                            self.on_shutdown().await?;
+                            break;
+                        }
+                        Some(ToActor::SyncDoc { doc, initial_peers }) => self.insert_doc(doc, initial_peers).await?,
+                    }
+                }
+                // new gossip message
+                Some(event) = self.subscription.next() => {
+                    let (topic, event) = event?;
+                    if let Err(err) = self.on_gossip_event(topic, event) {
+                        error!("Failed to process gossip event: {err:?}");
+                    }
+                },
+                entry = self.insert_entry_rx.recv_async() => {
+                    let (topic, entry) = entry?;
+                    self.on_insert_entry(topic, entry).await?;
+                }
+                Some((topic, peer, res)) = self.pending_syncs.next() => {
+                    // let (topic, peer, res) = res.context("task sync_with_peer panicked")?;
+                    self.on_sync_finished(topic, peer, res);
+
+                }
+                Some((topic, res)) = self.pending_joins.next() => {
+                    if let Err(err) = res {
+                        error!("failed to join {topic:?}: {err:?}");
+                    }
+                    // TODO: maintain some join state
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn sync_with_peer(&mut self, topic: TopicId, peer: PeerId) {
+        let Some(doc) = self.docs.get(&topic) else {
+            return;
+        };
+        // Check if we already synced with this peer and only start a sync if not.
+        // sync_with_peer is triggered on NeighborUp events, so it might trigger repeatedly for the
+        // same peers.
+        // TODO: Track finished time and potentially re-run sync
+        if let Some(_state) = self.sync_state.get(&(topic, peer)) {
+            return;
+        };
+        // TODO: fixme (doc_id, peer)
+        self.sync_state.insert((topic, peer), SyncState::Running);
+        let task = {
+            let endpoint = self.endpoint.clone();
+            let doc = doc.clone();
+            async move {
+                debug!("sync with {peer}");
+                // TODO: Make sure that the peer is dialable.
+                let res = connect_and_sync::<S>(&endpoint, &doc, peer, None, &[]).await;
+                debug!("> synced with {peer}: {res:?}");
+                // collect metrics
+                match &res {
+                    Ok(_) => inc!(Metrics, initial_sync_success),
+                    Err(_) => inc!(Metrics, initial_sync_failed),
+                }
+                (topic, peer, res)
+            }
+            .boxed()
+        };
+        self.pending_syncs.push(task);
+    }
+
+    async fn on_shutdown(&mut self) -> anyhow::Result<()> {
+        for (topic, _doc) in self.docs.drain() {
+            // TODO: Remove the on_insert callbacks
+            self.gossip.quit(topic).await?;
+        }
+        Ok(())
+    }
+
+    async fn insert_doc(
+        &mut self,
+        doc: Replica<S::Instance>,
+        initial_peers: Vec<PeerSource>,
+    ) -> Result<()> {
+        let peer_ids: Vec<PeerId> = initial_peers.iter().map(|p| p.peer_id).collect();
+
+        // add addresses of initial peers to our endpoint address book
+        for peer in &initial_peers {
+            self.endpoint
+                .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs)
+                .await?;
+        }
+
+        // join gossip for the topic to receive and send messages
+        let topic = TopicId::from_bytes(*doc.namespace().as_bytes());
+        self.pending_joins.push({
+            let peer_ids = peer_ids.clone();
+            let gossip = self.gossip.clone();
+            async move {
+                match gossip.join(topic, peer_ids).await {
+                    Err(err) => (topic, Err(err)),
+                    Ok(fut) => (topic, fut.await),
+                }
+            }
+            .boxed()
+        });
+
+        // set up replica insert notifications.
+        let insert_entry_tx = self.insert_entry_tx.clone();
+        doc.on_insert(Box::new(move |origin, entry| {
+            // only care about local inserts, otherwise we'd create endless gossip loops
+            if let InsertOrigin::Local = origin {
+                // TODO: this is potentially blocking inside an async call. figure out a better solution
+                insert_entry_tx.send((topic, entry)).ok();
+            }
+        }));
+        self.docs.insert(topic, doc);
+        // add addresses of initial peers to our endpoint address book
+        for peer in &initial_peers {
+            self.endpoint
+                .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs)
+                .await?;
+        }
+
+        // trigger initial sync with initial peers
+        for peer in peer_ids {
+            self.sync_with_peer(topic, peer);
+        }
+        Ok(())
+    }
+
+    fn on_sync_finished(&mut self, topic: TopicId, peer: PeerId, res: Result<()>) {
+        let state = match res {
+            Ok(_) => SyncState::Finished,
+            Err(err) => SyncState::Failed(err),
+        };
+        self.sync_state.insert((topic, peer), state);
+    }
+
+    fn on_gossip_event(&mut self, topic: TopicId, event: Event) -> Result<()> {
+        let Some(doc) = self.docs.get(&topic) else {
+            return Err(anyhow!("Missing doc for {topic:?}"));
+        };
+        match event {
+            // We received a gossip message. Try to insert it into our replica.
+            Event::Received(data, prev_peer) => {
+                let op: Op = postcard::from_bytes(&data)?;
+                match op {
+                    Op::Put(entry) => doc.insert_remote_entry(entry, Some(prev_peer.to_bytes()))?,
+                }
+            }
+            // A new neighbor appeared in the gossip swarm. Try to sync with it directly.
+            // [Self::sync_with_peer] will check that we do not resync with peers synced previously in the
+            // same session. TODO: Maybe this is too broad and leads to too many sync requests.
+            Event::NeighborUp(peer) => self.sync_with_peer(topic, peer),
+            _ => {}
+        }
+        Ok(())
+    }
+
+    /// A new entry was inserted locally. Broadcast a gossip message.
+    async fn on_insert_entry(&mut self, topic: TopicId, entry: SignedEntry) -> Result<()> {
+        let op = Op::Put(entry);
+        let message = postcard::to_stdvec(&op)?.into();
+        self.gossip.broadcast(topic, message).await?;
+        Ok(())
+    }
+}
diff --git a/iroh/src/sync/metrics.rs b/iroh/src/sync/metrics.rs
new file mode 100644
index 0000000000..37185e6cec
--- /dev/null
+++ b/iroh/src/sync/metrics.rs
@@ -0,0 +1,35 @@
+use iroh_metrics::{
+    core::{Counter, Metric},
+    struct_iterable::Iterable,
+};
+
+/// Metrics for iroh-sync
+#[allow(missing_docs)]
+#[derive(Debug, Clone, Iterable)]
+pub struct Metrics {
+    pub new_entries_local: Counter,
+    pub new_entries_remote: Counter,
+    pub new_entries_local_size: Counter,
+    pub new_entries_remote_size: Counter,
+    pub initial_sync_success: Counter,
+    pub initial_sync_failed: Counter,
+}
+
+impl Default for Metrics {
+    fn default() -> Self {
+        Self {
+            new_entries_local: Counter::new("Number of document entries added locally"),
+            new_entries_remote: Counter::new("Number of document entries added by peers"),
+            new_entries_local_size: Counter::new("Total size of entry contents added locally"),
+            new_entries_remote_size: Counter::new("Total size of entry contents added by peers"),
+            initial_sync_success: Counter::new("Number of successful initial syncs"),
+            initial_sync_failed: Counter::new("Number of failed initial syncs"),
+        }
+    }
+}
+
+impl Metric for Metrics {
+    fn name() -> &'static str {
+        "iroh-sync"
+    }
+}
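
Taken together, the new modules are meant to be wired up roughly as in the sketch below. This is not part of the patch: it assumes the caller already has a MagicEndpoint that negotiates SYNC_ALPN, a running iroh-gossip Gossip instance, an iroh-bytes runtime handle, an Author, a Namespace, and PeerSource entries for the initial peers. The function name, the module path iroh::sync, and the data directories are placeholders for illustration only.

    use std::path::PathBuf;

    use anyhow::Result;
    // Assumed re-export path for the new modules added in this diff.
    use iroh::sync::{BlobStore, DocStore, DownloadMode, LiveSync, PeerSource};
    use iroh_gossip::net::Gossip;
    use iroh_net::MagicEndpoint;
    use iroh_sync::{
        store,
        sync::{Author, Namespace},
    };

    // Hypothetical wiring function; all parameters are assumed to be set up elsewhere.
    async fn start_sync(
        endpoint: MagicEndpoint,
        gossip: Gossip,
        rt: iroh_bytes::util::runtime::Handle,
        author: Author,
        namespace: Namespace,
        initial_peers: Vec<PeerSource>,
    ) -> Result<()> {
        // Content storage plus a downloader that fetches missing blobs over the endpoint.
        let blobs = BlobStore::new(rt, PathBuf::from("./data/blobs"), endpoint.clone()).await?;
        // Replica storage; replica.db is created inside the given directory.
        let docs = DocStore::new(blobs, author, PathBuf::from("./data/docs"))?;
        // Open (or create) the document and eagerly download content for remote inserts.
        let doc = docs.create_or_open(namespace, DownloadMode::Always).await?;
        // Gossip-driven live sync: joins the namespace topic and syncs with the initial peers.
        let live = LiveSync::<store::fs::Store>::spawn(endpoint, gossip);
        live.add(doc.replica().clone(), initial_peers).await?;
        // Local writes now reach neighbors through the gossip broadcast installed by LiveSync.
        doc.insert_bytes("greeting", "hello world".into()).await?;
        Ok(())
    }

On the accepting side, incoming connections negotiated with SYNC_ALPN would be handed to DocStore::handle_connection (or handle_connection directly), which runs the receiver side of the protocol for every replica known to the store.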