From ac96d9dae760b63d819322733ced4290f79d67c0 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Wed, 26 Jul 2023 20:57:21 +0200 Subject: [PATCH 01/45] feat: initial prototype of integrating iroh-sync --- Cargo.lock | 90 ++- Cargo.toml | 1 + iroh-bytes/src/protocol.rs | 4 +- iroh-sync/Cargo.toml | 27 + iroh-sync/LICENSE-APACHE | 201 ++++++ iroh-sync/LICENSE-MIT | 25 + iroh-sync/README.md | 19 + iroh-sync/src/lib.rs | 2 + iroh-sync/src/ranger.rs | 1246 ++++++++++++++++++++++++++++++++++++ iroh-sync/src/sync.rs | 789 +++++++++++++++++++++++ iroh/Cargo.toml | 4 +- iroh/src/lib.rs | 1 + iroh/src/sync.rs | 189 ++++++ 13 files changed, 2593 insertions(+), 5 deletions(-) create mode 100644 iroh-sync/Cargo.toml create mode 100644 iroh-sync/LICENSE-APACHE create mode 100644 iroh-sync/LICENSE-MIT create mode 100644 iroh-sync/README.md create mode 100644 iroh-sync/src/lib.rs create mode 100644 iroh-sync/src/ranger.rs create mode 100644 iroh-sync/src/sync.rs create mode 100644 iroh/src/sync.rs diff --git a/Cargo.lock b/Cargo.lock index f45ac02690..d61d2bf5a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -619,6 +619,64 @@ version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6548a0ad5d2549e111e1f6a11a6c2e2d00ce6a3dafe22948d67c2b443f775e52" +[[package]] +name = "crossbeam" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2801af0d36612ae591caa9568261fddce32ce6e08a7275ea334a06a4ad021a2c" +dependencies = [ + "cfg-if", + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce6fd6f855243022dcecf8702fef0c297d4338e226845fe067f6341ad9fa0cef" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae211234986c545741a7dc064309f67ee1e5ad243d0e48335adc0484d960bcc7" +dependencies = [ + "autocfg", + "cfg-if", + "crossbeam-utils", + "memoffset 0.9.0", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1cfb3ea8a53f37c40dea2c7bedcbd88bdfae54f5e2175d6ecaff1c988353add" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.16" @@ -1655,6 +1713,7 @@ dependencies = [ "iroh-io", "iroh-metrics", "iroh-net", + "iroh-sync", "multibase", "nix", "num_cpus", @@ -1857,6 +1916,26 @@ dependencies = [ "zeroize", ] +[[package]] +name = "iroh-sync" +version = "0.1.0" +dependencies = [ + "anyhow", + "blake3", + "bytes", + "crossbeam", + "ed25519-dalek", + "hex", + "iroh-bytes", + "once_cell", + "parking_lot", + "rand", + "rand_core", + "serde", + "tokio", + "url", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -1998,6 +2077,15 @@ dependencies = [ "autocfg", ] +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies 
= [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.17" @@ -2124,7 +2212,7 @@ dependencies = [ "bitflags 1.3.2", "cfg-if", "libc", - "memoffset", + "memoffset 0.7.1", "pin-utils", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index be650b1911..694f2af3b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,6 +5,7 @@ members = [ "iroh-bytes", "iroh-gossip", "iroh-metrics", + "iroh-sync", ] [profile.optimized-release] diff --git a/iroh-bytes/src/protocol.rs b/iroh-bytes/src/protocol.rs index 8ace63a8c8..5b1f8444d1 100644 --- a/iroh-bytes/src/protocol.rs +++ b/iroh-bytes/src/protocol.rs @@ -172,7 +172,7 @@ impl GetRequest { } /// Write the given data to the provider sink, with a unsigned varint length prefix. -pub(crate) async fn write_lp(writer: &mut W, data: &[u8]) -> Result<()> { +pub async fn write_lp(writer: &mut W, data: &[u8]) -> Result<()> { ensure!( data.len() < MAX_MESSAGE_SIZE, "sending message is too large" @@ -193,7 +193,7 @@ pub(crate) async fn write_lp(writer: &mut W, data: &[u8]) /// /// The message as raw bytes. If the end of the stream is reached and there is no partial /// message, returns `None`. -pub(crate) async fn read_lp( +pub async fn read_lp( mut reader: impl AsyncRead + Unpin, buffer: &mut BytesMut, ) -> Result> { diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml new file mode 100644 index 0000000000..b7aa4ff340 --- /dev/null +++ b/iroh-sync/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "iroh-sync" +version = "0.1.0" +edition = "2021" +readme = "README.md" +description = "Iroh sync" +license = "MIT/Apache-2.0" +authors = ["n0 team"] +repository = "https://github.com/n0-computer/iroh" + +[dependencies] +anyhow = "1.0.71" +blake3 = "1.3.3" +crossbeam = "0.8.2" +ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } +iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } +once_cell = "1.18.0" +rand = "0.8.5" +rand_core = "0.6.4" +serde = { version = "1.0.164", features = ["derive"] } +url = "2.4.0" +bytes = "1.4.0" +parking_lot = "0.12.1" +hex = "0.4" + +[dev-dependencies] +tokio = { version = "1.28.2", features = ["sync", "macros"] } diff --git a/iroh-sync/LICENSE-APACHE b/iroh-sync/LICENSE-APACHE new file mode 100644 index 0000000000..16fe87b06e --- /dev/null +++ b/iroh-sync/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. diff --git a/iroh-sync/LICENSE-MIT b/iroh-sync/LICENSE-MIT new file mode 100644 index 0000000000..dfd85baf84 --- /dev/null +++ b/iroh-sync/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2023 + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/iroh-sync/README.md b/iroh-sync/README.md new file mode 100644 index 0000000000..7c79e368f2 --- /dev/null +++ b/iroh-sync/README.md @@ -0,0 +1,19 @@ +# iroh-sync + + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +### Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in this project by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/iroh-sync/src/lib.rs b/iroh-sync/src/lib.rs new file mode 100644 index 0000000000..a37ead1b6f --- /dev/null +++ b/iroh-sync/src/lib.rs @@ -0,0 +1,2 @@ +pub mod ranger; +pub mod sync; diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs new file mode 100644 index 0000000000..159d99d8cf --- /dev/null +++ b/iroh-sync/src/ranger.rs @@ -0,0 +1,1246 @@ +//! Implementation of Set Reconcilliation based on +//! "Range-Based Set Reconciliation" by Aljoscha Meyer. +//! + +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::fmt::Debug; +use std::marker::PhantomData; + +use serde::{Deserialize, Serialize}; + +/// Stores a range. +/// +/// There are three possibilities +/// - x, x: All elements in a set, denoted with +/// - [x, y): x < y: Includes x, but not y +/// - S \ [y, x) y < x: Includes x, but not y. +/// This means that ranges are "wrap around" conceptually. +#[derive(Debug, Clone, Copy, PartialEq, Serialize, Deserialize, Default)] +pub struct Range { + x: K, + y: K, +} + +impl Range { + pub fn x(&self) -> &K { + &self.x + } + + pub fn y(&self) -> &K { + &self.y + } + + pub fn new(x: K, y: K) -> Self { + Range { x, y } + } + + pub fn map(self, f: impl FnOnce(K, K) -> (X, X)) -> Range { + let (x, y) = f(self.x, self.y); + Range { x, y } + } +} + +impl From<(K, K)> for Range { + fn from((x, y): (K, K)) -> Self { + Range { x, y } + } +} + +pub trait RangeKey: Sized + Ord + Debug { + /// Is this key inside the range? + fn contains(&self, range: &Range) -> bool { + contains(self, range) + } +} + +/// Default implementation of `contains` for `Ord` types. 
+pub fn contains(t: &T, range: &Range) -> bool { + match range.x().cmp(range.y()) { + Ordering::Equal => true, + Ordering::Less => range.x() <= t && t < range.y(), + Ordering::Greater => range.x() <= t || t < range.y(), + } +} + +impl RangeKey for &str {} +impl RangeKey for &[u8] {} + +#[derive(Copy, Clone, PartialEq, Serialize, Deserialize)] +pub struct Fingerprint(pub [u8; 32]); + +impl Debug for Fingerprint { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Fp({})", blake3::Hash::from(self.0).to_hex()) + } +} + +impl Fingerprint { + /// The fingerprint of the empty set + pub fn empty() -> Self { + Fingerprint::new(&[][..]) + } + + pub fn new(val: T) -> Self { + val.as_fingerprint() + } +} + +pub trait AsFingerprint { + fn as_fingerprint(&self) -> Fingerprint; +} + +impl> AsFingerprint for T { + fn as_fingerprint(&self) -> Fingerprint { + Fingerprint(blake3::hash(self.as_ref()).into()) + } +} + +impl std::ops::BitXorAssign for Fingerprint { + fn bitxor_assign(&mut self, rhs: Self) { + for (a, b) in self.0.iter_mut().zip(rhs.0.iter()) { + *a ^= b; + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RangeFingerprint { + #[serde(bound( + serialize = "Range: Serialize", + deserialize = "Range: Deserialize<'de>" + ))] + pub range: Range, + /// The fingerprint of `range`. + pub fingerprint: Fingerprint, +} + +/// Transfers items inside a range to the other participant. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct RangeItem { + /// The range out of which the elements are. + #[serde(bound( + serialize = "Range: Serialize", + deserialize = "Range: Deserialize<'de>" + ))] + pub range: Range, + #[serde(bound( + serialize = "K: Serialize, V: Serialize", + deserialize = "K: Deserialize<'de>, V: Deserialize<'de>" + ))] + pub values: Vec<(K, V)>, + /// If false, requests to send local items in the range. + /// Otherwise not. + pub have_local: bool, +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum MessagePart { + #[serde(bound( + serialize = "RangeFingerprint: Serialize", + deserialize = "RangeFingerprint: Deserialize<'de>" + ))] + RangeFingerprint(RangeFingerprint), + #[serde(bound( + serialize = "RangeItem: Serialize", + deserialize = "RangeItem: Deserialize<'de>" + ))] + RangeItem(RangeItem), +} + +impl MessagePart { + pub fn is_range_fingerprint(&self) -> bool { + matches!(self, MessagePart::RangeFingerprint(_)) + } + + pub fn is_range_item(&self) -> bool { + matches!(self, MessagePart::RangeItem(_)) + } + + pub fn values(&self) -> Option<&[(K, V)]> { + match self { + MessagePart::RangeFingerprint(_) => None, + MessagePart::RangeItem(RangeItem { values, .. }) => Some(&values), + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct Message { + #[serde(bound( + serialize = "MessagePart: Serialize", + deserialize = "MessagePart: Deserialize<'de>" + ))] + parts: Vec>, +} + +impl Message +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + /// Construct the initial message. 
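+    ///
+    /// The first message always covers the whole key space: with `x` being the
+    /// store's first key, the wrap-around range `(x, x)` contains every element,
+    /// so a single fingerprint over the entire (optionally `limit`-restricted)
+    /// set is sent. Because a `Fingerprint` is the XOR of the per-element BLAKE3
+    /// hashes, it is cheap to compute and independent of element order.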
+ fn init>(store: &S, limit: Option<&Range>) -> Self { + let x = store.get_first().clone(); + let range = Range::new(x.clone(), x); + let fingerprint = store.get_fingerprint(&range, limit); + let part = MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }); + Message { parts: vec![part] } + } + + pub fn parts(&self) -> &[MessagePart] { + &self.parts + } +} + +pub trait Store: Sized + Default +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> K; + fn get(&self, key: &K) -> Option<&V>; + fn len(&self) -> usize; + fn is_empty(&self) -> bool; + /// Calculate the fingerprint of the given range. + fn get_fingerprint(&self, range: &Range, limit: Option<&Range>) -> Fingerprint; + + /// Insert the given key value pair. + fn put(&mut self, k: K, v: V); + + type RangeIterator<'a>: Iterator + where + Self: 'a, + K: 'a, + V: 'a; + + /// Returns all items in the given range + fn get_range<'a>(&'a self, range: Range, limit: Option>) + -> Self::RangeIterator<'a>; + fn remove(&mut self, key: &K) -> Option; + + type AllIterator<'a>: Iterator + where + Self: 'a, + K: 'a, + V: 'a; + fn all(&self) -> Self::AllIterator<'_>; +} + +#[derive(Debug)] +pub struct SimpleStore { + data: BTreeMap, +} + +impl Default for SimpleStore { + fn default() -> Self { + SimpleStore { + data: BTreeMap::default(), + } + } +} + +impl Store for SimpleStore +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + fn get_first(&self) -> K { + if let Some((k, _)) = self.data.first_key_value() { + k.clone() + } else { + Default::default() + } + } + + fn get(&self, key: &K) -> Option<&V> { + self.data.get(key) + } + + fn len(&self) -> usize { + self.data.len() + } + + fn is_empty(&self) -> bool { + self.data.is_empty() + } + + /// Calculate the fingerprint of the given range. + fn get_fingerprint(&self, range: &Range, limit: Option<&Range>) -> Fingerprint { + let elements = self.get_range(range.clone(), limit.cloned()); + let mut fp = Fingerprint::empty(); + for el in elements { + fp ^= el.0.as_fingerprint(); + } + + fp + } + + /// Insert the given key value pair. 
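+    ///
+    /// For this in-memory store this is a plain `BTreeMap` insert that overwrites
+    /// any previous value for the key; richer stores (such as the one in
+    /// `sync.rs`) can verify and version entries before accepting them.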
+ fn put(&mut self, k: K, v: V) { + self.data.insert(k, v); + } + + type RangeIterator<'a> = SimpleRangeIterator<'a, K, V> + where K: 'a, V: 'a; + /// Returns all items in the given range + fn get_range<'a>( + &'a self, + range: Range, + limit: Option>, + ) -> Self::RangeIterator<'a> { + // TODO: this is not very efficient, optimize depending on data structure + let iter = self.data.iter(); + + SimpleRangeIterator { iter, range, limit } + } + + fn remove(&mut self, key: &K) -> Option { + self.data.remove(key) + } + + type AllIterator<'a> = std::collections::btree_map::Iter<'a, K, V> + where K: 'a, + V: 'a; + + fn all(&self) -> Self::AllIterator<'_> { + self.data.iter() + } +} + +#[derive(Debug)] +pub struct SimpleRangeIterator<'a, K: 'a, V: 'a> { + iter: std::collections::btree_map::Iter<'a, K, V>, + range: Range, + limit: Option>, +} + +impl<'a, K, V> Iterator for SimpleRangeIterator<'a, K, V> +where + K: RangeKey, +{ + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + let mut next = self.iter.next()?; + + let filter = |x: &K| { + let r = x.contains(&self.range); + if let Some(ref limit) = self.limit { + r && x.contains(limit) + } else { + r + } + }; + + loop { + if filter(&next.0) { + return Some(next); + } + + next = self.iter.next()?; + } + } +} + +#[derive(Debug)] +pub struct Peer = SimpleStore> +where + K: RangeKey + Clone + Default + AsFingerprint, +{ + store: S, + /// Up to how many values to send immediately, before sending only a fingerprint. + max_set_size: usize, + /// `k` in the protocol, how many splits to generate. at least 2 + split_factor: usize, + limit: Option>, + + _phantom: PhantomData, // why??? +} + +impl Default for Peer +where + K: RangeKey + Clone + Default + AsFingerprint, + S: Store + Default, +{ + fn default() -> Self { + Peer { + store: S::default(), + max_set_size: 1, + split_factor: 2, + limit: None, + _phantom: Default::default(), + } + } +} + +impl Peer +where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug, + S: Store + Default, +{ + pub fn with_limit(limit: Range) -> Self { + Peer { + store: S::default(), + max_set_size: 1, + split_factor: 2, + limit: Some(limit), + _phantom: Default::default(), + } + } +} +impl Peer +where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug, + S: Store, +{ + /// Generates the initial message. + pub fn initial_message(&self) -> Message { + Message::init(&self.store, self.limit.as_ref()) + } + + /// Processes an incoming message and produces a response. + /// If terminated, returns `None` + pub fn process_message(&mut self, message: Message) -> Option> { + let mut out = Vec::new(); + + // TODO: can these allocs be avoided? 
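+        //
+        // Outline of what follows: incoming parts are split into item lists and
+        // range fingerprints. Item ranges are answered with the local values the
+        // remote did not already send (unless it signalled `have_local`).
+        // Fingerprint ranges are compared against the local fingerprint; on a
+        // mismatch, a range with at most one local entry (or an empty remote
+        // fingerprint) is answered with its full item set, otherwise it is split
+        // into `split_factor` sub-ranges that are sent back either as items
+        // (small chunks) or as new fingerprints, recursing until both sides
+        // converge.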
+ let mut items = Vec::new(); + let mut fingerprints = Vec::new(); + for part in message.parts { + match part { + MessagePart::RangeItem(item) => { + items.push(item); + } + MessagePart::RangeFingerprint(fp) => { + fingerprints.push(fp); + } + } + } + + // Process item messages + for RangeItem { + range, + values, + have_local, + } in items + { + let diff: Option> = if have_local { + None + } else { + Some( + self.store + .get_range(range.clone(), self.limit.clone()) + .into_iter() + .filter(|(k, _)| values.iter().find(|(vk, _)| &vk == k).is_none()) + .map(|(k, v)| (k.clone(), v.clone())) + .collect(), + ) + }; + + // Store incoming values + for (k, v) in values { + self.store.put(k, v); + } + + if let Some(diff) = diff { + if !diff.is_empty() { + out.push(MessagePart::RangeItem(RangeItem { + range, + values: diff, + have_local: true, + })); + } + } + } + + // Process fingerprint messages + for RangeFingerprint { range, fingerprint } in fingerprints { + let local_fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref()); + + // Case1 Match, nothing to do + if local_fingerprint == fingerprint { + continue; + } + + // Case2 Recursion Anchor + let local_values: Vec<_> = self + .store + .get_range(range.clone(), self.limit.clone()) + .collect(); + if local_values.len() <= 1 || fingerprint == Fingerprint::empty() { + let values = local_values + .into_iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); + out.push(MessagePart::RangeItem(RangeItem { + range, + values, + have_local: false, + })); + } else { + // Case3 Recurse + // Create partition + // m0 = x < m1 < .. < mk = y, with k>= 2 + // such that [ml, ml+1) is nonempty + let mut ranges = Vec::with_capacity(self.split_factor); + let chunk_len = div_ceil(local_values.len(), self.split_factor); + + // Select the first index, for which the key is larger than the x of the range. + let mut start_index = local_values + .iter() + .position(|(k, _)| range.x() <= k) + .unwrap_or(0); + let max_len = local_values.len(); + for i in 0..self.split_factor { + let s_index = start_index; + let start = (s_index * chunk_len) % max_len; + let e_index = s_index + 1; + let end = (e_index * chunk_len) % max_len; + + let (x, y) = if i == 0 { + // first + (range.x(), local_values[end].0) + } else if i == self.split_factor - 1 { + // last + (local_values[start].0, range.y()) + } else { + // regular + (local_values[start].0, local_values[end].0) + }; + let range = Range::new(x.clone(), y.clone()); + ranges.push(range); + start_index += 1; + } + + for range in ranges.into_iter() { + let chunk: Vec<_> = self + .store + .get_range(range.clone(), self.limit.clone()) + .collect(); + // Add either the fingerprint or the item set + let fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref()); + if chunk.len() > self.max_set_size { + out.push(MessagePart::RangeFingerprint(RangeFingerprint { + range, + fingerprint, + })); + } else { + let values = chunk + .into_iter() + .map(|(k, v)| { + let k: K = k.clone(); + let v: V = v.clone(); + (k, v) + }) + .collect(); + out.push(MessagePart::RangeItem(RangeItem { + range, + values, + have_local: false, + })); + } + } + } + } + + // If we have any parts, return a message + if !out.is_empty() { + Some(Message { parts: out }) + } else { + None + } + } + + /// Insert a key value pair. + pub fn put(&mut self, k: K, v: V) { + self.store.put(k, v); + } + + pub fn get(&self, k: &K) -> Option<&V> { + self.store.get(k) + } + + /// Remove the given key. 
+ pub fn remove(&mut self, k: &K) -> Option { + self.store.remove(k) + } + + /// List all existing key value pairs. + pub fn all(&self) -> impl Iterator { + self.store.all() + } + + /// Returns a refernce to the underlying store. + pub fn store(&self) -> &S { + &self.store + } +} + +/// Sadly https://doc.rust-lang.org/std/primitive.usize.html#method.div_ceil is still unstable.. +fn div_ceil(a: usize, b: usize) -> usize { + debug_assert!(a != 0); + debug_assert!(b != 0); + + a / b + (a % b != 0) as usize +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + use super::*; + + #[test] + fn test_paper_1() { + let alice_set = [("ape", 1), ("eel", 1), ("fox", 1), ("gnu", 1)]; + let bob_set = [ + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("hog", 1), + ]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // Initial message + assert_eq!(res.alice_to_bob[0].parts.len(), 1); + assert!(res.alice_to_bob[0].parts[0].is_range_fingerprint()); + + // Response from Bob - recurse once + assert_eq!(res.bob_to_alice[0].parts.len(), 2); + assert!(res.bob_to_alice[0].parts[0].is_range_fingerprint()); + assert!(res.bob_to_alice[0].parts[1].is_range_fingerprint()); + + // Last response from Alice + assert_eq!(res.alice_to_bob[1].parts.len(), 3); + assert!(res.alice_to_bob[1].parts[0].is_range_item()); + assert!(res.alice_to_bob[1].parts[1].is_range_fingerprint()); + assert!(res.alice_to_bob[1].parts[2].is_range_item()); + + // Last response from Bob + assert_eq!(res.bob_to_alice[1].parts.len(), 2); + assert!(res.bob_to_alice[1].parts[0].is_range_item()); + assert!(res.bob_to_alice[1].parts[1].is_range_item()); + } + + #[test] + fn test_paper_2() { + let alice_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), // the only value being sent + ("gnu", 1), + ("hog", 1), + ]; + let bob_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("gnu", 1), + ("hog", 1), + ]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + } + + #[test] + fn test_paper_3() { + let alice_set = [ + ("ape", 1), + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("gnu", 1), + ("hog", 1), + ]; + let bob_set = [("ape", 1), ("cat", 1), ("eel", 1), ("gnu", 1)]; + + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + } + + #[test] + fn test_limits() { + let alice_set = [("ape", 1), ("bee", 1), ("cat", 1)]; + let bob_set = [("ape", 1), ("cat", 1), ("doe", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 3, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // With Limit: just ape + let limit = ("ape", "bee").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 0, "B -> A message count"); + + // With Limit: just bee, cat + let limit = ("bee", "doe").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + 
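+    // Illustrative sketch (not part of the original patch; the function name is
+    // made up): the minimal message loop two peers run to reconcile. Alice
+    // produces the initial message, then the peers alternate `process_message`
+    // until one side has nothing left to send. The `sync` helper further below
+    // drives the same loop with extra bookkeeping and assertions.
+    #[allow(dead_code)]
+    fn reconcile_sketch(alice: &mut Peer<&'static str, i32>, bob: &mut Peer<&'static str, i32>) {
+        let mut to_bob = Some(alice.initial_message());
+        while let Some(msg) = to_bob.take() {
+            if let Some(reply) = bob.process_message(msg) {
+                to_bob = alice.process_message(reply);
+            }
+        }
+    }
+
+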
#[test] + fn test_prefixes_simple() { + let alice_set = [("/foo/bar", 1), ("/foo/baz", 1), ("/foo/cat", 1)]; + let bob_set = [("/foo/bar", 1), ("/alice/bar", 1), ("/alice/baz", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + #[test] + fn test_prefixes_empty_alice() { + let alice_set = []; + let bob_set = [("/foo/bar", 1), ("/alice/bar", 1), ("/alice/baz", 1)]; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + } + + #[test] + fn test_prefixes_empty_bob() { + let alice_set = [("/foo/bar", 1), ("/foo/baz", 1), ("/foo/cat", 1)]; + let bob_set = []; + + // No Limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + // With Limit: just /alice + let limit = ("/alice", "/b").into(); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 0, "B -> A message count"); + } + + #[test] + fn test_multikey() { + #[derive(Default, Clone, PartialEq, Eq, PartialOrd, Ord)] + struct Multikey { + author: [u8; 4], + key: Vec, + } + + impl RangeKey for Multikey { + fn contains(&self, range: &Range) -> bool { + let author = range.x().author.cmp(&range.y().author); + let key = range.x().key.cmp(&range.y().key); + + match (author, key) { + (Ordering::Equal, Ordering::Equal) => { + // All + true + } + (Ordering::Equal, Ordering::Less) => { + // Regular, based on key + range.x().key <= self.key && self.key < range.y().key + } + (Ordering::Equal, Ordering::Greater) => { + // Reverse, based on key + range.x().key <= self.key || self.key < range.y().key + } + (Ordering::Less, Ordering::Equal) => { + // Regular, based on author + range.x().author <= self.author && self.author < range.y().author + } + (Ordering::Greater, Ordering::Equal) => { + // Reverse, based on key + range.x().author <= self.author || self.author < range.y().author + } + (Ordering::Less, Ordering::Less) => { + // Regular, key and author + range.x().key <= self.key + && self.key < range.y().key + && range.x().author <= self.author + && self.author < range.y().author + } + (Ordering::Greater, Ordering::Greater) => { + // Reverse, key and author + (range.x().key <= self.key || self.key < range.y().key) + && (range.x().author <= self.author || self.author < range.y().author) + } + (Ordering::Less, Ordering::Greater) => { + // Regular author, Reverse key + (range.x().key <= self.key || self.key < range.y().key) + && (range.x().author <= self.author && self.author < range.y().author) + } + (Ordering::Greater, Ordering::Less) => { + // Regular key, Reverse author + (range.x().key <= self.key && self.key < range.y().key) + && (range.x().author 
<= self.author || self.author < range.y().author) + } + } + } + } + + impl Debug for Multikey { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let key = if let Ok(key) = std::str::from_utf8(&self.key) { + key.to_string() + } else { + hex::encode(&self.key) + }; + f.debug_struct("Multikey") + .field("author", &hex::encode(&self.author)) + .field("key", &key) + .finish() + } + } + impl AsFingerprint for Multikey { + fn as_fingerprint(&self) -> Fingerprint { + let mut hasher = blake3::Hasher::new(); + hasher.update(&self.author); + hasher.update(&self.key); + Fingerprint(hasher.finalize().into()) + } + } + + impl Multikey { + fn new(author: [u8; 4], key: impl AsRef<[u8]>) -> Self { + Multikey { + author, + key: key.as_ref().to_vec(), + } + } + } + let author_a = [1u8; 4]; + let author_b = [2u8; 4]; + let alice_set = [ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + ]; + let bob_set = [ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ]; + + // No limit + let res = sync(None, &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 2, "B -> A message count"); + res.assert_alice_set( + "no limit", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + res.assert_bob_set( + "no limit", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + // Only author_a + let limit = Range::new(Multikey::new(author_a, ""), Multikey::new(author_b, "")); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 2, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + res.assert_alice_set( + "only author_a", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + ], + ); + + res.assert_bob_set( + "only author_a", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + (Multikey::new(author_a, "doe"), 1), + ], + ); + + // All authors, but only cat + let limit = Range::new( + Multikey::new(author_a, "cat"), + Multikey::new(author_a, "doe"), + ); + let res = sync(Some(limit), &alice_set, &bob_set); + assert_eq!(res.alice_to_bob.len(), 1, "A -> B message count"); + assert_eq!(res.bob_to_alice.len(), 1, "B -> A message count"); + + res.assert_alice_set( + "only cat", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_b, "bee"), 1), + (Multikey::new(author_a, "doe"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 1), + ], + ); + + res.assert_bob_set( + "only cat", + &[ + (Multikey::new(author_a, "ape"), 1), + (Multikey::new(author_a, "bee"), 1), + (Multikey::new(author_a, "cat"), 1), + (Multikey::new(author_b, "cat"), 
1), + ], + ); + } + + struct SyncResult + where + K: RangeKey + Clone + Default + AsFingerprint, + { + alice: Peer, + bob: Peer, + alice_to_bob: Vec>, + bob_to_alice: Vec>, + } + + impl SyncResult + where + K: RangeKey + Clone + Default + AsFingerprint + Debug, + V: Debug, + { + fn print_messages(&self) { + let len = std::cmp::max(self.alice_to_bob.len(), self.bob_to_alice.len()); + for i in 0..len { + if let Some(msg) = self.alice_to_bob.get(i) { + println!("A -> B:"); + print_message(msg); + } + if let Some(msg) = self.bob_to_alice.get(i) { + println!("B -> A:"); + print_message(msg); + } + } + } + } + + impl SyncResult + where + K: Debug + RangeKey + Clone + Default + AsFingerprint, + V: Debug + Clone + PartialEq, + { + fn assert_alice_set(&self, ctx: &str, expected: &[(K, V)]) { + dbg!(self.alice.all().collect::>()); + for (k, v) in expected { + assert_eq!( + self.alice.store.get(k), + Some(v), + "{}: (alice) missing key {:?}", + ctx, + k + ); + } + assert_eq!(expected.len(), self.alice.store.len(), "{}: (alice)", ctx); + } + + fn assert_bob_set(&self, ctx: &str, expected: &[(K, V)]) { + dbg!(self.bob.all().collect::>()); + + for (k, v) in expected { + assert_eq!( + self.bob.store.get(k), + Some(v), + "{}: (bob) missing key {:?}", + ctx, + k + ); + } + assert_eq!(expected.len(), self.bob.store.len(), "{}: (bob)", ctx); + } + } + + fn print_message(msg: &Message) + where + K: Debug, + V: Debug, + { + for part in &msg.parts { + match part { + MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }) => { + println!( + " RangeFingerprint({:?}, {:?}, {:?})", + range.x(), + range.y(), + fingerprint + ); + } + MessagePart::RangeItem(RangeItem { + range, + values, + have_local, + }) => { + println!( + " RangeItem({:?} | {:?}) (local?: {})\n {:?}", + range.x(), + range.y(), + have_local, + values, + ); + } + } + } + } + + fn sync( + limit: Option>, + alice_set: &[(K, V)], + bob_set: &[(K, V)], + ) -> SyncResult + where + K: PartialEq + RangeKey + Clone + Default + Debug + AsFingerprint, + V: Clone + Debug + PartialEq, + { + println!("Using Limit: {:?}", limit); + let mut expected_set_alice = BTreeMap::new(); + let mut expected_set_bob = BTreeMap::new(); + + let mut alice = if let Some(limit) = limit.clone() { + Peer::::with_limit(limit) + } else { + Peer::::default() + }; + for (k, v) in alice_set { + alice.put(k.clone(), v.clone()); + + let include = if let Some(ref limit) = limit { + k.contains(limit) + } else { + true + }; + if include { + expected_set_bob.insert(k.clone(), v.clone()); + } + // alices things are always in alices store + expected_set_alice.insert(k.clone(), v.clone()); + } + + let mut bob = if let Some(limit) = limit.clone() { + Peer::::with_limit(limit) + } else { + Peer::::default() + }; + for (k, v) in bob_set { + bob.put(k.clone(), v.clone()); + let include = if let Some(ref limit) = limit { + k.contains(limit) + } else { + true + }; + if include { + expected_set_alice.insert(k.clone(), v.clone()); + } + // bobs things are always in bobs store + expected_set_bob.insert(k.clone(), v.clone()); + } + + let mut alice_to_bob = Vec::new(); + let mut bob_to_alice = Vec::new(); + let initial_message = alice.initial_message(); + + let mut next_to_bob = Some(initial_message); + let mut rounds = 0; + while let Some(msg) = next_to_bob.take() { + assert!(rounds < 100, "too many rounds"); + rounds += 1; + alice_to_bob.push(msg.clone()); + + if let Some(msg) = bob.process_message(msg) { + bob_to_alice.push(msg.clone()); + next_to_bob = alice.process_message(msg); + } + } + 
let res = SyncResult { + alice, + bob, + alice_to_bob, + bob_to_alice, + }; + res.print_messages(); + + let alice_now: Vec<_> = res.alice.all().collect(); + assert_eq!( + expected_set_alice.iter().collect::>(), + alice_now, + "alice" + ); + + let bob_now: Vec<_> = res.bob.all().collect(); + assert_eq!(expected_set_bob.iter().collect::>(), bob_now, "bob"); + + // Check that values were never sent twice + let mut alice_sent = BTreeMap::new(); + for msg in &res.alice_to_bob { + for part in &msg.parts { + if let Some(values) = part.values() { + for (key, value) in values { + assert!( + alice_sent.insert(key.clone(), value.clone()).is_none(), + "alice: duplicate {:?} - {:?}", + key, + value + ); + } + } + } + } + + let mut bob_sent = BTreeMap::new(); + for msg in &res.bob_to_alice { + for part in &msg.parts { + if let Some(values) = part.values() { + for (key, value) in values { + assert!( + bob_sent.insert(key.clone(), value.clone()).is_none(), + "bob: duplicate {:?} - {:?}", + key, + value + ); + } + } + } + } + + res + } + + #[test] + fn store_get_range() { + let mut store = SimpleStore::<&'static str, usize>::default(); + let set = [ + ("bee", 1), + ("cat", 1), + ("doe", 1), + ("eel", 1), + ("fox", 1), + ("hog", 1), + ]; + for (k, v) in &set { + store.put(*k, *v); + } + + let all: Vec<_> = store + .get_range(Range::new("", ""), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(&all, &set[..]); + + let regular: Vec<_> = store + .get_range(("bee", "eel").into(), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(®ular, &set[..3]); + + // empty start + let regular: Vec<_> = store + .get_range(("", "eel").into(), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(®ular, &set[..3]); + + let regular: Vec<_> = store + .get_range(("cat", "hog").into(), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(®ular, &set[1..5]); + + let excluded: Vec<_> = store + .get_range(("fox", "bee").into(), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + + assert_eq!(excluded[0].0, "fox"); + assert_eq!(excluded[1].0, "hog"); + assert_eq!(excluded.len(), 2); + + let excluded: Vec<_> = store + .get_range(("fox", "doe").into(), None) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + + assert_eq!(excluded.len(), 4); + assert_eq!(excluded[0].0, "bee"); + assert_eq!(excluded[1].0, "cat"); + assert_eq!(excluded[2].0, "fox"); + assert_eq!(excluded[3].0, "hog"); + + // Limit + let all: Vec<_> = store + .get_range(("", "").into(), Some(("bee", "doe").into())) + .into_iter() + .map(|(k, v)| (*k, *v)) + .collect(); + assert_eq!(&all, &set[..2]); + } + + #[test] + fn test_div_ceil() { + assert_eq!(div_ceil(1, 1), 1 / 1); + assert_eq!(div_ceil(2, 1), 2 / 1); + assert_eq!(div_ceil(4, 2), 4 / 2); + + assert_eq!(div_ceil(3, 2), 2); + assert_eq!(div_ceil(5, 3), 2); + } +} diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs new file mode 100644 index 0000000000..f4dcece817 --- /dev/null +++ b/iroh-sync/src/sync.rs @@ -0,0 +1,789 @@ +// Names and concepts are roughly based on Willows design at the moment: +// +// https://hackmd.io/DTtck8QOQm6tZaQBBtTf7w +// +// This is going to change! 
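+//
+// Rough data model used below: a `Namespace` keypair identifies a replica
+// (document) and an `Author` keypair identifies a writer. Entries are keyed by
+// a `RecordIdentifier` (namespace, author, key), carry a `Record` (timestamp,
+// content length, content hash) and are signed with both the namespace and the
+// author key. The `Store` keeps every timestamped version per identifier and
+// resolves reads to the highest timestamp.
+//
+// Sketch of typical usage (illustrative only; assumes a `rand` RNG):
+//
+//     let mut rng = rand::thread_rng();
+//     let store = ReplicaStore::default();
+//     let author = store.new_author(&mut rng);
+//     let replica = store.new_replica(Namespace::new(&mut rng));
+//     replica.insert("doc/hello", &author, "hello world");
+//     let entry = replica.get_latest("doc/hello", author.id()).unwrap();
+//     assert!(entry.verify().is_ok());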
+ +use std::{ + cmp::Ordering, + collections::{BTreeMap, HashMap}, + fmt::{Debug, Display}, + str::FromStr, + sync::Arc, + time::SystemTime, +}; + +use parking_lot::RwLock; + +use bytes::Bytes; +use ed25519_dalek::{Signature, SignatureError, Signer, SigningKey, VerifyingKey}; +use iroh_bytes::Hash; +use rand_core::CryptoRngCore; +use serde::{Deserialize, Serialize}; + +use crate::ranger::{AsFingerprint, Fingerprint, Peer, Range, RangeKey}; + +pub type ProtocolMessage = crate::ranger::Message; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Author { + priv_key: SigningKey, + id: AuthorId, +} + +impl Display for Author { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Author({})", hex::encode(self.priv_key.to_bytes())) + } +} + +impl Author { + pub fn new(rng: &mut R) -> Self { + let priv_key = SigningKey::generate(rng); + let id = AuthorId(priv_key.verifying_key()); + + Author { priv_key, id } + } + + pub fn id(&self) -> &AuthorId { + &self.id + } + + pub fn sign(&self, msg: &[u8]) -> Signature { + self.priv_key.sign(msg) + } + + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.id.verify(msg, signature) + } +} + +#[derive(Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)] +pub struct AuthorId(VerifyingKey); + +impl Debug for AuthorId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "AuthorId({})", hex::encode(self.0.as_bytes())) + } +} + +impl AuthorId { + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.0.verify_strict(msg, signature) + } + + pub fn as_bytes(&self) -> &[u8; 32] { + self.0.as_bytes() + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Namespace { + priv_key: SigningKey, + id: NamespaceId, +} + +impl Display for Namespace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Namespace({})", hex::encode(self.priv_key.to_bytes())) + } +} + +impl FromStr for Namespace { + type Err = (); + + fn from_str(s: &str) -> Result { + let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; + let priv_key = SigningKey::from_bytes(&priv_key); + let id = NamespaceId(priv_key.verifying_key()); + Ok(Namespace { priv_key, id }) + } +} + +impl FromStr for Author { + type Err = (); + + fn from_str(s: &str) -> Result { + let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; + let priv_key = SigningKey::from_bytes(&priv_key); + let id = AuthorId(priv_key.verifying_key()); + Ok(Author { priv_key, id }) + } +} + +impl Namespace { + pub fn new(rng: &mut R) -> Self { + let priv_key = SigningKey::generate(rng); + let id = NamespaceId(priv_key.verifying_key()); + + Namespace { priv_key, id } + } + + pub fn id(&self) -> &NamespaceId { + &self.id + } + + pub fn sign(&self, msg: &[u8]) -> Signature { + self.priv_key.sign(msg) + } + + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.id.verify(msg, signature) + } +} + +#[derive(Default, Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Hash)] +pub struct NamespaceId(VerifyingKey); + +impl Display for NamespaceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NamespaceId({})", hex::encode(self.0.as_bytes())) + } +} + +impl Debug for NamespaceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "NamespaceId({})", 
hex::encode(self.0.as_bytes())) + } +} + +impl NamespaceId { + pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { + self.0.verify_strict(msg, signature) + } + + pub fn as_bytes(&self) -> &[u8; 32] { + self.0.as_bytes() + } +} + +/// Manages the replicas and authors for an instance. +#[derive(Debug, Clone, Default)] +pub struct ReplicaStore { + replicas: Arc>>, + authors: Arc>>, +} + +impl ReplicaStore { + pub fn get_replica(&self, namespace: &NamespaceId) -> Option { + let replicas = &*self.replicas.read(); + replicas.get(namespace).cloned() + } + + pub fn get_author(&self, author: &AuthorId) -> Option { + let authors = &*self.authors.read(); + authors.get(author).cloned() + } + + pub fn new_author(&self, rng: &mut R) -> Author { + let author = Author::new(rng); + self.authors.write().insert(*author.id(), author.clone()); + author + } + + pub fn new_replica(&self, namespace: Namespace) -> Replica { + let replica = Replica::new(namespace); + self.replicas + .write() + .insert(replica.namespace(), replica.clone()); + replica + } +} + +#[derive(Debug, Clone)] +pub struct Replica { + inner: Arc>, +} + +#[derive(Debug)] +struct InnerReplica { + namespace: Namespace, + peer: Peer, + content: HashMap, +} + +#[derive(Default, Debug, Clone)] +pub struct Store { + /// Stores records by identifier + timestamp + records: BTreeMap>, +} + +impl Store { + pub fn latest(&self) -> impl Iterator { + self.records.iter().filter_map(|(k, values)| { + let (_, v) = values.last_key_value()?; + Some((k, v)) + }) + } +} + +impl crate::ranger::Store for Store { + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> RecordIdentifier { + self.records + .first_key_value() + .map(|(k, _)| k.clone()) + .unwrap_or_default() + } + + fn get(&self, key: &RecordIdentifier) -> Option<&SignedEntry> { + self.records + .get(key) + .and_then(|values| values.last_key_value()) + .map(|(_, v)| v) + } + + fn len(&self) -> usize { + self.records.len() + } + + fn is_empty(&self) -> bool { + self.records.is_empty() + } + + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Fingerprint { + let elements = self.get_range(range.clone(), limit.cloned()); + let mut fp = Fingerprint::empty(); + for el in elements { + fp ^= el.0.as_fingerprint(); + } + + fp + } + + fn put(&mut self, k: RecordIdentifier, v: SignedEntry) { + // TODO: propagate error/not insertion? 
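+        // Only entries with a valid signature are accepted. Entries are stored
+        // per identifier and per timestamp, so every version of a
+        // (namespace, author, key) triple is retained and `get` resolves to the
+        // entry with the highest timestamp.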
+ if v.verify().is_ok() { + let timestamp = v.entry().record().timestamp(); + // TODO: verify timestamp is "reasonable" + + self.records.entry(k).or_default().insert(timestamp, v); + } + } + + type RangeIterator<'a> = RangeIterator<'a>; + fn get_range<'a>( + &'a self, + range: Range, + limit: Option>, + ) -> Self::RangeIterator<'a> { + RangeIterator { + iter: self.records.iter(), + range: Some(range), + limit, + } + } + + fn remove(&mut self, key: &RecordIdentifier) -> Option { + self.records + .remove(key) + .and_then(|mut v| v.last_entry().map(|e| e.remove_entry().1)) + } + + type AllIterator<'a> = RangeIterator<'a>; + + fn all(&self) -> Self::AllIterator<'_> { + RangeIterator { + iter: self.records.iter(), + range: None, + limit: None, + } + } +} + +#[derive(Debug)] +pub struct RangeIterator<'a> { + iter: std::collections::btree_map::Iter<'a, RecordIdentifier, BTreeMap>, + range: Option>, + limit: Option>, +} + +impl<'a> RangeIterator<'a> { + fn matches(&self, x: &RecordIdentifier) -> bool { + let range = self.range.as_ref().map(|r| x.contains(r)).unwrap_or(true); + let limit = self.limit.as_ref().map(|r| x.contains(r)).unwrap_or(true); + range && limit + } +} + +impl<'a> Iterator for RangeIterator<'a> { + type Item = (&'a RecordIdentifier, &'a SignedEntry); + + fn next(&mut self) -> Option { + let mut next = self.iter.next()?; + loop { + if self.matches(&next.0) { + let (k, values) = next; + let (_, v) = values.last_key_value()?; + return Some((k, v)); + } + + next = self.iter.next()?; + } + } +} + +impl Replica { + pub fn new(namespace: Namespace) -> Self { + Replica { + inner: Arc::new(RwLock::new(InnerReplica { + namespace, + peer: Peer::default(), + content: HashMap::default(), + })), + } + } + + pub fn get_content(&self, hash: &Hash) -> Option { + self.inner.read().content.get(hash).cloned() + } + + // TODO: not horrible + pub fn all(&self) -> Vec<(RecordIdentifier, SignedEntry)> { + self.inner + .read() + .peer + .all() + .map(|(k, v)| (k.clone(), v.clone())) + .collect() + } + + /// Inserts a new record at the given key. + pub fn insert(&self, key: impl AsRef<[u8]>, author: &Author, data: impl Into) { + let mut inner = self.inner.write(); + + let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); + let data: Bytes = data.into(); + let record = Record::from_data(&data, inner.namespace.id()); + + // Store content + inner.content.insert(*record.content_hash(), data); + + // Store signed entries + let entry = Entry::new(id.clone(), record); + let signed_entry = entry.sign(&inner.namespace, author); + inner.peer.put(id, signed_entry); + } + + /// Gets all entries matching this key and author. + pub fn get_latest(&self, key: impl AsRef<[u8]>, author: &AuthorId) -> Option { + let inner = self.inner.read(); + inner + .peer + .get(&RecordIdentifier::new(key, &inner.namespace.id(), author)) + .cloned() + } + + /// Returns all versions of the matching documents. 
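+    ///
+    /// Unlike `get_latest`, which only yields the entry with the newest
+    /// timestamp, this iterates over every stored version for the given key and
+    /// author, in ascending timestamp order.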
+ pub fn get_all<'a, 'b: 'a>( + &'a self, + key: impl AsRef<[u8]> + 'b, + author: &AuthorId, + ) -> GetAllIter<'a> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let record_id = RecordIdentifier::new(key, guard.namespace.id(), author); + GetAllIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + record_id, + index: 0, + } + } + + pub fn sync_initial_message(&self) -> crate::ranger::Message { + self.inner.read().peer.initial_message() + } + + pub fn sync_process_message( + &self, + message: crate::ranger::Message, + ) -> Option> { + self.inner.write().peer.process_message(message) + } + + pub fn namespace(&self) -> NamespaceId { + *self.inner.read().namespace.id() + } +} + +#[derive(Debug)] +pub struct GetAllIter<'a> { + // Oh my god, rust why u do this to me? + records: parking_lot::lock_api::MappedRwLockReadGuard< + 'a, + parking_lot::RawRwLock, + BTreeMap>, + >, + record_id: RecordIdentifier, + /// Current iteration index. + index: usize, +} + +impl<'a> Iterator for GetAllIter<'a> { + type Item = SignedEntry; + + fn next(&mut self) -> Option { + let values = self.records.get(&self.record_id)?; + + let (_, res) = values.iter().nth(self.index)?; + self.index += 1; + Some(res.clone()) // :( I give up + } +} + +/// A signed entry. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SignedEntry { + signature: EntrySignature, + entry: Entry, +} + +impl SignedEntry { + pub fn from_entry(entry: Entry, namespace: &Namespace, author: &Author) -> Self { + let signature = EntrySignature::from_entry(&entry, namespace, author); + SignedEntry { signature, entry } + } + + pub fn verify(&self) -> Result<(), SignatureError> { + self.signature + .verify(&self.entry, &self.entry.id.namespace, &self.entry.id.author) + } + + pub fn signature(&self) -> &EntrySignature { + &self.signature + } + + pub fn entry(&self) -> &Entry { + &self.entry + } +} + +/// Signature over an entry. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EntrySignature { + author_signature: Signature, + namespace_signature: Signature, +} + +impl EntrySignature { + pub fn from_entry(entry: &Entry, namespace: &Namespace, author: &Author) -> Self { + // TODO: this should probably include a namespace prefix + // namespace in the cryptographic sense. + let bytes = entry.to_vec(); + let namespace_signature = namespace.sign(&bytes); + let author_signature = author.sign(&bytes); + + EntrySignature { + author_signature, + namespace_signature, + } + } + + pub fn verify( + &self, + entry: &Entry, + namespace: &NamespaceId, + author: &AuthorId, + ) -> Result<(), SignatureError> { + let bytes = entry.to_vec(); + namespace.verify(&bytes, &self.namespace_signature)?; + author.verify(&bytes, &self.author_signature)?; + + Ok(()) + } +} + +/// A single entry in a replica. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Entry { + id: RecordIdentifier, + record: Record, +} + +impl Entry { + pub fn new(id: RecordIdentifier, record: Record) -> Self { + Entry { id, record } + } + + pub fn id(&self) -> &RecordIdentifier { + &self.id + } + + pub fn record(&self) -> &Record { + &self.record + } + + /// Serialize this entry into its canonical byte representation used for signing. 
+ pub fn into_vec(&self, out: &mut Vec) { + self.id.as_bytes(out); + self.record.as_bytes(out); + } + + pub fn to_vec(&self) -> Vec { + let mut out = Vec::new(); + self.into_vec(&mut out); + out + } + + pub fn sign(self, namespace: &Namespace, author: &Author) -> SignedEntry { + SignedEntry::from_entry(self, namespace, author) + } +} + +/// The indentifier of a record. +#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)] +pub struct RecordIdentifier { + /// The key of the record. + key: Vec, + /// The namespace this record belongs to. + namespace: NamespaceId, + /// The author that wrote this record. + author: AuthorId, +} + +impl AsFingerprint for RecordIdentifier { + fn as_fingerprint(&self) -> crate::ranger::Fingerprint { + let mut hasher = blake3::Hasher::new(); + hasher.update(self.namespace.as_bytes()); + hasher.update(self.author.as_bytes()); + hasher.update(&self.key); + Fingerprint(hasher.finalize().into()) + } +} + +impl PartialOrd for NamespaceId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for NamespaceId { + fn cmp(&self, other: &Self) -> Ordering { + self.0.as_bytes().cmp(other.0.as_bytes()) + } +} + +impl PartialOrd for AuthorId { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for AuthorId { + fn cmp(&self, other: &Self) -> Ordering { + self.0.as_bytes().cmp(other.0.as_bytes()) + } +} + +impl RangeKey for RecordIdentifier { + fn contains(&self, range: &crate::ranger::Range) -> bool { + // For now we just do key inclusion and check if namespace and author match + if self.namespace != range.x().namespace || self.namespace != range.y().namespace { + return false; + } + if self.author != range.x().author || self.author != range.y().author { + return false; + } + + let mapped_range = range.clone().map(|x, y| (x.key, y.key)); + crate::ranger::contains(&self.key, &mapped_range) + } +} + +impl RecordIdentifier { + pub fn new(key: impl AsRef<[u8]>, namespace: &NamespaceId, author: &AuthorId) -> Self { + RecordIdentifier { + key: key.as_ref().to_vec(), + namespace: *namespace, + author: *author, + } + } + + pub fn as_bytes(&self, out: &mut Vec) { + out.extend_from_slice(self.namespace.as_bytes()); + out.extend_from_slice(self.author.as_bytes()); + out.extend_from_slice(&self.key); + } + + pub fn key(&self) -> &[u8] { + &self.key + } + + pub fn namespace(&self) -> &NamespaceId { + &self.namespace + } + + pub fn author(&self) -> &AuthorId { + &self.author + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Record { + /// Record creation timestamp. Counted as micros since the Unix epoch. + timestamp: u64, + /// Length of the data referenced by `hash`. + len: u64, + hash: Hash, +} + +impl Record { + pub fn new(timestamp: u64, len: u64, hash: Hash) -> Self { + Record { + timestamp, + len, + hash, + } + } + + pub fn timestamp(&self) -> u64 { + self.timestamp + } + + pub fn content_len(&self) -> u64 { + self.len + } + + pub fn content_hash(&self) -> &Hash { + &self.hash + } + + pub fn from_data(data: impl AsRef<[u8]>, namespace: &NamespaceId) -> Self { + let timestamp = SystemTime::now() + .duration_since(SystemTime::UNIX_EPOCH) + .expect("time drift") + .as_micros() as u64; + let data = data.as_ref(); + let len = data.len() as u64; + // Salted hash + // TODO: do we actually want this? 
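// A rough sketch (an assumption-level illustration, not the patch itself) of what the
// namespace salt buys: hashing the namespace id before the payload means identical
// content stored under two namespaces produces two different content hashes.

fn salted_hash(namespace_id: &[u8; 32], data: &[u8]) -> blake3::Hash {
    let mut hasher = blake3::Hasher::new();
    hasher.update(namespace_id);
    hasher.update(data);
    hasher.finalize()
}

fn main() {
    let data = b"this is my cool data";
    // Same payload, different namespaces -> different hashes.
    assert_ne!(salted_hash(&[1u8; 32], data), salted_hash(&[2u8; 32], data));
}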
+ // TODO: this should probably use a namespace prefix if used + let mut hasher = blake3::Hasher::new(); + hasher.update(namespace.as_bytes()); + hasher.update(data); + let hash = hasher.finalize(); + + Self::new(timestamp, len, hash.into()) + } + + pub fn as_bytes(&self, out: &mut Vec) { + out.extend_from_slice(&self.timestamp.to_be_bytes()); + out.extend_from_slice(&self.len.to_be_bytes()); + out.extend_from_slice(self.hash.as_ref()); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basics() { + let mut rng = rand::thread_rng(); + let alice = Author::new(&mut rng); + let myspace = Namespace::new(&mut rng); + + let record_id = RecordIdentifier::new("/my/key", myspace.id(), alice.id()); + let record = Record::from_data(b"this is my cool data", myspace.id()); + let entry = Entry::new(record_id, record); + let signed_entry = entry.sign(&myspace, &alice); + signed_entry.verify().expect("failed to verify"); + + let my_replica = Replica::new(myspace); + for i in 0..10 { + my_replica.insert(format!("/{i}"), &alice, format!("{i}: hello from alice")); + } + + for i in 0..10 { + let res = my_replica.get_latest(format!("/{i}"), alice.id()).unwrap(); + let len = format!("{i}: hello from alice").as_bytes().len() as u64; + assert_eq!(res.entry().record().content_len(), len); + res.verify().expect("invalid signature"); + } + + // Test multiple records for the same key + my_replica.insert("/cool/path", &alice, "round 1"); + let entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); + let content = my_replica + .get_content(entry.entry().record().content_hash()) + .unwrap(); + assert_eq!(&content[..], b"round 1"); + + // Second + + my_replica.insert("/cool/path", &alice, "round 2"); + let entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); + let content = my_replica + .get_content(entry.entry().record().content_hash()) + .unwrap(); + assert_eq!(&content[..], b"round 2"); + + // Get All + let entries: Vec<_> = my_replica.get_all("/cool/path", alice.id()).collect(); + assert_eq!(entries.len(), 2); + let content = my_replica + .get_content(entries[0].entry().record().content_hash()) + .unwrap(); + assert_eq!(&content[..], b"round 1"); + let content = my_replica + .get_content(entries[1].entry().record().content_hash()) + .unwrap(); + assert_eq!(&content[..], b"round 2"); + } + + #[test] + fn test_replica_sync() { + let alice_set = ["ape", "eel", "fox", "gnu"]; + let bob_set = ["bee", "cat", "doe", "eel", "fox", "hog"]; + + let mut rng = rand::thread_rng(); + let author = Author::new(&mut rng); + let myspace = Namespace::new(&mut rng); + let mut alice = Replica::new(myspace.clone()); + for el in &alice_set { + alice.insert(el, &author, el.as_bytes()); + } + + let mut bob = Replica::new(myspace); + for el in &bob_set { + bob.insert(el, &author, el.as_bytes()); + } + + sync(&author, &mut alice, &mut bob, &alice_set, &bob_set); + } + + fn sync( + author: &Author, + alice: &mut Replica, + bob: &mut Replica, + alice_set: &[&str], + bob_set: &[&str], + ) { + // Sync alice - bob + let mut next_to_bob = Some(alice.sync_initial_message()); + let mut rounds = 0; + while let Some(msg) = next_to_bob.take() { + assert!(rounds < 100, "too many rounds"); + rounds += 1; + if let Some(msg) = bob.sync_process_message(msg) { + next_to_bob = alice.sync_process_message(msg); + } + } + + // Check result + for el in alice_set { + alice.get_latest(el, author.id()).unwrap(); + bob.get_latest(el, author.id()).unwrap(); + } + + for el in bob_set { + alice.get_latest(el, 
author.id()).unwrap(); + bob.get_latest(el, author.id()).unwrap(); + } + } +} diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 7e8ec2def8..4c8cafd970 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -26,10 +26,12 @@ iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" +iroh-sync = { path = "../iroh-sync" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } quinn = "0.10" range-collections = { version = "0.4.0" } +rand = "0.8" serde = { version = "1", features = ["derive"] } thiserror = "1" tokio = { version = "1", features = ["io-util", "rt"] } @@ -50,7 +52,6 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = tr data-encoding = "2.4.0" url = { version = "2.4", features = ["serde"] } - [features] default = ["cli", "metrics"] cli = ["clap", "config", "console", "dirs-next", "indicatif", "multibase", "quic-rpc/quinn-transport", "tempfile", "tokio/rt-multi-thread", "tracing-subscriber"] @@ -65,7 +66,6 @@ anyhow = { version = "1", features = ["backtrace"] } bytes = "1" duct = "0.13.6" nix = "0.26.2" -rand = "0.8" regex = { version = "1.7.1", features = ["std"] } testdir = "0.8" tokio = { version = "1", features = ["macros", "io-util", "rt"] } diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 1b152de112..55335257d4 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -10,6 +10,7 @@ pub mod database; pub mod dial; pub mod node; pub mod rpc_protocol; +pub mod sync; pub mod util; /// Expose metrics module diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs new file mode 100644 index 0000000000..751bc13efe --- /dev/null +++ b/iroh/src/sync.rs @@ -0,0 +1,189 @@ +//! Implementation of the iroh-sync protocol + +use anyhow::{bail, ensure, Result}; +use bytes::BytesMut; +use iroh_sync::sync::{NamespaceId, Replica, ReplicaStore}; +use serde::{Deserialize, Serialize}; +use tokio::io::{AsyncRead, AsyncWrite}; + +/// The ALPN identifier for the iroh-sync protocol +pub const SYNC_ALPN: &[u8] = b"/iroh-sync/1"; + +/// Sync Protocol +/// +/// - Init message: signals which namespace is being synced +/// - N Sync messages +/// +/// On any error and on success the substream is closed. +#[derive(Debug, Clone, Serialize, Deserialize)] +enum Message { + Init { + /// Namespace to sync + namespace: NamespaceId, + /// Initial message + message: iroh_sync::sync::ProtocolMessage, + }, + Sync(iroh_sync::sync::ProtocolMessage), +} + +/// Runs the initiator side of the sync protocol. +pub async fn run_alice( + writer: &mut W, + reader: &mut R, + alice: &Replica, +) -> Result<()> { + let mut buffer = BytesMut::with_capacity(1024); + + // Init message + + let init_message = Message::Init { + namespace: alice.namespace(), + message: alice.sync_initial_message(), + }; + let msg_bytes = postcard::to_stdvec(&init_message)?; + iroh_bytes::protocol::write_lp(writer, &msg_bytes).await?; + + // Sync message loop + + while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? { + println!("read {}", read.len()); + let msg = postcard::from_bytes(&read)?; + match msg { + Message::Init { .. 
} => { + bail!("unexpected message: init"); + } + Message::Sync(msg) => { + if let Some(msg) = alice.sync_process_message(msg) { + send_sync_message(writer, msg).await?; + } else { + break; + } + } + } + } + + Ok(()) +} + +/// Handle an iroh-sync connection and sync all shared documents in the replica store. +pub async fn handle_connection( + connecting: quinn::Connecting, + replica_store: ReplicaStore, +) -> Result<()> { + let connection = connecting.await?; + let (mut send_stream, mut recv_stream) = connection.accept_bi().await?; + + run_bob(&mut send_stream, &mut recv_stream, replica_store).await?; + send_stream.finish().await?; + + println!("done"); + + Ok(()) +} + +/// Runs the receiver side of the sync protocol. +pub async fn run_bob( + writer: &mut W, + reader: &mut R, + replica_store: ReplicaStore, +) -> Result<()> { + let mut buffer = BytesMut::with_capacity(1024); + + let mut replica = None; + while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? { + println!("read {}", read.len()); + let msg = postcard::from_bytes(&read)?; + + match msg { + Message::Init { namespace, message } => { + ensure!(replica.is_none(), "double init message"); + + match replica_store.get_replica(&namespace) { + Some(r) => { + println!("starting sync for {}", namespace); + if let Some(msg) = r.sync_process_message(message) { + send_sync_message(writer, msg).await?; + } else { + break; + } + replica = Some(r); + } + None => { + // TODO: this should be possible. + bail!("unable to synchronize unknown namespace: {}", namespace); + } + } + } + Message::Sync(msg) => match replica { + Some(ref replica) => { + if let Some(msg) = replica.sync_process_message(msg) { + send_sync_message(writer, msg).await?; + } else { + break; + } + } + None => { + bail!("unexpected sync message without init"); + } + }, + } + } + + Ok(()) +} + +async fn send_sync_message( + stream: &mut W, + msg: iroh_sync::sync::ProtocolMessage, +) -> Result<()> { + let msg_bytes = postcard::to_stdvec(&Message::Sync(msg))?; + iroh_bytes::protocol::write_lp(stream, &msg_bytes).await?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use iroh_sync::sync::Namespace; + + use super::*; + + #[tokio::test] + async fn test_sync_simple() -> Result<()> { + let mut rng = rand::thread_rng(); + + let replica_store = ReplicaStore::default(); + // For now uses same author on both sides. 
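// The test below drives run_alice and run_bob over an in-memory tokio::io::duplex pipe.
// The wire format they rely on is postcard-encoded messages behind a length prefix; the
// helpers here are illustrative stand-ins for the write_lp/read_lp calls reused from
// iroh_bytes::protocol (whose exact framing may differ), using a 4-byte big-endian
// length header.

use tokio::io::{AsyncReadExt, AsyncWriteExt};

async fn write_frame<W: AsyncWriteExt + Unpin>(w: &mut W, payload: &[u8]) -> std::io::Result<()> {
    w.write_u32(payload.len() as u32).await?;
    w.write_all(payload).await
}

async fn read_frame<R: AsyncReadExt + Unpin>(r: &mut R) -> std::io::Result<Vec<u8>> {
    let len = r.read_u32().await? as usize;
    let mut buf = vec![0u8; len];
    r.read_exact(&mut buf).await?;
    Ok(buf)
}

#[tokio::main]
async fn main() -> std::io::Result<()> {
    let (mut alice, mut bob) = tokio::io::duplex(64);
    write_frame(&mut alice, b"sync message").await?;
    assert_eq!(read_frame(&mut bob).await?, b"sync message".to_vec());
    Ok(())
}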
+ let author = replica_store.new_author(&mut rng); + let namespace = Namespace::new(&mut rng); + let bob_replica = replica_store.new_replica(namespace.clone()); + bob_replica.insert("hello alice", &author, "from bob"); + + let alice_replica = Replica::new(namespace.clone()); + alice_replica.insert("hello bob", &author, "from alice"); + + assert_eq!(bob_replica.all().len(), 1); + assert_eq!(alice_replica.all().len(), 1); + + let (alice, bob) = tokio::io::duplex(64); + + let (mut alice_reader, mut alice_writer) = tokio::io::split(alice); + let replica = alice_replica.clone(); + let alice_task = tokio::task::spawn(async move { + run_alice(&mut alice_writer, &mut alice_reader, &replica).await + }); + + let (mut bob_reader, mut bob_writer) = tokio::io::split(bob); + let bob_replica_store = replica_store.clone(); + let bob_task = tokio::task::spawn(async move { + run_bob(&mut bob_writer, &mut bob_reader, bob_replica_store).await + }); + + alice_task.await??; + bob_task.await??; + + assert_eq!(bob_replica.all().len(), 2); + assert_eq!(alice_replica.all().len(), 2); + + Ok(()) + } +} From 5039bd91d398faa68dd198bee6fb448006c779aa Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Mon, 3 Jul 2023 10:38:42 +0200 Subject: [PATCH 02/45] sync: start impl of multikey --- iroh-sync/src/sync.rs | 78 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 69 insertions(+), 9 deletions(-) diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index f4dcece817..8a5278b6a8 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -582,16 +582,15 @@ impl Ord for AuthorId { impl RangeKey for RecordIdentifier { fn contains(&self, range: &crate::ranger::Range) -> bool { - // For now we just do key inclusion and check if namespace and author match - if self.namespace != range.x().namespace || self.namespace != range.y().namespace { - return false; - } - if self.author != range.x().author || self.author != range.y().author { - return false; - } + use crate::ranger::contains; + + let key_range = range.clone().map(|x, y| (x.key, y.key)); + let namespace_range = range.clone().map(|x, y| (x.namespace, y.namespace)); + let author_range = range.clone().map(|x, y| (x.author, y.author)); - let mapped_range = range.clone().map(|x, y| (x.key, y.key)); - crate::ranger::contains(&self.key, &mapped_range) + contains(&self.key, &key_range) + && contains(&self.namespace, &namespace_range) + && contains(&self.author, &author_range) } } @@ -736,6 +735,67 @@ mod tests { assert_eq!(&content[..], b"round 2"); } + #[test] + fn test_multikey() { + let mut rng = rand::thread_rng(); + + let k = vec!["a", "c", "z"]; + + let mut n: Vec<_> = (0..3).map(|_| Namespace::new(&mut rng)).collect(); + n.sort_by_key(|n| *n.id()); + + let mut a: Vec<_> = (0..3).map(|_| Author::new(&mut rng)).collect(); + a.sort_by_key(|a| *a.id()); + + // Just key + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[1], n[0].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[2], n[0].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just namespace + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[0], n[1].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[0], n[2].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + 
assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just author + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[0], n[0].id(), a[1].id()); + let ri2 = RecordIdentifier::new(k[0], n[0].id(), a[2].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + + // Just key and namespace + { + let ri0 = RecordIdentifier::new(k[0], n[0].id(), a[0].id()); + let ri1 = RecordIdentifier::new(k[1], n[1].id(), a[0].id()); + let ri2 = RecordIdentifier::new(k[2], n[2].id(), a[0].id()); + + let range = Range::new(ri0.clone(), ri2.clone()); + assert!(ri0.contains(&range), "start"); + assert!(ri1.contains(&range), "inside"); + assert!(!ri2.contains(&range), "end"); + } + } + #[test] fn test_replica_sync() { let alice_set = ["ape", "eel", "fox", "gnu"]; From f1f59477d28295a339c4bd667efce2cb8f377c4b Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 6 Jul 2023 19:27:50 +0200 Subject: [PATCH 03/45] feat: integrate iroh-sync and iroh-gossip, add example --- Cargo.lock | 4 + iroh-sync/Cargo.toml | 1 + iroh-sync/src/sync.rs | 71 ++++++++- iroh/Cargo.toml | 10 ++ iroh/examples/sync.rs | 349 ++++++++++++++++++++++++++++++++++++++++++ iroh/src/lib.rs | 1 + iroh/src/sync.rs | 36 ++++- iroh/src/sync/live.rs | 282 ++++++++++++++++++++++++++++++++++ 8 files changed, 747 insertions(+), 7 deletions(-) create mode 100644 iroh/examples/sync.rs create mode 100644 iroh/src/sync/live.rs diff --git a/Cargo.lock b/Cargo.lock index d61d2bf5a2..07d2c5bcc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1704,12 +1704,14 @@ dependencies = [ "derive_more", "dirs-next", "duct", + "ed25519-dalek", "flume", "futures", "genawaiter", "hex", "indicatif", "iroh-bytes", + "iroh-gossip", "iroh-io", "iroh-metrics", "iroh-net", @@ -1717,6 +1719,7 @@ dependencies = [ "multibase", "nix", "num_cpus", + "once_cell", "portable-atomic", "postcard", "proptest", @@ -1924,6 +1927,7 @@ dependencies = [ "blake3", "bytes", "crossbeam", + "derive_more", "ed25519-dalek", "hex", "iroh-bytes", diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml index b7aa4ff340..0ed3ee73f7 100644 --- a/iroh-sync/Cargo.toml +++ b/iroh-sync/Cargo.toml @@ -12,6 +12,7 @@ repository = "https://github.com/n0-computer/iroh" anyhow = "1.0.71" blake3 = "1.3.3" crossbeam = "0.8.2" +derive_more = { version = "1.0.0-beta.1", features = ["debug", "display", "from", "try_into"] } ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } once_cell = "1.18.0" diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 8a5278b6a8..34d7d9bd50 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -45,6 +45,10 @@ impl Author { Author { priv_key, id } } + pub fn from_bytes(bytes: &[u8; 32]) -> Self { + SigningKey::from_bytes(&bytes).into() + } + pub fn id(&self) -> &AuthorId { &self.id } @@ -67,6 +71,12 @@ impl Debug for AuthorId { } } +impl Display for AuthorId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", hex::encode(self.0.as_bytes())) + } +} + impl AuthorId { pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { self.0.verify_strict(msg, signature) @@ -111,6 +121,20 @@ impl FromStr for Author { } } +impl From for Author { + fn from(priv_key: SigningKey) -> Self { + let id = 
AuthorId(priv_key.verifying_key()); + Self { priv_key, id } + } +} + +impl From for Namespace { + fn from(priv_key: SigningKey) -> Self { + let id = NamespaceId(priv_key.verifying_key()); + Self { priv_key, id } + } +} + impl Namespace { pub fn new(rng: &mut R) -> Self { let priv_key = SigningKey::generate(rng); @@ -119,6 +143,10 @@ impl Namespace { Namespace { priv_key, id } } + pub fn from_bytes(bytes: &[u8; 32]) -> Self { + SigningKey::from_bytes(bytes).into() + } + pub fn id(&self) -> &NamespaceId { &self.id } @@ -190,16 +218,30 @@ impl ReplicaStore { } } +/// TODO: Would potentially nice to pass a `&SignedEntry` reference, however that would make +/// everything `!Send`. +/// TODO: Not sure if the `Sync` requirement will be a problem for implementers. It comes from +/// [parking_lot::RwLock] requiring `Sync`. +pub type OnInsertCallback = Box; + #[derive(Debug, Clone)] +pub enum InsertOrigin { + Local, + Sync, +} + +#[derive(derive_more::Debug, Clone)] pub struct Replica { inner: Arc>, } -#[derive(Debug)] +#[derive(derive_more::Debug)] struct InnerReplica { namespace: Namespace, peer: Peer, content: HashMap, + #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] + on_insert: Vec, } #[derive(Default, Debug, Clone)] @@ -334,10 +376,16 @@ impl Replica { namespace, peer: Peer::default(), content: HashMap::default(), + on_insert: Default::default(), })), } } + pub fn on_insert(&self, callback: OnInsertCallback) { + let mut inner = self.inner.write(); + inner.on_insert.push(callback); + } + pub fn get_content(&self, hash: &Hash) -> Option { self.inner.read().content.get(hash).cloned() } @@ -366,7 +414,26 @@ impl Replica { // Store signed entries let entry = Entry::new(id.clone(), record); let signed_entry = entry.sign(&inner.namespace, author); - inner.peer.put(id, signed_entry); + inner.peer.put(id, signed_entry.clone()); + for cb in &inner.on_insert { + cb(InsertOrigin::Local, signed_entry.clone()) + } + } + + pub fn id(&self, key: impl AsRef<[u8]>, author: &Author) -> RecordIdentifier { + let inner = self.inner.read(); + let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); + id + } + + pub fn insert_remote_entry(&self, entry: SignedEntry) -> anyhow::Result<()> { + entry.verify()?; + let mut inner = self.inner.write(); + inner.peer.put(entry.entry.id.clone(), entry.clone()); + for cb in &inner.on_insert { + cb(InsertOrigin::Sync, entry.clone()) + } + Ok(()) } /// Gets all entries matching this key and author. 
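The on_insert hooks added above are a plain observer pattern: callbacks are stored behind the replica's lock and invoked synchronously on every insert, tagged with whether the entry was written locally or arrived via sync. A minimal self-contained sketch of that shape, with String standing in for SignedEntry and std's Mutex instead of parking_lot:

use std::sync::{Arc, Mutex};

#[derive(Debug, Clone, Copy)]
enum InsertOrigin {
    Local,
    Sync,
}

type OnInsert = Box<dyn Fn(InsertOrigin, &str) + Send + Sync + 'static>;

#[derive(Clone, Default)]
struct Notifier {
    callbacks: Arc<Mutex<Vec<OnInsert>>>,
}

impl Notifier {
    fn on_insert(&self, cb: OnInsert) {
        self.callbacks.lock().unwrap().push(cb);
    }

    fn notify(&self, origin: InsertOrigin, entry: &str) {
        // Callbacks run synchronously while the registry is locked, mirroring how the
        // replica invokes its on_insert hooks during insert/insert_remote_entry.
        for cb in self.callbacks.lock().unwrap().iter() {
            cb(origin, entry);
        }
    }
}

fn main() {
    let notifier = Notifier::default();
    notifier.on_insert(Box::new(|origin, entry| {
        println!("insert from {origin:?}: {entry}");
    }));
    notifier.notify(InsertOrigin::Local, "hello");
    notifier.notify(InsertOrigin::Sync, "from a peer");
}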
diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 4c8cafd970..bf3fe7cf04 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -27,6 +27,7 @@ iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" iroh-sync = { path = "../iroh-sync" } +iroh-gossip = { path = "../iroh-gossip" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } quinn = "0.10" @@ -52,6 +53,10 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"], optional = tr data-encoding = "2.4.0" url = { version = "2.4", features = ["serde"] } +# Examples +once_cell = { version = "1.18.0", optional = true } +ed25519-dalek = { version = "=2.0.0-rc.3", features = ["serde", "rand_core"], optional = true } + [features] default = ["cli", "metrics"] cli = ["clap", "config", "console", "dirs-next", "indicatif", "multibase", "quic-rpc/quinn-transport", "tempfile", "tokio/rt-multi-thread", "tracing-subscriber"] @@ -60,6 +65,7 @@ flat-db = [] mem-db = [] iroh-collection = [] test = [] +example-sync = ["cli", "ed25519-dalek", "once_cell"] [dev-dependencies] anyhow = { version = "1", features = ["backtrace"] } @@ -87,3 +93,7 @@ required-features = ["mem-db", "iroh-collection"] [[example]] name = "hello-world" required-features = ["mem-db"] + +[[example]] +name = "sync" +required-features = ["example-sync"] diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs new file mode 100644 index 0000000000..0b8acf5774 --- /dev/null +++ b/iroh/examples/sync.rs @@ -0,0 +1,349 @@ +//! Live edit a p2p document +//! +//! By default a new peer id is created when starting the example. To reuse your identity, +//! set the `--private-key` CLI flag with the private key printed on a previous invocation. +//! +//! You can use this with a local DERP server. To do so, run +//! `cargo run --bin derper -- --dev` +//! and then set the `-d http://localhost:3340` flag on this example. + +use std::{fmt, str::FromStr}; + +use anyhow::bail; +use clap::Parser; +use ed25519_dalek::SigningKey; +use iroh::sync::{LiveSync, PeerSource, SYNC_ALPN}; +use iroh_gossip::{ + net::{GossipHandle, GOSSIP_ALPN}, + proto::TopicId, +}; +use iroh_net::{ + defaults::{default_derp_map, DEFAULT_DERP_STUN_PORT}, + derp::{DerpMap, UseIpv4, UseIpv6}, + magic_endpoint::get_alpn, + tls::Keypair, + MagicEndpoint, +}; +use iroh_sync::sync::{Author, Namespace, Replica, ReplicaStore, SignedEntry}; +use once_cell::sync::OnceCell; +use serde::{Deserialize, Serialize}; +use tokio::sync::mpsc; +use url::Url; + +#[derive(Parser, Debug)] +struct Args { + /// Private key to derive our peer id from + #[clap(long)] + private_key: Option, + /// Set a custom DERP server. By default, the DERP server hosted by n0 will be used. + #[clap(short, long)] + derp: Option, + /// Disable DERP completeley + #[clap(long)] + no_derp: bool, + /// Set your nickname + #[clap(short, long)] + name: Option, + /// Set the bind port for our socket. By default, a random port will be used. 
+ #[clap(short, long, default_value = "0")] + bind_port: u16, + #[clap(subcommand)] + command: Command, +} + +#[derive(Parser, Debug)] +enum Command { + Open { doc_name: String }, + Join { ticket: String }, +} + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + tracing_subscriber::fmt::init(); + let args = Args::parse(); + run(args).await +} + +async fn run(args: Args) -> anyhow::Result<()> { + // parse or generate our keypair + let keypair = match args.private_key { + None => Keypair::generate(), + Some(key) => parse_keypair(&key)?, + }; + println!("> our private key: {}", fmt_secret(&keypair)); + + // configure our derp map + let derp_map = match (args.no_derp, args.derp) { + (false, None) => Some(default_derp_map()), + (false, Some(url)) => Some(derp_map_from_url(url)?), + (true, None) => None, + (true, Some(_)) => bail!("You cannot set --no-derp and --derp at the same time"), + }; + println!("> using DERP servers: {}", fmt_derp_map(&derp_map)); + + // init a cell that will hold our gossip handle to be used in endpoint callbacks + let gossip_cell: OnceCell = OnceCell::new(); + // init a channel that will emit once the initial endpoints of our local node are discovered + let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); + + // build our magic endpoint + let gossip_cell_clone = gossip_cell.clone(); + let endpoint = MagicEndpoint::builder() + .keypair(keypair.clone()) + .alpns(vec![GOSSIP_ALPN.to_vec(), SYNC_ALPN.to_vec()]) + .derp_map(derp_map) + .on_endpoints(Box::new(move |endpoints| { + // send our updated endpoints to the gossip protocol to be sent as PeerData to peers + if let Some(gossip) = gossip_cell_clone.get() { + gossip.update_endpoints(endpoints).ok(); + } + // trigger oneshot on the first endpoint update + initial_endpoints_tx.try_send(endpoints.to_vec()).ok(); + })) + .bind(args.bind_port) + .await?; + println!("> our peer id: {}", endpoint.peer_id()); + + // wait for a first endpoint update so that we know about at least one of our addrs + let initial_endpoints = initial_endpoints_rx.recv().await.unwrap(); + // println!("> our endpoints: {initial_endpoints:?}"); + + let (topic, peers) = match &args.command { + Command::Open { doc_name } => { + let topic: TopicId = blake3::hash(doc_name.as_bytes()).into(); + println!( + "> opening document {doc_name} as namespace {} and waiting for peers to join us...", + fmt_hash(topic.as_bytes()) + ); + (topic, vec![]) + } + Command::Join { ticket } => { + let Ticket { topic, peers } = Ticket::from_str(ticket)?; + println!("> joining topic {topic} and connecting to {peers:?}",); + (topic, peers) + } + }; + + let our_ticket = { + // add our local endpoints to the ticket and print it for others to join + let addrs = initial_endpoints.iter().map(|ep| ep.addr).collect(); + let mut peers = peers.clone(); + peers.push(PeerSource { + peer_id: endpoint.peer_id(), + addrs, + derp_region: endpoint.my_derp().await, + }); + Ticket { peers, topic } + }; + println!("> ticket to join us: {our_ticket}"); + + // create the gossip protocol + let gossip = { + let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); + // insert the gossip handle into the gossip cell to be used in the endpoint callbacks above + gossip_cell.set(gossip.clone()).unwrap(); + // pass our initial peer println to the gossip protocol + gossip.update_endpoints(&initial_endpoints)?; + gossip + }; + + // create the sync doc and store + let (store, author, doc) = create_document(topic, &keypair)?; + + // spawn our endpoint loop that 
forwards incoming connections + tokio::spawn(endpoint_loop( + endpoint.clone(), + gossip.clone(), + store.clone(), + )); + + // spawn an input thread that reads stdin + // not using tokio here because they recommend this for "technical reasons" + let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::(1); + std::thread::spawn(move || input_loop(line_tx)); + + // create the live syncer + let mut sync_handle = LiveSync::spawn(endpoint.clone(), gossip.clone(), doc.clone(), peers); + + // do some logging + doc.on_insert(Box::new(move |origin, entry| { + println!("> insert from {origin:?}: {}", fmt_entry(&entry)); + })); + + // process stdin lines + println!("> read to accept commands: set | get | ls | exit"); + while let Some(text) = line_rx.recv().await { + let mut parts = text.split(' '); + match [parts.next(), parts.next(), parts.next()] { + [Some("set"), Some(key), Some(value)] => { + let key = key.to_string(); + let value = value.to_string(); + doc.insert(&key, &author, value); + } + [Some("get"), Some(key), None] => { + // TODO: we need a way to get all filtered by key from all authors + let mut entries = doc + .all() + .into_iter() + .filter_map(|(id, entry)| (id.key() == key.as_bytes()).then(|| entry)); + while let Some(entry) = entries.next() { + println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); + } + } + [Some("ls"), None, None] => { + let all = doc.all(); + println!("> {} entries", all.len()); + for (_id, entry) in all { + println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); + } + } + [Some("exit"), None, None] => { + let res = sync_handle.cancel().await?; + println!("syncer closed with {res:?}"); + break; + } + _ => println!("> invalid command"), + } + } + + Ok(()) +} + +fn create_document( + topic: TopicId, + keypair: &Keypair, +) -> anyhow::Result<(ReplicaStore, Author, Replica)> { + let author = Author::from(keypair.secret().clone()); + let namespace = Namespace::from_bytes(topic.as_bytes()); + let store = ReplicaStore::default(); + let doc = store.new_replica(namespace); + Ok((store, author, doc)) +} + +async fn endpoint_loop( + endpoint: MagicEndpoint, + gossip: GossipHandle, + replica_store: ReplicaStore, +) -> anyhow::Result<()> { + while let Some(mut conn) = endpoint.accept().await { + let alpn = get_alpn(&mut conn).await?; + println!("> incoming connection with alpn {alpn}"); + // let (peer_id, alpn, conn) = accept_conn(conn).await?; + let res = match alpn.as_bytes() { + GOSSIP_ALPN => gossip.handle_connection(conn.await?).await, + SYNC_ALPN => iroh::sync::handle_connection(conn, replica_store.clone()).await, + _ => Err(anyhow::anyhow!( + "ignoring connection: unsupported ALPN protocol" + )), + }; + if let Err(err) = res { + tracing::error!("connection for {alpn} errored: {err:?}"); + } + } + Ok(()) +} + +fn input_loop(line_tx: tokio::sync::mpsc::Sender) -> anyhow::Result<()> { + let mut buffer = String::new(); + let stdin = std::io::stdin(); // We get `Stdin` here. + loop { + stdin.read_line(&mut buffer)?; + line_tx.blocking_send(buffer.trim().to_string())?; + buffer.clear(); + } +} + +#[derive(Debug, Serialize, Deserialize)] +struct Ticket { + topic: TopicId, + peers: Vec, +} +impl Ticket { + /// Deserializes from bytes. + fn from_bytes(bytes: &[u8]) -> anyhow::Result { + postcard::from_bytes(bytes).map_err(Into::into) + } + /// Serializes to bytes. + pub fn to_bytes(&self) -> Vec { + postcard::to_stdvec(self).expect("postcard::to_stdvec is infallible") + } +} + +/// Serializes to base32. 
+impl fmt::Display for Ticket { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let encoded = self.to_bytes(); + let mut text = data_encoding::BASE32_NOPAD.encode(&encoded); + text.make_ascii_lowercase(); + write!(f, "{text}") + } +} + +/// Deserializes from base32. +impl FromStr for Ticket { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let bytes = data_encoding::BASE32_NOPAD.decode(s.to_ascii_uppercase().as_bytes())?; + let slf = Self::from_bytes(&bytes)?; + Ok(slf) + } +} + +// helpers + +fn fmt_entry(entry: &SignedEntry) -> String { + let id = entry.entry().id(); + let key = std::str::from_utf8(id.key()).unwrap_or(""); + let hash = entry.entry().record().content_hash(); + let author = fmt_hash(id.author().as_bytes()); + let fmt_hash = fmt_hash(hash.as_bytes()); + format!("@{author}: {key} = {fmt_hash}") +} +fn fmt_content(doc: &Replica, entry: &SignedEntry) -> String { + let hash = entry.entry().record().content_hash(); + let content = doc.get_content(hash); + let content = content + .map(|content| String::from_utf8(content.into()).unwrap()) + .unwrap_or_else(|| "".into()); + content +} +fn fmt_hash(hash: &[u8]) -> String { + let mut text = data_encoding::BASE32_NOPAD.encode(hash); + text.make_ascii_lowercase(); + format!("{}…{}", &text[..5], &text[(text.len() - 2)..]) +} +fn fmt_secret(keypair: &Keypair) -> String { + let mut text = data_encoding::BASE32_NOPAD.encode(&keypair.secret().to_bytes()); + text.make_ascii_lowercase(); + text +} +fn parse_keypair(secret: &str) -> anyhow::Result { + let bytes: [u8; 32] = data_encoding::BASE32_NOPAD + .decode(secret.to_ascii_uppercase().as_bytes())? + .try_into() + .map_err(|_| anyhow::anyhow!("Invalid secret"))?; + let key = SigningKey::from_bytes(&bytes); + Ok(key.into()) +} +fn fmt_derp_map(derp_map: &Option) -> String { + match derp_map { + None => "None".to_string(), + Some(map) => { + let regions = map.regions.iter().map(|(id, region)| { + let nodes = region.nodes.iter().map(|node| node.url.to_string()); + (*id, nodes.collect::>()) + }); + format!("{:?}", regions.collect::>()) + } + } +} +fn derp_map_from_url(url: Url) -> anyhow::Result { + Ok(DerpMap::default_from_node( + url, + DEFAULT_DERP_STUN_PORT, + UseIpv4::TryDns, + UseIpv6::TryDns, + 0 + )) +} diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 55335257d4..0c69f4e2a8 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -10,6 +10,7 @@ pub mod database; pub mod dial; pub mod node; pub mod rpc_protocol; +#[allow(missing_docs)] pub mod sync; pub mod util; diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index 751bc13efe..68de0fe252 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -1,14 +1,21 @@ //! 
Implementation of the iroh-sync protocol -use anyhow::{bail, ensure, Result}; +use std::net::SocketAddr; + +use anyhow::{bail, ensure, Context, Result}; use bytes::BytesMut; +use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{NamespaceId, Replica, ReplicaStore}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; +use tracing::debug; /// The ALPN identifier for the iroh-sync protocol pub const SYNC_ALPN: &[u8] = b"/iroh-sync/1"; +mod live; +pub use live::*; + /// Sync Protocol /// /// - Init message: signals which namespace is being synced @@ -26,6 +33,24 @@ enum Message { Sync(iroh_sync::sync::ProtocolMessage), } +pub async fn connect_and_sync( + endpoint: &MagicEndpoint, + doc: &Replica, + peer_id: PeerId, + derp_region: Option, + addrs: &[SocketAddr], +) -> anyhow::Result<()> { + debug!("sync with peer {}: start", peer_id); + let connection = endpoint + .connect(peer_id, SYNC_ALPN, derp_region, addrs) + .await + .context("dial_and_sync")?; + let (mut send_stream, mut recv_stream) = connection.open_bi().await?; + let res = run_alice(&mut send_stream, &mut recv_stream, &doc).await; + debug!("sync with peer {}: finish {:?}", peer_id, res); + res +} + /// Runs the initiator side of the sync protocol. pub async fn run_alice( writer: &mut W, @@ -46,7 +71,7 @@ pub async fn run_alice( // Sync message loop while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? { - println!("read {}", read.len()); + debug!("read {}", read.len()); let msg = postcard::from_bytes(&read)?; match msg { Message::Init { .. } => { @@ -71,12 +96,13 @@ pub async fn handle_connection( replica_store: ReplicaStore, ) -> Result<()> { let connection = connecting.await?; + debug!("> connection established!"); let (mut send_stream, mut recv_stream) = connection.accept_bi().await?; run_bob(&mut send_stream, &mut recv_stream, replica_store).await?; send_stream.finish().await?; - println!("done"); + debug!("done"); Ok(()) } @@ -91,7 +117,7 @@ pub async fn run_bob( let mut replica = None; while let Some(read) = iroh_bytes::protocol::read_lp(&mut *reader, &mut buffer).await? 
{ - println!("read {}", read.len()); + debug!("read {}", read.len()); let msg = postcard::from_bytes(&read)?; match msg { @@ -100,7 +126,7 @@ pub async fn run_bob( match replica_store.get_replica(&namespace) { Some(r) => { - println!("starting sync for {}", namespace); + debug!("starting sync for {}", namespace); if let Some(msg) = r.sync_process_message(message) { send_sync_message(writer, msg).await?; } else { diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs new file mode 100644 index 0000000000..1697a87819 --- /dev/null +++ b/iroh/src/sync/live.rs @@ -0,0 +1,282 @@ +use std::{collections::HashMap, net::SocketAddr, sync::Arc}; + +use crate::sync::connect_and_sync; +use anyhow::{anyhow, Context}; +use futures::{ + future::{BoxFuture, Shared}, + stream::FuturesUnordered, + FutureExt, TryFutureExt, +}; +use iroh_gossip::{ + net::{Event, GossipHandle}, + proto::TopicId, +}; +use iroh_net::{tls::PeerId, MagicEndpoint}; +use iroh_sync::sync::{InsertOrigin, Replica, SignedEntry}; +use serde::{Deserialize, Serialize}; +use tokio::{ + sync::{broadcast, mpsc}, + task::JoinError, +}; +use tokio_stream::StreamExt; +use tracing::error; + +const CHANNEL_CAP: usize = 8; + +/// The address to connect to a peer +/// TODO: Move into iroh-net +/// TODO: Make an enum and support DNS resolution +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PeerSource { + pub peer_id: PeerId, + pub addrs: Vec, + pub derp_region: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum Op { + Put(SignedEntry), +} + +#[derive(Debug)] +enum SyncState { + Running, + Finished, + Failed(anyhow::Error), +} + +#[derive(Debug)] +pub enum ToActor { + Shutdown, +} + +/// Handle to a running live sync actor +#[derive(Debug, Clone)] +pub struct LiveSync { + to_actor_tx: mpsc::Sender, + task: Shared>>>, +} + +impl LiveSync { + pub fn spawn( + endpoint: MagicEndpoint, + gossip: GossipHandle, + doc: Replica, + initial_peers: Vec, + ) -> Self { + let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); + let mut actor = Actor::new(endpoint, gossip, doc, initial_peers, to_actor_rx); + let task = tokio::spawn(async move { actor.run().await }); + let handle = LiveSync { + to_actor_tx, + task: task.map_err(Arc::new).boxed().shared(), + }; + handle + } + + /// Cancel the live sync. + pub async fn cancel(&mut self) -> anyhow::Result<()> { + self.to_actor_tx.send(ToActor::Shutdown).await?; + self.task.clone().await?; + Ok(()) + } +} + +// TODO: Right now works with a single doc. Can quite easily be extended to work on a set of +// replicas. Then the handle above could have a +// `join_doc(doc: Replica, initial_peers: Vec, + gossip_stream: GossipStream, + to_actor_rx: mpsc::Receiver, + insert_entry_rx: mpsc::UnboundedReceiver, + sync_state: HashMap, + gossip: GossipHandle, + running_sync_tasks: FuturesUnordered)>>, +} + +impl Actor { + pub fn new( + endpoint: MagicEndpoint, + gossip: GossipHandle, + replica: Replica, + initial_peers: Vec, + to_actor_rx: mpsc::Receiver, + ) -> Self { + // TODO: instead of an unbounded channel, we'd want a FIFO ring buffer likely + // (we have to send from the blocking Replica::on_insert callback, so we need a channel + // with nonblocking sending, so either unbounded or ringbuffer like) + let (insert_tx, insert_rx) = mpsc::unbounded_channel(); + // let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); + // setup replica insert notifications. 
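// A condensed sketch (not part of the patch) of the bridge being set up here:
// Replica::on_insert runs synchronously under the replica's lock, so the callback pushes
// into a non-blocking unbounded channel and the async actor loop drains it. String
// stands in for SignedEntry.

use tokio::sync::mpsc;

#[tokio::main]
async fn main() {
    let (insert_tx, mut insert_rx) = mpsc::unbounded_channel::<String>();

    // Stand-in for the on_insert callback: it must not block, hence the unbounded sender.
    let on_insert = move |entry: String| {
        insert_tx.send(entry).ok();
    };

    on_insert("local write".to_string());

    // Stand-in for the actor's select! loop draining insert events.
    if let Some(entry) = insert_rx.recv().await {
        println!("would broadcast {entry} via gossip");
    }
}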
+ replica.on_insert(Box::new(move |origin, entry| { + // only care for local inserts, otherwise we'd do endless gossip loops + if let InsertOrigin::Local = origin { + insert_tx.send(entry.clone()).ok(); + } + })); + + // setup a gossip subscripion + let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id.clone()).collect(); + let topic: TopicId = replica.namespace().as_bytes().into(); + let gossip_subscription = GossipStream::new(gossip.clone(), topic, peer_ids); + + Self { + gossip, + replica, + endpoint, + gossip_stream: gossip_subscription, + insert_entry_rx: insert_rx, + to_actor_rx, + sync_state: Default::default(), + running_sync_tasks: Default::default(), + initial_peers, + } + } + pub async fn run(&mut self) { + if let Err(err) = self.run_inner().await { + error!("live sync failed: {err:?}"); + } + } + + async fn run_inner(&mut self) -> anyhow::Result<()> { + // add addresses of initial peers to our endpoint address book + for peer in &self.initial_peers { + self.endpoint + .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) + .await?; + } + // trigger initial sync with initial peers + for peer in self.initial_peers.clone().iter().map(|p| p.peer_id) { + self.sync_with_peer(peer); + } + loop { + tokio::select! { + biased; + msg = self.to_actor_rx.recv() => { + match msg { + // received shutdown signal, or livesync handle was dropped: break loop and + // exit + Some(ToActor::Shutdown) | None => break, + } + } + // new gossip message + event = self.gossip_stream.next() => { + if let Err(err) = self.on_gossip_event(event?) { + error!("Failed to process gossip event: {err:?}"); + } + }, + entry = self.insert_entry_rx.recv() => { + let entry = entry.ok_or_else(|| anyhow!("insert_rx returned None"))?; + self.on_insert_entry(entry).await?; + } + Some(res) = self.running_sync_tasks.next() => { + let (peer, res) = res.context("task sync_with_peer paniced")?; + self.on_sync_finished(peer, res); + + } + } + } + Ok(()) + } + + fn sync_with_peer(&mut self, peer: PeerId) { + // Check if we synced and only start sync if not yet synced + // sync_with_peer is triggered on NeighborUp events, so might trigger repeatedly for the + // same peers. + // TODO: Track finished time and potentially re-run sync + if let Some(_state) = self.sync_state.get(&peer) { + return; + }; + self.sync_state.insert(peer, SyncState::Running); + let task = { + let endpoint = self.endpoint.clone(); + let replica = self.replica.clone(); + tokio::spawn(async move { + println!("> connect and sync with {peer}"); + // TODO: Make sure that the peer is dialable. + let res = connect_and_sync(&endpoint, &replica, peer, None, &[]).await; + println!("> sync with {peer} done: {res:?}"); + (peer, res) + }) + }; + self.running_sync_tasks.push(task); + } + + fn on_sync_finished(&mut self, peer: PeerId, res: anyhow::Result<()>) { + let state = match res { + Ok(_) => SyncState::Finished, + Err(err) => SyncState::Failed(err), + }; + self.sync_state.insert(peer, state); + } + + fn on_gossip_event(&mut self, event: Event) -> anyhow::Result<()> { + match event { + // We received a gossip message. Try to insert it into our replica. + Event::Received(data) => { + let op: Op = postcard::from_bytes(&data)?; + match op { + Op::Put(entry) => { + self.replica.insert_remote_entry(entry)?; + } + } + } + // A new neighbor appeared in the gossip swarm. Try to sync with it directly. + // [Self::sync_with_peer] will check to not resync with peers synced previously in the + // same session. 
TODO: Maybe this is too broad and leads to too many sync requests. + Event::NeighborUp(peer) => { + self.sync_with_peer(peer); + } + _ => {} + } + Ok(()) + } + + /// A new entry was inserted locally. Broadcast a gossip message. + async fn on_insert_entry(&mut self, entry: SignedEntry) -> anyhow::Result<()> { + let op = Op::Put(entry); + let topic: TopicId = self.replica.namespace().as_bytes().into(); + self.gossip + .broadcast(topic, postcard::to_stdvec(&op)?.into()) + .await?; + Ok(()) + } +} + +// TODO: If this is the API surface we want move to iroh-gossip/src/net and make this be +// GossipHandle::subscribe +#[derive(Debug)] +pub enum GossipStream { + Joining(GossipHandle, TopicId, Vec), + Running(broadcast::Receiver), +} + +impl GossipStream { + pub fn new(gossip: GossipHandle, topic: TopicId, peers: Vec) -> Self { + Self::Joining(gossip, topic, peers) + } + pub async fn next(&mut self) -> anyhow::Result { + loop { + match self { + Self::Joining(gossip, topic, peers) => { + // TODO: avoid the clone + gossip.join(*topic, peers.clone()).await?; + let sub = gossip.subscribe(*topic).await?; + *self = Self::Running(sub); + } + Self::Running(sub) => { + let ret = sub.recv().await.map_err(|e| e.into()); + return ret; + } + } + } + } +} From 90d696e23e357608d2d2905f18ab93920c164441 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 7 Jul 2023 12:11:54 +0200 Subject: [PATCH 04/45] feat: make the live sync handler work with many docs --- iroh/examples/sync.rs | 133 ++++++++++++++--------- iroh/src/sync/live.rs | 247 +++++++++++++++++++++--------------------- 2 files changed, 203 insertions(+), 177 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 0b8acf5774..b603940761 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -9,7 +9,7 @@ use std::{fmt, str::FromStr}; -use anyhow::bail; +use anyhow::{anyhow, bail}; use clap::Parser; use ed25519_dalek::SigningKey; use iroh::sync::{LiveSync, PeerSource, SYNC_ALPN}; @@ -81,32 +81,45 @@ async fn run(args: Args) -> anyhow::Result<()> { }; println!("> using DERP servers: {}", fmt_derp_map(&derp_map)); - // init a cell that will hold our gossip handle to be used in endpoint callbacks - let gossip_cell: OnceCell = OnceCell::new(); - // init a channel that will emit once the initial endpoints of our local node are discovered - let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); - // build our magic endpoint - let gossip_cell_clone = gossip_cell.clone(); - let endpoint = MagicEndpoint::builder() - .keypair(keypair.clone()) - .alpns(vec![GOSSIP_ALPN.to_vec(), SYNC_ALPN.to_vec()]) - .derp_map(derp_map) - .on_endpoints(Box::new(move |endpoints| { - // send our updated endpoints to the gossip protocol to be sent as PeerData to peers - if let Some(gossip) = gossip_cell_clone.get() { - gossip.update_endpoints(endpoints).ok(); - } - // trigger oneshot on the first endpoint update - initial_endpoints_tx.try_send(endpoints.to_vec()).ok(); - })) - .bind(args.bind_port) - .await?; - println!("> our peer id: {}", endpoint.peer_id()); + let (endpoint, gossip, initial_endpoints) = { + // init a cell that will hold our gossip handle to be used in endpoint callbacks + let gossip_cell: OnceCell = OnceCell::new(); + // init a channel that will emit once the initial endpoints of our local node are discovered + let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); - // wait for a first endpoint update so that we know about at least one of our addrs - let initial_endpoints = 
initial_endpoints_rx.recv().await.unwrap(); - // println!("> our endpoints: {initial_endpoints:?}"); + let endpoint = MagicEndpoint::builder() + .keypair(keypair.clone()) + .alpns(vec![GOSSIP_ALPN.to_vec(), SYNC_ALPN.to_vec()]) + .derp_map(derp_map) + .on_endpoints({ + let gossip_cell = gossip_cell.clone(); + Box::new(move |endpoints| { + // send our updated endpoints to the gossip protocol to be sent as PeerData to peers + if let Some(gossip) = gossip_cell.get() { + gossip.update_endpoints(endpoints).ok(); + } + // trigger oneshot on the first endpoint update + initial_endpoints_tx.try_send(endpoints.to_vec()).ok(); + }) + }) + .bind(args.bind_port) + .await?; + + // create the gossip protocol + let gossip = { + let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); + // insert the gossip handle into the gossip cell to be used in the endpoint callbacks above + gossip_cell.set(gossip.clone()).unwrap(); + gossip + }; + // wait for a first endpoint update so that we know about at least one of our addrs + let initial_endpoints = initial_endpoints_rx.recv().await.unwrap(); + // pass our initial endpoints to the gossip protocol + gossip.update_endpoints(&initial_endpoints)?; + (endpoint, gossip, initial_endpoints) + }; + println!("> our peer id: {}", endpoint.peer_id()); let (topic, peers) = match &args.command { Command::Open { doc_name } => { @@ -124,6 +137,7 @@ async fn run(args: Args) -> anyhow::Result<()> { } }; + // println!("> our endpoints: {initial_endpoints:?}"); let our_ticket = { // add our local endpoints to the ticket and print it for others to join let addrs = initial_endpoints.iter().map(|ep| ep.addr).collect(); @@ -137,16 +151,6 @@ async fn run(args: Args) -> anyhow::Result<()> { }; println!("> ticket to join us: {our_ticket}"); - // create the gossip protocol - let gossip = { - let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); - // insert the gossip handle into the gossip cell to be used in the endpoint callbacks above - gossip_cell.set(gossip.clone()).unwrap(); - // pass our initial peer println to the gossip protocol - gossip.update_endpoints(&initial_endpoints)?; - gossip - }; - // create the sync doc and store let (store, author, doc) = create_document(topic, &keypair)?; @@ -163,7 +167,8 @@ async fn run(args: Args) -> anyhow::Result<()> { std::thread::spawn(move || input_loop(line_tx)); // create the live syncer - let mut sync_handle = LiveSync::spawn(endpoint.clone(), gossip.clone(), doc.clone(), peers); + let sync_handle = LiveSync::spawn(endpoint.clone(), gossip.clone()); + sync_handle.sync_doc(doc.clone(), peers.clone()).await?; // do some logging doc.on_insert(Box::new(move |origin, entry| { @@ -173,15 +178,18 @@ async fn run(args: Args) -> anyhow::Result<()> { // process stdin lines println!("> read to accept commands: set | get | ls | exit"); while let Some(text) = line_rx.recv().await { - let mut parts = text.split(' '); - match [parts.next(), parts.next(), parts.next()] { - [Some("set"), Some(key), Some(value)] => { - let key = key.to_string(); - let value = value.to_string(); + let cmd = match Cmd::from_str(&text) { + Ok(cmd) => cmd, + Err(err) => { + println!("> failed to parse command: {}", err); + continue; + } + }; + match cmd { + Cmd::Set { key, value } => { doc.insert(&key, &author, value); } - [Some("get"), Some(key), None] => { - // TODO: we need a way to get all filtered by key from all authors + Cmd::Get { key } => { let mut entries = doc .all() .into_iter() @@ -190,25 +198,48 @@ async fn run(args: 
Args) -> anyhow::Result<()> { println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); } } - [Some("ls"), None, None] => { + Cmd::Ls => { let all = doc.all(); println!("> {} entries", all.len()); for (_id, entry) in all { println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); } } - [Some("exit"), None, None] => { + Cmd::Exit => { let res = sync_handle.cancel().await?; println!("syncer closed with {res:?}"); break; } - _ => println!("> invalid command"), } } Ok(()) } +pub enum Cmd { + Set { key: String, value: String }, + Get { key: String }, + Ls, + Exit, +} +impl FromStr for Cmd { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + let mut parts = s.split(' '); + match [parts.next(), parts.next(), parts.next()] { + [Some("set"), Some(key), Some(value)] => Ok(Self::Set { + key: key.into(), + value: value.into(), + }), + [Some("get"), Some(key), None] => Ok(Self::Get { key: key.into() }), + [Some("ls"), None, None] => Ok(Self::Ls), + [Some("exit"), None, None] => Ok(Self::Exit), + _ => Err(anyhow!("invalid command")), + } + } +} + fn create_document( topic: TopicId, keypair: &Keypair, @@ -237,7 +268,7 @@ async fn endpoint_loop( )), }; if let Err(err) = res { - tracing::error!("connection for {alpn} errored: {err:?}"); + println!("> connection for {alpn} closed, reason: {err}"); } } Ok(()) @@ -295,16 +326,16 @@ impl FromStr for Ticket { fn fmt_entry(entry: &SignedEntry) -> String { let id = entry.entry().id(); let key = std::str::from_utf8(id.key()).unwrap_or(""); - let hash = entry.entry().record().content_hash(); let author = fmt_hash(id.author().as_bytes()); - let fmt_hash = fmt_hash(hash.as_bytes()); - format!("@{author}: {key} = {fmt_hash}") + let hash = entry.entry().record().content_hash(); + let hash = fmt_hash(hash.as_bytes()); + format!("@{author}: {key} = {hash}") } fn fmt_content(doc: &Replica, entry: &SignedEntry) -> String { let hash = entry.entry().record().content_hash(); let content = doc.get_content(hash); let content = content - .map(|content| String::from_utf8(content.into()).unwrap()) + .map(|content| String::from_utf8(content.into()).unwrap_or_else(|_| "".into())) .unwrap_or_else(|| "".into()); content } diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 1697a87819..40f2d68b56 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -1,10 +1,10 @@ use std::{collections::HashMap, net::SocketAddr, sync::Arc}; use crate::sync::connect_and_sync; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, Result}; use futures::{ future::{BoxFuture, Shared}, - stream::FuturesUnordered, + stream::{BoxStream, FuturesUnordered, StreamExt}, FutureExt, TryFutureExt, }; use iroh_gossip::{ @@ -14,11 +14,7 @@ use iroh_gossip::{ use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{InsertOrigin, Replica, SignedEntry}; use serde::{Deserialize, Serialize}; -use tokio::{ - sync::{broadcast, mpsc}, - task::JoinError, -}; -use tokio_stream::StreamExt; +use tokio::{sync::mpsc, task::JoinError}; use tracing::error; const CHANNEL_CAP: usize = 8; @@ -47,6 +43,10 @@ enum SyncState { #[derive(Debug)] pub enum ToActor { + SyncDoc { + doc: Replica, + initial_peers: Vec, + }, Shutdown, } @@ -58,15 +58,14 @@ pub struct LiveSync { } impl LiveSync { - pub fn spawn( - endpoint: MagicEndpoint, - gossip: GossipHandle, - doc: Replica, - initial_peers: Vec, - ) -> Self { + pub fn spawn(endpoint: MagicEndpoint, gossip: GossipHandle) -> Self { let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); - let mut actor = Actor::new(endpoint, 
gossip, doc, initial_peers, to_actor_rx); - let task = tokio::spawn(async move { actor.run().await }); + let mut actor = Actor::new(endpoint, gossip, to_actor_rx); + let task = tokio::spawn(async move { + if let Err(err) = actor.run().await { + error!("live sync failed: {err:?}"); + } + }); let handle = LiveSync { to_actor_tx, task: task.map_err(Arc::new).boxed().shared(), @@ -75,208 +74,204 @@ impl LiveSync { } /// Cancel the live sync. - pub async fn cancel(&mut self) -> anyhow::Result<()> { + pub async fn cancel(&self) -> Result<()> { self.to_actor_tx.send(ToActor::Shutdown).await?; self.task.clone().await?; Ok(()) } + + pub async fn sync_doc(&self, doc: Replica, initial_peers: Vec) -> Result<()> { + self.to_actor_tx + .send(ToActor::SyncDoc { doc, initial_peers }) + .await?; + Ok(()) + } } -// TODO: Right now works with a single doc. Can quite easily be extended to work on a set of -// replicas. Then the handle above could have a -// `join_doc(doc: Replica, initial_peers: Vec, - gossip_stream: GossipStream, - to_actor_rx: mpsc::Receiver, - insert_entry_rx: mpsc::UnboundedReceiver, - sync_state: HashMap, gossip: GossipHandle, - running_sync_tasks: FuturesUnordered)>>, + + docs: HashMap, + subscription: BoxStream<'static, Result<(TopicId, Event)>>, + sync_state: HashMap<(TopicId, PeerId), SyncState>, + + to_actor_rx: mpsc::Receiver, + insert_entry_tx: mpsc::UnboundedSender<(TopicId, SignedEntry)>, + insert_entry_rx: mpsc::UnboundedReceiver<(TopicId, SignedEntry)>, + + pending_syncs: FuturesUnordered)>>, + pending_joins: FuturesUnordered)>>, } impl Actor { pub fn new( endpoint: MagicEndpoint, gossip: GossipHandle, - replica: Replica, - initial_peers: Vec, to_actor_rx: mpsc::Receiver, ) -> Self { // TODO: instead of an unbounded channel, we'd want a FIFO ring buffer likely // (we have to send from the blocking Replica::on_insert callback, so we need a channel // with nonblocking sending, so either unbounded or ringbuffer like) let (insert_tx, insert_rx) = mpsc::unbounded_channel(); - // let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); - // setup replica insert notifications. 
- replica.on_insert(Box::new(move |origin, entry| { - // only care for local inserts, otherwise we'd do endless gossip loops - if let InsertOrigin::Local = origin { - insert_tx.send(entry.clone()).ok(); - } - })); - - // setup a gossip subscripion - let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id.clone()).collect(); - let topic: TopicId = replica.namespace().as_bytes().into(); - let gossip_subscription = GossipStream::new(gossip.clone(), topic, peer_ids); + let sub = gossip.clone().subscribe_all().boxed(); Self { gossip, - replica, + // replica, endpoint, - gossip_stream: gossip_subscription, + // gossip_stream: gossip_subscription, insert_entry_rx: insert_rx, + insert_entry_tx: insert_tx, to_actor_rx, sync_state: Default::default(), - running_sync_tasks: Default::default(), - initial_peers, - } - } - pub async fn run(&mut self) { - if let Err(err) = self.run_inner().await { - error!("live sync failed: {err:?}"); + pending_syncs: Default::default(), + // initial_peers, + pending_joins: Default::default(), + docs: Default::default(), + subscription: sub, } } - async fn run_inner(&mut self) -> anyhow::Result<()> { - // add addresses of initial peers to our endpoint address book - for peer in &self.initial_peers { - self.endpoint - .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) - .await?; - } - // trigger initial sync with initial peers - for peer in self.initial_peers.clone().iter().map(|p| p.peer_id) { - self.sync_with_peer(peer); - } + async fn run(&mut self) -> Result<()> { loop { tokio::select! { biased; msg = self.to_actor_rx.recv() => { match msg { - // received shutdown signal, or livesync handle was dropped: break loop and - // exit + // received shutdown signal, or livesync handle was dropped: + // break loop and exit Some(ToActor::Shutdown) | None => break, + Some(ToActor::SyncDoc { doc, initial_peers }) => self.insert_doc(doc, initial_peers).await?, } } // new gossip message - event = self.gossip_stream.next() => { - if let Err(err) = self.on_gossip_event(event?) { + Some(event) = self.subscription.next() => { + let (topic, event) = event?; + if let Err(err) = self.on_gossip_event(topic, event) { error!("Failed to process gossip event: {err:?}"); } }, entry = self.insert_entry_rx.recv() => { - let entry = entry.ok_or_else(|| anyhow!("insert_rx returned None"))?; - self.on_insert_entry(entry).await?; + let (topic, entry) = entry.ok_or_else(|| anyhow!("insert_rx returned None"))?; + self.on_insert_entry(topic, entry).await?; } - Some(res) = self.running_sync_tasks.next() => { - let (peer, res) = res.context("task sync_with_peer paniced")?; - self.on_sync_finished(peer, res); + Some((topic, peer, res)) = self.pending_syncs.next() => { + // let (topic, peer, res) = res.context("task sync_with_peer paniced")?; + self.on_sync_finished(topic, peer, res); } + Some((topic, res)) = self.pending_joins.next() => { + if let Err(err) = res { + error!("failed to join {topic:?}: {err:?}"); + } + // TODO: maintain some join state + } } } Ok(()) } - fn sync_with_peer(&mut self, peer: PeerId) { + fn sync_with_peer(&mut self, topic: TopicId, peer: PeerId) { + let Some(doc) = self.docs.get(&topic) else { + return; + }; // Check if we synced and only start sync if not yet synced // sync_with_peer is triggered on NeighborUp events, so might trigger repeatedly for the // same peers. 
// TODO: Track finished time and potentially re-run sync - if let Some(_state) = self.sync_state.get(&peer) { + if let Some(_state) = self.sync_state.get(&(topic, peer)) { return; }; - self.sync_state.insert(peer, SyncState::Running); + // TODO: fixme (doc_id, peer) + self.sync_state.insert((topic, peer), SyncState::Running); let task = { let endpoint = self.endpoint.clone(); - let replica = self.replica.clone(); - tokio::spawn(async move { + let doc = doc.clone(); + async move { println!("> connect and sync with {peer}"); // TODO: Make sure that the peer is dialable. - let res = connect_and_sync(&endpoint, &replica, peer, None, &[]).await; + let res = connect_and_sync(&endpoint, &doc, peer, None, &[]).await; println!("> sync with {peer} done: {res:?}"); - (peer, res) - }) + (topic, peer, res) + } + .boxed() }; - self.running_sync_tasks.push(task); + self.pending_syncs.push(task); } - fn on_sync_finished(&mut self, peer: PeerId, res: anyhow::Result<()>) { + async fn insert_doc(&mut self, doc: Replica, initial_peers: Vec) -> Result<()> { + let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id.clone()).collect(); + let topic: TopicId = doc.namespace().as_bytes().into(); + // join gossip for the topic to receive and send message + // let gossip = self.gossip.clone(); + self.pending_joins.push({ + let peer_ids = peer_ids.clone(); + let gossip = self.gossip.clone(); + async move { + let res = gossip.join(topic, peer_ids).await; + (topic, res) + } + .boxed() + }); + // setup replica insert notifications. + let insert_entry_tx = self.insert_entry_tx.clone(); + doc.on_insert(Box::new(move |origin, entry| { + // only care for local inserts, otherwise we'd do endless gossip loops + if let InsertOrigin::Local = origin { + insert_entry_tx.send((topic, entry.clone())).ok(); + } + })); + self.docs.insert(topic, doc); + // add addresses of initial peers to our endpoint address book + for peer in &initial_peers { + self.endpoint + .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) + .await?; + } + // trigger initial sync with initial peers + for peer in peer_ids { + self.sync_with_peer(topic, peer); + } + Ok(()) + } + + fn on_sync_finished(&mut self, topic: TopicId, peer: PeerId, res: Result<()>) { let state = match res { Ok(_) => SyncState::Finished, Err(err) => SyncState::Failed(err), }; - self.sync_state.insert(peer, state); + self.sync_state.insert((topic, peer), state); } - fn on_gossip_event(&mut self, event: Event) -> anyhow::Result<()> { + fn on_gossip_event(&mut self, topic: TopicId, event: Event) -> Result<()> { + let Some(doc) = self.docs.get(&topic) else { + return Err(anyhow!("Missing doc for {topic:?}")); + }; match event { // We received a gossip message. Try to insert it into our replica. Event::Received(data) => { let op: Op = postcard::from_bytes(&data)?; match op { - Op::Put(entry) => { - self.replica.insert_remote_entry(entry)?; - } + Op::Put(entry) => doc.insert_remote_entry(entry)?, } } // A new neighbor appeared in the gossip swarm. Try to sync with it directly. // [Self::sync_with_peer] will check to not resync with peers synced previously in the // same session. TODO: Maybe this is too broad and leads to too many sync requests. - Event::NeighborUp(peer) => { - self.sync_with_peer(peer); - } + Event::NeighborUp(peer) => self.sync_with_peer(topic, peer), _ => {} } Ok(()) } /// A new entry was inserted locally. Broadcast a gossip message. 
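    /// The entry is wrapped in an [`Op::Put`], postcard-encoded and broadcast on the
    /// document's gossip topic.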
- async fn on_insert_entry(&mut self, entry: SignedEntry) -> anyhow::Result<()> { + async fn on_insert_entry(&mut self, topic: TopicId, entry: SignedEntry) -> Result<()> { let op = Op::Put(entry); - let topic: TopicId = self.replica.namespace().as_bytes().into(); - self.gossip - .broadcast(topic, postcard::to_stdvec(&op)?.into()) - .await?; + let message = postcard::to_stdvec(&op)?.into(); + self.gossip.broadcast(topic, message).await?; Ok(()) } } - -// TODO: If this is the API surface we want move to iroh-gossip/src/net and make this be -// GossipHandle::subscribe -#[derive(Debug)] -pub enum GossipStream { - Joining(GossipHandle, TopicId, Vec), - Running(broadcast::Receiver), -} - -impl GossipStream { - pub fn new(gossip: GossipHandle, topic: TopicId, peers: Vec) -> Self { - Self::Joining(gossip, topic, peers) - } - pub async fn next(&mut self) -> anyhow::Result { - loop { - match self { - Self::Joining(gossip, topic, peers) => { - // TODO: avoid the clone - gossip.join(*topic, peers.clone()).await?; - let sub = gossip.subscribe(*topic).await?; - *self = Self::Running(sub); - } - Self::Running(sub) => { - let ret = sub.recv().await.map_err(|e| e.into()); - return ret; - } - } - } - } -} From 476896d36d6a7848397d3cd279fbb48a91d083ed Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 7 Jul 2023 12:25:14 +0200 Subject: [PATCH 05/45] chore: cleanup and clippy --- iroh-sync/src/ranger.rs | 14 +++++++------- iroh-sync/src/sync.rs | 6 +++--- iroh/src/sync.rs | 2 +- iroh/src/sync/live.rs | 13 +++++-------- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index 159d99d8cf..7317d2a321 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -160,7 +160,7 @@ impl MessagePart { pub fn values(&self) -> Option<&[(K, V)]> { match self { MessagePart::RangeFingerprint(_) => None, - MessagePart::RangeItem(RangeItem { values, .. }) => Some(&values), + MessagePart::RangeItem(RangeItem { values, .. }) => Some(values), } } } @@ -180,7 +180,7 @@ where { /// Construct the initial message. 
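    /// The initial message carries a single fingerprint over the full range
    /// `Range::new(x, x)`, where `x` is the first key in the store.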
fn init>(store: &S, limit: Option<&Range>) -> Self { - let x = store.get_first().clone(); + let x = store.get_first(); let range = Range::new(x.clone(), x); let fingerprint = store.get_fingerprint(&range, limit); let part = MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }); @@ -332,7 +332,7 @@ where }; loop { - if filter(&next.0) { + if filter(next.0) { return Some(next); } @@ -432,7 +432,7 @@ where self.store .get_range(range.clone(), self.limit.clone()) .into_iter() - .filter(|(k, _)| values.iter().find(|(vk, _)| &vk == k).is_none()) + .filter(|(k, _)| !values.iter().any(|(vk, _)| &vk == k)) .map(|(k, v)| (k.clone(), v.clone())) .collect(), ) @@ -816,7 +816,7 @@ mod tests { hex::encode(&self.key) }; f.debug_struct("Multikey") - .field("author", &hex::encode(&self.author)) + .field("author", &hex::encode(self.author)) .field("key", &key) .finish() } @@ -1236,8 +1236,8 @@ mod tests { #[test] fn test_div_ceil() { - assert_eq!(div_ceil(1, 1), 1 / 1); - assert_eq!(div_ceil(2, 1), 2 / 1); + assert_eq!(div_ceil(1, 1), 1); + assert_eq!(div_ceil(2, 1), 2); assert_eq!(div_ceil(4, 2), 4 / 2); assert_eq!(div_ceil(3, 2), 2); diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 34d7d9bd50..adf75507e9 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -46,7 +46,7 @@ impl Author { } pub fn from_bytes(bytes: &[u8; 32]) -> Self { - SigningKey::from_bytes(&bytes).into() + SigningKey::from_bytes(bytes).into() } pub fn id(&self) -> &AuthorId { @@ -358,7 +358,7 @@ impl<'a> Iterator for RangeIterator<'a> { fn next(&mut self) -> Option { let mut next = self.iter.next()?; loop { - if self.matches(&next.0) { + if self.matches(next.0) { let (k, values) = next; let (_, v) = values.last_key_value()?; return Some((k, v)); @@ -441,7 +441,7 @@ impl Replica { let inner = self.inner.read(); inner .peer - .get(&RecordIdentifier::new(key, &inner.namespace.id(), author)) + .get(&RecordIdentifier::new(key, inner.namespace.id(), author)) .cloned() } diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index 68de0fe252..235e190915 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -46,7 +46,7 @@ pub async fn connect_and_sync( .await .context("dial_and_sync")?; let (mut send_stream, mut recv_stream) = connection.open_bi().await?; - let res = run_alice(&mut send_stream, &mut recv_stream, &doc).await; + let res = run_alice(&mut send_stream, &mut recv_stream, doc).await; debug!("sync with peer {}: finish {:?}", peer_id, res); res } diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 40f2d68b56..e11948d3aa 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -15,7 +15,7 @@ use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{InsertOrigin, Replica, SignedEntry}; use serde::{Deserialize, Serialize}; use tokio::{sync::mpsc, task::JoinError}; -use tracing::error; +use tracing::{debug, error}; const CHANNEL_CAP: usize = 8; @@ -120,15 +120,12 @@ impl Actor { Self { gossip, - // replica, endpoint, - // gossip_stream: gossip_subscription, insert_entry_rx: insert_rx, insert_entry_tx: insert_tx, to_actor_rx, sync_state: Default::default(), pending_syncs: Default::default(), - // initial_peers, pending_joins: Default::default(), docs: Default::default(), subscription: sub, @@ -191,10 +188,10 @@ impl Actor { let endpoint = self.endpoint.clone(); let doc = doc.clone(); async move { - println!("> connect and sync with {peer}"); + debug!("sync with {peer}"); // TODO: Make sure that the peer is dialable. 
let res = connect_and_sync(&endpoint, &doc, peer, None, &[]).await; - println!("> sync with {peer} done: {res:?}"); + debug!("> synced with {peer}: {res:?}"); (topic, peer, res) } .boxed() @@ -203,7 +200,7 @@ impl Actor { } async fn insert_doc(&mut self, doc: Replica, initial_peers: Vec) -> Result<()> { - let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id.clone()).collect(); + let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id).collect(); let topic: TopicId = doc.namespace().as_bytes().into(); // join gossip for the topic to receive and send message // let gossip = self.gossip.clone(); @@ -221,7 +218,7 @@ impl Actor { doc.on_insert(Box::new(move |origin, entry| { // only care for local inserts, otherwise we'd do endless gossip loops if let InsertOrigin::Local = origin { - insert_entry_tx.send((topic, entry.clone())).ok(); + insert_entry_tx.send((topic, entry)).ok(); } })); self.docs.insert(topic, doc); From 1e505bca61e756893704993da502f7c92bda4495 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Wed, 12 Jul 2023 16:14:48 +0200 Subject: [PATCH 06/45] chore: remove old code and add docs --- iroh/src/sync.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index 235e190915..72b5c02ade 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -33,6 +33,7 @@ enum Message { Sync(iroh_sync::sync::ProtocolMessage), } +/// Connect to a peer and sync a replica pub async fn connect_and_sync( endpoint: &MagicEndpoint, doc: &Replica, From 2af3471e5c12989f3548c72972197a007114a678 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Wed, 12 Jul 2023 14:03:45 +0200 Subject: [PATCH 07/45] feat: WIP integration of sync and bytes * removes content support from iroh-sync * adds a quick-and-dirty writable database to iroh-bytes (will be replaced with a better generic writable database soon) * adds a `Downloader` to queue get requests for individual hashes from individual peers * adds a `BlobStore` that combines the writable db with the downloader * adds a `Doc` abstraction that combines an iroh-sync `Replica` with a `BlobStore` to download content from peers on-demand * updates the sync repl example to plug it all together * also adds very basic persistence to `Replica` (encode to byte string) and uses this in the repl example --- Cargo.lock | 1 + iroh-bytes/src/lib.rs | 1 + iroh-bytes/src/writable.rs | 178 ++++++++++++++++++++ iroh-net/src/tls.rs | 12 ++ iroh-sync/Cargo.toml | 1 + iroh-sync/src/ranger.rs | 9 +- iroh-sync/src/sync.rs | 80 ++++++--- iroh/examples/sync.rs | 289 +++++++++++++++++++++++++-------- iroh/src/sync.rs | 2 + iroh/src/sync/content.rs | 324 +++++++++++++++++++++++++++++++++++++ 10 files changed, 805 insertions(+), 92 deletions(-) create mode 100644 iroh-bytes/src/writable.rs create mode 100644 iroh/src/sync/content.rs diff --git a/Cargo.lock b/Cargo.lock index 07d2c5bcc8..aa8d00e93a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1933,6 +1933,7 @@ dependencies = [ "iroh-bytes", "once_cell", "parking_lot", + "postcard", "rand", "rand_core", "serde", diff --git a/iroh-bytes/src/lib.rs b/iroh-bytes/src/lib.rs index 18d95542d0..02ef6105e4 100644 --- a/iroh-bytes/src/lib.rs +++ b/iroh-bytes/src/lib.rs @@ -8,6 +8,7 @@ pub mod get; pub mod protocol; pub mod provider; pub mod util; +pub mod writable; #[cfg(test)] pub(crate) mod test_utils; diff --git a/iroh-bytes/src/writable.rs b/iroh-bytes/src/writable.rs new file mode 100644 index 0000000000..fe492e47b4 --- /dev/null +++ b/iroh-bytes/src/writable.rs @@ -0,0 +1,178 @@ 
+#![allow(missing_docs)] +//! Quick-and-dirty writable database +//! +//! I wrote this while diving into iroh-bytes, wildly copying code around. This will be solved much +//! nicer with the upcoming generic writable database branch by @rklaehn. + +use std::{collections::HashMap, io, path::PathBuf, sync::Arc}; + +use anyhow::Context; +use bytes::Bytes; +use iroh_io::{AsyncSliceWriter, File}; +use range_collections::RangeSet2; + +use crate::{ + get::fsm, + protocol::{GetRequest, RangeSpecSeq, Request}, + provider::{create_collection, DataSource, Database, DbEntry, FNAME_PATHS}, + Hash, +}; + +/// A blob database into which new blobs can be inserted. +/// +/// Blobs can be inserted either from bytes or by downloading from open connections to peers. +/// New blobs will be saved as files with a filename based on their hash. +/// +/// TODO: Replace with the generic writable database. +#[derive(Debug, Clone)] +pub struct WritableFileDatabase { + db: Database, + storage: Arc, +} + +impl WritableFileDatabase { + pub async fn new(data_path: PathBuf) -> anyhow::Result { + let storage = Arc::new(StoragePaths::new(data_path).await?); + let db = if storage.db_path.join(FNAME_PATHS).exists() { + Database::load(&storage.db_path).await.with_context(|| { + format!( + "Failed to load iroh database from {}", + storage.db_path.display() + ) + })? + } else { + Database::default() + }; + Ok(Self { db, storage }) + } + + pub fn db(&self) -> &Database { + &self.db + } + + pub async fn save(&self) -> io::Result<()> { + self.db.save(&self.storage.db_path).await + } + + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.put_bytes(data).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + + pub async fn put_from_temp_file(&self, temp_path: &PathBuf) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.move_to_blobs(&temp_path).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + + pub async fn get_size(&self, hash: &Hash) -> Option { + Some(self.db.get(&hash)?.size().await) + } + + pub fn has(&self, hash: &Hash) -> bool { + self.db.to_inner().contains_key(hash) + } + pub async fn download_single( + &self, + conn: quinn::Connection, + hash: Hash, + ) -> anyhow::Result> { + // 1. Download to temp file + let temp_path = { + let temp_path = self.storage.temp_path(); + let request = + Request::Get(GetRequest::new(hash, RangeSpecSeq::new([RangeSet2::all()]))); + let response = fsm::start(conn, request); + let connected = response.next().await?; + + let fsm::ConnectedNext::StartRoot(curr) = connected.next().await? else { + return Ok(None) + }; + let header = curr.next(); + + let path = temp_path.clone(); + let mut data_file = File::create(move || { + std::fs::OpenOptions::new() + .write(true) + .create(true) + .open(&path) + }) + .await?; + + let (curr, _size) = header.next().await?; + let _curr = curr.write_all(&mut data_file).await?; + // Flush the data file first, it is the only thing that matters at this point + data_file.sync().await?; + temp_path + }; + + // 2. 
Insert into database + let (hash, size, entry) = self.storage.move_to_blobs(&temp_path).await?; + let entries = HashMap::from_iter([(hash, entry)]); + self.db.union_with(entries); + Ok(Some((hash, size))) + } +} + +#[derive(Debug)] +pub struct StoragePaths { + blob_path: PathBuf, + temp_path: PathBuf, + db_path: PathBuf, +} + +impl StoragePaths { + pub async fn new(data_path: PathBuf) -> anyhow::Result { + let blob_path = data_path.join("blobs"); + let temp_path = data_path.join("temp"); + let db_path = data_path.join("db"); + tokio::fs::create_dir_all(&blob_path).await?; + tokio::fs::create_dir_all(&temp_path).await?; + tokio::fs::create_dir_all(&db_path).await?; + Ok(Self { + blob_path, + temp_path, + db_path, + }) + } + + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64, DbEntry)> { + let temp_path = self.temp_path(); + tokio::fs::write(&temp_path, &data).await?; + let (hash, size, entry) = self.move_to_blobs(&temp_path).await?; + Ok((hash, size, entry)) + } + + async fn move_to_blobs(&self, path: &PathBuf) -> anyhow::Result<(Hash, u64, DbEntry)> { + let datasource = DataSource::new(path.clone()); + // TODO: this needlessly creates a collection, but that's what's pub atm in iroh-bytes + let (db, _collection_hash) = create_collection(vec![datasource]).await?; + // the actual blob is the first entry in the external entries in the created collection + let (hash, _path, _len) = db.external().next().unwrap(); + let Some(DbEntry::External { outboard, size, .. }) = db.get(&hash) else { + unreachable!("just inserted"); + }; + + let final_path = prepare_hash_dir(&self.blob_path, &hash).await?; + tokio::fs::rename(&path, &final_path).await?; + let entry = DbEntry::External { + outboard, + path: final_path, + size, + }; + Ok((hash, size, entry)) + } + + fn temp_path(&self) -> PathBuf { + let name = hex::encode(rand::random::().to_be_bytes()); + self.temp_path.join(name) + } +} + +async fn prepare_hash_dir(path: &PathBuf, hash: &Hash) -> anyhow::Result { + let hash = hex::encode(hash.as_ref()); + let path = path.join(&hash[0..2]).join(&hash[2..4]).join(&hash[4..]); + tokio::fs::create_dir_all(path.parent().unwrap()).await?; + Ok(path) +} diff --git a/iroh-net/src/tls.rs b/iroh-net/src/tls.rs index 4f92c9106a..07dbeda035 100644 --- a/iroh-net/src/tls.rs +++ b/iroh-net/src/tls.rs @@ -106,6 +106,18 @@ impl PeerId { pub fn as_bytes(&self) -> &[u8; 32] { self.0.as_bytes() } + + /// Try to create a peer id from a byte array. + /// + /// # Warning + /// + /// The caller is responsible for ensuring that the bytes passed into this + /// method actually represent a `curve25519_dalek::curve::CompressedEdwardsY` + /// and that said compressed point is actually a point on the curve. 
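+    ///
+    /// Returns an error if the bytes do not decode to a valid public key.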
+ pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + let key = PublicKey::from_bytes(bytes)?; + Ok(PeerId(key)) + } } impl From for PeerId { diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml index 0ed3ee73f7..0d0c9b891a 100644 --- a/iroh-sync/Cargo.toml +++ b/iroh-sync/Cargo.toml @@ -16,6 +16,7 @@ derive_more = { version = "1.0.0-beta.1", features = ["debug", "display", "from" ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } once_cell = "1.18.0" +postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } rand = "0.8.5" rand_core = "0.6.4" serde = { version = "1.0.164", features = ["derive"] } diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index 7317d2a321..d5310839a6 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -401,11 +401,12 @@ where /// Processes an incoming message and produces a response. /// If terminated, returns `None` - pub fn process_message(&mut self, message: Message) -> Option> { + pub fn process_message(&mut self, message: Message) -> (Vec, Option>) { let mut out = Vec::new(); // TODO: can these allocs be avoided? let mut items = Vec::new(); + let mut inserted = Vec::new(); let mut fingerprints = Vec::new(); for part in message.parts { match part { @@ -431,7 +432,6 @@ where Some( self.store .get_range(range.clone(), self.limit.clone()) - .into_iter() .filter(|(k, _)| !values.iter().any(|(vk, _)| &vk == k)) .map(|(k, v)| (k.clone(), v.clone())) .collect(), @@ -440,6 +440,7 @@ where // Store incoming values for (k, v) in values { + inserted.push(k.clone()); self.store.put(k, v); } @@ -546,9 +547,9 @@ where // If we have any parts, return a message if !out.is_empty() { - Some(Message { parts: out }) + (inserted, Some(Message { parts: out })) } else { - None + (inserted, None) } } diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index adf75507e9..623bd81af7 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -216,6 +216,14 @@ impl ReplicaStore { .insert(replica.namespace(), replica.clone()); replica } + + pub fn open_replica(&self, bytes: &[u8]) -> anyhow::Result { + let replica = Replica::from_bytes(bytes)?; + self.replicas + .write() + .insert(replica.namespace(), replica.clone()); + Ok(replica) + } } /// TODO: Would potentially nice to pass a `&SignedEntry` reference, however that would make @@ -239,7 +247,6 @@ pub struct Replica { struct InnerReplica { namespace: Namespace, peer: Peer, - content: HashMap, #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] on_insert: Vec, } @@ -337,6 +344,12 @@ impl crate::ranger::Store for Store { } } +#[derive(Debug, Serialize, Deserialize)] +struct ReplicaData { + entries: Vec, + namespace: Namespace, +} + #[derive(Debug)] pub struct RangeIterator<'a> { iter: std::collections::btree_map::Iter<'a, RecordIdentifier, BTreeMap>, @@ -375,7 +388,6 @@ impl Replica { inner: Arc::new(RwLock::new(InnerReplica { namespace, peer: Peer::default(), - content: HashMap::default(), on_insert: Default::default(), })), } @@ -386,10 +398,6 @@ impl Replica { inner.on_insert.push(callback); } - pub fn get_content(&self, hash: &Hash) -> Option { - self.inner.read().content.get(hash).cloned() - } - // TODO: not horrible pub fn all(&self) -> Vec<(RecordIdentifier, SignedEntry)> { self.inner @@ -400,23 +408,45 @@ impl Replica { .collect() } + // TODO: not horrible + pub fn all_for_key(&self, key: impl AsRef<[u8]>) -> Vec<(RecordIdentifier, 
SignedEntry)> { + self.all() + .into_iter() + .filter(|(id, _entry)| id.key() == key.as_ref()) + .collect() + } + + pub fn to_bytes(&self) -> anyhow::Result { + let entries = self.all().into_iter().map(|(_id, entry)| entry).collect(); + let data = ReplicaData { + entries, + namespace: self.inner.read().namespace.clone(), + }; + let bytes = postcard::to_stdvec(&data)?; + Ok(bytes.into()) + } + pub fn from_bytes(bytes: &[u8]) -> anyhow::Result { + let data: ReplicaData = postcard::from_bytes(bytes)?; + let replica = Self::new(data.namespace); + for entry in data.entries { + replica.insert_remote_entry(entry)?; + } + Ok(replica) + } + /// Inserts a new record at the given key. - pub fn insert(&self, key: impl AsRef<[u8]>, author: &Author, data: impl Into) { + pub fn insert(&self, key: impl AsRef<[u8]>, author: &Author, hash: Hash, len: u64) { let mut inner = self.inner.write(); let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); - let data: Bytes = data.into(); - let record = Record::from_data(&data, inner.namespace.id()); - - // Store content - inner.content.insert(*record.content_hash(), data); + let record = Record::from_hash(hash, len); // Store signed entries let entry = Entry::new(id.clone(), record); let signed_entry = entry.sign(&inner.namespace, author); inner.peer.put(id, signed_entry.clone()); for cb in &inner.on_insert { - cb(InsertOrigin::Local, signed_entry.clone()) + cb(InsertOrigin::Local, signed_entry.clone()); } } @@ -470,7 +500,15 @@ impl Replica { &self, message: crate::ranger::Message, ) -> Option> { - self.inner.write().peer.process_message(message) + let (inserted_keys, reply) = self.inner.write().peer.process_message(message); + let inner = self.inner.read(); + for key in inserted_keys { + let entry = inner.peer.get(&key).unwrap(); + for cb in &inner.on_insert { + cb(InsertOrigin::Sync, entry.clone()) + } + } + reply } pub fn namespace(&self) -> NamespaceId { @@ -719,22 +757,24 @@ impl Record { &self.hash } - pub fn from_data(data: impl AsRef<[u8]>, namespace: &NamespaceId) -> Self { + pub fn from_hash(hash: Hash, len: u64) -> Self { let timestamp = SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .expect("time drift") .as_micros() as u64; - let data = data.as_ref(); - let len = data.len() as u64; + Self::new(timestamp, len, hash) + } + + // TODO: remove + pub fn from_data(data: impl AsRef<[u8]>, namespace: &NamespaceId) -> Self { // Salted hash // TODO: do we actually want this? // TODO: this should probably use a namespace prefix if used let mut hasher = blake3::Hasher::new(); hasher.update(namespace.as_bytes()); - hasher.update(data); + hasher.update(data.as_ref()); let hash = hasher.finalize(); - - Self::new(timestamp, len, hash.into()) + Self::from_hash(hash.into(), data.as_ref().len() as u64) } pub fn as_bytes(&self, out: &mut Vec) { diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index b603940761..3c0a332d8d 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -7,12 +7,15 @@ //! `cargo run --bin derper -- --dev` //! and then set the `-d http://localhost:3340` flag on this example. 
-use std::{fmt, str::FromStr}; +use std::{fmt, path::PathBuf, str::FromStr, sync::Arc}; use anyhow::{anyhow, bail}; +use bytes::Bytes; use clap::Parser; use ed25519_dalek::SigningKey; -use iroh::sync::{LiveSync, PeerSource, SYNC_ALPN}; +use futures::{future::BoxFuture, FutureExt}; +use iroh::sync::{BlobStore, Doc, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; +use iroh_bytes::provider::Database; use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, @@ -24,7 +27,7 @@ use iroh_net::{ tls::Keypair, MagicEndpoint, }; -use iroh_sync::sync::{Author, Namespace, Replica, ReplicaStore, SignedEntry}; +use iroh_sync::sync::{Author, Namespace, NamespaceId, Replica, ReplicaStore, SignedEntry}; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; @@ -35,6 +38,9 @@ struct Args { /// Private key to derive our peer id from #[clap(long)] private_key: Option, + /// Path to a data directory where blobs will be persisted + #[clap(short, long)] + storage_path: Option, /// Set a custom DERP server. By default, the DERP server hosted by n0 will be used. #[clap(short, long)] derp: Option, @@ -90,7 +96,11 @@ async fn run(args: Args) -> anyhow::Result<()> { let endpoint = MagicEndpoint::builder() .keypair(keypair.clone()) - .alpns(vec![GOSSIP_ALPN.to_vec(), SYNC_ALPN.to_vec()]) + .alpns(vec![ + GOSSIP_ALPN.to_vec(), + SYNC_ALPN.to_vec(), + iroh_bytes::protocol::ALPN.to_vec(), + ]) .derp_map(derp_map) .on_endpoints({ let gossip_cell = gossip_cell.clone(); @@ -151,68 +161,94 @@ async fn run(args: Args) -> anyhow::Result<()> { }; println!("> ticket to join us: {our_ticket}"); + // unwrap our storage path or default to temp + let storage_path = args.storage_path.unwrap_or_else(|| { + let dir = format!("/tmp/iroh-example-sync-{}", endpoint.peer_id()); + let dir = PathBuf::from(dir); + if !dir.exists() { + std::fs::create_dir(&dir).expect("failed to create temp dir"); + } + dir + }); + println!("> persisting data in {storage_path:?}"); + + // create a runtime + // we need this because some things need to spawn !Send futures + let rt = create_rt()?; // create the sync doc and store - let (store, author, doc) = create_document(topic, &keypair)?; + // we need to pass the runtime because a !Send task is spawned for + // the downloader in the blob store + let blobs = BlobStore::new(rt.clone(), storage_path.clone(), endpoint.clone()).await?; + let (store, author, doc) = + create_or_open_document(&storage_path, blobs.clone(), topic, &keypair).await?; + // construct the state that is passed to the endpoint loop and from there cloned + // into to the connection handler task for incoming connections. 
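+    // (the state bundles the runtime handle, the gossip handle, the replica store and the
+    // blob database)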
+ let state = Arc::new(State { + gossip: gossip.clone(), + replica_store: store.clone(), + db: blobs.db().clone(), + rt, + }); // spawn our endpoint loop that forwards incoming connections - tokio::spawn(endpoint_loop( - endpoint.clone(), - gossip.clone(), - store.clone(), - )); - - // spawn an input thread that reads stdin - // not using tokio here because they recommend this for "technical reasons" - let (line_tx, mut line_rx) = tokio::sync::mpsc::channel::(1); - std::thread::spawn(move || input_loop(line_tx)); + tokio::spawn(endpoint_loop(endpoint.clone(), state)); // create the live syncer let sync_handle = LiveSync::spawn(endpoint.clone(), gossip.clone()); - sync_handle.sync_doc(doc.clone(), peers.clone()).await?; + sync_handle + .sync_doc(doc.replica().clone(), peers.clone()) + .await?; - // do some logging - doc.on_insert(Box::new(move |origin, entry| { - println!("> insert from {origin:?}: {}", fmt_entry(&entry)); - })); + // spawn an input thread that reads stdin and parses each line as a `Cmd` command + // not using tokio here because they recommend this for "technical reasons" + let (cmd_tx, mut cmd_rx) = tokio::sync::mpsc::channel::(1); + std::thread::spawn(move || input_loop(cmd_tx)); + + // process commands in a loop + println!("> ready to accept commands: set | get | ls | exit"); + loop { + let cmd = tokio::select! { + Some(cmd) = cmd_rx.recv() => cmd, + _ = tokio::signal::ctrl_c() => Cmd::Exit - // process stdin lines - println!("> read to accept commands: set | get | ls | exit"); - while let Some(text) = line_rx.recv().await { - let cmd = match Cmd::from_str(&text) { - Ok(cmd) => cmd, - Err(err) => { - println!("> failed to parse command: {}", err); - continue; - } }; match cmd { Cmd::Set { key, value } => { - doc.insert(&key, &author, value); + doc.insert(&key, &author, value.into_bytes().into()).await?; } Cmd::Get { key } => { - let mut entries = doc - .all() - .into_iter() - .filter_map(|(id, entry)| (id.key() == key.as_bytes()).then(|| entry)); - while let Some(entry) = entries.next() { - println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); + let entries = doc.replica().all_for_key(key.as_bytes()); + for (_id, entry) in entries { + let content = fmt_content(&doc, &entry).await?; + println!("{} -> {content}", fmt_entry(&entry),); } } Cmd::Ls => { - let all = doc.all(); + let all = doc.replica().all(); println!("> {} entries", all.len()); for (_id, entry) in all { - println!("{} -> {}", fmt_entry(&entry), fmt_content(&doc, &entry)); + println!( + "{} -> {}", + fmt_entry(&entry), + fmt_content(&doc, &entry).await? 
+ ); } } Cmd::Exit => { - let res = sync_handle.cancel().await?; - println!("syncer closed with {res:?}"); break; } } } + let res = sync_handle.cancel().await; + if let Err(err) = res { + println!("> syncer closed with error: {err:?}"); + } + + println!("> persisting document and blob database at {storage_path:?}"); + blobs.save().await?; + save_document(&storage_path, doc.replica()).await?; + Ok(()) } @@ -240,46 +276,161 @@ impl FromStr for Cmd { } } -fn create_document( +async fn create_or_open_document( + storage_path: &PathBuf, + blobs: BlobStore, topic: TopicId, keypair: &Keypair, -) -> anyhow::Result<(ReplicaStore, Author, Replica)> { +) -> anyhow::Result<(ReplicaStore, Author, Doc)> { let author = Author::from(keypair.secret().clone()); let namespace = Namespace::from_bytes(topic.as_bytes()); let store = ReplicaStore::default(); - let doc = store.new_replica(namespace); + + let replica_path = replica_path(storage_path, namespace.id()); + let replica = if replica_path.exists() { + let bytes = tokio::fs::read(replica_path).await?; + store.open_replica(&bytes)? + } else { + store.new_replica(namespace) + }; + + // do some logging + replica.on_insert(Box::new(move |origin, entry| { + println!("> insert from {origin:?}: {}", fmt_entry(&entry)); + })); + + let doc = Doc::new(replica, blobs, DownloadMode::Always); Ok((store, author, doc)) } -async fn endpoint_loop( - endpoint: MagicEndpoint, +async fn save_document(base_path: &PathBuf, replica: &Replica) -> anyhow::Result<()> { + let replica_path = replica_path(base_path, &replica.namespace()); + tokio::fs::create_dir_all(replica_path.parent().unwrap()).await?; + let bytes = replica.to_bytes()?; + tokio::fs::write(replica_path, bytes).await?; + Ok(()) +} + +fn replica_path(storage_path: &PathBuf, namespace: &NamespaceId) -> PathBuf { + storage_path + .join("docs") + .join(hex::encode(namespace.as_bytes())) +} + +#[derive(Debug)] +struct State { + rt: iroh_bytes::runtime::Handle, gossip: GossipHandle, replica_store: ReplicaStore, + db: Database, +} + +async fn endpoint_loop(endpoint: MagicEndpoint, state: Arc) -> anyhow::Result<()> { + while let Some(conn) = endpoint.accept().await { + // spawn a new task for each incoming connection. 
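+        // each task gets its own clone of the shared state; connection errors are only
+        // logged and do not stop the accept loop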
+ let state = state.clone(); + tokio::spawn(async move { + if let Err(err) = handle_connection(conn, state).await { + println!("> connection closed, reason: {err}"); + } + }); + } + Ok(()) +} + +async fn handle_connection(mut conn: quinn::Connecting, state: Arc) -> anyhow::Result<()> { + let alpn = get_alpn(&mut conn).await?; + println!("> incoming connection with alpn {alpn}"); + match alpn.as_bytes() { + GOSSIP_ALPN => state.gossip.handle_connection(conn.await?).await, + SYNC_ALPN => iroh::sync::handle_connection(conn, state.replica_store.clone()).await, + alpn if alpn == iroh_bytes::protocol::ALPN => { + handle_iroh_byes_connection(conn, state).await + } + _ => bail!("ignoring connection: unsupported ALPN protocol"), + } +} + +async fn handle_iroh_byes_connection( + conn: quinn::Connecting, + state: Arc, ) -> anyhow::Result<()> { - while let Some(mut conn) = endpoint.accept().await { - let alpn = get_alpn(&mut conn).await?; - println!("> incoming connection with alpn {alpn}"); - // let (peer_id, alpn, conn) = accept_conn(conn).await?; - let res = match alpn.as_bytes() { - GOSSIP_ALPN => gossip.handle_connection(conn.await?).await, - SYNC_ALPN => iroh::sync::handle_connection(conn, replica_store.clone()).await, - _ => Err(anyhow::anyhow!( - "ignoring connection: unsupported ALPN protocol" - )), - }; - if let Err(err) = res { - println!("> connection for {alpn} closed, reason: {err}"); + use iroh_bytes::{ + protocol::{GetRequest, RequestToken}, + provider::{ + CustomGetHandler, EventSender, IrohCollectionParser, RequestAuthorizationHandler, + }, + }; + iroh_bytes::provider::handle_connection( + conn, + state.db.clone(), + NoopEventSender, + IrohCollectionParser, + Arc::new(NoopCustomGetHandler), + Arc::new(NoopRequestAuthorizationHandler), + state.rt.clone(), + ) + .await; + + #[derive(Debug, Clone)] + struct NoopEventSender; + impl EventSender for NoopEventSender { + fn send(&self, _event: iroh_bytes::provider::Event) -> Option { + None + } + } + #[derive(Debug)] + struct NoopCustomGetHandler; + impl CustomGetHandler for NoopCustomGetHandler { + fn handle( + &self, + _token: Option, + _request: Bytes, + ) -> BoxFuture<'static, anyhow::Result> { + async move { Err(anyhow::anyhow!("no custom get handler defined")) }.boxed() + } + } + #[derive(Debug)] + struct NoopRequestAuthorizationHandler; + impl RequestAuthorizationHandler for NoopRequestAuthorizationHandler { + fn authorize( + &self, + token: Option, + _request: &iroh_bytes::protocol::Request, + ) -> BoxFuture<'static, anyhow::Result<()>> { + async move { + if let Some(token) = token { + anyhow::bail!( + "no authorization handler defined, but token was provided: {:?}", + token + ); + } + Ok(()) + } + .boxed() } } Ok(()) } -fn input_loop(line_tx: tokio::sync::mpsc::Sender) -> anyhow::Result<()> { +fn create_rt() -> anyhow::Result { + let rt = iroh::bytes::runtime::Handle::from_currrent(num_cpus::get())?; + Ok(rt) +} + +fn input_loop(line_tx: tokio::sync::mpsc::Sender) -> anyhow::Result<()> { let mut buffer = String::new(); - let stdin = std::io::stdin(); // We get `Stdin` here. 
+ let stdin = std::io::stdin(); loop { stdin.read_line(&mut buffer)?; - line_tx.blocking_send(buffer.trim().to_string())?; + let cmd = match Cmd::from_str(buffer.trim()) { + Ok(cmd) => cmd, + Err(err) => { + println!("> failed to parse command: {}", err); + continue; + } + }; + line_tx.blocking_send(cmd)?; buffer.clear(); } } @@ -331,13 +482,15 @@ fn fmt_entry(entry: &SignedEntry) -> String { let hash = fmt_hash(hash.as_bytes()); format!("@{author}: {key} = {hash}") } -fn fmt_content(doc: &Replica, entry: &SignedEntry) -> String { - let hash = entry.entry().record().content_hash(); - let content = doc.get_content(hash); - let content = content - .map(|content| String::from_utf8(content.into()).unwrap_or_else(|_| "".into())) - .unwrap_or_else(|| "".into()); - content +async fn fmt_content(doc: &Doc, entry: &SignedEntry) -> anyhow::Result { + let content = match doc.get_content(entry).await { + None => "".to_string(), + Some(content) => match String::from_utf8(content.into()) { + Ok(str) => str, + Err(_err) => "".to_string(), + }, + }; + Ok(content) } fn fmt_hash(hash: &[u8]) -> String { let mut text = data_encoding::BASE32_NOPAD.encode(hash); diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index 72b5c02ade..cf9f8e0fd0 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -13,7 +13,9 @@ use tracing::debug; /// The ALPN identifier for the iroh-sync protocol pub const SYNC_ALPN: &[u8] = b"/iroh-sync/1"; +mod content; mod live; +pub use content::*; pub use live::*; /// Sync Protocol diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs new file mode 100644 index 0000000000..5a4f1d4489 --- /dev/null +++ b/iroh/src/sync/content.rs @@ -0,0 +1,324 @@ +use std::{ + collections::{HashMap, HashSet, VecDeque}, + io, + path::PathBuf, + sync::{Arc, Mutex}, +}; + +use bytes::Bytes; +use futures::{ + future::{BoxFuture, LocalBoxFuture, Shared}, + stream::FuturesUnordered, + FutureExt, +}; +use iroh_bytes::{provider::Database, util::Hash, writable::WritableFileDatabase}; +use iroh_gossip::net::util::Dialer; +use iroh_io::AsyncSliceReaderExt; +use iroh_net::{tls::PeerId, MagicEndpoint}; +use iroh_sync::sync::{Author, InsertOrigin, Replica, SignedEntry}; +use tokio::sync::{mpsc, oneshot}; +use tokio_stream::StreamExt; +use tracing::{debug, error, warn}; + +#[derive(Debug, Copy, Clone)] +pub enum DownloadMode { + Always, + Manual, +} + +/// A replica with a [`BlobStore`] for contents. +/// +/// This will also download missing content from peers. +/// +/// TODO: Currently content is only downloaded from the author of a entry. +/// We want to try other peers if the author is offline (or always). +/// We'll need some heuristics which peers to try. 
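+///
+/// A rough usage sketch (hypothetical snippet, assuming a `replica`, a `blobs` store and
+/// an `author` were created as in `iroh/examples/sync.rs`):
+///
+/// ```ignore
+/// let doc = Doc::new(replica, blobs, DownloadMode::Always);
+/// doc.insert("greeting", &author, Bytes::from_static(b"hello")).await?;
+/// ```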
+#[derive(Clone)] +pub struct Doc { + replica: Replica, + blobs: BlobStore, +} + +impl Doc { + pub fn new(replica: Replica, blobs: BlobStore, download_mode: DownloadMode) -> Self { + let doc = Self { replica, blobs }; + if let DownloadMode::Always = download_mode { + let doc2 = doc.clone(); + doc.replica.on_insert(Box::new(move |origin, entry| { + if matches!(origin, InsertOrigin::Sync) { + doc2.download_content_fron_author(&entry); + } + })); + } + doc + } + + pub fn replica(&self) -> &Replica { + &self.replica + } + + pub async fn insert( + &self, + key: impl AsRef<[u8]>, + author: &Author, + content: Bytes, + ) -> anyhow::Result<()> { + let (hash, len) = self.blobs.put_bytes(content).await?; + self.replica.insert(key, author, hash, len); + Ok(()) + } + + pub fn download_content_fron_author(&self, entry: &SignedEntry) { + let hash = *entry.entry().record().content_hash(); + let peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes()) + .expect("failed to convert author to peer id"); + self.blobs.start_download(hash, peer_id); + } + + pub async fn get_content(&self, entry: &SignedEntry) -> Option { + let hash = entry.entry().record().content_hash(); + let bytes = self.blobs.get_bytes(hash).await.ok().flatten(); + bytes + } +} + +/// A blob database that can download missing blobs from peers. +/// +/// Blobs can be inserted either from bytes or by downloading from peers. +/// Downloads can be started and will be tracked in the blobstore. +/// New blobs will be saved as files with a filename based on their hash. +/// +/// TODO: This is similar to what is used in the iroh provider. +/// Unify once we know how the APIs should look like. +#[derive(Debug, Clone)] +pub struct BlobStore { + db: WritableFileDatabase, + downloader: Downloader, +} +impl BlobStore { + pub async fn new( + rt: iroh_bytes::runtime::Handle, + data_path: PathBuf, + endpoint: MagicEndpoint, + ) -> anyhow::Result { + let db = WritableFileDatabase::new(data_path).await?; + let downloader = Downloader::new(rt, endpoint, db.clone()); + Ok(Self { db, downloader }) + } + + pub async fn save(&self) -> io::Result<()> { + self.db.save().await + } + + pub fn db(&self) -> &Database { + &self.db.db() + } + + pub fn start_download(&self, hash: Hash, peer: PeerId) { + if !self.db.has(&hash) { + self.downloader.start_download(hash, peer); + } + } + + pub async fn get_bytes(&self, hash: &Hash) -> anyhow::Result> { + self.downloader.wait_for_download(hash).await; + let Some(entry) = self.db().get(hash) else { + return Ok(None) + }; + let bytes = entry.data_reader().await?.read_to_end().await?; + Ok(Some(bytes)) + } + + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64)> { + self.db.put_bytes(data).await + } +} + +pub type DownloadReply = oneshot::Sender>; +pub type DownloadFuture = Shared>>; + +#[derive(Debug)] +pub struct DownloadRequest { + hash: Hash, + peer: PeerId, + reply: DownloadReply, +} + +/// A download queue +/// +/// Spawns a background task that handles connecting to peers and performing get requests. +/// +/// TODO: Queued downloads are pushed into an unbounded channel. Maybe make it bounded instead. +/// We want the start_download() method to be sync though because it is used +/// from sync on_insert callbacks on the replicas. 
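+///
+/// `wait_for_download` returns a `Shared` future per hash, so multiple callers can await
+/// the completion of the same pending download.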
+/// TODO: Move to iroh-bytes or replace with corresponding feature from iroh-bytes once available +#[derive(Debug, Clone)] +pub struct Downloader { + pending_downloads: Arc>>, + to_actor_tx: mpsc::UnboundedSender, +} + +impl Downloader { + pub fn new( + rt: iroh_bytes::runtime::Handle, + endpoint: MagicEndpoint, + blobs: WritableFileDatabase, + ) -> Self { + let (tx, rx) = mpsc::unbounded_channel(); + // spawn the actor on a local pool + // the local pool is required because WritableFileDatabase::download_single + // returns a future that is !Send + rt.local_pool().spawn_pinned(move || async move { + let mut actor = DownloadActor::new(endpoint, blobs, rx); + if let Err(err) = actor.run().await { + error!("download actor failed with error {err:?}"); + } + }); + Self { + pending_downloads: Arc::new(Mutex::new(HashMap::new())), + to_actor_tx: tx, + } + } + + pub fn wait_for_download(&self, hash: &Hash) -> DownloadFuture { + match self.pending_downloads.lock().unwrap().get(hash) { + Some(fut) => fut.clone(), + None => futures::future::ready(None).boxed().shared(), + } + } + + pub fn start_download(&self, hash: Hash, peer: PeerId) { + let (reply, reply_rx) = oneshot::channel(); + let req = DownloadRequest { hash, peer, reply }; + let pending_downloads = self.pending_downloads.clone(); + let fut = async move { + let res = reply_rx.await; + pending_downloads.lock().unwrap().remove(&hash); + res.ok().flatten() + }; + self.pending_downloads + .lock() + .unwrap() + .insert(hash, fut.boxed().shared()); + if let Err(err) = self.to_actor_tx.send(req) { + warn!("download actor dropped: {err}"); + } + } +} + +pub struct DownloadActor { + dialer: Dialer, + db: WritableFileDatabase, + conns: HashMap, + replies: HashMap>, + peer_hashes: HashMap>, + hash_peers: HashMap>, + pending_downloads: FuturesUnordered< + LocalBoxFuture<'static, (PeerId, Hash, anyhow::Result>)>, + >, + rx: mpsc::UnboundedReceiver, +} +impl DownloadActor { + fn new( + endpoint: MagicEndpoint, + db: WritableFileDatabase, + rx: mpsc::UnboundedReceiver, + ) -> Self { + Self { + rx, + db, + dialer: Dialer::new(endpoint), + replies: Default::default(), + conns: Default::default(), + pending_downloads: Default::default(), + peer_hashes: Default::default(), + hash_peers: Default::default(), + } + } + pub async fn run(&mut self) -> anyhow::Result<()> { + loop { + tokio::select! 
{ + req = self.rx.recv() => match req { + None => return Ok(()), + Some(req) => self.on_download_request(req).await + }, + (peer, conn) = self.dialer.next() => match conn { + Ok(conn) => { + debug!("connection to {peer} established"); + self.conns.insert(peer, conn); + self.on_peer_ready(peer); + }, + Err(err) => self.on_peer_fail(&peer, err), + }, + Some((peer, hash, res)) = self.pending_downloads.next() => match res { + Ok(Some((hash, size))) => { + self.reply(hash, Some((hash, size))); + self.on_peer_ready(peer); + } + Ok(None) => { + self.on_not_found(&peer, hash); + self.on_peer_ready(peer); + } + Err(err) => self.on_peer_fail(&peer, err), + } + } + } + } + + fn reply(&mut self, hash: Hash, res: Option<(Hash, u64)>) { + for reply in self.replies.remove(&hash).into_iter().flatten() { + reply.send(res.clone()).ok(); + } + } + + fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { + warn!("download from {peer} failed: {err}"); + for hash in self.peer_hashes.remove(&peer).into_iter().flatten() { + self.on_not_found(peer, hash); + } + self.conns.remove(&peer); + } + + fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { + if let Some(peers) = self.hash_peers.get_mut(&hash) { + peers.remove(&peer); + if peers.is_empty() { + self.reply(hash, None); + self.hash_peers.remove(&hash); + } + } + } + + fn on_peer_ready(&mut self, peer: PeerId) { + if let Some(hash) = self + .peer_hashes + .get_mut(&peer) + .map(|hashes| hashes.pop_front()) + .flatten() + { + let conn = self.conns.get(&peer).unwrap().clone(); + let blobs = self.db.clone(); + let fut = async move { (peer, hash, blobs.download_single(conn, hash).await) }; + self.pending_downloads.push(fut.boxed_local()); + } else { + self.conns.remove(&peer); + self.peer_hashes.remove(&peer); + } + } + + async fn on_download_request(&mut self, req: DownloadRequest) { + let DownloadRequest { peer, hash, reply } = req; + if self.db.has(&hash) { + let size = self.db.get_size(&hash).await.unwrap(); + reply.send(Some((hash, size))).ok(); + return; + } + debug!("queue download {hash} from {peer}"); + self.replies.entry(hash).or_default().push_back(reply); + self.hash_peers.entry(hash).or_default().insert(peer); + self.peer_hashes.entry(peer).or_default().push_back(hash); + if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { + self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); + } + } +} From 3a671d2ab3ba17d609d39810891ce3868eca65d8 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 13 Jul 2023 17:24:09 +0200 Subject: [PATCH 08/45] feat: proper REPL for sync example, and docs store * make the REPL in the sync example work properly with rustyline for editing and reading input, shell-style argument parsing and clap for parsing commands * add a docs store for opening and closing docs * add author to doc struct --- Cargo.lock | 146 ++++++++++ iroh-bytes/src/writable.rs | 22 ++ iroh-sync/src/sync.rs | 15 ++ iroh/Cargo.toml | 5 +- iroh/examples/sync.rs | 536 +++++++++++++++++++++---------------- iroh/src/sync/content.rs | 128 ++++++++- iroh/src/sync/live.rs | 16 +- 7 files changed, 618 insertions(+), 250 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa8d00e93a..348a2024bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -499,6 +499,17 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +[[package]] +name = "clipboard-win" +version = "4.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +dependencies = [ + "error-code", + "str-buf", + "winapi", +] + [[package]] name = "cobs" version = "0.2.3" @@ -900,6 +911,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -910,6 +930,18 @@ dependencies = [ "dirs-sys-next", ] +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys 0.48.0", +] + [[package]] name = "dirs-sys-next" version = "0.1.2" @@ -1046,6 +1078,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + [[package]] name = "enum-as-inner" version = "0.5.1" @@ -1113,6 +1151,16 @@ dependencies = [ "libc", ] +[[package]] +name = "error-code" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" +dependencies = [ + "libc", + "str-buf", +] + [[package]] name = "fallible-iterator" version = "0.2.0" @@ -1125,6 +1173,17 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" +[[package]] +name = "fd-lock" +version = "3.0.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" +dependencies = [ + "cfg-if", + "rustix", + "windows-sys 0.48.0", +] + [[package]] name = "ff" version = "0.13.0" @@ -1459,6 +1518,15 @@ version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3688e69b38018fec1557254f64c8dc2cc8ec502890182f395dbb0aa997aa5735" +[[package]] +name = "home" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5444c27eef6923071f7ebcc33e3444508466a76f7a2b93da00ed6e19f30c1ddb" +dependencies = [ + "windows-sys 0.48.0", +] + [[package]] name = "hostname" version = "0.3.1" @@ -1728,7 +1796,10 @@ dependencies = [ "rand", "range-collections", "regex", + "rustyline", "serde", + "shell-words", + "shellexpand", "tempfile", "testdir", "thiserror", @@ -2208,6 +2279,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + [[package]] name = "nix" version = "0.26.2" @@ -2434,6 +2514,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "os_info" version = "3.7.0" @@ -3026,6 +3112,16 @@ 
dependencies = [ "pest_derive", ] +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + [[package]] name = "rand" version = "0.8.5" @@ -3426,6 +3522,29 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rustyline" +version = "12.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +dependencies = [ + "bitflags 2.3.3", + "cfg-if", + "clipboard-win", + "fd-lock", + "home", + "libc", + "log", + "memchr", + "nix", + "radix_trie", + "scopeguard", + "unicode-segmentation", + "unicode-width", + "utf8parse", + "winapi", +] + [[package]] name = "ryu" version = "1.0.15" @@ -3635,6 +3754,21 @@ dependencies = [ "winapi", ] +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + +[[package]] +name = "shellexpand" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da03fa3b94cc19e3ebfc88c4229c49d8f08cdbd1228870a45f0ffdf84988e14b" +dependencies = [ + "dirs", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -3770,6 +3904,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "str-buf" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" + [[package]] name = "strsim" version = "0.10.0" @@ -4370,6 +4510,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + [[package]] name = "unicode-width" version = "0.1.10" diff --git a/iroh-bytes/src/writable.rs b/iroh-bytes/src/writable.rs index fe492e47b4..d1666831da 100644 --- a/iroh-bytes/src/writable.rs +++ b/iroh-bytes/src/writable.rs @@ -10,6 +10,7 @@ use anyhow::Context; use bytes::Bytes; use iroh_io::{AsyncSliceWriter, File}; use range_collections::RangeSet2; +use tokio::io::AsyncRead; use crate::{ get::fsm, @@ -60,6 +61,12 @@ impl WritableFileDatabase { Ok((hash, size)) } + pub async fn put_reader(&self, data: impl AsyncRead + Unpin) -> anyhow::Result<(Hash, u64)> { + let (hash, size, entry) = self.storage.put_reader(data).await?; + self.db.union_with(HashMap::from_iter([(hash, entry)])); + Ok((hash, size)) + } + pub async fn put_from_temp_file(&self, temp_path: &PathBuf) -> anyhow::Result<(Hash, u64)> { let (hash, size, entry) = self.storage.move_to_blobs(&temp_path).await?; self.db.union_with(HashMap::from_iter([(hash, entry)])); @@ -144,6 +151,21 @@ impl StoragePaths { Ok((hash, size, entry)) } + pub async fn put_reader( + &self, + mut reader: impl AsyncRead + Unpin, + ) -> anyhow::Result<(Hash, u64, DbEntry)> { + let temp_path = self.temp_path(); + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create(true) + .open(&temp_path) + .await?; + tokio::io::copy(&mut reader, &mut file).await?; + let (hash, size, entry) = self.move_to_blobs(&temp_path).await?; + Ok((hash, size, entry)) + } + async fn 
move_to_blobs(&self, path: &PathBuf) -> anyhow::Result<(Hash, u64, DbEntry)> { let datasource = DataSource::new(path.clone()); // TODO: this needlessly creates a collection, but that's what's pub atm in iroh-bytes diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 623bd81af7..a6f05391bb 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -416,6 +416,17 @@ impl Replica { .collect() } + // TODO: not horrible + pub fn all_with_key_prefix( + &self, + prefix: impl AsRef<[u8]>, + ) -> Vec<(RecordIdentifier, SignedEntry)> { + self.all() + .into_iter() + .filter(|(id, _entry)| id.key().starts_with(prefix.as_ref())) + .collect() + } + pub fn to_bytes(&self) -> anyhow::Result { let entries = self.all().into_iter().map(|(_id, entry)| entry).collect(); let data = ReplicaData { @@ -566,6 +577,10 @@ impl SignedEntry { pub fn entry(&self) -> &Entry { &self.entry } + + pub fn content_hash(&self) -> &Hash { + self.entry().record().content_hash() + } } /// Signature over an entry. diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index bf3fe7cf04..397f9f15c6 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -56,6 +56,9 @@ url = { version = "2.4", features = ["serde"] } # Examples once_cell = { version = "1.18.0", optional = true } ed25519-dalek = { version = "=2.0.0-rc.3", features = ["serde", "rand_core"], optional = true } +shell-words = { version = "1.1.0", optional = true } +shellexpand = { version = "3.1.0", optional = true } +rustyline = { version = "12.0.0", optional = true } [features] default = ["cli", "metrics"] @@ -65,7 +68,7 @@ flat-db = [] mem-db = [] iroh-collection = [] test = [] -example-sync = ["cli", "ed25519-dalek", "once_cell"] +example-sync = ["cli", "ed25519-dalek", "once_cell", "shell-words", "shellexpand", "rustyline"] [dev-dependencies] anyhow = { version = "1", features = ["backtrace"] } diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 3c0a332d8d..3ec5ec6ddb 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -9,13 +9,11 @@ use std::{fmt, path::PathBuf, str::FromStr, sync::Arc}; -use anyhow::{anyhow, bail}; -use bytes::Bytes; -use clap::Parser; +use anyhow::bail; +use clap::{CommandFactory, FromArgMatches, Parser}; use ed25519_dalek::SigningKey; -use futures::{future::BoxFuture, FutureExt}; -use iroh::sync::{BlobStore, Doc, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; -use iroh_bytes::provider::Database; +use indicatif::HumanBytes; +use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, @@ -27,12 +25,17 @@ use iroh_net::{ tls::Keypair, MagicEndpoint, }; -use iroh_sync::sync::{Author, Namespace, NamespaceId, Replica, ReplicaStore, SignedEntry}; +use iroh_sync::sync::{Author, Namespace, SignedEntry}; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; -use tokio::sync::mpsc; +use tokio::sync::{mpsc, oneshot}; +use tracing_subscriber::{EnvFilter, Registry}; use url::Url; +use iroh_bytes_handlers::IrohBytesHandlers; + +const MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; + #[derive(Parser, Debug)] struct Args { /// Private key to derive our peer id from @@ -65,12 +68,12 @@ enum Command { #[tokio::main] async fn main() -> anyhow::Result<()> { - tracing_subscriber::fmt::init(); let args = Args::parse(); run(args).await } async fn run(args: Args) -> anyhow::Result<()> { + let log_filter = init_logging(); // parse or generate our keypair let keypair = match args.private_key { None => Keypair::generate(), @@ 
-87,13 +90,13 @@ async fn run(args: Args) -> anyhow::Result<()> { }; println!("> using DERP servers: {}", fmt_derp_map(&derp_map)); - // build our magic endpoint + // build our magic endpoint and the gossip protocol let (endpoint, gossip, initial_endpoints) = { // init a cell that will hold our gossip handle to be used in endpoint callbacks let gossip_cell: OnceCell = OnceCell::new(); // init a channel that will emit once the initial endpoints of our local node are discovered let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); - + // build the magic endpoint let endpoint = MagicEndpoint::builder() .keypair(keypair.clone()) .alpns(vec![ @@ -116,16 +119,14 @@ async fn run(args: Args) -> anyhow::Result<()> { .bind(args.bind_port) .await?; - // create the gossip protocol - let gossip = { - let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); - // insert the gossip handle into the gossip cell to be used in the endpoint callbacks above - gossip_cell.set(gossip.clone()).unwrap(); - gossip - }; + // initialize the gossip protocol + let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); + // insert into the gossip cell to be used in the endpoint callbacks above + gossip_cell.set(gossip.clone()).unwrap(); + // wait for a first endpoint update so that we know about at least one of our addrs let initial_endpoints = initial_endpoints_rx.recv().await.unwrap(); - // pass our initial endpoints to the gossip protocol + // pass our initial endpoints to the gossip protocol so that they can be announced to peers gossip.update_endpoints(&initial_endpoints)?; (endpoint, gossip, initial_endpoints) }; @@ -147,7 +148,6 @@ async fn run(args: Args) -> anyhow::Result<()> { } }; - // println!("> our endpoints: {initial_endpoints:?}"); let our_ticket = { // add our local endpoints to the ticket and print it for others to join let addrs = initial_endpoints.iter().map(|ep| ep.addr).collect(); @@ -163,171 +163,183 @@ async fn run(args: Args) -> anyhow::Result<()> { // unwrap our storage path or default to temp let storage_path = args.storage_path.unwrap_or_else(|| { - let dir = format!("/tmp/iroh-example-sync-{}", endpoint.peer_id()); - let dir = PathBuf::from(dir); + let name = format!("iroh-sync-{}", endpoint.peer_id()); + let dir = std::env::temp_dir().join(name); if !dir.exists() { std::fs::create_dir(&dir).expect("failed to create temp dir"); } dir }); - println!("> persisting data in {storage_path:?}"); - - // create a runtime - // we need this because some things need to spawn !Send futures - let rt = create_rt()?; - // create the sync doc and store - // we need to pass the runtime because a !Send task is spawned for - // the downloader in the blob store - let blobs = BlobStore::new(rt.clone(), storage_path.clone(), endpoint.clone()).await?; - let (store, author, doc) = - create_or_open_document(&storage_path, blobs.clone(), topic, &keypair).await?; + println!("> storage directory: {storage_path:?}"); + + // create a runtime that can spawn tasks on a local-thread executors (to support !Send futures) + let rt = iroh::bytes::runtime::Handle::from_currrent(num_cpus::get())?; + + // create a blob store (with a iroh-bytes database inside) + let blobs = BlobStore::new(rt.clone(), storage_path.join("blobs"), endpoint.clone()).await?; + + // create a doc store for the iroh-sync docs + let author = Author::from(keypair.secret().clone()); + let docs = DocStore::new(blobs.clone(), author, storage_path.join("docs")); + + // create the live syncer + let 
live_sync = LiveSync::spawn(endpoint.clone(), gossip.clone()); // construct the state that is passed to the endpoint loop and from there cloned // into to the connection handler task for incoming connections. let state = Arc::new(State { gossip: gossip.clone(), - replica_store: store.clone(), - db: blobs.db().clone(), - rt, + docs: docs.clone(), + bytes: IrohBytesHandlers::new(rt.clone(), blobs.db().clone()), }); + // spawn our endpoint loop that forwards incoming connections tokio::spawn(endpoint_loop(endpoint.clone(), state)); - // create the live syncer - let sync_handle = LiveSync::spawn(endpoint.clone(), gossip.clone()); - sync_handle - .sync_doc(doc.replica().clone(), peers.clone()) - .await?; - - // spawn an input thread that reads stdin and parses each line as a `Cmd` command - // not using tokio here because they recommend this for "technical reasons" - let (cmd_tx, mut cmd_rx) = tokio::sync::mpsc::channel::(1); - std::thread::spawn(move || input_loop(cmd_tx)); + // open our document and add to the live syncer + let namespace = Namespace::from_bytes(topic.as_bytes()); + println!("> opening doc {}", fmt_hash(namespace.id().as_bytes())); + let doc = docs.create_or_open(namespace, DownloadMode::Always).await?; + live_sync.add(doc.replica().clone(), peers.clone()).await?; + // spawn an repl thread that reads stdin and parses each line as a `Cmd` command + let (cmd_tx, mut cmd_rx) = mpsc::channel(1); + std::thread::spawn(move || repl_loop(cmd_tx).expect("input loop crashed")); // process commands in a loop - println!("> ready to accept commands: set | get | ls | exit"); + println!("> ready to accept commands"); + println!("> type `help` for a list of commands"); loop { - let cmd = tokio::select! { - Some(cmd) = cmd_rx.recv() => cmd, - _ = tokio::signal::ctrl_c() => Cmd::Exit - + // wait for a command from the input repl thread + let Some((cmd, to_repl_tx)) = cmd_rx.recv().await else { + break; }; - match cmd { - Cmd::Set { key, value } => { - doc.insert(&key, &author, value.into_bytes().into()).await?; - } - Cmd::Get { key } => { - let entries = doc.replica().all_for_key(key.as_bytes()); - for (_id, entry) in entries { - let content = fmt_content(&doc, &entry).await?; - println!("{} -> {content}", fmt_entry(&entry),); - } - } - Cmd::Ls => { - let all = doc.replica().all(); - println!("> {} entries", all.len()); - for (_id, entry) in all { - println!( - "{} -> {}", - fmt_entry(&entry), - fmt_content(&doc, &entry).await? - ); - } - } - Cmd::Exit => { - break; - } + // exit command: break early + if let Cmd::Exit = cmd { + to_repl_tx.send(ToRepl::Exit).ok(); + break; } + + // handle the command, but select against Ctrl-C signal so that commands can be aborted + tokio::select! 
{ + biased; + _ = tokio::signal::ctrl_c() => { + println!("> aborted"); + } + res = handle_command(cmd, &doc, &log_filter) => if let Err(err) = res { + println!("> error: {err}"); + }, + }; + // notify to the repl that we want to get the next command + to_repl_tx.send(ToRepl::Continue).ok(); } - let res = sync_handle.cancel().await; - if let Err(err) = res { + // exit: cancel the sync and store blob database and document + if let Err(err) = live_sync.cancel().await { println!("> syncer closed with error: {err:?}"); } - println!("> persisting document and blob database at {storage_path:?}"); blobs.save().await?; - save_document(&storage_path, doc.replica()).await?; + docs.save(&doc).await?; Ok(()) } +async fn handle_command(cmd: Cmd, doc: &Doc, log_filter: &LogLevelReload) -> anyhow::Result<()> { + match cmd { + Cmd::Set { key, value } => { + doc.insert_bytes(&key, value.into_bytes().into()).await?; + } + Cmd::Get { key, print_content } => { + let entries = doc.replica().all_for_key(key.as_bytes()); + for (_id, entry) in entries { + println!("{}", fmt_entry(&entry)); + if print_content { + println!("{}", fmt_content(&doc, &entry).await); + } + } + } + Cmd::Ls { prefix } => { + let entries = match prefix { + None => doc.replica().all(), + Some(prefix) => doc.replica().all_with_key_prefix(prefix.as_bytes()), + }; + println!("> {} entries", entries.len()); + for (_id, entry) in entries { + println!("{}", fmt_entry(&entry),); + } + } + Cmd::Log { directive } => { + let next_filter = EnvFilter::from_str(&directive)?; + log_filter.modify(|layer| *layer = next_filter)?; + } + Cmd::Exit => {} + } + Ok(()) +} + +#[derive(Parser)] pub enum Cmd { - Set { key: String, value: String }, - Get { key: String }, - Ls, + /// Set an entry + Set { + /// Key to the entry (parsed as UTF-8 string). + key: String, + /// Content to store for this entry (parsed as UTF-8 string) + value: String, + }, + /// Get entries by key + /// + /// Shows the author, content hash and content length for all entries for this key. + Get { + /// Key to the entry (parsed as UTF-8 string). + key: String, + /// Print the value (but only if it is valid UTF-8 and smaller than 1MB) + #[clap(short = 'c', long)] + print_content: bool, + }, + /// List entries + Ls { + /// Optionally list only entries whose key starts with PREFIX. + prefix: Option, + }, + /// Change the log level + Log { + /// The log level or log filtering directive + /// + /// Valid log levels are: "trace", "debug", "info", "warn", "error" + /// + /// You can also set one or more filtering directives to enable more fine-grained log + /// filtering. The supported filtering directives and their semantics are documented here: + /// https://docs.rs/tracing-subscriber/latest/tracing_subscriber/filter/struct.EnvFilter.html#directives + /// + /// To disable logging completely, set to the empty string (via empty double quotes: ""). 
+ #[clap(verbatim_doc_comment)] + directive: String, + }, + /// Quit Exit, } impl FromStr for Cmd { type Err = anyhow::Error; - fn from_str(s: &str) -> Result { - let mut parts = s.split(' '); - match [parts.next(), parts.next(), parts.next()] { - [Some("set"), Some(key), Some(value)] => Ok(Self::Set { - key: key.into(), - value: value.into(), - }), - [Some("get"), Some(key), None] => Ok(Self::Get { key: key.into() }), - [Some("ls"), None, None] => Ok(Self::Ls), - [Some("exit"), None, None] => Ok(Self::Exit), - _ => Err(anyhow!("invalid command")), - } + let args = shell_words::split(s)?; + let matches = Cmd::command() + .multicall(true) + .subcommand_required(true) + .try_get_matches_from(args)?; + let cmd = Cmd::from_arg_matches(&matches)?; + Ok(cmd) } } -async fn create_or_open_document( - storage_path: &PathBuf, - blobs: BlobStore, - topic: TopicId, - keypair: &Keypair, -) -> anyhow::Result<(ReplicaStore, Author, Doc)> { - let author = Author::from(keypair.secret().clone()); - let namespace = Namespace::from_bytes(topic.as_bytes()); - let store = ReplicaStore::default(); - - let replica_path = replica_path(storage_path, namespace.id()); - let replica = if replica_path.exists() { - let bytes = tokio::fs::read(replica_path).await?; - store.open_replica(&bytes)? - } else { - store.new_replica(namespace) - }; - - // do some logging - replica.on_insert(Box::new(move |origin, entry| { - println!("> insert from {origin:?}: {}", fmt_entry(&entry)); - })); - - let doc = Doc::new(replica, blobs, DownloadMode::Always); - Ok((store, author, doc)) -} - -async fn save_document(base_path: &PathBuf, replica: &Replica) -> anyhow::Result<()> { - let replica_path = replica_path(base_path, &replica.namespace()); - tokio::fs::create_dir_all(replica_path.parent().unwrap()).await?; - let bytes = replica.to_bytes()?; - tokio::fs::write(replica_path, bytes).await?; - Ok(()) -} - -fn replica_path(storage_path: &PathBuf, namespace: &NamespaceId) -> PathBuf { - storage_path - .join("docs") - .join(hex::encode(namespace.as_bytes())) -} - #[derive(Debug)] struct State { - rt: iroh_bytes::runtime::Handle, gossip: GossipHandle, - replica_store: ReplicaStore, - db: Database, + docs: DocStore, + bytes: IrohBytesHandlers, } async fn endpoint_loop(endpoint: MagicEndpoint, state: Arc) -> anyhow::Result<()> { while let Some(conn) = endpoint.accept().await { - // spawn a new task for each incoming connection. 
let state = state.clone(); tokio::spawn(async move { if let Err(err) = handle_connection(conn, state).await { @@ -343,96 +355,50 @@ async fn handle_connection(mut conn: quinn::Connecting, state: Arc) -> an println!("> incoming connection with alpn {alpn}"); match alpn.as_bytes() { GOSSIP_ALPN => state.gossip.handle_connection(conn.await?).await, - SYNC_ALPN => iroh::sync::handle_connection(conn, state.replica_store.clone()).await, - alpn if alpn == iroh_bytes::protocol::ALPN => { - handle_iroh_byes_connection(conn, state).await - } + SYNC_ALPN => state.docs.handle_connection(conn).await, + alpn if alpn == iroh_bytes::protocol::ALPN => state.bytes.handle_connection(conn).await, _ => bail!("ignoring connection: unsupported ALPN protocol"), } } -async fn handle_iroh_byes_connection( - conn: quinn::Connecting, - state: Arc, -) -> anyhow::Result<()> { - use iroh_bytes::{ - protocol::{GetRequest, RequestToken}, - provider::{ - CustomGetHandler, EventSender, IrohCollectionParser, RequestAuthorizationHandler, - }, - }; - iroh_bytes::provider::handle_connection( - conn, - state.db.clone(), - NoopEventSender, - IrohCollectionParser, - Arc::new(NoopCustomGetHandler), - Arc::new(NoopRequestAuthorizationHandler), - state.rt.clone(), - ) - .await; - - #[derive(Debug, Clone)] - struct NoopEventSender; - impl EventSender for NoopEventSender { - fn send(&self, _event: iroh_bytes::provider::Event) -> Option { - None - } - } - #[derive(Debug)] - struct NoopCustomGetHandler; - impl CustomGetHandler for NoopCustomGetHandler { - fn handle( - &self, - _token: Option, - _request: Bytes, - ) -> BoxFuture<'static, anyhow::Result> { - async move { Err(anyhow::anyhow!("no custom get handler defined")) }.boxed() - } - } - #[derive(Debug)] - struct NoopRequestAuthorizationHandler; - impl RequestAuthorizationHandler for NoopRequestAuthorizationHandler { - fn authorize( - &self, - token: Option, - _request: &iroh_bytes::protocol::Request, - ) -> BoxFuture<'static, anyhow::Result<()>> { - async move { - if let Some(token) = token { - anyhow::bail!( - "no authorization handler defined, but token was provided: {:?}", - token - ); - } - Ok(()) - } - .boxed() - } - } - Ok(()) -} - -fn create_rt() -> anyhow::Result { - let rt = iroh::bytes::runtime::Handle::from_currrent(num_cpus::get())?; - Ok(rt) +#[derive(Debug)] +enum ToRepl { + Continue, + Exit, } -fn input_loop(line_tx: tokio::sync::mpsc::Sender) -> anyhow::Result<()> { - let mut buffer = String::new(); - let stdin = std::io::stdin(); +fn repl_loop(cmd_tx: mpsc::Sender<(Cmd, oneshot::Sender)>) -> anyhow::Result<()> { + use rustyline::{error::ReadlineError, Config, DefaultEditor}; + let mut rl = DefaultEditor::with_config(Config::builder().check_cursor_position(true).build())?; loop { - stdin.read_line(&mut buffer)?; - let cmd = match Cmd::from_str(buffer.trim()) { - Ok(cmd) => cmd, - Err(err) => { - println!("> failed to parse command: {}", err); - continue; + // prepare a channel to receive a signal from the main thread when a command completed + let (to_repl_tx, to_repl_rx) = oneshot::channel(); + let readline = rl.readline(">> "); + match readline { + Ok(line) if line.is_empty() => continue, + Ok(line) => { + rl.add_history_entry(line.as_str())?; + match Cmd::from_str(&line) { + Ok(cmd) => cmd_tx.blocking_send((cmd, to_repl_tx))?, + Err(err) => { + println!("{err}"); + continue; + } + }; } - }; - line_tx.blocking_send(cmd)?; - buffer.clear(); + Err(ReadlineError::Interrupted | ReadlineError::Eof) => { + cmd_tx.blocking_send((Cmd::Exit, to_repl_tx))?; + } + 
Err(ReadlineError::WindowResized) => continue, + Err(err) => return Err(err.into()), + } + // wait for reply from main thread + match to_repl_rx.blocking_recv()? { + ToRepl::Continue => continue, + ToRepl::Exit => break, + } } + Ok(()) } #[derive(Debug, Serialize, Deserialize)] @@ -472,6 +438,19 @@ impl FromStr for Ticket { } } +type LogLevelReload = tracing_subscriber::reload::Handle; +fn init_logging() -> LogLevelReload { + use tracing_subscriber::{filter, fmt, prelude::*, reload}; + let filter = filter::EnvFilter::from_default_env(); + let (filter, reload_handle) = reload::Layer::new(filter); + tracing_subscriber::registry() + .with(filter) + .with(fmt::Layer::default()) + .init(); + reload_handle +} + + // helpers fn fmt_entry(entry: &SignedEntry) -> String { @@ -480,20 +459,25 @@ fn fmt_entry(entry: &SignedEntry) -> String { let author = fmt_hash(id.author().as_bytes()); let hash = entry.entry().record().content_hash(); let hash = fmt_hash(hash.as_bytes()); - format!("@{author}: {key} = {hash}") + let len = HumanBytes(entry.entry().record().content_len()); + format!("@{author}: {key} = {hash} ({len})",) } -async fn fmt_content(doc: &Doc, entry: &SignedEntry) -> anyhow::Result { - let content = match doc.get_content(entry).await { - None => "".to_string(), - Some(content) => match String::from_utf8(content.into()) { - Ok(str) => str, - Err(_err) => "".to_string(), - }, - }; - Ok(content) +async fn fmt_content(doc: &Doc, entry: &SignedEntry) -> String { + let len = entry.entry().record().content_len(); + if len > MAX_DISPLAY_CONTENT_LEN { + format!("<{}>", HumanBytes(len)) + } else { + match doc.get_content_bytes(entry).await { + None => "".to_string(), + Some(content) => match String::from_utf8(content.into()) { + Ok(str) => str, + Err(_err) => format!("", HumanBytes(len)), + }, + } + } } -fn fmt_hash(hash: &[u8]) -> String { - let mut text = data_encoding::BASE32_NOPAD.encode(hash); +fn fmt_hash(hash: impl AsRef<[u8]>) -> String { + let mut text = data_encoding::BASE32_NOPAD.encode(hash.as_ref()); text.make_ascii_lowercase(); format!("{}…{}", &text[..5], &text[(text.len() - 2)..]) } @@ -531,3 +515,89 @@ fn derp_map_from_url(url: Url) -> anyhow::Result { 0 )) } + +/// handlers for iroh_bytes connections +mod iroh_bytes_handlers { + use std::sync::Arc; + + use bytes::Bytes; + use futures::{future::BoxFuture, FutureExt}; + use iroh_bytes::{ + protocol::{GetRequest, RequestToken}, + provider::{ + CustomGetHandler, Database, EventSender, IrohCollectionParser, + RequestAuthorizationHandler, + }, + }; + #[derive(Debug, Clone)] + pub struct IrohBytesHandlers { + db: Database, + rt: iroh_bytes::runtime::Handle, + event_sender: NoopEventSender, + get_handler: Arc, + auth_handler: Arc, + } + impl IrohBytesHandlers { + pub fn new(rt: iroh_bytes::runtime::Handle, db: Database) -> Self { + Self { + db, + rt, + event_sender: NoopEventSender, + get_handler: Arc::new(NoopCustomGetHandler), + auth_handler: Arc::new(NoopRequestAuthorizationHandler), + } + } + pub async fn handle_connection(&self, conn: quinn::Connecting) -> anyhow::Result<()> { + iroh_bytes::provider::handle_connection( + conn, + self.db.clone(), + self.event_sender.clone(), + IrohCollectionParser, + self.get_handler.clone(), + self.auth_handler.clone(), + self.rt.clone(), + ) + .await; + Ok(()) + } + } + + #[derive(Debug, Clone)] + struct NoopEventSender; + impl EventSender for NoopEventSender { + fn send(&self, _event: iroh_bytes::provider::Event) -> Option { + None + } + } + #[derive(Debug)] + struct NoopCustomGetHandler; + impl 
CustomGetHandler for NoopCustomGetHandler { + fn handle( + &self, + _token: Option, + _request: Bytes, + ) -> BoxFuture<'static, anyhow::Result> { + async move { Err(anyhow::anyhow!("no custom get handler defined")) }.boxed() + } + } + #[derive(Debug)] + struct NoopRequestAuthorizationHandler; + impl RequestAuthorizationHandler for NoopRequestAuthorizationHandler { + fn authorize( + &self, + token: Option, + _request: &iroh_bytes::protocol::Request, + ) -> BoxFuture<'static, anyhow::Result<()>> { + async move { + if let Some(token) = token { + anyhow::bail!( + "no authorization handler defined, but token was provided: {:?}", + token + ); + } + Ok(()) + } + .boxed() + } + } +} diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 5a4f1d4489..f061808609 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -13,10 +13,15 @@ use futures::{ }; use iroh_bytes::{provider::Database, util::Hash, writable::WritableFileDatabase}; use iroh_gossip::net::util::Dialer; -use iroh_io::AsyncSliceReaderExt; +use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; use iroh_net::{tls::PeerId, MagicEndpoint}; -use iroh_sync::sync::{Author, InsertOrigin, Replica, SignedEntry}; -use tokio::sync::{mpsc, oneshot}; +use iroh_sync::sync::{ + Author, InsertOrigin, Namespace, NamespaceId, Replica, ReplicaStore, SignedEntry, +}; +use tokio::{ + io::AsyncRead, + sync::{mpsc, oneshot}, +}; use tokio_stream::StreamExt; use tracing::{debug, error, warn}; @@ -26,6 +31,63 @@ pub enum DownloadMode { Manual, } +#[derive(Debug, Clone)] +pub struct DocStore { + replicas: ReplicaStore, + blobs: BlobStore, + local_author: Arc, + storage_path: PathBuf, +} + +impl DocStore { + pub fn new(blobs: BlobStore, author: Author, storage_path: PathBuf) -> Self { + Self { + replicas: ReplicaStore::default(), + local_author: Arc::new(author), + storage_path, + blobs, + } + } + + pub async fn create_or_open( + &self, + namespace: Namespace, + download_mode: DownloadMode, + ) -> anyhow::Result { + let path = self.replica_path(namespace.id()); + let replica = if path.exists() { + let bytes = tokio::fs::read(path).await?; + self.replicas.open_replica(&bytes)? + } else { + self.replicas.new_replica(namespace) + }; + + let doc = Doc::new( + replica, + self.blobs.clone(), + self.local_author.clone(), + download_mode, + ); + Ok(doc) + } + + pub async fn save(&self, doc: &Doc) -> anyhow::Result<()> { + let replica_path = self.replica_path(&doc.replica().namespace()); + tokio::fs::create_dir_all(replica_path.parent().unwrap()).await?; + let bytes = doc.replica().to_bytes()?; + tokio::fs::write(replica_path, bytes).await?; + Ok(()) + } + + fn replica_path(&self, namespace: &NamespaceId) -> PathBuf { + self.storage_path.join(hex::encode(namespace.as_bytes())) + } + + pub async fn handle_connection(&self, conn: quinn::Connecting) -> anyhow::Result<()> { + crate::sync::handle_connection(conn, self.replicas.clone()).await + } +} + /// A replica with a [`BlobStore`] for contents. /// /// This will also download missing content from peers. @@ -33,15 +95,25 @@ pub enum DownloadMode { /// TODO: Currently content is only downloaded from the author of a entry. /// We want to try other peers if the author is offline (or always). /// We'll need some heuristics which peers to try. 
-#[derive(Clone)] +#[derive(Clone, Debug)] pub struct Doc { replica: Replica, blobs: BlobStore, + local_author: Arc, } impl Doc { - pub fn new(replica: Replica, blobs: BlobStore, download_mode: DownloadMode) -> Self { - let doc = Self { replica, blobs }; + pub fn new( + replica: Replica, + blobs: BlobStore, + local_author: Arc, + download_mode: DownloadMode, + ) -> Self { + let doc = Self { + replica, + blobs, + local_author, + }; if let DownloadMode::Always = download_mode { let doc2 = doc.clone(); doc.replica.on_insert(Box::new(move |origin, entry| { @@ -57,15 +129,28 @@ impl Doc { &self.replica } - pub async fn insert( + pub fn local_author(&self) -> &Author { + &self.local_author + } + + pub async fn insert_bytes( &self, key: impl AsRef<[u8]>, - author: &Author, content: Bytes, - ) -> anyhow::Result<()> { + ) -> anyhow::Result<(Hash, u64)> { let (hash, len) = self.blobs.put_bytes(content).await?; - self.replica.insert(key, author, hash, len); - Ok(()) + self.replica.insert(key, &self.local_author, hash, len); + Ok((hash, len)) + } + + pub async fn insert_reader( + &self, + key: impl AsRef<[u8]>, + content: impl AsyncRead + Unpin, + ) -> anyhow::Result<(Hash, u64)> { + let (hash, len) = self.blobs.put_reader(content).await?; + self.replica.insert(key, &self.local_author, hash, len); + Ok((hash, len)) } pub fn download_content_fron_author(&self, entry: &SignedEntry) { @@ -75,11 +160,16 @@ impl Doc { self.blobs.start_download(hash, peer_id); } - pub async fn get_content(&self, entry: &SignedEntry) -> Option { + pub async fn get_content_bytes(&self, entry: &SignedEntry) -> Option { let hash = entry.entry().record().content_hash(); let bytes = self.blobs.get_bytes(hash).await.ok().flatten(); bytes } + pub async fn get_content_reader(&self, entry: &SignedEntry) -> Option { + let hash = entry.entry().record().content_hash(); + let bytes = self.blobs.get_reader(hash).await.ok().flatten(); + bytes + } } /// A blob database that can download missing blobs from peers. 
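(Aside, not part of the diff: a minimal usage sketch of the `DocStore`/`Doc` API added above. It assumes a `blobs: BlobStore`, an `author: Author`, a `namespace: Namespace` and a `storage_path` are already set up as in `iroh/examples/sync.rs`, and that the imports from `iroh::sync` and `bytes::Bytes` are in scope.)

    // open (or create) a document for the namespace, downloading remote content eagerly
    let docs = DocStore::new(blobs.clone(), author, storage_path.join("docs"));
    let doc = docs.create_or_open(namespace, DownloadMode::Always).await?;

    // insert_bytes stores the content in the BlobStore; only the hash and length
    // go into the iroh-sync record
    let (hash, len) = doc
        .insert_bytes("greeting", Bytes::from_static(b"hello world"))
        .await?;
    println!("inserted {} bytes ({})", len, hex::encode(hash.as_bytes()));

    // read back: look up the signed entries for the key, then fetch content by hash
    for (_id, entry) in doc.replica().all_for_key("greeting".as_bytes()) {
        if let Some(content) = doc.get_content_bytes(&entry).await {
            println!("{}", String::from_utf8_lossy(&content));
        }
    }
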
@@ -129,9 +219,22 @@ impl BlobStore { Ok(Some(bytes)) } + pub async fn get_reader(&self, hash: &Hash) -> anyhow::Result> { + self.downloader.wait_for_download(hash).await; + let Some(entry) = self.db().get(hash) else { + return Ok(None) + }; + let reader = entry.data_reader().await?; + Ok(Some(reader)) + } + pub async fn put_bytes(&self, data: Bytes) -> anyhow::Result<(Hash, u64)> { self.db.put_bytes(data).await } + + pub async fn put_reader(&self, data: impl AsyncRead + Unpin) -> anyhow::Result<(Hash, u64)> { + self.db.put_reader(data).await + } } pub type DownloadReply = oneshot::Sender>; @@ -206,6 +309,7 @@ impl Downloader { } } +#[derive(Debug)] pub struct DownloadActor { dialer: Dialer, db: WritableFileDatabase, diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index e11948d3aa..e8ba2583f7 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -80,7 +80,7 @@ impl LiveSync { Ok(()) } - pub async fn sync_doc(&self, doc: Replica, initial_peers: Vec) -> Result<()> { + pub async fn add(&self, doc: Replica, initial_peers: Vec) -> Result<()> { self.to_actor_tx .send(ToActor::SyncDoc { doc, initial_peers }) .await?; @@ -201,9 +201,16 @@ impl Actor { async fn insert_doc(&mut self, doc: Replica, initial_peers: Vec) -> Result<()> { let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id).collect(); - let topic: TopicId = doc.namespace().as_bytes().into(); + + // add addresses of initial peers to our endpoint address book + for peer in &initial_peers { + self.endpoint + .add_known_addrs(peer.peer_id, &peer.addrs) + .await?; + } + // join gossip for the topic to receive and send message - // let gossip = self.gossip.clone(); + let topic: TopicId = doc.namespace().as_bytes().into(); self.pending_joins.push({ let peer_ids = peer_ids.clone(); let gossip = self.gossip.clone(); @@ -213,6 +220,7 @@ impl Actor { } .boxed() }); + // setup replica insert notifications. 
let insert_entry_tx = self.insert_entry_tx.clone(); doc.on_insert(Box::new(move |origin, entry| { @@ -227,7 +235,7 @@ impl Actor { self.endpoint .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) .await?; - } + // trigger initial sync with initial peers for peer in peer_ids { self.sync_with_peer(topic, peer); From 97ae828a9ab9bec9c849e9ba82d9fa77c517761a Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 13 Jul 2023 17:42:04 +0200 Subject: [PATCH 09/45] feat(example-sync): add ticket command --- iroh/examples/sync.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 3ec5ec6ddb..ef6b917ca8 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -225,7 +225,7 @@ async fn run(args: Args) -> anyhow::Result<()> { _ = tokio::signal::ctrl_c() => { println!("> aborted"); } - res = handle_command(cmd, &doc, &log_filter) => if let Err(err) = res { + res = handle_command(cmd, &doc, &our_ticket, &log_filter) => if let Err(err) = res { println!("> error: {err}"); }, }; @@ -244,7 +244,7 @@ async fn run(args: Args) -> anyhow::Result<()> { Ok(()) } -async fn handle_command(cmd: Cmd, doc: &Doc, log_filter: &LogLevelReload) -> anyhow::Result<()> { +async fn handle_command(cmd: Cmd, doc: &Doc, ticket: &Ticket, log_filter: &LogLevelReload) -> anyhow::Result<()> { match cmd { Cmd::Set { key, value } => { doc.insert_bytes(&key, value.into_bytes().into()).await?; @@ -268,6 +268,9 @@ async fn handle_command(cmd: Cmd, doc: &Doc, log_filter: &LogLevelReload) -> any println!("{}", fmt_entry(&entry),); } } + Cmd::Ticket => { + println!("Ticket: {ticket}"); + } Cmd::Log { directive } => { let next_filter = EnvFilter::from_str(&directive)?; log_filter.modify(|layer| *layer = next_filter)?; @@ -301,6 +304,8 @@ pub enum Cmd { /// Optionally list only entries whose key starts with PREFIX. prefix: Option, }, + /// Print the ticket with which other peers can join our document. 
+ Ticket, /// Change the log level Log { /// The log level or log filtering directive @@ -450,7 +455,6 @@ fn init_logging() -> LogLevelReload { reload_handle } - // helpers fn fmt_entry(entry: &SignedEntry) -> String { From 0e96ba9af46a5b9ecb60c13e639d1773544f94ce Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Wed, 26 Jul 2023 23:34:31 +0200 Subject: [PATCH 10/45] fix: adapt to changes on main after rebase --- iroh-bytes/src/lib.rs | 1 - iroh/examples/sync.rs | 27 +++++++++++-------- iroh/src/database/flat.rs | 2 ++ .../src/database/flat}/writable.rs | 5 ++-- iroh/src/sync/content.rs | 8 +++--- iroh/src/sync/live.rs | 3 ++- 6 files changed, 28 insertions(+), 18 deletions(-) rename {iroh-bytes/src => iroh/src/database/flat}/writable.rs (98%) diff --git a/iroh-bytes/src/lib.rs b/iroh-bytes/src/lib.rs index 02ef6105e4..18d95542d0 100644 --- a/iroh-bytes/src/lib.rs +++ b/iroh-bytes/src/lib.rs @@ -8,7 +8,6 @@ pub mod get; pub mod protocol; pub mod provider; pub mod util; -pub mod writable; #[cfg(test)] pub(crate) mod test_utils; diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index ef6b917ca8..147280bb82 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -173,7 +173,7 @@ async fn run(args: Args) -> anyhow::Result<()> { println!("> storage directory: {storage_path:?}"); // create a runtime that can spawn tasks on a local-thread executors (to support !Send futures) - let rt = iroh::bytes::runtime::Handle::from_currrent(num_cpus::get())?; + let rt = iroh_bytes::util::runtime::Handle::from_currrent(num_cpus::get())?; // create a blob store (with a iroh-bytes database inside) let blobs = BlobStore::new(rt.clone(), storage_path.join("blobs"), endpoint.clone()).await?; @@ -244,7 +244,12 @@ async fn run(args: Args) -> anyhow::Result<()> { Ok(()) } -async fn handle_command(cmd: Cmd, doc: &Doc, ticket: &Ticket, log_filter: &LogLevelReload) -> anyhow::Result<()> { +async fn handle_command( + cmd: Cmd, + doc: &Doc, + ticket: &Ticket, + log_filter: &LogLevelReload, +) -> anyhow::Result<()> { match cmd { Cmd::Set { key, value } => { doc.insert_bytes(&key, value.into_bytes().into()).await?; @@ -516,7 +521,7 @@ fn derp_map_from_url(url: Url) -> anyhow::Result { DEFAULT_DERP_STUN_PORT, UseIpv4::TryDns, UseIpv6::TryDns, - 0 + 0, )) } @@ -528,21 +533,21 @@ mod iroh_bytes_handlers { use futures::{future::BoxFuture, FutureExt}; use iroh_bytes::{ protocol::{GetRequest, RequestToken}, - provider::{ - CustomGetHandler, Database, EventSender, IrohCollectionParser, - RequestAuthorizationHandler, - }, + provider::{CustomGetHandler, EventSender, RequestAuthorizationHandler}, }; + + use iroh::{collection::IrohCollectionParser, database::flat::Database}; + #[derive(Debug, Clone)] pub struct IrohBytesHandlers { db: Database, - rt: iroh_bytes::runtime::Handle, + rt: iroh_bytes::util::runtime::Handle, event_sender: NoopEventSender, get_handler: Arc, auth_handler: Arc, } impl IrohBytesHandlers { - pub fn new(rt: iroh_bytes::runtime::Handle, db: Database) -> Self { + pub fn new(rt: iroh_bytes::util::runtime::Handle, db: Database) -> Self { Self { db, rt, @@ -569,8 +574,8 @@ mod iroh_bytes_handlers { #[derive(Debug, Clone)] struct NoopEventSender; impl EventSender for NoopEventSender { - fn send(&self, _event: iroh_bytes::provider::Event) -> Option { - None + fn send(&self, _event: iroh_bytes::provider::Event) -> BoxFuture<()> { + async {}.boxed() } } #[derive(Debug)] diff --git a/iroh/src/database/flat.rs b/iroh/src/database/flat.rs index acad820b5b..31a41f2d18 100644 --- 
a/iroh/src/database/flat.rs +++ b/iroh/src/database/flat.rs @@ -29,6 +29,8 @@ use crate::util::io::validate_bao; use crate::util::io::BaoValidationError; use crate::util::progress::{Progress, ProgressReader, ProgressReaderUpdate}; +pub mod writable; + /// File name of directory inside `IROH_DATA_DIR` where outboards are stored. const FNAME_OUTBOARDS: &str = "outboards"; diff --git a/iroh-bytes/src/writable.rs b/iroh/src/database/flat/writable.rs similarity index 98% rename from iroh-bytes/src/writable.rs rename to iroh/src/database/flat/writable.rs index d1666831da..aa9bb36a6e 100644 --- a/iroh-bytes/src/writable.rs +++ b/iroh/src/database/flat/writable.rs @@ -12,13 +12,14 @@ use iroh_io::{AsyncSliceWriter, File}; use range_collections::RangeSet2; use tokio::io::AsyncRead; -use crate::{ +use iroh_bytes::{ get::fsm, protocol::{GetRequest, RangeSpecSeq, Request}, - provider::{create_collection, DataSource, Database, DbEntry, FNAME_PATHS}, Hash, }; +use crate::database::flat::{create_collection, DataSource, Database, DbEntry, FNAME_PATHS}; + /// A blob database into which new blobs can be inserted. /// /// Blobs can be inserted either from bytes or by downloading from open connections to peers. diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index f061808609..3f396e37c6 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -11,7 +11,7 @@ use futures::{ stream::FuturesUnordered, FutureExt, }; -use iroh_bytes::{provider::Database, util::Hash, writable::WritableFileDatabase}; +use iroh_bytes::util::Hash; use iroh_gossip::net::util::Dialer; use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; use iroh_net::{tls::PeerId, MagicEndpoint}; @@ -25,6 +25,8 @@ use tokio::{ use tokio_stream::StreamExt; use tracing::{debug, error, warn}; +use crate::database::flat::{writable::WritableFileDatabase, Database}; + #[derive(Debug, Copy, Clone)] pub enum DownloadMode { Always, @@ -187,7 +189,7 @@ pub struct BlobStore { } impl BlobStore { pub async fn new( - rt: iroh_bytes::runtime::Handle, + rt: iroh_bytes::util::runtime::Handle, data_path: PathBuf, endpoint: MagicEndpoint, ) -> anyhow::Result { @@ -263,7 +265,7 @@ pub struct Downloader { impl Downloader { pub fn new( - rt: iroh_bytes::runtime::Handle, + rt: iroh_bytes::util::runtime::Handle, endpoint: MagicEndpoint, blobs: WritableFileDatabase, ) -> Self { diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index e8ba2583f7..c2c961e660 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -205,7 +205,7 @@ impl Actor { // add addresses of initial peers to our endpoint address book for peer in &initial_peers { self.endpoint - .add_known_addrs(peer.peer_id, &peer.addrs) + .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) .await?; } @@ -235,6 +235,7 @@ impl Actor { self.endpoint .add_known_addrs(peer.peer_id, peer.derp_region, &peer.addrs) .await?; + } // trigger initial sync with initial peers for peer in peer_ids { From 400be8f0ca4bf769c460573952294617812e9887 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Wed, 26 Jul 2023 21:00:09 +0200 Subject: [PATCH 11/45] example(sync): add watch command --- iroh/examples/sync.rs | 34 +++++++++++++++++++++++++++++++++- iroh/src/sync/content.rs | 7 ++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 147280bb82..26a44bf032 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -208,6 +208,8 @@ async fn run(args: Args) -> anyhow::Result<()> { // process commands in a loop 
println!("> ready to accept commands"); println!("> type `help` for a list of commands"); + + let mut current_watch = Arc::new(std::sync::Mutex::new(None)); loop { // wait for a command from the input repl thread let Some((cmd, to_repl_tx)) = cmd_rx.recv().await else { @@ -225,7 +227,7 @@ async fn run(args: Args) -> anyhow::Result<()> { _ = tokio::signal::ctrl_c() => { println!("> aborted"); } - res = handle_command(cmd, &doc, &our_ticket, &log_filter) => if let Err(err) = res { + res = handle_command(cmd, &doc, &our_ticket, &log_filter, ¤t_watch) => if let Err(err) = res { println!("> error: {err}"); }, }; @@ -249,6 +251,7 @@ async fn handle_command( doc: &Doc, ticket: &Ticket, log_filter: &LogLevelReload, + current_watch: &Arc>>, ) -> anyhow::Result<()> { match cmd { Cmd::Set { key, value } => { @@ -263,6 +266,28 @@ async fn handle_command( } } } + Cmd::Watch { key } => { + println!("watching key: '{key}'"); + current_watch.lock().unwrap().replace(key); + let watch = current_watch.clone(); + doc.on_insert(Box::new(move |origin, entry| { + let matcher = watch.lock().unwrap(); + if let Some(matcher) = &*matcher { + let key = entry.entry().id().key(); + if key.starts_with(matcher.as_bytes()) { + println!("change: {}", fmt_entry(&entry)); + } + } + })); + } + Cmd::WatchCancel => match current_watch.lock().unwrap().take() { + Some(key) => { + println!("canceled watching key: '{key}'"); + } + None => { + println!("no watch active"); + } + }, Cmd::Ls { prefix } => { let entries = match prefix { None => doc.replica().all(), @@ -325,6 +350,13 @@ pub enum Cmd { #[clap(verbatim_doc_comment)] directive: String, }, + /// Watch for changes. + Watch { + /// The key to watch. + key: String, + }, + /// Cancels any running watch command. + WatchCancel, /// Quit Exit, } diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 3f396e37c6..3d48424749 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -16,7 +16,8 @@ use iroh_gossip::net::util::Dialer; use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{ - Author, InsertOrigin, Namespace, NamespaceId, Replica, ReplicaStore, SignedEntry, + Author, InsertOrigin, Namespace, NamespaceId, OnInsertCallback, Replica, ReplicaStore, + SignedEntry, }; use tokio::{ io::AsyncRead, @@ -127,6 +128,10 @@ impl Doc { doc } + pub fn on_insert(&self, callback: OnInsertCallback) { + self.replica.on_insert(callback); + } + pub fn replica(&self) -> &Replica { &self.replica } From df690d221c92e443d08b81916fd590a967f85000 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Wed, 26 Jul 2023 21:06:17 +0200 Subject: [PATCH 12/45] fixup --- iroh/examples/sync.rs | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 26a44bf032..87ebdc2f4d 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -209,7 +209,19 @@ async fn run(args: Args) -> anyhow::Result<()> { println!("> ready to accept commands"); println!("> type `help` for a list of commands"); - let mut current_watch = Arc::new(std::sync::Mutex::new(None)); + let mut current_watch: Arc>> = + Arc::new(std::sync::Mutex::new(None)); + let watch = current_watch.clone(); + doc.on_insert(Box::new(move |origin, entry| { + let matcher = watch.lock().unwrap(); + if let Some(matcher) = &*matcher { + let key = entry.entry().id().key(); + if key.starts_with(matcher.as_bytes()) { + println!("change: {}", fmt_entry(&entry)); + } + } + })); + loop { 
// wait for a command from the input repl thread let Some((cmd, to_repl_tx)) = cmd_rx.recv().await else { @@ -269,16 +281,6 @@ async fn handle_command( Cmd::Watch { key } => { println!("watching key: '{key}'"); current_watch.lock().unwrap().replace(key); - let watch = current_watch.clone(); - doc.on_insert(Box::new(move |origin, entry| { - let matcher = watch.lock().unwrap(); - if let Some(matcher) = &*matcher { - let key = entry.entry().id().key(); - if key.starts_with(matcher.as_bytes()) { - println!("change: {}", fmt_entry(&entry)); - } - } - })); } Cmd::WatchCancel => match current_watch.lock().unwrap().take() { Some(key) => { From ce72dccb7d0bb22efd3de97f8c9ce4b31ccf10a7 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Wed, 26 Jul 2023 23:44:54 +0200 Subject: [PATCH 13/45] chore: unused variables --- iroh/examples/sync.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 87ebdc2f4d..5df2db1772 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -209,10 +209,10 @@ async fn run(args: Args) -> anyhow::Result<()> { println!("> ready to accept commands"); println!("> type `help` for a list of commands"); - let mut current_watch: Arc>> = + let current_watch: Arc>> = Arc::new(std::sync::Mutex::new(None)); let watch = current_watch.clone(); - doc.on_insert(Box::new(move |origin, entry| { + doc.on_insert(Box::new(move |_origin, entry| { let matcher = watch.lock().unwrap(); if let Some(matcher) = &*matcher { let key = entry.entry().id().key(); From 1ed0cca01d0df824f8af0c7ca8590f1cb5656a25 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 11:39:27 +0200 Subject: [PATCH 14/45] fix tests and warnings --- iroh-sync/src/ranger.rs | 16 ++++--- iroh-sync/src/sync.rs | 94 ++++++++++++++++++++++------------------- iroh/src/sync.rs | 4 +- 3 files changed, 62 insertions(+), 52 deletions(-) diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index d5310839a6..665751d248 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -401,12 +401,14 @@ where /// Processes an incoming message and produces a response. /// If terminated, returns `None` - pub fn process_message(&mut self, message: Message) -> (Vec, Option>) { + pub fn process_message(&mut self, message: Message, cb: F) -> Option> + where + F: Fn(K, V), + { let mut out = Vec::new(); // TODO: can these allocs be avoided? 
let mut items = Vec::new(); - let mut inserted = Vec::new(); let mut fingerprints = Vec::new(); for part in message.parts { match part { @@ -440,7 +442,7 @@ where // Store incoming values for (k, v) in values { - inserted.push(k.clone()); + cb(k.clone(), v.clone()); self.store.put(k, v); } @@ -547,9 +549,9 @@ where // If we have any parts, return a message if !out.is_empty() { - (inserted, Some(Message { parts: out })) + Some(Message { parts: out }) } else { - (inserted, None) + None } } @@ -1101,9 +1103,9 @@ mod tests { rounds += 1; alice_to_bob.push(msg.clone()); - if let Some(msg) = bob.process_message(msg) { + if let Some(msg) = bob.process_message(msg, |_, _| {}) { bob_to_alice.push(msg.clone()); - next_to_bob = alice.process_message(msg); + next_to_bob = alice.process_message(msg, |_, _| {}); } } let res = SyncResult { diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index a6f05391bb..7cfc9ce60b 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -241,14 +241,14 @@ pub enum InsertOrigin { #[derive(derive_more::Debug, Clone)] pub struct Replica { inner: Arc>, + #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] + on_insert: Arc>>, } #[derive(derive_more::Debug)] struct InnerReplica { namespace: Namespace, peer: Peer, - #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] - on_insert: Vec, } #[derive(Default, Debug, Clone)] @@ -388,14 +388,14 @@ impl Replica { inner: Arc::new(RwLock::new(InnerReplica { namespace, peer: Peer::default(), - on_insert: Default::default(), })), + on_insert: Default::default(), } } pub fn on_insert(&self, callback: OnInsertCallback) { - let mut inner = self.inner.write(); - inner.on_insert.push(callback); + let mut on_insert = self.on_insert.write(); + on_insert.push(callback); } // TODO: not horrible @@ -454,13 +454,31 @@ impl Replica { // Store signed entries let entry = Entry::new(id.clone(), record); - let signed_entry = entry.sign(&inner.namespace, author); - inner.peer.put(id, signed_entry.clone()); - for cb in &inner.on_insert { + let signed_entry = entry.sign(&inner.namespace, author).clone(); + inner.peer.put(id.clone(), signed_entry.clone()); + drop(inner); + let on_insert = self.on_insert.read(); + for cb in &*on_insert { cb(InsertOrigin::Local, signed_entry.clone()); } } + /// Hashes the given data and inserts it. + /// This does not store the content, just the record of it. + /// + /// Returns the calculated hash. 
+ pub fn hash_and_insert( + &self, + key: impl AsRef<[u8]>, + author: &Author, + data: impl AsRef<[u8]>, + ) -> Hash { + let len = data.as_ref().len() as u64; + let hash = Hash::new(data); + self.insert(key, author, hash, len); + hash + } + pub fn id(&self, key: impl AsRef<[u8]>, author: &Author) -> RecordIdentifier { let inner = self.inner.read(); let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); @@ -470,9 +488,12 @@ impl Replica { pub fn insert_remote_entry(&self, entry: SignedEntry) -> anyhow::Result<()> { entry.verify()?; let mut inner = self.inner.write(); - inner.peer.put(entry.entry.id.clone(), entry.clone()); - for cb in &inner.on_insert { - cb(InsertOrigin::Sync, entry.clone()) + let id = entry.entry.id.clone(); + inner.peer.put(id.clone(), entry.clone()); + drop(inner); + let on_insert = self.on_insert.read(); + for cb in &*on_insert { + cb(InsertOrigin::Sync, entry.clone()); } Ok(()) } @@ -511,14 +532,17 @@ impl Replica { &self, message: crate::ranger::Message, ) -> Option> { - let (inserted_keys, reply) = self.inner.write().peer.process_message(message); - let inner = self.inner.read(); - for key in inserted_keys { - let entry = inner.peer.get(&key).unwrap(); - for cb in &inner.on_insert { - cb(InsertOrigin::Sync, entry.clone()) - } - } + let reply = self + .inner + .write() + .peer + .process_message(message, |_key, entry| { + let on_insert = self.on_insert.read(); + for cb in &*on_insert { + cb(InsertOrigin::Sync, entry.clone()); + } + }); + reply } @@ -817,7 +841,7 @@ mod tests { let my_replica = Replica::new(myspace); for i in 0..10 { - my_replica.insert(format!("/{i}"), &alice, format!("{i}: hello from alice")); + my_replica.hash_and_insert(format!("/{i}"), &alice, format!("{i}: hello from alice")); } for i in 0..10 { @@ -828,33 +852,16 @@ mod tests { } // Test multiple records for the same key - my_replica.insert("/cool/path", &alice, "round 1"); - let entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); - let content = my_replica - .get_content(entry.entry().record().content_hash()) - .unwrap(); - assert_eq!(&content[..], b"round 1"); + my_replica.hash_and_insert("/cool/path", &alice, "round 1"); + let _entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); // Second - - my_replica.insert("/cool/path", &alice, "round 2"); - let entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); - let content = my_replica - .get_content(entry.entry().record().content_hash()) - .unwrap(); - assert_eq!(&content[..], b"round 2"); + my_replica.hash_and_insert("/cool/path", &alice, "round 2"); + let _entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); // Get All let entries: Vec<_> = my_replica.get_all("/cool/path", alice.id()).collect(); assert_eq!(entries.len(), 2); - let content = my_replica - .get_content(entries[0].entry().record().content_hash()) - .unwrap(); - assert_eq!(&content[..], b"round 1"); - let content = my_replica - .get_content(entries[1].entry().record().content_hash()) - .unwrap(); - assert_eq!(&content[..], b"round 2"); } #[test] @@ -928,12 +935,12 @@ mod tests { let myspace = Namespace::new(&mut rng); let mut alice = Replica::new(myspace.clone()); for el in &alice_set { - alice.insert(el, &author, el.as_bytes()); + alice.hash_and_insert(el, &author, el.as_bytes()); } let mut bob = Replica::new(myspace); for el in &bob_set { - bob.insert(el, &author, el.as_bytes()); + bob.hash_and_insert(el, &author, el.as_bytes()); } sync(&author, &mut alice, &mut bob, &alice_set, &bob_set); @@ -952,6 +959,7 @@ mod 
tests { while let Some(msg) = next_to_bob.take() { assert!(rounds < 100, "too many rounds"); rounds += 1; + println!("round {}", rounds); if let Some(msg) = bob.sync_process_message(msg) { next_to_bob = alice.sync_process_message(msg); } diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index cf9f8e0fd0..bc396b58a7 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -185,10 +185,10 @@ mod tests { let author = replica_store.new_author(&mut rng); let namespace = Namespace::new(&mut rng); let bob_replica = replica_store.new_replica(namespace.clone()); - bob_replica.insert("hello alice", &author, "from bob"); + bob_replica.hash_and_insert("hello alice", &author, "from bob"); let alice_replica = Replica::new(namespace.clone()); - alice_replica.insert("hello bob", &author, "from alice"); + alice_replica.hash_and_insert("hello bob", &author, "from alice"); assert_eq!(bob_replica.all().len(), 1); assert_eq!(alice_replica.all().len(), 1); From 7d24fdaa357d82a916ca2c050a2e9267e7c038af Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 11:47:06 +0200 Subject: [PATCH 15/45] clippy cleanups --- iroh-sync/src/ranger.rs | 16 ++-------------- iroh-sync/src/sync.rs | 12 ++++++------ iroh/examples/sync.rs | 2 +- iroh/src/database/flat/writable.rs | 13 +++++++++---- iroh/src/sync/content.rs | 20 +++++++++----------- 5 files changed, 27 insertions(+), 36 deletions(-) diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index 665751d248..ea80f49f9e 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -214,8 +214,7 @@ where V: 'a; /// Returns all items in the given range - fn get_range<'a>(&'a self, range: Range, limit: Option>) - -> Self::RangeIterator<'a>; + fn get_range(&self, range: Range, limit: Option>) -> Self::RangeIterator<'_>; fn remove(&mut self, key: &K) -> Option; type AllIterator<'a>: Iterator @@ -282,11 +281,7 @@ where type RangeIterator<'a> = SimpleRangeIterator<'a, K, V> where K: 'a, V: 'a; /// Returns all items in the given range - fn get_range<'a>( - &'a self, - range: Range, - limit: Option>, - ) -> Self::RangeIterator<'a> { + fn get_range(&self, range: Range, limit: Option>) -> Self::RangeIterator<'_> { // TODO: this is not very efficient, optimize depending on data structure let iter = self.data.iter(); @@ -1179,14 +1174,12 @@ mod tests { let all: Vec<_> = store .get_range(Range::new("", ""), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); assert_eq!(&all, &set[..]); let regular: Vec<_> = store .get_range(("bee", "eel").into(), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); assert_eq!(®ular, &set[..3]); @@ -1194,21 +1187,18 @@ mod tests { // empty start let regular: Vec<_> = store .get_range(("", "eel").into(), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); assert_eq!(®ular, &set[..3]); let regular: Vec<_> = store .get_range(("cat", "hog").into(), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); assert_eq!(®ular, &set[1..5]); let excluded: Vec<_> = store .get_range(("fox", "bee").into(), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); @@ -1218,7 +1208,6 @@ mod tests { let excluded: Vec<_> = store .get_range(("fox", "doe").into(), None) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); @@ -1231,7 +1220,6 @@ mod tests { // Limit let all: Vec<_> = store .get_range(("", "").into(), Some(("bee", "doe").into())) - .into_iter() .map(|(k, v)| (*k, *v)) .collect(); assert_eq!(&all, &set[..2]); diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 7cfc9ce60b..baed4e678d 100644 --- 
a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -315,11 +315,11 @@ impl crate::ranger::Store for Store { } type RangeIterator<'a> = RangeIterator<'a>; - fn get_range<'a>( - &'a self, + fn get_range( + &self, range: Range, limit: Option>, - ) -> Self::RangeIterator<'a> { + ) -> Self::RangeIterator<'_> { RangeIterator { iter: self.records.iter(), range: Some(range), @@ -454,8 +454,8 @@ impl Replica { // Store signed entries let entry = Entry::new(id.clone(), record); - let signed_entry = entry.sign(&inner.namespace, author).clone(); - inner.peer.put(id.clone(), signed_entry.clone()); + let signed_entry = entry.sign(&inner.namespace, author); + inner.peer.put(id, signed_entry.clone()); drop(inner); let on_insert = self.on_insert.read(); for cb in &*on_insert { @@ -489,7 +489,7 @@ impl Replica { entry.verify()?; let mut inner = self.inner.write(); let id = entry.entry.id.clone(); - inner.peer.put(id.clone(), entry.clone()); + inner.peer.put(id, entry.clone()); drop(inner); let on_insert = self.on_insert.read(); for cb in &*on_insert { diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 5df2db1772..25a8e839c0 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -274,7 +274,7 @@ async fn handle_command( for (_id, entry) in entries { println!("{}", fmt_entry(&entry)); if print_content { - println!("{}", fmt_content(&doc, &entry).await); + println!("{}", fmt_content(doc, &entry).await); } } } diff --git a/iroh/src/database/flat/writable.rs b/iroh/src/database/flat/writable.rs index aa9bb36a6e..8f933813ee 100644 --- a/iroh/src/database/flat/writable.rs +++ b/iroh/src/database/flat/writable.rs @@ -4,7 +4,12 @@ //! I wrote this while diving into iroh-bytes, wildly copying code around. This will be solved much //! nicer with the upcoming generic writable database branch by @rklaehn. 
-use std::{collections::HashMap, io, path::PathBuf, sync::Arc}; +use std::{ + collections::HashMap, + io, + path::{Path, PathBuf}, + sync::Arc, +}; use anyhow::Context; use bytes::Bytes; @@ -69,13 +74,13 @@ impl WritableFileDatabase { } pub async fn put_from_temp_file(&self, temp_path: &PathBuf) -> anyhow::Result<(Hash, u64)> { - let (hash, size, entry) = self.storage.move_to_blobs(&temp_path).await?; + let (hash, size, entry) = self.storage.move_to_blobs(temp_path).await?; self.db.union_with(HashMap::from_iter([(hash, entry)])); Ok((hash, size)) } pub async fn get_size(&self, hash: &Hash) -> Option { - Some(self.db.get(&hash)?.size().await) + Some(self.db.get(hash)?.size().await) } pub fn has(&self, hash: &Hash) -> bool { @@ -193,7 +198,7 @@ impl StoragePaths { } } -async fn prepare_hash_dir(path: &PathBuf, hash: &Hash) -> anyhow::Result { +async fn prepare_hash_dir(path: &Path, hash: &Hash) -> anyhow::Result { let hash = hex::encode(hash.as_ref()); let path = path.join(&hash[0..2]).join(&hash[2..4]).join(&hash[4..]); tokio::fs::create_dir_all(path.parent().unwrap()).await?; diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 3d48424749..de43f69dfc 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -169,13 +169,12 @@ impl Doc { pub async fn get_content_bytes(&self, entry: &SignedEntry) -> Option { let hash = entry.entry().record().content_hash(); - let bytes = self.blobs.get_bytes(hash).await.ok().flatten(); - bytes + self.blobs.get_bytes(hash).await.ok().flatten() } + pub async fn get_content_reader(&self, entry: &SignedEntry) -> Option { let hash = entry.entry().record().content_hash(); - let bytes = self.blobs.get_reader(hash).await.ok().flatten(); - bytes + self.blobs.get_reader(hash).await.ok().flatten() } } @@ -208,7 +207,7 @@ impl BlobStore { } pub fn db(&self) -> &Database { - &self.db.db() + self.db.db() } pub fn start_download(&self, hash: Hash, peer: PeerId) { @@ -378,21 +377,21 @@ impl DownloadActor { fn reply(&mut self, hash: Hash, res: Option<(Hash, u64)>) { for reply in self.replies.remove(&hash).into_iter().flatten() { - reply.send(res.clone()).ok(); + reply.send(res).ok(); } } fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { warn!("download from {peer} failed: {err}"); - for hash in self.peer_hashes.remove(&peer).into_iter().flatten() { + for hash in self.peer_hashes.remove(peer).into_iter().flatten() { self.on_not_found(peer, hash); } - self.conns.remove(&peer); + self.conns.remove(peer); } fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { if let Some(peers) = self.hash_peers.get_mut(&hash) { - peers.remove(&peer); + peers.remove(peer); if peers.is_empty() { self.reply(hash, None); self.hash_peers.remove(&hash); @@ -404,8 +403,7 @@ impl DownloadActor { if let Some(hash) = self .peer_hashes .get_mut(&peer) - .map(|hashes| hashes.pop_front()) - .flatten() + .and_then(|hashes| hashes.pop_front()) { let conn = self.conns.get(&peer).unwrap().clone(); let blobs = self.db.clone(); From 613260cabee612df12c2a7dee43419cfad965132 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 27 Jul 2023 12:35:25 +0200 Subject: [PATCH 16/45] feat(iroh): metrics for iroh-sync --- iroh/examples/sync.rs | 40 +++++++++++++++++++++++++++++++++++ iroh/src/sync.rs | 2 ++ iroh/src/sync/content.rs | 43 +++++++++++++++++++++++++++++++++++--- iroh/src/sync/live.rs | 21 ++++++++++++++++++- iroh/src/sync/metrics.rs | 45 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 
iroh/src/sync/metrics.rs diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 25a8e839c0..ec780768e2 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -18,6 +18,10 @@ use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, }; +use iroh_metrics::{ + core::{Counter, Metric}, + struct_iterable::Iterable, +}; use iroh_net::{ defaults::{default_derp_map, DEFAULT_DERP_STUN_PORT}, derp::{DerpMap, UseIpv4, UseIpv6}, @@ -73,7 +77,15 @@ async fn main() -> anyhow::Result<()> { } async fn run(args: Args) -> anyhow::Result<()> { + // setup logging let log_filter = init_logging(); + + // init metrics + iroh_metrics::core::Core::init(|reg, metrics| { + metrics.insert(iroh::sync::metrics::Metrics::new(reg)); + metrics.insert(iroh_gossip::metrics::Metrics::new(reg)); + }); + // parse or generate our keypair let keypair = match args.private_key { None => Keypair::generate(), @@ -307,6 +319,7 @@ async fn handle_command( let next_filter = EnvFilter::from_str(&directive)?; log_filter.modify(|layer| *layer = next_filter)?; } + Cmd::Stats => get_stats(), Cmd::Exit => {} } Ok(()) @@ -359,6 +372,8 @@ pub enum Cmd { }, /// Cancels any running watch command. WatchCancel, + /// Show stats about the current session + Stats, /// Quit Exit, } @@ -445,6 +460,31 @@ fn repl_loop(cmd_tx: mpsc::Sender<(Cmd, oneshot::Sender)>) -> anyhow::Re Ok(()) } +fn get_stats() { + let core = iroh_metrics::core::Core::get().expect("Metrics core not initialized"); + println!("# sync"); + let metrics = core + .get_collector::() + .unwrap(); + fmt_metrics(metrics); + println!("# gossip"); + let metrics = core + .get_collector::() + .unwrap(); + fmt_metrics(metrics); +} + +fn fmt_metrics(metrics: &impl Iterable) { + for (name, counter) in metrics.iter() { + if let Some(counter) = counter.downcast_ref::() { + let value = counter.get(); + println!("{name:23} : {value:>6} ({})", counter.description); + } else { + println!("{name:23} : unsupported metric kind"); + } + } +} + #[derive(Debug, Serialize, Deserialize)] struct Ticket { topic: TopicId, diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index bc396b58a7..d5b92516b8 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -15,6 +15,8 @@ pub const SYNC_ALPN: &[u8] = b"/iroh-sync/1"; mod content; mod live; +pub mod metrics; + pub use content::*; pub use live::*; diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index de43f69dfc..f5942eb193 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -3,6 +3,7 @@ use std::{ io, path::PathBuf, sync::{Arc, Mutex}, + time::Instant, }; use bytes::Bytes; @@ -14,6 +15,7 @@ use futures::{ use iroh_bytes::util::Hash; use iroh_gossip::net::util::Dialer; use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; +use iroh_metrics::{inc, inc_by}; use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{ Author, InsertOrigin, Namespace, NamespaceId, OnInsertCallback, Replica, ReplicaStore, @@ -26,6 +28,7 @@ use tokio::{ use tokio_stream::StreamExt; use tracing::{debug, error, warn}; +use super::metrics::Metrics; use crate::database::flat::{writable::WritableFileDatabase, Database}; #[derive(Debug, Copy, Clone)] @@ -117,14 +120,33 @@ impl Doc { blobs, local_author, }; + + // If download mode is set to always download: + // setup on_insert callback to trigger download on remote insert if let DownloadMode::Always = download_mode { - let doc2 = doc.clone(); + let doc_clone = doc.clone(); doc.replica.on_insert(Box::new(move |origin, entry| { if matches!(origin, InsertOrigin::Sync) { - 
doc2.download_content_fron_author(&entry); + doc_clone.download_content_fron_author(&entry); } })); } + + // Collect metrics + doc.replica.on_insert(Box::new(move |origin, entry| { + let size = entry.entry().record().content_len(); + match origin { + InsertOrigin::Local => { + inc!(Metrics, new_entries_local); + inc_by!(Metrics, new_entries_local_size, size); + } + InsertOrigin::Sync => { + inc!(Metrics, new_entries_remote); + inc_by!(Metrics, new_entries_remote_size, size); + } + } + })); + doc } @@ -407,7 +429,22 @@ impl DownloadActor { { let conn = self.conns.get(&peer).unwrap().clone(); let blobs = self.db.clone(); - let fut = async move { (peer, hash, blobs.download_single(conn, hash).await) }; + let fut = async move { + let start = Instant::now(); + let res = blobs.download_single(conn, hash).await; + // record metrics + let elapsed = start.elapsed().as_millis(); + match &res { + Ok(Some((_hash, len))) => { + inc!(Metrics, downloads_success); + inc_by!(Metrics, download_bytes_total, *len); + inc_by!(Metrics, download_time_total, elapsed as u64); + } + Ok(None) => inc!(Metrics, downloads_notfound), + Err(_) => inc!(Metrics, downloads_error), + } + (peer, hash, res) + }; self.pending_downloads.push(fut.boxed_local()); } else { self.conns.remove(&peer); diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index c2c961e660..2d68c96e44 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -11,12 +11,15 @@ use iroh_gossip::{ net::{Event, GossipHandle}, proto::TopicId, }; +use iroh_metrics::inc; use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::sync::{InsertOrigin, Replica, SignedEntry}; use serde::{Deserialize, Serialize}; use tokio::{sync::mpsc, task::JoinError}; use tracing::{debug, error}; +use super::metrics::Metrics; + const CHANNEL_CAP: usize = 8; /// The address to connect to a peer @@ -140,7 +143,10 @@ impl Actor { match msg { // received shutdown signal, or livesync handle was dropped: // break loop and exit - Some(ToActor::Shutdown) | None => break, + Some(ToActor::Shutdown) | None => { + self.on_shutdown().await?; + break; + } Some(ToActor::SyncDoc { doc, initial_peers }) => self.insert_doc(doc, initial_peers).await?, } } @@ -192,6 +198,11 @@ impl Actor { // TODO: Make sure that the peer is dialable. 
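The metrics wiring added in this patch always follows one shape: a plain struct of Counter fields, registered once through Core::init, and bumped from anywhere with the inc!/inc_by! macros. A minimal sketch of that pattern, assuming (as the registration call in the example's run() suggests) that Metric::new(reg) is provided for types implementing Default and Iterable; the struct and counter names below are illustrative only, not part of the patch:

use iroh_metrics::{
    core::{Counter, Metric},
    inc, inc_by,
    struct_iterable::Iterable,
};

#[derive(Debug, Clone, Iterable)]
pub struct ExampleMetrics {
    pub requests: Counter,
    pub bytes_out: Counter,
}

impl Default for ExampleMetrics {
    fn default() -> Self {
        Self {
            requests: Counter::new("Number of requests handled"),
            bytes_out: Counter::new("Total number of bytes sent"),
        }
    }
}

impl Metric for ExampleMetrics {
    fn name() -> &'static str {
        "example"
    }
}

fn init_metrics() {
    // register the collector once, the same way the sync example does above
    iroh_metrics::core::Core::init(|reg, metrics| {
        metrics.insert(ExampleMetrics::new(reg));
    });
}

fn record_request(len: u64) {
    // assumes init_metrics() ran; the collector is looked up by its type
    inc!(ExampleMetrics, requests);
    inc_by!(ExampleMetrics, bytes_out, len);
}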
let res = connect_and_sync(&endpoint, &doc, peer, None, &[]).await; debug!("> synced with {peer}: {res:?}"); + // collect metrics + match &res { + Ok(_) => inc!(Metrics, initial_sync_success), + Err(_) => inc!(Metrics, initial_sync_failed), + } (topic, peer, res) } .boxed() @@ -199,6 +210,14 @@ impl Actor { self.pending_syncs.push(task); } + async fn on_shutdown(&mut self) -> anyhow::Result<()> { + for (topic, _doc) in self.docs.drain() { + // TODO: Remove the on_insert callbacks + self.gossip.quit(topic).await?; + } + Ok(()) + } + async fn insert_doc(&mut self, doc: Replica, initial_peers: Vec) -> Result<()> { let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id).collect(); diff --git a/iroh/src/sync/metrics.rs b/iroh/src/sync/metrics.rs new file mode 100644 index 0000000000..257f2afa07 --- /dev/null +++ b/iroh/src/sync/metrics.rs @@ -0,0 +1,45 @@ +use iroh_metrics::{ + core::{Counter, Metric}, + struct_iterable::Iterable, +}; + +/// Metrics for iroh-sync +#[allow(missing_docs)] +#[derive(Debug, Clone, Iterable)] +pub struct Metrics { + pub new_entries_local: Counter, + pub new_entries_remote: Counter, + pub new_entries_local_size: Counter, + pub new_entries_remote_size: Counter, + pub download_bytes_total: Counter, + pub download_time_total: Counter, + pub downloads_success: Counter, + pub downloads_error: Counter, + pub downloads_notfound: Counter, + pub initial_sync_success: Counter, + pub initial_sync_failed: Counter, +} + +impl Default for Metrics { + fn default() -> Self { + Self { + new_entries_local: Counter::new("Number of document entries added locally"), + new_entries_remote: Counter::new("Number of document entries added by peers"), + new_entries_local_size: Counter::new("Total size of entry contents added locally"), + new_entries_remote_size: Counter::new("Total size of entry contents added by peers"), + download_bytes_total: Counter::new("Total number of content bytes downloaded"), + download_time_total: Counter::new("Total time in ms spent downloading content bytes"), + downloads_success: Counter::new("Total number of successfull downloads"), + downloads_error: Counter::new("Total number of downloads failed with error"), + downloads_notfound: Counter::new("Total number of downloads failed with not found"), + initial_sync_success: Counter::new("Number of successfull initial syncs "), + initial_sync_failed: Counter::new("Number of failed initial syncs"), + } + } +} + +impl Metric for Metrics { + fn name() -> &'static str { + "iroh-sync" + } +} From 684b753a1d26b48d7eb33cff9f16b7093cfcd274 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 13:01:45 +0200 Subject: [PATCH 17/45] fix: remove usage of unbounded channels uses flume channels to allow for combined sync and async usage --- iroh-sync/src/sync.rs | 1 + iroh/src/sync/content.rs | 23 ++++++++++------------- iroh/src/sync/live.rs | 13 +++++-------- 3 files changed, 16 insertions(+), 21 deletions(-) diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index baed4e678d..499da06612 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -436,6 +436,7 @@ impl Replica { let bytes = postcard::to_stdvec(&data)?; Ok(bytes.into()) } + pub fn from_bytes(bytes: &[u8]) -> anyhow::Result { let data: ReplicaData = postcard::from_bytes(bytes)?; let replica = Self::new(data.namespace); diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index f5942eb193..54e616c414 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -21,10 +21,7 @@ use iroh_sync::sync::{ Author, 
InsertOrigin, Namespace, NamespaceId, OnInsertCallback, Replica, ReplicaStore, SignedEntry, }; -use tokio::{ - io::AsyncRead, - sync::{mpsc, oneshot}, -}; +use tokio::{io::AsyncRead, sync::oneshot}; use tokio_stream::StreamExt; use tracing::{debug, error, warn}; @@ -127,7 +124,7 @@ impl Doc { let doc_clone = doc.clone(); doc.replica.on_insert(Box::new(move |origin, entry| { if matches!(origin, InsertOrigin::Sync) { - doc_clone.download_content_fron_author(&entry); + doc_clone.download_content_from_author(&entry); } })); } @@ -182,7 +179,7 @@ impl Doc { Ok((hash, len)) } - pub fn download_content_fron_author(&self, entry: &SignedEntry) { + pub fn download_content_from_author(&self, entry: &SignedEntry) { let hash = *entry.entry().record().content_hash(); let peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes()) .expect("failed to convert author to peer id"); @@ -286,7 +283,7 @@ pub struct DownloadRequest { #[derive(Debug, Clone)] pub struct Downloader { pending_downloads: Arc>>, - to_actor_tx: mpsc::UnboundedSender, + to_actor_tx: flume::Sender, } impl Downloader { @@ -295,7 +292,7 @@ impl Downloader { endpoint: MagicEndpoint, blobs: WritableFileDatabase, ) -> Self { - let (tx, rx) = mpsc::unbounded_channel(); + let (tx, rx) = flume::bounded(64); // spawn the actor on a local pool // the local pool is required because WritableFileDatabase::download_single // returns a future that is !Send @@ -348,13 +345,13 @@ pub struct DownloadActor { pending_downloads: FuturesUnordered< LocalBoxFuture<'static, (PeerId, Hash, anyhow::Result>)>, >, - rx: mpsc::UnboundedReceiver, + rx: flume::Receiver, } impl DownloadActor { fn new( endpoint: MagicEndpoint, db: WritableFileDatabase, - rx: mpsc::UnboundedReceiver, + rx: flume::Receiver, ) -> Self { Self { rx, @@ -370,9 +367,9 @@ impl DownloadActor { pub async fn run(&mut self) -> anyhow::Result<()> { loop { tokio::select! 
{ - req = self.rx.recv() => match req { - None => return Ok(()), - Some(req) => self.on_download_request(req).await + req = self.rx.recv_async() => match req { + Err(_) => return Ok(()), + Ok(req) => self.on_download_request(req).await }, (peer, conn) = self.dialer.next() => match conn { Ok(conn) => { diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 2d68c96e44..6fa2c71b85 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -102,8 +102,8 @@ struct Actor { sync_state: HashMap<(TopicId, PeerId), SyncState>, to_actor_rx: mpsc::Receiver, - insert_entry_tx: mpsc::UnboundedSender<(TopicId, SignedEntry)>, - insert_entry_rx: mpsc::UnboundedReceiver<(TopicId, SignedEntry)>, + insert_entry_tx: flume::Sender<(TopicId, SignedEntry)>, + insert_entry_rx: flume::Receiver<(TopicId, SignedEntry)>, pending_syncs: FuturesUnordered)>>, pending_joins: FuturesUnordered)>>, @@ -115,10 +115,7 @@ impl Actor { gossip: GossipHandle, to_actor_rx: mpsc::Receiver, ) -> Self { - // TODO: instead of an unbounded channel, we'd want a FIFO ring buffer likely - // (we have to send from the blocking Replica::on_insert callback, so we need a channel - // with nonblocking sending, so either unbounded or ringbuffer like) - let (insert_tx, insert_rx) = mpsc::unbounded_channel(); + let (insert_tx, insert_rx) = flume::bounded(64); let sub = gossip.clone().subscribe_all().boxed(); Self { @@ -157,8 +154,8 @@ impl Actor { error!("Failed to process gossip event: {err:?}"); } }, - entry = self.insert_entry_rx.recv() => { - let (topic, entry) = entry.ok_or_else(|| anyhow!("insert_rx returned None"))?; + entry = self.insert_entry_rx.recv_async() => { + let (topic, entry) = entry?; self.on_insert_entry(topic, entry).await?; } Some((topic, peer, res)) = self.pending_syncs.next() => { From 9b9042ec8ae23a6dc93056fff169286510a27e5e Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 14:20:56 +0200 Subject: [PATCH 18/45] add todo --- iroh/src/sync/content.rs | 1 + iroh/src/sync/live.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 54e616c414..197e9c2d2f 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -328,6 +328,7 @@ impl Downloader { .lock() .unwrap() .insert(hash, fut.boxed().shared()); + // TODO: this is potentially blocking inside an async call. figure out a better solution if let Err(err) = self.to_actor_tx.send(req) { warn!("download actor dropped: {err}"); } diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 6fa2c71b85..8be595b5f1 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -242,6 +242,7 @@ impl Actor { doc.on_insert(Box::new(move |origin, entry| { // only care for local inserts, otherwise we'd do endless gossip loops if let InsertOrigin::Local = origin { + // TODO: this is potentially blocking inside an async call. figure out a better solution insert_entry_tx.send((topic, entry)).ok(); } })); From b2669e063b5b4bf3350282053b374810c7115187 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 14:08:59 +0200 Subject: [PATCH 19/45] sync: implement more extensive fetch methods --- iroh-sync/src/sync.rs | 244 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 225 insertions(+), 19 deletions(-) diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 499da06612..186ea38537 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -500,7 +500,11 @@ impl Replica { } /// Gets all entries matching this key and author. 
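The flume switch above is what allows one channel to be written to from the synchronous Replica::on_insert callback and drained from the async actor loop: send works from plain (non-async) code, while recv_async is awaited on the other end. A self-contained sketch of that combined usage, with illustrative names; note that a bounded send blocks the calling thread when the channel is full, which is what the TODOs added in the next patch are about:

use std::thread;

#[tokio::main]
async fn main() {
    let (tx, rx) = flume::bounded::<u64>(64);

    // synchronous producer, standing in for the on_insert callback
    thread::spawn(move || {
        for i in 0..10 {
            // may block this thread if the channel is full
            tx.send(i).ok();
        }
    });

    // async consumer, standing in for the live sync actor loop
    while let Ok(i) = rx.recv_async().await {
        println!("got entry {i}");
    }
    // recv_async returns an error once all senders are dropped, ending the loop
}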
- pub fn get_latest(&self, key: impl AsRef<[u8]>, author: &AuthorId) -> Option { + pub fn get_latest_by_key_and_author( + &self, + key: impl AsRef<[u8]>, + author: &AuthorId, + ) -> Option { let inner = self.inner.read(); inner .peer @@ -508,19 +512,83 @@ impl Replica { .cloned() } - /// Returns all versions of the matching documents. - pub fn get_all<'a, 'b: 'a>( + /// Returns the latest version of the matching documents by key. + pub fn get_latest_by_key(&self, key: impl AsRef<[u8]>) -> GetLatestIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let key = key.as_ref().to_vec(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::Key { namespace, key }; + + GetLatestIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, + index: 0, + } + } + + /// Returns the latest versions of all documents. + pub fn get_latest(&self) -> GetLatestIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::All { namespace }; + + GetLatestIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, + index: 0, + } + } + + /// Returns all versions of the matching documents by author. + pub fn get_all_by_key_and_author<'a, 'b: 'a>( &'a self, key: impl AsRef<[u8]> + 'b, author: &AuthorId, ) -> GetAllIter<'a> { let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); let record_id = RecordIdentifier::new(key, guard.namespace.id(), author); + let filter = GetFilter::KeyAuthor(record_id); + GetAllIter { records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { &inner.peer.store().records }), - record_id, + filter, + index: 0, + } + } + + /// Returns all versions of the matching documents by key. + pub fn get_all_by_key(&self, key: impl AsRef<[u8]>) -> GetAllIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let key = key.as_ref().to_vec(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::Key { namespace, key }; + + GetAllIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, + index: 0, + } + } + + /// Returns all versions of all documents. + pub fn get_all(&self) -> GetAllIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::All { namespace }; + + GetAllIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, index: 0, } } @@ -553,30 +621,114 @@ impl Replica { } #[derive(Debug)] -pub struct GetAllIter<'a> { +pub enum GetFilter { + /// All entries. + All { namespace: NamespaceId }, + /// Filter by key and author. + KeyAuthor(RecordIdentifier), + /// Filter by key only. + Key { + namespace: NamespaceId, + key: Vec, + }, +} + +#[derive(Debug)] +pub struct GetLatestIter<'a> { // Oh my god, rust why u do this to me? records: parking_lot::lock_api::MappedRwLockReadGuard< 'a, parking_lot::RawRwLock, BTreeMap>, >, - record_id: RecordIdentifier, + filter: GetFilter, /// Current iteration index. 
index: usize, } -impl<'a> Iterator for GetAllIter<'a> { +impl<'a> Iterator for GetLatestIter<'a> { type Item = SignedEntry; fn next(&mut self) -> Option { - let values = self.records.get(&self.record_id)?; - - let (_, res) = values.iter().nth(self.index)?; + let res = match self.filter { + GetFilter::All { namespace } => { + let (_, res) = self + .records + .iter() + .filter(|(k, _)| k.namespace() == &namespace) + .filter_map(|(_key, value)| value.last_key_value()) + .nth(self.index)?; + res + } + GetFilter::KeyAuthor(ref record_id) => { + let values = self.records.get(record_id)?; + let (_, res) = values.iter().nth(self.index)?; + res + } + GetFilter::Key { namespace, ref key } => { + let (_, res) = self + .records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == &namespace) + .filter_map(|(_key, value)| value.last_key_value()) + .nth(self.index)?; + res + } + }; self.index += 1; Some(res.clone()) // :( I give up } } +#[derive(Debug)] +pub struct GetAllIter<'a> { + // Oh my god, rust why u do this to me? + records: parking_lot::lock_api::MappedRwLockReadGuard< + 'a, + parking_lot::RawRwLock, + BTreeMap>, + >, + filter: GetFilter, + /// Current iteration index. + index: usize, +} + +impl<'a> Iterator for GetAllIter<'a> { + type Item = (RecordIdentifier, u64, SignedEntry); + + fn next(&mut self) -> Option { + let res = match self.filter { + GetFilter::All { namespace } => self + .records + .iter() + .filter(|(k, _)| k.namespace() == &namespace) + .flat_map(|(key, value)| { + value + .iter() + .map(|(t, value)| (key.clone(), *t, value.clone())) + }) + .nth(self.index)?, + GetFilter::KeyAuthor(ref record_id) => { + let values = self.records.get(record_id)?; + let (t, value) = values.iter().nth(self.index)?; + (record_id.clone(), *t, value.clone()) + } + GetFilter::Key { namespace, ref key } => self + .records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == &namespace) + .flat_map(|(key, value)| { + value + .iter() + .map(|(t, value)| (key.clone(), *t, value.clone())) + }) + .nth(self.index)?, + }; + self.index += 1; + Some(res) + } +} + /// A signed entry. 
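Taken together, the new fetch methods split queries along two axes: latest version versus all stored versions, and filtering by key+author, key only, or nothing. A short usage sketch against a single in-memory replica, mirroring the calls exercised in the tests below (Replica::new as used by from_bytes above; the author and keys are illustrative):

let mut rng = rand::thread_rng();
let alice = Author::new(&mut rng);
let myspace = Namespace::new(&mut rng);
let replica = Replica::new(myspace);

replica.hash_and_insert("/cool/path", &alice, "round 1");
replica.hash_and_insert("/cool/path", &alice, "round 2");

// newest entry for one key and author
let latest = replica
    .get_latest_by_key_and_author("/cool/path", alice.id())
    .unwrap();
assert_eq!(latest.entry().record().content_len(), "round 2".len() as u64);

// every stored version of that key (superseded versions are kept)
let all: Vec<_> = replica.get_all_by_key(b"/cool/path").collect();
assert_eq!(all.len(), 2);

// newest version of every key in the namespace
let latest_all: Vec<_> = replica.get_latest().collect();
assert_eq!(latest_all.len(), 1);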
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SignedEntry { @@ -832,6 +984,7 @@ mod tests { fn test_basics() { let mut rng = rand::thread_rng(); let alice = Author::new(&mut rng); + let bob = Author::new(&mut rng); let myspace = Namespace::new(&mut rng); let record_id = RecordIdentifier::new("/my/key", myspace.id(), alice.id()); @@ -846,7 +999,9 @@ mod tests { } for i in 0..10 { - let res = my_replica.get_latest(format!("/{i}"), alice.id()).unwrap(); + let res = my_replica + .get_latest_by_key_and_author(format!("/{i}"), alice.id()) + .unwrap(); let len = format!("{i}: hello from alice").as_bytes().len() as u64; assert_eq!(res.entry().record().content_len(), len); res.verify().expect("invalid signature"); @@ -854,15 +1009,66 @@ mod tests { // Test multiple records for the same key my_replica.hash_and_insert("/cool/path", &alice, "round 1"); - let _entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); - + let _entry = my_replica + .get_latest_by_key_and_author("/cool/path", alice.id()) + .unwrap(); // Second my_replica.hash_and_insert("/cool/path", &alice, "round 2"); - let _entry = my_replica.get_latest("/cool/path", alice.id()).unwrap(); + let _entry = my_replica + .get_latest_by_key_and_author("/cool/path", alice.id()) + .unwrap(); + + // Get All by author + let entries: Vec<_> = my_replica + .get_all_by_key_and_author("/cool/path", alice.id()) + .collect(); + assert_eq!(entries.len(), 2); + + // Get All by key + let entries: Vec<_> = my_replica.get_all_by_key(b"/cool/path").collect(); + assert_eq!(entries.len(), 2); + + // Get latest by key + let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); + assert_eq!(entries.len(), 1); // Get All - let entries: Vec<_> = my_replica.get_all("/cool/path", alice.id()).collect(); + let entries: Vec<_> = my_replica.get_all().collect(); + assert_eq!(entries.len(), 12); + + // Get All latest + let entries: Vec<_> = my_replica.get_latest().collect(); + assert_eq!(entries.len(), 11); + + // insert record from different author + let _entry = my_replica.hash_and_insert("/cool/path", &bob, "bob round 1"); + + // Get All by author + let entries: Vec<_> = my_replica + .get_all_by_key_and_author("/cool/path", alice.id()) + .collect(); assert_eq!(entries.len(), 2); + + let entries: Vec<_> = my_replica + .get_all_by_key_and_author("/cool/path", bob.id()) + .collect(); + assert_eq!(entries.len(), 1); + + // Get All by key + let entries: Vec<_> = my_replica.get_all_by_key(b"/cool/path").collect(); + assert_eq!(entries.len(), 3); + + // Get latest by key + let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); + assert_eq!(entries.len(), 2); + + // Get All + let entries: Vec<_> = my_replica.get_all().collect(); + assert_eq!(entries.len(), 13); + + // Get All latest + let entries: Vec<_> = my_replica.get_latest().collect(); + assert_eq!(entries.len(), 12); } #[test] @@ -968,13 +1174,13 @@ mod tests { // Check result for el in alice_set { - alice.get_latest(el, author.id()).unwrap(); - bob.get_latest(el, author.id()).unwrap(); + alice.get_latest_by_key_and_author(el, author.id()).unwrap(); + bob.get_latest_by_key_and_author(el, author.id()).unwrap(); } for el in bob_set { - alice.get_latest(el, author.id()).unwrap(); - bob.get_latest(el, author.id()).unwrap(); + alice.get_latest_by_key_and_author(el, author.id()).unwrap(); + bob.get_latest_by_key_and_author(el, author.id()).unwrap(); } } } From efb540546dbfe00b20a070cd2fdf65d1a441d0f8 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 
2023 14:19:09 +0200 Subject: [PATCH 20/45] add prefix methods --- iroh-sync/src/sync.rs | 82 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 78 insertions(+), 4 deletions(-) diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 186ea38537..2faf711ec5 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -528,6 +528,22 @@ impl Replica { } } + /// Returns the latest version of the matching documents by prefix. + pub fn get_latest_by_prefix(&self, prefix: impl AsRef<[u8]>) -> GetLatestIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let prefix = prefix.as_ref().to_vec(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::Prefix { namespace, prefix }; + + GetLatestIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, + index: 0, + } + } + /// Returns the latest versions of all documents. pub fn get_latest(&self) -> GetLatestIter<'_> { let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); @@ -578,6 +594,22 @@ impl Replica { } } + /// Returns all versions of the matching documents by prefix. + pub fn get_all_by_prefix(&self, prefix: impl AsRef<[u8]>) -> GetAllIter<'_> { + let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); + let prefix = prefix.as_ref().to_vec(); + let namespace = *guard.namespace.id(); + let filter = GetFilter::Prefix { namespace, prefix }; + + GetAllIter { + records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { + &inner.peer.store().records + }), + filter, + index: 0, + } + } + /// Returns all versions of all documents. pub fn get_all(&self) -> GetAllIter<'_> { let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); @@ -631,6 +663,11 @@ pub enum GetFilter { namespace: NamespaceId, key: Vec, }, + /// Filter by prefix only. 
+ Prefix { + namespace: NamespaceId, + prefix: Vec, + }, } #[derive(Debug)] @@ -658,12 +695,12 @@ impl<'a> Iterator for GetLatestIter<'a> { .filter(|(k, _)| k.namespace() == &namespace) .filter_map(|(_key, value)| value.last_key_value()) .nth(self.index)?; - res + res.clone() } GetFilter::KeyAuthor(ref record_id) => { let values = self.records.get(record_id)?; let (_, res) = values.iter().nth(self.index)?; - res + res.clone() } GetFilter::Key { namespace, ref key } => { let (_, res) = self @@ -672,11 +709,23 @@ impl<'a> Iterator for GetLatestIter<'a> { .filter(|(k, _)| k.key() == key && k.namespace() == &namespace) .filter_map(|(_key, value)| value.last_key_value()) .nth(self.index)?; - res + res.clone() + } + GetFilter::Prefix { + namespace, + ref prefix, + } => { + let (_, res) = self + .records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == &namespace) + .filter_map(|(_key, value)| value.last_key_value()) + .nth(self.index)?; + res.clone() } }; self.index += 1; - Some(res.clone()) // :( I give up + Some(res) } } @@ -723,6 +772,19 @@ impl<'a> Iterator for GetAllIter<'a> { .map(|(t, value)| (key.clone(), *t, value.clone())) }) .nth(self.index)?, + GetFilter::Prefix { + namespace, + ref prefix, + } => self + .records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == &namespace) + .flat_map(|(key, value)| { + value + .iter() + .map(|(t, value)| (key.clone(), *t, value.clone())) + }) + .nth(self.index)?, }; self.index += 1; Some(res) @@ -1032,6 +1094,10 @@ mod tests { let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); assert_eq!(entries.len(), 1); + // Get latest by prefix + let entries: Vec<_> = my_replica.get_latest_by_prefix(b"/cool").collect(); + assert_eq!(entries.len(), 1); + // Get All let entries: Vec<_> = my_replica.get_all().collect(); assert_eq!(entries.len(), 12); @@ -1062,6 +1128,14 @@ mod tests { let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); assert_eq!(entries.len(), 2); + // Get latest by prefix + let entries: Vec<_> = my_replica.get_latest_by_prefix(b"/cool").collect(); + assert_eq!(entries.len(), 2); + + // Get all by prefix + let entries: Vec<_> = my_replica.get_all_by_prefix(b"/cool").collect(); + assert_eq!(entries.len(), 3); + // Get All let entries: Vec<_> = my_replica.get_all().collect(); assert_eq!(entries.len(), 13); From 83ecf568f746586f46d9117641eeb786a35efbe4 Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Thu, 27 Jul 2023 14:32:18 +0200 Subject: [PATCH 21/45] feat: enable metrics server on sync (#1308) --- iroh/examples/sync.rs | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index ec780768e2..cf8d18fdff 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -6,8 +6,7 @@ //! You can use this with a local DERP server. To do so, run //! `cargo run --bin derper -- --dev` //! and then set the `-d http://localhost:3340` flag on this example. - -use std::{fmt, path::PathBuf, str::FromStr, sync::Arc}; +use std::{fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; use anyhow::bail; use clap::{CommandFactory, FromArgMatches, Parser}; @@ -60,6 +59,9 @@ struct Args { /// Set the bind port for our socket. By default, a random port will be used. 
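The prefix variants added in the previous patch compose with the same latest/all split. Continuing the sketch above with the same replica and author (keys illustrative):

replica.hash_and_insert("/cool/other", &alice, "x");
replica.hash_and_insert("/boring", &alice, "y");

// newest version of every key under /cool: /cool/path and /cool/other
let latest: Vec<_> = replica.get_latest_by_prefix(b"/cool").collect();
assert_eq!(latest.len(), 2);

// every stored version under /cool: two for /cool/path, one for /cool/other
let all: Vec<_> = replica.get_all_by_prefix(b"/cool").collect();
assert_eq!(all.len(), 3);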
#[clap(short, long, default_value = "0")] bind_port: u16, + /// Bind address on which to serve Prometheus metrics + #[clap(long)] + metrics_addr: Option, #[clap(subcommand)] command: Command, } @@ -76,16 +78,32 @@ async fn main() -> anyhow::Result<()> { run(args).await } -async fn run(args: Args) -> anyhow::Result<()> { - // setup logging - let log_filter = init_logging(); - - // init metrics +pub fn init_metrics_collection( + metrics_addr: Option, +) -> Option> { iroh_metrics::core::Core::init(|reg, metrics| { metrics.insert(iroh::sync::metrics::Metrics::new(reg)); metrics.insert(iroh_gossip::metrics::Metrics::new(reg)); }); + // doesn't start the server if the address is None + if let Some(metrics_addr) = metrics_addr { + return Some(tokio::spawn(async move { + if let Err(e) = iroh_metrics::metrics::start_metrics_server(metrics_addr).await { + eprintln!("Failed to start metrics server: {e}"); + } + })); + } + tracing::info!("Metrics server not started, no address provided"); + None +} + +async fn run(args: Args) -> anyhow::Result<()> { + // setup logging + let log_filter = init_logging(); + + let metrics_fut = init_metrics_collection(args.metrics_addr); + // parse or generate our keypair let keypair = match args.private_key { None => Keypair::generate(), @@ -267,6 +285,11 @@ async fn run(args: Args) -> anyhow::Result<()> { blobs.save().await?; docs.save(&doc).await?; + if let Some(metrics_fut) = metrics_fut { + metrics_fut.abort(); + drop(metrics_fut); + } + Ok(()) } From 6a4f83b5087118a4996ac9947e6ba77716d7bb4f Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 14:35:00 +0200 Subject: [PATCH 22/45] chore: update deny.toml --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index 1488465e15..517d074102 100644 --- a/deny.toml +++ b/deny.toml @@ -6,6 +6,7 @@ allow = [ "Apache-2.0", "BSD-2-Clause", "BSD-3-Clause", + "BSL-1.0", # BOSL license "ISC", "MIT", "OpenSSL", From 9d95057b18d5535e604c99682017657a5b6ee722 Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 14:41:41 +0200 Subject: [PATCH 23/45] fix clippy and feature selection --- iroh/Cargo.toml | 11 ++++++++--- iroh/examples/sync.rs | 2 +- iroh/src/lib.rs | 1 + iroh/src/sync/content.rs | 7 ++++--- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 397f9f15c6..411af9dcdd 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -26,7 +26,7 @@ iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" -iroh-sync = { path = "../iroh-sync" } +iroh-sync = { path = "../iroh-sync" } iroh-gossip = { path = "../iroh-gossip" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } @@ -61,14 +61,15 @@ shellexpand = { version = "3.1.0", optional = true } rustyline = { version = "12.0.0", optional = true } [features] -default = ["cli", "metrics"] +default = ["cli", "metrics", "sync"] +sync = ["metrics", "flat-db"] cli = ["clap", "config", "console", "dirs-next", "indicatif", "multibase", "quic-rpc/quinn-transport", "tempfile", "tokio/rt-multi-thread", "tracing-subscriber"] metrics = ["iroh-metrics", "flat-db", "mem-db", "iroh-collection"] flat-db = [] mem-db = [] iroh-collection = [] test = [] -example-sync = ["cli", "ed25519-dalek", "once_cell", 
"shell-words", "shellexpand", "rustyline"] +example-sync = ["cli", "ed25519-dalek", "once_cell", "shell-words", "shellexpand", "sync", "rustyline"] [dev-dependencies] anyhow = { version = "1", features = ["backtrace"] } @@ -93,6 +94,10 @@ required-features = ["cli"] name = "collection" required-features = ["mem-db", "iroh-collection"] +[[example]] +name = "dump-blob-stream" +required-features = ["mem-db", "iroh-collection"] + [[example]] name = "hello-world" required-features = ["mem-db"] diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index cf8d18fdff..058e796995 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -348,7 +348,7 @@ async fn handle_command( Ok(()) } -#[derive(Parser)] +#[derive(Parser, Debug)] pub enum Cmd { /// Set an entry Set { diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 0c69f4e2a8..9eb2a54e19 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -11,6 +11,7 @@ pub mod dial; pub mod node; pub mod rpc_protocol; #[allow(missing_docs)] +#[cfg(feature = "sync")] pub mod sync; pub mod util; diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 197e9c2d2f..39bf4a793c 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -335,6 +335,9 @@ impl Downloader { } } +type PendingDownloadsFutures = + FuturesUnordered>)>>; + #[derive(Debug)] pub struct DownloadActor { dialer: Dialer, @@ -343,9 +346,7 @@ pub struct DownloadActor { replies: HashMap>, peer_hashes: HashMap>, hash_peers: HashMap>, - pending_downloads: FuturesUnordered< - LocalBoxFuture<'static, (PeerId, Hash, anyhow::Result>)>, - >, + pending_downloads: PendingDownloadsFutures, rx: flume::Receiver, } impl DownloadActor { From de430b4b8cf41488c8acd7254d2a0a314de4aa2c Mon Sep 17 00:00:00 2001 From: dignifiedquire Date: Thu, 27 Jul 2023 14:46:28 +0200 Subject: [PATCH 24/45] doc: more docs fixes --- iroh-sync/src/ranger.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index ea80f49f9e..e12db74498 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -575,7 +575,7 @@ where } } -/// Sadly https://doc.rust-lang.org/std/primitive.usize.html#method.div_ceil is still unstable.. +/// Sadly is still unstable.. fn div_ceil(a: usize, b: usize) -> usize { debug_assert!(a != 0); debug_assert!(b != 0); From 4f6ab6521420b6952c4fe74dd8a7f160648c0199 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 27 Jul 2023 14:52:55 +0200 Subject: [PATCH 25/45] feat: file system export/import in sync repl --- iroh/examples/sync.rs | 173 ++++++++++++++++++++++++++++++++++++++- iroh/src/sync/content.rs | 11 ++- 2 files changed, 179 insertions(+), 5 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 058e796995..7e76a0817b 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -6,9 +6,10 @@ //! You can use this with a local DERP server. To do so, run //! `cargo run --bin derper -- --dev` //! and then set the `-d http://localhost:3340` flag on this example. 
-use std::{fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; -use anyhow::bail; +use std::{collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; + +use anyhow::{anyhow, bail}; use clap::{CommandFactory, FromArgMatches, Parser}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; @@ -31,7 +32,11 @@ use iroh_net::{ use iroh_sync::sync::{Author, Namespace, SignedEntry}; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; -use tokio::sync::{mpsc, oneshot}; +use tokio::{ + io::AsyncWriteExt, + sync::{mpsc, oneshot}, +}; +use tracing::warn; use tracing_subscriber::{EnvFilter, Registry}; use url::Url; @@ -343,11 +348,106 @@ async fn handle_command( log_filter.modify(|layer| *layer = next_filter)?; } Cmd::Stats => get_stats(), + Cmd::Fs(cmd) => handle_fs_command(cmd, doc).await?, Cmd::Exit => {} } Ok(()) } +async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { + match cmd { + FsCmd::ImportFile { file_path, key } => { + let file_path = canonicalize_path(&file_path)?.canonicalize()?; + let (hash, len) = doc.insert_from_file(&key, &file_path).await?; + println!( + "> imported {file_path:?}: {} ({})", + fmt_hash(hash), + HumanBytes(len) + ); + } + FsCmd::ImportDir { + dir_path, + mut key_prefix, + } => { + if key_prefix.ends_with("/") { + key_prefix.pop(); + } + let root = canonicalize_path(&dir_path)?.canonicalize()?; + let files = walkdir::WalkDir::new(&root).into_iter(); + // TODO: parallelize + for file in files { + let file = file?; + if file.file_type().is_file() { + let relative = file.path().strip_prefix(&root)?.to_string_lossy(); + if relative.is_empty() { + warn!("invalid file path: {:?}", file.path()); + continue; + } + let key = format!("{key_prefix}/{relative}"); + let (hash, len) = doc.insert_from_file(key, file.path()).await?; + println!( + "> imported {relative}: {} ({})", + fmt_hash(hash), + HumanBytes(len) + ); + } + } + } + FsCmd::ExportDir { + mut key_prefix, + dir_path, + } => { + if !key_prefix.ends_with("/") { + key_prefix.push('/'); + } + let root = canonicalize_path(&dir_path)?; + println!("> exporting {key_prefix} to {root:?}"); + let entries = doc.replica().get_latest_by_prefix(key_prefix.as_bytes()); + let mut checked_dirs = HashSet::new(); + for entry in entries { + let key = entry.entry().id().key(); + let relative = String::from_utf8(key[key_prefix.len()..].to_vec())?; + let len = entry.entry().record().content_len(); + if let Some(mut reader) = doc.get_content_reader(&entry).await { + let path = root.join(&relative); + let parent = path.parent().unwrap(); + if !checked_dirs.contains(parent) { + tokio::fs::create_dir_all(&parent).await?; + checked_dirs.insert(parent.to_owned()); + } + let mut file = tokio::fs::File::create(&path).await?; + copy(&mut reader, &mut file).await?; + println!( + "> exported {} to {path:?} ({})", + fmt_hash(entry.content_hash()), + HumanBytes(len) + ); + } + } + } + FsCmd::ExportFile { key, file_path } => { + let path = canonicalize_path(&file_path)?; + // TODO: Fix + let entry = doc.replica().get_latest_by_key(&key).next(); + if let Some(entry) = entry { + println!("> exporting {key} to {path:?}"); + let parent = path.parent().ok_or_else(|| anyhow!("Invalid path"))?; + tokio::fs::create_dir_all(&parent).await?; + let mut file = tokio::fs::File::create(&path).await?; + let mut reader = doc + .get_content_reader(&entry) + .await + .ok_or_else(|| anyhow!(format!("content for {key} is not available")))?; + copy(&mut reader, &mut file).await?; + } else { + 
println!("> key not found, abort"); + } + } + } + + Ok(()) +} + #[derive(Parser, Debug)] pub enum Cmd { /// Set an entry @@ -367,11 +467,16 @@ pub enum Cmd { #[clap(short = 'c', long)] print_content: bool, }, - /// List entries + /// List entries. Ls { /// Optionally list only entries whose key starts with PREFIX. prefix: Option, }, + + /// Import from and export to the local file system. + #[clap(subcommand)] + Fs(FsCmd), + /// Print the ticket with which other peers can join our document. Ticket, /// Change the log level @@ -400,6 +505,39 @@ pub enum Cmd { /// Quit Exit, } + +#[derive(Parser, Debug)] +pub enum FsCmd { + /// Import a file system directory into the document. + ImportDir { + /// The file system path to import recursively + dir_path: String, + /// The key prefix to apply to the document keys + key_prefix: String, + }, + /// Import a file into the document. + ImportFile { + /// The path to the file + file_path: String, + /// The key in the document + key: String, + }, + /// Export a part of the document into a file system directory + ExportDir { + /// The key prefix to filter on + key_prefix: String, + /// The file system path to export to + dir_path: String, + }, + /// Import a file into the document. + ExportFile { + /// The key in the document + key: String, + /// The path to the file + file_path: String, + }, +} + impl FromStr for Cmd { type Err = anyhow::Error; fn from_str(s: &str) -> Result { @@ -622,6 +760,33 @@ fn derp_map_from_url(url: Url) -> anyhow::Result { )) } +fn canonicalize_path(path: &str) -> anyhow::Result { + let path = PathBuf::from(shellexpand::tilde(&path).to_string()); + Ok(path) +} + +/// Copy from a [`iroh_io::AsyncSliceReader`] into a [`tokio::io::AsyncWrite`] +/// +/// TODO: move to iroh-io or iroh-bytes +async fn copy( + mut reader: impl iroh_io::AsyncSliceReader, + mut writer: impl tokio::io::AsyncWrite + Unpin, +) -> anyhow::Result<()> { + // this is the max chunk size. + // will only allocate this much if the resource behind the reader is at least this big. 
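// read_at may return fewer than chunk_size bytes; the loop below advances `pos`
// by however many bytes actually came back and stops at the first empty chunk,
// which marks the end of the resource.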
+ let chunk_size = 1024 * 16; + let mut pos = 0u64; + loop { + let chunk = reader.read_at(pos, chunk_size).await?; + if chunk.is_empty() { + break; + } + writer.write_all(&chunk).await?; + pos += chunk.len() as u64; + } + Ok(()) +} + /// handlers for iroh_bytes connections mod iroh_bytes_handlers { use std::sync::Arc; diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 39bf4a793c..c68d06272c 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -1,7 +1,7 @@ use std::{ collections::{HashMap, HashSet, VecDeque}, io, - path::PathBuf, + path::{Path, PathBuf}, sync::{Arc, Mutex}, time::Instant, }; @@ -179,6 +179,15 @@ impl Doc { Ok((hash, len)) } + pub async fn insert_from_file( + &self, + key: impl AsRef<[u8]>, + file_path: impl AsRef, + ) -> anyhow::Result<(Hash, u64)> { + let reader = tokio::fs::File::open(&file_path).await?; + self.insert_reader(&key, reader).await + } + pub fn download_content_from_author(&self, entry: &SignedEntry) { let hash = *entry.entry().record().content_hash(); let peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes()) From f0dc252c446973f53164e61aa3b5d03c3ccd824b Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Thu, 27 Jul 2023 15:04:13 +0200 Subject: [PATCH 26/45] feat: hammer sync example --- iroh/examples/sync.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 7e76a0817b..93acabd2cd 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -39,7 +39,6 @@ use tokio::{ use tracing::warn; use tracing_subscriber::{EnvFilter, Registry}; use url::Url; - use iroh_bytes_handlers::IrohBytesHandlers; const MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; @@ -349,6 +348,21 @@ async fn handle_command( } Cmd::Stats => get_stats(), Cmd::Fs(cmd) => handle_fs_command(cmd, doc).await?, + Cmd::Hammer { prefix, count, size}=> { + println!( + "> hammering with prefix {prefix} for {count} messages of size {size} bytes", + prefix = prefix, + count = count, + size = size, + ); + let mut bytes = vec![0; size]; + bytes.fill(97); + for i in 0..count { + let value = String::from_utf8(bytes.clone())?; + let key = format!("{}/{}", prefix, i); + doc.insert_bytes(key, value.into_bytes().into()).await?; + } + } Cmd::Exit => {} } Ok(()) @@ -502,6 +516,15 @@ pub enum Cmd { WatchCancel, /// Show stats about the current session Stats, + /// Stress test with the hammer + Hammer { + /// The key prefix + prefix: String, + /// The number of entries to create + count: usize, + /// The size of each entry in Bytes + size: usize, + }, /// Quit Exit, } From 4c4c31c862d8f12a541632e5b9cdaba44d0bbc12 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Thu, 27 Jul 2023 15:17:26 +0200 Subject: [PATCH 27/45] chore: fmt --- iroh/examples/sync.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 93acabd2cd..1dff08cd7a 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -14,6 +14,7 @@ use clap::{CommandFactory, FromArgMatches, Parser}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; +use iroh_bytes_handlers::IrohBytesHandlers; use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, @@ -39,7 +40,6 @@ use tokio::{ use tracing::warn; use tracing_subscriber::{EnvFilter, Registry}; use url::Url; -use iroh_bytes_handlers::IrohBytesHandlers; const 
MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; @@ -348,7 +348,11 @@ async fn handle_command( } Cmd::Stats => get_stats(), Cmd::Fs(cmd) => handle_fs_command(cmd, doc).await?, - Cmd::Hammer { prefix, count, size}=> { + Cmd::Hammer { + prefix, + count, + size, + } => { println!( "> hammering with prefix {prefix} for {count} messages of size {size} bytes", prefix = prefix, From 21770428f4dc60c30868a3b78aef42961f9d4cf3 Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Thu, 27 Jul 2023 15:58:28 +0200 Subject: [PATCH 28/45] allow specifying number of threads and print out some basic stats --- iroh/examples/sync.rs | 40 ++++++++++++++++++++++++++++++++++------ 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 1dff08cd7a..e20c92eb12 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -14,7 +14,6 @@ use clap::{CommandFactory, FromArgMatches, Parser}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; -use iroh_bytes_handlers::IrohBytesHandlers; use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, @@ -41,6 +40,8 @@ use tracing::warn; use tracing_subscriber::{EnvFilter, Registry}; use url::Url; +use iroh_bytes_handlers::IrohBytesHandlers; + const MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; #[derive(Parser, Debug)] @@ -350,22 +351,47 @@ async fn handle_command( Cmd::Fs(cmd) => handle_fs_command(cmd, doc).await?, Cmd::Hammer { prefix, + threads, count, size, } => { println!( - "> hammering with prefix {prefix} for {count} messages of size {size} bytes", + "> Hammering with prefix {prefix} for {threads} x {count} messages of size {size} bytes", prefix = prefix, + threads = threads, count = count, size = size, ); let mut bytes = vec![0; size]; bytes.fill(97); - for i in 0..count { - let value = String::from_utf8(bytes.clone())?; - let key = format!("{}/{}", prefix, i); - doc.insert_bytes(key, value.into_bytes().into()).await?; + let mut handles = Vec::new(); + let start = std::time::Instant::now(); + for t in 0..threads { + let p = prefix.clone(); + let t_doc = doc.clone(); + let b = bytes.clone(); + let h = tokio::spawn(async move { + for i in 0..count { + let value = String::from_utf8(b.clone()).unwrap(); + let key = format!("{}/{}/{}", p, t, i); + t_doc + .insert_bytes(key, value.into_bytes().into()) + .await + .unwrap(); + } + }); + handles.push(h); } + + let _result = futures::future::join_all(handles).await; + + let diff = start.elapsed().as_secs_f64(); + println!( + "> Hammering done in {:.2}s for {} messages with total of {} bytes", + diff, + threads * count, + threads * count * size + ); } Cmd::Exit => {} } @@ -524,6 +550,8 @@ pub enum Cmd { Hammer { /// The key prefix prefix: String, + /// The number of threads to use (each thread will create it's own replica) + threads: usize, /// The number of entries to create count: usize, /// The size of each entry in Bytes From 65bafe5d7112b4989a0371c18c7531e140e34507 Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Thu, 27 Jul 2023 22:06:54 +0200 Subject: [PATCH 29/45] mad clippy is mad --- iroh/examples/sync.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index e20c92eb12..72314be6d9 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -413,7 +413,7 @@ async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { dir_path, mut key_prefix, } => { - if 
key_prefix.ends_with("/") { + if key_prefix.ends_with('/') { key_prefix.pop(); } let root = canonicalize_path(&dir_path)?.canonicalize()?; @@ -441,7 +441,7 @@ async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { mut key_prefix, dir_path, } => { - if !key_prefix.ends_with("/") { + if !key_prefix.ends_with('/') { key_prefix.push('/'); } let root = canonicalize_path(&dir_path)?; From d964d80b656b77e0a293fb27ae3f7086b2ea3b5d Mon Sep 17 00:00:00 2001 From: Asmir Avdicevic Date: Thu, 27 Jul 2023 22:53:06 +0200 Subject: [PATCH 30/45] extend hammer with get/set modes --- iroh/examples/sync.rs | 107 +++++++++++++++++++++++++++++++++--------- 1 file changed, 85 insertions(+), 22 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 72314be6d9..29e676d316 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -11,6 +11,7 @@ use std::{collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromSt use anyhow::{anyhow, bail}; use clap::{CommandFactory, FromArgMatches, Parser}; +use core::fmt::{Display, Formatter}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; @@ -354,43 +355,66 @@ async fn handle_command( threads, count, size, + mode, } => { println!( - "> Hammering with prefix {prefix} for {threads} x {count} messages of size {size} bytes", + "> Hammering with prefix \"{prefix}\" for {threads} x {count} messages of size {size} bytes in {mode} mode", prefix = prefix, threads = threads, count = count, size = size, + mode = mode, ); - let mut bytes = vec![0; size]; - bytes.fill(97); - let mut handles = Vec::new(); let start = std::time::Instant::now(); - for t in 0..threads { - let p = prefix.clone(); - let t_doc = doc.clone(); - let b = bytes.clone(); - let h = tokio::spawn(async move { - for i in 0..count { - let value = String::from_utf8(b.clone()).unwrap(); - let key = format!("{}/{}/{}", p, t, i); - t_doc - .insert_bytes(key, value.into_bytes().into()) - .await - .unwrap(); + let mut handles = Vec::new(); + match mode { + HammerMode::Set => { + let mut bytes = vec![0; size]; + bytes.fill(97); + for t in 0..threads { + let p = prefix.clone(); + let t_doc = doc.clone(); + let b = bytes.clone(); + let h = tokio::spawn(async move { + for i in 0..count { + let value = String::from_utf8(b.clone()).unwrap(); + let key = format!("{}/{}/{}", p, t, i); + t_doc + .insert_bytes(key, value.into_bytes().into()) + .await + .unwrap(); + } + }); + handles.push(h); + } + } + HammerMode::Get => { + for t in 0..threads { + let p = prefix.clone(); + let t_doc = doc.clone(); + let h = tokio::spawn(async move { + for i in 0..count { + let key = format!("{}/{}/{}", p, t, i); + let entries = t_doc.replica().all_for_key(key.as_bytes()); + for (_id, entry) in entries { + let _content = fmt_content(&t_doc, &entry).await; + } + } + }); + handles.push(h); } - }); - handles.push(h); + } } let _result = futures::future::join_all(handles).await; let diff = start.elapsed().as_secs_f64(); + let total_count = threads as u64 * count as u64; println!( - "> Hammering done in {:.2}s for {} messages with total of {} bytes", + "> Hammering done in {:.2}s for {} messages with total of {}", diff, - threads * count, - threads * count * size + total_count, + HumanBytes(total_count * size as u64), ); } Cmd::Exit => {} @@ -546,21 +570,60 @@ pub enum Cmd { WatchCancel, /// Show stats about the current session Stats, - /// Stress test with the hammer + /// Hammer time - stress test 
with the hammer Hammer { /// The key prefix prefix: String, /// The number of threads to use (each thread will create it's own replica) + #[clap(long, short, default_value = "2")] threads: usize, /// The number of entries to create + #[clap(long, short, default_value = "1000")] count: usize, /// The size of each entry in Bytes + #[clap(long, short, default_value = "1024")] size: usize, + /// Select the hammer mode (set or get) + #[clap(long, short, default_value = "set")] + mode: HammerMode, }, /// Quit Exit, } +#[derive(Clone, Debug, Parser)] +pub enum HammerMode { + Set, + Get, +} + +impl FromStr for HammerMode { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s { + "set" => Ok(HammerMode::Set), + "get" => Ok(HammerMode::Get), + _ => Err(anyhow!("Invalid hammer mode")), + } + } +} + +impl HammerMode { + pub fn to_string(&self) -> &'static str { + match self { + HammerMode::Set => "set", + HammerMode::Get => "get", + } + } +} + +impl Display for HammerMode { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_string()) + } +} + #[derive(Parser, Debug)] pub enum FsCmd { /// Import a file system directory into the document. From 53bd1c09e7b9d8a196eab219f0ab03dd332d6933 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 28 Jul 2023 15:19:00 +0200 Subject: [PATCH 31/45] refactor: use clap::ValueEnum --- iroh/examples/sync.rs | 42 ++++++------------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 29e676d316..474ab699f2 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -11,7 +11,6 @@ use std::{collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromSt use anyhow::{anyhow, bail}; use clap::{CommandFactory, FromArgMatches, Parser}; -use core::fmt::{Display, Formatter}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; @@ -359,11 +358,7 @@ async fn handle_command( } => { println!( "> Hammering with prefix \"{prefix}\" for {threads} x {count} messages of size {size} bytes in {mode} mode", - prefix = prefix, - threads = threads, - count = count, - size = size, - mode = mode, + mode = format!("{mode:?}").to_lowercase() ); let start = std::time::Instant::now(); let mut handles = Vec::new(); @@ -583,47 +578,22 @@ pub enum Cmd { /// The size of each entry in Bytes #[clap(long, short, default_value = "1024")] size: usize, - /// Select the hammer mode (set or get) - #[clap(long, short, default_value = "set")] + /// Select the hammer mode + #[clap(long, short, value_enum, default_value = "set")] mode: HammerMode, }, /// Quit Exit, } -#[derive(Clone, Debug, Parser)] +#[derive(Clone, Debug, clap::ValueEnum)] pub enum HammerMode { + /// Set mode (create entries) Set, + /// Get mode (read entries) Get, } -impl FromStr for HammerMode { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - match s { - "set" => Ok(HammerMode::Set), - "get" => Ok(HammerMode::Get), - _ => Err(anyhow!("Invalid hammer mode")), - } - } -} - -impl HammerMode { - pub fn to_string(&self) -> &'static str { - match self { - HammerMode::Set => "set", - HammerMode::Get => "get", - } - } -} - -impl Display for HammerMode { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.to_string()) - } -} - #[derive(Parser, Debug)] pub enum FsCmd { /// Import a file system directory into the document. 
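With HammerMode now a clap::ValueEnum, the mode value is validated by clap itself instead of a hand-written FromStr/Display pair. Assuming clap's default rendering of the definitions above (lowercase value names, the long/short switches shown), a REPL invocation would look roughly like:

hammer /bench --mode get --threads 4 --count 1000 --size 1024

A later patch in this series (PATCH 33/45) turns the mode into a positional argument and counts the rows actually written or read.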
From dcf4c5527d7d1276fab0488bf6c8aa55e9e5bf5d Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 28 Jul 2023 15:31:12 +0200 Subject: [PATCH 32/45] refactor: check results, and clearer variable names --- iroh/examples/sync.rs | 58 ++++++++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 474ab699f2..f7bbe356a8 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -7,7 +7,10 @@ //! `cargo run --bin derper -- --dev` //! and then set the `-d http://localhost:3340` flag on this example. -use std::{collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc}; +use std::{ + collections::HashSet, fmt, net::SocketAddr, path::PathBuf, str::FromStr, sync::Arc, + time::Instant, +}; use anyhow::{anyhow, bail}; use clap::{CommandFactory, FromArgMatches, Parser}; @@ -35,6 +38,7 @@ use serde::{Deserialize, Serialize}; use tokio::{ io::AsyncWriteExt, sync::{mpsc, oneshot}, + task::JoinHandle, }; use tracing::warn; use tracing_subscriber::{EnvFilter, Registry}; @@ -360,56 +364,58 @@ async fn handle_command( "> Hammering with prefix \"{prefix}\" for {threads} x {count} messages of size {size} bytes in {mode} mode", mode = format!("{mode:?}").to_lowercase() ); - let start = std::time::Instant::now(); - let mut handles = Vec::new(); + let start = Instant::now(); + let mut handles: Vec>> = Vec::new(); match mode { HammerMode::Set => { let mut bytes = vec![0; size]; + // TODO: Add a flag to fill content differently per entry to be able to + // test downloading too bytes.fill(97); for t in 0..threads { - let p = prefix.clone(); - let t_doc = doc.clone(); - let b = bytes.clone(); - let h = tokio::spawn(async move { + let prefix = prefix.clone(); + let doc = doc.clone(); + let bytes = bytes.clone(); + let handle = tokio::spawn(async move { for i in 0..count { - let value = String::from_utf8(b.clone()).unwrap(); - let key = format!("{}/{}/{}", p, t, i); - t_doc - .insert_bytes(key, value.into_bytes().into()) - .await - .unwrap(); + let value = String::from_utf8(bytes.clone()).unwrap(); + let key = format!("{}/{}/{}", prefix, t, i); + doc.insert_bytes(key, value.into_bytes().into()).await?; } + Ok(()) }); - handles.push(h); + handles.push(handle); } } HammerMode::Get => { for t in 0..threads { - let p = prefix.clone(); - let t_doc = doc.clone(); - let h = tokio::spawn(async move { + let prefix = prefix.clone(); + let doc = doc.clone(); + let handle = tokio::spawn(async move { for i in 0..count { - let key = format!("{}/{}/{}", p, t, i); - let entries = t_doc.replica().all_for_key(key.as_bytes()); + let key = format!("{}/{}/{}", prefix, t, i); + let entries = doc.replica().all_for_key(key.as_bytes()); for (_id, entry) in entries { - let _content = fmt_content(&t_doc, &entry).await; + let _content = fmt_content(&doc, &entry).await; } } + Ok(()) }); - handles.push(h); + handles.push(handle); } } } - let _result = futures::future::join_all(handles).await; + for result in futures::future::join_all(handles).await { + // Check that no errors ocurred + result??; + } let diff = start.elapsed().as_secs_f64(); let total_count = threads as u64 * count as u64; println!( - "> Hammering done in {:.2}s for {} messages with total of {}", - diff, - total_count, - HumanBytes(total_count * size as u64), + "> Hammering done in {diff:.2}s for {total_count} messages with total of {size}", + size = HumanBytes(total_count * size as u64), ); } Cmd::Exit => {} From 
38cce3f02b0b8098b1fd5e7b998ff235a9538bd7 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 28 Jul 2023 15:38:44 +0200 Subject: [PATCH 33/45] refactor: count actual rows, and make mode an argument --- iroh/examples/sync.rs | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index f7bbe356a8..48afe81265 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -365,7 +365,7 @@ async fn handle_command( mode = format!("{mode:?}").to_lowercase() ); let start = Instant::now(); - let mut handles: Vec>> = Vec::new(); + let mut handles: Vec>> = Vec::new(); match mode { HammerMode::Set => { let mut bytes = vec![0; size]; @@ -382,7 +382,7 @@ async fn handle_command( let key = format!("{}/{}/{}", prefix, t, i); doc.insert_bytes(key, value.into_bytes().into()).await?; } - Ok(()) + Ok(count) }); handles.push(handle); } @@ -392,30 +392,32 @@ async fn handle_command( let prefix = prefix.clone(); let doc = doc.clone(); let handle = tokio::spawn(async move { + let mut read = 0; for i in 0..count { let key = format!("{}/{}/{}", prefix, t, i); let entries = doc.replica().all_for_key(key.as_bytes()); for (_id, entry) in entries { let _content = fmt_content(&doc, &entry).await; + read += 1; } } - Ok(()) + Ok(read) }); handles.push(handle); } } } + let mut total_count = 0; for result in futures::future::join_all(handles).await { - // Check that no errors ocurred - result??; + // Check that no errors ocurred and count rows inserted/read + total_count += result??; } let diff = start.elapsed().as_secs_f64(); - let total_count = threads as u64 * count as u64; println!( "> Hammering done in {diff:.2}s for {total_count} messages with total of {size}", - size = HumanBytes(total_count * size as u64), + size = HumanBytes(total_count as u64 * size as u64), ); } Cmd::Exit => {} @@ -573,6 +575,9 @@ pub enum Cmd { Stats, /// Hammer time - stress test with the hammer Hammer { + /// The hammer mode + #[clap(value_enum)] + mode: HammerMode, /// The key prefix prefix: String, /// The number of threads to use (each thread will create it's own replica) @@ -584,9 +589,6 @@ pub enum Cmd { /// The size of each entry in Bytes #[clap(long, short, default_value = "1024")] size: usize, - /// Select the hammer mode - #[clap(long, short, value_enum, default_value = "set")] - mode: HammerMode, }, /// Quit Exit, @@ -594,9 +596,9 @@ pub enum Cmd { #[derive(Clone, Debug, clap::ValueEnum)] pub enum HammerMode { - /// Set mode (create entries) + /// Create entries Set, - /// Get mode (read entries) + /// Read entries Get, } From 871aa0691bd709a24b942f0ba38be631e7789697 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 28 Jul 2023 19:07:30 +0200 Subject: [PATCH 34/45] fix: rebase fix --- iroh/src/sync/live.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 8be595b5f1..46159cd4bc 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -275,7 +275,7 @@ impl Actor { }; match event { // We received a gossip message. Try to insert it into our replica. 
- Event::Received(data) => { + Event::Received(data, _prev_peer) => { let op: Op = postcard::from_bytes(&data)?; match op { Op::Put(entry) => doc.insert_remote_entry(entry)?, From 6a42b697ed884b97769975cc8e817cc749e28790 Mon Sep 17 00:00:00 2001 From: Friedel Ziegelmayer Date: Thu, 3 Aug 2023 22:22:45 +0200 Subject: [PATCH 35/45] feat(iroh-sync): implement file system backed for documents (#1315) * start refactoring store into its own module * implement more details * works again * draft fs db and integrate error handling * fill out more of the implemenation * lifetime sadness * self referential fight: Rust 0 - Dig 1 * basic tests and range fixes * introduce Store trait and update tests to test against both impls * implement remove * integrate new storage into the example * implement iterators * fixes and more tests * clippy and deny cleanup --- Cargo.lock | 59 +++ deny.toml | 1 + iroh-bytes/src/util.rs | 6 + iroh-sync/Cargo.toml | 9 + iroh-sync/src/lib.rs | 1 + iroh-sync/src/ranger.rs | 300 +++++++----- iroh-sync/src/store.rs | 79 ++++ iroh-sync/src/store/fs.rs | 751 ++++++++++++++++++++++++++++++ iroh-sync/src/store/memory.rs | 502 ++++++++++++++++++++ iroh-sync/src/sync.rs | 833 +++++++++++----------------------- iroh/Cargo.toml | 4 +- iroh/examples/sync.rs | 92 +++- iroh/src/sync.rs | 108 +++-- iroh/src/sync/content.rs | 68 ++- iroh/src/sync/live.rs | 43 +- 15 files changed, 2058 insertions(+), 798 deletions(-) create mode 100644 iroh-sync/src/store.rs create mode 100644 iroh-sync/src/store/fs.rs create mode 100644 iroh-sync/src/store/memory.rs diff --git a/Cargo.lock b/Cargo.lock index 348a2024bd..d39e883522 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "aliasable" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -2003,11 +2009,14 @@ dependencies = [ "hex", "iroh-bytes", "once_cell", + "ouroboros", "parking_lot", "postcard", "rand", "rand_core", + "redb", "serde", + "tempfile", "tokio", "url", ] @@ -2541,6 +2550,30 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "ouroboros" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2ba07320d39dfea882faa70554b4bd342a5f273ed59ba7c1c6b4c840492c954" +dependencies = [ + "aliasable", + "ouroboros_macro", + "static_assertions", +] + +[[package]] +name = "ouroboros_macro" +version = "0.17.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec4c6225c69b4ca778c0aea097321a64c421cf4577b331c61b229267edabb6f8" +dependencies = [ + "heck", + "proc-macro-error 1.0.4", + "proc-macro2", + "quote", + "syn 2.0.27", +] + [[package]] name = "overload" version = "0.1.1" @@ -3004,6 +3037,16 @@ dependencies = [ "unarray", ] +[[package]] +name = "pyo3-build-config" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "554db24f0b3c180a9c0b1268f91287ab3f17c162e15b54caaae5a6b3773396b0" +dependencies = [ + "once_cell", + "target-lexicon", +] + [[package]] name = "quanta" version = "0.11.1" @@ -3194,6 +3237,16 @@ dependencies = [ "yasna", ] +[[package]] +name = "redb" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "717a806693d0e1ed6cc55b392066bf13e703dd835acf5c5888c74740f924d355" +dependencies = [ + "libc", + "pyo3-build-config", +] + [[package]] name = 
"redox_syscall" version = "0.2.16" @@ -4069,6 +4122,12 @@ dependencies = [ "libc", ] +[[package]] +name = "target-lexicon" +version = "0.12.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d2faeef5759ab89935255b1a4cd98e0baf99d1085e37d36599c625dac49ae8e" + [[package]] name = "tempfile" version = "3.7.0" diff --git a/deny.toml b/deny.toml index 517d074102..023b15271a 100644 --- a/deny.toml +++ b/deny.toml @@ -4,6 +4,7 @@ multiple-versions = "allow" [licenses] allow = [ "Apache-2.0", + "Apache-2.0 WITH LLVM-exception", "BSD-2-Clause", "BSD-3-Clause", "BSL-1.0", # BOSL license diff --git a/iroh-bytes/src/util.rs b/iroh-bytes/src/util.rs index 1950e6e837..bc59634195 100644 --- a/iroh-bytes/src/util.rs +++ b/iroh-bytes/src/util.rs @@ -82,6 +82,12 @@ impl From<[u8; 32]> for Hash { } } +impl From<&[u8; 32]> for Hash { + fn from(value: &[u8; 32]) -> Self { + Hash(blake3::Hash::from(*value)) + } +} + impl PartialOrd for Hash { fn partial_cmp(&self, other: &Self) -> Option { Some(self.0.as_bytes().cmp(other.0.as_bytes())) diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml index 0d0c9b891a..e0132aed13 100644 --- a/iroh-sync/Cargo.toml +++ b/iroh-sync/Cargo.toml @@ -25,5 +25,14 @@ bytes = "1.4.0" parking_lot = "0.12.1" hex = "0.4" +# fs-store +redb = { version = "1.0.5", optional = true } +ouroboros = { version = "0.17", optional = true } + [dev-dependencies] tokio = { version = "1.28.2", features = ["sync", "macros"] } +tempfile = "3.4" + +[features] +default = ["fs-store"] +fs-store = ["redb", "ouroboros"] \ No newline at end of file diff --git a/iroh-sync/src/lib.rs b/iroh-sync/src/lib.rs index a37ead1b6f..4c73579e2d 100644 --- a/iroh-sync/src/lib.rs +++ b/iroh-sync/src/lib.rs @@ -1,2 +1,3 @@ pub mod ranger; +pub mod store; pub mod sync; diff --git a/iroh-sync/src/ranger.rs b/iroh-sync/src/ranger.rs index e12db74498..aff7f66578 100644 --- a/iroh-sync/src/ranger.rs +++ b/iroh-sync/src/ranger.rs @@ -2,10 +2,10 @@ //! "Range-Based Set Reconciliation" by Aljoscha Meyer. //! -use std::cmp::Ordering; use std::collections::BTreeMap; use std::fmt::Debug; use std::marker::PhantomData; +use std::{cmp::Ordering, convert::Infallible}; use serde::{Deserialize, Serialize}; @@ -179,12 +179,12 @@ where K: RangeKey + Clone + Default + AsFingerprint, { /// Construct the initial message. - fn init>(store: &S, limit: Option<&Range>) -> Self { - let x = store.get_first(); + fn init>(store: &S, limit: Option<&Range>) -> Result { + let x = store.get_first()?; let range = Range::new(x.clone(), x); - let fingerprint = store.get_fingerprint(&range, limit); + let fingerprint = store.get_fingerprint(&range, limit)?; let part = MessagePart::RangeFingerprint(RangeFingerprint { range, fingerprint }); - Message { parts: vec![part] } + Ok(Message { parts: vec![part] }) } pub fn parts(&self) -> &[MessagePart] { @@ -192,37 +192,47 @@ where } } -pub trait Store: Sized + Default +pub trait Store: Sized where K: RangeKey + Clone + Default + AsFingerprint, { + type Error: Debug + Send + Sync + Into; + /// Get a the first key (or the default if none is available). - fn get_first(&self) -> K; - fn get(&self, key: &K) -> Option<&V>; - fn len(&self) -> usize; - fn is_empty(&self) -> bool; + fn get_first(&self) -> Result; + fn get(&self, key: &K) -> Result, Self::Error>; + fn len(&self) -> Result; + fn is_empty(&self) -> Result; /// Calculate the fingerprint of the given range. 
- fn get_fingerprint(&self, range: &Range, limit: Option<&Range>) -> Fingerprint; + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result; /// Insert the given key value pair. - fn put(&mut self, k: K, v: V); + fn put(&mut self, k: K, v: V) -> Result<(), Self::Error>; - type RangeIterator<'a>: Iterator + type RangeIterator<'a>: Iterator> where Self: 'a, K: 'a, V: 'a; /// Returns all items in the given range - fn get_range(&self, range: Range, limit: Option>) -> Self::RangeIterator<'_>; - fn remove(&mut self, key: &K) -> Option; + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error>; + fn remove(&mut self, key: &K) -> Result, Self::Error>; - type AllIterator<'a>: Iterator + type AllIterator<'a>: Iterator> where Self: 'a, K: 'a, V: 'a; - fn all(&self) -> Self::AllIterator<'_>; + fn all(&self) -> Result, Self::Error>; } #[derive(Debug)] @@ -241,94 +251,119 @@ impl Default for SimpleStore { impl Store for SimpleStore where K: RangeKey + Clone + Default + AsFingerprint, + V: Clone, { - fn get_first(&self) -> K { + type Error = Infallible; + + fn get_first(&self) -> Result { if let Some((k, _)) = self.data.first_key_value() { - k.clone() + Ok(k.clone()) } else { - Default::default() + Ok(Default::default()) } } - fn get(&self, key: &K) -> Option<&V> { - self.data.get(key) + fn get(&self, key: &K) -> Result, Self::Error> { + Ok(self.data.get(key).cloned()) } - fn len(&self) -> usize { - self.data.len() + fn len(&self) -> Result { + Ok(self.data.len()) } - fn is_empty(&self) -> bool { - self.data.is_empty() + fn is_empty(&self) -> Result { + Ok(self.data.is_empty()) } /// Calculate the fingerprint of the given range. - fn get_fingerprint(&self, range: &Range, limit: Option<&Range>) -> Fingerprint { - let elements = self.get_range(range.clone(), limit.cloned()); + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + let elements = self.get_range(range.clone(), limit.cloned())?; let mut fp = Fingerprint::empty(); for el in elements { + let el = el?; fp ^= el.0.as_fingerprint(); } - fp + Ok(fp) } /// Insert the given key value pair. 
- fn put(&mut self, k: K, v: V) { + fn put(&mut self, k: K, v: V) -> Result<(), Self::Error> { self.data.insert(k, v); + Ok(()) } type RangeIterator<'a> = SimpleRangeIterator<'a, K, V> where K: 'a, V: 'a; /// Returns all items in the given range - fn get_range(&self, range: Range, limit: Option>) -> Self::RangeIterator<'_> { + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error> { // TODO: this is not very efficient, optimize depending on data structure let iter = self.data.iter(); - SimpleRangeIterator { iter, range, limit } + Ok(SimpleRangeIterator { + iter, + range: Some(range), + limit, + }) } - fn remove(&mut self, key: &K) -> Option { - self.data.remove(key) + fn remove(&mut self, key: &K) -> Result, Self::Error> { + // No versions stored + + let res = self.data.remove(key).into_iter().collect(); + Ok(res) } - type AllIterator<'a> = std::collections::btree_map::Iter<'a, K, V> + type AllIterator<'a> = SimpleRangeIterator<'a, K, V> where K: 'a, V: 'a; - fn all(&self) -> Self::AllIterator<'_> { - self.data.iter() + fn all(&self) -> Result, Self::Error> { + let iter = self.data.iter(); + + Ok(SimpleRangeIterator { + iter, + range: None, + limit: None, + }) } } #[derive(Debug)] pub struct SimpleRangeIterator<'a, K: 'a, V: 'a> { iter: std::collections::btree_map::Iter<'a, K, V>, - range: Range, + range: Option>, limit: Option>, } impl<'a, K, V> Iterator for SimpleRangeIterator<'a, K, V> where - K: RangeKey, + K: RangeKey + Clone, + V: Clone, { - type Item = (&'a K, &'a V); + type Item = Result<(K, V), Infallible>; fn next(&mut self) -> Option { let mut next = self.iter.next()?; - let filter = |x: &K| { - let r = x.contains(&self.range); - if let Some(ref limit) = self.limit { - r && x.contains(limit) - } else { - r - } + let filter = |x: &K| match (&self.range, &self.limit) { + (None, None) => true, + (Some(ref range), Some(ref limit)) => x.contains(range) && x.contains(limit), + (Some(ref range), None) => x.contains(range), + (None, Some(ref limit)) => x.contains(limit), }; loop { if filter(next.0) { - return Some(next); + return Some(Ok((next.0.clone(), next.1.clone()))); } next = self.iter.next()?; @@ -389,14 +424,28 @@ where V: Clone + Debug, S: Store, { + pub fn from_store(store: S) -> Self { + Peer { + store, + max_set_size: 1, + split_factor: 2, + limit: None, + _phantom: Default::default(), + } + } + /// Generates the initial message. - pub fn initial_message(&self) -> Message { + pub fn initial_message(&self) -> Result, S::Error> { Message::init(&self.store, self.limit.as_ref()) } /// Processes an incoming message and produces a response. /// If terminated, returns `None` - pub fn process_message(&mut self, message: Message, cb: F) -> Option> + pub fn process_message( + &mut self, + message: Message, + cb: F, + ) -> Result>, S::Error> where F: Fn(K, V), { @@ -428,17 +477,25 @@ where } else { Some( self.store - .get_range(range.clone(), self.limit.clone()) - .filter(|(k, _)| !values.iter().any(|(vk, _)| &vk == k)) - .map(|(k, v)| (k.clone(), v.clone())) - .collect(), + .get_range(range.clone(), self.limit.clone())? 
+ .filter_map(|el| match el { + Ok((k, v)) => { + if !values.iter().any(|(vk, _)| vk == &k) { + Some(Ok((k, v))) + } else { + None + } + } + Err(err) => Some(Err(err)), + }) + .collect::>()?, ) }; // Store incoming values for (k, v) in values { cb(k.clone(), v.clone()); - self.store.put(k, v); + self.store.put(k, v)?; } if let Some(diff) = diff { @@ -454,7 +511,7 @@ where // Process fingerprint messages for RangeFingerprint { range, fingerprint } in fingerprints { - let local_fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref()); + let local_fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref())?; // Case1 Match, nothing to do if local_fingerprint == fingerprint { @@ -464,13 +521,10 @@ where // Case2 Recursion Anchor let local_values: Vec<_> = self .store - .get_range(range.clone(), self.limit.clone()) - .collect(); + .get_range(range.clone(), self.limit.clone())? + .collect::>()?; if local_values.len() <= 1 || fingerprint == Fingerprint::empty() { - let values = local_values - .into_iter() - .map(|(k, v)| (k.clone(), v.clone())) - .collect(); + let values = local_values.into_iter().map(|(k, v)| (k, v)).collect(); out.push(MessagePart::RangeItem(RangeItem { range, values, @@ -498,13 +552,13 @@ where let (x, y) = if i == 0 { // first - (range.x(), local_values[end].0) + (range.x(), &local_values[end].0) } else if i == self.split_factor - 1 { // last - (local_values[start].0, range.y()) + (&local_values[start].0, range.y()) } else { // regular - (local_values[start].0, local_values[end].0) + (&local_values[start].0, &local_values[end].0) }; let range = Range::new(x.clone(), y.clone()); ranges.push(range); @@ -514,10 +568,10 @@ where for range in ranges.into_iter() { let chunk: Vec<_> = self .store - .get_range(range.clone(), self.limit.clone()) + .get_range(range.clone(), self.limit.clone())? .collect(); // Add either the fingerprint or the item set - let fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref()); + let fingerprint = self.store.get_fingerprint(&range, self.limit.as_ref())?; if chunk.len() > self.max_set_size { out.push(MessagePart::RangeFingerprint(RangeFingerprint { range, @@ -526,12 +580,15 @@ where } else { let values = chunk .into_iter() - .map(|(k, v)| { - let k: K = k.clone(); - let v: V = v.clone(); - (k, v) + .map(|el| match el { + Ok((k, v)) => { + let k: K = k; + let v: V = v; + Ok((k, v)) + } + Err(err) => Err(err), }) - .collect(); + .collect::>()?; out.push(MessagePart::RangeItem(RangeItem { range, values, @@ -544,28 +601,28 @@ where // If we have any parts, return a message if !out.is_empty() { - Some(Message { parts: out }) + Ok(Some(Message { parts: out })) } else { - None + Ok(None) } } /// Insert a key value pair. - pub fn put(&mut self, k: K, v: V) { - self.store.put(k, v); + pub fn put(&mut self, k: K, v: V) -> Result<(), S::Error> { + self.store.put(k, v) } - pub fn get(&self, k: &K) -> Option<&V> { + pub fn get(&self, k: &K) -> Result, S::Error> { self.store.get(k) } /// Remove the given key. - pub fn remove(&mut self, k: &K) -> Option { + pub fn remove(&mut self, k: &K) -> Result, S::Error> { self.store.remove(k) } /// List all existing key value pairs. 
- pub fn all(&self) -> impl Iterator { + pub fn all(&self) -> Result> + '_, S::Error> { self.store.all() } @@ -941,6 +998,7 @@ mod tests { struct SyncResult where K: RangeKey + Clone + Default + AsFingerprint, + V: Clone, { alice: Peer, bob: Peer, @@ -951,7 +1009,7 @@ mod tests { impl SyncResult where K: RangeKey + Clone + Default + AsFingerprint + Debug, - V: Debug, + V: Clone + Debug, { fn print_messages(&self) { let len = std::cmp::max(self.alice_to_bob.len(), self.bob_to_alice.len()); @@ -974,32 +1032,42 @@ mod tests { V: Debug + Clone + PartialEq, { fn assert_alice_set(&self, ctx: &str, expected: &[(K, V)]) { - dbg!(self.alice.all().collect::>()); + dbg!(self.alice.all().unwrap().collect::>()); for (k, v) in expected { assert_eq!( - self.alice.store.get(k), + self.alice.store.get(k).unwrap().as_ref(), Some(v), "{}: (alice) missing key {:?}", ctx, k ); } - assert_eq!(expected.len(), self.alice.store.len(), "{}: (alice)", ctx); + assert_eq!( + expected.len(), + self.alice.store.len().unwrap(), + "{}: (alice)", + ctx + ); } fn assert_bob_set(&self, ctx: &str, expected: &[(K, V)]) { - dbg!(self.bob.all().collect::>()); + dbg!(self.bob.all().unwrap().collect::>()); for (k, v) in expected { assert_eq!( - self.bob.store.get(k), + self.bob.store.get(k).unwrap().as_ref(), Some(v), "{}: (bob) missing key {:?}", ctx, k ); } - assert_eq!(expected.len(), self.bob.store.len(), "{}: (bob)", ctx); + assert_eq!( + expected.len(), + self.bob.store.len().unwrap(), + "{}: (bob)", + ctx + ); } } @@ -1054,7 +1122,7 @@ mod tests { Peer::::default() }; for (k, v) in alice_set { - alice.put(k.clone(), v.clone()); + alice.put(k.clone(), v.clone()).unwrap(); let include = if let Some(ref limit) = limit { k.contains(limit) @@ -1074,7 +1142,7 @@ mod tests { Peer::::default() }; for (k, v) in bob_set { - bob.put(k.clone(), v.clone()); + bob.put(k.clone(), v.clone()).unwrap(); let include = if let Some(ref limit) = limit { k.contains(limit) } else { @@ -1089,7 +1157,7 @@ mod tests { let mut alice_to_bob = Vec::new(); let mut bob_to_alice = Vec::new(); - let initial_message = alice.initial_message(); + let initial_message = alice.initial_message().unwrap(); let mut next_to_bob = Some(initial_message); let mut rounds = 0; @@ -1098,9 +1166,9 @@ mod tests { rounds += 1; alice_to_bob.push(msg.clone()); - if let Some(msg) = bob.process_message(msg, |_, _| {}) { + if let Some(msg) = bob.process_message(msg, |_, _| {}).unwrap() { bob_to_alice.push(msg.clone()); - next_to_bob = alice.process_message(msg, |_, _| {}); + next_to_bob = alice.process_message(msg, |_, _| {}).unwrap(); } } let res = SyncResult { @@ -1111,15 +1179,19 @@ mod tests { }; res.print_messages(); - let alice_now: Vec<_> = res.alice.all().collect(); + let alice_now: Vec<_> = res.alice.all().unwrap().collect::>().unwrap(); assert_eq!( - expected_set_alice.iter().collect::>(), + expected_set_alice.into_iter().collect::>(), alice_now, "alice" ); - let bob_now: Vec<_> = res.bob.all().collect(); - assert_eq!(expected_set_bob.iter().collect::>(), bob_now, "bob"); + let bob_now: Vec<_> = res.bob.all().unwrap().collect::>().unwrap(); + assert_eq!( + expected_set_bob.into_iter().collect::>(), + bob_now, + "bob" + ); // Check that values were never sent twice let mut alice_sent = BTreeMap::new(); @@ -1169,38 +1241,44 @@ mod tests { ("hog", 1), ]; for (k, v) in &set { - store.put(*k, *v); + store.put(*k, *v).unwrap(); } let all: Vec<_> = store .get_range(Range::new("", ""), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); 
assert_eq!(&all, &set[..]); let regular: Vec<_> = store .get_range(("bee", "eel").into(), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); assert_eq!(®ular, &set[..3]); // empty start let regular: Vec<_> = store .get_range(("", "eel").into(), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); assert_eq!(®ular, &set[..3]); let regular: Vec<_> = store .get_range(("cat", "hog").into(), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(®ular, &set[1..5]); let excluded: Vec<_> = store .get_range(("fox", "bee").into(), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); assert_eq!(excluded[0].0, "fox"); assert_eq!(excluded[1].0, "hog"); @@ -1208,8 +1286,9 @@ mod tests { let excluded: Vec<_> = store .get_range(("fox", "doe").into(), None) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); assert_eq!(excluded.len(), 4); assert_eq!(excluded[0].0, "bee"); @@ -1220,8 +1299,9 @@ mod tests { // Limit let all: Vec<_> = store .get_range(("", "").into(), Some(("bee", "doe").into())) - .map(|(k, v)| (*k, *v)) - .collect(); + .unwrap() + .collect::>() + .unwrap(); assert_eq!(&all, &set[..2]); } diff --git a/iroh-sync/src/store.rs b/iroh-sync/src/store.rs new file mode 100644 index 0000000000..a28b9a904a --- /dev/null +++ b/iroh-sync/src/store.rs @@ -0,0 +1,79 @@ +use anyhow::Result; +use rand_core::CryptoRngCore; + +use crate::{ + ranger, + sync::{Author, AuthorId, Namespace, NamespaceId, RecordIdentifier, Replica, SignedEntry}, +}; + +#[cfg(feature = "fs-store")] +pub mod fs; +pub mod memory; + +/// Abstraction over the different available storage solutions. +pub trait Store: std::fmt::Debug + Clone + Send + Sync + 'static { + /// The specialized instance scoped to a `Namespace`. + type Instance: ranger::Store + Send + Sync + 'static + Clone; + + type GetLatestIter<'a>: Iterator> + where + Self: 'a; + type GetAllIter<'a>: Iterator> + where + Self: 'a; + + fn get_replica(&self, namespace: &NamespaceId) -> Result>>; + fn get_author(&self, author: &AuthorId) -> Result>; + fn new_author(&self, rng: &mut R) -> Result; + fn new_replica(&self, namespace: Namespace) -> Result>; + + /// Gets all entries matching this key and author. + fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest version of the matching documents by key. + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest version of the matching documents by prefix. + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns the latest versions of all documents. + fn get_latest(&self, namespace: NamespaceId) -> Result>; + + /// Returns all versions of the matching documents by author. + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]> + 'b, + ) -> Result>; + + /// Returns all versions of the matching documents by key. + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns all versions of the matching documents by prefix. + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result>; + + /// Returns all versions of all documents. 
+ fn get_all(&self, namespace: NamespaceId) -> Result>; +} diff --git a/iroh-sync/src/store/fs.rs b/iroh-sync/src/store/fs.rs new file mode 100644 index 0000000000..e490c177a2 --- /dev/null +++ b/iroh-sync/src/store/fs.rs @@ -0,0 +1,751 @@ +//! On disk storage for replicas. + +use std::{path::Path, sync::Arc}; + +use anyhow::Result; +use ouroboros::self_referencing; +use rand_core::CryptoRngCore; +use redb::{ + AccessGuard, Database, MultimapRange, MultimapTableDefinition, MultimapValue, + ReadOnlyMultimapTable, ReadTransaction, ReadableMultimapTable, ReadableTable, TableDefinition, +}; + +use crate::{ + ranger::{AsFingerprint, Fingerprint, Range, RangeKey}, + store::Store as _, + sync::{ + Author, AuthorId, Entry, EntrySignature, Namespace, NamespaceId, Record, RecordIdentifier, + Replica, SignedEntry, + }, +}; + +use self::ouroboros_impl_range_all_iterator::BorrowedMutFields; + +/// Manages the replicas and authors for an instance. +#[derive(Debug, Clone)] +pub struct Store { + db: Arc, +} + +// Table Definitions + +// Authors +// Table +// Key: [u8; 32] # AuthorId +// Value: #[u8; 32] # Author +const AUTHORS_TABLE: TableDefinition<&[u8; 32], &[u8; 32]> = TableDefinition::new("authors-1"); + +// Namespaces +// Table +// Key: [u8; 32] # NamespaceId +// Value: #[u8; 32] # Namespace +const NAMESPACES_TABLE: TableDefinition<&[u8; 32], &[u8; 32]> = + TableDefinition::new("namespaces-1"); + +// Records +// Multimap +// Key: ([u8; 32], [u8; 32], Vec) # (NamespaceId, AuthorId, Key) +// Values: +// (u64, [u8; 32], [u8; 32], u64, [u8; 32]) +// # (timestamp, signature_namespace, signature_author, len, hash) + +type RecordsId<'a> = (&'a [u8; 32], &'a [u8; 32], &'a [u8]); +type RecordsValue<'a> = (u64, &'a [u8; 64], &'a [u8; 64], u64, &'a [u8; 32]); + +const RECORDS_TABLE: MultimapTableDefinition = + MultimapTableDefinition::new("records-1"); + +impl Store { + pub fn new(path: impl AsRef) -> Result { + let db = Database::create(path)?; + + // Setup all tables + let write_tx = db.begin_write()?; + { + let _table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let _table = write_tx.open_table(NAMESPACES_TABLE)?; + let _table = write_tx.open_table(AUTHORS_TABLE)?; + } + write_tx.commit()?; + + Ok(Store { db: Arc::new(db) }) + } + /// Stores a new namespace + fn insert_namespace(&self, namespace: Namespace) -> Result<()> { + let write_tx = self.db.begin_write()?; + { + let mut namespace_table = write_tx.open_table(NAMESPACES_TABLE)?; + namespace_table.insert(&namespace.id_bytes(), &namespace.to_bytes())?; + } + write_tx.commit()?; + + Ok(()) + } + + fn insert_author(&self, author: Author) -> Result<()> { + let write_tx = self.db.begin_write()?; + { + let mut author_table = write_tx.open_table(AUTHORS_TABLE)?; + author_table.insert(&author.id_bytes(), &author.to_bytes())?; + } + write_tx.commit()?; + + Ok(()) + } +} + +impl super::Store for Store { + type Instance = StoreInstance; + type GetAllIter<'a> = RangeAllIterator<'a>; + type GetLatestIter<'a> = RangeLatestIterator<'a>; + + fn get_replica(&self, namespace_id: &NamespaceId) -> Result>> { + let read_tx = self.db.begin_read()?; + let namespace_table = read_tx.open_table(NAMESPACES_TABLE)?; + let Some(namespace) = namespace_table.get(namespace_id.as_bytes())? 
else { + return Ok(None); + }; + let namespace = Namespace::from_bytes(namespace.value()); + let replica = Replica::new(namespace, StoreInstance::new(*namespace_id, self.clone())); + Ok(Some(replica)) + } + + fn get_author(&self, author_id: &AuthorId) -> Result> { + let read_tx = self.db.begin_read()?; + let author_table = read_tx.open_table(AUTHORS_TABLE)?; + let Some(author) = author_table.get(author_id.as_bytes())? else { + return Ok(None); + }; + + let author = Author::from_bytes(author.value()); + Ok(Some(author)) + } + + /// Generates a new author, using the passed in randomness. + fn new_author(&self, rng: &mut R) -> Result { + let author = Author::new(rng); + self.insert_author(author.clone())?; + Ok(author) + } + + fn new_replica(&self, namespace: Namespace) -> Result> { + let id = namespace.id(); + self.insert_namespace(namespace.clone())?; + + let replica = Replica::new(namespace, StoreInstance::new(id, self.clone())); + + Ok(replica) + } + + /// Gets all entries matching this key and author. + fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result> { + let read_tx = self.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + let db_key = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let records = record_table.get(db_key)?; + let Some(record) = records.last() else { + return Ok(None); + }; + let record = record?; + let (timestamp, namespace_sig, author_sig, len, hash) = record.value(); + let record = Record::new(timestamp, len, hash.into()); + let id = RecordIdentifier::new(key, namespace, author); + let entry = Entry::new(id, record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + + Ok(Some(signed_entry)) + } + + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::Key(key.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::Prefix(prefix.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_latest(&self, namespace: NamespaceId) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl 
AsRef<[u8]> + 'b, + ) -> Result> { + let start = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let end = (namespace.as_bytes(), author.as_bytes(), key.as_ref()); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::Key(key.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::Prefix(prefix.as_ref().to_vec()), + )?; + + Ok(iter) + } + + fn get_all(&self, namespace: NamespaceId) -> Result> { + let start = (namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeAllIterator::try_new( + self.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| { + record_table + .range(start..=end) + .map_err(anyhow::Error::from) + .map(|v| (v, None)) + }, + RangeFilter::None, + )?; + + Ok(iter) + } +} + +/// [`Namespace`] specific wrapper around the [`Store`]. +#[derive(Debug, Clone)] +pub struct StoreInstance { + namespace: NamespaceId, + store: Store, +} + +impl StoreInstance { + fn new(namespace: NamespaceId, store: Store) -> Self { + StoreInstance { namespace, store } + } +} + +impl crate::ranger::Store for StoreInstance { + type Error = anyhow::Error; + + /// Get a the first key (or the default if none is available). 
+ fn get_first(&self) -> Result { + let read_tx = self.store.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + // TODO: verify this fetches all keys with this namespace + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let mut records = record_table.range(start..=end)?; + + let Some(record) = records.next() else { + return Ok(RecordIdentifier::default()); + }; + let (compound_key, _) = record?; + let (namespace_id, author_id, key) = compound_key.value(); + + let id = RecordIdentifier::from_parts(key, namespace_id, author_id)?; + Ok(id) + } + + fn get(&self, id: &RecordIdentifier) -> Result> { + self.store + .get_latest_by_key_and_author(id.namespace(), id.author(), id.key()) + } + + fn len(&self) -> Result { + let read_tx = self.store.db.begin_read()?; + let record_table = read_tx.open_multimap_table(RECORDS_TABLE)?; + + // TODO: verify this fetches all keys with this namespace + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let records = record_table.range(start..=end)?; + Ok(records.count()) + } + + fn is_empty(&self) -> Result { + Ok(self.len()? == 0) + } + + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + // TODO: optimize? + + let elements = self.get_range(range.clone(), limit.cloned())?; + let mut fp = Fingerprint::empty(); + for el in elements { + let el = el?; + fp ^= el.0.as_fingerprint(); + } + + Ok(fp) + } + + fn put(&mut self, k: RecordIdentifier, v: SignedEntry) -> Result<()> { + // TODO: propagate error/not insertion? + if v.verify().is_ok() { + let timestamp = v.entry().record().timestamp(); + // TODO: verify timestamp is "reasonable" + + let write_tx = self.store.db.begin_write()?; + { + let mut record_table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let key = (k.namespace_bytes(), k.author_bytes(), k.key()); + let record = v.entry().record(); + let value = ( + timestamp, + &v.signature().namespace_signature().to_bytes(), + &v.signature().author_signature().to_bytes(), + record.content_len(), + record.content_hash().as_bytes(), + ); + record_table.insert(key, value)?; + } + write_tx.commit()?; + } + Ok(()) + } + + type RangeIterator<'a> = RangeLatestIterator<'a>; + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result> { + // TODO: implement inverted range + let range_start = range.x(); + let range_end = range.y(); + + let start = ( + range_start.namespace_bytes(), + range_start.author_bytes(), + range_start.key(), + ); + let end = ( + range_end.namespace_bytes(), + range_end.author_bytes(), + range_end.key(), + ); + let iter = RangeLatestIterator::try_new( + self.store.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + limit, + RangeFilter::None, + )?; + + Ok(iter) + } + + fn remove(&mut self, k: &RecordIdentifier) -> Result> { + let write_tx = self.store.db.begin_write()?; + let res = { + let mut records_table = write_tx.open_multimap_table(RECORDS_TABLE)?; + let key = (k.namespace_bytes(), k.author_bytes(), k.key()); + let records = records_table.remove_all(key)?; + let mut res = Vec::new(); + for record in records.into_iter() { + let record = record?; + let (timestamp, namespace_sig, author_sig, len, hash) = record.value(); + let record = Record::new(timestamp, len, 
hash.into()); + let entry = Entry::new(k.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + res.push(signed_entry); + } + res + }; + write_tx.commit()?; + Ok(res) + } + + type AllIterator<'a> = RangeLatestIterator<'a>; + + fn all(&self) -> Result> { + let start = (self.namespace.as_bytes(), &[0u8; 32], &[][..]); + let end = (self.namespace.as_bytes(), &[255u8; 32], &[][..]); + let iter = RangeLatestIterator::try_new( + self.store.db.begin_read()?, + |read_tx| { + read_tx + .open_multimap_table(RECORDS_TABLE) + .map_err(anyhow::Error::from) + }, + |record_table| record_table.range(start..=end).map_err(anyhow::Error::from), + None, + RangeFilter::None, + )?; + + Ok(iter) + } +} + +fn matches(limit: &Option>, x: &RecordIdentifier) -> bool { + limit.as_ref().map(|r| x.contains(r)).unwrap_or(true) +} + +#[self_referencing] +pub struct RangeLatestIterator<'a> { + read_tx: ReadTransaction<'a>, + #[borrows(read_tx)] + #[covariant] + record_table: ReadOnlyMultimapTable<'this, RecordsId<'static>, RecordsValue<'static>>, + #[covariant] + #[borrows(record_table)] + records: MultimapRange<'this, RecordsId<'static>, RecordsValue<'static>>, + limit: Option>, + filter: RangeFilter, +} + +impl std::fmt::Debug for RangeLatestIterator<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RangeLatestIterator") + .finish_non_exhaustive() + } +} + +impl Iterator for RangeLatestIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + self.with_mut(|fields| { + for next in fields.records.by_ref() { + let next = match next { + Ok(next) => next, + Err(err) => return Some(Err(err.into())), + }; + + let (namespace, author, key) = next.0.value(); + let id = match RecordIdentifier::from_parts(key, namespace, author) { + Ok(id) => id, + Err(err) => return Some(Err(err)), + }; + if fields.filter.matches(&id) && matches(fields.limit, &id) { + let last = next.1.last(); + let value = match last? 
{ + Ok(value) => value, + Err(err) => return Some(Err(err.into())), + }; + let (timestamp, namespace_sig, author_sig, len, hash) = value.value(); + let record = Record::new(timestamp, len, hash.into()); + let entry = Entry::new(id.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + + return Some(Ok((id, signed_entry))); + } + } + None + }) + } +} + +#[self_referencing] +pub struct RangeAllIterator<'a> { + read_tx: ReadTransaction<'a>, + #[borrows(read_tx)] + #[covariant] + record_table: ReadOnlyMultimapTable<'this, RecordsId<'static>, RecordsValue<'static>>, + #[covariant] + #[borrows(record_table)] + records: ( + MultimapRange<'this, RecordsId<'static>, RecordsValue<'static>>, + Option<( + AccessGuard<'this, RecordsId<'static>>, + MultimapValue<'this, RecordsValue<'static>>, + RecordIdentifier, + )>, + ), + filter: RangeFilter, +} + +#[derive(Debug)] +enum RangeFilter { + None, + Prefix(Vec), + Key(Vec), +} + +impl RangeFilter { + fn matches(&self, id: &RecordIdentifier) -> bool { + match self { + RangeFilter::None => true, + RangeFilter::Prefix(ref prefix) => id.key().starts_with(prefix), + RangeFilter::Key(ref key) => id.key() == key, + } + } +} + +impl std::fmt::Debug for RangeAllIterator<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("RangeAllIterator").finish_non_exhaustive() + } +} + +/// Advance the internal iterator to the next set of multimap values +fn next_iter(fields: &mut BorrowedMutFields) -> Result<()> { + for next_iter in fields.records.0.by_ref() { + let (id_guard, values_guard) = next_iter?; + let (namespace, author, key) = id_guard.value(); + let id = RecordIdentifier::from_parts(key, namespace, author)?; + if fields.filter.matches(&id) { + fields.records.1 = Some((id_guard, values_guard, id)); + return Ok(()); + } + } + Ok(()) +} + +impl Iterator for RangeAllIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + self.with_mut(|mut fields| { + loop { + if fields.records.1.is_none() { + if let Err(err) = next_iter(&mut fields) { + return Some(Err(err)); + } + } + // If this is None, nothing is available anymore + let (_id_guard, values_guard, id) = fields.records.1.as_mut()?; + + match values_guard.next() { + Some(Ok(value)) => { + let (timestamp, namespace_sig, author_sig, len, hash) = value.value(); + let record = Record::new(timestamp, len, hash.into()); + let entry = Entry::new(id.clone(), record); + let entry_signature = EntrySignature::from_parts(namespace_sig, author_sig); + let signed_entry = SignedEntry::new(entry_signature, entry); + return Some(Ok((id.clone(), signed_entry))); + } + Some(Err(err)) => return Some(Err(err.into())), + None => { + // clear the current + fields.records.1 = None; + } + } + } + }) + } +} + +#[cfg(test)] +mod tests { + use crate::ranger::Store as _; + use crate::store::Store as _; + + use super::*; + + #[test] + fn test_basics() -> Result<()> { + let dbfile = tempfile::NamedTempFile::new()?; + let store = Store::new(dbfile.path())?; + + let author = store.new_author(&mut rand::thread_rng())?; + let namespace = Namespace::new(&mut rand::thread_rng()); + let replica = store.new_replica(namespace.clone())?; + + let replica_back = store.get_replica(&namespace.id())?.unwrap(); + assert_eq!( + replica.namespace().as_bytes(), + replica_back.namespace().as_bytes() + ); + + let author_back = store.get_author(&author.id())?.unwrap(); + 
assert_eq!(author.to_bytes(), author_back.to_bytes(),); + + let mut wrapper = StoreInstance::new(namespace.id(), store.clone()); + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let entry = Entry::new( + id.clone(), + Record::from_data(format!("world-{i}"), namespace.id()), + ); + let entry = SignedEntry::from_entry(entry, &namespace, &author); + wrapper.put(id, entry)?; + } + + // all + let all: Vec<_> = wrapper.all()?.collect(); + assert_eq!(all.len(), 5); + + // add a second version + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let entry = Entry::new( + id.clone(), + Record::from_data(format!("world-{i}-2"), namespace.id()), + ); + let entry = SignedEntry::from_entry(entry, &namespace, &author); + wrapper.put(id, entry)?; + } + + // get all + let entries = store.get_all(namespace.id())?.collect::>>()?; + assert_eq!(entries.len(), 10); + + // get all prefix + let entries = store + .get_all_by_prefix(namespace.id(), "hello-")? + .collect::>>()?; + assert_eq!(entries.len(), 10); + + // get latest + let entries = store + .get_latest(namespace.id())? + .collect::>>()?; + assert_eq!(entries.len(), 5); + + // get latest by prefix + let entries = store + .get_latest_by_prefix(namespace.id(), "hello-")? + .collect::>>()?; + assert_eq!(entries.len(), 5); + + // delete and get + for i in 0..5 { + let id = RecordIdentifier::new(format!("hello-{i}"), namespace.id(), author.id()); + let res = wrapper.get(&id)?; + assert!(res.is_some()); + let out = wrapper.remove(&id)?; + assert_eq!(out.len(), 2); + for val in out { + assert_eq!(val.entry().id(), &id); + } + let res = wrapper.get(&id)?; + assert!(res.is_none()); + } + + // get latest + let entries = store + .get_latest(namespace.id())? + .collect::>>()?; + assert_eq!(entries.len(), 0); + + Ok(()) + } +} diff --git a/iroh-sync/src/store/memory.rs b/iroh-sync/src/store/memory.rs new file mode 100644 index 0000000000..b10213f9ce --- /dev/null +++ b/iroh-sync/src/store/memory.rs @@ -0,0 +1,502 @@ +//! In memory storage for replicas. + +use std::{ + collections::{BTreeMap, HashMap}, + convert::Infallible, + sync::Arc, +}; + +use anyhow::Result; +use parking_lot::{RwLock, RwLockReadGuard}; +use rand_core::CryptoRngCore; + +use crate::{ + ranger::{AsFingerprint, Fingerprint, Range, RangeKey}, + sync::{Author, AuthorId, Namespace, NamespaceId, RecordIdentifier, Replica, SignedEntry}, +}; + +/// Manages the replicas and authors for an instance. 
+#[derive(Debug, Clone, Default)] +pub struct Store { + replicas: Arc>>>, + authors: Arc>>, + /// Stores records by namespace -> identifier + timestamp + replica_records: Arc>, +} + +type ReplicaRecordsOwned = + HashMap>>; + +impl super::Store for Store { + type Instance = ReplicaStoreInstance; + type GetLatestIter<'a> = GetLatestIter<'a>; + type GetAllIter<'a> = GetAllIter<'a>; + + fn get_replica(&self, namespace: &NamespaceId) -> Result>> { + let replicas = &*self.replicas.read(); + Ok(replicas.get(namespace).cloned()) + } + + fn get_author(&self, author: &AuthorId) -> Result> { + let authors = &*self.authors.read(); + Ok(authors.get(author).cloned()) + } + + fn new_author(&self, rng: &mut R) -> Result { + let author = Author::new(rng); + self.authors.write().insert(author.id(), author.clone()); + Ok(author) + } + + fn new_replica(&self, namespace: Namespace) -> Result> { + let id = namespace.id(); + let replica = Replica::new(namespace, ReplicaStoreInstance::new(id, self.clone())); + self.replicas + .write() + .insert(replica.namespace(), replica.clone()); + Ok(replica) + } + + fn get_latest_by_key_and_author( + &self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]>, + ) -> Result> { + let inner = self.replica_records.read(); + + let value = inner + .get(&namespace) + .and_then(|records| records.get(&RecordIdentifier::new(key, namespace, author))) + .and_then(|values| values.last_key_value()); + + Ok(value.map(|(_, v)| v.clone())) + } + + fn get_latest_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let key = key.as_ref().to_vec(); + let filter = GetFilter::Key { namespace, key }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_latest_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let prefix = prefix.as_ref().to_vec(); + let filter = GetFilter::Prefix { namespace, prefix }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_latest(&self, namespace: NamespaceId) -> Result> { + let records = self.replica_records.read(); + let filter = GetFilter::All { namespace }; + + Ok(GetLatestIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_key_and_author<'a, 'b: 'a>( + &'a self, + namespace: NamespaceId, + author: AuthorId, + key: impl AsRef<[u8]> + 'b, + ) -> Result> { + let records = self.replica_records.read(); + let record_id = RecordIdentifier::new(key, namespace, author); + let filter = GetFilter::KeyAuthor(record_id); + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_key( + &self, + namespace: NamespaceId, + key: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let key = key.as_ref().to_vec(); + let filter = GetFilter::Key { namespace, key }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all_by_prefix( + &self, + namespace: NamespaceId, + prefix: impl AsRef<[u8]>, + ) -> Result> { + let records = self.replica_records.read(); + let prefix = prefix.as_ref().to_vec(); + let filter = GetFilter::Prefix { namespace, prefix }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } + + fn get_all(&self, namespace: NamespaceId) -> Result> { + let records = self.replica_records.read(); + let filter = GetFilter::All { namespace }; + + Ok(GetAllIter { + records, + filter, + index: 0, + }) + } +} +#[derive(Debug)] +enum GetFilter { + /// All 
entries. + All { namespace: NamespaceId }, + /// Filter by key and author. + KeyAuthor(RecordIdentifier), + /// Filter by key only. + Key { + namespace: NamespaceId, + key: Vec, + }, + /// Filter by prefix only. + Prefix { + namespace: NamespaceId, + prefix: Vec, + }, +} + +impl GetFilter { + fn namespace(&self) -> NamespaceId { + match self { + GetFilter::All { namespace } => *namespace, + GetFilter::KeyAuthor(ref r) => r.namespace(), + GetFilter::Key { namespace, .. } => *namespace, + GetFilter::Prefix { namespace, .. } => *namespace, + } + } +} + +#[derive(Debug)] +pub struct GetLatestIter<'a> { + records: ReplicaRecords<'a>, + filter: GetFilter, + /// Current iteration index. + index: usize, +} + +impl<'a> Iterator for GetLatestIter<'a> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + let records = self.records.get(&self.filter.namespace())?; + let res = match self.filter { + GetFilter::All { namespace } => records + .iter() + .filter(|(k, _)| k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + GetFilter::KeyAuthor(ref record_id) => { + let values = records.get(record_id)?; + let (_, res) = values.iter().nth(self.index)?; + (record_id.clone(), res.clone()) + } + GetFilter::Key { namespace, ref key } => records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + GetFilter::Prefix { + namespace, + ref prefix, + } => records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == namespace) + .filter_map(|(key, value)| { + value + .last_key_value() + .map(|(_, v)| (key.clone(), v.clone())) + }) + .nth(self.index)?, + }; + self.index += 1; + Some(Ok(res)) + } +} + +#[derive(Debug)] +pub struct GetAllIter<'a> { + records: ReplicaRecords<'a>, + filter: GetFilter, + /// Current iteration index. 
+ index: usize, +} + +impl<'a> Iterator for GetAllIter<'a> { + type Item = Result<(RecordIdentifier, SignedEntry)>; + + fn next(&mut self) -> Option { + let records = self.records.get(&self.filter.namespace())?; + let res = match self.filter { + GetFilter::All { namespace } => records + .iter() + .filter(|(k, _)| k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + GetFilter::KeyAuthor(ref record_id) => { + let values = records.get(record_id)?; + let (_, value) = values.iter().nth(self.index)?; + (record_id.clone(), value.clone()) + } + GetFilter::Key { namespace, ref key } => records + .iter() + .filter(|(k, _)| k.key() == key && k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + GetFilter::Prefix { + namespace, + ref prefix, + } => records + .iter() + .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == namespace) + .flat_map(|(key, value)| { + value.iter().map(|(_, value)| (key.clone(), value.clone())) + }) + .nth(self.index)?, + }; + self.index += 1; + Some(Ok(res)) + } +} + +#[derive(Debug, Clone)] +pub struct ReplicaStoreInstance { + namespace: NamespaceId, + store: Store, +} + +impl ReplicaStoreInstance { + fn new(namespace: NamespaceId, store: Store) -> Self { + ReplicaStoreInstance { namespace, store } + } + + fn with_records(&self, f: F) -> T + where + F: FnOnce(Option<&BTreeMap>>) -> T, + { + let guard = self.store.replica_records.read(); + let value = guard.get(&self.namespace); + f(value) + } + + fn with_records_mut(&self, f: F) -> T + where + F: FnOnce(Option<&mut BTreeMap>>) -> T, + { + let mut guard = self.store.replica_records.write(); + let value = guard.get_mut(&self.namespace); + f(value) + } + + fn with_records_mut_with_default(&self, f: F) -> T + where + F: FnOnce(&mut BTreeMap>) -> T, + { + let mut guard = self.store.replica_records.write(); + let value = guard.entry(self.namespace).or_default(); + f(value) + } + + fn records_iter(&self) -> RecordsIter<'_> { + RecordsIter { + namespace: self.namespace, + replica_records: self.store.replica_records.read(), + i: 0, + } + } +} + +type ReplicaRecords<'a> = RwLockReadGuard< + 'a, + HashMap>>, +>; + +#[derive(Debug)] +struct RecordsIter<'a> { + namespace: NamespaceId, + replica_records: ReplicaRecords<'a>, + i: usize, +} + +impl Iterator for RecordsIter<'_> { + type Item = (RecordIdentifier, BTreeMap); + + fn next(&mut self) -> Option { + let records = self.replica_records.get(&self.namespace)?; + let (key, value) = records.iter().nth(self.i)?; + self.i += 1; + Some((key.clone(), value.clone())) + } +} + +impl crate::ranger::Store for ReplicaStoreInstance { + type Error = Infallible; + + /// Get a the first key (or the default if none is available). + fn get_first(&self) -> Result { + Ok(self.with_records(|records| { + records + .and_then(|r| r.first_key_value().map(|(k, _)| k.clone())) + .unwrap_or_default() + })) + } + + fn get(&self, key: &RecordIdentifier) -> Result, Self::Error> { + Ok(self.with_records(|records| { + records + .and_then(|r| r.get(key)) + .and_then(|values| values.last_key_value()) + .map(|(_, v)| v.clone()) + })) + } + + fn len(&self) -> Result { + Ok(self.with_records(|records| records.map(|v| v.len()).unwrap_or_default())) + } + + fn is_empty(&self) -> Result { + Ok(self.len()? 
== 0) + } + + fn get_fingerprint( + &self, + range: &Range, + limit: Option<&Range>, + ) -> Result { + let elements = self.get_range(range.clone(), limit.cloned())?; + let mut fp = Fingerprint::empty(); + for el in elements { + let el = el?; + fp ^= el.0.as_fingerprint(); + } + + Ok(fp) + } + + fn put(&mut self, k: RecordIdentifier, v: SignedEntry) -> Result<(), Self::Error> { + // TODO: propagate error/not insertion? + if v.verify().is_ok() { + let timestamp = v.entry().record().timestamp(); + // TODO: verify timestamp is "reasonable" + + self.with_records_mut_with_default(|records| { + records.entry(k).or_default().insert(timestamp, v); + }); + } + Ok(()) + } + + type RangeIterator<'a> = RangeIterator<'a>; + fn get_range( + &self, + range: Range, + limit: Option>, + ) -> Result, Self::Error> { + Ok(RangeIterator { + iter: self.records_iter(), + range: Some(range), + limit, + }) + } + + fn remove(&mut self, key: &RecordIdentifier) -> Result, Self::Error> { + let res = self.with_records_mut(|records| { + records + .and_then(|records| records.remove(key).map(|v| v.into_values().collect())) + .unwrap_or_default() + }); + Ok(res) + } + + type AllIterator<'a> = RangeIterator<'a>; + + fn all(&self) -> Result, Self::Error> { + Ok(RangeIterator { + iter: self.records_iter(), + range: None, + limit: None, + }) + } +} + +#[derive(Debug)] +pub struct RangeIterator<'a> { + iter: RecordsIter<'a>, + range: Option>, + limit: Option>, +} + +impl RangeIterator<'_> { + fn matches(&self, x: &RecordIdentifier) -> bool { + let range = self.range.as_ref().map(|r| x.contains(r)).unwrap_or(true); + let limit = self.limit.as_ref().map(|r| x.contains(r)).unwrap_or(true); + range && limit + } +} + +impl Iterator for RangeIterator<'_> { + type Item = Result<(RecordIdentifier, SignedEntry), Infallible>; + + fn next(&mut self) -> Option { + let mut next = self.iter.next()?; + loop { + if self.matches(&next.0) { + let (k, mut values) = next; + let (_, v) = values.pop_last()?; + return Some(Ok((k, v))); + } + + next = self.iter.next()?; + } + } +} diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 2faf711ec5..7ac32f7522 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -6,7 +6,6 @@ use std::{ cmp::Ordering, - collections::{BTreeMap, HashMap}, fmt::{Debug, Display}, str::FromStr, sync::Arc, @@ -15,20 +14,18 @@ use std::{ use parking_lot::RwLock; -use bytes::Bytes; use ed25519_dalek::{Signature, SignatureError, Signer, SigningKey, VerifyingKey}; use iroh_bytes::Hash; use rand_core::CryptoRngCore; use serde::{Deserialize, Serialize}; -use crate::ranger::{AsFingerprint, Fingerprint, Peer, Range, RangeKey}; +use crate::ranger::{self, AsFingerprint, Fingerprint, Peer, RangeKey}; pub type ProtocolMessage = crate::ranger::Message; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Author { priv_key: SigningKey, - id: AuthorId, } impl Display for Author { @@ -40,17 +37,26 @@ impl Display for Author { impl Author { pub fn new(rng: &mut R) -> Self { let priv_key = SigningKey::generate(rng); - let id = AuthorId(priv_key.verifying_key()); - Author { priv_key, id } + Author { priv_key } } pub fn from_bytes(bytes: &[u8; 32]) -> Self { SigningKey::from_bytes(bytes).into() } - pub fn id(&self) -> &AuthorId { - &self.id + /// Returns the Author byte representation. + pub fn to_bytes(&self) -> [u8; 32] { + self.priv_key.to_bytes() + } + + /// Returns the AuthorId byte representation. 
+ pub fn id_bytes(&self) -> [u8; 32] { + self.priv_key.verifying_key().to_bytes() + } + + pub fn id(&self) -> AuthorId { + AuthorId(self.priv_key.verifying_key()) } pub fn sign(&self, msg: &[u8]) -> Signature { @@ -58,7 +64,7 @@ impl Author { } pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { - self.id.verify(msg, signature) + self.priv_key.verify_strict(msg, signature) } } @@ -85,12 +91,15 @@ impl AuthorId { pub fn as_bytes(&self) -> &[u8; 32] { self.0.as_bytes() } + + pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + Ok(AuthorId(VerifyingKey::from_bytes(bytes)?)) + } } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Namespace { priv_key: SigningKey, - id: NamespaceId, } impl Display for Namespace { @@ -105,8 +114,8 @@ impl FromStr for Namespace { fn from_str(s: &str) -> Result { let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; let priv_key = SigningKey::from_bytes(&priv_key); - let id = NamespaceId(priv_key.verifying_key()); - Ok(Namespace { priv_key, id }) + + Ok(Namespace { priv_key }) } } @@ -116,39 +125,46 @@ impl FromStr for Author { fn from_str(s: &str) -> Result { let priv_key: [u8; 32] = hex::decode(s).map_err(|_| ())?.try_into().map_err(|_| ())?; let priv_key = SigningKey::from_bytes(&priv_key); - let id = AuthorId(priv_key.verifying_key()); - Ok(Author { priv_key, id }) + + Ok(Author { priv_key }) } } impl From for Author { fn from(priv_key: SigningKey) -> Self { - let id = AuthorId(priv_key.verifying_key()); - Self { priv_key, id } + Self { priv_key } } } impl From for Namespace { fn from(priv_key: SigningKey) -> Self { - let id = NamespaceId(priv_key.verifying_key()); - Self { priv_key, id } + Self { priv_key } } } impl Namespace { pub fn new(rng: &mut R) -> Self { let priv_key = SigningKey::generate(rng); - let id = NamespaceId(priv_key.verifying_key()); - Namespace { priv_key, id } + Namespace { priv_key } } pub fn from_bytes(bytes: &[u8; 32]) -> Self { SigningKey::from_bytes(bytes).into() } - pub fn id(&self) -> &NamespaceId { - &self.id + /// Returns the Namespace byte representation. + pub fn to_bytes(&self) -> [u8; 32] { + self.priv_key.to_bytes() + } + + /// Returns the NamespaceId byte representation. + pub fn id_bytes(&self) -> [u8; 32] { + self.priv_key.verifying_key().to_bytes() + } + + pub fn id(&self) -> NamespaceId { + NamespaceId(self.priv_key.verifying_key()) } pub fn sign(&self, msg: &[u8]) -> Signature { @@ -156,7 +172,7 @@ impl Namespace { } pub fn verify(&self, msg: &[u8], signature: &Signature) -> Result<(), SignatureError> { - self.id.verify(msg, signature) + self.priv_key.verify_strict(msg, signature) } } @@ -183,46 +199,9 @@ impl NamespaceId { pub fn as_bytes(&self) -> &[u8; 32] { self.0.as_bytes() } -} - -/// Manages the replicas and authors for an instance. 
-#[derive(Debug, Clone, Default)] -pub struct ReplicaStore { - replicas: Arc>>, - authors: Arc>>, -} - -impl ReplicaStore { - pub fn get_replica(&self, namespace: &NamespaceId) -> Option { - let replicas = &*self.replicas.read(); - replicas.get(namespace).cloned() - } - - pub fn get_author(&self, author: &AuthorId) -> Option { - let authors = &*self.authors.read(); - authors.get(author).cloned() - } - pub fn new_author(&self, rng: &mut R) -> Author { - let author = Author::new(rng); - self.authors.write().insert(*author.id(), author.clone()); - author - } - - pub fn new_replica(&self, namespace: Namespace) -> Replica { - let replica = Replica::new(namespace); - self.replicas - .write() - .insert(replica.namespace(), replica.clone()); - replica - } - - pub fn open_replica(&self, bytes: &[u8]) -> anyhow::Result { - let replica = Replica::from_bytes(bytes)?; - self.replicas - .write() - .insert(replica.namespace(), replica.clone()); - Ok(replica) + pub fn from_bytes(bytes: &[u8; 32]) -> anyhow::Result { + Ok(NamespaceId(VerifyingKey::from_bytes(bytes)?)) } } @@ -239,109 +218,16 @@ pub enum InsertOrigin { } #[derive(derive_more::Debug, Clone)] -pub struct Replica { - inner: Arc>, +pub struct Replica> { + inner: Arc>>, #[debug("on_insert: [Box; {}]", "self.on_insert.len()")] on_insert: Arc>>, } #[derive(derive_more::Debug)] -struct InnerReplica { +struct InnerReplica> { namespace: Namespace, - peer: Peer, -} - -#[derive(Default, Debug, Clone)] -pub struct Store { - /// Stores records by identifier + timestamp - records: BTreeMap>, -} - -impl Store { - pub fn latest(&self) -> impl Iterator { - self.records.iter().filter_map(|(k, values)| { - let (_, v) = values.last_key_value()?; - Some((k, v)) - }) - } -} - -impl crate::ranger::Store for Store { - /// Get a the first key (or the default if none is available). - fn get_first(&self) -> RecordIdentifier { - self.records - .first_key_value() - .map(|(k, _)| k.clone()) - .unwrap_or_default() - } - - fn get(&self, key: &RecordIdentifier) -> Option<&SignedEntry> { - self.records - .get(key) - .and_then(|values| values.last_key_value()) - .map(|(_, v)| v) - } - - fn len(&self) -> usize { - self.records.len() - } - - fn is_empty(&self) -> bool { - self.records.is_empty() - } - - fn get_fingerprint( - &self, - range: &Range, - limit: Option<&Range>, - ) -> Fingerprint { - let elements = self.get_range(range.clone(), limit.cloned()); - let mut fp = Fingerprint::empty(); - for el in elements { - fp ^= el.0.as_fingerprint(); - } - - fp - } - - fn put(&mut self, k: RecordIdentifier, v: SignedEntry) { - // TODO: propagate error/not insertion? 
- if v.verify().is_ok() { - let timestamp = v.entry().record().timestamp(); - // TODO: verify timestamp is "reasonable" - - self.records.entry(k).or_default().insert(timestamp, v); - } - } - - type RangeIterator<'a> = RangeIterator<'a>; - fn get_range( - &self, - range: Range, - limit: Option>, - ) -> Self::RangeIterator<'_> { - RangeIterator { - iter: self.records.iter(), - range: Some(range), - limit, - } - } - - fn remove(&mut self, key: &RecordIdentifier) -> Option { - self.records - .remove(key) - .and_then(|mut v| v.last_entry().map(|e| e.remove_entry().1)) - } - - type AllIterator<'a> = RangeIterator<'a>; - - fn all(&self) -> Self::AllIterator<'_> { - RangeIterator { - iter: self.records.iter(), - range: None, - limit: None, - } - } + peer: Peer, } #[derive(Debug, Serialize, Deserialize)] @@ -350,44 +236,13 @@ struct ReplicaData { namespace: Namespace, } -#[derive(Debug)] -pub struct RangeIterator<'a> { - iter: std::collections::btree_map::Iter<'a, RecordIdentifier, BTreeMap>, - range: Option>, - limit: Option>, -} - -impl<'a> RangeIterator<'a> { - fn matches(&self, x: &RecordIdentifier) -> bool { - let range = self.range.as_ref().map(|r| x.contains(r)).unwrap_or(true); - let limit = self.limit.as_ref().map(|r| x.contains(r)).unwrap_or(true); - range && limit - } -} - -impl<'a> Iterator for RangeIterator<'a> { - type Item = (&'a RecordIdentifier, &'a SignedEntry); - - fn next(&mut self) -> Option { - let mut next = self.iter.next()?; - loop { - if self.matches(next.0) { - let (k, values) = next; - let (_, v) = values.last_key_value()?; - return Some((k, v)); - } - - next = self.iter.next()?; - } - } -} - -impl Replica { - pub fn new(namespace: Namespace) -> Self { +impl> Replica { + // TODO: check that read only replicas are possible + pub fn new(namespace: Namespace, store: S) -> Self { Replica { inner: Arc::new(RwLock::new(InnerReplica { namespace, - peer: Peer::default(), + peer: Peer::from_store(store), })), on_insert: Default::default(), } @@ -398,56 +253,14 @@ impl Replica { on_insert.push(callback); } - // TODO: not horrible - pub fn all(&self) -> Vec<(RecordIdentifier, SignedEntry)> { - self.inner - .read() - .peer - .all() - .map(|(k, v)| (k.clone(), v.clone())) - .collect() - } - - // TODO: not horrible - pub fn all_for_key(&self, key: impl AsRef<[u8]>) -> Vec<(RecordIdentifier, SignedEntry)> { - self.all() - .into_iter() - .filter(|(id, _entry)| id.key() == key.as_ref()) - .collect() - } - - // TODO: not horrible - pub fn all_with_key_prefix( - &self, - prefix: impl AsRef<[u8]>, - ) -> Vec<(RecordIdentifier, SignedEntry)> { - self.all() - .into_iter() - .filter(|(id, _entry)| id.key().starts_with(prefix.as_ref())) - .collect() - } - - pub fn to_bytes(&self) -> anyhow::Result { - let entries = self.all().into_iter().map(|(_id, entry)| entry).collect(); - let data = ReplicaData { - entries, - namespace: self.inner.read().namespace.clone(), - }; - let bytes = postcard::to_stdvec(&data)?; - Ok(bytes.into()) - } - - pub fn from_bytes(bytes: &[u8]) -> anyhow::Result { - let data: ReplicaData = postcard::from_bytes(bytes)?; - let replica = Self::new(data.namespace); - for entry in data.entries { - replica.insert_remote_entry(entry)?; - } - Ok(replica) - } - /// Inserts a new record at the given key. 
- pub fn insert(&self, key: impl AsRef<[u8]>, author: &Author, hash: Hash, len: u64) { + pub fn insert( + &self, + key: impl AsRef<[u8]>, + author: &Author, + hash: Hash, + len: u64, + ) -> Result<(), S::Error> { let mut inner = self.inner.write(); let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); @@ -456,12 +269,13 @@ impl Replica { // Store signed entries let entry = Entry::new(id.clone(), record); let signed_entry = entry.sign(&inner.namespace, author); - inner.peer.put(id, signed_entry.clone()); + inner.peer.put(id, signed_entry.clone())?; drop(inner); let on_insert = self.on_insert.read(); for cb in &*on_insert { cb(InsertOrigin::Local, signed_entry.clone()); } + Ok(()) } /// Hashes the given data and inserts it. @@ -473,24 +287,23 @@ impl Replica { key: impl AsRef<[u8]>, author: &Author, data: impl AsRef<[u8]>, - ) -> Hash { + ) -> Result { let len = data.as_ref().len() as u64; let hash = Hash::new(data); - self.insert(key, author, hash, len); - hash + self.insert(key, author, hash, len)?; + Ok(hash) } pub fn id(&self, key: impl AsRef<[u8]>, author: &Author) -> RecordIdentifier { let inner = self.inner.read(); - let id = RecordIdentifier::new(key, inner.namespace.id(), author.id()); - id + RecordIdentifier::new(key, inner.namespace.id(), author.id()) } pub fn insert_remote_entry(&self, entry: SignedEntry) -> anyhow::Result<()> { entry.verify()?; let mut inner = self.inner.write(); let id = entry.entry.id.clone(); - inner.peer.put(id, entry.clone()); + inner.peer.put(id, entry.clone()).map_err(Into::into)?; drop(inner); let on_insert = self.on_insert.read(); for cb in &*on_insert { @@ -499,140 +312,16 @@ impl Replica { Ok(()) } - /// Gets all entries matching this key and author. - pub fn get_latest_by_key_and_author( + pub fn sync_initial_message( &self, - key: impl AsRef<[u8]>, - author: &AuthorId, - ) -> Option { - let inner = self.inner.read(); - inner - .peer - .get(&RecordIdentifier::new(key, inner.namespace.id(), author)) - .cloned() - } - - /// Returns the latest version of the matching documents by key. - pub fn get_latest_by_key(&self, key: impl AsRef<[u8]>) -> GetLatestIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let key = key.as_ref().to_vec(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::Key { namespace, key }; - - GetLatestIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns the latest version of the matching documents by prefix. - pub fn get_latest_by_prefix(&self, prefix: impl AsRef<[u8]>) -> GetLatestIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let prefix = prefix.as_ref().to_vec(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::Prefix { namespace, prefix }; - - GetLatestIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns the latest versions of all documents. 
- pub fn get_latest(&self) -> GetLatestIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::All { namespace }; - - GetLatestIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns all versions of the matching documents by author. - pub fn get_all_by_key_and_author<'a, 'b: 'a>( - &'a self, - key: impl AsRef<[u8]> + 'b, - author: &AuthorId, - ) -> GetAllIter<'a> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let record_id = RecordIdentifier::new(key, guard.namespace.id(), author); - let filter = GetFilter::KeyAuthor(record_id); - - GetAllIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns all versions of the matching documents by key. - pub fn get_all_by_key(&self, key: impl AsRef<[u8]>) -> GetAllIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let key = key.as_ref().to_vec(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::Key { namespace, key }; - - GetAllIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns all versions of the matching documents by prefix. - pub fn get_all_by_prefix(&self, prefix: impl AsRef<[u8]>) -> GetAllIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let prefix = prefix.as_ref().to_vec(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::Prefix { namespace, prefix }; - - GetAllIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - /// Returns all versions of all documents. - pub fn get_all(&self) -> GetAllIter<'_> { - let guard: parking_lot::lock_api::RwLockReadGuard<_, _> = self.inner.read(); - let namespace = *guard.namespace.id(); - let filter = GetFilter::All { namespace }; - - GetAllIter { - records: parking_lot::lock_api::RwLockReadGuard::map(guard, move |inner| { - &inner.peer.store().records - }), - filter, - index: 0, - } - } - - pub fn sync_initial_message(&self) -> crate::ranger::Message { + ) -> Result, S::Error> { self.inner.read().peer.initial_message() } pub fn sync_process_message( &self, message: crate::ranger::Message, - ) -> Option> { + ) -> Result>, S::Error> { let reply = self .inner .write() @@ -642,152 +331,13 @@ impl Replica { for cb in &*on_insert { cb(InsertOrigin::Sync, entry.clone()); } - }); + })?; - reply + Ok(reply) } pub fn namespace(&self) -> NamespaceId { - *self.inner.read().namespace.id() - } -} - -#[derive(Debug)] -pub enum GetFilter { - /// All entries. - All { namespace: NamespaceId }, - /// Filter by key and author. - KeyAuthor(RecordIdentifier), - /// Filter by key only. - Key { - namespace: NamespaceId, - key: Vec, - }, - /// Filter by prefix only. - Prefix { - namespace: NamespaceId, - prefix: Vec, - }, -} - -#[derive(Debug)] -pub struct GetLatestIter<'a> { - // Oh my god, rust why u do this to me? - records: parking_lot::lock_api::MappedRwLockReadGuard< - 'a, - parking_lot::RawRwLock, - BTreeMap>, - >, - filter: GetFilter, - /// Current iteration index. 
- index: usize, -} - -impl<'a> Iterator for GetLatestIter<'a> { - type Item = SignedEntry; - - fn next(&mut self) -> Option { - let res = match self.filter { - GetFilter::All { namespace } => { - let (_, res) = self - .records - .iter() - .filter(|(k, _)| k.namespace() == &namespace) - .filter_map(|(_key, value)| value.last_key_value()) - .nth(self.index)?; - res.clone() - } - GetFilter::KeyAuthor(ref record_id) => { - let values = self.records.get(record_id)?; - let (_, res) = values.iter().nth(self.index)?; - res.clone() - } - GetFilter::Key { namespace, ref key } => { - let (_, res) = self - .records - .iter() - .filter(|(k, _)| k.key() == key && k.namespace() == &namespace) - .filter_map(|(_key, value)| value.last_key_value()) - .nth(self.index)?; - res.clone() - } - GetFilter::Prefix { - namespace, - ref prefix, - } => { - let (_, res) = self - .records - .iter() - .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == &namespace) - .filter_map(|(_key, value)| value.last_key_value()) - .nth(self.index)?; - res.clone() - } - }; - self.index += 1; - Some(res) - } -} - -#[derive(Debug)] -pub struct GetAllIter<'a> { - // Oh my god, rust why u do this to me? - records: parking_lot::lock_api::MappedRwLockReadGuard< - 'a, - parking_lot::RawRwLock, - BTreeMap>, - >, - filter: GetFilter, - /// Current iteration index. - index: usize, -} - -impl<'a> Iterator for GetAllIter<'a> { - type Item = (RecordIdentifier, u64, SignedEntry); - - fn next(&mut self) -> Option { - let res = match self.filter { - GetFilter::All { namespace } => self - .records - .iter() - .filter(|(k, _)| k.namespace() == &namespace) - .flat_map(|(key, value)| { - value - .iter() - .map(|(t, value)| (key.clone(), *t, value.clone())) - }) - .nth(self.index)?, - GetFilter::KeyAuthor(ref record_id) => { - let values = self.records.get(record_id)?; - let (t, value) = values.iter().nth(self.index)?; - (record_id.clone(), *t, value.clone()) - } - GetFilter::Key { namespace, ref key } => self - .records - .iter() - .filter(|(k, _)| k.key() == key && k.namespace() == &namespace) - .flat_map(|(key, value)| { - value - .iter() - .map(|(t, value)| (key.clone(), *t, value.clone())) - }) - .nth(self.index)?, - GetFilter::Prefix { - namespace, - ref prefix, - } => self - .records - .iter() - .filter(|(k, _)| k.key().starts_with(prefix) && k.namespace() == &namespace) - .flat_map(|(key, value)| { - value - .iter() - .map(|(t, value)| (key.clone(), *t, value.clone())) - }) - .nth(self.index)?, - }; - self.index += 1; - Some(res) + self.inner.read().namespace.id() } } @@ -799,6 +349,10 @@ pub struct SignedEntry { } impl SignedEntry { + pub fn new(signature: EntrySignature, entry: Entry) -> Self { + SignedEntry { signature, entry } + } + pub fn from_entry(entry: Entry, namespace: &Namespace, author: &Author) -> Self { let signature = EntrySignature::from_entry(&entry, namespace, author); SignedEntry { signature, entry } @@ -855,6 +409,24 @@ impl EntrySignature { Ok(()) } + + pub fn from_parts(namespace_sig: &[u8; 64], author_sig: &[u8; 64]) -> Self { + let namespace_signature = Signature::from_bytes(namespace_sig); + let author_signature = Signature::from_bytes(author_sig); + + EntrySignature { + author_signature, + namespace_signature, + } + } + + pub fn author_signature(&self) -> &Signature { + &self.author_signature + } + + pub fn namespace_signature(&self) -> &Signature { + &self.namespace_signature + } } /// A single entry in a replica. 
@@ -954,14 +526,22 @@ impl RangeKey for RecordIdentifier { } impl RecordIdentifier { - pub fn new(key: impl AsRef<[u8]>, namespace: &NamespaceId, author: &AuthorId) -> Self { + pub fn new(key: impl AsRef<[u8]>, namespace: NamespaceId, author: AuthorId) -> Self { RecordIdentifier { key: key.as_ref().to_vec(), - namespace: *namespace, - author: *author, + namespace, + author, } } + pub fn from_parts(key: &[u8], namespace: &[u8; 32], author: &[u8; 32]) -> anyhow::Result { + Ok(RecordIdentifier { + key: key.to_vec(), + namespace: NamespaceId::from_bytes(namespace)?, + author: AuthorId::from_bytes(author)?, + }) + } + pub fn as_bytes(&self, out: &mut Vec) { out.extend_from_slice(self.namespace.as_bytes()); out.extend_from_slice(self.author.as_bytes()); @@ -972,12 +552,20 @@ impl RecordIdentifier { &self.key } - pub fn namespace(&self) -> &NamespaceId { - &self.namespace + pub fn namespace(&self) -> NamespaceId { + self.namespace + } + + pub fn namespace_bytes(&self) -> &[u8; 32] { + self.namespace.as_bytes() } - pub fn author(&self) -> &AuthorId { - &self.author + pub fn author(&self) -> AuthorId { + self.author + } + + pub fn author_bytes(&self) -> &[u8; 32] { + self.author.as_bytes() } } @@ -1020,7 +608,7 @@ impl Record { } // TODO: remove - pub fn from_data(data: impl AsRef<[u8]>, namespace: &NamespaceId) -> Self { + pub fn from_data(data: impl AsRef<[u8]>, namespace: NamespaceId) -> Self { // Salted hash // TODO: do we actually want this? // TODO: this should probably use a namespace prefix if used @@ -1040,10 +628,30 @@ impl Record { #[cfg(test)] mod tests { + use anyhow::Result; + + use crate::{ranger::Range, store}; + use super::*; #[test] - fn test_basics() { + fn test_basics_memory() -> Result<()> { + let store = store::memory::Store::default(); + test_basics(store)?; + + Ok(()) + } + + #[cfg(feature = "fs-store")] + #[test] + fn test_basics_fs() -> Result<()> { + let dbfile = tempfile::NamedTempFile::new()?; + let store = store::fs::Store::new(dbfile.path())?; + test_basics(store)?; + Ok(()) + } + + fn test_basics(store: S) -> Result<()> { let mut rng = rand::thread_rng(); let alice = Author::new(&mut rng); let bob = Author::new(&mut rng); @@ -1055,94 +663,126 @@ mod tests { let signed_entry = entry.sign(&myspace, &alice); signed_entry.verify().expect("failed to verify"); - let my_replica = Replica::new(myspace); + let my_replica = store.new_replica(myspace)?; for i in 0..10 { - my_replica.hash_and_insert(format!("/{i}"), &alice, format!("{i}: hello from alice")); + my_replica + .hash_and_insert(format!("/{i}"), &alice, format!("{i}: hello from alice")) + .map_err(Into::into)?; } for i in 0..10 { - let res = my_replica - .get_latest_by_key_and_author(format!("/{i}"), alice.id()) + let res = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), format!("/{i}"))? .unwrap(); let len = format!("{i}: hello from alice").as_bytes().len() as u64; assert_eq!(res.entry().record().content_len(), len); - res.verify().expect("invalid signature"); + res.verify()?; } // Test multiple records for the same key - my_replica.hash_and_insert("/cool/path", &alice, "round 1"); - let _entry = my_replica - .get_latest_by_key_and_author("/cool/path", alice.id()) + my_replica + .hash_and_insert("/cool/path", &alice, "round 1") + .map_err(Into::into)?; + let _entry = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? 
.unwrap(); // Second - my_replica.hash_and_insert("/cool/path", &alice, "round 2"); - let _entry = my_replica - .get_latest_by_key_and_author("/cool/path", alice.id()) + my_replica + .hash_and_insert("/cool/path", &alice, "round 2") + .map_err(Into::into)?; + let _entry = store + .get_latest_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? .unwrap(); // Get All by author - let entries: Vec<_> = my_replica - .get_all_by_key_and_author("/cool/path", alice.id()) - .collect(); + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 2); // Get All by key - let entries: Vec<_> = my_replica.get_all_by_key(b"/cool/path").collect(); + let entries: Vec<_> = store + .get_all_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 2); // Get latest by key - let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); + let entries: Vec<_> = store + .get_latest_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 1); // Get latest by prefix - let entries: Vec<_> = my_replica.get_latest_by_prefix(b"/cool").collect(); + let entries: Vec<_> = store + .get_latest_by_prefix(my_replica.namespace(), b"/cool")? + .collect::>()?; assert_eq!(entries.len(), 1); // Get All - let entries: Vec<_> = my_replica.get_all().collect(); + let entries: Vec<_> = store + .get_all(my_replica.namespace())? + .collect::>()?; assert_eq!(entries.len(), 12); // Get All latest - let entries: Vec<_> = my_replica.get_latest().collect(); + let entries: Vec<_> = store + .get_latest(my_replica.namespace())? + .collect::>()?; assert_eq!(entries.len(), 11); // insert record from different author - let _entry = my_replica.hash_and_insert("/cool/path", &bob, "bob round 1"); + let _entry = my_replica + .hash_and_insert("/cool/path", &bob, "bob round 1") + .map_err(Into::into)?; // Get All by author - let entries: Vec<_> = my_replica - .get_all_by_key_and_author("/cool/path", alice.id()) - .collect(); + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), alice.id(), "/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 2); - let entries: Vec<_> = my_replica - .get_all_by_key_and_author("/cool/path", bob.id()) - .collect(); + let entries: Vec<_> = store + .get_all_by_key_and_author(my_replica.namespace(), bob.id(), "/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 1); // Get All by key - let entries: Vec<_> = my_replica.get_all_by_key(b"/cool/path").collect(); + let entries: Vec<_> = store + .get_all_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 3); // Get latest by key - let entries: Vec<_> = my_replica.get_latest_by_key(b"/cool/path").collect(); + let entries: Vec<_> = store + .get_latest_by_key(my_replica.namespace(), b"/cool/path")? + .collect::>()?; assert_eq!(entries.len(), 2); // Get latest by prefix - let entries: Vec<_> = my_replica.get_latest_by_prefix(b"/cool").collect(); + let entries: Vec<_> = store + .get_latest_by_prefix(my_replica.namespace(), b"/cool")? + .collect::>()?; assert_eq!(entries.len(), 2); // Get all by prefix - let entries: Vec<_> = my_replica.get_all_by_prefix(b"/cool").collect(); + let entries: Vec<_> = store + .get_all_by_prefix(my_replica.namespace(), b"/cool")? 
+ .collect::>()?; assert_eq!(entries.len(), 3); // Get All - let entries: Vec<_> = my_replica.get_all().collect(); + let entries: Vec<_> = store + .get_all(my_replica.namespace())? + .collect::>()?; assert_eq!(entries.len(), 13); // Get All latest - let entries: Vec<_> = my_replica.get_latest().collect(); + let entries: Vec<_> = store + .get_latest(my_replica.namespace())? + .collect::>()?; assert_eq!(entries.len(), 12); + + Ok(()) } #[test] @@ -1152,10 +792,10 @@ mod tests { let k = vec!["a", "c", "z"]; let mut n: Vec<_> = (0..3).map(|_| Namespace::new(&mut rng)).collect(); - n.sort_by_key(|n| *n.id()); + n.sort_by_key(|n| n.id()); let mut a: Vec<_> = (0..3).map(|_| Author::new(&mut rng)).collect(); - a.sort_by_key(|a| *a.id()); + a.sort_by_key(|a| a.id()); // Just key { @@ -1207,54 +847,89 @@ mod tests { } #[test] - fn test_replica_sync() { + fn test_replica_sync_memory() -> Result<()> { + let alice_store = store::memory::Store::default(); + let bob_store = store::memory::Store::default(); + + test_replica_sync(alice_store, bob_store)?; + Ok(()) + } + + #[cfg(feature = "fs-store")] + #[test] + fn test_replica_sync_fs() -> Result<()> { + let alice_dbfile = tempfile::NamedTempFile::new()?; + let alice_store = store::fs::Store::new(alice_dbfile.path())?; + let bob_dbfile = tempfile::NamedTempFile::new()?; + let bob_store = store::fs::Store::new(bob_dbfile.path())?; + test_replica_sync(alice_store, bob_store)?; + + Ok(()) + } + + fn test_replica_sync(alice_store: S, bob_store: S) -> Result<()> { let alice_set = ["ape", "eel", "fox", "gnu"]; let bob_set = ["bee", "cat", "doe", "eel", "fox", "hog"]; let mut rng = rand::thread_rng(); let author = Author::new(&mut rng); let myspace = Namespace::new(&mut rng); - let mut alice = Replica::new(myspace.clone()); + let alice = alice_store.new_replica(myspace.clone())?; for el in &alice_set { - alice.hash_and_insert(el, &author, el.as_bytes()); + alice + .hash_and_insert(el, &author, el.as_bytes()) + .map_err(Into::into)?; } - let mut bob = Replica::new(myspace); + let bob = bob_store.new_replica(myspace)?; for el in &bob_set { - bob.hash_and_insert(el, &author, el.as_bytes()); + bob.hash_and_insert(el, &author, el.as_bytes()) + .map_err(Into::into)?; } - sync(&author, &mut alice, &mut bob, &alice_set, &bob_set); + sync( + &author, + &alice, + &alice_store, + &bob, + &bob_store, + &alice_set, + &bob_set, + )?; + Ok(()) } - fn sync( + fn sync( author: &Author, - alice: &mut Replica, - bob: &mut Replica, + alice: &Replica, + alice_store: &S, + bob: &Replica, + bob_store: &S, alice_set: &[&str], bob_set: &[&str], - ) { + ) -> Result<()> { // Sync alice - bob - let mut next_to_bob = Some(alice.sync_initial_message()); + let mut next_to_bob = Some(alice.sync_initial_message().map_err(Into::into)?); let mut rounds = 0; while let Some(msg) = next_to_bob.take() { assert!(rounds < 100, "too many rounds"); rounds += 1; println!("round {}", rounds); - if let Some(msg) = bob.sync_process_message(msg) { - next_to_bob = alice.sync_process_message(msg); + if let Some(msg) = bob.sync_process_message(msg).map_err(Into::into)? 
{ + next_to_bob = alice.sync_process_message(msg).map_err(Into::into)?; } } // Check result for el in alice_set { - alice.get_latest_by_key_and_author(el, author.id()).unwrap(); - bob.get_latest_by_key_and_author(el, author.id()).unwrap(); + alice_store.get_latest_by_key_and_author(alice.namespace(), author.id(), el)?; + bob_store.get_latest_by_key_and_author(bob.namespace(), author.id(), el)?; } for el in bob_set { - alice.get_latest_by_key_and_author(el, author.id()).unwrap(); - bob.get_latest_by_key_and_author(el, author.id()).unwrap(); + alice_store.get_latest_by_key_and_author(alice.namespace(), author.id(), el)?; + bob_store.get_latest_by_key_and_author(bob.namespace(), author.id(), el)?; } + Ok(()) } } diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index 411af9dcdd..c9c10fb911 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -26,7 +26,7 @@ iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" -iroh-sync = { path = "../iroh-sync" } +iroh-sync = { path = "../iroh-sync" } iroh-gossip = { path = "../iroh-gossip" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } @@ -62,7 +62,7 @@ rustyline = { version = "12.0.0", optional = true } [features] default = ["cli", "metrics", "sync"] -sync = ["metrics", "flat-db"] +sync = ["metrics", "flat-db", "iroh-sync/fs-store"] cli = ["clap", "config", "console", "dirs-next", "indicatif", "multibase", "quic-rpc/quinn-transport", "tempfile", "tokio/rt-multi-thread", "tracing-subscriber"] metrics = ["iroh-metrics", "flat-db", "mem-db", "iroh-collection"] flat-db = [] diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 48afe81265..917011efd1 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -16,7 +16,10 @@ use anyhow::{anyhow, bail}; use clap::{CommandFactory, FromArgMatches, Parser}; use ed25519_dalek::SigningKey; use indicatif::HumanBytes; -use iroh::sync::{BlobStore, Doc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN}; +use iroh::sync::{ + BlobStore, Doc as SyncDoc, DocStore, DownloadMode, LiveSync, PeerSource, SYNC_ALPN, +}; +use iroh_bytes::util::runtime; use iroh_gossip::{ net::{GossipHandle, GOSSIP_ALPN}, proto::TopicId, @@ -32,7 +35,10 @@ use iroh_net::{ tls::Keypair, MagicEndpoint, }; -use iroh_sync::sync::{Author, Namespace, SignedEntry}; +use iroh_sync::{ + store::{self, Store as _}, + sync::{Author, Namespace, SignedEntry}, +}; use once_cell::sync::OnceCell; use serde::{Deserialize, Serialize}; use tokio::{ @@ -48,6 +54,8 @@ use iroh_bytes_handlers::IrohBytesHandlers; const MAX_DISPLAY_CONTENT_LEN: u64 = 1024 * 1024; +type Doc = SyncDoc; + #[derive(Parser, Debug)] struct Args { /// Private key to derive our peer id from @@ -219,10 +227,12 @@ async fn run(args: Args) -> anyhow::Result<()> { // create a doc store for the iroh-sync docs let author = Author::from(keypair.secret().clone()); - let docs = DocStore::new(blobs.clone(), author, storage_path.join("docs")); + let docs_path = storage_path.join("docs"); + tokio::fs::create_dir_all(&docs_path).await?; + let docs = DocStore::new(blobs.clone(), author, docs_path)?; // create the live syncer - let live_sync = LiveSync::spawn(endpoint.clone(), gossip.clone()); + let live_sync = LiveSync::::spawn(endpoint.clone(), gossip.clone()); // construct the state that is passed 
to the endpoint loop and from there cloned // into to the connection handler task for incoming connections. @@ -233,12 +243,12 @@ async fn run(args: Args) -> anyhow::Result<()> { }); // spawn our endpoint loop that forwards incoming connections - tokio::spawn(endpoint_loop(endpoint.clone(), state)); + rt.main().spawn(endpoint_loop(endpoint.clone(), state)); // open our document and add to the live syncer let namespace = Namespace::from_bytes(topic.as_bytes()); println!("> opening doc {}", fmt_hash(namespace.id().as_bytes())); - let doc = docs.create_or_open(namespace, DownloadMode::Always).await?; + let doc: Doc = docs.create_or_open(namespace, DownloadMode::Always).await?; live_sync.add(doc.replica().clone(), peers.clone()).await?; // spawn an repl thread that reads stdin and parses each line as a `Cmd` command @@ -278,7 +288,7 @@ async fn run(args: Args) -> anyhow::Result<()> { _ = tokio::signal::ctrl_c() => { println!("> aborted"); } - res = handle_command(cmd, &doc, &our_ticket, &log_filter, ¤t_watch) => if let Err(err) = res { + res = handle_command(cmd, &rt, docs.store(), &doc, &our_ticket, &log_filter, ¤t_watch) => if let Err(err) = res { println!("> error: {err}"); }, }; @@ -292,7 +302,6 @@ async fn run(args: Args) -> anyhow::Result<()> { } println!("> persisting document and blob database at {storage_path:?}"); blobs.save().await?; - docs.save(&doc).await?; if let Some(metrics_fut) = metrics_fut { metrics_fut.abort(); @@ -304,6 +313,8 @@ async fn run(args: Args) -> anyhow::Result<()> { async fn handle_command( cmd: Cmd, + rt: &runtime::Handle, + store: &store::fs::Store, doc: &Doc, ticket: &Ticket, log_filter: &LogLevelReload, @@ -313,9 +324,18 @@ async fn handle_command( Cmd::Set { key, value } => { doc.insert_bytes(&key, value.into_bytes().into()).await?; } - Cmd::Get { key, print_content } => { - let entries = doc.replica().all_for_key(key.as_bytes()); - for (_id, entry) in entries { + Cmd::Get { + key, + print_content, + prefix, + } => { + let entries = if prefix { + store.get_all_by_prefix(doc.replica().namespace(), key.as_bytes())? + } else { + store.get_all_by_key(doc.replica().namespace(), key.as_bytes())? + }; + for entry in entries { + let (_id, entry) = entry?; println!("{}", fmt_entry(&entry)); if print_content { println!("{}", fmt_content(doc, &entry).await); @@ -336,13 +356,18 @@ async fn handle_command( }, Cmd::Ls { prefix } => { let entries = match prefix { - None => doc.replica().all(), - Some(prefix) => doc.replica().all_with_key_prefix(prefix.as_bytes()), + None => store.get_all(doc.replica().namespace())?, + Some(prefix) => { + store.get_all_by_prefix(doc.replica().namespace(), prefix.as_bytes())? 
+ } }; - println!("> {} entries", entries.len()); - for (_id, entry) in entries { + let mut count = 0; + for entry in entries { + let (_id, entry) = entry?; + count += 1; println!("{}", fmt_entry(&entry),); } + println!("> {} entries", count); } Cmd::Ticket => { println!("Ticket: {ticket}"); @@ -352,7 +377,7 @@ async fn handle_command( log_filter.modify(|layer| *layer = next_filter)?; } Cmd::Stats => get_stats(), - Cmd::Fs(cmd) => handle_fs_command(cmd, doc).await?, + Cmd::Fs(cmd) => handle_fs_command(cmd, store, doc).await?, Cmd::Hammer { prefix, threads, @@ -376,7 +401,7 @@ async fn handle_command( let prefix = prefix.clone(); let doc = doc.clone(); let bytes = bytes.clone(); - let handle = tokio::spawn(async move { + let handle = rt.main().spawn(async move { for i in 0..count { let value = String::from_utf8(bytes.clone()).unwrap(); let key = format!("{}/{}/{}", prefix, t, i); @@ -391,13 +416,16 @@ async fn handle_command( for t in 0..threads { let prefix = prefix.clone(); let doc = doc.clone(); - let handle = tokio::spawn(async move { + let store = store.clone(); + let handle = rt.main().spawn(async move { let mut read = 0; for i in 0..count { let key = format!("{}/{}/{}", prefix, t, i); - let entries = doc.replica().all_for_key(key.as_bytes()); - for (_id, entry) in entries { - let _content = fmt_content(&doc, &entry).await; + let entries = store + .get_all_by_key(doc.replica().namespace(), key.as_bytes())?; + for entry in entries { + let (_id, entry) = entry?; + let _content = fmt_content_simple(&doc, &entry); read += 1; } } @@ -425,7 +453,7 @@ async fn handle_command( Ok(()) } -async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { +async fn handle_fs_command(cmd: FsCmd, store: &store::fs::Store, doc: &Doc) -> anyhow::Result<()> { match cmd { FsCmd::ImportFile { file_path, key } => { let file_path = canonicalize_path(&file_path)?.canonicalize()?; @@ -473,10 +501,12 @@ async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { } let root = canonicalize_path(&dir_path)?; println!("> exporting {key_prefix} to {root:?}"); - let entries = doc.replica().get_latest_by_prefix(key_prefix.as_bytes()); + let entries = + store.get_latest_by_prefix(doc.replica().namespace(), key_prefix.as_bytes())?; let mut checked_dirs = HashSet::new(); for entry in entries { - let key = entry.entry().id().key(); + let (id, entry) = entry?; + let key = id.key(); let relative = String::from_utf8(key[key_prefix.len()..].to_vec())?; let len = entry.entry().record().content_len(); if let Some(mut reader) = doc.get_content_reader(&entry).await { @@ -499,8 +529,11 @@ async fn handle_fs_command(cmd: FsCmd, doc: &Doc) -> anyhow::Result<()> { FsCmd::ExportFile { key, file_path } => { let path = canonicalize_path(&file_path)?; // TODO: Fix - let entry = doc.replica().get_latest_by_key(&key).next(); + let entry = store + .get_latest_by_key(doc.replica().namespace(), &key)? + .next(); if let Some(entry) = entry { + let (_, entry) = entry?; println!("> exporting {key} to {path:?}"); let parent = path.parent().ok_or_else(|| anyhow!("Invalid path"))?; tokio::fs::create_dir_all(&parent).await?; @@ -537,6 +570,9 @@ pub enum Cmd { /// Print the value (but only if it is valid UTF-8 and smaller than 1MB) #[clap(short = 'c', long)] print_content: bool, + /// Match the key as prefix, not an exact match. + #[clap(short = 'p', long)] + prefix: bool, }, /// List entries. 
Ls { @@ -802,6 +838,12 @@ fn fmt_entry(entry: &SignedEntry) -> String { let len = HumanBytes(entry.entry().record().content_len()); format!("@{author}: {key} = {hash} ({len})",) } + +async fn fmt_content_simple(_doc: &Doc, entry: &SignedEntry) -> String { + let len = entry.entry().record().content_len(); + format!("<{}>", HumanBytes(len)) +} + async fn fmt_content(doc: &Doc, entry: &SignedEntry) -> String { let len = entry.entry().record().content_len(); if len > MAX_DISPLAY_CONTENT_LEN { diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index d5b92516b8..bcadeb23f9 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -5,7 +5,10 @@ use std::net::SocketAddr; use anyhow::{bail, ensure, Context, Result}; use bytes::BytesMut; use iroh_net::{tls::PeerId, MagicEndpoint}; -use iroh_sync::sync::{NamespaceId, Replica, ReplicaStore}; +use iroh_sync::{ + store, + sync::{NamespaceId, Replica}, +}; use serde::{Deserialize, Serialize}; use tokio::io::{AsyncRead, AsyncWrite}; use tracing::debug; @@ -38,9 +41,9 @@ enum Message { } /// Connect to a peer and sync a replica -pub async fn connect_and_sync( +pub async fn connect_and_sync( endpoint: &MagicEndpoint, - doc: &Replica, + doc: &Replica, peer_id: PeerId, derp_region: Option, addrs: &[SocketAddr], @@ -51,16 +54,16 @@ pub async fn connect_and_sync( .await .context("dial_and_sync")?; let (mut send_stream, mut recv_stream) = connection.open_bi().await?; - let res = run_alice(&mut send_stream, &mut recv_stream, doc).await; + let res = run_alice::(&mut send_stream, &mut recv_stream, doc).await; debug!("sync with peer {}: finish {:?}", peer_id, res); res } /// Runs the initiator side of the sync protocol. -pub async fn run_alice( +pub async fn run_alice( writer: &mut W, reader: &mut R, - alice: &Replica, + alice: &Replica, ) -> Result<()> { let mut buffer = BytesMut::with_capacity(1024); @@ -68,7 +71,7 @@ pub async fn run_alice( let init_message = Message::Init { namespace: alice.namespace(), - message: alice.sync_initial_message(), + message: alice.sync_initial_message().map_err(Into::into)?, }; let msg_bytes = postcard::to_stdvec(&init_message)?; iroh_bytes::protocol::write_lp(writer, &msg_bytes).await?; @@ -83,7 +86,7 @@ pub async fn run_alice( bail!("unexpected message: init"); } Message::Sync(msg) => { - if let Some(msg) = alice.sync_process_message(msg) { + if let Some(msg) = alice.sync_process_message(msg).map_err(Into::into)? { send_sync_message(writer, msg).await?; } else { break; @@ -96,9 +99,9 @@ pub async fn run_alice( } /// Handle an iroh-sync connection and sync all shared documents in the replica store. -pub async fn handle_connection( +pub async fn handle_connection( connecting: quinn::Connecting, - replica_store: ReplicaStore, + replica_store: S, ) -> Result<()> { let connection = connecting.await?; debug!("> connection established!"); @@ -113,10 +116,10 @@ pub async fn handle_connection( } /// Runs the receiver side of the sync protocol. -pub async fn run_bob( +pub async fn run_bob( writer: &mut W, reader: &mut R, - replica_store: ReplicaStore, + replica_store: S, ) -> Result<()> { let mut buffer = BytesMut::with_capacity(1024); @@ -129,10 +132,10 @@ pub async fn run_bob( Message::Init { namespace, message } => { ensure!(replica.is_none(), "double init message"); - match replica_store.get_replica(&namespace) { + match replica_store.get_replica(&namespace)? 
{ Some(r) => { debug!("starting sync for {}", namespace); - if let Some(msg) = r.sync_process_message(message) { + if let Some(msg) = r.sync_process_message(message).map_err(Into::into)? { send_sync_message(writer, msg).await?; } else { break; @@ -147,7 +150,7 @@ pub async fn run_bob( } Message::Sync(msg) => match replica { Some(ref replica) => { - if let Some(msg) = replica.sync_process_message(msg) { + if let Some(msg) = replica.sync_process_message(msg).map_err(Into::into)? { send_sync_message(writer, msg).await?; } else { break; @@ -174,7 +177,7 @@ async fn send_sync_message( #[cfg(test)] mod tests { - use iroh_sync::sync::Namespace; + use iroh_sync::{store::Store as _, sync::Namespace}; use super::*; @@ -182,38 +185,83 @@ mod tests { async fn test_sync_simple() -> Result<()> { let mut rng = rand::thread_rng(); - let replica_store = ReplicaStore::default(); + let alice_replica_store = store::memory::Store::default(); // For now uses same author on both sides. - let author = replica_store.new_author(&mut rng); - let namespace = Namespace::new(&mut rng); - let bob_replica = replica_store.new_replica(namespace.clone()); - bob_replica.hash_and_insert("hello alice", &author, "from bob"); + let author = alice_replica_store.new_author(&mut rng).unwrap(); - let alice_replica = Replica::new(namespace.clone()); - alice_replica.hash_and_insert("hello bob", &author, "from alice"); + let namespace = Namespace::new(&mut rng); - assert_eq!(bob_replica.all().len(), 1); - assert_eq!(alice_replica.all().len(), 1); + let alice_replica = alice_replica_store.new_replica(namespace.clone()).unwrap(); + alice_replica + .hash_and_insert("hello bob", &author, "from alice") + .unwrap(); + + let bob_replica_store = store::memory::Store::default(); + let bob_replica = bob_replica_store.new_replica(namespace.clone()).unwrap(); + bob_replica + .hash_and_insert("hello alice", &author, "from bob") + .unwrap(); + + assert_eq!( + bob_replica_store + .get_all(bob_replica.namespace()) + .unwrap() + .collect::>>() + .unwrap() + .len(), + 1 + ); + assert_eq!( + alice_replica_store + .get_all(alice_replica.namespace()) + .unwrap() + .collect::>>() + .unwrap() + .len(), + 1 + ); let (alice, bob) = tokio::io::duplex(64); let (mut alice_reader, mut alice_writer) = tokio::io::split(alice); let replica = alice_replica.clone(); let alice_task = tokio::task::spawn(async move { - run_alice(&mut alice_writer, &mut alice_reader, &replica).await + run_alice::(&mut alice_writer, &mut alice_reader, &replica) + .await }); let (mut bob_reader, mut bob_writer) = tokio::io::split(bob); - let bob_replica_store = replica_store.clone(); + let bob_replica_store_task = bob_replica_store.clone(); let bob_task = tokio::task::spawn(async move { - run_bob(&mut bob_writer, &mut bob_reader, bob_replica_store).await + run_bob::( + &mut bob_writer, + &mut bob_reader, + bob_replica_store_task, + ) + .await }); alice_task.await??; bob_task.await??; - assert_eq!(bob_replica.all().len(), 2); - assert_eq!(alice_replica.all().len(), 2); + assert_eq!( + bob_replica_store + .get_all(bob_replica.namespace()) + .unwrap() + .collect::>>() + .unwrap() + .len(), + 2 + ); + assert_eq!( + alice_replica_store + .get_all(alice_replica.namespace()) + .unwrap() + .collect::>>() + .unwrap() + .len(), + 2 + ); Ok(()) } diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index c68d06272c..5193fe7b53 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -6,6 +6,7 @@ use std::{ time::Instant, }; +use anyhow::Result; use bytes::Bytes; use 
futures::{ future::{BoxFuture, LocalBoxFuture, Shared}, @@ -17,9 +18,9 @@ use iroh_gossip::net::util::Dialer; use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; use iroh_metrics::{inc, inc_by}; use iroh_net::{tls::PeerId, MagicEndpoint}; -use iroh_sync::sync::{ - Author, InsertOrigin, Namespace, NamespaceId, OnInsertCallback, Replica, ReplicaStore, - SignedEntry, +use iroh_sync::{ + store::{self, Store as _}, + sync::{Author, InsertOrigin, Namespace, OnInsertCallback, Replica, SignedEntry}, }; use tokio::{io::AsyncRead, sync::oneshot}; use tokio_stream::StreamExt; @@ -36,33 +37,32 @@ pub enum DownloadMode { #[derive(Debug, Clone)] pub struct DocStore { - replicas: ReplicaStore, + replicas: store::fs::Store, blobs: BlobStore, local_author: Arc, - storage_path: PathBuf, } +const REPLICA_DB_NAME: &str = "replica.db"; + impl DocStore { - pub fn new(blobs: BlobStore, author: Author, storage_path: PathBuf) -> Self { - Self { - replicas: ReplicaStore::default(), + pub fn new(blobs: BlobStore, author: Author, storage_path: PathBuf) -> Result { + let replicas = store::fs::Store::new(storage_path.join(REPLICA_DB_NAME))?; + + Ok(Self { + replicas, local_author: Arc::new(author), - storage_path, blobs, - } + }) } pub async fn create_or_open( &self, namespace: Namespace, download_mode: DownloadMode, - ) -> anyhow::Result { - let path = self.replica_path(namespace.id()); - let replica = if path.exists() { - let bytes = tokio::fs::read(path).await?; - self.replicas.open_replica(&bytes)? - } else { - self.replicas.new_replica(namespace) + ) -> Result> { + let replica = match self.replicas.get_replica(&namespace.id())? { + Some(replica) => replica, + None => self.replicas.new_replica(namespace)?, }; let doc = Doc::new( @@ -74,21 +74,13 @@ impl DocStore { Ok(doc) } - pub async fn save(&self, doc: &Doc) -> anyhow::Result<()> { - let replica_path = self.replica_path(&doc.replica().namespace()); - tokio::fs::create_dir_all(replica_path.parent().unwrap()).await?; - let bytes = doc.replica().to_bytes()?; - tokio::fs::write(replica_path, bytes).await?; - Ok(()) - } - - fn replica_path(&self, namespace: &NamespaceId) -> PathBuf { - self.storage_path.join(hex::encode(namespace.as_bytes())) - } - pub async fn handle_connection(&self, conn: quinn::Connecting) -> anyhow::Result<()> { crate::sync::handle_connection(conn, self.replicas.clone()).await } + + pub fn store(&self) -> &store::fs::Store { + &self.replicas + } } /// A replica with a [`BlobStore`] for contents. @@ -99,15 +91,15 @@ impl DocStore { /// We want to try other peers if the author is offline (or always). /// We'll need some heuristics which peers to try. 
#[derive(Clone, Debug)] -pub struct Doc { - replica: Replica, +pub struct Doc { + replica: Replica, blobs: BlobStore, local_author: Arc, } -impl Doc { +impl Doc { pub fn new( - replica: Replica, + replica: Replica, blobs: BlobStore, local_author: Arc, download_mode: DownloadMode, @@ -151,7 +143,7 @@ impl Doc { self.replica.on_insert(callback); } - pub fn replica(&self) -> &Replica { + pub fn replica(&self) -> &Replica { &self.replica } @@ -165,7 +157,9 @@ impl Doc { content: Bytes, ) -> anyhow::Result<(Hash, u64)> { let (hash, len) = self.blobs.put_bytes(content).await?; - self.replica.insert(key, &self.local_author, hash, len); + self.replica + .insert(key, &self.local_author, hash, len) + .map_err(Into::into)?; Ok((hash, len)) } @@ -175,7 +169,9 @@ impl Doc { content: impl AsyncRead + Unpin, ) -> anyhow::Result<(Hash, u64)> { let (hash, len) = self.blobs.put_reader(content).await?; - self.replica.insert(key, &self.local_author, hash, len); + self.replica + .insert(key, &self.local_author, hash, len) + .map_err(Into::into)?; Ok((hash, len)) } diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 46159cd4bc..3b1ca971a2 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -13,7 +13,10 @@ use iroh_gossip::{ }; use iroh_metrics::inc; use iroh_net::{tls::PeerId, MagicEndpoint}; -use iroh_sync::sync::{InsertOrigin, Replica, SignedEntry}; +use iroh_sync::{ + store, + sync::{InsertOrigin, Replica, SignedEntry}, +}; use serde::{Deserialize, Serialize}; use tokio::{sync::mpsc, task::JoinError}; use tracing::{debug, error}; @@ -45,9 +48,9 @@ enum SyncState { } #[derive(Debug)] -pub enum ToActor { +pub enum ToActor { SyncDoc { - doc: Replica, + doc: Replica, initial_peers: Vec, }, Shutdown, @@ -55,12 +58,12 @@ pub enum ToActor { /// Handle to a running live sync actor #[derive(Debug, Clone)] -pub struct LiveSync { - to_actor_tx: mpsc::Sender, +pub struct LiveSync { + to_actor_tx: mpsc::Sender>, task: Shared>>>, } -impl LiveSync { +impl LiveSync { pub fn spawn(endpoint: MagicEndpoint, gossip: GossipHandle) -> Self { let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); let mut actor = Actor::new(endpoint, gossip, to_actor_rx); @@ -78,14 +81,18 @@ impl LiveSync { /// Cancel the live sync. pub async fn cancel(&self) -> Result<()> { - self.to_actor_tx.send(ToActor::Shutdown).await?; + self.to_actor_tx.send(ToActor::::Shutdown).await?; self.task.clone().await?; Ok(()) } - pub async fn add(&self, doc: Replica, initial_peers: Vec) -> Result<()> { + pub async fn add( + &self, + doc: Replica, + initial_peers: Vec, + ) -> Result<()> { self.to_actor_tx - .send(ToActor::SyncDoc { doc, initial_peers }) + .send(ToActor::::SyncDoc { doc, initial_peers }) .await?; Ok(()) } @@ -93,15 +100,15 @@ impl LiveSync { // TODO: Also add `handle_connection` to the replica and track incoming sync requests here too. // Currently peers might double-sync in both directions. 
-struct Actor { +struct Actor { endpoint: MagicEndpoint, gossip: GossipHandle, - docs: HashMap, + docs: HashMap>, subscription: BoxStream<'static, Result<(TopicId, Event)>>, sync_state: HashMap<(TopicId, PeerId), SyncState>, - to_actor_rx: mpsc::Receiver, + to_actor_rx: mpsc::Receiver>, insert_entry_tx: flume::Sender<(TopicId, SignedEntry)>, insert_entry_rx: flume::Receiver<(TopicId, SignedEntry)>, @@ -109,11 +116,11 @@ struct Actor { pending_joins: FuturesUnordered)>>, } -impl Actor { +impl Actor { pub fn new( endpoint: MagicEndpoint, gossip: GossipHandle, - to_actor_rx: mpsc::Receiver, + to_actor_rx: mpsc::Receiver>, ) -> Self { let (insert_tx, insert_rx) = flume::bounded(64); let sub = gossip.clone().subscribe_all().boxed(); @@ -193,7 +200,7 @@ impl Actor { async move { debug!("sync with {peer}"); // TODO: Make sure that the peer is dialable. - let res = connect_and_sync(&endpoint, &doc, peer, None, &[]).await; + let res = connect_and_sync::(&endpoint, &doc, peer, None, &[]).await; debug!("> synced with {peer}: {res:?}"); // collect metrics match &res { @@ -215,7 +222,11 @@ impl Actor { Ok(()) } - async fn insert_doc(&mut self, doc: Replica, initial_peers: Vec) -> Result<()> { + async fn insert_doc( + &mut self, + doc: Replica, + initial_peers: Vec, + ) -> Result<()> { let peer_ids: Vec = initial_peers.iter().map(|p| p.peer_id).collect(); // add addresses of initial peers to our endpoint address book From 7c6cb35a3acfb1dcf7a2ad4a53590a504b08d541 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 12:55:10 +0200 Subject: [PATCH 36/45] adapt after rebase on main --- Cargo.lock | 22 ++++---------------- iroh-bytes/src/util.rs | 6 ++++++ iroh-gossip/Cargo.toml | 4 ++-- iroh-gossip/src/proto/util.rs | 2 +- iroh-sync/Cargo.toml | 6 +++--- iroh/Cargo.toml | 4 ++-- iroh/examples/sync.rs | 38 ++++++++++++----------------------- iroh/src/sync/live.rs | 16 ++++++++------- 8 files changed, 40 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d39e883522..0c996de0ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -336,20 +336,6 @@ version = "2.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" -[[package]] -name = "blake3" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", - "constant_time_eq", - "digest", -] - [[package]] name = "block-buffer" version = "0.10.4" @@ -1868,10 +1854,9 @@ dependencies = [ [[package]] name = "iroh-gossip" -version = "0.4.1" +version = "0.5.1" dependencies = [ "anyhow", - "blake3", "bytes", "clap", "data-encoding", @@ -1880,6 +1865,7 @@ dependencies = [ "futures", "genawaiter", "indexmap 2.0.0", + "iroh-blake3", "iroh-metrics", "iroh-net", "once_cell", @@ -1998,15 +1984,15 @@ dependencies = [ [[package]] name = "iroh-sync" -version = "0.1.0" +version = "0.5.1" dependencies = [ "anyhow", - "blake3", "bytes", "crossbeam", "derive_more", "ed25519-dalek", "hex", + "iroh-blake3", "iroh-bytes", "once_cell", "ouroboros", diff --git a/iroh-bytes/src/util.rs b/iroh-bytes/src/util.rs index bc59634195..9b819c457c 100644 --- a/iroh-bytes/src/util.rs +++ b/iroh-bytes/src/util.rs @@ -82,6 +82,12 @@ impl From<[u8; 32]> for Hash { } } +impl From for [u8; 32]{ + fn from(value: Hash) -> Self { + *value.as_bytes() + } +} + impl From<&[u8; 32]> for Hash { fn from(value: 
&[u8; 32]) -> Self { Hash(blake3::Hash::from(*value)) diff --git a/iroh-gossip/Cargo.toml b/iroh-gossip/Cargo.toml index bf2254095b..326254a209 100644 --- a/iroh-gossip/Cargo.toml +++ b/iroh-gossip/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "iroh-gossip" -version = "0.4.1" +version = "0.5.1" edition = "2021" readme = "README.md" description = "gossip messages over broadcast trees" @@ -11,7 +11,7 @@ repository = "https://github.com/n0-computer/iroh-sync" [dependencies] # proto dependencies (required) anyhow = { version = "1", features = ["backtrace"] } -blake3 = "1.3.3" +blake3 = { package = "iroh-blake3", version = "1.4.3"} bytes = { version = "1.4.0", features = ["serde"] } data-encoding = "2.4.0" derive_more = { version = "1.0.0-beta.1", features = ["add", "debug", "display", "from", "try_into"] } diff --git a/iroh-gossip/src/proto/util.rs b/iroh-gossip/src/proto/util.rs index 64c7a92c9a..03a759ad01 100644 --- a/iroh-gossip/src/proto/util.rs +++ b/iroh-gossip/src/proto/util.rs @@ -56,7 +56,7 @@ macro_rules! idbytes_impls { } } - impl> From for $ty { + impl> std::convert::From for $ty { fn from(value: T) -> Self { Self::from_bytes(value.into()) } diff --git a/iroh-sync/Cargo.toml b/iroh-sync/Cargo.toml index e0132aed13..8ab9a1de20 100644 --- a/iroh-sync/Cargo.toml +++ b/iroh-sync/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "iroh-sync" -version = "0.1.0" +version = "0.5.1" edition = "2021" readme = "README.md" description = "Iroh sync" @@ -10,7 +10,7 @@ repository = "https://github.com/n0-computer/iroh" [dependencies] anyhow = "1.0.71" -blake3 = "1.3.3" +blake3 = { package = "iroh-blake3", version = "1.4.3"} crossbeam = "0.8.2" derive_more = { version = "1.0.0-beta.1", features = ["debug", "display", "from", "try_into"] } ed25519-dalek = { version = "2.0.0-rc.2", features = ["serde", "rand_core"] } @@ -35,4 +35,4 @@ tempfile = "3.4" [features] default = ["fs-store"] -fs-store = ["redb", "ouroboros"] \ No newline at end of file +fs-store = ["redb", "ouroboros"] diff --git a/iroh/Cargo.toml b/iroh/Cargo.toml index c9c10fb911..afa611e111 100644 --- a/iroh/Cargo.toml +++ b/iroh/Cargo.toml @@ -26,8 +26,8 @@ iroh-bytes = { version = "0.5.0", path = "../iroh-bytes" } iroh-metrics = { version = "0.5.0", path = "../iroh-metrics", optional = true } num_cpus = { version = "1.15.0" } portable-atomic = "1" -iroh-sync = { path = "../iroh-sync" } -iroh-gossip = { path = "../iroh-gossip" } +iroh-sync = { version = "0.5.1", path = "../iroh-sync" } +iroh-gossip = { version = "0.5.1", path = "../iroh-gossip" } postcard = { version = "1", default-features = false, features = ["alloc", "use-std", "experimental-derive"] } quic-rpc = { version = "0.6", default-features = false, features = ["flume-transport"] } quinn = "0.10" diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index 917011efd1..edffe96fc4 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -21,7 +21,7 @@ use iroh::sync::{ }; use iroh_bytes::util::runtime; use iroh_gossip::{ - net::{GossipHandle, GOSSIP_ALPN}, + net::{Gossip, GOSSIP_ALPN}, proto::TopicId, }; use iroh_metrics::{ @@ -29,8 +29,8 @@ use iroh_metrics::{ struct_iterable::Iterable, }; use iroh_net::{ - defaults::{default_derp_map, DEFAULT_DERP_STUN_PORT}, - derp::{DerpMap, UseIpv4, UseIpv6}, + defaults::{default_derp_map}, + derp::{DerpMap}, magic_endpoint::get_alpn, tls::Keypair, MagicEndpoint, @@ -131,7 +131,7 @@ async fn run(args: Args) -> anyhow::Result<()> { // configure our derp map let derp_map = match (args.no_derp, args.derp) { (false, None) => 
Some(default_derp_map()), - (false, Some(url)) => Some(derp_map_from_url(url)?), + (false, Some(url)) => Some(DerpMap::from_url(url, 0)), (true, None) => None, (true, Some(_)) => bail!("You cannot set --no-derp and --derp at the same time"), }; @@ -140,7 +140,7 @@ async fn run(args: Args) -> anyhow::Result<()> { // build our magic endpoint and the gossip protocol let (endpoint, gossip, initial_endpoints) = { // init a cell that will hold our gossip handle to be used in endpoint callbacks - let gossip_cell: OnceCell = OnceCell::new(); + let gossip_cell: OnceCell = OnceCell::new(); // init a channel that will emit once the initial endpoints of our local node are discovered let (initial_endpoints_tx, mut initial_endpoints_rx) = mpsc::channel(1); // build the magic endpoint @@ -167,7 +167,7 @@ async fn run(args: Args) -> anyhow::Result<()> { .await?; // initialize the gossip protocol - let gossip = GossipHandle::from_endpoint(endpoint.clone(), Default::default()); + let gossip = Gossip::from_endpoint(endpoint.clone(), Default::default()); // insert into the gossip cell to be used in the endpoint callbacks above gossip_cell.set(gossip.clone()).unwrap(); @@ -181,7 +181,7 @@ async fn run(args: Args) -> anyhow::Result<()> { let (topic, peers) = match &args.command { Command::Open { doc_name } => { - let topic: TopicId = blake3::hash(doc_name.as_bytes()).into(); + let topic: TopicId = iroh_bytes::Hash::new(doc_name.as_bytes()).into(); println!( "> opening document {doc_name} as namespace {} and waiting for peers to join us...", fmt_hash(topic.as_bytes()) @@ -685,7 +685,7 @@ impl FromStr for Cmd { #[derive(Debug)] struct State { - gossip: GossipHandle, + gossip: Gossip, docs: DocStore, bytes: IrohBytesHandlers, } @@ -879,25 +879,13 @@ fn parse_keypair(secret: &str) -> anyhow::Result { fn fmt_derp_map(derp_map: &Option) -> String { match derp_map { None => "None".to_string(), - Some(map) => { - let regions = map.regions.iter().map(|(id, region)| { - let nodes = region.nodes.iter().map(|node| node.url.to_string()); - (*id, nodes.collect::>()) - }); - format!("{:?}", regions.collect::>()) - } + Some(map) => map + .regions() + .flat_map(|region| region.nodes.iter().map(|node| node.url.to_string())) + .collect::>() + .join(", "), } } -fn derp_map_from_url(url: Url) -> anyhow::Result { - Ok(DerpMap::default_from_node( - url, - DEFAULT_DERP_STUN_PORT, - UseIpv4::TryDns, - UseIpv6::TryDns, - 0, - )) -} - fn canonicalize_path(path: &str) -> anyhow::Result { let path = PathBuf::from(shellexpand::tilde(&path).to_string()); Ok(path) diff --git a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 3b1ca971a2..7e35e36e1c 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -8,7 +8,7 @@ use futures::{ FutureExt, TryFutureExt, }; use iroh_gossip::{ - net::{Event, GossipHandle}, + net::{Event, Gossip}, proto::TopicId, }; use iroh_metrics::inc; @@ -64,7 +64,7 @@ pub struct LiveSync { } impl LiveSync { - pub fn spawn(endpoint: MagicEndpoint, gossip: GossipHandle) -> Self { + pub fn spawn(endpoint: MagicEndpoint, gossip: Gossip) -> Self { let (to_actor_tx, to_actor_rx) = mpsc::channel(CHANNEL_CAP); let mut actor = Actor::new(endpoint, gossip, to_actor_rx); let task = tokio::spawn(async move { @@ -102,7 +102,7 @@ impl LiveSync { // Currently peers might double-sync in both directions. 
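// A minimal wiring sketch (not itself part of the patch) for the GossipHandle -> Gossip
// rename above, using only calls that appear in these diffs. `endpoint` is assumed to be
// a MagicEndpoint that already advertises GOSSIP_ALPN, and the store type parameters that
// the extraction dropped from the signatures are left to inference.
let gossip = Gossip::from_endpoint(endpoint.clone(), Default::default());
let live_sync = LiveSync::spawn(endpoint.clone(), gossip.clone());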
struct Actor { endpoint: MagicEndpoint, - gossip: GossipHandle, + gossip: Gossip, docs: HashMap>, subscription: BoxStream<'static, Result<(TopicId, Event)>>, @@ -119,7 +119,7 @@ struct Actor { impl Actor { pub fn new( endpoint: MagicEndpoint, - gossip: GossipHandle, + gossip: Gossip, to_actor_rx: mpsc::Receiver>, ) -> Self { let (insert_tx, insert_rx) = flume::bounded(64); @@ -237,13 +237,15 @@ impl Actor { } // join gossip for the topic to receive and send message - let topic: TopicId = doc.namespace().as_bytes().into(); + let topic = TopicId::from_bytes(*doc.namespace().as_bytes()); self.pending_joins.push({ let peer_ids = peer_ids.clone(); let gossip = self.gossip.clone(); async move { - let res = gossip.join(topic, peer_ids).await; - (topic, res) + match gossip.join(topic, peer_ids).await { + Err(err) => (topic, Err(err)), + Ok(fut) => (topic, fut.await), + } } .boxed() }); From 263fa14d2d5e48bbb0b360662b427d267bb2c4fa Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 13:48:51 +0200 Subject: [PATCH 37/45] chore: fmt --- iroh-bytes/src/util.rs | 2 +- iroh/examples/sync.rs | 5 +---- iroh/src/database/flat/writable.rs | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/iroh-bytes/src/util.rs b/iroh-bytes/src/util.rs index 9b819c457c..0696d8e904 100644 --- a/iroh-bytes/src/util.rs +++ b/iroh-bytes/src/util.rs @@ -82,7 +82,7 @@ impl From<[u8; 32]> for Hash { } } -impl From for [u8; 32]{ +impl From for [u8; 32] { fn from(value: Hash) -> Self { *value.as_bytes() } diff --git a/iroh/examples/sync.rs b/iroh/examples/sync.rs index edffe96fc4..d27076a584 100644 --- a/iroh/examples/sync.rs +++ b/iroh/examples/sync.rs @@ -29,10 +29,7 @@ use iroh_metrics::{ struct_iterable::Iterable, }; use iroh_net::{ - defaults::{default_derp_map}, - derp::{DerpMap}, - magic_endpoint::get_alpn, - tls::Keypair, + defaults::default_derp_map, derp::DerpMap, magic_endpoint::get_alpn, tls::Keypair, MagicEndpoint, }; use iroh_sync::{ diff --git a/iroh/src/database/flat/writable.rs b/iroh/src/database/flat/writable.rs index 8f933813ee..6973d6e4fe 100644 --- a/iroh/src/database/flat/writable.rs +++ b/iroh/src/database/flat/writable.rs @@ -109,7 +109,7 @@ impl WritableFileDatabase { std::fs::OpenOptions::new() .write(true) .create(true) - .open(&path) + .open(path) }) .await?; From dd8cef80632f0a29fd9676d1299faf24bf99e21f Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Fri, 28 Jul 2023 14:09:27 +0200 Subject: [PATCH 38/45] feat: download from peer that informed us about a change --- iroh-net/src/tls.rs | 5 + iroh-sync/src/sync.rs | 20 ++- iroh/src/sync.rs | 26 +++- iroh/src/sync/content.rs | 267 ++++++++++++++++++++++++++++----------- iroh/src/sync/live.rs | 4 +- 5 files changed, 234 insertions(+), 88 deletions(-) diff --git a/iroh-net/src/tls.rs b/iroh-net/src/tls.rs index 07dbeda035..83e1dde697 100644 --- a/iroh-net/src/tls.rs +++ b/iroh-net/src/tls.rs @@ -118,6 +118,11 @@ impl PeerId { let key = PublicKey::from_bytes(bytes)?; Ok(PeerId(key)) } + + /// Get the peer id as a byte array. + pub fn to_bytes(&self) -> [u8; 32] { + self.0.to_bytes() + } } impl From for PeerId { diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index 7ac32f7522..b569aea487 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -211,10 +211,13 @@ impl NamespaceId { /// [parking_lot::RwLock] requiring `Sync`. pub type OnInsertCallback = Box; +/// TODO: PeerId is in iroh-net which iroh-sync doesn't depend on. Add iroh-common crate with `PeerId`. 
+pub type PeerIdBytes = [u8; 32]; + #[derive(Debug, Clone)] pub enum InsertOrigin { Local, - Sync, + Sync(Option), } #[derive(derive_more::Debug, Clone)] @@ -299,7 +302,11 @@ impl> Replica { RecordIdentifier::new(key, inner.namespace.id(), author.id()) } - pub fn insert_remote_entry(&self, entry: SignedEntry) -> anyhow::Result<()> { + pub fn insert_remote_entry( + &self, + entry: SignedEntry, + received_from: Option, + ) -> anyhow::Result<()> { entry.verify()?; let mut inner = self.inner.write(); let id = entry.entry.id.clone(); @@ -307,7 +314,7 @@ impl> Replica { drop(inner); let on_insert = self.on_insert.read(); for cb in &*on_insert { - cb(InsertOrigin::Sync, entry.clone()); + cb(InsertOrigin::Sync(received_from), entry.clone()); } Ok(()) } @@ -321,6 +328,7 @@ impl> Replica { pub fn sync_process_message( &self, message: crate::ranger::Message, + from_peer: Option, ) -> Result>, S::Error> { let reply = self .inner @@ -329,7 +337,7 @@ impl> Replica { .process_message(message, |_key, entry| { let on_insert = self.on_insert.read(); for cb in &*on_insert { - cb(InsertOrigin::Sync, entry.clone()); + cb(InsertOrigin::Sync(from_peer), entry.clone()); } })?; @@ -915,8 +923,8 @@ mod tests { assert!(rounds < 100, "too many rounds"); rounds += 1; println!("round {}", rounds); - if let Some(msg) = bob.sync_process_message(msg).map_err(Into::into)? { - next_to_bob = alice.sync_process_message(msg).map_err(Into::into)?; + if let Some(msg) = bob.sync_process_message(msg, None).map_err(Into::into)? { + next_to_bob = alice.sync_process_message(msg, None).map_err(Into::into); } } diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index bcadeb23f9..f56e4b5459 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -4,7 +4,7 @@ use std::net::SocketAddr; use anyhow::{bail, ensure, Context, Result}; use bytes::BytesMut; -use iroh_net::{tls::PeerId, MagicEndpoint}; +use iroh_net::{tls::PeerId, MagicEndpoint, magic_endpoint::get_peer_id}; use iroh_sync::{ store, sync::{NamespaceId, Replica}, @@ -54,7 +54,7 @@ pub async fn connect_and_sync( .await .context("dial_and_sync")?; let (mut send_stream, mut recv_stream) = connection.open_bi().await?; - let res = run_alice::(&mut send_stream, &mut recv_stream, doc).await; + let res = run_alice::(&mut send_stream, &mut recv_stream, doc, Some(peer_id)).await; debug!("sync with peer {}: finish {:?}", peer_id, res); res } @@ -64,7 +64,9 @@ pub async fn run_alice, + peer: Option, ) -> Result<()> { + let peer = peer.map(|peer| peer.to_bytes()); let mut buffer = BytesMut::with_capacity(1024); // Init message @@ -86,7 +88,7 @@ pub async fn run_alice { - if let Some(msg) = alice.sync_process_message(msg).map_err(Into::into)? { + if let Some(msg) = alice.sync_process_message(msg, peer).map_err(Into::into)? 
{ send_sync_message(writer, msg).await?; } else { break; @@ -105,9 +107,16 @@ pub async fn handle_connection( ) -> Result<()> { let connection = connecting.await?; debug!("> connection established!"); + let peer_id = get_peer_id(&connection).await?; let (mut send_stream, mut recv_stream) = connection.accept_bi().await?; - run_bob(&mut send_stream, &mut recv_stream, replica_store).await?; + run_bob( + &mut send_stream, + &mut recv_stream, + replica_store, + Some(peer_id), + ) + .await?; send_stream.finish().await?; debug!("done"); @@ -120,7 +129,9 @@ pub async fn run_bob, ) -> Result<()> { + let peer = peer.map(|peer| peer.to_bytes()); let mut buffer = BytesMut::with_capacity(1024); let mut replica = None; @@ -135,7 +146,7 @@ pub async fn run_bob { debug!("starting sync for {}", namespace); - if let Some(msg) = r.sync_process_message(message).map_err(Into::into)? { + if let Some(msg) = r.sync_process_message(message, peer).map_err(Into::into)? { send_sync_message(writer, msg).await?; } else { break; @@ -150,7 +161,7 @@ pub async fn run_bob match replica { Some(ref replica) => { - if let Some(msg) = replica.sync_process_message(msg).map_err(Into::into)? { + if let Some(msg) = replica.sync_process_message(msg, peer).map_err(Into::into)? { send_sync_message(writer, msg).await?; } else { break; @@ -226,7 +237,7 @@ mod tests { let (mut alice_reader, mut alice_writer) = tokio::io::split(alice); let replica = alice_replica.clone(); let alice_task = tokio::task::spawn(async move { - run_alice::(&mut alice_writer, &mut alice_reader, &replica) + run_alice::(&mut alice_writer, &mut alice_reader, &replica, None) .await }); @@ -237,6 +248,7 @@ mod tests { &mut bob_writer, &mut bob_reader, bob_replica_store_task, + None ) .await }); diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 5193fe7b53..5f15885ad7 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -1,5 +1,5 @@ use std::{ - collections::{HashMap, HashSet, VecDeque}, + collections::{HashMap, VecDeque}, io, path::{Path, PathBuf}, sync::{Arc, Mutex}, @@ -20,7 +20,7 @@ use iroh_metrics::{inc, inc_by}; use iroh_net::{tls::PeerId, MagicEndpoint}; use iroh_sync::{ store::{self, Store as _}, - sync::{Author, InsertOrigin, Namespace, OnInsertCallback, Replica, SignedEntry}, + sync::{Author, InsertOrigin, Namespace, OnInsertCallback, PeerIdBytes, Replica, SignedEntry}, }; use tokio::{io::AsyncRead, sync::oneshot}; use tokio_stream::StreamExt; @@ -114,11 +114,13 @@ impl Doc { // setup on_insert callback to trigger download on remote insert if let DownloadMode::Always = download_mode { let doc_clone = doc.clone(); - doc.replica.on_insert(Box::new(move |origin, entry| { - if matches!(origin, InsertOrigin::Sync) { - doc_clone.download_content_from_author(&entry); - } - })); + doc.replica + .on_insert(Box::new(move |origin, entry| match origin { + InsertOrigin::Sync(peer) => { + doc_clone.download_content_from_author_and_other_peer(&entry, peer); + } + InsertOrigin::Local => {} + })); } // Collect metrics @@ -129,7 +131,7 @@ impl Doc { inc!(Metrics, new_entries_local); inc_by!(Metrics, new_entries_local_size, size); } - InsertOrigin::Sync => { + InsertOrigin::Sync(_) => { inc!(Metrics, new_entries_remote); inc_by!(Metrics, new_entries_remote_size, size); } @@ -184,11 +186,26 @@ impl Doc { self.insert_reader(&key, reader).await } - pub fn download_content_from_author(&self, entry: &SignedEntry) { - let hash = *entry.entry().record().content_hash(); - let peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes()) 
+ pub fn download_content_from_author_and_other_peer( + &self, + entry: &SignedEntry, + other_peer: Option, + ) { + let author_peer_id = PeerId::from_bytes(entry.entry().id().author().as_bytes()) .expect("failed to convert author to peer id"); - self.blobs.start_download(hash, peer_id); + + let mut peers = vec![author_peer_id]; + + if let Some(other_peer) = other_peer { + let other_peer_id = + PeerId::from_bytes(&other_peer).expect("failed to convert author to peer id"); + if other_peer_id != peers[0] { + peers.push(other_peer_id); + } + } + + let hash = *entry.entry().record().content_hash(); + self.blobs.start_download(hash, peers); } pub async fn get_content_bytes(&self, entry: &SignedEntry) -> Option { @@ -234,9 +251,9 @@ impl BlobStore { self.db.db() } - pub fn start_download(&self, hash: Hash, peer: PeerId) { + pub fn start_download(&self, hash: Hash, peers: Vec) { if !self.db.has(&hash) { - self.downloader.start_download(hash, peer); + self.downloader.start_download(hash, peers); } } @@ -273,7 +290,7 @@ pub type DownloadFuture = Shared>>; #[derive(Debug)] pub struct DownloadRequest { hash: Hash, - peer: PeerId, + peers: Vec, reply: DownloadReply, } @@ -320,19 +337,21 @@ impl Downloader { } } - pub fn start_download(&self, hash: Hash, peer: PeerId) { + pub fn start_download(&self, hash: Hash, peers: Vec) { let (reply, reply_rx) = oneshot::channel(); - let req = DownloadRequest { hash, peer, reply }; - let pending_downloads = self.pending_downloads.clone(); - let fut = async move { - let res = reply_rx.await; - pending_downloads.lock().unwrap().remove(&hash); - res.ok().flatten() - }; - self.pending_downloads - .lock() - .unwrap() - .insert(hash, fut.boxed().shared()); + let req = DownloadRequest { hash, peers, reply }; + if self.pending_downloads.lock().unwrap().get(&hash).is_none() { + let pending_downloads = self.pending_downloads.clone(); + let fut = async move { + let res = reply_rx.await; + pending_downloads.lock().unwrap().remove(&hash); + res.ok().flatten() + }; + self.pending_downloads + .lock() + .unwrap() + .insert(hash, fut.boxed().shared()); + } // TODO: this is potentially blocking inside an async call. 
figure out a better solution if let Err(err) = self.to_actor_tx.send(req) { warn!("download actor dropped: {err}"); @@ -349,9 +368,8 @@ pub struct DownloadActor { db: WritableFileDatabase, conns: HashMap, replies: HashMap>, - peer_hashes: HashMap>, - hash_peers: HashMap>, - pending_downloads: PendingDownloadsFutures, + pending_download_futs: PendingDownloadsFutures, + queue: DownloadQueue, rx: flume::Receiver, } impl DownloadActor { @@ -366,9 +384,8 @@ impl DownloadActor { dialer: Dialer::new(endpoint), replies: Default::default(), conns: Default::default(), - pending_downloads: Default::default(), - peer_hashes: Default::default(), - hash_peers: Default::default(), + pending_download_futs: Default::default(), + queue: Default::default(), } } pub async fn run(&mut self) -> anyhow::Result<()> { @@ -386,8 +403,9 @@ impl DownloadActor { }, Err(err) => self.on_peer_fail(&peer, err), }, - Some((peer, hash, res)) = self.pending_downloads.next() => match res { + Some((peer, hash, res)) = self.pending_download_futs.next() => match res { Ok(Some((hash, size))) => { + self.queue.on_success(hash, peer); self.reply(hash, Some((hash, size))); self.on_peer_ready(peer); } @@ -409,66 +427,169 @@ impl DownloadActor { fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { warn!("download from {peer} failed: {err}"); - for hash in self.peer_hashes.remove(peer).into_iter().flatten() { - self.on_not_found(peer, hash); + for hash in self.queue.on_peer_fail(peer) { + self.reply(hash, None); } self.conns.remove(peer); } fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { - if let Some(peers) = self.hash_peers.get_mut(&hash) { - peers.remove(peer); - if peers.is_empty() { - self.reply(hash, None); - self.hash_peers.remove(&hash); - } + self.queue.on_not_found(hash, *peer); + if self.queue.has_no_candidates(&hash) { + self.reply(hash, None); } } fn on_peer_ready(&mut self, peer: PeerId) { - if let Some(hash) = self - .peer_hashes - .get_mut(&peer) - .and_then(|hashes| hashes.pop_front()) - { - let conn = self.conns.get(&peer).unwrap().clone(); - let blobs = self.db.clone(); - let fut = async move { - let start = Instant::now(); - let res = blobs.download_single(conn, hash).await; - // record metrics - let elapsed = start.elapsed().as_millis(); - match &res { - Ok(Some((_hash, len))) => { - inc!(Metrics, downloads_success); - inc_by!(Metrics, download_bytes_total, *len); - inc_by!(Metrics, download_time_total, elapsed as u64); - } - Ok(None) => inc!(Metrics, downloads_notfound), - Err(_) => inc!(Metrics, downloads_error), - } - (peer, hash, res) - }; - self.pending_downloads.push(fut.boxed_local()); + if let Some(hash) = self.queue.try_next_for_peer(peer) { + self.start_download_unchecked(peer, hash); } else { self.conns.remove(&peer); - self.peer_hashes.remove(&peer); } } + fn start_download_unchecked(&mut self, peer: PeerId, hash: Hash) { + let conn = self.conns.get(&peer).unwrap().clone(); + let blobs = self.db.clone(); + let fut = async move { + let start = Instant::now(); + let res = blobs.download_single(conn, hash).await; + // record metrics + let elapsed = start.elapsed().as_millis(); + match &res { + Ok(Some((_hash, len))) => { + inc!(Metrics, downloads_success); + inc_by!(Metrics, download_bytes_total, *len); + inc_by!(Metrics, download_time_total, elapsed as u64); + } + Ok(None) => inc!(Metrics, downloads_notfound), + Err(_) => inc!(Metrics, downloads_error), + } + (peer, hash, res) + }; + self.pending_download_futs.push(fut.boxed_local()); + } + async fn on_download_request(&mut self, req: 
DownloadRequest) { - let DownloadRequest { peer, hash, reply } = req; + let DownloadRequest { peers, hash, reply } = req; if self.db.has(&hash) { let size = self.db.get_size(&hash).await.unwrap(); reply.send(Some((hash, size))).ok(); return; } - debug!("queue download {hash} from {peer}"); self.replies.entry(hash).or_default().push_back(reply); - self.hash_peers.entry(hash).or_default().insert(peer); - self.peer_hashes.entry(peer).or_default().push_back(hash); - if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { - self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); + for peer in peers { + self.queue.push_candidate(hash, peer); + // TODO: Don't dial all peers instantly. + if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { + self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); + } } } } + +#[derive(Debug, Default)] +struct DownloadQueue { + candidates_by_hash: HashMap>, + candidates_by_peer: HashMap>, + running_by_hash: HashMap, + running_by_peer: HashMap, +} + +impl DownloadQueue { + pub fn push_candidate(&mut self, hash: Hash, peer: PeerId) { + self.candidates_by_hash + .entry(hash) + .or_default() + .push_back(peer); + self.candidates_by_peer + .entry(peer) + .or_default() + .push_back(hash); + } + + pub fn try_next_for_peer(&mut self, peer: PeerId) -> Option { + let mut next = None; + for (idx, hash) in self.candidates_by_peer.get(&peer)?.iter().enumerate() { + if !self.running_by_hash.contains_key(hash) { + next = Some((idx, *hash)); + break; + } + } + if let Some((idx, hash)) = next { + self.running_by_hash.insert(hash, peer); + self.running_by_peer.insert(peer, hash); + self.candidates_by_peer.get_mut(&peer).unwrap().remove(idx); + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != &peer); + } + self.ensure_no_empty(hash, peer); + return Some(hash); + } else { + None + } + } + + pub fn has_no_candidates(&self, hash: &Hash) -> bool { + self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(&hash).is_none() + } + + pub fn on_success(&mut self, hash: Hash, peer: PeerId) -> Option<(PeerId, Hash)> { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.try_next_for_peer(peer).map(|hash| (peer, hash)) + } + + pub fn on_peer_fail(&mut self, peer: &PeerId) -> Vec { + let mut failed = vec![]; + for hash in self + .candidates_by_peer + .remove(peer) + .map(|hashes| hashes.into_iter()) + .into_iter() + .flatten() + { + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != peer); + if peers.is_empty() && self.running_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + } + if let Some(hash) = self.running_by_peer.remove(&peer) { + self.running_by_hash.remove(&hash); + if self.candidates_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + failed + } + + pub fn on_not_found(&mut self, hash: Hash, peer: PeerId) { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.ensure_no_empty(hash, peer); + } + + fn ensure_no_empty(&mut self, hash: Hash, peer: PeerId) { + if self + .candidates_by_peer + .get(&peer) + .map_or(false, |hashes| hashes.is_empty()) + { + self.candidates_by_peer.remove(&peer); + } + if self + .candidates_by_hash + .get(&hash) + .map_or(false, |peers| peers.is_empty()) + { + self.candidates_by_hash.remove(&hash); + } + } +} + +#[cfg(test)] +mod test {} diff --git 
a/iroh/src/sync/live.rs b/iroh/src/sync/live.rs index 7e35e36e1c..320c236385 100644 --- a/iroh/src/sync/live.rs +++ b/iroh/src/sync/live.rs @@ -288,10 +288,10 @@ impl Actor { }; match event { // We received a gossip message. Try to insert it into our replica. - Event::Received(data, _prev_peer) => { + Event::Received(data, prev_peer) => { let op: Op = postcard::from_bytes(&data)?; match op { - Op::Put(entry) => doc.insert_remote_entry(entry)?, + Op::Put(entry) => doc.insert_remote_entry(entry, Some(prev_peer.to_bytes()))?, } } // A new neighbor appeared in the gossip swarm. Try to sync with it directly. From ed8cb2dd32cd55f2f1a6a557ca8ba31e3a7c16ce Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 13:37:54 +0200 Subject: [PATCH 39/45] refactor: move downloader out of sync module --- iroh/src/download.rs | 347 +++++++++++++++++++++++++++++++++++++++ iroh/src/lib.rs | 1 + iroh/src/sync/content.rs | 335 +------------------------------------ 3 files changed, 357 insertions(+), 326 deletions(-) create mode 100644 iroh/src/download.rs diff --git a/iroh/src/download.rs b/iroh/src/download.rs new file mode 100644 index 0000000000..c43640f38f --- /dev/null +++ b/iroh/src/download.rs @@ -0,0 +1,347 @@ +//! Download queue + +use std::{ + collections::{HashMap, VecDeque}, + sync::{Arc, Mutex}, + time::Instant, +}; + +use futures::{ + future::{BoxFuture, LocalBoxFuture, Shared}, + stream::FuturesUnordered, + FutureExt, +}; +use iroh_bytes::util::Hash; +use iroh_gossip::net::util::Dialer; +use iroh_metrics::{inc, inc_by}; +use iroh_net::{tls::PeerId, MagicEndpoint}; +use tokio::sync::oneshot; +use tokio_stream::StreamExt; +use tracing::{debug, error, warn}; + +// TODO: Move metrics to iroh-bytes metrics +use super::sync::metrics::Metrics; +// TODO: Will be replaced by proper persistent DB once +// https://github.com/n0-computer/iroh/pull/1320 is merged +use crate::database::flat::writable::WritableFileDatabase; + +/// Future for the completion of a download request +pub type DownloadFuture = Shared>>; + +/// A download queue for iroh-bytes +/// +/// Spawns a background task that handles connecting to peers and performing get requests. +/// +/// TODO: Move to iroh-bytes or replace with corresponding feature from iroh-bytes once available +/// TODO: Support retries and backoff - become a proper queue... +/// TODO: Download requests send via synchronous flume::Sender::send. Investigate if we want async +/// here. We currently use [`Downloader::push`] from [`iroh_sync::Replica::on_insert`] callbacks, +/// which are sync, thus we need a sync method on the Downloader to push new download requests. +#[derive(Debug, Clone)] +pub struct Downloader { + pending_downloads: Arc>>, + to_actor_tx: flume::Sender, +} + +impl Downloader { + /// Create a new downloader + pub fn new( + rt: iroh_bytes::util::runtime::Handle, + endpoint: MagicEndpoint, + db: WritableFileDatabase, + ) -> Self { + let (tx, rx) = flume::bounded(64); + // spawn the actor on a local pool + // the local pool is required because WritableFileDatabase::download_single + // returns a future that is !Send + rt.local_pool().spawn_pinned(move || async move { + let mut actor = DownloadActor::new(endpoint, db, rx); + if let Err(err) = actor.run().await { + error!("download actor failed with error {err:?}"); + } + }); + Self { + pending_downloads: Arc::new(Mutex::new(HashMap::new())), + to_actor_tx: tx, + } + } + + /// Add a new download request to the download queue. 
+ /// + /// Note: This method takes only [`PeerId`]s and will attempt to connect to those peers. For + /// this to succeed, you need to add addresses for these peers to the magic endpoint's + /// addressbook yourself. See [`MagicEndpoint::add_known_addrs`]. + pub fn push(&self, hash: Hash, peers: Vec) { + let (reply, reply_rx) = oneshot::channel(); + let req = DownloadRequest { hash, peers, reply }; + + // TODO: this is potentially blocking inside an async call. figure out a better solution + if let Err(err) = self.to_actor_tx.send(req) { + warn!("download actor dropped: {err}"); + } + + if self.pending_downloads.lock().unwrap().get(&hash).is_none() { + let pending_downloads = self.pending_downloads.clone(); + let fut = async move { + let res = reply_rx.await; + pending_downloads.lock().unwrap().remove(&hash); + res.ok().flatten() + }; + self.pending_downloads + .lock() + .unwrap() + .insert(hash, fut.boxed().shared()); + } + } + + /// Returns a future that completes once the blob for `hash` has been downloaded, or all queued + /// requests for that blob have failed. + /// + /// NOTE: This does not start the download itself. Use [`Self::push`] for that. + pub fn finished(&self, hash: &Hash) -> DownloadFuture { + match self.pending_downloads.lock().unwrap().get(hash) { + Some(fut) => fut.clone(), + None => futures::future::ready(None).boxed().shared(), + } + } +} + +type DownloadReply = oneshot::Sender>; +type PendingDownloadsFutures = + FuturesUnordered>)>>; + +#[derive(Debug)] +struct DownloadRequest { + hash: Hash, + peers: Vec, + reply: DownloadReply, +} + +#[derive(Debug)] +struct DownloadActor { + dialer: Dialer, + db: WritableFileDatabase, + conns: HashMap, + replies: HashMap>, + pending_download_futs: PendingDownloadsFutures, + queue: DownloadQueue, + rx: flume::Receiver, +} +impl DownloadActor { + fn new( + endpoint: MagicEndpoint, + db: WritableFileDatabase, + rx: flume::Receiver, + ) -> Self { + Self { + rx, + db, + dialer: Dialer::new(endpoint), + replies: Default::default(), + conns: Default::default(), + pending_download_futs: Default::default(), + queue: Default::default(), + } + } + pub async fn run(&mut self) -> anyhow::Result<()> { + loop { + tokio::select! 
{ + req = self.rx.recv_async() => match req { + Err(_) => return Ok(()), + Ok(req) => self.on_download_request(req).await + }, + (peer, conn) = self.dialer.next() => match conn { + Ok(conn) => { + debug!("connection to {peer} established"); + self.conns.insert(peer, conn); + self.on_peer_ready(peer); + }, + Err(err) => self.on_peer_fail(&peer, err), + }, + Some((peer, hash, res)) = self.pending_download_futs.next() => match res { + Ok(Some((hash, size))) => { + self.queue.on_success(hash, peer); + self.reply(hash, Some((hash, size))); + self.on_peer_ready(peer); + } + Ok(None) => { + self.on_not_found(&peer, hash); + self.on_peer_ready(peer); + } + Err(err) => self.on_peer_fail(&peer, err), + } + } + } + } + + fn reply(&mut self, hash: Hash, res: Option<(Hash, u64)>) { + for reply in self.replies.remove(&hash).into_iter().flatten() { + reply.send(res).ok(); + } + } + + fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { + warn!("download from {peer} failed: {err}"); + for hash in self.queue.on_peer_fail(peer) { + self.reply(hash, None); + } + self.conns.remove(peer); + } + + fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { + self.queue.on_not_found(hash, *peer); + if self.queue.has_no_candidates(&hash) { + self.reply(hash, None); + } + } + + fn on_peer_ready(&mut self, peer: PeerId) { + if let Some(hash) = self.queue.try_next_for_peer(peer) { + self.start_download_unchecked(peer, hash); + } else { + self.conns.remove(&peer); + } + } + + fn start_download_unchecked(&mut self, peer: PeerId, hash: Hash) { + let conn = self.conns.get(&peer).unwrap().clone(); + let blobs = self.db.clone(); + let fut = async move { + let start = Instant::now(); + let res = blobs.download_single(conn, hash).await; + // record metrics + let elapsed = start.elapsed().as_millis(); + match &res { + Ok(Some((_hash, len))) => { + inc!(Metrics, downloads_success); + inc_by!(Metrics, download_bytes_total, *len); + inc_by!(Metrics, download_time_total, elapsed as u64); + } + Ok(None) => inc!(Metrics, downloads_notfound), + Err(_) => inc!(Metrics, downloads_error), + } + (peer, hash, res) + }; + self.pending_download_futs.push(fut.boxed_local()); + } + + async fn on_download_request(&mut self, req: DownloadRequest) { + let DownloadRequest { peers, hash, reply } = req; + if self.db.has(&hash) { + let size = self.db.get_size(&hash).await.unwrap(); + reply.send(Some((hash, size))).ok(); + return; + } + self.replies.entry(hash).or_default().push_back(reply); + for peer in peers { + self.queue.push_candidate(hash, peer); + // TODO: Don't dial all peers instantly. 
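// A usage sketch (not part of the diff) for the public Downloader API introduced in this
// patch: `push` only enqueues a request with its candidate peers, while `finished` returns
// a shared future that resolves to Some((hash, size)) once any candidate served the blob,
// or None when every candidate failed. `rt`, `endpoint`, `db`, `hash` and `peer_id` are
// assumed to already exist in the caller's scope.
let downloader = Downloader::new(rt.clone(), endpoint.clone(), db.clone());
downloader.push(hash, vec![peer_id]);
let outcome = downloader.finished(&hash).await; // Option<(Hash, u64)>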
+ if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { + self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); + } + } + } +} + +#[derive(Debug, Default)] +struct DownloadQueue { + candidates_by_hash: HashMap>, + candidates_by_peer: HashMap>, + running_by_hash: HashMap, + running_by_peer: HashMap, +} + +impl DownloadQueue { + pub fn push_candidate(&mut self, hash: Hash, peer: PeerId) { + self.candidates_by_hash + .entry(hash) + .or_default() + .push_back(peer); + self.candidates_by_peer + .entry(peer) + .or_default() + .push_back(hash); + } + + pub fn try_next_for_peer(&mut self, peer: PeerId) -> Option { + let mut next = None; + for (idx, hash) in self.candidates_by_peer.get(&peer)?.iter().enumerate() { + if !self.running_by_hash.contains_key(hash) { + next = Some((idx, *hash)); + break; + } + } + if let Some((idx, hash)) = next { + self.running_by_hash.insert(hash, peer); + self.running_by_peer.insert(peer, hash); + self.candidates_by_peer.get_mut(&peer).unwrap().remove(idx); + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != &peer); + } + self.ensure_no_empty(hash, peer); + return Some(hash); + } else { + None + } + } + + pub fn has_no_candidates(&self, hash: &Hash) -> bool { + self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(&hash).is_none() + } + + pub fn on_success(&mut self, hash: Hash, peer: PeerId) -> Option<(PeerId, Hash)> { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.try_next_for_peer(peer).map(|hash| (peer, hash)) + } + + pub fn on_peer_fail(&mut self, peer: &PeerId) -> Vec { + let mut failed = vec![]; + for hash in self + .candidates_by_peer + .remove(peer) + .map(|hashes| hashes.into_iter()) + .into_iter() + .flatten() + { + if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { + peers.retain(|p| p != peer); + if peers.is_empty() && self.running_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + } + if let Some(hash) = self.running_by_peer.remove(&peer) { + self.running_by_hash.remove(&hash); + if self.candidates_by_hash.get(&hash).is_none() { + failed.push(hash); + } + } + failed + } + + pub fn on_not_found(&mut self, hash: Hash, peer: PeerId) { + let peer2 = self.running_by_hash.remove(&hash); + debug_assert_eq!(peer2, Some(peer)); + self.running_by_peer.remove(&peer); + self.ensure_no_empty(hash, peer); + } + + fn ensure_no_empty(&mut self, hash: Hash, peer: PeerId) { + if self + .candidates_by_peer + .get(&peer) + .map_or(false, |hashes| hashes.is_empty()) + { + self.candidates_by_peer.remove(&peer); + } + if self + .candidates_by_hash + .get(&hash) + .map_or(false, |peers| peers.is_empty()) + { + self.candidates_by_hash.remove(&hash); + } + } +} diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 9eb2a54e19..345ffa344b 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -8,6 +8,7 @@ pub use iroh_net as net; pub mod collection; pub mod database; pub mod dial; +pub mod download; pub mod node; pub mod rpc_protocol; #[allow(missing_docs)] diff --git a/iroh/src/sync/content.rs b/iroh/src/sync/content.rs index 5f15885ad7..7e04323807 100644 --- a/iroh/src/sync/content.rs +++ b/iroh/src/sync/content.rs @@ -1,20 +1,12 @@ use std::{ - collections::{HashMap, VecDeque}, io, path::{Path, PathBuf}, - sync::{Arc, Mutex}, - time::Instant, + sync::Arc, }; use anyhow::Result; use bytes::Bytes; -use futures::{ - future::{BoxFuture, LocalBoxFuture, Shared}, - stream::FuturesUnordered, - FutureExt, 
-}; use iroh_bytes::util::Hash; -use iroh_gossip::net::util::Dialer; use iroh_io::{AsyncSliceReader, AsyncSliceReaderExt}; use iroh_metrics::{inc, inc_by}; use iroh_net::{tls::PeerId, MagicEndpoint}; @@ -22,12 +14,13 @@ use iroh_sync::{ store::{self, Store as _}, sync::{Author, InsertOrigin, Namespace, OnInsertCallback, PeerIdBytes, Replica, SignedEntry}, }; -use tokio::{io::AsyncRead, sync::oneshot}; -use tokio_stream::StreamExt; -use tracing::{debug, error, warn}; +use tokio::io::AsyncRead; use super::metrics::Metrics; -use crate::database::flat::{writable::WritableFileDatabase, Database}; +use crate::{ + database::flat::{writable::WritableFileDatabase, Database}, + download::Downloader, +}; #[derive(Debug, Copy, Clone)] pub enum DownloadMode { @@ -253,12 +246,12 @@ impl BlobStore { pub fn start_download(&self, hash: Hash, peers: Vec) { if !self.db.has(&hash) { - self.downloader.start_download(hash, peers); + self.downloader.push(hash, peers); } } pub async fn get_bytes(&self, hash: &Hash) -> anyhow::Result> { - self.downloader.wait_for_download(hash).await; + self.downloader.finished(hash).await; let Some(entry) = self.db().get(hash) else { return Ok(None) }; @@ -267,7 +260,7 @@ impl BlobStore { } pub async fn get_reader(&self, hash: &Hash) -> anyhow::Result> { - self.downloader.wait_for_download(hash).await; + self.downloader.finished(hash).await; let Some(entry) = self.db().get(hash) else { return Ok(None) }; @@ -283,313 +276,3 @@ impl BlobStore { self.db.put_reader(data).await } } - -pub type DownloadReply = oneshot::Sender>; -pub type DownloadFuture = Shared>>; - -#[derive(Debug)] -pub struct DownloadRequest { - hash: Hash, - peers: Vec, - reply: DownloadReply, -} - -/// A download queue -/// -/// Spawns a background task that handles connecting to peers and performing get requests. -/// -/// TODO: Queued downloads are pushed into an unbounded channel. Maybe make it bounded instead. -/// We want the start_download() method to be sync though because it is used -/// from sync on_insert callbacks on the replicas. 
-/// TODO: Move to iroh-bytes or replace with corresponding feature from iroh-bytes once available -#[derive(Debug, Clone)] -pub struct Downloader { - pending_downloads: Arc>>, - to_actor_tx: flume::Sender, -} - -impl Downloader { - pub fn new( - rt: iroh_bytes::util::runtime::Handle, - endpoint: MagicEndpoint, - blobs: WritableFileDatabase, - ) -> Self { - let (tx, rx) = flume::bounded(64); - // spawn the actor on a local pool - // the local pool is required because WritableFileDatabase::download_single - // returns a future that is !Send - rt.local_pool().spawn_pinned(move || async move { - let mut actor = DownloadActor::new(endpoint, blobs, rx); - if let Err(err) = actor.run().await { - error!("download actor failed with error {err:?}"); - } - }); - Self { - pending_downloads: Arc::new(Mutex::new(HashMap::new())), - to_actor_tx: tx, - } - } - - pub fn wait_for_download(&self, hash: &Hash) -> DownloadFuture { - match self.pending_downloads.lock().unwrap().get(hash) { - Some(fut) => fut.clone(), - None => futures::future::ready(None).boxed().shared(), - } - } - - pub fn start_download(&self, hash: Hash, peers: Vec) { - let (reply, reply_rx) = oneshot::channel(); - let req = DownloadRequest { hash, peers, reply }; - if self.pending_downloads.lock().unwrap().get(&hash).is_none() { - let pending_downloads = self.pending_downloads.clone(); - let fut = async move { - let res = reply_rx.await; - pending_downloads.lock().unwrap().remove(&hash); - res.ok().flatten() - }; - self.pending_downloads - .lock() - .unwrap() - .insert(hash, fut.boxed().shared()); - } - // TODO: this is potentially blocking inside an async call. figure out a better solution - if let Err(err) = self.to_actor_tx.send(req) { - warn!("download actor dropped: {err}"); - } - } -} - -type PendingDownloadsFutures = - FuturesUnordered>)>>; - -#[derive(Debug)] -pub struct DownloadActor { - dialer: Dialer, - db: WritableFileDatabase, - conns: HashMap, - replies: HashMap>, - pending_download_futs: PendingDownloadsFutures, - queue: DownloadQueue, - rx: flume::Receiver, -} -impl DownloadActor { - fn new( - endpoint: MagicEndpoint, - db: WritableFileDatabase, - rx: flume::Receiver, - ) -> Self { - Self { - rx, - db, - dialer: Dialer::new(endpoint), - replies: Default::default(), - conns: Default::default(), - pending_download_futs: Default::default(), - queue: Default::default(), - } - } - pub async fn run(&mut self) -> anyhow::Result<()> { - loop { - tokio::select! 
{ - req = self.rx.recv_async() => match req { - Err(_) => return Ok(()), - Ok(req) => self.on_download_request(req).await - }, - (peer, conn) = self.dialer.next() => match conn { - Ok(conn) => { - debug!("connection to {peer} established"); - self.conns.insert(peer, conn); - self.on_peer_ready(peer); - }, - Err(err) => self.on_peer_fail(&peer, err), - }, - Some((peer, hash, res)) = self.pending_download_futs.next() => match res { - Ok(Some((hash, size))) => { - self.queue.on_success(hash, peer); - self.reply(hash, Some((hash, size))); - self.on_peer_ready(peer); - } - Ok(None) => { - self.on_not_found(&peer, hash); - self.on_peer_ready(peer); - } - Err(err) => self.on_peer_fail(&peer, err), - } - } - } - } - - fn reply(&mut self, hash: Hash, res: Option<(Hash, u64)>) { - for reply in self.replies.remove(&hash).into_iter().flatten() { - reply.send(res).ok(); - } - } - - fn on_peer_fail(&mut self, peer: &PeerId, err: anyhow::Error) { - warn!("download from {peer} failed: {err}"); - for hash in self.queue.on_peer_fail(peer) { - self.reply(hash, None); - } - self.conns.remove(peer); - } - - fn on_not_found(&mut self, peer: &PeerId, hash: Hash) { - self.queue.on_not_found(hash, *peer); - if self.queue.has_no_candidates(&hash) { - self.reply(hash, None); - } - } - - fn on_peer_ready(&mut self, peer: PeerId) { - if let Some(hash) = self.queue.try_next_for_peer(peer) { - self.start_download_unchecked(peer, hash); - } else { - self.conns.remove(&peer); - } - } - - fn start_download_unchecked(&mut self, peer: PeerId, hash: Hash) { - let conn = self.conns.get(&peer).unwrap().clone(); - let blobs = self.db.clone(); - let fut = async move { - let start = Instant::now(); - let res = blobs.download_single(conn, hash).await; - // record metrics - let elapsed = start.elapsed().as_millis(); - match &res { - Ok(Some((_hash, len))) => { - inc!(Metrics, downloads_success); - inc_by!(Metrics, download_bytes_total, *len); - inc_by!(Metrics, download_time_total, elapsed as u64); - } - Ok(None) => inc!(Metrics, downloads_notfound), - Err(_) => inc!(Metrics, downloads_error), - } - (peer, hash, res) - }; - self.pending_download_futs.push(fut.boxed_local()); - } - - async fn on_download_request(&mut self, req: DownloadRequest) { - let DownloadRequest { peers, hash, reply } = req; - if self.db.has(&hash) { - let size = self.db.get_size(&hash).await.unwrap(); - reply.send(Some((hash, size))).ok(); - return; - } - self.replies.entry(hash).or_default().push_back(reply); - for peer in peers { - self.queue.push_candidate(hash, peer); - // TODO: Don't dial all peers instantly. 
- if self.conns.get(&peer).is_none() && !self.dialer.is_pending(&peer) { - self.dialer.queue_dial(peer, &iroh_bytes::protocol::ALPN); - } - } - } -} - -#[derive(Debug, Default)] -struct DownloadQueue { - candidates_by_hash: HashMap>, - candidates_by_peer: HashMap>, - running_by_hash: HashMap, - running_by_peer: HashMap, -} - -impl DownloadQueue { - pub fn push_candidate(&mut self, hash: Hash, peer: PeerId) { - self.candidates_by_hash - .entry(hash) - .or_default() - .push_back(peer); - self.candidates_by_peer - .entry(peer) - .or_default() - .push_back(hash); - } - - pub fn try_next_for_peer(&mut self, peer: PeerId) -> Option { - let mut next = None; - for (idx, hash) in self.candidates_by_peer.get(&peer)?.iter().enumerate() { - if !self.running_by_hash.contains_key(hash) { - next = Some((idx, *hash)); - break; - } - } - if let Some((idx, hash)) = next { - self.running_by_hash.insert(hash, peer); - self.running_by_peer.insert(peer, hash); - self.candidates_by_peer.get_mut(&peer).unwrap().remove(idx); - if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { - peers.retain(|p| p != &peer); - } - self.ensure_no_empty(hash, peer); - return Some(hash); - } else { - None - } - } - - pub fn has_no_candidates(&self, hash: &Hash) -> bool { - self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(&hash).is_none() - } - - pub fn on_success(&mut self, hash: Hash, peer: PeerId) -> Option<(PeerId, Hash)> { - let peer2 = self.running_by_hash.remove(&hash); - debug_assert_eq!(peer2, Some(peer)); - self.running_by_peer.remove(&peer); - self.try_next_for_peer(peer).map(|hash| (peer, hash)) - } - - pub fn on_peer_fail(&mut self, peer: &PeerId) -> Vec { - let mut failed = vec![]; - for hash in self - .candidates_by_peer - .remove(peer) - .map(|hashes| hashes.into_iter()) - .into_iter() - .flatten() - { - if let Some(peers) = self.candidates_by_hash.get_mut(&hash) { - peers.retain(|p| p != peer); - if peers.is_empty() && self.running_by_hash.get(&hash).is_none() { - failed.push(hash); - } - } - } - if let Some(hash) = self.running_by_peer.remove(&peer) { - self.running_by_hash.remove(&hash); - if self.candidates_by_hash.get(&hash).is_none() { - failed.push(hash); - } - } - failed - } - - pub fn on_not_found(&mut self, hash: Hash, peer: PeerId) { - let peer2 = self.running_by_hash.remove(&hash); - debug_assert_eq!(peer2, Some(peer)); - self.running_by_peer.remove(&peer); - self.ensure_no_empty(hash, peer); - } - - fn ensure_no_empty(&mut self, hash: Hash, peer: PeerId) { - if self - .candidates_by_peer - .get(&peer) - .map_or(false, |hashes| hashes.is_empty()) - { - self.candidates_by_peer.remove(&peer); - } - if self - .candidates_by_hash - .get(&hash) - .map_or(false, |peers| peers.is_empty()) - { - self.candidates_by_hash.remove(&hash); - } - } -} - -#[cfg(test)] -mod test {} From e4e6bc8ca584e2643df03ca3932f213c1a74348b Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 15:27:15 +0200 Subject: [PATCH 40/45] fix: rebase --- iroh-sync/src/sync.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh-sync/src/sync.rs b/iroh-sync/src/sync.rs index b569aea487..3cc7767df9 100644 --- a/iroh-sync/src/sync.rs +++ b/iroh-sync/src/sync.rs @@ -924,7 +924,7 @@ mod tests { rounds += 1; println!("round {}", rounds); if let Some(msg) = bob.sync_process_message(msg, None).map_err(Into::into)? 
{ - next_to_bob = alice.sync_process_message(msg, None).map_err(Into::into); + next_to_bob = alice.sync_process_message(msg, None).map_err(Into::into)?; } } From bafdc638c0533b17723e0210b03e55f4203d48de Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 16:13:44 +0200 Subject: [PATCH 41/45] fix imports --- iroh/src/download.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iroh/src/download.rs b/iroh/src/download.rs index c43640f38f..08679aac1b 100644 --- a/iroh/src/download.rs +++ b/iroh/src/download.rs @@ -20,7 +20,7 @@ use tokio_stream::StreamExt; use tracing::{debug, error, warn}; // TODO: Move metrics to iroh-bytes metrics -use super::sync::metrics::Metrics; +use crate::sync::metrics::Metrics; // TODO: Will be replaced by proper persistent DB once // https://github.com/n0-computer/iroh/pull/1320 is merged use crate::database::flat::writable::WritableFileDatabase; From 1cf78b2e6f06f900a2b8246bde9c3549efc47f1b Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 16:16:26 +0200 Subject: [PATCH 42/45] chore: fmt --- iroh/src/sync.rs | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/iroh/src/sync.rs b/iroh/src/sync.rs index f56e4b5459..b710b70da5 100644 --- a/iroh/src/sync.rs +++ b/iroh/src/sync.rs @@ -4,7 +4,7 @@ use std::net::SocketAddr; use anyhow::{bail, ensure, Context, Result}; use bytes::BytesMut; -use iroh_net::{tls::PeerId, MagicEndpoint, magic_endpoint::get_peer_id}; +use iroh_net::{magic_endpoint::get_peer_id, tls::PeerId, MagicEndpoint}; use iroh_sync::{ store, sync::{NamespaceId, Replica}, @@ -146,7 +146,9 @@ pub async fn run_bob { debug!("starting sync for {}", namespace); - if let Some(msg) = r.sync_process_message(message, peer).map_err(Into::into)? { + if let Some(msg) = + r.sync_process_message(message, peer).map_err(Into::into)? + { send_sync_message(writer, msg).await?; } else { break; @@ -161,7 +163,10 @@ pub async fn run_bob match replica { Some(ref replica) => { - if let Some(msg) = replica.sync_process_message(msg, peer).map_err(Into::into)? { + if let Some(msg) = replica + .sync_process_message(msg, peer) + .map_err(Into::into)? 
+ { send_sync_message(writer, msg).await?; } else { break; @@ -237,8 +242,13 @@ mod tests { let (mut alice_reader, mut alice_writer) = tokio::io::split(alice); let replica = alice_replica.clone(); let alice_task = tokio::task::spawn(async move { - run_alice::(&mut alice_writer, &mut alice_reader, &replica, None) - .await + run_alice::( + &mut alice_writer, + &mut alice_reader, + &replica, + None, + ) + .await }); let (mut bob_reader, mut bob_writer) = tokio::io::split(bob); @@ -248,7 +258,7 @@ mod tests { &mut bob_writer, &mut bob_reader, bob_replica_store_task, - None + None, ) .await }); From a22c9e70d3eab3a5a3777c761a5a6f3813e838db Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 16:18:41 +0200 Subject: [PATCH 43/45] fix: metrics --- iroh/src/download.rs | 2 +- iroh/src/metrics.rs | 10 ++++++++++ iroh/src/sync/metrics.rs | 10 ---------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/iroh/src/download.rs b/iroh/src/download.rs index 08679aac1b..aaa12630be 100644 --- a/iroh/src/download.rs +++ b/iroh/src/download.rs @@ -20,7 +20,7 @@ use tokio_stream::StreamExt; use tracing::{debug, error, warn}; // TODO: Move metrics to iroh-bytes metrics -use crate::sync::metrics::Metrics; +use crate::metrics::Metrics; // TODO: Will be replaced by proper persistent DB once // https://github.com/n0-computer/iroh/pull/1320 is merged use crate::database::flat::writable::WritableFileDatabase; diff --git a/iroh/src/metrics.rs b/iroh/src/metrics.rs index 3b3b7f46ef..74355f2a09 100644 --- a/iroh/src/metrics.rs +++ b/iroh/src/metrics.rs @@ -10,6 +10,11 @@ pub struct Metrics { pub requests_total: Counter, pub bytes_sent: Counter, pub bytes_received: Counter, + pub download_bytes_total: Counter, + pub download_time_total: Counter, + pub downloads_success: Counter, + pub downloads_error: Counter, + pub downloads_notfound: Counter, } impl Default for Metrics { @@ -18,6 +23,11 @@ impl Default for Metrics { requests_total: Counter::new("Total number of requests received"), bytes_sent: Counter::new("Number of bytes streamed"), bytes_received: Counter::new("Number of bytes received"), + download_bytes_total: Counter::new("Total number of content bytes downloaded"), + download_time_total: Counter::new("Total time in ms spent downloading content bytes"), + downloads_success: Counter::new("Total number of successfull downloads"), + downloads_error: Counter::new("Total number of downloads failed with error"), + downloads_notfound: Counter::new("Total number of downloads failed with not found"), } } } diff --git a/iroh/src/sync/metrics.rs b/iroh/src/sync/metrics.rs index 257f2afa07..37185e6cec 100644 --- a/iroh/src/sync/metrics.rs +++ b/iroh/src/sync/metrics.rs @@ -11,11 +11,6 @@ pub struct Metrics { pub new_entries_remote: Counter, pub new_entries_local_size: Counter, pub new_entries_remote_size: Counter, - pub download_bytes_total: Counter, - pub download_time_total: Counter, - pub downloads_success: Counter, - pub downloads_error: Counter, - pub downloads_notfound: Counter, pub initial_sync_success: Counter, pub initial_sync_failed: Counter, } @@ -27,11 +22,6 @@ impl Default for Metrics { new_entries_remote: Counter::new("Number of document entries added by peers"), new_entries_local_size: Counter::new("Total size of entry contents added locally"), new_entries_remote_size: Counter::new("Total size of entry contents added by peers"), - download_bytes_total: Counter::new("Total number of content bytes downloaded"), - download_time_total: Counter::new("Total time in ms spent 
downloading content bytes"), - downloads_success: Counter::new("Total number of successfull downloads"), - downloads_error: Counter::new("Total number of downloads failed with error"), - downloads_notfound: Counter::new("Total number of downloads failed with not found"), initial_sync_success: Counter::new("Number of successfull initial syncs "), initial_sync_failed: Counter::new("Number of failed initial syncs"), } From 3fc96bdac26bf142de2cfa10c22cdcfac3155b4a Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 16:53:46 +0200 Subject: [PATCH 44/45] fix: feature flags --- iroh/src/download.rs | 6 +++++- iroh/src/lib.rs | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/iroh/src/download.rs b/iroh/src/download.rs index aaa12630be..681a65b168 100644 --- a/iroh/src/download.rs +++ b/iroh/src/download.rs @@ -19,7 +19,7 @@ use tokio::sync::oneshot; use tokio_stream::StreamExt; use tracing::{debug, error, warn}; -// TODO: Move metrics to iroh-bytes metrics +#[cfg(feature = "metrics")] use crate::metrics::Metrics; // TODO: Will be replaced by proper persistent DB once // https://github.com/n0-computer/iroh/pull/1320 is merged @@ -207,9 +207,12 @@ impl DownloadActor { let conn = self.conns.get(&peer).unwrap().clone(); let blobs = self.db.clone(); let fut = async move { +#[cfg(feature = "metrics")] let start = Instant::now(); let res = blobs.download_single(conn, hash).await; // record metrics +#[cfg(feature = "metrics")] + { let elapsed = start.elapsed().as_millis(); match &res { Ok(Some((_hash, len))) => { @@ -220,6 +223,7 @@ impl DownloadActor { Ok(None) => inc!(Metrics, downloads_notfound), Err(_) => inc!(Metrics, downloads_error), } + } (peer, hash, res) }; self.pending_download_futs.push(fut.boxed_local()); diff --git a/iroh/src/lib.rs b/iroh/src/lib.rs index 345ffa344b..6a829fffec 100644 --- a/iroh/src/lib.rs +++ b/iroh/src/lib.rs @@ -8,6 +8,8 @@ pub use iroh_net as net; pub mod collection; pub mod database; pub mod dial; +// TODO: Remove feature flag once https://github.com/n0-computer/iroh/pull/1320 is merged +#[cfg(feature = "flat-db")] pub mod download; pub mod node; pub mod rpc_protocol; From 9f12429bbbf31c1d0d24bd18a43f347846908583 Mon Sep 17 00:00:00 2001 From: "Franz Heinzmann (Frando)" Date: Mon, 7 Aug 2023 16:56:24 +0200 Subject: [PATCH 45/45] chore: fmt --- iroh/src/download.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/iroh/src/download.rs b/iroh/src/download.rs index 681a65b168..60be7b3f92 100644 --- a/iroh/src/download.rs +++ b/iroh/src/download.rs @@ -207,22 +207,22 @@ impl DownloadActor { let conn = self.conns.get(&peer).unwrap().clone(); let blobs = self.db.clone(); let fut = async move { -#[cfg(feature = "metrics")] + #[cfg(feature = "metrics")] let start = Instant::now(); let res = blobs.download_single(conn, hash).await; // record metrics -#[cfg(feature = "metrics")] + #[cfg(feature = "metrics")] { - let elapsed = start.elapsed().as_millis(); - match &res { - Ok(Some((_hash, len))) => { - inc!(Metrics, downloads_success); - inc_by!(Metrics, download_bytes_total, *len); - inc_by!(Metrics, download_time_total, elapsed as u64); + let elapsed = start.elapsed().as_millis(); + match &res { + Ok(Some((_hash, len))) => { + inc!(Metrics, downloads_success); + inc_by!(Metrics, download_bytes_total, *len); + inc_by!(Metrics, download_time_total, elapsed as u64); + } + Ok(None) => inc!(Metrics, downloads_notfound), + Err(_) => inc!(Metrics, downloads_error), } - Ok(None) => inc!(Metrics, 
downloads_notfound), - Err(_) => inc!(Metrics, downloads_error), - } } (peer, hash, res) }; @@ -283,14 +283,14 @@ impl DownloadQueue { peers.retain(|p| p != &peer); } self.ensure_no_empty(hash, peer); - return Some(hash); + Some(hash) } else { None } } pub fn has_no_candidates(&self, hash: &Hash) -> bool { - self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(&hash).is_none() + self.candidates_by_hash.get(hash).is_none() && self.running_by_hash.get(hash).is_none() } pub fn on_success(&mut self, hash: Hash, peer: PeerId) -> Option<(PeerId, Hash)> { @@ -316,7 +316,7 @@ impl DownloadQueue { } } } - if let Some(hash) = self.running_by_peer.remove(&peer) { + if let Some(hash) = self.running_by_peer.remove(peer) { self.running_by_hash.remove(&hash); if self.candidates_by_hash.get(&hash).is_none() { failed.push(hash);
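// A behavioral sketch (illustration only; DownloadQueue is private to this module) of the
// bookkeeping above: candidates are indexed both by hash and by peer, and at most one
// download runs per peer at a time. `hash` and `peer` are assumed to be in scope.
let mut queue = DownloadQueue::default();
queue.push_candidate(hash, peer);
// try_next_for_peer hands out a hash only if this peer has a queued candidate that is not
// already running, and records it as running for this peer.
if let Some(next) = queue.try_next_for_peer(peer) {
    // ...perform the get request for `next`, then report the outcome; on_success releases
    // the peer and immediately offers its next queued hash, if any.
    let _follow_up: Option<(PeerId, Hash)> = queue.on_success(next, peer);
}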