From 7dd53e17ec61706ec49429e27d97ae1c7c2524b2 Mon Sep 17 00:00:00 2001 From: tomg10 Date: Thu, 8 Feb 2024 17:26:18 +0100 Subject: [PATCH 01/27] EN integration Signed-off-by: tomg10 --- Cargo.lock | 2 ++ core/bin/external_node/Cargo.toml | 2 ++ core/bin/external_node/src/config/mod.rs | 14 ++++++++++ core/bin/external_node/src/main.rs | 35 +++++++++++++++++++++--- core/lib/snapshots_applier/src/lib.rs | 21 ++++++++++++++ 5 files changed, 70 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 12077df924b9..0d7e5d991e36 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8680,6 +8680,8 @@ dependencies = [ "zksync_core", "zksync_dal", "zksync_health_check", + "zksync_object_store", + "zksync_snapshots_applier", "zksync_state", "zksync_storage", "zksync_types", diff --git a/core/bin/external_node/Cargo.toml b/core/bin/external_node/Cargo.toml index 725ad5a56992..f6de50b5b461 100644 --- a/core/bin/external_node/Cargo.toml +++ b/core/bin/external_node/Cargo.toml @@ -19,6 +19,8 @@ zksync_utils = { path = "../../lib/utils" } zksync_state = { path = "../../lib/state" } zksync_basic_types = { path = "../../lib/basic_types" } zksync_contracts = { path = "../../lib/contracts" } +zksync_snapshots_applier = {path = "../../lib/snapshots_applier"} +zksync_object_store = {path="../../lib/object_store"} zksync_concurrency = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } zksync_consensus_roles = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } diff --git a/core/bin/external_node/src/config/mod.rs b/core/bin/external_node/src/config/mod.rs index 80caab713a70..312b67eb1818 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -428,6 +428,20 @@ pub(crate) fn read_consensus_config() -> anyhow::Result anyhow::Result { + // TODO add instructions where to find this 
bucket url + let snapshots_bucket_base_url = std::env::var("EN_SNAPSHOTS_RECOVERY_BUCKET_BASE_URL") + .context("EN_SNAPSHOTS_RECOVERY_BUCKET_BASE_URL env variable needs to be set if snapshots recovery is enabled")?; + Ok(SnapshotsRecoveryConfig { + snapshots_bucket_base_url, + }) +} + /// External Node Config contains all the configuration required for the EN operation. /// It is split into three parts: required, optional and remote for easier navigation. #[derive(Debug, Clone)] diff --git a/core/bin/external_node/src/main.rs b/core/bin/external_node/src/main.rs index 8ee0b9fc26a5..ccb4f3deb52e 100644 --- a/core/bin/external_node/src/main.rs +++ b/core/bin/external_node/src/main.rs @@ -8,7 +8,10 @@ use prometheus_exporter::PrometheusExporterConfig; use tokio::{sync::watch, task, time::sleep}; use zksync_basic_types::{Address, L2ChainId}; use zksync_concurrency::{ctx, scope}; -use zksync_config::configs::database::MerkleTreeMode; +use zksync_config::{ + configs::{database::MerkleTreeMode, object_store::ObjectStoreMode}, + ObjectStoreConfig, +}; use zksync_core::{ api_server::{ execution_sandbox::VmConcurrencyLimiter, @@ -35,6 +38,8 @@ use zksync_core::{ }; use zksync_dal::{healthcheck::ConnectionPoolHealthCheck, ConnectionPool}; use zksync_health_check::CheckHealth; +use zksync_object_store::ObjectStoreFactory; +use zksync_snapshots_applier::SnapshotsApplier; use zksync_state::PostgresStorageCaches; use zksync_storage::RocksDB; use zksync_utils::wait_for_tasks::wait_for_tasks; @@ -45,7 +50,7 @@ mod metrics; const RELEASE_MANIFEST: &str = std::include_str!("../../../../.github/release-please/manifest.json"); -use crate::config::ExternalNodeConfig; +use crate::config::{read_snapshots_recovery_config, ExternalNodeConfig}; /// Creates the state keeper configured to work in the external node mode. 
#[allow(clippy::too_many_arguments)] @@ -385,6 +390,8 @@ struct Cli { revert_pending_l1_batch: bool, #[arg(long)] enable_consensus: bool, + #[arg(long)] + enable_snapshots_recovery: bool, } #[tokio::main] @@ -422,10 +429,13 @@ async fn main() -> anyhow::Result<()> { config.consensus = Some(config::read_consensus_config().context("read_consensus_config()")?); } + let main_node_url = config .required .main_node_url() .context("Main node URL is incorrect")?; + let main_node_client = ::json_rpc(&main_node_url) + .context("Failed creating JSON-RPC client for main node")?; let connection_pool = ConnectionPool::builder( &config.postgres.database_url, @@ -434,6 +444,25 @@ async fn main() -> anyhow::Result<()> { .build() .await .context("failed to build a connection_pool")?; + + if opt.enable_snapshots_recovery { + let recovery_config = read_snapshots_recovery_config()?; + let object_store_config = ObjectStoreConfig { + bucket_base_url: recovery_config.snapshots_bucket_base_url.to_string(), + mode: ObjectStoreMode::GCSAnonymousReadOnly, + file_backed_base_path: "".to_string(), // not used + gcs_credential_file_path: "".to_string(), // not used + max_retries: 5, + }; + let blob_store = ObjectStoreFactory::new(object_store_config) + .create_store() + .await; + + SnapshotsApplier::load_snapshot(&connection_pool, &main_node_client, &blob_store) + .await + .unwrap(); + } + if opt.revert_pending_l1_batch { tracing::info!("Rolling pending L1 batch back.."); let reverter = BlockReverter::new( @@ -471,8 +500,6 @@ async fn main() -> anyhow::Result<()> { tracing::info!("Main node URL is: {}", main_node_url); // Make sure that genesis is performed. 
- let main_node_client = ::json_rpc(&main_node_url) - .context("Failed creating JSON-RPC client for main node")?; perform_genesis_if_needed( &mut connection_pool.access_storage().await.unwrap(), config.remote.l2_chain_id, diff --git a/core/lib/snapshots_applier/src/lib.rs b/core/lib/snapshots_applier/src/lib.rs index 15ba95a1a96b..c7d2a00e1ce8 100644 --- a/core/lib/snapshots_applier/src/lib.rs +++ b/core/lib/snapshots_applier/src/lib.rs @@ -16,6 +16,8 @@ use zksync_types::{ }; use zksync_utils::bytecode::hash_bytecode; use zksync_web3_decl::jsonrpsee::core::{client::Error, ClientError as RpcError}; +use zksync_web3_decl::jsonrpsee::http_client::HttpClient; +use zksync_web3_decl::namespaces::{EnNamespaceClient, SnapshotsNamespaceClient}; use self::metrics::{InitialStage, StorageLogsChunksStage, METRICS}; @@ -82,6 +84,25 @@ pub trait SnapshotsApplierMainNodeClient: fmt::Debug + Send + Sync { async fn fetch_newest_snapshot(&self) -> Result, RpcError>; } +#[async_trait] +impl SnapshotsApplierMainNodeClient for HttpClient { + async fn fetch_l2_block(&self, number: MiniblockNumber) -> Result, RpcError> { + Ok(self.sync_l2_block(number, false).await?) + } + + async fn fetch_newest_snapshot(&self) -> Result, RpcError> { + let snapshots = self.get_all_snapshots().await?; + if snapshots.snapshots_l1_batch_numbers.is_empty() { + Ok(None) + } else { + let newest_snapshot = snapshots.snapshots_l1_batch_numbers[0]; + Ok(self + .get_snapshot_by_l1_batch_number(newest_snapshot) + .await?) + } + } +} + /// Applying application-level storage snapshots to the Postgres storage. 
#[derive(Debug)] pub struct SnapshotsApplier<'a> { From b72a50d664ddef52a05b9260554705198de83d7f Mon Sep 17 00:00:00 2001 From: tomg10 Date: Thu, 8 Feb 2024 17:26:52 +0100 Subject: [PATCH 02/27] zk fmt Signed-off-by: tomg10 --- core/lib/snapshots_applier/src/lib.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/lib/snapshots_applier/src/lib.rs b/core/lib/snapshots_applier/src/lib.rs index c7d2a00e1ce8..bcf558aa046c 100644 --- a/core/lib/snapshots_applier/src/lib.rs +++ b/core/lib/snapshots_applier/src/lib.rs @@ -15,9 +15,13 @@ use zksync_types::{ MiniblockNumber, H256, }; use zksync_utils::bytecode::hash_bytecode; -use zksync_web3_decl::jsonrpsee::core::{client::Error, ClientError as RpcError}; -use zksync_web3_decl::jsonrpsee::http_client::HttpClient; -use zksync_web3_decl::namespaces::{EnNamespaceClient, SnapshotsNamespaceClient}; +use zksync_web3_decl::{ + jsonrpsee::{ + core::{client::Error, ClientError as RpcError}, + http_client::HttpClient, + }, + namespaces::{EnNamespaceClient, SnapshotsNamespaceClient}, +}; use self::metrics::{InitialStage, StorageLogsChunksStage, METRICS}; From 0c3d5f0b1bc31cff55b30ff06edd95762bbe4b95 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Mon, 12 Feb 2024 17:17:21 +0200 Subject: [PATCH 03/27] Rework `ObjectStoreConfig` to be more typesafe --- core/lib/config/src/configs/object_store.rs | 39 +++++-- core/lib/config/src/testonly.rs | 20 ++-- core/lib/env_config/src/object_store.rs | 62 +++++++---- core/lib/object_store/src/raw.rs | 30 ++--- core/lib/protobuf_config/src/object_store.rs | 105 +++++++++++------- .../src/proto/object_store.proto | 7 +- 6 files changed, 164 insertions(+), 99 deletions(-) diff --git a/core/lib/config/src/configs/object_store.rs b/core/lib/config/src/configs/object_store.rs index 4cf5553d639e..e5c709fbf545 100644 --- a/core/lib/config/src/configs/object_store.rs +++ b/core/lib/config/src/configs/object_store.rs @@ -1,19 +1,34 @@ use serde::Deserialize; 
-#[derive(Debug, Deserialize, Eq, PartialEq, Clone, Copy)] -pub enum ObjectStoreMode { - GCS, - GCSWithCredentialFile, - FileBacked, - GCSAnonymousReadOnly, -} - /// Configuration for the object store -#[derive(Debug, Deserialize, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Deserialize)] pub struct ObjectStoreConfig { - pub bucket_base_url: String, + #[serde(flatten)] pub mode: ObjectStoreMode, - pub file_backed_base_path: String, - pub gcs_credential_file_path: String, + #[serde(default = "ObjectStoreConfig::default_max_retries")] pub max_retries: u16, } + +impl ObjectStoreConfig { + const fn default_max_retries() -> u16 { + 5 + } +} + +#[derive(Debug, Clone, PartialEq, Deserialize)] +#[serde(tag = "mode")] +pub enum ObjectStoreMode { + GCS { + bucket_base_url: String, + }, + GCSAnonymousReadOnly { + bucket_base_url: String, + }, + GCSWithCredentialFile { + bucket_base_url: String, + gcs_credential_file_path: String, + }, + FileBacked { + file_backed_base_path: String, + }, +} diff --git a/core/lib/config/src/testonly.rs b/core/lib/config/src/testonly.rs index 64a38ff51ae2..4d51c8f3ff5c 100644 --- a/core/lib/config/src/testonly.rs +++ b/core/lib/config/src/testonly.rs @@ -633,10 +633,19 @@ impl RandomConfig for configs::house_keeper::HouseKeeperConfig { impl RandomConfig for configs::object_store::ObjectStoreMode { fn sample(g: &mut Gen) -> Self { match g.rng.gen_range(0..4) { - 0 => Self::GCS, - 1 => Self::GCSWithCredentialFile, - 2 => Self::FileBacked, - _ => Self::GCSAnonymousReadOnly, + 0 => Self::GCS { + bucket_base_url: g.gen(), + }, + 1 => Self::GCSWithCredentialFile { + bucket_base_url: g.gen(), + gcs_credential_file_path: g.gen(), + }, + 2 => Self::FileBacked { + file_backed_base_path: g.gen(), + }, + _ => Self::GCSAnonymousReadOnly { + bucket_base_url: g.gen(), + }, } } } @@ -644,10 +653,7 @@ impl RandomConfig for configs::object_store::ObjectStoreMode { impl RandomConfig for configs::ObjectStoreConfig { fn sample(g: &mut Gen) -> Self { Self { 
- bucket_base_url: g.gen(), mode: g.gen(), - file_backed_base_path: g.gen(), - gcs_credential_file_path: g.gen(), max_retries: g.gen(), } } diff --git a/core/lib/env_config/src/object_store.rs b/core/lib/env_config/src/object_store.rs index 23b1abaf5166..e9d31093c68c 100644 --- a/core/lib/env_config/src/object_store.rs +++ b/core/lib/env_config/src/object_store.rs @@ -49,12 +49,12 @@ mod tests { static MUTEX: EnvMutex = EnvMutex::new(); - fn expected_config(bucket_base_url: &str) -> ObjectStoreConfig { + fn expected_gcs_config(bucket_base_url: &str) -> ObjectStoreConfig { ObjectStoreConfig { - bucket_base_url: bucket_base_url.to_string(), - mode: ObjectStoreMode::FileBacked, - file_backed_base_path: "artifacts".to_string(), - gcs_credential_file_path: "/path/to/credentials.json".to_string(), + mode: ObjectStoreMode::GCSWithCredentialFile { + bucket_base_url: bucket_base_url.to_owned(), + gcs_credential_file_path: "/path/to/credentials.json".to_owned(), + }, max_retries: 5, } } @@ -64,14 +64,30 @@ mod tests { let mut lock = MUTEX.lock(); let config = r#" OBJECT_STORE_BUCKET_BASE_URL="/base/url" - OBJECT_STORE_MODE="FileBacked" - OBJECT_STORE_FILE_BACKED_BASE_PATH="artifacts" + OBJECT_STORE_MODE="GCSWithCredentialFile" OBJECT_STORE_GCS_CREDENTIAL_FILE_PATH="/path/to/credentials.json" OBJECT_STORE_MAX_RETRIES="5" "#; lock.set_env(config); let actual = ObjectStoreConfig::from_env().unwrap(); - assert_eq!(actual, expected_config("/base/url")); + assert_eq!(actual, expected_gcs_config("/base/url")); + } + + #[test] + fn file_backed_config_from_env() { + let mut lock = MUTEX.lock(); + let config = r#" + OBJECT_STORE_MODE="FileBacked" + OBJECT_STORE_FILE_BACKED_BASE_PATH="artifacts" + "#; + lock.set_env(config); + let actual = ObjectStoreConfig::from_env().unwrap(); + assert_eq!( + actual.mode, + ObjectStoreMode::FileBacked { + file_backed_base_path: "artifacts".to_owned(), + } + ); } #[test] @@ -79,14 +95,18 @@ mod tests { let mut lock = MUTEX.lock(); let config = r#" 
PUBLIC_OBJECT_STORE_BUCKET_BASE_URL="/public_base_url" - PUBLIC_OBJECT_STORE_MODE="FileBacked" - PUBLIC_OBJECT_STORE_FILE_BACKED_BASE_PATH="artifacts" - PUBLIC_OBJECT_STORE_GCS_CREDENTIAL_FILE_PATH="/path/to/credentials.json" - PUBLIC_OBJECT_STORE_MAX_RETRIES="5" + PUBLIC_OBJECT_STORE_MODE="GCSAnonymousReadOnly" + PUBLIC_OBJECT_STORE_MAX_RETRIES="3" "#; lock.set_env(config); let actual = PublicObjectStoreConfig::from_env().unwrap().0; - assert_eq!(actual, expected_config("/public_base_url")); + assert_eq!(actual.max_retries, 3); + assert_eq!( + actual.mode, + ObjectStoreMode::GCSAnonymousReadOnly { + bucket_base_url: "/public_base_url".to_owned(), + } + ); } #[test] @@ -94,14 +114,13 @@ mod tests { let mut lock = MUTEX.lock(); let config = r#" PROVER_OBJECT_STORE_BUCKET_BASE_URL="/prover_base_url" - PROVER_OBJECT_STORE_MODE="FileBacked" - PROVER_OBJECT_STORE_FILE_BACKED_BASE_PATH="artifacts" + PROVER_OBJECT_STORE_MODE="GCSWithCredentialFile" PROVER_OBJECT_STORE_GCS_CREDENTIAL_FILE_PATH="/path/to/credentials.json" PROVER_OBJECT_STORE_MAX_RETRIES="5" "#; lock.set_env(config); let actual = ProverObjectStoreConfig::from_env().unwrap().0; - assert_eq!(actual, expected_config("/prover_base_url")); + assert_eq!(actual, expected_gcs_config("/prover_base_url")); } #[test] @@ -109,13 +128,16 @@ mod tests { let mut lock = MUTEX.lock(); let config = r#" SNAPSHOTS_OBJECT_STORE_BUCKET_BASE_URL="/snapshots_base_url" - SNAPSHOTS_OBJECT_STORE_MODE="FileBacked" - SNAPSHOTS_OBJECT_STORE_FILE_BACKED_BASE_PATH="artifacts" - SNAPSHOTS_OBJECT_STORE_GCS_CREDENTIAL_FILE_PATH="/path/to/credentials.json" + SNAPSHOTS_OBJECT_STORE_MODE="GCS" SNAPSHOTS_OBJECT_STORE_MAX_RETRIES="5" "#; lock.set_env(config); let actual = SnapshotsObjectStoreConfig::from_env().unwrap().0; - assert_eq!(actual, expected_config("/snapshots_base_url")); + assert_eq!( + actual.mode, + ObjectStoreMode::GCS { + bucket_base_url: "/snapshots_base_url".to_owned(), + } + ); } } diff --git a/core/lib/object_store/src/raw.rs 
b/core/lib/object_store/src/raw.rs index 61340343c734..1776612577bd 100644 --- a/core/lib/object_store/src/raw.rs +++ b/core/lib/object_store/src/raw.rs @@ -190,46 +190,46 @@ impl ObjectStoreFactory { } async fn create_from_config(config: &ObjectStoreConfig) -> Arc { - let gcs_credential_file_path = match config.mode { - ObjectStoreMode::GCSWithCredentialFile => Some(config.gcs_credential_file_path.clone()), - _ => None, - }; - match config.mode { - ObjectStoreMode::GCS => { + match &config.mode { + ObjectStoreMode::GCS { bucket_base_url } => { tracing::trace!( "Initialized GoogleCloudStorage Object store without credential file" ); let store = GoogleCloudStorage::new( GoogleCloudStorageAuthMode::Authenticated, - config.bucket_base_url.clone(), + bucket_base_url.clone(), config.max_retries, ) .await; Arc::new(store) } - ObjectStoreMode::GCSWithCredentialFile => { + ObjectStoreMode::GCSWithCredentialFile { + bucket_base_url, + gcs_credential_file_path, + } => { tracing::trace!("Initialized GoogleCloudStorage Object store with credential file"); let store = GoogleCloudStorage::new( GoogleCloudStorageAuthMode::AuthenticatedWithCredentialFile( - gcs_credential_file_path - .expect("Credentials path must be provided for GCSWithCredentialFile"), + gcs_credential_file_path.clone(), ), - config.bucket_base_url.clone(), + bucket_base_url.clone(), config.max_retries, ) .await; Arc::new(store) } - ObjectStoreMode::FileBacked => { + ObjectStoreMode::FileBacked { + file_backed_base_path, + } => { tracing::trace!("Initialized FileBacked Object store"); - let store = FileBackedObjectStore::new(config.file_backed_base_path.clone()).await; + let store = FileBackedObjectStore::new(file_backed_base_path.clone()).await; Arc::new(store) } - ObjectStoreMode::GCSAnonymousReadOnly => { + ObjectStoreMode::GCSAnonymousReadOnly { bucket_base_url } => { tracing::trace!("Initialized GoogleCloudStoragePublicReadOnly store"); let store = GoogleCloudStorage::new( 
GoogleCloudStorageAuthMode::Anonymous, - config.bucket_base_url.clone(), + bucket_base_url.clone(), config.max_retries, ) .await; diff --git a/core/lib/protobuf_config/src/object_store.rs b/core/lib/protobuf_config/src/object_store.rs index b845007caa67..6299d724bd35 100644 --- a/core/lib/protobuf_config/src/object_store.rs +++ b/core/lib/protobuf_config/src/object_store.rs @@ -1,47 +1,43 @@ use anyhow::Context as _; -use zksync_config::configs; +use zksync_config::configs::object_store::{ObjectStoreConfig, ObjectStoreMode}; use zksync_protobuf::required; use crate::{proto, repr::ProtoRepr}; -impl proto::ObjectStoreMode { - fn new(x: &configs::object_store::ObjectStoreMode) -> Self { - type From = configs::object_store::ObjectStoreMode; - match x { - From::GCS => Self::Gcs, - From::GCSWithCredentialFile => Self::GcsWithCredentialFile, - From::FileBacked => Self::FileBacked, - From::GCSAnonymousReadOnly => Self::GcsAnonymousReadOnly, - } - } - fn parse(&self) -> configs::object_store::ObjectStoreMode { - type To = configs::object_store::ObjectStoreMode; - match self { - Self::Gcs => To::GCS, - Self::GcsWithCredentialFile => To::GCSWithCredentialFile, - Self::FileBacked => To::FileBacked, - Self::GcsAnonymousReadOnly => To::GCSAnonymousReadOnly, - } - } -} - impl ProtoRepr for proto::ObjectStore { - type Type = configs::ObjectStoreConfig; + type Type = ObjectStoreConfig; + fn read(&self) -> anyhow::Result { + let mode = match self.mode() { + proto::ObjectStoreMode::Gcs => ObjectStoreMode::GCS { + bucket_base_url: required(&self.bucket_base_url) + .context("bucket_base_url")? + .clone(), + }, + proto::ObjectStoreMode::GcsWithCredentialFile => { + ObjectStoreMode::GCSWithCredentialFile { + bucket_base_url: required(&self.bucket_base_url) + .context("bucket_base_url")? + .clone(), + gcs_credential_file_path: required(&self.gcs_credential_file_path) + .context("gcs_credential_file_path")? 
+ .clone(), + } + } + proto::ObjectStoreMode::GcsAnonymousReadOnly => ObjectStoreMode::GCSAnonymousReadOnly { + bucket_base_url: required(&self.bucket_base_url) + .context("bucket_base_url")? + .clone(), + }, + proto::ObjectStoreMode::FileBacked => ObjectStoreMode::FileBacked { + file_backed_base_path: required(&self.file_backed_base_path) + .context("file_backed_base_path")? + .clone(), + }, + }; + Ok(Self::Type { - bucket_base_url: required(&self.bucket_base_url) - .context("bucket_base_url")? - .clone(), - mode: required(&self.mode) - .and_then(|x| Ok(proto::ObjectStoreMode::try_from(*x)?)) - .context("mode")? - .parse(), - file_backed_base_path: required(&self.file_backed_base_path) - .context("file_backed_base_path")? - .clone(), - gcs_credential_file_path: required(&self.gcs_credential_file_path) - .context("gcs_credential_file_path")? - .clone(), + mode, max_retries: required(&self.max_retries) .and_then(|x| Ok((*x).try_into()?)) .context("max_retries")?, @@ -49,12 +45,37 @@ impl ProtoRepr for proto::ObjectStore { } fn build(this: &Self::Type) -> Self { - Self { - bucket_base_url: Some(this.bucket_base_url.clone()), - mode: Some(proto::ObjectStoreMode::new(&this.mode).into()), - file_backed_base_path: Some(this.file_backed_base_path.clone()), - gcs_credential_file_path: Some(this.gcs_credential_file_path.clone()), - max_retries: Some(this.max_retries.into()), + match &this.mode { + ObjectStoreMode::GCS { bucket_base_url } => Self { + mode: Some(proto::ObjectStoreMode::Gcs.into()), + bucket_base_url: Some(bucket_base_url.clone()), + max_retries: Some(this.max_retries.into()), + ..Self::default() + }, + ObjectStoreMode::GCSWithCredentialFile { + bucket_base_url, + gcs_credential_file_path, + } => Self { + mode: Some(proto::ObjectStoreMode::GcsWithCredentialFile.into()), + bucket_base_url: Some(bucket_base_url.clone()), + gcs_credential_file_path: Some(gcs_credential_file_path.clone()), + max_retries: Some(this.max_retries.into()), + ..Self::default() + }, + 
ObjectStoreMode::GCSAnonymousReadOnly { bucket_base_url } => Self { + mode: Some(proto::ObjectStoreMode::GcsAnonymousReadOnly.into()), + bucket_base_url: Some(bucket_base_url.clone()), + max_retries: Some(this.max_retries.into()), + ..Self::default() + }, + ObjectStoreMode::FileBacked { + file_backed_base_path, + } => Self { + mode: Some(proto::ObjectStoreMode::FileBacked.into()), + file_backed_base_path: Some(file_backed_base_path.clone()), + max_retries: Some(this.max_retries.into()), + ..Self::default() + }, } } } diff --git a/core/lib/protobuf_config/src/proto/object_store.proto b/core/lib/protobuf_config/src/proto/object_store.proto index 941799c07c20..69a371e0010f 100644 --- a/core/lib/protobuf_config/src/proto/object_store.proto +++ b/core/lib/protobuf_config/src/proto/object_store.proto @@ -9,10 +9,11 @@ enum ObjectStoreMode { GCS_ANONYMOUS_READ_ONLY = 3; } +// FIXME: rework to use `oneof` message ObjectStore { - optional string bucket_base_url = 1; // required; url + optional string bucket_base_url = 1; // optional (depends on mode); url optional ObjectStoreMode mode = 2; // required - optional string file_backed_base_path = 3; // required; fs path - optional string gcs_credential_file_path = 4; // required; fs path + optional string file_backed_base_path = 3; // optional (depends on mode); fs path + optional string gcs_credential_file_path = 4; // optional (depends on mode); fs path optional uint32 max_retries = 5; // required } From c712613423769e70a27a46e5947fd4ff202807f6 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Mon, 12 Feb 2024 17:32:24 +0200 Subject: [PATCH 04/27] Change object store config Protobuf schema --- core/lib/protobuf_config/src/object_store.rs | 80 ++++++++++--------- .../src/proto/object_store.proto | 35 +++++--- 2 files changed, 65 insertions(+), 50 deletions(-) diff --git a/core/lib/protobuf_config/src/object_store.rs b/core/lib/protobuf_config/src/object_store.rs index 6299d724bd35..3fb67f4cc792 100644 --- 
a/core/lib/protobuf_config/src/object_store.rs +++ b/core/lib/protobuf_config/src/object_store.rs @@ -8,29 +8,32 @@ impl ProtoRepr for proto::ObjectStore { type Type = ObjectStoreConfig; fn read(&self) -> anyhow::Result { - let mode = match self.mode() { - proto::ObjectStoreMode::Gcs => ObjectStoreMode::GCS { - bucket_base_url: required(&self.bucket_base_url) + let mode = required(&self.mode).context("mode")?; + let mode = match mode { + proto::object_store::Mode::Gcs(mode) => ObjectStoreMode::GCS { + bucket_base_url: required(&mode.bucket_base_url) .context("bucket_base_url")? .clone(), }, - proto::ObjectStoreMode::GcsWithCredentialFile => { + proto::object_store::Mode::GcsWithCredentialFile(mode) => { ObjectStoreMode::GCSWithCredentialFile { - bucket_base_url: required(&self.bucket_base_url) + bucket_base_url: required(&mode.bucket_base_url) .context("bucket_base_url")? .clone(), - gcs_credential_file_path: required(&self.gcs_credential_file_path) + gcs_credential_file_path: required(&mode.gcs_credential_file_path) .context("gcs_credential_file_path")? .clone(), } } - proto::ObjectStoreMode::GcsAnonymousReadOnly => ObjectStoreMode::GCSAnonymousReadOnly { - bucket_base_url: required(&self.bucket_base_url) - .context("bucket_base_url")? - .clone(), - }, - proto::ObjectStoreMode::FileBacked => ObjectStoreMode::FileBacked { - file_backed_base_path: required(&self.file_backed_base_path) + proto::object_store::Mode::GcsAnonymousReadOnly(mode) => { + ObjectStoreMode::GCSAnonymousReadOnly { + bucket_base_url: required(&mode.bucket_base_url) + .context("bucket_base_url")? + .clone(), + } + } + proto::object_store::Mode::FileBacked(mode) => ObjectStoreMode::FileBacked { + file_backed_base_path: required(&mode.file_backed_base_path) .context("file_backed_base_path")? 
.clone(), }, @@ -45,37 +48,38 @@ impl ProtoRepr for proto::ObjectStore { } fn build(this: &Self::Type) -> Self { - match &this.mode { - ObjectStoreMode::GCS { bucket_base_url } => Self { - mode: Some(proto::ObjectStoreMode::Gcs.into()), - bucket_base_url: Some(bucket_base_url.clone()), - max_retries: Some(this.max_retries.into()), - ..Self::default() - }, + let mode = match &this.mode { + ObjectStoreMode::GCS { bucket_base_url } => { + proto::object_store::Mode::Gcs(proto::object_store::Gcs { + bucket_base_url: Some(bucket_base_url.clone()), + }) + } ObjectStoreMode::GCSWithCredentialFile { bucket_base_url, gcs_credential_file_path, - } => Self { - mode: Some(proto::ObjectStoreMode::GcsWithCredentialFile.into()), - bucket_base_url: Some(bucket_base_url.clone()), - gcs_credential_file_path: Some(gcs_credential_file_path.clone()), - max_retries: Some(this.max_retries.into()), - ..Self::default() - }, - ObjectStoreMode::GCSAnonymousReadOnly { bucket_base_url } => Self { - mode: Some(proto::ObjectStoreMode::GcsAnonymousReadOnly.into()), - bucket_base_url: Some(bucket_base_url.clone()), - max_retries: Some(this.max_retries.into()), - ..Self::default() - }, + } => proto::object_store::Mode::GcsWithCredentialFile( + proto::object_store::GcsWithCredentialFile { + bucket_base_url: Some(bucket_base_url.clone()), + gcs_credential_file_path: Some(gcs_credential_file_path.clone()), + }, + ), + ObjectStoreMode::GCSAnonymousReadOnly { bucket_base_url } => { + proto::object_store::Mode::GcsAnonymousReadOnly( + proto::object_store::GcsAnonymousReadOnly { + bucket_base_url: Some(bucket_base_url.clone()), + }, + ) + } ObjectStoreMode::FileBacked { file_backed_base_path, - } => Self { - mode: Some(proto::ObjectStoreMode::FileBacked.into()), + } => proto::object_store::Mode::FileBacked(proto::object_store::FileBacked { file_backed_base_path: Some(file_backed_base_path.clone()), - max_retries: Some(this.max_retries.into()), - ..Self::default() - }, + }), + }; + + Self { + mode: 
Some(mode), + max_retries: Some(this.max_retries.into()), } } } diff --git a/core/lib/protobuf_config/src/proto/object_store.proto b/core/lib/protobuf_config/src/proto/object_store.proto index 69a371e0010f..41036a16e658 100644 --- a/core/lib/protobuf_config/src/proto/object_store.proto +++ b/core/lib/protobuf_config/src/proto/object_store.proto @@ -2,18 +2,29 @@ syntax = "proto3"; package zksync.config; -enum ObjectStoreMode { - GCS = 0; - GCS_WITH_CREDENTIAL_FILE = 1; - FILE_BACKED = 2; - GCS_ANONYMOUS_READ_ONLY = 3; -} - -// FIXME: rework to use `oneof` message ObjectStore { - optional string bucket_base_url = 1; // optional (depends on mode); url - optional ObjectStoreMode mode = 2; // required - optional string file_backed_base_path = 3; // optional (depends on mode); fs path - optional string gcs_credential_file_path = 4; // optional (depends on mode); fs path + message Gcs { + optional string bucket_base_url = 1; // required; url + } + + message GcsWithCredentialFile { + optional string bucket_base_url = 1; // required; url + optional string gcs_credential_file_path = 4; // required; fs path + } + + message GcsAnonymousReadOnly { + optional string bucket_base_url = 1; // required; url + } + + message FileBacked { + optional string file_backed_base_path = 3; // required; fs path + } + + oneof mode { + Gcs gcs = 1; + GcsWithCredentialFile gcs_with_credential_file = 2; + GcsAnonymousReadOnly gcs_anonymous_read_only = 3; + FileBacked file_backed = 4; + } optional uint32 max_retries = 5; // required } From 0e32255f84616be7cb80d6498410ee78a9ae073e Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Mon, 12 Feb 2024 18:01:03 +0200 Subject: [PATCH 05/27] Make EN snapshot recovery config more versatile --- core/bin/external_node/src/config/mod.rs | 13 ++++++++----- core/bin/external_node/src/main.rs | 20 +++++++------------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/core/bin/external_node/src/config/mod.rs 
b/core/bin/external_node/src/config/mod.rs index 312b67eb1818..96ddc3b93e66 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -4,6 +4,7 @@ use anyhow::Context; use serde::Deserialize; use url::Url; use zksync_basic_types::{Address, L1ChainId, L2ChainId}; +use zksync_config::ObjectStoreConfig; use zksync_consensus_roles::node; use zksync_core::{ api_server::{ @@ -428,17 +429,19 @@ pub(crate) fn read_consensus_config() -> anyhow::Result anyhow::Result { - // TODO add instructions where to find this bucket url - let snapshots_bucket_base_url = std::env::var("EN_SNAPSHOTS_RECOVERY_BUCKET_BASE_URL") - .context("EN_SNAPSHOTS_RECOVERY_BUCKET_BASE_URL env variable needs to be set if snapshots recovery is enabled")?; + let snapshots_object_store = envy::prefixed("EN_SNAPSHOTS_OBJECT_STORE_") + .from_env::() + .context("failed loading snapshot object store config from env variables")?; Ok(SnapshotsRecoveryConfig { - snapshots_bucket_base_url, + snapshots_object_store, }) } diff --git a/core/bin/external_node/src/main.rs b/core/bin/external_node/src/main.rs index ccb4f3deb52e..9539fd1bcb27 100644 --- a/core/bin/external_node/src/main.rs +++ b/core/bin/external_node/src/main.rs @@ -8,10 +8,7 @@ use prometheus_exporter::PrometheusExporterConfig; use tokio::{sync::watch, task, time::sleep}; use zksync_basic_types::{Address, L2ChainId}; use zksync_concurrency::{ctx, scope}; -use zksync_config::{ - configs::{database::MerkleTreeMode, object_store::ObjectStoreMode}, - ObjectStoreConfig, -}; +use zksync_config::configs::database::MerkleTreeMode; use zksync_core::{ api_server::{ execution_sandbox::VmConcurrencyLimiter, @@ -447,20 +444,17 @@ async fn main() -> anyhow::Result<()> { if opt.enable_snapshots_recovery { let recovery_config = read_snapshots_recovery_config()?; - let object_store_config = ObjectStoreConfig { - bucket_base_url: recovery_config.snapshots_bucket_base_url.to_string(), - mode: 
ObjectStoreMode::GCSAnonymousReadOnly, - file_backed_base_path: "".to_string(), // not used - gcs_credential_file_path: "".to_string(), // not used - max_retries: 5, - }; - let blob_store = ObjectStoreFactory::new(object_store_config) + let blob_store = ObjectStoreFactory::new(recovery_config.snapshots_object_store) .create_store() .await; + tracing::info!( + "Snapshot recovery is enabled. This is an experimental feature; use at your own risk" + ); SnapshotsApplier::load_snapshot(&connection_pool, &main_node_client, &blob_store) .await - .unwrap(); + .context("snapshot recovery failed")?; + tracing::info!("Snapshot recovery is complete"); } if opt.revert_pending_l1_batch { From 9a157082b1bb3b6a2c9805007517d18a827fda97 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Mon, 12 Feb 2024 20:18:15 +0200 Subject: [PATCH 06/27] Update configs in `/etc` --- etc/env/base/object_store.toml | 12 ------------ etc/env/base/rust.toml | 3 +-- etc/env/ext-node.toml | 6 ++++++ 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/etc/env/base/object_store.toml b/etc/env/base/object_store.toml index 5fd775acb371..78878d123ca6 100644 --- a/etc/env/base/object_store.toml +++ b/etc/env/base/object_store.toml @@ -1,27 +1,15 @@ [object_store] -bucket_base_url="base_url" mode="FileBacked" file_backed_base_path="artifacts" -gcs_credential_file_path="/path/to/gcs_credentials.json" -max_retries=5 [public_object_store] -bucket_base_url="public_base_url" mode="FileBacked" file_backed_base_path="artifacts" -gcs_credential_file_path="/path/to/gcs_credentials.json" -max_retries=5 [prover_object_store] -bucket_base_url="prover_base_url" mode="FileBacked" file_backed_base_path="artifacts" -gcs_credential_file_path="/path/to/gcs_credentials.json" -max_retries=5 [snapshots_object_store] -bucket_base_url="snapshots_base_url" mode="FileBacked" file_backed_base_path="artifacts" -gcs_credential_file_path="/path/to/gcs_credentials.json" -max_retries=5 diff --git a/etc/env/base/rust.toml 
b/etc/env/base/rust.toml index 8eef7700067a..1bd4f5fb258a 100644 --- a/etc/env/base/rust.toml +++ b/etc/env/base/rust.toml @@ -23,9 +23,7 @@ zksync_types=info,\ zksync_mempool=debug,\ loadnext=info,\ vm=info,\ -block_sizes_test=info,\ zksync_object_store=info,\ -en_playground=info,\ zksync_external_node=info,\ zksync_witness_generator=info,\ zksync_prover_fri=info,\ @@ -33,6 +31,7 @@ zksync_witness_vector_generator=info,\ zksync_health_check=debug,\ zksync_proof_fri_compressor=info,\ vise_exporter=debug,\ +snapshots_creator=debug,\ """ # `RUST_BACKTRACE` variable diff --git a/etc/env/ext-node.toml b/etc/env/ext-node.toml index 58298a5501bd..803498c78a0f 100644 --- a/etc/env/ext-node.toml +++ b/etc/env/ext-node.toml @@ -45,6 +45,11 @@ config_path="etc/env/en_consensus_config.json" # node:public:ed25519:147bb71be895846e1d6f5b1c6a8be53848b82bdafcf66e9dfe6ca65581076a1d node_key="node:secret:ed25519:d56de77c738326c305c64c25bffe1cc94ea7c639cf71ca3ff94229df27f167ac" +[en.snapshots.object_store] +mode="FileBacked" +file_backed_base_path="artifacts" +# ^ Intentionally set to coincide with main node's in order to read locally produced snapshots + [rust] # `RUST_LOG` environment variable for `env_logger` # Here we use TOML multiline strings: newlines will be trimmed. 
@@ -64,6 +69,7 @@ zksync_types=info,\ loadnext=info,\ vm=info,\ zksync_external_node=info,\ +zksync_snapshots_applier=debug,\ """ # `RUST_BACKTRACE` variable From be6998f0efa18b04f6dac20fe2aa246acd9de143 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 11:35:43 +0200 Subject: [PATCH 07/27] Fix EN storage initialization --- core/bin/external_node/src/init.rs | 103 ++++++++++++++++++++++ core/bin/external_node/src/main.rs | 40 +++------ core/bin/snapshots_creator/src/creator.rs | 4 + 3 files changed, 118 insertions(+), 29 deletions(-) create mode 100644 core/bin/external_node/src/init.rs diff --git a/core/bin/external_node/src/init.rs b/core/bin/external_node/src/init.rs new file mode 100644 index 000000000000..0a5a97396465 --- /dev/null +++ b/core/bin/external_node/src/init.rs @@ -0,0 +1,103 @@ +//! EN initialization logic. + +use anyhow::Context as _; +use zksync_basic_types::{L1BatchNumber, L2ChainId}; +use zksync_core::sync_layer::genesis::perform_genesis_if_needed; +use zksync_dal::ConnectionPool; +use zksync_object_store::ObjectStoreFactory; +use zksync_snapshots_applier::{SnapshotsApplier, SnapshotsApplierError}; +use zksync_web3_decl::jsonrpsee::http_client::HttpClient; + +use crate::config::read_snapshots_recovery_config; + +#[derive(Debug)] +enum InitDecision { + /// Perform or check genesis. + Genesis, + /// Perform or check snapshot recovery. 
+ SnapshotRecovery { is_complete: bool }, +} + +pub(crate) async fn ensure_storage_initialized( + pool: &ConnectionPool, + main_node_client: &HttpClient, + l2_chain_id: L2ChainId, + consider_snapshot_recovery: bool, +) -> anyhow::Result<()> { + let mut storage = pool.access_storage_tagged("en").await?; + let genesis_l1_batch = storage + .blocks_dal() + .get_l1_batch_header(L1BatchNumber(0)) + .await + .context("failed getting genesis batch info")?; + let snapshot_recovery = storage + .snapshot_recovery_dal() + .get_applied_snapshot_status() + .await + .context("failed getting snapshot recovery info")?; + drop(storage); + + let decision = match (genesis_l1_batch, snapshot_recovery) { + (Some(batch), Some(snapshot_recovery)) => { + anyhow::bail!( + "Node has both genesis L1 batch: {batch:?} and snapshot recovery information: {snapshot_recovery:?}. \ + This is not supported and can be caused by broken snapshot recovery." + ); + } + (Some(batch), None) => { + tracing::info!("Node has a genesis L1 batch: {batch:?} and no snapshot recovery info"); + InitDecision::Genesis + } + (None, Some(snapshot_recovery)) => { + tracing::info!("Node has no genesis L1 batch and snapshot recovery information: {snapshot_recovery:?}"); + InitDecision::SnapshotRecovery { + is_complete: snapshot_recovery.storage_logs_chunks_left_to_process() == 0, + } + } + (None, None) => { + tracing::info!("Node has neither genesis L1 batch, nor snapshot recovery info"); + if consider_snapshot_recovery { + InitDecision::SnapshotRecovery { is_complete: false } + } else { + InitDecision::Genesis + } + } + }; + + tracing::info!("Chosen node initialization strategy: {decision:?}"); + match decision { + InitDecision::Genesis => { + let mut storage = pool.access_storage_tagged("en").await?; + perform_genesis_if_needed(&mut storage, l2_chain_id, main_node_client) + .await + .context("performing genesis failed")?; + } + InitDecision::SnapshotRecovery { is_complete } => { + // Do not require the CLA opt-in once 
the recovery is complete. + anyhow::ensure!( + is_complete || consider_snapshot_recovery, + "Snapshot recovery is required to proceed, but it is not enabled. Enable by supplying \ + `--enable-snapshots-recovery` command-line arg to the node binary, or reset the node storage \ + to sync from genesis" + ); + + tracing::warn!("Proceeding with snapshot recovery. This is an experimental feature; use at your own risk"); + let recovery_config = read_snapshots_recovery_config()?; + let blob_store = ObjectStoreFactory::new(recovery_config.snapshots_object_store) + .create_store() + .await; + // FIXME: change error handling once #1036 is merged; this is not always correct. + SnapshotsApplier::load_snapshot(pool, main_node_client, &blob_store) + .await + .or_else(|err| match err { + SnapshotsApplierError::Canceled(message) => { + tracing::info!("Snapshot recovery is canceled: {message}"); + Ok(()) + } + _ => Err(err), + })?; + tracing::info!("Snapshot recovery is complete"); + } + } + Ok(()) +} diff --git a/core/bin/external_node/src/main.rs b/core/bin/external_node/src/main.rs index 9539fd1bcb27..d8b6f4895f45 100644 --- a/core/bin/external_node/src/main.rs +++ b/core/bin/external_node/src/main.rs @@ -29,25 +29,22 @@ use zksync_core::{ }, sync_layer::{ batch_status_updater::BatchStatusUpdater, external_io::ExternalIO, - fetcher::MainNodeFetcher, genesis::perform_genesis_if_needed, ActionQueue, MainNodeClient, - SyncState, + fetcher::MainNodeFetcher, ActionQueue, MainNodeClient, SyncState, }, }; use zksync_dal::{healthcheck::ConnectionPoolHealthCheck, ConnectionPool}; use zksync_health_check::CheckHealth; -use zksync_object_store::ObjectStoreFactory; -use zksync_snapshots_applier::SnapshotsApplier; use zksync_state::PostgresStorageCaches; use zksync_storage::RocksDB; use zksync_utils::wait_for_tasks::wait_for_tasks; +use crate::{config::ExternalNodeConfig, init::ensure_storage_initialized}; + mod config; +mod init; mod metrics; -const RELEASE_MANIFEST: &str = - 
std::include_str!("../../../../.github/release-please/manifest.json"); - -use crate::config::{read_snapshots_recovery_config, ExternalNodeConfig}; +const RELEASE_MANIFEST: &str = include_str!("../../../../.github/release-please/manifest.json"); /// Creates the state keeper configured to work in the external node mode. #[allow(clippy::too_many_arguments)] @@ -442,21 +439,6 @@ async fn main() -> anyhow::Result<()> { .await .context("failed to build a connection_pool")?; - if opt.enable_snapshots_recovery { - let recovery_config = read_snapshots_recovery_config()?; - let blob_store = ObjectStoreFactory::new(recovery_config.snapshots_object_store) - .create_store() - .await; - - tracing::info!( - "Snapshot recovery is enabled. This is an experimental feature; use at your own risk" - ); - SnapshotsApplier::load_snapshot(&connection_pool, &main_node_client, &blob_store) - .await - .context("snapshot recovery failed")?; - tracing::info!("Snapshot recovery is complete"); - } - if opt.revert_pending_l1_batch { tracing::info!("Rolling pending L1 batch back.."); let reverter = BlockReverter::new( @@ -493,14 +475,14 @@ async fn main() -> anyhow::Result<()> { tracing::info!("Started the external node"); tracing::info!("Main node URL is: {}", main_node_url); - // Make sure that genesis is performed. - perform_genesis_if_needed( - &mut connection_pool.access_storage().await.unwrap(), - config.remote.l2_chain_id, + // Make sure that the node storage is initialized either via genesis or snapshot recovery. 
+ ensure_storage_initialized( + &connection_pool, &main_node_client, + config.remote.l2_chain_id, + opt.enable_snapshots_recovery, ) - .await - .context("Performing genesis failed")?; + .await?; let (task_handles, stop_sender, health_check_handle, stop_receiver) = init_tasks(config.clone(), connection_pool.clone()) diff --git a/core/bin/snapshots_creator/src/creator.rs b/core/bin/snapshots_creator/src/creator.rs index 2d2ce2335b90..d7ccf4d0a336 100644 --- a/core/bin/snapshots_creator/src/creator.rs +++ b/core/bin/snapshots_creator/src/creator.rs @@ -266,6 +266,10 @@ impl SnapshotCreator { config: SnapshotsCreatorConfig, min_chunk_count: u64, ) -> anyhow::Result<()> { + tracing::info!( + "Starting snapshot creator with object store {:?} and config {config:?}", + self.blob_store + ); let latency = METRICS.snapshot_generation_duration.start(); let Some(progress) = self From 6bc7f3c033f8a18a256f164ef30ae1defc4819af Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 11:36:32 +0200 Subject: [PATCH 08/27] Support snapshot recovery in `zk ext-node` --- infrastructure/zk/src/database.ts | 2 +- infrastructure/zk/src/run.ts | 6 +++--- infrastructure/zk/src/server.ts | 12 ++++++++++-- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/infrastructure/zk/src/database.ts b/infrastructure/zk/src/database.ts index f78486fcbecd..c865b7b68c4d 100644 --- a/infrastructure/zk/src/database.ts +++ b/infrastructure/zk/src/database.ts @@ -53,7 +53,7 @@ export async function setup() { // Remote database, we can't show the contents. console.log(`WARNING! Using prod db!`); } - if (process.env.TEMPLATE_DATABASE_URL !== undefined) { + if (process.env.TEMPLATE_DATABASE_URL) { // Dump and restore from template database (simulate backup) console.log(`Template DB URL provided. 
Creating a DB via dump from ${process.env.TEMPLATE_DATABASE_URL}`); await utils.spawn('cargo sqlx database drop -y'); diff --git a/infrastructure/zk/src/run.ts b/infrastructure/zk/src/run.ts index 5ad36898e35c..e67d7d058759 100644 --- a/infrastructure/zk/src/run.ts +++ b/infrastructure/zk/src/run.ts @@ -103,10 +103,10 @@ export async function readVariable(address: string, contractName: string, variab } export async function snapshots_creator() { - process.chdir(`${process.env.ZKSYNC_HOME}`); - let logLevel = 'RUST_LOG=snapshots_creator=debug'; - await utils.spawn(`${logLevel} cargo run --bin snapshots_creator --release`); + process.chdir(process.env.ZKSYNC_HOME ?? '.'); + await utils.spawn('cargo run --release --bin snapshots_creator'); } + export const command = new Command('run').description('run miscellaneous applications'); command.command('test-accounts').description('print ethereum test accounts').action(testAccounts); diff --git a/infrastructure/zk/src/server.ts b/infrastructure/zk/src/server.ts index 52b3e66c7449..f2fa3ff1f942 100644 --- a/infrastructure/zk/src/server.ts +++ b/infrastructure/zk/src/server.ts @@ -25,7 +25,11 @@ export async function server(rebuildTree: boolean, uring: boolean, components?: await utils.spawn(`cargo run --bin zksync_server --release ${options}`); } -export async function externalNode(reinit: boolean = false, enableConsensus: boolean = false) { +export async function externalNode( + reinit: boolean = false, + enableConsensus: boolean = false, + enableSnapshotsRecovery: boolean = false +) { if (process.env.ZKSYNC_ENV != 'ext-node') { console.warn(`WARNING: using ${process.env.ZKSYNC_ENV} environment for external node`); console.warn('If this is a mistake, set $ZKSYNC_ENV to "ext-node" or other environment'); @@ -49,6 +53,9 @@ export async function externalNode(reinit: boolean = false, enableConsensus: boo if (enableConsensus) { options += ' --enable-consensus'; } + if (enableSnapshotsRecovery) { + options += ' 
--enable-snapshots-recovery'; + } await utils.spawn(`cargo run --release --bin zksync_external_node -- ${options}`); } @@ -140,6 +147,7 @@ export const enCommand = new Command('external-node') .description('start zksync external node') .option('--reinit', 'reset postgres and rocksdb before starting') .option('--enable-consensus', 'enables consensus component') + .option('--enable-snapshots-recovery', 'enables recovery from an app-level snapshot') .action(async (cmd: Command) => { - await externalNode(cmd.reinit, cmd.enableConsensus); + await externalNode(cmd.reinit, cmd.enableConsensus, cmd.enableSnapshotsRecovery); }); From 86998e0cd6f111cea2563e250e3e45e521095454 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 12:06:59 +0200 Subject: [PATCH 09/27] Support snapshot recovery in fee address migration --- .../src/state_keeper/io/fee_address_migration.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/lib/zksync_core/src/state_keeper/io/fee_address_migration.rs b/core/lib/zksync_core/src/state_keeper/io/fee_address_migration.rs index 61d40ff18a10..90d3eebffc3a 100644 --- a/core/lib/zksync_core/src/state_keeper/io/fee_address_migration.rs +++ b/core/lib/zksync_core/src/state_keeper/io/fee_address_migration.rs @@ -41,6 +41,22 @@ pub(crate) async fn migrate_miniblocks( last_miniblock: MiniblockNumber, stop_receiver: watch::Receiver, ) -> anyhow::Result<()> { + // `migrate_miniblocks_inner` assumes that miniblocks start from the genesis (i.e., no snapshot recovery). + // Since snapshot recovery is later than the fee address migration in terms of code versioning, + // the migration is always no-op in case of snapshot recovery; all miniblocks added after recovery are guaranteed + // to have their fee address set. + let mut storage = pool.access_storage_tagged("state_keeper").await?; + if storage + .snapshot_recovery_dal() + .get_applied_snapshot_status() + .await?
+ .is_some() + { + tracing::info!("Detected snapshot recovery; fee address migration is skipped as no-op"); + return Ok(()); + } + drop(storage); + let MigrationOutput { miniblocks_affected, } = migrate_miniblocks_inner( From b68f52e8499687036a405e36e08d7acf70ca4ab7 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 16:56:18 +0200 Subject: [PATCH 10/27] Sketch snapshot recovery integration test --- .../tests/snapshot-recovery-test/package.json | 33 +++ .../tests/snapshot-recovery.test.ts | 238 ++++++++++++++++++ .../snapshot-recovery-test/tsconfig.json | 9 + infrastructure/zk/src/test/integration.ts | 13 +- package.json | 2 + yarn.lock | 7 + 6 files changed, 301 insertions(+), 1 deletion(-) create mode 100644 core/tests/snapshot-recovery-test/package.json create mode 100644 core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts create mode 100644 core/tests/snapshot-recovery-test/tsconfig.json diff --git a/core/tests/snapshot-recovery-test/package.json b/core/tests/snapshot-recovery-test/package.json new file mode 100644 index 000000000000..7f9fa6ac8a75 --- /dev/null +++ b/core/tests/snapshot-recovery-test/package.json @@ -0,0 +1,33 @@ +{ + "name": "snapshot-recovery-test", + "version": "1.0.0", + "license": "MIT", + "mocha": { + "timeout": 240000, + "exit": true, + "color": false, + "slow": 0, + "require": [ + "ts-node/register", + "mocha-steps" + ] + }, + "scripts": { + "snapshot-recovery-test": "zk f mocha tests/snapshot-recovery.test.ts" + }, + "devDependencies": { + "@types/chai": "^4.2.21", + "@types/mocha": "^8.2.3", + "@types/mocha-steps": "^1.3.0", + "@types/node": "^18.19.15", + "@types/node-fetch": "^2.5.7", + "chai": "^4.3.4", + "mocha": "^9.0.2", + "mocha-steps": "^1.3.0", + "node-fetch": "^2.6.1", + "protobufjs": "^7.2.5", + "ts-node": "^10.1.0", + "typescript": "^4.3.5", + "zksync-web3": "^0.15.5" + } +} diff --git a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts 
b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts new file mode 100644 index 000000000000..88ed3522996f --- /dev/null +++ b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts @@ -0,0 +1,238 @@ +import { expect } from 'chai'; +import * as protobuf from 'protobufjs'; +import * as zlib from 'zlib'; +import * as zkweb3 from 'zksync-web3'; +import path from 'node:path'; +import fs from 'node:fs/promises'; +import { ChildProcess, spawn, exec } from 'node:child_process'; +import readline from 'node:readline/promises'; +import { promisify } from 'node:util'; + +interface AllSnapshotsResponse { + readonly snapshotsL1BatchNumbers: number[]; +} + +interface GetSnapshotResponse { + readonly miniblockNumber: number; + readonly l1BatchNumber: number; + readonly storageLogsChunks: Array; +} + +interface StorageLogChunkMetadata { + readonly filepath: string; +} + +interface StorageLogChunk { + readonly storageLogs: Array; +} + +interface StorageLog { + readonly accountAddress: Buffer; + readonly storageKey: Buffer; + readonly storageValue: Buffer; + readonly l1BatchNumberOfInitialWrite: number; + readonly enumerationIndex: number; +} + +// Assumptions: +// - Main node is run for the duration of the test. +describe('snapshot recovery', () => { + const STORAGE_LOG_SAMPLE_PROBABILITY = 0.05; + const IMPORTANT_LINE_REGEX = + /zksync_external_node::init|zksync_core::consistency_checker|zksync_core::reorg_detector/; + + const homeDir = process.env.ZKSYNC_HOME!!; + const externalNodeEnv = { + PATH: process.env.PATH, + ZKSYNC_HOME: homeDir, + ZKSYNC_ENV: process.env.IN_DOCKER ? 
'ext-node-docker' : 'ext-node' + }; + let mainNode: zkweb3.Provider; + + before(async () => { + mainNode = new zkweb3.Provider('http://127.0.0.1:3050'); + await killExternalNode(); + }); + + after(async () => { + await killExternalNode(); + }); + + async function getAllSnapshots() { + const output = await mainNode.send('snapshots_getAllSnapshots', []); + return output as AllSnapshotsResponse; + } + + async function getSnapshot(snapshotL1Batch: number) { + const output = await mainNode.send('snapshots_getSnapshot', [snapshotL1Batch]); + return output as GetSnapshotResponse; + } + + step('create snapshot', async () => { + const logs = await fs.open('snapshot-creator.log'); + const childProcess = spawn('cargo run --release --bin snapshots_creator', { + cwd: homeDir, + stdio: [null, logs.fd, logs.fd], + shell: true + }); + try { + await waitForProcess(childProcess); + } finally { + childProcess.kill(); + } + }); + + step('validate snapshot', async () => { + const allSnapshots = await getAllSnapshots(); + console.log('Obtained all snapshots', allSnapshots); + const newBatchNumbers = allSnapshots.snapshotsL1BatchNumbers; + + const l1BatchNumber = Math.max(...newBatchNumbers); + const fullSnapshot = await getSnapshot(l1BatchNumber); + console.log('Obtained latest snapshot', fullSnapshot); + const miniblockNumber = fullSnapshot.miniblockNumber; + + const protoPath = path.join(homeDir, 'core/lib/types/src/proto/mod.proto'); + const root = await protobuf.load(protoPath); + const SnapshotStorageLogsChunk = root.lookupType('zksync.types.SnapshotStorageLogsChunk'); + + expect(fullSnapshot.l1BatchNumber).to.equal(l1BatchNumber); + for (const chunkMetadata of fullSnapshot.storageLogsChunks) { + const chunkPath = path.join(homeDir, chunkMetadata.filepath); + console.log(`Checking storage logs chunk ${chunkPath}`); + const output = SnapshotStorageLogsChunk.decode(await decompressGzip(chunkPath)) as any as StorageLogChunk; + expect(output.storageLogs.length).to.be.greaterThan(0); + 
console.log(`Decompressed chunk has ${output.storageLogs.length} logs`); + + let sampledCount = 0; + for (const storageLog of output.storageLogs) { + // Randomly sample logs to speed up the test. + if (Math.random() > STORAGE_LOG_SAMPLE_PROBABILITY) { + continue; + } + sampledCount++; + + const snapshotAccountAddress = '0x' + storageLog.accountAddress.toString('hex'); + const snapshotKey = '0x' + storageLog.storageKey.toString('hex'); + const snapshotValue = '0x' + storageLog.storageValue.toString('hex'); + const snapshotL1BatchNumber = storageLog.l1BatchNumberOfInitialWrite; + const valueOnBlockchain = await mainNode.getStorageAt( + snapshotAccountAddress, + snapshotKey, + miniblockNumber + ); + expect(snapshotValue).to.equal(valueOnBlockchain); + expect(snapshotL1BatchNumber).to.be.lessThanOrEqual(l1BatchNumber); + } + console.log(`Checked random ${sampledCount} logs in the chunk`); + } + }); + + step('drop external node database', async () => { + const childProcess = spawn('zk db reset', { + cwd: homeDir, + stdio: 'inherit', + shell: true, + env: { ...externalNodeEnv, TEMPLATE_DATABASE_URL: '' } + }); + try { + await waitForProcess(childProcess); + } finally { + childProcess.kill(); + } + }); + + step('drop external node storage', async () => { + const childProcess = spawn('zk clean --database', { + cwd: homeDir, + stdio: 'inherit', + shell: true, + env: externalNodeEnv + }); + try { + await waitForProcess(childProcess); + } finally { + childProcess.kill(); + } + }); + + step('initialize external node', async () => { + const logs = await fs.open('snapshot-recovery.log', 'a'); + await logs.truncate(); + + const enProcess = spawn('zk external-node --enable-snapshots-recovery', { + cwd: homeDir, + stdio: [null, 'pipe', 'inherit'], + shell: true, + env: externalNodeEnv + }); + + let consistencyCheckerSucceeded = false; + let reorgDetectorSucceeded = false; + try { + const rl = readline.createInterface({ + input: enProcess.stdout, + crlfDelay: Infinity + }); + + // 
TODO: use a more reliable method to detect recovery success (e.g., based on health checks) + for await (const line of rl) { + if (IMPORTANT_LINE_REGEX.test(line)) { + console.log('en> ' + line); + } + await fs.appendFile(logs, line + '\n'); + + if (/L1 batch #\d+ is consistent with L1/.test(line)) { + console.log('Consistency checker successfully checked post-snapshot L1 batch'); + consistencyCheckerSucceeded = true; + } + if (/No reorg at L1 batch #\d+/.test(line)) { + console.log('Reorg detector successfully checked post-snapshot L1 batch'); + reorgDetectorSucceeded = true; + } + + if (consistencyCheckerSucceeded && reorgDetectorSucceeded) { + break; + } + } + } finally { + enProcess.kill(); + } + }); +}); + +async function waitForProcess(childProcess: ChildProcess) { + await new Promise((resolve, reject) => { + childProcess.on('error', (error) => { + reject(error); + }); + childProcess.on('exit', (code) => { + if (code === 0) { + resolve(undefined); + } else { + reject(new Error(`Process exited with non-zero code: ${code}`)); + } + }); + }); +} + +async function decompressGzip(filePath: string): Promise { + const readStream = (await fs.open(filePath)).createReadStream(); + return new Promise((resolve, reject) => { + const gunzip = zlib.createGunzip(); + let chunks: Uint8Array[] = []; + + gunzip.on('data', (chunk) => chunks.push(chunk)); + gunzip.on('end', () => resolve(Buffer.concat(chunks))); + gunzip.on('error', reject); + readStream.pipe(gunzip); + }); +} + +async function killExternalNode() { + try { + await promisify(exec)('pkill -9 zksync_external_node'); + } catch (err) { + console.log('Failed killing external node. 
This is *probably* normal.', err); + } +} diff --git a/core/tests/snapshot-recovery-test/tsconfig.json b/core/tests/snapshot-recovery-test/tsconfig.json new file mode 100644 index 000000000000..6c8907a86016 --- /dev/null +++ b/core/tests/snapshot-recovery-test/tsconfig.json @@ -0,0 +1,9 @@ +{ + "compilerOptions": { + "target": "es2019", + "module": "commonjs", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true + } +} diff --git a/infrastructure/zk/src/test/integration.ts b/infrastructure/zk/src/test/integration.ts index edcd366b450e..063e14dffacf 100644 --- a/infrastructure/zk/src/test/integration.ts +++ b/infrastructure/zk/src/test/integration.ts @@ -50,6 +50,11 @@ export async function revert(bail: boolean = false) { await utils.spawn('yarn revert-test revert-and-restart-test' + flag); } +async function snapshotRecovery(bail: boolean = false) { + const flag = bail ? ' --bail' : ''; + await utils.spawn('yarn snapshot-recovery-test snapshot-recovery-test' + flag); +} + export async function upgrade(bail: boolean = false) { const flag = bail ? 
' --bail' : ''; await utils.spawn('yarn upgrade-test upgrade-test' + flag); @@ -86,7 +91,13 @@ command .action(async (cmd: Command) => { await revert(cmd.bail); }); - +command + .command('snapshot-recovery') + .description('run snapshot recovery test') + .option('--bail') + .action(async (cmd: Command) => { + await snapshotRecovery(cmd.bail); + }); command .command('upgrade') .description('run upgrade test') diff --git a/package.json b/package.json index 541e6c08a8aa..152645dd1136 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "infrastructure/zk", "infrastructure/local-setup-preparation", "core/tests/revert-test", + "core/tests/snapshot-recovery-test", "core/tests/upgrade-test", "core/tests/ts-integration", "infrastructure/protocol-upgrade" @@ -29,6 +30,7 @@ "l2-contracts": "yarn workspace l2-contracts", "revert-test": "yarn workspace revert-test", "upgrade-test": "yarn workspace upgrade-test", + "snapshot-recovery-test": "yarn workspace snapshot-recovery-test", "ts-integration": "yarn workspace ts-integration", "zk": "yarn workspace zk" }, diff --git a/yarn.lock b/yarn.lock index 055bf45e23b7..25129112dd01 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3118,6 +3118,13 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-17.0.45.tgz#2c0fafd78705e7a18b7906b5201a522719dc5190" integrity sha512-w+tIMs3rq2afQdsPJlODhoUEKzFP1ayaoyl1CcnwtIlsVe7K7bA1NGm4s3PraqTLlXnbIN84zuBlxBWo1u9BLw== +"@types/node@^18.19.15": + version "18.19.15" + resolved "https://registry.yarnpkg.com/@types/node/-/node-18.19.15.tgz#313a9d75435669a57fc28dc8694e7f4c4319f419" + integrity sha512-AMZ2UWx+woHNfM11PyAEQmfSxi05jm9OlkxczuHeEqmvwPkYj6MWv44gbzDPefYOLysTOFyI3ziiy2ONmUZfpA== + dependencies: + undici-types "~5.26.4" + "@types/node@^8.0.0": version "8.10.66" resolved "https://registry.yarnpkg.com/@types/node/-/node-8.10.66.tgz#dd035d409df322acc83dff62a602f12a5783bbb3" From 641037ff822a72bb1a9842d3a660714b31041357 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 
2024 17:00:09 +0200 Subject: [PATCH 11/27] Remove old snapshots creator test --- core/tests/ts-integration/package.json | 3 +- .../tests/api/snapshots-creator.test.ts | 86 ------------------- infrastructure/zk/src/test/integration.ts | 13 --- 3 files changed, 1 insertion(+), 101 deletions(-) delete mode 100644 core/tests/ts-integration/tests/api/snapshots-creator.test.ts diff --git a/core/tests/ts-integration/package.json b/core/tests/ts-integration/package.json index 37e65991a583..d296db7174f0 100644 --- a/core/tests/ts-integration/package.json +++ b/core/tests/ts-integration/package.json @@ -10,8 +10,7 @@ "api-test": "zk f jest -- api/web3.test.ts api/debug.test.ts", "contract-verification-test": "zk f jest -- api/contract-verification.test.ts", "build": "hardhat compile", - "build-yul": "hardhat run scripts/compile-yul.ts", - "snapshots-creator-test": "zk f jest -- api/snapshots-creator.test.ts" + "build-yul": "hardhat run scripts/compile-yul.ts" }, "devDependencies": { "@matterlabs/hardhat-zksync-deploy": "^0.6.1", diff --git a/core/tests/ts-integration/tests/api/snapshots-creator.test.ts b/core/tests/ts-integration/tests/api/snapshots-creator.test.ts deleted file mode 100644 index c6d6c448bd92..000000000000 --- a/core/tests/ts-integration/tests/api/snapshots-creator.test.ts +++ /dev/null @@ -1,86 +0,0 @@ -import { TestMaster } from '../../src/index'; -import fs from 'fs'; -import * as zlib from 'zlib'; -import * as protobuf from 'protobufjs'; -import { snapshots_creator } from 'zk/build/run'; -import path from 'path'; - -describe('Snapshots API tests', () => { - let testMaster: TestMaster; - - beforeAll(() => { - testMaster = TestMaster.getInstance(__filename); - - if (process.env.ZKSYNC_ENV!.startsWith('ext-node')) { - console.warn("You are trying to run snapshots creator tests on external node. 
It's not supported."); - } - }); - - async function runCreator() { - await snapshots_creator(); - } - - async function rpcRequest(name: string, params: any) { - const response = await testMaster.mainAccount().provider.send(name, params); - return response; - } - - async function getAllSnapshots() { - return await rpcRequest('snapshots_getAllSnapshots', []); - } - - async function getSnapshot(snapshotL1Batch: number) { - return rpcRequest('snapshots_getSnapshot', [snapshotL1Batch]); - } - - async function decompressGzip(filePath: string): Promise { - return new Promise((resolve, reject) => { - const readStream = fs.createReadStream(filePath); - const gunzip = zlib.createGunzip(); - let chunks: Uint8Array[] = []; - - gunzip.on('data', (chunk) => chunks.push(chunk)); - gunzip.on('end', () => resolve(Buffer.concat(chunks))); - gunzip.on('error', reject); - - readStream.pipe(gunzip); - }); - } - async function createAndValidateSnapshot() { - const existingBatchNumbers = (await getAllSnapshots()).snapshotsL1BatchNumbers as number[]; - await runCreator(); - const newBatchNumbers = (await getAllSnapshots()).snapshotsL1BatchNumbers as number[]; - const addedSnapshots = newBatchNumbers.filter((x) => existingBatchNumbers.indexOf(x) === -1); - expect(addedSnapshots.length).toEqual(1); - - const l1BatchNumber = addedSnapshots[0]; - const fullSnapshot = await getSnapshot(l1BatchNumber); - const miniblockNumber = fullSnapshot.miniblockNumber; - - const protoPath = path.join(process.env.ZKSYNC_HOME as string, 'core/lib/types/src/proto/mod.proto'); - const root = await protobuf.load(protoPath); - const SnapshotStorageLogsChunk = root.lookupType('zksync.types.SnapshotStorageLogsChunk'); - - expect(fullSnapshot.l1BatchNumber).toEqual(l1BatchNumber); - for (let chunkMetadata of fullSnapshot.storageLogsChunks) { - const chunkPath = path.join(process.env.ZKSYNC_HOME as string, chunkMetadata.filepath); - const output = SnapshotStorageLogsChunk.decode(await decompressGzip(chunkPath)) as 
any; - expect(output['storageLogs'].length > 0); - for (const storageLog of output['storageLogs'] as any[]) { - const snapshotAccountAddress = '0x' + storageLog['accountAddress'].toString('hex'); - const snapshotKey = '0x' + storageLog['storageKey'].toString('hex'); - const snapshotValue = '0x' + storageLog['storageValue'].toString('hex'); - const snapshotL1BatchNumber = storageLog['l1BatchNumberOfInitialWrite']; - const valueOnBlockchain = await testMaster - .mainAccount() - .provider.getStorageAt(snapshotAccountAddress, snapshotKey, miniblockNumber); - expect(snapshotValue).toEqual(valueOnBlockchain); - expect(snapshotL1BatchNumber).toBeLessThanOrEqual(l1BatchNumber); - } - } - } - - test('snapshots can be created', async () => { - await createAndValidateSnapshot(); - }); -}); diff --git a/infrastructure/zk/src/test/integration.ts b/infrastructure/zk/src/test/integration.ts index 063e14dffacf..1defb235ea48 100644 --- a/infrastructure/zk/src/test/integration.ts +++ b/infrastructure/zk/src/test/integration.ts @@ -19,11 +19,6 @@ export async function contractVerification(bail: boolean = false) { await utils.spawn('yarn ts-integration contract-verification-test' + flag); } -export async function snapshotsCreator(bail: boolean = false) { - const flag = bail ? 
' --bail' : ''; - await utils.spawn('yarn ts-integration snapshots-creator-test' + flag); -} - export async function server(options: string[] = []) { if (process.env.ZKSYNC_ENV?.startsWith('ext-node')) { process.env.ZKSYNC_WEB3_API_URL = `http://127.0.0.1:${process.env.EN_HTTP_PORT}`; @@ -120,11 +115,3 @@ command .action(async (cmd: Command) => { await contractVerification(cmd.bail); }); - -command - .command('snapshots-creator') - .description('run snapshots creator tests') - .option('--bail') - .action(async (cmd: Command) => { - await snapshotsCreator(cmd.bail); - }); From 9d0624fdc9c64aa63be32aeda7b45c6ae8642ff0 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 17:00:37 +0200 Subject: [PATCH 12/27] Update `@types/node` version to 18.x --- core/tests/revert-test/package.json | 3 +-- core/tests/ts-integration/package.json | 2 +- core/tests/upgrade-test/package.json | 2 +- infrastructure/local-setup-preparation/package.json | 2 +- infrastructure/protocol-upgrade/package.json | 2 +- infrastructure/zk/package.json | 6 ++---- 6 files changed, 7 insertions(+), 10 deletions(-) diff --git a/core/tests/revert-test/package.json b/core/tests/revert-test/package.json index 4f277a618564..f8e5179390fe 100644 --- a/core/tests/revert-test/package.json +++ b/core/tests/revert-test/package.json @@ -19,10 +19,9 @@ "@types/chai": "^4.2.21", "@types/mocha": "^8.2.3", "@types/mocha-steps": "^1.3.0", - "@types/node": "^14.14.5", + "@types/node": "^18.19.15", "@types/node-fetch": "^2.5.7", "chai": "^4.3.4", - "chai-as-promised": "^7.1.1", "ethereumjs-abi": "^0.6.8", "ethers": "~5.7.0", "mocha": "^9.0.2", diff --git a/core/tests/ts-integration/package.json b/core/tests/ts-integration/package.json index d296db7174f0..ae44e797f396 100644 --- a/core/tests/ts-integration/package.json +++ b/core/tests/ts-integration/package.json @@ -18,7 +18,7 @@ "@matterlabs/hardhat-zksync-vyper": "^1.0.0", "@nomiclabs/hardhat-vyper": "^3.0.5", "@types/jest": "^29.0.3", - "@types/node": 
"^14.14.5", + "@types/node": "^18.19.15", "@types/node-fetch": "^2.5.7", "chalk": "^4.0.0", "ethereumjs-abi": "^0.6.8", diff --git a/core/tests/upgrade-test/package.json b/core/tests/upgrade-test/package.json index 83f220431d12..b0d9c4d5c85e 100644 --- a/core/tests/upgrade-test/package.json +++ b/core/tests/upgrade-test/package.json @@ -19,7 +19,7 @@ "@types/chai": "^4.2.21", "@types/mocha": "^8.2.3", "@types/mocha-steps": "^1.3.0", - "@types/node": "^14.14.5", + "@types/node": "^18.19.15", "@types/node-fetch": "^2.5.7", "chai": "^4.3.4", "chai-as-promised": "^7.1.1", diff --git a/infrastructure/local-setup-preparation/package.json b/infrastructure/local-setup-preparation/package.json index a5908b0fe54e..d08e9a4dbb8b 100644 --- a/infrastructure/local-setup-preparation/package.json +++ b/infrastructure/local-setup-preparation/package.json @@ -10,7 +10,7 @@ }, "devDependencies": { "typescript": "^4.5.5", - "@types/node": "^14.6.1" + "@types/node": "^18.19.15" }, "scripts": { "start": "ts-node ./src/index.ts" diff --git a/infrastructure/protocol-upgrade/package.json b/infrastructure/protocol-upgrade/package.json index 96c6f124b566..7ee8b67caed6 100644 --- a/infrastructure/protocol-upgrade/package.json +++ b/infrastructure/protocol-upgrade/package.json @@ -17,7 +17,7 @@ "devDependencies": { "@matterlabs/hardhat-zksync-solc": "^0.3.15", "@types/deep-extend": "^0.4.31", - "@types/node": "^14.6.1", + "@types/node": "^18.19.15", "@types/node-fetch": "^2.5.7", "@types/tabtab": "^3.0.1", "hardhat": "=2.16.0", diff --git a/infrastructure/zk/package.json b/infrastructure/zk/package.json index 11a05a760239..a30538e6a088 100644 --- a/infrastructure/zk/package.json +++ b/infrastructure/zk/package.json @@ -19,14 +19,12 @@ "handlebars": "^4.7.8", "node-fetch": "^2.6.1", "pg": "^8.11.3", - "tabtab": "^3.0.2", - "zksync-web3": "^0.15.5", - "protobufjs": "^7.2.5" + "tabtab": "^3.0.2" }, "devDependencies": { "@matterlabs/hardhat-zksync-solc": "^0.3.15", "@types/deep-extend": "^0.4.31", 
- "@types/node": "^14.6.1", + "@types/node": "^18.19.15", "@types/node-fetch": "^2.5.7", "@types/pg": "^8.10.3", "@types/tabtab": "^3.0.1", From 8cee45e691100404dca0a02f936e56e5e97290a7 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 17:01:22 +0200 Subject: [PATCH 13/27] Run snapshot recovery test in CI --- .github/workflows/ci-core-reusable.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index 2893c0664b31..1e82cba1971b 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -198,6 +198,10 @@ jobs: - name: Fee projection tests run: ci_run zk test i fees + # Must run before `zksync_server` is killed. + - name: Snapshot recovery test + run: ci_run zk test i snapshot-recovery + - name: Run revert test run: | ci_run pkill zksync_server || true @@ -220,6 +224,13 @@ jobs: if: always() run: ci_run cat contract_verifier.log || true + - name: Show snapshot-creator.log logs + if: always() + run: ci_run cat core/tests/snapshot-recovery-test/snapshot-creator.log || true + - name: Show snapshot-recovery.log logs + if: always() + run: ci_run cat core/tests/snapshot-recovery-test/snapshot-recovery.log || true + - name: Show revert.log logs if: always() run: ci_run cat core/tests/revert-test/revert.log || true From 3bb26891fc1576f269e287f0a9cd978458395759 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 17:46:58 +0200 Subject: [PATCH 14/27] Filter out prover DBs for EN environment --- infrastructure/zk/src/database.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/infrastructure/zk/src/database.ts b/infrastructure/zk/src/database.ts index 457b5f1cf653..f2f0c09dc1d8 100644 --- a/infrastructure/zk/src/database.ts +++ b/infrastructure/zk/src/database.ts @@ -22,10 +22,12 @@ function getDals(opts: DbOpts): Map { let dals = new Map(); if (!opts.prover && !opts.server) { 
dals.set(DalPath.CoreDal, process.env.DATABASE_URL!); - dals.set(DalPath.ProverDal, process.env.DATABASE_PROVER_URL!); + if (process.env.DATABASE_PROVER_URL) { + dals.set(DalPath.ProverDal, process.env.DATABASE_PROVER_URL); + } } - if (opts.prover) { - dals.set(DalPath.ProverDal, process.env.DATABASE_PROVER_URL!); + if (opts.prover && process.env.DATABASE_PROVER_URL) { + dals.set(DalPath.ProverDal, process.env.DATABASE_PROVER_URL); } if (opts.server) { dals.set(DalPath.CoreDal, process.env.DATABASE_URL!); From bdbba81bdfa1171460fa925c0baf89ee2cf9f4e5 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 17:47:24 +0200 Subject: [PATCH 15/27] Fix EN status checks --- .../tests/snapshot-recovery.test.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts index 88ed3522996f..35400e1cb813 100644 --- a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts +++ b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts @@ -54,10 +54,6 @@ describe('snapshot recovery', () => { await killExternalNode(); }); - after(async () => { - await killExternalNode(); - }); - async function getAllSnapshots() { const output = await mainNode.send('snapshots_getAllSnapshots', []); return output as AllSnapshotsResponse; @@ -195,8 +191,14 @@ describe('snapshot recovery', () => { break; } } + + // If `enProcess` fails early, we'll trip these checks. 
+ expect(enProcess.exitCode).to.be.null; + expect(consistencyCheckerSucceeded, 'consistency check failed').to.be.true; + expect(reorgDetectorSucceeded, 'reorg detection check failed').to.be.true; } finally { enProcess.kill(); + await killExternalNode(); } }); }); From 572bf2a61e207728d7959c1a1ce8240b9a965bd9 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 18:17:20 +0200 Subject: [PATCH 16/27] Fix `ObjectStoreConfig` uses in prover tests --- prover/prover_fri/tests/basic_test.rs | 11 +++++--- prover/witness_generator/tests/basic_test.rs | 27 ++++++++++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/prover/prover_fri/tests/basic_test.rs b/prover/prover_fri/tests/basic_test.rs index ebcc43e93afc..d8ce27b58507 100644 --- a/prover/prover_fri/tests/basic_test.rs +++ b/prover/prover_fri/tests/basic_test.rs @@ -2,7 +2,7 @@ use std::sync::Arc; use anyhow::Context as _; use serde::Serialize; -use zksync_config::{configs::FriProverConfig, ObjectStoreConfig}; +use zksync_config::configs::{object_store::ObjectStoreMode, FriProverConfig, ObjectStoreConfig}; use zksync_env_config::FromEnv; use zksync_object_store::{bincode, ObjectStoreFactory}; use zksync_prover_fri::prover_job_processor::Prover; @@ -24,9 +24,12 @@ async fn prover_and_assert_base_layer( block_number: L1BatchNumber, sequence_number: usize, ) -> anyhow::Result<()> { - let mut object_store_config = - ObjectStoreConfig::from_env().context("ObjectStoreConfig::from_env()")?; - object_store_config.file_backed_base_path = "./tests/data/".to_owned(); + let object_store_config = ObjectStoreConfig { + mode: ObjectStoreMode::FileBacked { + file_backed_base_path: "./tests/data/".to_owned(), + }, + max_retries: 5, + }; let object_store = ObjectStoreFactory::new(object_store_config) .create_store() .await; diff --git a/prover/witness_generator/tests/basic_test.rs b/prover/witness_generator/tests/basic_test.rs index 446ee71c9226..4bb68545d799 100644 --- 
a/prover/witness_generator/tests/basic_test.rs +++ b/prover/witness_generator/tests/basic_test.rs @@ -1,9 +1,8 @@ use std::time::Instant; use serde::Serialize; -use zksync_config::ObjectStoreConfig; +use zksync_config::{configs::object_store::ObjectStoreMode, ObjectStoreConfig}; use zksync_dal::fri_prover_dal::types::{LeafAggregationJobMetadata, NodeAggregationJobMetadata}; -use zksync_env_config::FromEnv; use zksync_object_store::ObjectStoreFactory; use zksync_prover_fri_types::{ keys::{AggregationsKey, FriCircuitKey}, @@ -29,8 +28,12 @@ fn compare_serialized(expected: &T, actual: &T) { #[tokio::test] #[ignore] // re-enable with new artifacts async fn test_leaf_witness_gen() { - let mut object_store_config = ObjectStoreConfig::from_env().unwrap(); - object_store_config.file_backed_base_path = "./tests/data/leaf/".to_owned(); + let object_store_config = ObjectStoreConfig { + mode: ObjectStoreMode::FileBacked { + file_backed_base_path: "./tests/data/leaf/".to_owned(), + }, + max_retries: 5, + }; let object_store = ObjectStoreFactory::new(object_store_config) .create_store() .await; @@ -65,8 +68,12 @@ async fn test_leaf_witness_gen() { #[tokio::test] #[ignore] // re-enable with new artifacts async fn test_node_witness_gen() { - let mut object_store_config = ObjectStoreConfig::from_env().unwrap(); - object_store_config.file_backed_base_path = "./tests/data/node/".to_owned(); + let object_store_config = ObjectStoreConfig { + mode: ObjectStoreMode::FileBacked { + file_backed_base_path: "./tests/data/node/".to_owned(), + }, + max_retries: 5, + }; let object_store = ObjectStoreFactory::new(object_store_config) .create_store() .await; @@ -102,8 +109,12 @@ async fn test_node_witness_gen() { #[tokio::test] #[ignore] // re-enable with new artifacts async fn test_scheduler_witness_gen() { - let mut object_store_config = ObjectStoreConfig::from_env().unwrap(); - object_store_config.file_backed_base_path = "./tests/data/scheduler/".to_owned(); + let object_store_config = 
ObjectStoreConfig { + mode: ObjectStoreMode::FileBacked { + file_backed_base_path: "./tests/data/scheduler/".to_owned(), + }, + max_retries: 5, + }; let object_store = ObjectStoreFactory::new(object_store_config) .create_store() .await; From bd62c252e8a1e1fe113ca9f2c55827b9c33ee12c Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 20:56:10 +0200 Subject: [PATCH 17/27] Fix file flags and kill command --- .../tests/snapshot-recovery.test.ts | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts index 35400e1cb813..beb818001516 100644 --- a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts +++ b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts @@ -65,7 +65,7 @@ describe('snapshot recovery', () => { } step('create snapshot', async () => { - const logs = await fs.open('snapshot-creator.log'); + const logs = await fs.open('snapshot-creator.log', 'w'); const childProcess = spawn('cargo run --release --bin snapshots_creator', { cwd: homeDir, stdio: [null, logs.fd, logs.fd], @@ -232,9 +232,18 @@ async function decompressGzip(filePath: string): Promise { } async function killExternalNode() { + interface ChildProcessError extends Error { + readonly code: number | null; + } + try { - await promisify(exec)('pkill -9 zksync_external_node'); + await promisify(exec)('killall -q -KILL zksync_external_node'); } catch (err) { - console.log('Failed killing external node. This is *probably* normal.', err); + const typedErr = err as ChildProcessError; + if (typedErr.code === 1) { + // No matching processes were found; this is fine. 
+ } else { + throw err; + } } } From ae3654b1b5d6d8a310fd7f4a1234566ef8c4802e Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 21:27:14 +0200 Subject: [PATCH 18/27] Fix spelling --- checks-config/era.dic | 2 +- core/bin/external_node/src/config/mod.rs | 2 +- core/bin/external_node/src/init.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/checks-config/era.dic b/checks-config/era.dic index 66480ebb51fb..01522f62240d 100644 --- a/checks-config/era.dic +++ b/checks-config/era.dic @@ -597,7 +597,7 @@ coinbase FIXME ASC DESC -Versioning +versioning initializer refactoring prefetch diff --git a/core/bin/external_node/src/config/mod.rs b/core/bin/external_node/src/config/mod.rs index 7f4be400cc62..74dd1af0d2ca 100644 --- a/core/bin/external_node/src/config/mod.rs +++ b/core/bin/external_node/src/config/mod.rs @@ -445,7 +445,7 @@ pub(crate) fn read_consensus_config() -> anyhow::Result { - // Do not require the CLA opt-in once the recovery is complete. + // Do not require the command-line opt-in once the recovery is complete. anyhow::ensure!( is_complete || consider_snapshot_recovery, "Snapshot recovery is required to proceed, but it is not enabled. Enable by supplying \ From e1121dbaad82e418a81f939b4cbb54855d4b2a53 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 13 Feb 2024 21:27:42 +0200 Subject: [PATCH 19/27] Move snapshot recovery test before fee test --- .github/workflows/ci-core-reusable.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index e3f776e7d5f0..bec8ac7cf81e 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -195,13 +195,12 @@ jobs: - name: Server integration tests run: ci_run zk test i server - - name: Fee projection tests - run: ci_run zk test i fees - - # Must run before `zksync_server` is killed. 
- name: Snapshot recovery test run: ci_run zk test i snapshot-recovery + - name: Fee projection tests + run: ci_run zk test i fees + - name: Run revert test run: | ci_run pkill zksync_server || true From 0cfa840011f5ad5bf3c150fe8f9ac30c5a40c6f0 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Wed, 14 Feb 2024 09:44:08 +0200 Subject: [PATCH 20/27] Do not wrap snapshot recovery test in `zk` --- .github/workflows/ci-core-reusable.yml | 5 ++++- core/tests/snapshot-recovery-test/package.json | 2 +- .../tests/snapshot-recovery.test.ts | 18 ++++++++++++------ infrastructure/zk/src/test/integration.ts | 12 ------------ 4 files changed, 17 insertions(+), 20 deletions(-) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index bec8ac7cf81e..73672c5efa77 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -196,7 +196,10 @@ jobs: run: ci_run zk test i server - name: Snapshot recovery test - run: ci_run zk test i snapshot-recovery + # We use `yarn` directly because the test launches both `zk` commands in both server and EN envs. + # An empty topmost environment helps avoid a mess when redefining env vars shared between both envs + # (e.g., DATABASE_URL). 
+ run: ci_run yarn snapshot-recovery-test snapshot-recovery-test - name: Fee projection tests run: ci_run zk test i fees diff --git a/core/tests/snapshot-recovery-test/package.json b/core/tests/snapshot-recovery-test/package.json index 7f9fa6ac8a75..981fa8f4f13d 100644 --- a/core/tests/snapshot-recovery-test/package.json +++ b/core/tests/snapshot-recovery-test/package.json @@ -13,7 +13,7 @@ ] }, "scripts": { - "snapshot-recovery-test": "zk f mocha tests/snapshot-recovery.test.ts" + "snapshot-recovery-test": "mocha tests/snapshot-recovery.test.ts" }, "devDependencies": { "@types/chai": "^4.2.21", diff --git a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts index beb818001516..4fe8e1bfed90 100644 --- a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts +++ b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts @@ -34,22 +34,28 @@ interface StorageLog { readonly enumerationIndex: number; } -// Assumptions: -// - Main node is run for the duration of the test. +/** + * Assumptions: + * + * - Main node is run for the duration of the test. + * - `ZKSYNC_ENV` variable is not set (checked at the start of the test). For this reason, + * the test doesn't have a `zk` wrapper; it should be launched using `yarn`. + */ describe('snapshot recovery', () => { - const STORAGE_LOG_SAMPLE_PROBABILITY = 0.05; + const STORAGE_LOG_SAMPLE_PROBABILITY = 0.1; const IMPORTANT_LINE_REGEX = /zksync_external_node::init|zksync_core::consistency_checker|zksync_core::reorg_detector/; const homeDir = process.env.ZKSYNC_HOME!!; const externalNodeEnv = { - PATH: process.env.PATH, - ZKSYNC_HOME: homeDir, + ...process.env, ZKSYNC_ENV: process.env.IN_DOCKER ? 
'ext-node-docker' : 'ext-node' }; let mainNode: zkweb3.Provider; before(async () => { + expect(process.env.ZKSYNC_ENV, '`ZKSYNC_ENV` should not be set to allow running both server and EN components') + .to.be.undefined; mainNode = new zkweb3.Provider('http://127.0.0.1:3050'); await killExternalNode(); }); @@ -66,7 +72,7 @@ describe('snapshot recovery', () => { step('create snapshot', async () => { const logs = await fs.open('snapshot-creator.log', 'w'); - const childProcess = spawn('cargo run --release --bin snapshots_creator', { + const childProcess = spawn('zk run snapshots-creator', { cwd: homeDir, stdio: [null, logs.fd, logs.fd], shell: true diff --git a/infrastructure/zk/src/test/integration.ts b/infrastructure/zk/src/test/integration.ts index 1defb235ea48..20093168c2c2 100644 --- a/infrastructure/zk/src/test/integration.ts +++ b/infrastructure/zk/src/test/integration.ts @@ -45,11 +45,6 @@ export async function revert(bail: boolean = false) { await utils.spawn('yarn revert-test revert-and-restart-test' + flag); } -async function snapshotRecovery(bail: boolean = false) { - const flag = bail ? ' --bail' : ''; - await utils.spawn('yarn snapshot-recovery-test snapshot-recovery-test' + flag); -} - export async function upgrade(bail: boolean = false) { const flag = bail ? 
' --bail' : ''; await utils.spawn('yarn upgrade-test upgrade-test' + flag); @@ -86,13 +81,6 @@ command .action(async (cmd: Command) => { await revert(cmd.bail); }); -command - .command('snapshot-recovery') - .description('run snapshot recovery test') - .option('--bail') - .action(async (cmd: Command) => { - await snapshotRecovery(cmd.bail); - }); command .command('upgrade') .description('run upgrade test') From 8b8080b69138290a959cde2aa041756e59b13b74 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Wed, 14 Feb 2024 11:13:02 +0200 Subject: [PATCH 21/27] Implement config inheritance --- .gitignore | 1 + etc/env/dev.toml | 26 ++++++++ etc/env/docker.toml | 2 + etc/env/ext-node-docker.toml | 66 +------------------- infrastructure/zk/src/config.ts | 73 +++++++++++------------ infrastructure/zk/src/test/integration.ts | 2 +- 6 files changed, 67 insertions(+), 103 deletions(-) create mode 100644 etc/env/dev.toml diff --git a/.gitignore b/.gitignore index c2878f7f734a..59a209d2dc5d 100644 --- a/.gitignore +++ b/.gitignore @@ -32,6 +32,7 @@ Cargo.lock /etc/env/* !/etc/env/base +!/etc/env/dev.toml !/etc/env/docker.toml !/etc/env/ext-node.toml !/etc/env/ext-node-docker.toml diff --git a/etc/env/dev.toml b/etc/env/dev.toml new file mode 100644 index 000000000000..e95ad7b9e9ee --- /dev/null +++ b/etc/env/dev.toml @@ -0,0 +1,26 @@ +[_metadata] +base = [ + 'base/alerts.toml', + 'base/api.toml', + 'base/chain.toml', + 'base/contract_verifier.toml', + 'base/contracts.toml', + 'base/database.toml', + 'base/eth_client.toml', + 'base/eth_sender.toml', + 'base/eth_watch.toml', + 'base/misc.toml', + 'base/object_store.toml', + 'base/nfs.toml', + 'base/rust.toml', + 'base/private.toml', + 'base/witness_generator.toml', + 'base/house_keeper.toml', + 'base/fri_prover.toml', + 'base/fri_witness_generator.toml', + 'base/fri_prover_group.toml', + 'base/proof_data_handler.toml', + 'base/fri_witness_vector_generator.toml', + 'base/fri_prover_gateway.toml', + 
'base/fri_proof_compressor.toml', +] diff --git a/etc/env/docker.toml b/etc/env/docker.toml index 6db5a6d73cff..60f4ac81222e 100644 --- a/etc/env/docker.toml +++ b/etc/env/docker.toml @@ -12,3 +12,5 @@ web3_url = "http://geth:8545" [chain.state_keeper] miniblock_iteration_interval = 50 +[_metadata] +base = ["dev.toml"] diff --git a/etc/env/ext-node-docker.toml b/etc/env/ext-node-docker.toml index 129b41a41816..eafcf9e7d050 100644 --- a/etc/env/ext-node-docker.toml +++ b/etc/env/ext-node-docker.toml @@ -1,71 +1,9 @@ database_url = "postgres://postgres@postgres/zksync_local_ext_node" -# Optional variable. If set, "zk db setup" will recreate "database_url" db by -# cloning "template_database_url" db instead of creating an empty db. -# "template_database_url" is not used by EN itself. template_database_url = "postgres://postgres@postgres/zksync_local" test_database_url = "postgres://postgres@host:5433/zksync_local_test_ext_node" -database_pool_size = 50 -zksync_action="dont_ask" - -# Needed to run integration tests. -l1_rpc_address = "http://geth:8545" -api_contract_verification_url="http://127.0.0.1:3070" [en] -http_port = 3060 -ws_port = 3061 -prometheus_port = 3322 -healthcheck_port = 3081 -threads_per_server = 128 -l2_chain_id = 270 -l1_chain_id = 9 - -req_entities_limit = 10000 - -state_cache_path = "./db/ext-node/state_keeper" -merkle_tree_path = "./db/ext-node/lightweight" -max_l1_batches_per_tree_iter = 20 - -main_node_url = "http://127.0.0.1:3050" eth_client_url = "http://geth:8545" -api_namespaces = ["eth", "web3", "net", "pubsub", "zks", "en", "debug"] - -# Note: -# `bootloader_hash` and `default_aa_hash` are overridden from the `.init.env` values by `zk` tool. -bootloader_hash="0x0100038581be3d0e201b3cc45d151ef5cc59eb3a0f146ad44f0f72abf00b594c" -default_aa_hash="0x0100038dc66b69be75ec31653c64cb931678299b9b659472772b2550b703f41c" - -# Should be the same as chain.state_keeper.fee_account_addr. 
-operator_addr="0xde03a0B5963f75f1C8485B355fF6D30f3093BDE7" - -[en.consensus] -config_path="etc/env/en_consensus_config.json" -# generated with zksync_consensus_tools/src/bin/keys.rs -# node:public:ed25519:147bb71be895846e1d6f5b1c6a8be53848b82bdafcf66e9dfe6ca65581076a1d -node_key="node:secret:ed25519:d56de77c738326c305c64c25bffe1cc94ea7c639cf71ca3ff94229df27f167ac" - -[rust] -# `RUST_LOG` environment variable for `env_logger` -# Here we use TOML multiline strings: newlines will be trimmed. -log="""\ -warn,\ -zksync_consensus_bft=info,\ -zksync_consensus_network=info,\ -zksync_consensus_storage=info,\ -zksync_core=debug,\ -zksync_dal=info,\ -zksync_eth_client=info,\ -zksync_storage=info,\ -zksync_merkle_tree=info,\ -zksync_state=debug,\ -zksync_utils=debug,\ -zksync_types=info,\ -loadnext=info,\ -vm=info,\ -zksync_external_node=info,\ -""" - -# `RUST_BACKTRACE` variable -backtrace="full" -lib_backtrace="1" +[_metadata] +base = ["ext-node.toml"] diff --git a/infrastructure/zk/src/config.ts b/infrastructure/zk/src/config.ts index 6d2b722c0d25..0496f1e46d35 100644 --- a/infrastructure/zk/src/config.ts +++ b/infrastructure/zk/src/config.ts @@ -3,32 +3,6 @@ import * as toml from '@iarna/toml'; import * as fs from 'fs'; import deepExtend from 'deep-extend'; -const CONFIG_FILES = [ - 'alerts.toml', - 'api.toml', - 'chain.toml', - 'contract_verifier.toml', - 'contracts.toml', - 'database.toml', - 'eth_client.toml', - 'eth_sender.toml', - 'eth_watch.toml', - 'misc.toml', - 'object_store.toml', - 'nfs.toml', - 'rust.toml', - 'private.toml', - 'witness_generator.toml', - 'house_keeper.toml', - 'fri_prover.toml', - 'fri_witness_generator.toml', - 'fri_prover_group.toml', - 'proof_data_handler.toml', - 'fri_witness_vector_generator.toml', - 'fri_prover_gateway.toml', - 'fri_proof_compressor.toml' -]; - function loadConfigFile(path: string) { const fileContents = fs.readFileSync(path); try { @@ -75,24 +49,47 @@ export function collectVariables(config: any, prefix: string = ''): 
Map Date: Thu, 15 Feb 2024 09:12:57 +0200 Subject: [PATCH 22/27] Switch off snapshot recovery test with consensus --- .github/workflows/ci-core-reusable.yml | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index 73672c5efa77..0c5a05103519 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -118,9 +118,10 @@ jobs: ci_run sccache --show-stats ci_run cat /tmp/sccache_log.txt integration: + name: Integration (consensus=${{ matrix.consensus }}) strategy: matrix: - consensus: [false,true] + consensus: [false, true] env: SERVER_COMPONENTS: "api,tree,eth,state_keeper,housekeeper,basic_witness_input_producer,commitment_generator${{ matrix.consensus && ',consensus' || '' }}" @@ -196,6 +197,7 @@ jobs: run: ci_run zk test i server - name: Snapshot recovery test + if: ${{ ! matrix.consensus }} # We use `yarn` directly because the test launches both `zk` commands in both server and EN envs. # An empty topmost environment helps avoid a mess when redefining env vars shared between both envs # (e.g., DATABASE_URL). 
@@ -248,11 +250,12 @@ jobs: ci_run cat /tmp/sccache_log.txt external-node: + name: External node (consensus=${{ matrix.consensus }}) strategy: matrix: - consensus: [false,true] + consensus: [false, true] runs-on: [matterlabs-ci-runner] - + env: SERVER_COMPONENTS: "api,tree,eth,state_keeper,housekeeper,basic_witness_input_producer,commitment_generator${{ matrix.consensus && ',consensus' || '' }}" EXT_NODE_FLAGS: "${{ matrix.consensus && '--enable-consensus' || '' }}" From 61e25c0df919fa3c2aed627bb98e8972a5243d95 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Thu, 15 Feb 2024 09:51:12 +0200 Subject: [PATCH 23/27] Make snapshot recovery exclusive w/ consensus --- core/bin/external_node/src/init.rs | 13 +++++-------- core/bin/external_node/src/main.rs | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/core/bin/external_node/src/init.rs b/core/bin/external_node/src/init.rs index ca25748dc12a..b86640c7595a 100644 --- a/core/bin/external_node/src/init.rs +++ b/core/bin/external_node/src/init.rs @@ -15,7 +15,7 @@ enum InitDecision { /// Perform or check genesis. Genesis, /// Perform or check snapshot recovery. 
- SnapshotRecovery { is_complete: bool }, + SnapshotRecovery, } pub(crate) async fn ensure_storage_initialized( @@ -50,14 +50,12 @@ pub(crate) async fn ensure_storage_initialized( } (None, Some(snapshot_recovery)) => { tracing::info!("Node has no genesis L1 batch and snapshot recovery information: {snapshot_recovery:?}"); - InitDecision::SnapshotRecovery { - is_complete: snapshot_recovery.storage_logs_chunks_left_to_process() == 0, - } + InitDecision::SnapshotRecovery } (None, None) => { tracing::info!("Node has neither genesis L1 batch, nor snapshot recovery info"); if consider_snapshot_recovery { - InitDecision::SnapshotRecovery { is_complete: false } + InitDecision::SnapshotRecovery } else { InitDecision::Genesis } @@ -72,10 +70,9 @@ pub(crate) async fn ensure_storage_initialized( .await .context("performing genesis failed")?; } - InitDecision::SnapshotRecovery { is_complete } => { - // Do not require the command-line opt-in once the recovery is complete. + InitDecision::SnapshotRecovery => { anyhow::ensure!( - is_complete || consider_snapshot_recovery, + consider_snapshot_recovery, "Snapshot recovery is required to proceed, but it is not enabled. Enable by supplying \ `--enable-snapshots-recovery` command-line arg to the node binary, or reset the node storage \ to sync from genesis" diff --git a/core/bin/external_node/src/main.rs b/core/bin/external_node/src/main.rs index ef4914f287a9..e3efd1090526 100644 --- a/core/bin/external_node/src/main.rs +++ b/core/bin/external_node/src/main.rs @@ -387,14 +387,23 @@ async fn shutdown_components( healthcheck_handle.stop().await; } +/// External node for zkSync Era. #[derive(Debug, Parser)] -#[structopt(author = "Matter Labs", version)] +#[command(author = "Matter Labs", version)] struct Cli { + /// Revert the pending L1 batch and exit. #[arg(long)] revert_pending_l1_batch: bool, + /// Enables consensus-based syncing instead of JSON-RPC based one. 
This is an experimental and incomplete feature; + /// do not use unless you know what you're doing. #[arg(long)] enable_consensus: bool, - #[arg(long)] + /// Enables application-level snapshot recovery. Required to start a node that was recovered from a snapshot, + /// or to initialize a node from a snapshot. Has no effect if a node that was initialized from a Postgres dump + /// or was synced from genesis. + /// + /// This is an experimental and incomplete feature; do not use unless you know what you're doing. + #[arg(long, conflicts_with = "enable_consensus")] enable_snapshots_recovery: bool, } @@ -430,6 +439,12 @@ async fn main() -> anyhow::Result<()> { .await .context("Failed to load external node config")?; if opt.enable_consensus { + // This is more of a sanity check; the mutual exclusion of `enable_consensus` and `enable_snapshots_recovery` + // should be ensured by `clap`. + anyhow::ensure!( + !opt.enable_snapshots_recovery, + "Consensus logic does not support snapshot recovery yet" + ); config.consensus = Some(config::read_consensus_config().context("read_consensus_config()")?); } From 76295b32d3fb22c9ba37cab623d9a2b07a1b779f Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Thu, 15 Feb 2024 09:51:48 +0200 Subject: [PATCH 24/27] Pass EN args from `zk external-node` --- .github/workflows/ci-core-reusable.yml | 2 +- infrastructure/zk/src/server.ts | 16 +++------------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index 0c5a05103519..08ef90f5fca9 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -258,7 +258,7 @@ jobs: env: SERVER_COMPONENTS: "api,tree,eth,state_keeper,housekeeper,basic_witness_input_producer,commitment_generator${{ matrix.consensus && ',consensus' || '' }}" - EXT_NODE_FLAGS: "${{ matrix.consensus && '--enable-consensus' || '' }}" + EXT_NODE_FLAGS: "${{ matrix.consensus && '-- --enable-consensus' 
|| '' }}" steps: - name: Checkout code # Checks out the repository under $GITHUB_WORKSPACE, so the job can access it. diff --git a/infrastructure/zk/src/server.ts b/infrastructure/zk/src/server.ts index bdc75030d75a..5bf0eff03d3a 100644 --- a/infrastructure/zk/src/server.ts +++ b/infrastructure/zk/src/server.ts @@ -27,8 +27,7 @@ export async function server(rebuildTree: boolean, uring: boolean, components?: export async function externalNode( reinit: boolean = false, - enableConsensus: boolean = false, - enableSnapshotsRecovery: boolean = false + args: string[] ) { if (process.env.ZKSYNC_ENV != 'ext-node') { console.warn(`WARNING: using ${process.env.ZKSYNC_ENV} environment for external node`); @@ -49,14 +48,7 @@ export async function externalNode( clean(path.dirname(process.env.EN_MERKLE_TREE_PATH!)); } - let options = ''; - if (enableConsensus) { - options += ' --enable-consensus'; - } - if (enableSnapshotsRecovery) { - options += ' --enable-snapshots-recovery'; - } - await utils.spawn(`cargo run --release --bin zksync_external_node -- ${options}`); + await utils.spawn(`cargo run --release --bin zksync_external_node -- ${args.join(' ')}`); } async function create_genesis(cmd: string) { @@ -146,8 +138,6 @@ export const serverCommand = new Command('server') export const enCommand = new Command('external-node') .description('start zksync external node') .option('--reinit', 'reset postgres and rocksdb before starting') - .option('--enable-consensus', 'enables consensus component') - .option('--enable-snapshots-recovery', 'enables recovery from an app-level snapshot') .action(async (cmd: Command) => { - await externalNode(cmd.reinit, cmd.enableConsensus, cmd.enableSnapshotsRecovery); + await externalNode(cmd.reinit, cmd.args); }); From f1b44314343d489206a32cdc497654cfb7e6fb39 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Thu, 15 Feb 2024 10:08:55 +0200 Subject: [PATCH 25/27] Use `EnrichedClientError` in snapshot applier --- core/lib/snapshots_applier/src/lib.rs 
| 58 +++++++++++-------- core/lib/snapshots_applier/src/tests/utils.rs | 9 ++- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/core/lib/snapshots_applier/src/lib.rs b/core/lib/snapshots_applier/src/lib.rs index 377a7c191740..473686fe04f9 100644 --- a/core/lib/snapshots_applier/src/lib.rs +++ b/core/lib/snapshots_applier/src/lib.rs @@ -18,10 +18,8 @@ use zksync_types::{ }; use zksync_utils::bytecode::hash_bytecode; use zksync_web3_decl::{ - jsonrpsee::{ - core::{client::Error, ClientError as RpcError}, - http_client::HttpClient, - }, + error::{ClientRpcContext, EnrichedClientError, EnrichedClientResult}, + jsonrpsee::{core::client, http_client::HttpClient}, namespaces::{EnNamespaceClient, SnapshotsNamespaceClient}, }; @@ -75,13 +73,13 @@ impl From for SnapshotsApplierError { } } -impl From for SnapshotsApplierError { - fn from(error: RpcError) -> Self { - match error { - Error::Transport(_) | Error::RequestTimeout | Error::RestartNeeded(_) => { - Self::Retryable(error.into()) +impl From for SnapshotsApplierError { + fn from(err: EnrichedClientError) -> Self { + match err.as_ref() { + client::Error::Transport(_) | client::Error::RequestTimeout => { + Self::Retryable(err.into()) } - _ => Self::Fatal(error.into()), + _ => Self::Fatal(err.into()), } } } @@ -101,28 +99,38 @@ pub enum SnapshotsApplierOutcome { /// Main node API used by the [`SnapshotsApplier`]. 
#[async_trait] pub trait SnapshotsApplierMainNodeClient: fmt::Debug + Send + Sync { - async fn fetch_l2_block(&self, number: MiniblockNumber) -> Result, RpcError>; + async fn fetch_l2_block( + &self, + number: MiniblockNumber, + ) -> EnrichedClientResult>; - async fn fetch_newest_snapshot(&self) -> Result, RpcError>; + async fn fetch_newest_snapshot(&self) -> EnrichedClientResult>; } #[async_trait] impl SnapshotsApplierMainNodeClient for HttpClient { - async fn fetch_l2_block(&self, number: MiniblockNumber) -> Result, RpcError> { - // FIXME: use RPC client extensions - Ok(self.sync_l2_block(number, false).await?) + async fn fetch_l2_block( + &self, + number: MiniblockNumber, + ) -> EnrichedClientResult> { + self.sync_l2_block(number, false) + .rpc_context("sync_l2_block") + .with_arg("number", &number) + .await } - async fn fetch_newest_snapshot(&self) -> Result, RpcError> { - let snapshots = self.get_all_snapshots().await?; - if snapshots.snapshots_l1_batch_numbers.is_empty() { - Ok(None) - } else { - let newest_snapshot = snapshots.snapshots_l1_batch_numbers[0]; - Ok(self - .get_snapshot_by_l1_batch_number(newest_snapshot) - .await?) 
- } + async fn fetch_newest_snapshot(&self) -> EnrichedClientResult> { + let snapshots = self + .get_all_snapshots() + .rpc_context("get_all_snapshots") + .await?; + let Some(newest_snapshot) = snapshots.snapshots_l1_batch_numbers.first() else { + return Ok(None); + }; + self.get_snapshot_by_l1_batch_number(*newest_snapshot) + .rpc_context("get_snapshot_by_l1_batch_number") + .with_arg("number", newest_snapshot) + .await } } diff --git a/core/lib/snapshots_applier/src/tests/utils.rs b/core/lib/snapshots_applier/src/tests/utils.rs index 1069aa37ec3e..d52ccc8c737e 100644 --- a/core/lib/snapshots_applier/src/tests/utils.rs +++ b/core/lib/snapshots_applier/src/tests/utils.rs @@ -16,7 +16,7 @@ use zksync_types::{ AccountTreeId, Bytes, L1BatchNumber, MiniblockNumber, ProtocolVersionId, StorageKey, StorageValue, H160, H256, }; -use zksync_web3_decl::jsonrpsee::core::ClientError as RpcError; +use zksync_web3_decl::error::EnrichedClientResult; use crate::SnapshotsApplierMainNodeClient; @@ -28,11 +28,14 @@ pub(super) struct MockMainNodeClient { #[async_trait] impl SnapshotsApplierMainNodeClient for MockMainNodeClient { - async fn fetch_l2_block(&self, number: MiniblockNumber) -> Result, RpcError> { + async fn fetch_l2_block( + &self, + number: MiniblockNumber, + ) -> EnrichedClientResult> { Ok(self.fetch_l2_block_responses.get(&number).cloned()) } - async fn fetch_newest_snapshot(&self) -> Result, RpcError> { + async fn fetch_newest_snapshot(&self) -> EnrichedClientResult> { Ok(self.fetch_newest_snapshot_response.clone()) } } From 8d2b5ff05ecefb368d5b6dd8ea12a8140721849d Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Thu, 15 Feb 2024 11:00:34 +0200 Subject: [PATCH 26/27] Fix integration test --- core/bin/external_node/src/init.rs | 2 +- .../snapshot-recovery-test/tests/snapshot-recovery.test.ts | 2 +- infrastructure/zk/src/server.ts | 7 ++----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/core/bin/external_node/src/init.rs 
b/core/bin/external_node/src/init.rs index 5ba7237c16e8..c0d158025992 100644 --- a/core/bin/external_node/src/init.rs +++ b/core/bin/external_node/src/init.rs @@ -97,7 +97,7 @@ pub(crate) async fn ensure_storage_initialized( SnapshotsApplierOutcome::InitializedWithoutSnapshot => { anyhow::bail!( "Node contains a non-genesis L1 batch, but no genesis; snapshot recovery is unsafe. \ - This should never occur unless the node DB was manually tampered with" + This should never occur unless the node DB was manually tampered with" ); } } diff --git a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts index 4fe8e1bfed90..e80eb00fbeb9 100644 --- a/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts +++ b/core/tests/snapshot-recovery-test/tests/snapshot-recovery.test.ts @@ -162,7 +162,7 @@ describe('snapshot recovery', () => { const logs = await fs.open('snapshot-recovery.log', 'a'); await logs.truncate(); - const enProcess = spawn('zk external-node --enable-snapshots-recovery', { + const enProcess = spawn('zk external-node -- --enable-snapshots-recovery', { cwd: homeDir, stdio: [null, 'pipe', 'inherit'], shell: true, diff --git a/infrastructure/zk/src/server.ts b/infrastructure/zk/src/server.ts index 5bf0eff03d3a..aa853bcdc212 100644 --- a/infrastructure/zk/src/server.ts +++ b/infrastructure/zk/src/server.ts @@ -25,10 +25,7 @@ export async function server(rebuildTree: boolean, uring: boolean, components?: await utils.spawn(`cargo run --bin zksync_server --release ${options}`); } -export async function externalNode( - reinit: boolean = false, - args: string[] -) { +export async function externalNode(reinit: boolean = false, args: string[]) { if (process.env.ZKSYNC_ENV != 'ext-node') { console.warn(`WARNING: using ${process.env.ZKSYNC_ENV} environment for external node`); console.warn('If this is a mistake, set $ZKSYNC_ENV to "ext-node" or other environment'); @@ -139,5 +136,5 @@ 
export const enCommand = new Command('external-node') .description('start zksync external node') .option('--reinit', 'reset postgres and rocksdb before starting') .action(async (cmd: Command) => { - await externalNode(cmd.reinit, cmd.args); + await externalNode(cmd.reinit, cmd.args); }); From 807f6dff8cce11fc4cac4ea49fcad6de9ba23957 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Thu, 15 Feb 2024 13:26:35 +0200 Subject: [PATCH 27/27] Fix minor nits --- .github/workflows/ci-core-reusable.yml | 2 +- core/bin/external_node/Cargo.toml | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci-core-reusable.yml b/.github/workflows/ci-core-reusable.yml index 08ef90f5fca9..b4be3eb14110 100644 --- a/.github/workflows/ci-core-reusable.yml +++ b/.github/workflows/ci-core-reusable.yml @@ -198,7 +198,7 @@ jobs: - name: Snapshot recovery test if: ${{ ! matrix.consensus }} - # We use `yarn` directly because the test launches both `zk` commands in both server and EN envs. + # We use `yarn` directly because the test launches `zk` commands in both server and EN envs. # An empty topmost environment helps avoid a mess when redefining env vars shared between both envs # (e.g., DATABASE_URL). 
run: ci_run yarn snapshot-recovery-test snapshot-recovery-test diff --git a/core/bin/external_node/Cargo.toml b/core/bin/external_node/Cargo.toml index f6de50b5b461..6d9f61042c53 100644 --- a/core/bin/external_node/Cargo.toml +++ b/core/bin/external_node/Cargo.toml @@ -19,18 +19,16 @@ zksync_utils = { path = "../../lib/utils" } zksync_state = { path = "../../lib/state" } zksync_basic_types = { path = "../../lib/basic_types" } zksync_contracts = { path = "../../lib/contracts" } -zksync_snapshots_applier = {path = "../../lib/snapshots_applier"} -zksync_object_store = {path="../../lib/object_store"} - -zksync_concurrency = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } -zksync_consensus_roles = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } - +zksync_snapshots_applier = { path = "../../lib/snapshots_applier" } +zksync_object_store = { path="../../lib/object_store" } prometheus_exporter = { path = "../../lib/prometheus_exporter" } zksync_health_check = { path = "../../lib/health_check" } zksync_web3_decl = { path = "../../lib/web3_decl" } zksync_types = { path = "../../lib/types" } vlog = { path = "../../lib/vlog" } +zksync_concurrency = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } +zksync_consensus_roles = { version = "0.1.0", git = "https://github.com/matter-labs/era-consensus.git", rev = "5b3d383d7a65b0fbe2a771fecf4313f5083be9ae" } vise = { git = "https://github.com/matter-labs/vise.git", version = "0.1.0", rev = "1c9cc500e92cf9ea052b230e114a6f9cce4fb2c1" } anyhow = "1.0"