Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add health checkup mechanism #1145

Merged
merged 11 commits into from
Jul 6, 2021
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,26 @@

## Unreleased

Many thanks to Fraccaroli Gianmarco (@Fraccaman) for helping us improve the
reliability of Hermes ([#697]).

### FEATURES

- [ibc-relayer-cli]
- Added `config validate` CLI to Hermes ([#600])
- Added basic channel filter ([#1140])
- Added `query channel ends` CLI command ([#1062])
- Added a health checkup mechanism for Hermes ([#697], [#1057])

### IMPROVEMENTS

- Update to `tendermint-rs` v0.20.0 ([#1125])
- Add inline documentation to config.toml ([#1127])

[#600]: https://github.com/informalsystems/ibc-rs/issues/600
[#697]: https://github.com/informalsystems/ibc-rs/issues/697
[#1062]: https://github.com/informalsystems/ibc-rs/issues/1062
[#1057]: https://github.com/informalsystems/ibc-rs/issues/1057
[#1125]: https://github.com/informalsystems/ibc-rs/issues/1125
[#1127]: https://github.com/informalsystems/ibc-rs/issues/1127
[#1140]: https://github.com/informalsystems/ibc-rs/issues/1140
Expand Down
11 changes: 9 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions proto/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ pub mod cosmos {
pub mod v1beta1 {
include!("prost/cosmos.base.v1beta1.rs");
}
pub mod tendermint {
pub mod v1beta1 {
include!("prost/cosmos.base.tendermint.v1beta1.rs");
}
}
}
pub mod crypto {
pub mod multisig {
Expand Down
1 change: 1 addition & 0 deletions relayer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dyn-clone = "1.0.3"
retry = { version = "1.2.1", default-features = false }
async-stream = "0.3.2"
fraction = {version = "0.8.0", default-features = false }
semver = "1.0"

[dependencies.tendermint]
version = "=0.20.0"
Expand Down
119 changes: 115 additions & 4 deletions relayer/src/chain/cosmos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ use tendermint::consensus::Params;
use tendermint_light_client::types::LightBlock as TMLightBlock;
use tendermint_proto::Protobuf;
use tendermint_rpc::endpoint::tx::Response as ResultTx;
use tendermint_rpc::query::Query;
use tendermint_rpc::query::{EventType, Query};
use tendermint_rpc::{endpoint::broadcast::tx_sync::Response, Client, HttpClient, Order};
use tokio::runtime::Runtime as TokioRuntime;
use tonic::codegen::http::Uri;
use tracing::{debug, trace};
use tracing::{debug, trace, warn};

use ibc::downcast;
use ibc::events::{from_tx_response_event, IbcEvent};
Expand All @@ -50,6 +50,8 @@ use ibc::query::{QueryTxHash, QueryTxRequest};
use ibc::signer::Signer;
use ibc::Height as ICSHeight;
use ibc_proto::cosmos::auth::v1beta1::{BaseAccount, QueryAccountRequest};
use ibc_proto::cosmos::base::tendermint::v1beta1::service_client::ServiceClient;
use ibc_proto::cosmos::base::tendermint::v1beta1::GetNodeInfoRequest;
use ibc_proto::cosmos::base::v1beta1::Coin;
use ibc_proto::cosmos::tx::v1beta1::mode_info::{Single, Sum};
use ibc_proto::cosmos::tx::v1beta1::{
Expand Down Expand Up @@ -82,6 +84,8 @@ use crate::light_client::Verified;

use super::Chain;

mod compatibility;

const DEFAULT_MAX_GAS: u64 = 300_000;
const DEFAULT_GAS_PRICE_ADJUSTMENT: f64 = 0.1;

Expand Down Expand Up @@ -109,6 +113,109 @@ pub struct CosmosSdkChain {
}

impl CosmosSdkChain {
/// Probes the full node over JSON-RPC and gRPC to verify that it is
/// reachable and exposes the basic APIs Hermes relies on.
///
/// The probes run in this order:
/// 1. the `/health` RPC endpoint answers without error;
/// 2. a `/tx_search` RPC query succeeds (used as the check for
///    transaction indexing);
/// 3. the gRPC node-info service reports application version
///    information, and the SDK module version it lists passes
///    [`compatibility::run_diagnostic`].
///
/// The first failing probe aborts the remaining ones. A failure is
/// never propagated to the caller: it is only reported through two
/// log warnings.
fn health_checkup(&self) {
    async fn do_health_checkup(chain: &CosmosSdkChain) -> Result<(), Error> {
        let chain_id = chain.id();
        let grpc_address = chain.grpc_addr.to_string();
        let rpc_address = chain.config.rpc_addr.to_string();

        // Builds the error reported when a JSON-RPC probe fails.
        let rpc_failure = |endpoint: &str, cause| Kind::HealthCheckJsonRpc {
            chain_id: chain_id.clone(),
            address: rpc_address.clone(),
            endpoint: endpoint.to_string(),
            cause,
        };

        // Builds the error reported when a gRPC probe fails.
        let grpc_failure = |endpoint: &str, cause: String| Kind::HealthCheckGrpc {
            chain_id: chain_id.clone(),
            address: grpc_address.clone(),
            endpoint: endpoint.to_string(),
            cause,
        };

        // Probe 1: the self-reported health endpoint.
        chain
            .rpc_client
            .health()
            .await
            .map_err(|e| rpc_failure("/health", e))?;

        // Probe 2: transaction indexing, exercised through a minimal search.
        let indexing_probe = Query::from(EventType::NewBlock);
        chain
            .rpc_client
            .tx_search(indexing_probe, false, 1, 1, Order::Ascending)
            .await
            .map_err(|e| rpc_failure("/tx_search", e))?;

        // Probe 3: application version information, fetched over gRPC.
        // A failure here means the gRPC client could not even be created.
        let mut grpc_client = ServiceClient::connect(chain.grpc_addr.clone())
            .await
            .map_err(|e| grpc_failure("tendermint::ServiceClient", e.to_string()))?;

        let node_info = grpc_client
            .get_node_info(tonic::Request::new(GetNodeInfoRequest {}))
            .await
            .map_err(|e| grpc_failure("tendermint::GetNodeInfoRequest", e.to_string()))?
            .into_inner();

        let version = node_info.application_version.ok_or_else(|| {
            grpc_failure(
                "tendermint::GetNodeInfoRequest",
                "the gRPC response contains no application version information".to_string(),
            )
        })?;

        // Finally, validate the underlying SDK version.
        match compatibility::run_diagnostic(version) {
            None => Ok(()),
            Some(diagnostic) => Err(Kind::SdkModuleVersion {
                chain_id: chain_id.clone(),
                address: grpc_address.clone(),
                cause: diagnostic.to_string(),
            }
            .into()),
        }
    }

    let outcome = self.block_on(do_health_checkup(self));

    if let Err(e) = outcome {
        warn!("{}", e);
        warn!("some Hermes features may not work in this mode!");
    }
}

/// The unbonding period of this chain
pub fn unbonding_period(&self) -> Result<Duration, Error> {
crate::time!("unbonding_period");
Expand Down Expand Up @@ -573,14 +680,18 @@ impl Chain for CosmosSdkChain {
let grpc_addr =
Uri::from_str(&config.grpc_addr.to_string()).map_err(|e| Kind::Grpc.context(e))?;

Ok(Self {
let chain = Self {
config,
rpc_client,
grpc_addr,
rt,
keybase,
account: None,
})
};

chain.health_checkup();

Ok(chain)
}

fn init_light_client(&self) -> Result<Box<dyn LightClient<Self>>, Error> {
Expand Down
122 changes: 122 additions & 0 deletions relayer/src/chain/cosmos/compatibility.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
//! Cosmos-SDK compatibility constants and helper methods.

use thiserror::Error;

use ibc_proto::cosmos::base::tendermint::v1beta1::VersionInfo;

/// Specifies the SDK module path, as it is expected to appear
/// in the application version information.
///
/// [`run_diagnostic`] looks for the first build dependency whose
/// `path` *contains* this string, so the value is a substring of the
/// full module path rather than the complete path.
///
/// The module identification is captured in a [`Module`]
/// with the following structure as an example:
/// ```json,ignore
/// Module {
///     path: "github.com/cosmos/cosmos-sdk",
///     version: "v0.42.4",
///     sum: "h1:yaD4PyOx0LnyfiWasC5egg1U76lT83GRxjJjupPo7Gk=",
/// },
/// ```
const SDK_MODULE_NAME: &str = "cosmos/cosmos-sdk";

/// Specifies the SDK module version requirement.
///
/// The string is parsed with `semver::VersionReq::parse` by
/// [`run_diagnostic`]; both bounds are inclusive, i.e., versions
/// `0.41.3` and `0.42.6` themselves are accepted.
///
/// # Note: Should be consistent with [features] guide page.
///
/// [features]: https://hermes.informal.systems/features.html
const SDK_MODULE_VERSION_REQ: &str = ">=0.41.3, <=0.42.6";

/// Identification details reported by an IBC application
/// (e.g., `gaiad`), kept around so that diagnostics can name
/// the application they refer to.
#[derive(Clone, Debug)]
pub struct AppInfo {
    app_name: String,
    version: String,
    git_commit: String,
}

/// Renders the application as `<app_name>:<version>-<git_commit>`.
impl std::fmt::Display for AppInfo {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            app_name,
            version,
            git_commit,
        } = self;

        write!(f, "{}:{}-{}", app_name, version, git_commit)
    }
}

/// Describes why the SDK compatibility check performed by
/// [`run_diagnostic`] did not succeed.
///
/// Every variant carries the [`AppInfo`] of the application that
/// was inspected, so that the rendered message identifies it.
#[derive(Error, Debug)]
pub enum Diagnostic {
/// None of the reported build dependencies has a path containing
/// `pattern`.
#[error("no SDK module '{pattern}' was found for application {app}")]
SdkModuleNotFound { pattern: String, app: AppInfo },

/// The SDK module was found, but its version string could not be
/// parsed as a semantic version; `cause` holds the parser's message
/// and `raw_version` the version exactly as reported.
#[error("failed parsing the SDK module ('{module_path}') version number '{raw_version}' into a semver for application {app}; cause: {cause}")]
VersionParsingFailed {
module_path: String,
raw_version: String,
cause: String,
app: AppInfo,
},

/// The SDK module version was parsed successfully, but it does not
/// satisfy the version `requirements`.
#[error("SDK module at version '{found}' does not meet compatibility requirements {requirements} for application {app}")]
MismatchingSdkModuleVersion {
requirements: String,
found: String,
app: AppInfo,
},
}

/// Runs a diagnostic check on the provided [`VersionInfo`]
/// to ensure that the SDK module version matches the
/// predefined requirements.
///
/// The SDK module is the first entry of `build_deps` whose path
/// contains [`SDK_MODULE_NAME`]; its version is validated against
/// [`SDK_MODULE_VERSION_REQ`].
///
/// Returns `None` upon success, or a [`Diagnostic`] upon an error:
/// - [`Diagnostic::SdkModuleNotFound`] if no build dependency matches;
/// - [`Diagnostic::VersionParsingFailed`] if the module version is
///   not a valid semantic version;
/// - [`Diagnostic::MismatchingSdkModuleVersion`] if the version is
///   outside the supported range.
///
/// Pre-release builds (e.g., `v0.42.0-rc.0`, or Go pseudo-versions
/// such as `v0.42.5-0.2021...`) are compared using their
/// `major.minor.patch` core only. Otherwise they would never satisfy
/// the requirements, since semver comparators without a pre-release
/// tag do not match pre-release versions.
///
/// # Panics
///
/// Panics if [`SDK_MODULE_VERSION_REQ`] is not a valid semver
/// requirement, which indicates a programming error in this module.
pub(crate) fn run_diagnostic(v: VersionInfo) -> Option<Diagnostic> {
    let app_info = AppInfo {
        app_name: v.app_name,
        version: v.version,
        git_commit: v.git_commit,
    };

    // Parse the requirements into a semver
    let reqs = semver::VersionReq::parse(SDK_MODULE_VERSION_REQ)
        .expect("parsing the SDK module requirements into semver");

    // Find the Cosmos SDK module
    let sdk_module = match v
        .build_deps
        .iter()
        .find(|m| m.path.contains(SDK_MODULE_NAME))
    {
        Some(module) => module,
        None => {
            return Some(Diagnostic::SdkModuleNotFound {
                pattern: SDK_MODULE_NAME.to_string(),
                app: app_info,
            })
        }
    };

    // The raw version number normally has a single leading 'v' (Go module
    // tag convention). Strip exactly one, so that malformed versions such
    // as "vv0.42.4" are reported as parse failures instead of accepted.
    let plain_version = sdk_module
        .version
        .strip_prefix('v')
        .unwrap_or(sdk_module.version.as_str());

    // Parse the module version
    let parsed = match semver::Version::parse(plain_version) {
        Ok(parsed) => parsed,
        Err(e) => {
            return Some(Diagnostic::VersionParsingFailed {
                module_path: sdk_module.path.clone(),
                raw_version: sdk_module.version.clone(),
                cause: e.to_string(),
                app: app_info,
            })
        }
    };

    // Compare on the `major.minor.patch` core: a version carrying a
    // pre-release tag never matches comparators that have none, which
    // would flag in-range release candidates as incompatible.
    let mut comparable = parsed.clone();
    comparable.pre = semver::Prerelease::EMPTY;

    // Finally, check the version requirements
    if reqs.matches(&comparable) {
        None
    } else {
        Some(Diagnostic::MismatchingSdkModuleVersion {
            requirements: SDK_MODULE_VERSION_REQ.to_string(),
            // Report the version as found, pre-release tag included.
            found: parsed.to_string(),
            app: app_info,
        })
    }
}
Loading