Skip to content

Commit

Permalink
Enable Retries for Transient Errors Connecting to Graphs/Subgraphs (#…
Browse files Browse the repository at this point in the history
…1936)

This PR fixes two separate issues identified with the way Rover handles retries for communicating with graphs/subgraphs when using `rover dev`.
  • Loading branch information
jonathanrainer authored Jul 15, 2024
1 parent 7c7bc8f commit da07a74
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 20 deletions.
1 change: 1 addition & 0 deletions src/command/dev/do_dev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ impl Dev {
follower_messenger.clone(),
self.opts.subgraph_opts.subgraph_polling_interval,
&self.opts.plugin_opts.profile,
self.opts.subgraph_opts.subgraph_retries,
)
.transpose()
.unwrap_or_else(|| {
Expand Down
5 changes: 3 additions & 2 deletions src/command/dev/introspect.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use anyhow::anyhow;
use reqwest::blocking::Client;

use rover_std::Style;

use crate::command::dev::protocol::{SubgraphSdl, SubgraphUrl};
Expand Down Expand Up @@ -116,7 +117,7 @@ impl SubgraphIntrospectRunner {
watch: false,
},
}
.exec(&self.client, false)
.exec(&self.client, true)
}
}

Expand All @@ -140,6 +141,6 @@ impl GraphIntrospectRunner {
watch: false,
},
}
.exec(&self.client, false)
.exec(&self.client, true)
}
}
13 changes: 12 additions & 1 deletion src/command/dev/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,12 @@ impl OptionalSubgraphOpts {
}

if let Some(schema) = schema {
SubgraphSchemaWatcher::new_from_file_path((name, url), schema, follower_messenger)
SubgraphSchemaWatcher::new_from_file_path(
(name, url),
schema,
follower_messenger,
self.subgraph_retries,
)
} else {
let client = client_config
.get_builder()
Expand All @@ -86,6 +91,7 @@ impl OptionalSubgraphOpts {
follower_messenger,
self.subgraph_polling_interval,
None,
self.subgraph_retries,
url,
)
}
Expand All @@ -100,6 +106,7 @@ impl SupergraphOpts {
follower_messenger: FollowerMessenger,
polling_interval: u64,
profile_opt: &ProfileOpt,
subgraph_retries: u64,
) -> RoverResult<Option<Vec<SubgraphSchemaWatcher>>> {
if supergraph_config.is_none() {
return Ok(None);
Expand Down Expand Up @@ -130,6 +137,7 @@ impl SupergraphOpts {
(yaml_subgraph_name, routing_url),
file,
follower_messenger.clone(),
subgraph_retries,
)
}
SchemaSource::SubgraphIntrospection {
Expand All @@ -143,6 +151,7 @@ impl SupergraphOpts {
follower_messenger.clone(),
polling_interval,
introspection_headers,
subgraph_retries,
subgraph_url,
)
}
Expand All @@ -154,6 +163,7 @@ impl SupergraphOpts {
(yaml_subgraph_name, routing_url),
sdl,
follower_messenger.clone(),
subgraph_retries,
)
}
SchemaSource::Subgraph {
Expand All @@ -175,6 +185,7 @@ impl SupergraphOpts {
yaml_subgraph_name,
follower_messenger.clone(),
studio_client,
subgraph_retries,
)
}
}
Expand Down
64 changes: 47 additions & 17 deletions src/command/dev/watcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,16 @@ pub struct SubgraphSchemaWatcher {
schema_watcher_kind: SubgraphSchemaWatcherKind,
subgraph_key: SubgraphKey,
message_sender: FollowerMessenger,
subgraph_retries: u64,
subgraph_retry_countdown: u64,
}

impl SubgraphSchemaWatcher {
pub fn new_from_file_path<P>(
subgraph_key: SubgraphKey,
path: P,
message_sender: FollowerMessenger,
subgraph_retries: u64,
) -> RoverResult<Self>
where
P: AsRef<Utf8Path>,
Expand All @@ -42,6 +45,8 @@ impl SubgraphSchemaWatcher {
schema_watcher_kind: SubgraphSchemaWatcherKind::File(path.as_ref().to_path_buf()),
subgraph_key,
message_sender,
subgraph_retries,
subgraph_retry_countdown: 0,
})
}

Expand All @@ -51,6 +56,7 @@ impl SubgraphSchemaWatcher {
message_sender: FollowerMessenger,
polling_interval: u64,
headers: Option<HashMap<String, String>>,
subgraph_retries: u64,
subgraph_url: Url,
) -> RoverResult<Self> {
let headers = headers.map(|header_map| header_map.into_iter().collect());
Expand All @@ -64,18 +70,22 @@ impl SubgraphSchemaWatcher {
introspect_runner,
message_sender,
polling_interval,
subgraph_retries,
)
}

pub fn new_from_sdl(
subgraph_key: SubgraphKey,
sdl: String,
message_sender: FollowerMessenger,
subgraph_retries: u64,
) -> RoverResult<Self> {
Ok(Self {
schema_watcher_kind: SubgraphSchemaWatcherKind::Once(sdl),
subgraph_key,
message_sender,
subgraph_retries,
subgraph_retry_countdown: 0,
})
}

Expand All @@ -86,6 +96,7 @@ impl SubgraphSchemaWatcher {
yaml_subgraph_name: String,
message_sender: FollowerMessenger,
client: &StudioClient,
subgraph_retries: u64,
) -> RoverResult<Self> {
// given a graph_ref and subgraph, run subgraph fetch to
// obtain SDL and add it to subgraph_definition.
Expand Down Expand Up @@ -125,6 +136,7 @@ impl SubgraphSchemaWatcher {
(yaml_subgraph_name, routing_url),
response.sdl.contents,
message_sender,
subgraph_retries,
)
}

Expand All @@ -133,6 +145,7 @@ impl SubgraphSchemaWatcher {
introspect_runner: IntrospectRunnerKind,
message_sender: FollowerMessenger,
polling_interval: u64,
subgraph_retries: u64,
) -> RoverResult<Self> {
Ok(Self {
schema_watcher_kind: SubgraphSchemaWatcherKind::Introspect(
Expand All @@ -141,6 +154,8 @@ impl SubgraphSchemaWatcher {
),
subgraph_key,
message_sender,
subgraph_retries,
subgraph_retry_countdown: 0,
})
}

Expand Down Expand Up @@ -184,10 +199,6 @@ impl SubgraphSchemaWatcher {
}

fn update_subgraph(&mut self, last_message: Option<&String>) -> RoverResult<Option<String>> {
let print_error = |e: RoverError| {
let _ = e.print();
};

let maybe_update_message = match self.get_subgraph_definition_and_maybe_new_runner() {
Ok((subgraph_definition, maybe_new_refresher)) => {
if let Some(new_refresher) = maybe_new_refresher {
Expand All @@ -196,30 +207,49 @@ impl SubgraphSchemaWatcher {
match last_message {
Some(last_message) => {
if &subgraph_definition.sdl != last_message {
if self.subgraph_retry_countdown < self.subgraph_retries {
eprintln!(
"{} subgraph connectivity restored for {}",
Emoji::Reload,
self.subgraph_key.0
)
}
self.message_sender.update_subgraph(&subgraph_definition)?;
}
}
None => {
self.message_sender.add_subgraph(&subgraph_definition)?;
}
}
self.subgraph_retry_countdown = self.subgraph_retries;
Some(subgraph_definition.sdl)
}
Err(e) => {
let error_str = e.to_string();
match last_message {
Some(prev_message) => {
if &error_str != prev_message {
print_error(e);
self.message_sender.remove_subgraph(&self.subgraph_key.0)?;
}
}
None => {
print_error(e);
let _ = self.message_sender.remove_subgraph(&self.subgraph_key.0);
}
// `subgraph-retries` can be set by the user away from the default value of 0,
// this defaults to Rover's current behaviour.
//
// If a user does set this value to a non-zero one, and we get a non-retryable error
// from one of our subgraphs, we'll retain the old schema we had and continue
// operation. This will happen until the countdown hits 0 at which point the
// subgraph will be disconnected from the supergraph.
//
// Every time we successfully communicate with the subgraph we set the countdown
// back to the maximum value.
//
if self.subgraph_retry_countdown > 0 {
self.subgraph_retry_countdown -= 1;
eprintln!("{} error detected communicating with subgraph '{}', schema changes will not be reflected.\nWill retry but subgraph logs should be inspected", Emoji::Warn, &self.subgraph_key.0);
eprintln!("Error: {:}", e);
Some(e.to_string())
} else {
eprintln!(
"{} retries exhausted for subgraph {}. To add more run `rover dev` with the --subgraph-retries flag.",
Emoji::Stop,
&self.subgraph_key.0,
);
self.message_sender.remove_subgraph(&self.subgraph_key.0)?;
None
}
Some(error_str)
}
};

Expand Down
6 changes: 6 additions & 0 deletions src/options/subgraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ pub struct OptionalSubgraphOpts {
)]
#[serde(skip_serializing)]
pub subgraph_polling_interval: u64,

/// The number of times to retry a subgraph if an error is detected from it
/// The default value is 0.
#[arg(long = "subgraph-retries", short = 'r', default_value = "0")]
#[serde(skip_serializing)]
pub subgraph_retries: u64,
}

#[cfg(feature = "composition-js")]
Expand Down

0 comments on commit da07a74

Please sign in to comment.