Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean Shutdown on Debug Builds (Valgrind-friendly) #2535

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 80 additions & 42 deletions src/api_server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,71 +198,109 @@ impl ApiServer {
}

server.start_server().expect("Cannot start HTTP server");

loop {
match server.requests() {
Ok(request_vec) => {
for server_request in request_vec {
let request_processing_start_us =
utils::time::get_time_us(utils::time::ClockType::Monotonic);
server
.respond(
// Use `self.handle_request()` as the processing callback.
server_request.process(|request| {
self.handle_request(request, request_processing_start_us)
}),
)
.or_else(|e| {
error!("API Server encountered an error on response: {}", e);
Ok(())
})?;
let delta_us = utils::time::get_time_us(utils::time::ClockType::Monotonic)
- request_processing_start_us;
debug!("Total previous API call duration: {} us.", delta_us);
if self.vmm_fatal_error {
// Flush the remaining outgoing responses
// and proceed to exit
server.flush_outgoing_writes();
error!(
"Fatal error with exit code: {}",
FC_EXIT_CODE_BAD_CONFIGURATION
);
unsafe {
libc::_exit(i32::from(FC_EXIT_CODE_BAD_CONFIGURATION));
}
}
}
}
Err(e) => {
let request_vec = match server.requests() {
Ok(vec) => vec,
Err(e) => { // print request error, but keep server running
error!(
"API Server error on retrieving incoming request. Error: {}",
e
);
continue
}
};

for server_request in request_vec {
let request_processing_start_us =
utils::time::get_time_us(utils::time::ClockType::Monotonic);

// !!! ServerRequest::process() doesn't appear to do anything besides put a private
// id field into the response. But since it takes an inner function it cannot
// return from this function without giving a response. Review a better way of
// exiting cleanly than by reaching out of the closure to set a boolean.

let mut shutting_down = false;

server
.respond(
server_request.process(|request| {
match self.handle_request_or_finishing(request, request_processing_start_us) {
Some(response) => response,
None => {
shutting_down = true;
Response::new(Version::Http11, StatusCode::NoContent)
}
}
}),
)
.or_else(|e| {
error!("API Server encountered an error on response: {}", e);
Ok(())
})?;

let delta_us = utils::time::get_time_us(utils::time::ClockType::Monotonic)
- request_processing_start_us;
debug!("Total previous API call duration: {} us.", delta_us);

if shutting_down {
debug!("/shutdown-internal request received, API server thread now ending itself");
return Ok(());
}

if self.vmm_fatal_error {
// Flush the remaining outgoing responses
// and proceed to exit
server.flush_outgoing_writes();
error!(
"Fatal error with exit code: {}",
FC_EXIT_CODE_BAD_CONFIGURATION
);
unsafe {
libc::_exit(i32::from(FC_EXIT_CODE_BAD_CONFIGURATION));
}
}
}
}
}

/// Handles an API request received through the associated socket.
pub fn handle_request(
/// If None is given back, it means a ShutdownInternal was requested and no response
/// is expected (should be requested by the main thread, not by clients)
pub fn handle_request_or_finishing(
&mut self,
request: &Request,
request_processing_start_us: u64,
) -> Response {
) -> Option<Response> {
match ParsedRequest::try_from_request(request) {
Ok(ParsedRequest::Sync(vmm_action)) => {
self.serve_vmm_action_request(vmm_action, request_processing_start_us)
Some(self.serve_vmm_action_request(vmm_action, request_processing_start_us))
}
Ok(ParsedRequest::GetInstanceInfo) => self.get_instance_info(),
Ok(ParsedRequest::GetMMDS) => self.get_mmds(),
Ok(ParsedRequest::PatchMMDS(value)) => self.patch_mmds(value),
Ok(ParsedRequest::PutMMDS(value)) => self.put_mmds(value),
Ok(ParsedRequest::GetInstanceInfo) => Some(self.get_instance_info()),
Ok(ParsedRequest::GetMMDS) => Some(self.get_mmds()),
Ok(ParsedRequest::PatchMMDS(value)) => Some(self.patch_mmds(value)),
Ok(ParsedRequest::PutMMDS(value)) => Some(self.put_mmds(value)),

#[cfg(debug_assertions)]
Ok(ParsedRequest::ShutdownInternal) => None,

Err(e) => {
error!("{}", e);
e.into()
Some(e.into())
}
}
}

/// Variant of handle_request that is used by tests, and does not know about the
/// ShutdownInternal mechanism. So the response is non-optional.
pub fn handle_request(
&mut self,
request: &Request,
request_processing_start_us: u64,
) -> Response {
self.handle_request_or_finishing(request, request_processing_start_us).unwrap()
}

fn serve_vmm_action_request(
&mut self,
vmm_action: Box<VmmAction>,
Expand Down
13 changes: 13 additions & 0 deletions src/api_server/src/parsed_request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ pub(crate) enum ParsedRequest {
PatchMMDS(Value),
PutMMDS(Value),
Sync(Box<VmmAction>),

#[cfg(debug_assertions)]
ShutdownInternal, // !!! not an API, used by shutdown to thread::join the API thread
}

impl ParsedRequest {
Expand All @@ -57,6 +60,16 @@ impl ParsedRequest {

match (request.method(), path, request.body.as_ref()) {
(Method::Get, "", None) => parse_get_instance_info(),

#[cfg(debug_assertions)]
(Method::Get, "shutdown-internal", None) => {
//
// This isn't a user-facing API, and was added solely to facilitate clean shutdowns.
// Calling it manually will cause problems, so only enable it in debug builds.
//
Ok(ParsedRequest::ShutdownInternal)
},

(Method::Get, "balloon", None) => parse_get_balloon(path_tokens.get(1)),
(Method::Get, "machine-config", None) => parse_get_machine_config(),
(Method::Get, "mmds", None) => parse_get_mmds(),
Expand Down
45 changes: 37 additions & 8 deletions src/firecracker/src/api_server_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,15 @@ use std::{
sync::mpsc::{channel, Receiver, Sender, TryRecvError},
sync::{Arc, Mutex},
thread,

os::unix::net::UnixStream, // for main thread to send ShutdownInternal to API thread
io::prelude::*, // UnixStream write_all() requires prelude
};

use api_server::{ApiRequest, ApiResponse, ApiServer};
use logger::{error, warn, ProcessTimeReporter};
use mmds::MMDS;
use polly::event_manager::{EventManager, Subscriber};
use polly::event_manager::{EventManager, Subscriber, ExitCode};
use seccomp::BpfProgram;
use utils::{
epoll::{EpollEvent, EventSet},
Expand Down Expand Up @@ -43,7 +46,7 @@ impl ApiServerAdapter {
vm_resources: VmResources,
vmm: Arc<Mutex<Vmm>>,
event_manager: &mut EventManager,
) {
) -> ExitCode {
let api_adapter = Arc::new(Mutex::new(Self {
api_event_fd,
from_api,
Expand All @@ -53,10 +56,13 @@ impl ApiServerAdapter {
event_manager
.add_subscriber(api_adapter)
.expect("Cannot register the api event to the event manager.");

loop {
event_manager
.run()
let opt_exit_code = event_manager
.run_maybe_exiting()
.expect("EventManager events driver fatal error");

if let Some(exit_code) = opt_exit_code { return exit_code; }
}
}

Expand Down Expand Up @@ -127,7 +133,7 @@ pub(crate) fn run_with_api(
instance_info: InstanceInfo,
process_time_reporter: ProcessTimeReporter,
boot_timer_enabled: bool,
) {
) -> ExitCode {
// FD to notify of API events. This is a blocking eventfd by design.
// It is used in the config/pre-boot loop which is a simple blocking loop
// which only consumes API events.
Expand All @@ -143,9 +149,10 @@ pub(crate) fn run_with_api(
.try_clone()
.expect("Failed to clone API event FD");

let api_bind_path = bind_path.clone();
let api_seccomp_filter = seccomp_filter.clone();
// Start the separate API thread.
thread::Builder::new()
let api_thread = thread::Builder::new()
.name("fc_api".to_owned())
.spawn(move || {
mask_handled_signals().expect("Unable to install signal mask on API thread.");
Expand All @@ -157,7 +164,7 @@ pub(crate) fn run_with_api(
from_vmm,
to_vmm_event_fd,
)
.bind_and_run(bind_path, process_time_reporter, api_seccomp_filter)
.bind_and_run(api_bind_path, process_time_reporter, api_seccomp_filter)
{
Ok(_) => (),
Err(api_server::Error::Io(inner)) => match inner.kind() {
Expand Down Expand Up @@ -235,12 +242,34 @@ pub(crate) fn run_with_api(
.expect("Poisoned lock")
.start(super::metrics::WRITE_METRICS_PERIOD_MS);

ApiServerAdapter::run_microvm(
let exit_code = ApiServerAdapter::run_microvm(
api_event_fd,
from_api,
to_api,
vm_resources,
vmm,
&mut event_manager,
);

// Note: In the release build, this is never reached...because exit() is called
// abruptly (the OS does faster cleanup, and it reduces the risk of hanging).
// Top level main() will complain if the bubbling process happens in release builds.

// We want to tell the API thread to shut down for a clean exit. But this is after
// the Vmm.stop() has been called, so it's a moment of internal finalization (as
// opposed to be something the client might call to shut the Vm down). Since it's
// an internal signal implementing it with an HTTP request is probably not the ideal
// way to do it...but having another way would involve waiting on the socket or some
// other signal. This leverages the existing wait.
//
let mut sock = UnixStream::connect(bind_path).unwrap();
assert!(sock.write_all(b"GET /shutdown-internal HTTP/1.1\r\n\r\n").is_ok());

// This call to thread::join() should block until the API thread has processed the
// shutdown-internal and returns from its function. If it doesn't block here, then
// that means it got the message; so no need to process a response.
//
api_thread.join().unwrap();

exit_code
}
Loading