diff --git a/src/agent/onefuzz-telemetry/src/lib.rs b/src/agent/onefuzz-telemetry/src/lib.rs index 9d8ba22481..025784d683 100644 --- a/src/agent/onefuzz-telemetry/src/lib.rs +++ b/src/agent/onefuzz-telemetry/src/lib.rs @@ -12,6 +12,19 @@ pub enum ClientType { Shared, } +#[derive(Clone, Debug, PartialEq)] +pub enum Role { + Proxy, +} + +impl Role { + pub fn as_str(&self) -> &'static str { + match self { + Self::Proxy => "proxy", + } + } +} + #[allow(non_camel_case_types)] #[derive(Clone, Debug)] pub enum Event { @@ -75,6 +88,7 @@ pub enum EventData { CoverageMaxDepth(u64), ToolName(String), Region(String), + Role(Role), } impl EventData { @@ -111,6 +125,7 @@ impl EventData { Self::Coverage(x) => ("coverage", x.to_string()), Self::ToolName(x) => ("tool_name", x.to_owned()), Self::Region(x) => ("region", x.to_owned()), + Self::Role(x) => ("role", x.as_str().to_owned()), } } @@ -149,6 +164,7 @@ impl EventData { Self::Coverage(_) => true, Self::ToolName(_) => true, Self::Region(_) => false, + Self::Role(_) => false, } } } diff --git a/src/api-service/__app__/onefuzzlib/proxy.py b/src/api-service/__app__/onefuzzlib/proxy.py index 11d1a8a877..60368f388c 100644 --- a/src/api-service/__app__/onefuzzlib/proxy.py +++ b/src/api-service/__app__/onefuzzlib/proxy.py @@ -36,6 +36,7 @@ PROXY_SKU = "Standard_B2s" PROXY_IMAGE = "Canonical:UbuntuServer:18.04-LTS:latest" +PROXY_LOG_PREFIX = "scaleset-proxy: " # This isn't intended to ever be shared to the client, hence not being in @@ -101,7 +102,7 @@ def set_failed(self, error: Error) -> None: if self.error is not None: return - logging.error("proxy vm failed: %s - %s", self.region, error) + logging.error(PROXY_LOG_PREFIX + "vm failed: %s - %s", self.region, error) send_event(EventProxyFailed(region=self.region, error=error)) self.error = error self.state = VmState.stopping @@ -142,19 +143,19 @@ def extensions_launch(self) -> None: def stopping(self) -> None: vm = self.get_vm() if not vm.is_deleted(): - logging.info("stopping proxy: %s", self.region) + logging.info(PROXY_LOG_PREFIX + "stopping proxy: %s", self.region) vm.delete() self.save() else: self.stopped() def stopped(self) -> None: - logging.info("removing proxy: %s", self.region) + logging.info(PROXY_LOG_PREFIX + "removing proxy: %s", self.region) self.delete() def is_used(self) -> bool: if len(self.get_forwards()) == 0: - logging.info("proxy has no forwards: %s", self.region) + logging.info(PROXY_LOG_PREFIX + "no forwards: %s", self.region) return False return True @@ -170,13 +171,21 @@ def is_alive(self) -> bool: and self.heartbeat.timestamp < ten_minutes_ago_no_tz ): logging.error( - "proxy last heartbeat is more than an 10 minutes old: %s", self.region + PROXY_LOG_PREFIX + "last heartbeat is more than an 10 minutes old: " + "%s - last heartbeat:%s compared_to:%s", + self.region, + self.heartbeat, + ten_minutes_ago_no_tz, ) return False elif not self.heartbeat and self.Timestamp and self.Timestamp < ten_minutes_ago: logging.error( - "proxy has no heartbeat in the last 10 minutes: %s", self.region + PROXY_LOG_PREFIX + "no heartbeat in the last 10 minutes: " + "%s timestamp: %s compared_to:%s", + self.region, + self.Timestamp, + ten_minutes_ago, ) return False @@ -237,6 +246,12 @@ def get_or_create(cls, region: Region) -> Optional["Proxy"]: proxy = Proxy.get(region) if proxy is not None: if proxy.version != __version__: + logging.info( + PROXY_LOG_PREFIX + "mismatch version: proxy:%s service:%s state:%s", + proxy.version, + __version__, + proxy.state, + ) if proxy.state != VmState.stopping: # If the proxy is out-of-date, delete and re-create it proxy.state = VmState.stopping @@ -244,6 +259,7 @@ def get_or_create(cls, region: Region) -> Optional["Proxy"]: return None return proxy + logging.info(PROXY_LOG_PREFIX + "creating proxy: region:%s", region) proxy = Proxy(region=region) proxy.save() send_event(EventProxyCreated(region=region)) diff --git a/src/api-service/__app__/queue_proxy_update/__init__.py b/src/api-service/__app__/queue_proxy_update/__init__.py index 8d4904eccf..93a141cf19 100644 --- a/src/api-service/__app__/queue_proxy_update/__init__.py +++ b/src/api-service/__app__/queue_proxy_update/__init__.py @@ -10,17 +10,19 @@ from onefuzztypes.models import ProxyHeartbeat from ..onefuzzlib.events import get_events -from ..onefuzzlib.proxy import Proxy +from ..onefuzzlib.proxy import PROXY_LOG_PREFIX, Proxy def main(msg: func.QueueMessage, dashboard: func.Out[str]) -> None: body = msg.get_body() - logging.info("proxy heartbeat: %s", body) + logging.info(PROXY_LOG_PREFIX + "heartbeat: %s", body) raw = json.loads(body) heartbeat = ProxyHeartbeat.parse_obj(raw) proxy = Proxy.get(heartbeat.region) if proxy is None: - logging.warning("received heartbeat for missing proxy: %s", body) + logging.warning( + PROXY_LOG_PREFIX + "received heartbeat for missing proxy: %s", body + ) return proxy.heartbeat = heartbeat proxy.save() diff --git a/src/api-service/__app__/timer_proxy/__init__.py b/src/api-service/__app__/timer_proxy/__init__.py index aa413d416a..4b062aafdf 100644 --- a/src/api-service/__app__/timer_proxy/__init__.py +++ b/src/api-service/__app__/timer_proxy/__init__.py @@ -10,7 +10,7 @@ from ..onefuzzlib.events import get_events from ..onefuzzlib.orm import process_state_updates -from ..onefuzzlib.proxy import Proxy +from ..onefuzzlib.proxy import PROXY_LOG_PREFIX, Proxy def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: F841 @@ -18,14 +18,20 @@ def main(mytimer: func.TimerRequest, dashboard: func.Out[str]) -> None: # noqa: # a proxy, the plan is: delete and recreate it. for proxy in Proxy.search(): if not proxy.is_alive(): - logging.error("proxy alive check failed, stopping: %s", proxy.region) + logging.error( + PROXY_LOG_PREFIX + "alive check failed, stopping: %s", proxy.region + ) proxy.state = VmState.stopping proxy.save() else: proxy.save_proxy_config() if proxy.state in VmState.needs_work(): - logging.info("update proxy vm: %s", proxy.region) + logging.info( + PROXY_LOG_PREFIX + "update state. proxy:%s state:%s", + proxy.region, + proxy.state, + ) process_state_updates(proxy) events = get_events() diff --git a/src/cli/onefuzz/debug.py b/src/cli/onefuzz/debug.py index 7faab0f1ed..568d5f92f4 100644 --- a/src/cli/onefuzz/debug.py +++ b/src/cli/onefuzz/debug.py @@ -105,9 +105,9 @@ def _get_proxy_setup( scaleset_id, machine_id, port, duration=duration ) if proxy.ip is None: - return (False, "waiting on proxy", None) + return (False, "waiting on proxy ip", None) - return (True, "waiting on proxy", (proxy.ip, proxy.forward.src_port)) + return (True, "waiting on proxy port", (proxy.ip, proxy.forward.src_port)) def rdp( self, diff --git a/src/proxy-manager/src/config.rs b/src/proxy-manager/src/config.rs index d5733227e0..55cbbbfe81 100644 --- a/src/proxy-manager/src/config.rs +++ b/src/proxy-manager/src/config.rs @@ -3,7 +3,7 @@ use crate::proxy; use anyhow::Result; -use onefuzz_telemetry::{set_appinsights_clients, EventData}; +use onefuzz_telemetry::{set_appinsights_clients, EventData, Role}; use reqwest_retry::SendRetry; use serde::{Deserialize, Serialize}; use std::{fs::File, io::BufReader, path::PathBuf}; @@ -78,6 +78,7 @@ impl Config { onefuzz_telemetry::set_property(EventData::Region(data.region.to_owned())); onefuzz_telemetry::set_property(EventData::Version(env!("ONEFUZZ_VERSION").to_string())); onefuzz_telemetry::set_property(EventData::InstanceId(data.instance_id)); + onefuzz_telemetry::set_property(EventData::Role(Role::Proxy)); Ok(Self { config_path, @@ -129,6 +130,7 @@ impl Config { } pub async fn notify(&self) -> Result<()> { + info!("notifying service of proxy update"); let client = QueueClient::new(self.data.notification.clone()); client diff --git a/src/proxy-manager/src/main.rs b/src/proxy-manager/src/main.rs index c631c4d15a..d49d94f73c 100644 --- a/src/proxy-manager/src/main.rs +++ b/src/proxy-manager/src/main.rs @@ -26,7 +26,7 @@ use tokio::{ const MINIMUM_NOTIFY_INTERVAL: Duration = Duration::from_secs(120); const POLL_INTERVAL: Duration = Duration::from_secs(5); -async fn run(mut proxy_config: Config) -> Result<()> { +async fn run_loop(mut proxy_config: Config) -> Result<()> { let mut last_notified = Instant::now(); loop { info!("checking updates"); @@ -41,6 +41,15 @@ async fn run(mut proxy_config: Config) -> Result<()> { } } +async fn run(proxy_config: Config) -> Result<()> { + let result = run_loop(proxy_config).await; + if let Err(err) = &result { + error!("run loop failed: {}", err); + } + onefuzz_telemetry::try_flush_and_close(); + result +} + fn main() -> Result<()> { env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();