diff --git a/app/app.go b/app/app.go index 09369bdec2eb..570c3dc2c9da 100644 --- a/app/app.go +++ b/app/app.go @@ -21,59 +21,68 @@ import ( nodeconfig "github.com/ava-labs/avalanchego/config/node" ) -const Header = ` _____ .__ .__ - / _ \___ _______ | | _____ ____ ____ | |__ ____ ,_ o - / /_\ \ \/ /\__ \ | | \__ \ / \_/ ___\| | \_/ __ \ / //\, - / | \ / / __ \| |__/ __ \| | \ \___| Y \ ___/ \>> | - \____|__ /\_/ (____ /____(____ /___| /\___ >___| /\___ > \\ - \/ \/ \/ \/ \/ \/ \/` +// Header is a standard ASCII art display for the application startup. +const Header = ` _____ __ .__ + / _ \___ _______ | | _____ ____ | |__ ____ + / /_\ \ \/ /\__ \ | | \__ \ / \_/ ___\| | \_/ __ \ + / | \ / / __ \| |__/ __ \| | \ \___| | \ ___/ + \____|__ /\_/ (____ /____(____ /___| /\___ >___| /\___ > + \/ \/ \/ \/ \/ \/ \/` var _ App = (*app)(nil) +// App defines the interface for the main application lifecycle management. type App interface { - // Start kicks off the application and returns immediately. - // Start should only be called once. + // Start begins the application's main logic (the node) and returns immediately. Start() - // Stop notifies the application to exit and returns immediately. - // Stop should only be called after [Start]. - // It is safe to call Stop multiple times. + // Stop signals the application to gracefully shut down. + // It's safe to call Stop multiple times. Stop() - // ExitCode should only be called after [Start] returns. It - // should block until the application finishes + // ExitCode blocks until the application finishes and returns the exit status. ExitCode() int } +// New initializes the application wrapper around a node. func New(config nodeconfig.Config) (App, error) { - // Set the data directory permissions to be read write. + // 1. Set required permissions on data directories. 
if err := perms.ChmodR(config.DatabaseConfig.Path, true, perms.ReadWriteExecute); err != nil { - return nil, fmt.Errorf("failed to restrict the permissions of the database directory with: %w", err) + return nil, fmt.Errorf("failed to restrict permissions of the database directory: %w", err) } if err := perms.ChmodR(config.LoggingConfig.Directory, true, perms.ReadWriteExecute); err != nil { - return nil, fmt.Errorf("failed to restrict the permissions of the log directory with: %w", err) + return nil, fmt.Errorf("failed to restrict permissions of the log directory: %w", err) } logFactory := logging.NewFactory(config.LoggingConfig) + // Close the log factory if a later initialization step panics; the error paths below close it explicitly. + defer func() { + if r := recover(); r != nil { + logFactory.Close() + panic(r) + } + }() + log, err := logFactory.Make("main") if err != nil { - logFactory.Close() + logFactory.Close() // Explicitly close if log initialization fails return nil, fmt.Errorf("failed to initialize log: %w", err) } - // update fd limit + // 2. Update file descriptor limit. fdLimit := config.FdLimit if err := ulimit.Set(fdLimit, log); err != nil { - log.Fatal("failed to set fd-limit", - zap.Error(err), - ) + // Log the failure, but return the error to the caller for graceful exit + log.Error("failed to set fd-limit", zap.Error(err)) logFactory.Close() return nil, err } + // 3. Initialize the core node. n, err := node.New(&config, logFactory, log) if err != nil { - log.Fatal("failed to initialize node", zap.Error(err)) + // Start is never reached on this path, so the logger and the factory must be released here. + log.Error("failed to initialize node", zap.Error(err)) log.Stop() logFactory.Close() return nil, fmt.Errorf("failed to initialize node: %w", err) @@ -86,18 +95,19 @@ func New(config nodeconfig.Config) (App, error) { }, nil } +// Run executes the application's main loop, handling system signals.
func Run(app App) int { - // start running the application + // Start the application's business logic. app.Start() - // register terminationSignals to kill the application + // Set up channels to catch the termination signals (SIGINT, SIGTERM) and the stack-trace signal (SIGABRT). terminationSignals := make(chan os.Signal, 1) signal.Notify(terminationSignals, syscall.SIGINT, syscall.SIGTERM) stackTraceSignal := make(chan os.Signal, 1) signal.Notify(stackTraceSignal, syscall.SIGABRT) - // start up a new go routine to handle attempts to kill the application + // Goroutine to handle termination signals: calls Stop() on the application. go func() { for range terminationSignals { app.Stop() @@ -105,58 +115,56 @@ func Run(app App) int { } }() - // start a goroutine to listen on SIGABRT signals, - // to print the stack trace to standard error. + // Goroutine to handle SIGABRT: prints the current stack trace to stderr. go func() { for range stackTraceSignal { fmt.Fprint(os.Stderr, utils.GetStacktrace(true)) } }() - // wait for the app to exit and get the exit code response + // Block until the application exits, then retrieve the exit code. exitCode := app.ExitCode() - // shut down the termination signal go routine + // Unregister the signal handlers and close the channels so the handler loops end. signal.Stop(terminationSignals) close(terminationSignals) - - // shut down the stack trace go routine signal.Stop(stackTraceSignal) close(stackTraceSignal) - // return the exit code that the application reported return exitCode } -// app is a wrapper around a node that runs in this process +// app is a process wrapper managing the lifecycle of an AvalancheGo node. type app struct { node *node.Node log logging.Logger logFactory logging.Factory - exitWG sync.WaitGroup + exitWG sync.WaitGroup // Waits for the internal node goroutine to finish. } -// Start the business logic of the node (as opposed to config reading, etc). -// Does not block until the node is done.
+// Start kicks off the node's main dispatch loop in a new goroutine. func (a *app) Start() { - // [p.ExitCode] will block until [p.exitWG.Done] is called + // ExitCode() will block until a.exitWG.Done() is called. a.exitWG.Add(1) + go func() { + // Outer defer: executes after the inner defer/panic, cleans up resources. defer func() { if r := recover(); r != nil { + // Report any panic that reaches this point, including one re-raised by the inner defer. fmt.Println("caught panic", r) } a.log.Stop() a.logFactory.Close() a.exitWG.Done() }() + + // Inner defer: runs first on return or panic; if Dispatch panicked, StopOnPanic logs the panic and re-raises it. defer func() { - // If [p.node.Dispatch()] panics, then we should log the panic and - // then re-raise the panic. This is why the above defer is broken - // into two parts. a.log.StopOnPanic() }() + // Start the node's main event loop. err := a.node.Dispatch() a.log.Debug("dispatch returned", zap.Error(err), @@ -164,14 +172,14 @@ func (a *app) Start() { }() } -// Stop attempts to shutdown the currently running node. This function will -// block until Shutdown returns. +// Stop attempts to gracefully shut down the running node. func (a *app) Stop() { + // NOTE(review): 0 is presumably the exit code to report, not a timeout - confirm against node.Shutdown. a.node.Shutdown(0) } -// ExitCode returns the exit code that the node is reporting. This function -// blocks until the node has been shut down. +// ExitCode blocks until the application's main goroutine (Dispatch) has finished +// and returns the node's reported exit status. func (a *app) ExitCode() int { a.exitWG.Wait() return a.node.ExitCode()