Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 52 additions & 44 deletions app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,59 +21,68 @@ import (
nodeconfig "github.com/ava-labs/avalanchego/config/node"
)

const Header = ` _____ .__ .__
/ _ \___ _______ | | _____ ____ ____ | |__ ____ ,_ o
/ /_\ \ \/ /\__ \ | | \__ \ / \_/ ___\| | \_/ __ \ / //\,
/ | \ / / __ \| |__/ __ \| | \ \___| Y \ ___/ \>> |
\____|__ /\_/ (____ /____(____ /___| /\___ >___| /\___ > \\
\/ \/ \/ \/ \/ \/ \/`
// Header is a standard ASCII art display for the application startup.
const Header = ` _____ __ .__
/ _ \___ _______ | | _____ ____ | |__ ____
/ /_\ \ \/ /\__ \ | | \__ \ / \_/ ___\| | \_/ __ \
/ | \ / / __ \| |__/ __ \| | \ \___| | \ ___/
\____|__ /\_/ (____ /____(____ /___| /\___ >___| /\___ >
\/ \/ \/ \/ \/ \/ \/`

var _ App = (*app)(nil)

// App defines the interface for the main application lifecycle management.
type App interface {
// Start kicks off the application and returns immediately.
// Start should only be called once.
// Start begins the application's main logic (the node) and returns immediately.
Start()

// Stop notifies the application to exit and returns immediately.
// Stop should only be called after [Start].
// It is safe to call Stop multiple times.
// Stop signals the application to gracefully shut down.
// It's safe to call Stop multiple times.
Stop()

// ExitCode should only be called after [Start] returns. It
// should block until the application finishes
// ExitCode blocks until the application finishes and returns the exit status.
ExitCode() int
}

// New initializes the application wrapper around a node.
func New(config nodeconfig.Config) (App, error) {
// Set the data directory permissions to be read write.
// 1. Set required permissions on data directories.
if err := perms.ChmodR(config.DatabaseConfig.Path, true, perms.ReadWriteExecute); err != nil {
return nil, fmt.Errorf("failed to restrict the permissions of the database directory with: %w", err)
return nil, fmt.Errorf("failed to restrict permissions of the database directory: %w", err)
}
if err := perms.ChmodR(config.LoggingConfig.Directory, true, perms.ReadWriteExecute); err != nil {
return nil, fmt.Errorf("failed to restrict the permissions of the log directory with: %w", err)
return nil, fmt.Errorf("failed to restrict permissions of the log directory: %w", err)
}

logFactory := logging.NewFactory(config.LoggingConfig)
// Use defer to ensure factory resources are closed on initialization failure.
defer func() {
if r := recover(); r != nil {
logFactory.Close()
panic(r)
}
}()

log, err := logFactory.Make("main")
if err != nil {
logFactory.Close()
logFactory.Close() // Explicitly close if log initialization fails
return nil, fmt.Errorf("failed to initialize log: %w", err)
}

// update fd limit
// 2. Update file descriptor limit.
fdLimit := config.FdLimit
if err := ulimit.Set(fdLimit, log); err != nil {
log.Fatal("failed to set fd-limit",
zap.Error(err),
)
// Log the failure, but return the error to the caller for graceful exit
log.Error("failed to set fd-limit", zap.Error(err))
logFactory.Close()
return nil, err
}

// 3. Initialize the core node.
n, err := node.New(&config, logFactory, log)
if err != nil {
log.Fatal("failed to initialize node", zap.Error(err))
// Log.Stop() is implicitly called by the defer in Start on error, but good practice to close resources.
log.Error("failed to initialize node", zap.Error(err))
log.Stop()
logFactory.Close()
return nil, fmt.Errorf("failed to initialize node: %w", err)
Expand All @@ -86,92 +95,91 @@ func New(config nodeconfig.Config) (App, error) {
}, nil
}

// Run executes the application's main loop, handling system signals.
func Run(app App) int {
// start running the application
// Start the application's business logic.
app.Start()

// register terminationSignals to kill the application
// Setup channels to catch termination signals (SIGINT, SIGTERM) and stack trace signal (SIGABRT).
terminationSignals := make(chan os.Signal, 1)
signal.Notify(terminationSignals, syscall.SIGINT, syscall.SIGTERM)

stackTraceSignal := make(chan os.Signal, 1)
signal.Notify(stackTraceSignal, syscall.SIGABRT)

// start up a new go routine to handle attempts to kill the application
// Goroutine to handle termination signals: calls Stop() on the application.
go func() {
for range terminationSignals {
app.Stop()
return
}
}()

// start a goroutine to listen on SIGABRT signals,
// to print the stack trace to standard error.
// Goroutine to handle SIGABRT: prints the current stack trace to stderr.
go func() {
for range stackTraceSignal {
fmt.Fprint(os.Stderr, utils.GetStacktrace(true))
}
}()

// wait for the app to exit and get the exit code response
// Block and wait for the application to exit gracefully, then retrieve the exit code.
exitCode := app.ExitCode()

// shut down the termination signal go routine
// Clean up signal handlers and channels.
signal.Stop(terminationSignals)
close(terminationSignals)

// shut down the stack trace go routine
signal.Stop(stackTraceSignal)
close(stackTraceSignal)

// return the exit code that the application reported
return exitCode
}

// app is a wrapper around a node that runs in this process
// app is a process wrapper managing the lifecycle of an AvalancheGo node.
type app struct {
node *node.Node
log logging.Logger
logFactory logging.Factory
exitWG sync.WaitGroup
exitWG sync.WaitGroup // Waits for the internal node goroutine to finish.
}

// Start the business logic of the node (as opposed to config reading, etc).
// Does not block until the node is done.
// Start kicks off the node's main dispatch loop in a new goroutine.
func (a *app) Start() {
// [p.ExitCode] will block until [p.exitWG.Done] is called
// ExitCode() will block until a.exitWG.Done() is called.
a.exitWG.Add(1)

go func() {
// Outer defer: executes after the inner defer/panic, cleans up resources.
defer func() {
if r := recover(); r != nil {
// Log the panic that occurred outside of the node's dispatch.
fmt.Println("caught panic", r)
}
a.log.Stop()
a.logFactory.Close()
a.exitWG.Done()
}()

// Inner defer: executes first on panic, calls StopOnPanic to ensure logs are flushed.
defer func() {
// If [p.node.Dispatch()] panics, then we should log the panic and
// then re-raise the panic. This is why the above defer is broken
// into two parts.
a.log.StopOnPanic()
}()

// Start the node's main event loop.
err := a.node.Dispatch()
a.log.Debug("dispatch returned",
zap.Error(err),
)
}()
}

// Stop attempts to shutdown the currently running node. This function will
// block until Shutdown returns.
// Stop attempts to gracefully shut down the running node.
func (a *app) Stop() {
// The shutdown timeout is set to 0, which typically means immediate/default shutdown.
a.node.Shutdown(0)
}

// ExitCode returns the exit code that the node is reporting. This function
// blocks until the node has been shut down.
// ExitCode blocks until the application's main goroutine (Dispatch) has finished
// and returns the node's reported exit status.
func (a *app) ExitCode() int {
a.exitWG.Wait()
return a.node.ExitCode()
Expand Down