Skip to content

Commit

Permalink
Respect telemetry configuration when running as a Windows service
Browse files Browse the repository at this point in the history
  • Loading branch information
pjanotti committed Mar 8, 2024
1 parent 2413346 commit 827ec2a
Show file tree
Hide file tree
Showing 6 changed files with 267 additions and 11 deletions.
25 changes: 25 additions & 0 deletions .chloggen/windows-service-respects-telemetry-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: 'bug_fix'

# The name of the component, or a single word describing the area of concern, (e.g. otlpreceiver)
component: otelcol

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Respect telemetry configuration when running as a Windows service

# One or more tracking issues or pull requests related to the change
issues: [5300]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
7 changes: 4 additions & 3 deletions .github/workflows/build-and-test-windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,16 @@ jobs:

- name: Install otelcorecol as a service
run: |
New-Service -Name "otelcorecol" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
New-Service -Name "otelcorecol" -StartupType "Manual" -BinaryPathName "${PWD}\bin\otelcorecol_windows_amd64 --config ${PWD}\examples\local\otel-config.yaml"
eventcreate.exe /t information /id 1 /l application /d "Creating event provider for 'otelcorecol'" /so otelcorecol
- name: Test otelcorecol service
working-directory: ${{ github.workspace }}/otelcol
run: |
(Start-Service otelcorecol -PassThru).WaitForStatus('Running', '00:00:30')
(Stop-Service otelcorecol -PassThru).WaitForStatus('Stopped', '00:00:30')
go test -timeout 30s -run ^TestCollectorAsService$ -v -tags=win32service
- name: Remove otelcorecol service
if: always()
run: |
Remove-Service otelcorecol
Remove-Item HKLM:\SYSTEM\CurrentControlSet\Services\EventLog\Application\otelcorecol
6 changes: 4 additions & 2 deletions otelcol/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,9 @@ type Collector struct {

configProvider ConfigProvider

service *service.Service
state *atomic.Int32
serviceConfig *service.Config
service *service.Service
state *atomic.Int32

// shutdownChan is used to terminate the collector.
shutdownChan chan struct{}
Expand Down Expand Up @@ -182,6 +183,7 @@ func (col *Collector) setupConfigurationComponents(ctx context.Context) error {
return fmt.Errorf("invalid configuration: %w", err)
}

col.serviceConfig = &cfg.Service
col.service, err = service.New(ctx, service.Settings{
BuildInfo: col.set.BuildInfo,
CollectorConf: conf,
Expand Down
30 changes: 24 additions & 6 deletions otelcol/collector_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"golang.org/x/sys/windows/svc/eventlog"

"go.opentelemetry.io/collector/featuregate"
"go.opentelemetry.io/collector/service"
)

type windowsService struct {
Expand Down Expand Up @@ -76,11 +77,6 @@ func (s *windowsService) Execute(args []string, requests <-chan svc.ChangeReques
}

func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) error {
// Append to new slice instead of the already existing s.settings.LoggingOptions slice to not change that.
s.settings.LoggingOptions = append(
[]zap.Option{zap.WrapCore(withWindowsCore(elog))},
s.settings.LoggingOptions...,
)
// Parse all the flags manually.
if err := s.flags.Parse(os.Args[1:]); err != nil {
return err
Expand All @@ -96,6 +92,18 @@ func (s *windowsService) start(elog *eventlog.Log, colErrorChannel chan error) e
return err
}

// The logging options need to be in place before the collector Run method is called
// since the telemetry creates the logger at the time of the Run method call.
// However, the zap.WrapCore function needs to read the serviceConfig to determine
// if the Windows Event Log should be used, however, the serviceConfig is also
// only read at the time of the Run method call. To work around this, we pass the
// serviceConfig as a pointer to the logging options, and then read its value
// when the zap.Logger is created by the telemetry.
s.col.set.LoggingOptions = append(
s.col.set.LoggingOptions,
zap.WrapCore(withWindowsCore(elog, &s.col.serviceConfig)),
)

// col.Run blocks until receiving a SIGTERM signal, so needs to be started
// asynchronously, but it will exit early if an error occurs on startup
go func() {
Expand Down Expand Up @@ -192,8 +200,18 @@ func (w windowsEventLogCore) Sync() error {
return w.core.Sync()
}

func withWindowsCore(elog *eventlog.Log) func(zapcore.Core) zapcore.Core {
func withWindowsCore(elog *eventlog.Log, serviceConfig **service.Config) func(zapcore.Core) zapcore.Core {
return func(core zapcore.Core) zapcore.Core {
if serviceConfig != nil {
for _, output := range (*serviceConfig).Telemetry.Logs.OutputPaths {
if output != "stdout" && output != "stderr" {
// A log file was specified in the configuration, so we should not use the Windows Event Log
return core
}
}
}

// Use the Windows Event Log
encoderConfig := zap.NewProductionEncoderConfig()
encoderConfig.LineEnding = "\r\n"
return windowsEventLogCore{core, elog, zapcore.NewConsoleEncoder(encoderConfig)}
Expand Down
180 changes: 180 additions & 0 deletions otelcol/collector_windows_service_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build windows && win32service

package otelcol

import (
"encoding/xml"
"fmt"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/stretchr/testify/require"
"golang.org/x/sys/windows/svc"
"golang.org/x/sys/windows/svc/mgr"
)

const (
collectorServiceName = "otelcorecol"
)

// Test the collector as a Windows service.
// The test assumes that the service and respective event source are already created.
// The test also must be executed with administrative privileges.
func TestCollectorAsService(t *testing.T) {
collector_executable, err := filepath.Abs(filepath.Join("..", "bin", "otelcorecol_windows_amd64"))
require.NoError(t, err)
_, err = os.Stat(collector_executable)
require.NoError(t, err)

scm, err := mgr.Connect()
require.NoError(t, err)
defer scm.Disconnect()

service, err := scm.OpenService(collectorServiceName)
require.NoError(t, err)
defer service.Close()

tests := []struct {
name string
configFile string
expectStartFailure bool
customSetup func(*testing.T)
customValidation func(*testing.T)
}{
{
name: "Default",
configFile: filepath.Join("..", "examples", "local", "otel-config.yaml"),
},
{
name: "ConfigFileNotFound",
configFile: filepath.Join(".", "non", "existent", "otel-config.yaml"),
expectStartFailure: true,
},
{
name: "LogToFile",
configFile: filepath.Join(".", "testdata", "otel-log-to-file.yaml"),
customSetup: func(t *testing.T) {
// Create the folder and clean the log file if it exists
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
err := os.MkdirAll(logsPath, os.ModePerm)
require.NoError(t, err)

logFilePath := filepath.Join(logsPath, "otelcol.log")
err = os.Remove(logFilePath)
if err != nil && !os.IsNotExist(err) {
require.NoError(t, err)
}
},
customValidation: func(t *testing.T) {
// Check that the log file was created
programDataPath := os.Getenv("ProgramData")
logsPath := filepath.Join(programDataPath, "OpenTelemetry", "Collector", "Logs")
logFilePath := filepath.Join(logsPath, "otelcol.log")
fileinfo, err := os.Stat(logFilePath)
require.NoError(t, err)
require.NotEmpty(t, fileinfo.Size())
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
serviceConfig, err := service.Config()
require.NoError(t, err)

// Setup the command line to launch the collector as a service
fullConfigPath, err := filepath.Abs(tt.configFile)
require.NoError(t, err)

serviceConfig.BinaryPathName = fmt.Sprintf("\"%s\" --config \"%s\"", collector_executable, fullConfigPath)
err = service.UpdateConfig(serviceConfig)
require.NoError(t, err)

if tt.customSetup != nil {
tt.customSetup(t)
}

startTime := time.Now()

err = service.Start()
require.NoError(t, err)

expectedState := svc.Running
if tt.expectStartFailure {
expectedState = svc.Stopped
} else {
defer func() {
_, err = service.Control(svc.Stop)
require.NoError(t, err)
}()
}

// Wait for the service to reach the expected state
require.Eventually(t, func() bool {
status, _ := service.Query()
return status.State == expectedState
}, 10*time.Second, 500*time.Millisecond)

if tt.customValidation != nil {
tt.customValidation(t)
} else {
// Read the events from the otelcorecol source and check that they were emitted after the service
// command started. This is a simple validation that the messages are being logged on the
// Windows event log.
cmd := exec.Command("wevtutil.exe", "qe", "Application", "/c:1", "/rd:true", "/f:RenderedXml", "/q:*[System[Provider[@Name='otelcorecol']]]")
out, err := cmd.CombinedOutput()
require.NoError(t, err)

var e Event
require.NoError(t, xml.Unmarshal([]byte(out), &e))

eventTime, err := time.Parse("2006-01-02T15:04:05.9999999Z07:00", e.System.TimeCreated.SystemTime)
require.NoError(t, err)

require.True(t, eventTime.After(startTime.In(time.UTC)))
}
})
}
}

// Helper types to read the XML events from the event log using wevtutil
type Event struct {
XMLName xml.Name `xml:"Event"`
System System `xml:"System"`
Data string `xml:"EventData>Data"`
}

type System struct {
Provider Provider `xml:"Provider"`
EventID int `xml:"EventID"`
Version int `xml:"Version"`
Level int `xml:"Level"`
Task int `xml:"Task"`
Opcode int `xml:"Opcode"`
Keywords string `xml:"Keywords"`
TimeCreated TimeCreated `xml:"TimeCreated"`
EventRecordID int `xml:"EventRecordID"`
Execution Execution `xml:"Execution"`
Channel string `xml:"Channel"`
Computer string `xml:"Computer"`
}

type Provider struct {
Name string `xml:"Name,attr"`
}

type TimeCreated struct {
SystemTime string `xml:"SystemTime,attr"`
}

type Execution struct {
ProcessID string `xml:"ProcessID,attr"`
ThreadID string `xml:"ThreadID,attr"`
}
30 changes: 30 additions & 0 deletions otelcol/testdata/otel-log-to-file.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
extensions:
zpages:

receivers:
otlp:
protocols:
grpc:
http:

exporters:
debug:
verbosity: detailed

service:
telemetry:
logs:
level: warn
output_paths:
# The folder need to be created prior to starting the collector
- ${ProgramData}\OpenTelemetry\Collector\Logs\otelcol.log

pipelines:
traces:
receivers: [otlp]
exporters: [debug]
metrics:
receivers: [otlp]
exporters: [debug]

extensions: [zpages]

0 comments on commit 827ec2a

Please sign in to comment.