Skip to content

Commit

Permalink
Add App Pool metrics to IIS module (#70)
Browse files Browse the repository at this point in the history
* Initial attempt at app pool collector

* Rename and tweak

* Attempt to convert uptime to unix timestamp

* Merge app pool metrics with existing IIS modules

* Update comments to reference new class

* Add state label to CurrentApplicationPoolState metric

* Return 0 value metrics for all non-active app states

Return 1 for active state

* Remove unnecessary float cast

* Add calculation for uptime

* Convert iis uptimes to unix timestamps and alter help msg

* Rename application pool uptime to start_time
  • Loading branch information
simonwillcock authored and carlpett committed Apr 26, 2017
1 parent 012e143 commit 9d51525
Showing 1 changed file with 259 additions and 2 deletions.
261 changes: 259 additions & 2 deletions collector/iis.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
// returns data points from Win32_PerfRawData_W3SVC_WebService
// returns data points from the following classes:
// - Win32_PerfRawData_W3SVC_WebService
// https://msdn.microsoft.com/en-us/library/aa394345 - Win32_OperatingSystem class
// - Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS

package collector

Expand All @@ -20,9 +22,12 @@ func init() {
// Command-line flags selecting which IIS sites and application pools are
// exported. A name is included only when it matches the whitelist pattern and
// does not match the blacklist pattern.
var (
	// Site filters.
	siteWhitelist = flag.String("collector.iis.site-whitelist", ".+", "Regexp of sites to whitelist. Site name must both match whitelist and not match blacklist to be included.")
	siteBlacklist = flag.String("collector.iis.site-blacklist", "", "Regexp of sites to blacklist. Site name must both match whitelist and not match blacklist to be included.")

	// Application pool filters.
	appWhitelist = flag.String("collector.iis.app-whitelist", ".+", "Regexp of apps to whitelist. App name must both match whitelist and not match blacklist to be included.")
	appBlacklist = flag.String("collector.iis.app-blacklist", "", "Regexp of apps to blacklist. App name must both match whitelist and not match blacklist to be included.")
)

// An IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService metrics
// An IISCollector is a Prometheus collector for WMI Win32_PerfRawData_W3SVC_WebService
// and Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS metrics
type IISCollector struct {
CurrentAnonymousUsers *prometheus.Desc
CurrentBlockedAsyncIORequests *prometheus.Desc
Expand All @@ -49,13 +54,31 @@ type IISCollector struct {

siteWhitelistPattern *regexp.Regexp
siteBlacklistPattern *regexp.Regexp

CurrentApplicationPoolState *prometheus.Desc
CurrentApplicationPoolUptime *prometheus.Desc
CurrentWorkerProcesses *prometheus.Desc
MaximumWorkerProcesses *prometheus.Desc
RecentWorkerProcessFailures *prometheus.Desc
TimeSinceLastWorkerProcessFailure *prometheus.Desc
TotalApplicationPoolRecycles *prometheus.Desc
TotalApplicationPoolUptime *prometheus.Desc
TotalWorkerProcessesCreated *prometheus.Desc
TotalWorkerProcessFailures *prometheus.Desc
TotalWorkerProcessPingFailures *prometheus.Desc
TotalWorkerProcessShutdownFailures *prometheus.Desc
TotalWorkerProcessStartupFailures *prometheus.Desc

appWhitelistPattern *regexp.Regexp
appBlacklistPattern *regexp.Regexp
}

// NewIISCollector builds an IISCollector with descriptors for the web site and application pool metrics.
func NewIISCollector() (Collector, error) {
const subsystem = "iis"

return &IISCollector{
// Websites
// Gauges
CurrentAnonymousUsers: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_anonymous_users"),
Expand Down Expand Up @@ -188,6 +211,92 @@ func NewIISCollector() (Collector, error) {

siteWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteWhitelist)),
siteBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *siteBlacklist)),

// App Pools
// Gauges
CurrentApplicationPoolState: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_state"),
"The current status of the application pool (1 - Uninitialized, 2 - Initialized, 3 - Running, 4 - Disabling, 5 - Disabled, 6 - Shutdown Pending, 7 - Delete Pending) (CurrentApplicationPoolState)",
[]string{"app","state"},
nil,
),
CurrentApplicationPoolUptime: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_application_pool_start_time"),
"The unix timestamp for the application pool start time (CurrentApplicationPoolUptime)",
[]string{"app"},
nil,
),
CurrentWorkerProcesses: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "current_worker_processes"),
"The current number of worker processes that are running in the application pool (CurrentWorkerProcesses)",
[]string{"app"},
nil,
),
MaximumWorkerProcesses: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "maximum_worker_processes"),
"The maximum number of worker processes that have been created for the application pool since Windows Process Activation Service (WAS) started (MaximumWorkerProcesses)",
[]string{"app"},
nil,
),
RecentWorkerProcessFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "recent_worker_process_failures"),
"The number of times that worker processes for the application pool failed during the rapid-fail protection interval (RecentWorkerProcessFailures)",
[]string{"app"},
nil,
),

// Counters
TimeSinceLastWorkerProcessFailure: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "time_since_last_worker_process_failure"),
"The length of time, in seconds, since the last worker process failure occurred for the application pool (TimeSinceLastWorkerProcessFailure)",
[]string{"app"},
nil,
),
TotalApplicationPoolRecycles: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_recycles"),
"The number of times that the application pool has been recycled since Windows Process Activation Service (WAS) started (TotalApplicationPoolRecycles)",
[]string{"app"},
nil,
),
TotalApplicationPoolUptime: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_application_pool_start_time"),
"The unix timestamp for the application pool of when the Windows Process Activation Service (WAS) started (TotalApplicationPoolUptime)",
[]string{"app"},
nil,
),
TotalWorkerProcessesCreated: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_processes_created"),
"The number of worker processes created for the application pool since Windows Process Activation Service (WAS) started (TotalWorkerProcessesCreated)",
[]string{"app"},
nil,
),
TotalWorkerProcessFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_failures"),
"The number of times that worker processes have crashed since the application pool was started (TotalWorkerProcessFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessPingFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_ping_failures"),
"The number of times that Windows Process Activation Service (WAS) did not receive a response to ping messages sent to a worker process (TotalWorkerProcessPingFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessShutdownFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_shutdown_failures"),
"The number of times that Windows Process Activation Service (WAS) failed to shut down a worker process (TotalWorkerProcessShutdownFailures)",
[]string{"app"},
nil,
),
TotalWorkerProcessStartupFailures: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, subsystem, "total_worker_process_startup_failures"),
"The number of times that Windows Process Activation Service (WAS) failed to start a worker process (TotalWorkerProcessStartupFailures)",
[]string{"app"},
nil,
),

// Application pool filters are compiled from the app-specific flags (not the
// site flags) and anchored so the pattern must match the whole pool name.
appWhitelistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appWhitelist)),
appBlacklistPattern: regexp.MustCompile(fmt.Sprintf("^(?:%s)$", *appBlacklist)),
}, nil
}

Expand Down Expand Up @@ -245,6 +354,36 @@ type Win32_PerfRawData_W3SVC_WebService struct {
TotalUnlockRequests uint32
}

// Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS is the destination type
// for the application pool WMI query issued in collect. The field names and
// types are kept as declared because the struct is handed to wmi.CreateQuery.
type Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS struct {
	// Name is the application pool name; it is matched against the app
	// whitelist/blacklist and used as the "app" label.
	Name string
	// Frequency_Object is the divisor collect uses to turn the raw uptime
	// values into seconds.
	Frequency_Object uint64
	// Timestamp_Object is not read by the visible collect code.
	Timestamp_Object uint64

	// CurrentApplicationPoolState is a numeric state code; see ApplicationStates.
	CurrentApplicationPoolState uint32
	// CurrentApplicationPoolUptime is treated by collect as a Windows
	// timestamp (epoch 1 Jan 1601) and converted to a unix timestamp.
	CurrentApplicationPoolUptime       uint64
	CurrentWorkerProcesses             uint32
	MaximumWorkerProcesses             uint32
	RecentWorkerProcessFailures        uint32
	TimeSinceLastWorkerProcessFailure  uint64
	TotalApplicationPoolRecycles       uint32
	// TotalApplicationPoolUptime is converted by collect the same way as
	// CurrentApplicationPoolUptime.
	TotalApplicationPoolUptime         uint64
	TotalWorkerProcessesCreated        uint32
	TotalWorkerProcessFailures         uint32
	TotalWorkerProcessPingFailures     uint32
	TotalWorkerProcessShutdownFailures uint32
	TotalWorkerProcessStartupFailures  uint32
}

// ApplicationStates maps the numeric CurrentApplicationPoolState code to the
// name emitted as the "state" label of the application pool state metric.
var ApplicationStates = map[uint32]string{
	1: "Uninitialized",
	2: "Initialized",
	3: "Running",
	4: "Disabling",
	5: "Disabled",
	6: "Shutdown Pending",
	7: "Delete Pending",
}

func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, error) {
var dst []Win32_PerfRawData_W3SVC_WebService
q := wmi.CreateQuery(&dst, "")
Expand Down Expand Up @@ -498,6 +637,124 @@ func (c *IISCollector) collect(ch chan<- prometheus.Metric) (*prometheus.Desc, e
)

}

var dst2 []Win32_PerfRawData_APPPOOLCountersProvider_APPPOOLWAS
q2 := wmi.CreateQuery(&dst2, "")
if err := wmi.Query(q2, &dst2); err != nil {
return nil, err
}

for _, app := range dst2 {
if app.Name == "_Total" ||
c.appBlacklistPattern.MatchString(app.Name) ||
!c.appWhitelistPattern.MatchString(app.Name) {
continue
}

// Gauges
for key, label := range ApplicationStates {
isCurrentState := 0.0
if key == app.CurrentApplicationPoolState {
isCurrentState = 1.0
}
ch <- prometheus.MustNewConstMetric(
c.CurrentApplicationPoolState,
prometheus.GaugeValue,
isCurrentState,
app.Name,
label,
)
}


ch <- prometheus.MustNewConstMetric(
	c.CurrentApplicationPoolUptime,
	prometheus.GaugeValue,
	// Convert from a Windows timestamp (epoch 1 Jan 1601) to a unix timestamp
	// (epoch 1 Jan 1970). The subtraction is done in int64: in uint64 a raw
	// value smaller than the epoch offset (e.g. 0) would wrap around to a
	// huge positive number instead of going negative.
	float64(int64(app.CurrentApplicationPoolUptime)-116444736000000000)/float64(app.Frequency_Object),
	app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.CurrentWorkerProcesses,
prometheus.GaugeValue,
float64(app.CurrentWorkerProcesses),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.MaximumWorkerProcesses,
prometheus.GaugeValue,
float64(app.MaximumWorkerProcesses),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.RecentWorkerProcessFailures,
prometheus.GaugeValue,
float64(app.RecentWorkerProcessFailures),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TimeSinceLastWorkerProcessFailure,
prometheus.GaugeValue,
float64(app.TimeSinceLastWorkerProcessFailure),
app.Name,
)

// Counters
ch <- prometheus.MustNewConstMetric(
c.TotalApplicationPoolRecycles,
prometheus.CounterValue,
float64(app.TotalApplicationPoolRecycles),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
	c.TotalApplicationPoolUptime,
	prometheus.CounterValue,
	// Convert from a Windows timestamp (epoch 1 Jan 1601) to a unix timestamp
	// (epoch 1 Jan 1970). The subtraction is done in int64: in uint64 a raw
	// value smaller than the epoch offset (e.g. 0) would wrap around to a
	// huge positive number instead of going negative.
	float64(int64(app.TotalApplicationPoolUptime)-116444736000000000)/float64(app.Frequency_Object),
	app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessesCreated,
prometheus.CounterValue,
float64(app.TotalWorkerProcessesCreated),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessFailures),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessPingFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessPingFailures),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessShutdownFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessShutdownFailures),
app.Name,
)

ch <- prometheus.MustNewConstMetric(
c.TotalWorkerProcessStartupFailures,
prometheus.CounterValue,
float64(app.TotalWorkerProcessStartupFailures),
app.Name,
)

}

return nil, nil
}

0 comments on commit 9d51525

Please sign in to comment.