Skip to content

Commit

Permalink
[spark] An AgentCheck that gathers metrics for Apache Spark
Browse files Browse the repository at this point in the history
  • Loading branch information
Zachary Radtka authored and Zachary Radtka committed Apr 26, 2016
1 parent b7165e9 commit 2f96b4a
Show file tree
Hide file tree
Showing 10 changed files with 1,064 additions and 0 deletions.
453 changes: 453 additions & 0 deletions checks.d/spark.py

Large diffs are not rendered by default.

23 changes: 23 additions & 0 deletions conf.d/spark.yaml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
init_config:

instances:
#
# The Spark check retrieves metrics from YARN's ResourceManager. This
# check must be run from the Master Node and the ResourceManager URI must
# be specified below. The ResourceManager URI is composed of the
# ResourceManager's hostname and port.
#
# The ResourceManager hostname can be found in the yarn-site.xml conf file
# under the property yarn.resourcemanager.address
#
# The ResourceManager port can be found in the yarn-site.xml conf file under
# the property yarn.resourcemanager.webapp.address
- resourcemanager_uri: http://localhost:8088

# A required friendly name for the cluster.
# cluster_name: MySparkCluster

# Optional tags to be applied to every emitted metric.
# tags:
# - key:value
# - instance:production
11 changes: 11 additions & 0 deletions tests/checks/fixtures/spark/apps_metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"apps": {
"app": [
{
"id": "application_1459362484344_0011",
"name": "PySpark",
"trackingUrl": "http://localhost:8088/proxy/application_1459362484344_0011/"
}
]
}
}
5 changes: 5 additions & 0 deletions tests/checks/fixtures/spark/cluster_info
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"clusterInfo": {
"id": 1453738555560
}
}
36 changes: 36 additions & 0 deletions tests/checks/fixtures/spark/executor_metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
[
{
"id": "driver",
"hostPort": "10.0.2.15:33870",
"rddBlocks": 99,
"memoryUsed": 98,
"diskUsed": 97,
"activeTasks": 96,
"failedTasks": 95,
"completedTasks": 94,
"totalTasks": 93,
"totalDuration": 92,
"totalInputBytes": 91,
"totalShuffleRead": 90,
"totalShuffleWrite": 89,
"maxMemory": 278019440,
"executorLogs": {}
},
{
"id": "1",
"hostPort": "10.0.2.15:33870",
"rddBlocks": 1,
"memoryUsed": 2,
"diskUsed": 3,
"activeTasks": 4,
"failedTasks": 5,
"completedTasks": 6,
"totalTasks": 7,
"totalDuration": 8,
"totalInputBytes": 9,
"totalShuffleRead": 10,
"totalShuffleWrite": 11,
"maxMemory": 555755765,
"executorLogs": {}
}
]
102 changes: 102 additions & 0 deletions tests/checks/fixtures/spark/job_metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
[
{
"jobId": 0,
"name": "saveAsTextFile at NativeMethodAccessorImpl.java:-2",
"submissionTime": "2016-03-31T03:05:43.301GMT",
"completionTime": "2016-03-31T03:05:47.080GMT",
"stageIds": [
0,
1
],
"status": "RUNNING",
"numTasks": 20,
"numActiveTasks": 30,
"numCompletedTasks": 40,
"numSkippedTasks": 50,
"numFailedTasks": 60,
"numActiveStages": 70,
"numCompletedStages": 80,
"numSkippedStages": 90,
"numFailedStages": 100
},
{
"jobId": 0,
"name": "saveAsTextFile at NativeMethodAccessorImpl.java:-2",
"submissionTime": "2016-03-31T03:05:43.301GMT",
"completionTime": "2016-03-31T03:05:47.080GMT",
"stageIds": [
0,
1
],
"status": "RUNNING",
"numTasks": 0,
"numActiveTasks": 0,
"numCompletedTasks": 0,
"numSkippedTasks": 0,
"numFailedTasks": 0,
"numActiveStages": 0,
"numCompletedStages": 0,
"numSkippedStages": 0,
"numFailedStages": 0
},
{
"jobId": 0,
"name": "saveAsTextFile at NativeMethodAccessorImpl.java:-2",
"submissionTime": "2016-03-31T03:05:43.301GMT",
"completionTime": "2016-03-31T03:05:47.080GMT",
"stageIds": [
0,
1
],
"status": "SUCCEEDED",
"numTasks": 1000,
"numActiveTasks": 2000,
"numCompletedTasks": 3000,
"numSkippedTasks": 4000,
"numFailedTasks": 5000,
"numActiveStages": 6000,
"numCompletedStages": 7000,
"numSkippedStages": 8000,
"numFailedStages": 9000
},
{
"jobId": 0,
"name": "saveAsTextFile at NativeMethodAccessorImpl.java:-2",
"submissionTime": "2016-03-31T03:05:43.301GMT",
"completionTime": "2016-03-31T03:05:47.080GMT",
"stageIds": [
0,
1
],
"status": "SUCCEEDED",
"numTasks": 0,
"numActiveTasks": 0,
"numCompletedTasks": 0,
"numSkippedTasks": 0,
"numFailedTasks": 0,
"numActiveStages": 0,
"numCompletedStages": 0,
"numSkippedStages": 0,
"numFailedStages": 0
},
{
"jobId": 0,
"name": "saveAsTextFile at NativeMethodAccessorImpl.java:-2",
"submissionTime": "2016-03-31T03:05:43.301GMT",
"completionTime": "2016-03-31T03:05:47.080GMT",
"stageIds": [
0,
1
],
"status": "SUCCEEDED",
"numTasks": 0,
"numActiveTasks": 0,
"numCompletedTasks": 0,
"numSkippedTasks": 0,
"numFailedTasks": 0,
"numActiveStages": 0,
"numCompletedStages": 0,
"numSkippedStages": 0,
"numFailedStages": 0
}
]
11 changes: 11 additions & 0 deletions tests/checks/fixtures/spark/rdd_metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[
{
"id": 6,
"name": "PythonRDD",
"numPartitions": 2,
"numCachedPartitions": 2,
"storageLevel": "Memory Serialized 1x Replicated",
"memoryUsed": 284,
"diskUsed": 0
}
]
14 changes: 14 additions & 0 deletions tests/checks/fixtures/spark/spark_apps
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[
{
"id": "app_001",
"name": "PySparkShell",
"attempts": [
{
"startTime": "2016-04-12T12:48:17.576GMT",
"endTime": "1969-12-31T23:59:59.999GMT",
"sparkUser": "",
"completed": false
}
]
}
]
117 changes: 117 additions & 0 deletions tests/checks/fixtures/spark/stage_metrics
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
[
{
"status": "COMPLETE",
"stageId": 0,
"attemptId": 0,
"numActiveTasks": 100,
"numCompleteTasks": 101,
"numFailedTasks": 102,
"executorRunTime": 103,
"inputBytes": 104,
"inputRecords": 105,
"outputBytes": 106,
"outputRecords": 107,
"shuffleReadBytes": 108,
"shuffleReadRecords": 109,
"shuffleWriteBytes": 110,
"shuffleWriteRecords": 111,
"memoryBytesSpilled": 112,
"diskBytesSpilled": 113,
"name": "reduceByKey at <stdin>:1",
"details": "",
"schedulingPool": "default",
"accumulatorUpdates": []
},
{
"status": "COMPLETE",
"stageId": 0,
"attemptId": 0,
"numActiveTasks": 0,
"numCompleteTasks": 0,
"numFailedTasks": 0,
"executorRunTime": 0,
"inputBytes": 0,
"inputRecords": 0,
"outputBytes": 0,
"outputRecords": 0,
"shuffleReadBytes": 0,
"shuffleReadRecords": 0,
"shuffleWriteBytes": 0,
"shuffleWriteRecords": 0,
"memoryBytesSpilled": 0,
"diskBytesSpilled": 0,
"name": "reduceByKey at <stdin>:1",
"details": "",
"schedulingPool": "default",
"accumulatorUpdates": []
},
{
"status": "RUNNING",
"stageId": 1,
"attemptId": 2,
"numActiveTasks": 3,
"numCompleteTasks": 4,
"numFailedTasks": 5,
"executorRunTime": 6,
"inputBytes": 7,
"inputRecords": 8,
"outputBytes": 9,
"outputRecords": 10,
"shuffleReadBytes": 11,
"shuffleReadRecords": 12,
"shuffleWriteBytes": 13,
"shuffleWriteRecords": 14,
"memoryBytesSpilled": 15,
"diskBytesSpilled": 16,
"name": "reduceByKey at <stdin>:1",
"details": "",
"schedulingPool": "default",
"accumulatorUpdates": []
},
{
"status": "RUNNING",
"stageId": 0,
"attemptId": 0,
"numActiveTasks": 0,
"numCompleteTasks": 0,
"numFailedTasks": 0,
"executorRunTime": 0,
"inputBytes": 0,
"inputRecords": 0,
"outputBytes": 0,
"outputRecords": 0,
"shuffleReadBytes": 0,
"shuffleReadRecords": 0,
"shuffleWriteBytes": 0,
"shuffleWriteRecords": 0,
"memoryBytesSpilled": 0,
"diskBytesSpilled": 0,
"name": "reduceByKey at <stdin>:1",
"details": "",
"schedulingPool": "default",
"accumulatorUpdates": []
},
{
"status": "RUNNING",
"stageId": 0,
"attemptId": 0,
"numActiveTasks": 0,
"numCompleteTasks": 0,
"numFailedTasks": 0,
"executorRunTime": 0,
"inputBytes": 0,
"inputRecords": 0,
"outputBytes": 0,
"outputRecords": 0,
"shuffleReadBytes": 0,
"shuffleReadRecords": 0,
"shuffleWriteBytes": 0,
"shuffleWriteRecords": 0,
"memoryBytesSpilled": 0,
"diskBytesSpilled": 0,
"name": "reduceByKey at <stdin>:1",
"details": "",
"schedulingPool": "default",
"accumulatorUpdates": []
}
]
Loading

0 comments on commit 2f96b4a

Please sign in to comment.