From 3b4373ca2f236b5eaf232b21f39e0af25a483db8 Mon Sep 17 00:00:00 2001 From: Qiumin Xu Date: Fri, 12 Apr 2019 10:16:25 -0700 Subject: [PATCH 01/14] Add a pod viewer tool for analyzing TPU pod performance. --- tensorboard/plugins/profile/BUILD | 1 + .../profile/pod_viewer/details_card/BUILD | 18 + .../pod_viewer/details_card/details-card.html | 101 ++++ .../pod_viewer/details_card/details-card.ts | 160 ++++++ .../pod_viewer/pod_viewer_dashboard/BUILD | 21 + .../pod-viewer-dashboard.html | 124 ++++ .../pod-viewer-dashboard.ts | 232 ++++++++ .../profile/pod_viewer/stack_bar_chart/BUILD | 18 + .../stack_bar_chart/stack-bar-chart.html | 51 ++ .../stack_bar_chart/stack-bar-chart.ts | 203 +++++++ .../profile/pod_viewer/topology_graph/BUILD | 25 + .../topology_graph/topology-graph.html | 171 ++++++ .../topology_graph/topology-graph.ts | 544 ++++++++++++++++++ .../profile/profile_demo.pod_viewer.json | 1 + tensorboard/plugins/profile/profile_demo.py | 3 + tensorboard/plugins/profile/profile_plugin.py | 2 + .../profile/tf_profile_dashboard/BUILD | 1 + .../tf-profile-dashboard.html | 26 +- 18 files changed, 1701 insertions(+), 1 deletion(-) create mode 100644 tensorboard/plugins/profile/pod_viewer/details_card/BUILD create mode 100644 tensorboard/plugins/profile/pod_viewer/details_card/details-card.html create mode 100644 tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts create mode 100644 tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD create mode 100644 tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html create mode 100644 tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts create mode 100644 tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD create mode 100644 tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html create mode 100644 tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts create mode 100644 
tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD create mode 100644 tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html create mode 100644 tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts create mode 100644 tensorboard/plugins/profile/profile_demo.pod_viewer.json diff --git a/tensorboard/plugins/profile/BUILD b/tensorboard/plugins/profile/BUILD index 26dd7ba2b9..0b57017472 100644 --- a/tensorboard/plugins/profile/BUILD +++ b/tensorboard/plugins/profile/BUILD @@ -90,6 +90,7 @@ py_binary( "profile_demo.google_chart_demo.json", "profile_demo.memory_viewer.json", "profile_demo.op_profile.json", + "profile_demo.pod_viewer.json", ], srcs_version = "PY2AND3", deps = [ diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/BUILD b/tensorboard/plugins/profile/pod_viewer/details_card/BUILD new file mode 100644 index 0000000000..07904fe56c --- /dev/null +++ b/tensorboard/plugins/profile/pod_viewer/details_card/BUILD @@ -0,0 +1,18 @@ +package(default_visibility = ["//tensorboard:internal"]) + +load("//tensorboard/defs:web.bzl", "tf_web_library") + +licenses(["notice"]) # Apache 2.0 + +tf_web_library( + name = "details_card", + srcs = [ + "details-card.html", + "details-card.ts", + ], + path = "/pod-viewer", + deps = [ + "//tensorboard/components/tf_imports:polymer", + "@org_polymer_paper_card", + ], +) diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html new file mode 100644 index 0000000000..07d3db3b46 --- /dev/null +++ b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html @@ -0,0 +1,101 @@ + + + + + + + + + + + + diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts new file mode 100644 index 0000000000..9fe0909e7a --- /dev/null +++ 
b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts
@@ -0,0 +1,160 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace pod_viewer_details_card {
+
+Polymer({
+  is: 'details-card',
+  properties: {
+    nodes: {
+      type: Array,
+      notify: true,
+      observer: 'updateCard_',
+    },
+    name: {
+      type: String,
+      value: null,
+    },
+    id: {
+      type: Number,
+    },
+    utilization: {
+      type: Number,
+    },
+    isChannel: {
+      type: Boolean,
+      value: false,
+    },
+    isAllReduce: {
+      type: Boolean,
+      value: false,
+    },
+    hasReplicaGroups: {
+      type: Boolean,
+      value: false,
+    },
+    isStepBreakdown: {
+      type: Boolean,
+      value: false,
+    },
+    stepBreakdownEle: {
+      type: Array,
+    },
+  },
+  /** Updates the card from nodes[0]. The card kind is inferred from which of
+   * channelId / hostName / replicaGroups is present, checked in that order.
+   */
+  updateCard_: function(nodes) {
+    if (!nodes || nodes.length == 0) return;
+    this.isChannel = false;
+    this.isAllReduce = false;
+    this.isStepBreakdown = false;
+    this.hasReplicaGroups = false;
+    if (nodes[0].channelId != null) {  // != null keeps channel id 0 valid.
+      this.name = 'Channel #';
+      this.id = nodes[0].channelId;
+      this.isChannel = true;
+    } else if (nodes[0].hostName) {
+      this.name = 'Step breakdown of chip';
+      this.id = nodes[0].chipId;
+      this.isStepBreakdown = true;
+    } else if (nodes[0].replicaGroups) {
+      this.name = nodes[0].name;
+      this.id = null;
+      this.isAllReduce = true;
+      this.hasReplicaGroups = nodes[0].replicaGroups.length > 0;  // Boolean.
+    }
+  },
+  /**
+   * Converts from number of bytes to MiB (1 MiB = 1048576 bytes).
+   */
+  bytesToMiB_: function(numBytes: number): number {
+    return numBytes / 1048576;
+  },
+  /**
+   * Return the data size in MiB with two decimals, or '' if it is 0/unset.
+   */
+  sizeMiB_: function(dataSize: undefined|number): string {
+    if (!dataSize) {
+      return '';
+    }
+    return this.format_(this.bytesToMiB_(dataSize));
+  },
+  /**
+   * Return the formatted link bandwidth in GiB/s, i.e. the data size
+   * transferred over the duration between the start of the send operation
+   * and the end of the recv-done operation. Dividing by 1073.74 (2^30 / 1e6)
+   * presumably converts bytes/us to GiB/s -- TODO confirm the input units.
+   */
+  bw_: function(dataSize: undefined|number, duration: undefined|number):
+      string {
+    if (!dataSize || !duration) {
+      return '';
+    }
+    return this.format_(dataSize / duration / 1073.74);
+  },
+  /**
+   * Return the chip id given the global core id (two cores per chip).
+   */
+  chipId_: function(coreId: undefined|number): number {
+    if (!coreId) {
+      return 0;
+    }
+    return Math.floor(coreId / 2);
+  },
+  /**
+   * Return the node ordinal (0 or 1) given the global core id.
+   */
+  nodeId_: function(coreId: undefined|number): number {
+    if (!coreId) {
+      return 0;
+    }
+    return coreId & 1;
+  },
+  /**
+   * Format a number with two digits after the decimal point ('' if null).
+   */
+  format_: function(number: undefined|number): string {
+    return number == null ? '' : number.toFixed(2);
+  },
+  /**
+   * Return the formatted value of node[key], or '' if either is missing.
+   */
+  getStepBreakdownValue_: function(node, key): string {
+    if (!key || !node) {
+      return '';
+    }
+    return this.format_(node[key]);
+  },
+  /**
+   * Return node[key] as a percentage of node.totalDurationUs, e.g. '12.34%'.
+   */
+  getStepBreakdownPct_: function(node, key): string {
+    if (!key || !node || !node.totalDurationUs) {
+      return '';
+    }
+    return (node[key] / node.totalDurationUs * 100).toFixed(2) + '%';
+  },
+  ready() {  // Sets the breakdown keys and their display labels.
+    this.stepBreakdownEle = [
+      {key: 'highFlopsComputeUs', label: 'High flops compute'},
+      {key: 'lowFlopsComputeUs', label: 'Low flops compute'},
+      {key: 'hostInfeedDurationUs', label: 'Infeed'},
+      {key: 'hostOutfeedDurationUs', label: 'Outfeed'},
+      {key: 'crsDurationUs', label: 'All reduce'},
+      {key: 'sendDurationUs', label: 'Send'},
+      {key: 'recvDurationUs', label: 'Recv'}
+    ];
+  },
+});
+
+}  // namespace pod_viewer_details_card
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD
new file mode 100644
index 0000000000..a4c62cf348
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD
@@ -0,0 +1,21 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "pod_viewer_dashboard",
+    srcs = [
+        "pod-viewer-dashboard.html",
+        "pod-viewer-dashboard.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:polymer",
+        "//tensorboard/plugins/profile/pod_viewer/details_card",
+        "//tensorboard/plugins/profile/pod_viewer/stack_bar_chart",
+        "//tensorboard/plugins/profile/pod_viewer/topology_graph",
+        "@org_polymer_paper_slider",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html
b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html new file mode 100644 index 0000000000..f845f67e36 --- /dev/null +++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html @@ -0,0 +1,124 @@ + + + + + + + + + + + + + + diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts new file mode 100644 index 0000000000..b9cb473325 --- /dev/null +++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts @@ -0,0 +1,232 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the 'License'); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an 'AS IS' BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+
+namespace pod_viewer_dashboard {
+
+Polymer({
+  is: 'pod-viewer-dashboard',
+  properties: {
+    _data: {
+      type: Object,
+      notify: true,
+    },
+    podStats: {
+      type: Object,
+      notify: true,
+      observer: 'podStatsChanged_',
+    },
+    stepStats: {
+      type: Array,
+      value: null,
+      notify: true,
+    },
+    channelDb: {
+      type: Array,
+      value: null,
+      notify: true,
+    },
+    allReduceDb: {
+      type: Array,
+      value: null,
+      notify: true,
+    },
+    stepBreakdownEle: {
+      type: Array,
+      notify: true,
+    },
+    channelEle: {
+      type: Array,
+      notify: true,
+    },
+    allReduceEle: {
+      type: Array,
+      notify: true,
+    },
+    stepBreakdownFunc: {
+      type: Object,
+      notify: true,
+    },
+    channelFunc: {
+      type: Object,
+      notify: true,
+    },
+    allReduceFunc: {
+      type: Object,
+      notify: true,
+    },
+    runEnvironment: {
+      type: Object,
+      notify: true,
+    },
+    curStepId: {
+      type: Number,
+      value: 0,
+      observer: 'stepChanged_',
+    },
+    maxStepId: {
+      type: Number,
+    },
+    stepNum: {
+      type: Number,
+      computed: 'getStepNum(podStats)',
+    },
+    selectedChipId: {
+      type: Number,
+      value: -1,
+      notify: true,
+    },
+    selectedChannel: {
+      type: Array,
+      notify: true,
+      observer: 'selectedChannelChanged_',
+    },
+    activeBarChartEle: {
+      type: Object,
+      notify: true,
+      observer: 'activeBarChartEleChanged_',
+    },
+    hloInfoMap: {
+      type: Object,
+      notify: true,
+    },
+    active: {
+      type: Array,
+      value: () => [],
+      notify: true,
+    },
+    ready_: {
+      type: Boolean,
+      value: false,
+    },
+  },
+  observers: [
+    'dataChanged_(_data, ready_)',
+  ],
+  /**
+   * Loads step 0 of newly arrived data; does nothing until ready() has run.
+   */
+  dataChanged_(newData, ready) {
+    if (!newData || !ready) {
+      return;
+    }
+    this.maxStepId = newData.podStatsSequence.podStatsMap.length - 1;
+    this.curStepId = 0;
+    if (this.maxStepId >= 0) {  // A single-step profile has maxStepId == 0.
+      this.podStats = this.populateLowFlopsCompute_(
+          newData.podStatsSequence.podStatsMap['0']);
+    }
+    this.runEnvironment = newData.runEnvironment;
+    this.hloInfoMap = newData.hloInfoMap;
+  },
+  /**
+   * Switches podStats to step `newStep`; steps above maxStepId are ignored.
+   */
+  stepChanged_(newStep: number) {
+    if (!this._data) {
+      return;
+    }
+    if (newStep > this.maxStepId) {
+      return;
+    }
+    this.podStats = this.populateLowFlopsCompute_(
+        this._data.podStatsSequence.podStatsMap[newStep.toString()]);
+  },
+  /**
+   * Updates the input of the details card when selected channel changed.
+   */
+  selectedChannelChanged_(newChannel) {
+    if (!newChannel) {
+      return;
+    }
+    this.active = newChannel;
+  },
+  activeBarChartEleChanged_(newEle) {  // Wraps newEle in a list for `active`.
+    if (!newEle) {
+      return;
+    }
+    this.active = [newEle];
+  },
+  populateLowFlopsCompute_(podStats) {  // Fills lowFlopsComputeUs per core.
+    if (!podStats || !this.ready_) return null;
+    let podStatsPerCore = podStats['podStatsPerCore'];
+    for (let i in podStatsPerCore) {
+      let val = podStatsPerCore[i];
+      if (val.hasOwnProperty('lowFlopsComputeUs')) {
+        // Already populated (e.g. a revisited step): return the stats as-is.
+        return podStats;
+      }
+      // lowFlopsComputeUs is calculated by deducting all other breakdown from
+      // the total duration.
+      val['lowFlopsComputeUs'] = val.totalDurationUs;
+      for (let j = 0; j < this.stepBreakdownEle.length; j++) {
+        if (this.stepBreakdownEle[j].key == 'lowFlopsComputeUs') {
+          continue;  // Skip the value being computed.
+        }
+        // Deduct this breakdown; a missing value counts as 0 instead of NaN.
+        val['lowFlopsComputeUs'] -= val[this.stepBreakdownEle[j].key] || 0;
+      }
+    }
+    return podStats;
+  },
+  /**
+   * Rebuilds the sorted chart inputs (steps, channels, all-reduce ops).
+   */
+  podStatsChanged_(newStats) {
+    if (!newStats) {
+      return;
+    }
+    let stepStats = [];
+    for (const i in newStats['podStatsPerCore']) {
+      stepStats.push(newStats['podStatsPerCore'][i]);
+    }
+    stepStats.sort((a, b) => a.chipId - b.chipId);
+    this.stepStats = stepStats;
+    if (newStats['channelDb'] && newStats['channelDb'].length > 0) {
+      this.channelDb = newStats['channelDb'].sort(
+          (a, b) => b.durationUs - a.durationUs);
+    }
+    if (newStats['allReduceOpDb'] && newStats['allReduceOpDb'].length > 0) {
+      this.allReduceDb = newStats['allReduceOpDb'].sort(
+          (a, b) => b.durationUs - a.durationUs);
+    }
+  },
+  /**
+   * Returns the step number of the current step, parsed as a base-10 integer.
+   */
+  getStepNum(podStats): number {
+    return parseInt(podStats.stepNum, 10);
+  },
+  ready() {  // Sets chart keys/labels and x-axis label functions.
+    this.stepBreakdownEle = [
+      {key: 'highFlopsComputeUs', label: 'High flops compute'},
+      {key: 'lowFlopsComputeUs', label: 'Low flops compute'},
+      {key: 'hostInfeedDurationUs', label: 'Infeed'},
+      {key: 'hostOutfeedDurationUs', label: 'Outfeed'},
+      {key: 'crsDurationUs', label: 'All reduce'},
+      {key: 'sendDurationUs', label: 'Send'},
+      {key: 'recvDurationUs', label: 'Recv'}
+    ];
+    this.channelEle = [{key: 'durationUs', label: 'Duration (us)'}];
+    this.allReduceEle = [{key: 'durationUs', label: 'Duration (us)'}];
+    this.stepBreakdownFunc = (d) => {
+      return '(' + d.chipId + ',' + d.nodeId + ')';
+    };
+    this.channelFunc = (d) => d.channelId;
+    this.allReduceFunc = function(d) {
+      const res = d.name.replace(/ll-reduce.|usion.|ll-reduce|usion/, '');
+      return res.length > 1 ? res : res + '0';
+    };
+    this.ready_ = true;
+  }
+});
+
+}  // namespace pod_viewer_dashboard
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD
new file mode 100644
index 0000000000..b975082b62
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD
@@ -0,0 +1,18 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "stack_bar_chart",
+    srcs = [
+        "stack-bar-chart.html",
+        "stack-bar-chart.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:d3",
+        "//tensorboard/components/tf_imports:polymer",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html
new file mode 100644
index 0000000000..5ac4af5a68
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html
@@ -0,0 +1,51 @@
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts
new file mode 100644
index 0000000000..148e6b5171
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts
@@ -0,0 +1,203 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the 'License');
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an 'AS IS' BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace pod_viewer_stack_bar_chart {
+
+Polymer({
+  is: 'stack-bar-chart',
+  properties: {
+    id: {
+      type: String,
+    },
+    data: {
+      type: Object,
+      notify: true,
+      observer: 'dataChanged_',
+    },
+    ready_: {
+      type: Boolean,
+      value: false,
+    },
+    stackEle: {
+      type: Array,
+      notify: true,
+      observer: 'onStackEleChanged_',
+    },
+    xDomainFunc: {
+      type: Object,
+      notify: true,
+    },
+    active: {
+      type: Object,
+      notify: true,
+    },
+  },
+  /**
+   * Draws the stacked bar chart for `data`, replacing any previous drawing.
+   */
+  stackBarChart: function(data) {
+    // stackEle may still be unset when the data binding fires first.
+    const noStack = !this.stackEle || this.stackEle.length == 0;
+    if (!data || !this.ready_ || noStack) return;
+    d3.selectAll('#' + this.id + ' g > *').remove();
+    d3.select('#' + this.id + ' svg').remove();
+    d3.select('#' + this.id + '.svg-container').remove();
+    const stackKey = this.stackEle.map((d) => d.key);
+    const stackLabel = this.stackEle.map((d) => d.label);
+    const margin = {top: 20, right: 20, bottom: 30, left: 100};
+    const width = 1600 - margin.left - margin.right;
+    const height = 300 - margin.top - margin.bottom;
+    const barWidth = 50;
+    const xScaleRange = data.length * barWidth;
+    let xScale = d3.scaleBand().range([0, xScaleRange]).padding(0.4);
+    let yScale = d3.scaleLinear().range([height, 0]);
+    let colorScale = d3.scaleOrdinal(d3.schemeCategory10)
+                         .domain([0, 19]);
+    let svg = d3.select('#' + this.id + ' #chart')
+                  .append('svg')
+                  .attr(
+                      'width',
+                      Math.max(width, xScaleRange + margin.left + margin.right))
+                  .attr('height', height + margin.top + margin.bottom)
+                  .append('g')
+                  .attr(
+                      'transform',
+                      'translate(' + margin.left + ',' + margin.top + ')');
+    let stack = d3.stack()
+                    .keys(stackKey)
+                    .order(d3.stackOrderNone)
+                    .offset(d3.stackOffsetNone);
+    const layers = stack(data);
+    xScale.domain(data.map(this.xDomainFunc));
+    yScale.domain([0, d3.max(layers[layers.length - 1], (d) => d[0] + d[1])])
+        .nice();  // NOTE(review): d[1] alone is already the stack top.
+    this.drawLayers(svg, layers, xScale, yScale, colorScale);
+    this.drawAxes(svg, xScale, yScale, height);
+    this.drawLegend(svg, stackLabel, colorScale);
+  },
+  /**
+   * Draw one rect per (layer, bar); hovering a rect publishes it as `active`.
+   */
+  drawLayers: function(svg, layers, xScale, yScale, colorScale) {
+    let parent = this;
+    let layer = svg.selectAll('.layer')
+                    .data(layers)
+                    .enter()
+                    .append('g')
+                    .attr('class', 'layer')
+                    .style('fill', (d, i) => colorScale(i));
+    layer.selectAll('rect')
+        .data((d) => d)
+        .enter()
+        .append('rect')
+        .attr('width', xScale.bandwidth())
+        .attr('y', (d) => yScale(d[1]))
+        .attr('height', (d) => yScale(d[0]) - yScale(d[1]))
+        .attr('x', (d) => xScale(parent.xDomainFunc(d.data)))
+        .on('mouseover',
+            function(d) {
+              d3.select(this).style('opacity', 0.5);
+              parent.active = d.data;
+            })
+        .on('mouseout', function() {
+          d3.select(this).style('opacity', 1.0);
+          parent.active = null;
+        });
+  },
+  /**
+   * Draw the bottom (x) and left (y) axes of the chart.
+   */
+  drawAxes: function(svg, xScale, yScale, height) {
+    let xAxis = d3.axisBottom(xScale);
+    let yAxis = d3.axisLeft(yScale);
+    svg.append('g')
+        .attr('class', 'axis axis--x')
+        .style('font-size', 14)
+        .attr('transform', 'translate(0,' + (height + 5) + ')')
+        .call(xAxis);
+    svg.append('g')
+        .attr('class', 'axis axis--y')
+        .style('font-size', 14)
+        .attr('transform', 'translate(0,0)')
+        .call(yAxis);
+  },
+  /**
+   * Draw the legend: one colored icon and label per stack element.
+   */
+  drawLegend: function(svg, labels, colorScale) {
+    const legendWidth = 150;
+    const legendHeight = 30;
+    const iconSize = 19;
+    const labelsPerLane = 5;
+    const margin = 5;
+    const yAxisToLegend = 200;
+    let legend =
+        svg.append('g')
+            .attr('font-family', 'sans-serif')
+            .attr('font-size', 14)
+            .attr('text-anchor', 'start')
+            .selectAll('g')
+            .data(labels.slice())
+            .enter()
+            .append('g')
+            .attr(
+                'transform',
+                (d, i) => 'translate(' +
+                    (i * legendWidth -
+                     Math.floor(i / labelsPerLane) * legendWidth *
+                         labelsPerLane) +
+                    ',' + Math.floor(i / labelsPerLane) * legendHeight + ')');
+    legend.append('rect')
+        .attr('x', yAxisToLegend)
+        .attr('width', iconSize)
+        .attr('height', iconSize)
+        .attr('fill', (d, i) => colorScale(i));
+    legend.append('text')
+        .attr('x', yAxisToLegend + margin + iconSize)
+        .attr('y', 9.5)
+        .attr('dy', '0.32em')
+        .text((d) => d);
+  },
+  /**
+   * Redraw the stack bar chart; a falsy `data` is ignored.
+   */
+  redraw: function(data) {
+    if (!data) {
+      return;
+    }
+    this.stackBarChart(data);
+  },
+  /**
+   * Redraws the stack bar chart when the stack elements changed.
+   */
+  onStackEleChanged_: function(newData) {
+    if (!newData || newData.length == 0) {
+      return;
+    }
+    this.redraw(this.data);
+  },
+  /**
+   * Redraws the stack bar chart when the input data changed.
+   */
+  dataChanged_: function(newData) {
+    if (!newData) {
+      return;
+    }
+    this.redraw(newData);
+  },
+  attached: function() {  // Enables drawing and renders data already set.
+    this.ready_ = true;
+    this.redraw(this.data);
+  },
+});
+
+}  // namespace pod_viewer_stack_bar_chart
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD b/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD
new file mode 100644
index 0000000000..4b5cc181f3
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD
@@ -0,0 +1,25 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "topology_graph",
+    srcs = [
+        "topology-graph.html",
+        "topology-graph.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:d3",
+        "//tensorboard/components/tf_imports:polymer",
+        "@org_polymer_iron_icons",
+        "@org_polymer_paper_icon_button",
+        "@org_polymer_paper_item",
+        "@org_polymer_paper_listbox",
+        "@org_polymer_paper_menu",
+        "@org_polymer_paper_menu_button",
+        "@org_polymer_paper_slider",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html
new file mode 100644
index 0000000000..8951adfc6f
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html
@@ -0,0 +1,171 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts
new file mode 100644
index 0000000000..5a1942e157
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts
@@ -0,0 +1,544 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the 'License'); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an 'AS IS' BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +namespace pod_viewer_topology_graph { + +Polymer({ + is: 'topology-graph', + properties: { + data_: {type: Object, value: null}, + runEnvironment_: + {type: Object, observer: 'runEnvironmentChanged_', value: null}, + topoData_: {type: Object}, + linkData_: {type: Object}, + tpuType_: {type: String, observer: 'updateSystemInfo_'}, + hostXStride_: {type: Number, value: 2}, + hostYStride_: {type: Number, value: 2}, + nodesPerChip_: {type: Number, value: 2}, + hostGridWidth_: {type: Number}, + hostGridHeight_: {type: Number}, + chipGridSize_: {type: Number}, + nodeGridHeight_: {type: Number}, + nodeGridWidth_: {type: Number}, + chipToChipMargin_: {type: Number}, + hostToChipMargin_: {type: Number}, + hostToHostMargin_: {type: Number}, + xDimension_: {type: Number}, + yDimension_: {type: Number}, + totalCoreCount_ : {type: Number}, + ready_: {type: Boolean, value: false}, + metrics: {type: Array, notify: true, value: null}, + allChannels: {type: Array, notify: true}, + selectedChipId: {type: Number, notify: true}, + selectedMetricIdx: {type: Number, value: 0}, + selectedMetricLabel: { + type: String, + computed: 'getSelectedMetricLabel_(metrics, selectedMetricIdx)' + }, + selectedChannelId: + {type: Number, value: 0, observer: 'selectedChannelIdChanged_'}, + minChannelId: {type: Number, value: 0}, + maxChannelId: {type: Number, value: 0}, + gLink_: {type: Object}, + 
selectedChannel: {type: Array, notify: true}, + activeBarChartEle: + {type: Object, notify: true, observer: 'activeBarChartEleChanged_'} + }, + observers: + ['updateAllData(data_, runEnvironment_, metrics, selectedMetricIdx)', + 'updateTopology(topoData_, ready_)', + 'updateLinks(linkData_, ready_)', + ], + /** + * Main function to draw topology graph based on TPU topology. + */ + topologyGraph: function(data) { + d3.selectAll('#tpgraph g > *').remove(); + d3.select('#tpgraph svg').remove(); + d3.select('#tpgraph.svg-container').remove(); + const margin = {top: 50, right: 0, bottom: 100, left: 30}; + const width = 1620; + this.chipGridSize_ = 30; + this.chipToChipMargin_ = 10; + this.hostToChipMargin_ = 15; + this.hostToHostMargin_ = 10; + this.hostGridWidth_ = this.getHostGridSize(this.hostXStride_); + this.hostGridHeight_ = this.getHostGridSize(this.hostYStride_); + this.nodeGridWidth_ = this.chipGridSize_ / this.nodesPerChip_; + this.nodeGridHeight_ = this.chipGridSize_; + const hostXDim = this.xDimension_ / this.hostXStride_; + const hostYDim = this.yDimension_ / this.hostYStride_; + const colors = [ + '#ffffd9', '#edf8b1', '#c7e9b4', '#7fcdbb', '#41b6c4', '#1d91c0', + '#225ea8', '#253494', '#081d58' + ]; + const colorScale = + d3.scaleQuantile().domain([0, 1.0]).range(colors); + const chipXDims = Array.from(Array(this.xDimension_).keys()); + const chipYDims = Array.from(Array(this.yDimension_).keys()); + let svg = + d3.select('#tpgraph') + .append('svg') + .attr('width', width) + .attr( + 'height', + hostYDim * this.hostGridHeight_ + margin.bottom + margin.top) + .append('g') + .attr( + 'transform', + 'translate(' + margin.left + ',' + margin.top + ')'); + const hostData = this.createHostData(hostXDim, hostYDim); + this.drawHostCards( + svg, hostData, this.hostGridWidth_, this.hostGridHeight_, + this.hostToHostMargin_); + this.drawNodeCards(svg, data, this.nodesPerChip_, colorScale, colors); + + // Creates separate groups, so that the z-index remains in the 
right order. + this.gLink_ = svg.append('svg:g').classed('link', true); + + // Add a svg:defs for the arrow head. + svg.append('svg:defs') + .append('svg:marker') + .attr('id', 'arrow') + .attr('viewBox', '0 -5 10 10') + .attr('markerWidth', 5) + .attr('markerHeight', 5) + .attr('orient', 'auto') + .append('svg:path') + .style('stroke', 'red') + .style('fill', 'red') + .attr('d', 'M0,-5L10,0L0,5'); + this.drawLabels(svg, chipXDims, chipYDims); + const legendYLoc = + this.hostGridHeight_ * Math.ceil(this.yDimension_ / this.hostYStride_) + + this.hostToHostMargin_; + this.drawLegend(svg, legendYLoc, this.chipGridSize_, colorScale, colors); + }, + /** + * Returns the size of host grid, including the host card size and the margin + * between two hosts. + */ + getHostGridSize(stride: number): number { + return this.hostToChipMargin_ * 2 + this.chipToChipMargin_ * (stride - 1) + + this.chipGridSize_ * stride + this.hostToHostMargin_; + }, + /** + * Returns the x-axis location for the xChip'th chip of the xHost'th host. + */ + getChipXLoc: function(xHost: number, xChip: number): number { + return xHost * this.hostGridWidth_ + this.hostToChipMargin_ + + xChip * (this.chipGridSize_ + this.chipToChipMargin_); + }, + /** + * Returns the y-axis location for the yChip'th chip of the yHost'th host. + */ + getChipYLoc: function(yHost: number, yChip: number): number { + return yHost * this.hostGridHeight_ + this.hostToChipMargin_ + + yChip * (this.chipGridSize_ + this.chipToChipMargin_); + }, + /** + * Returns the x-axis location for the xNode'th node of the xChip'th chip of + * the xHost'th host. + */ + getNodeXLoc: function(xHost: number, xChip: number, xNode: number): number { + return this.getChipXLoc(xHost, xChip) + xNode * this.nodeGridWidth_; + }, + /** + * Returns the location for each host in the system. 
+ */ + createHostData: function(hostXDim: number, hostYDim: number): any { + let hostData = []; + for (let i = 0; i < hostXDim; i++) { + for (let j = 0; j < hostYDim; j++) { + hostData.push({xdim: i, ydim: j}); + } + } + return hostData; + }, + /** + * Draw the labels on x-axis and y-axis. + */ + drawLabels: function(svg, xdims: number[], ydims: number[]) { + let parent = this; + + // Draw label on x axis. + svg.selectAll('.xLabel') + .data(xdims) + .enter() + .append('text') + .text(function(d) { + return d; + }) + .attr( + 'x', + (d, i) => parent.getChipXLoc( + Math.floor(i / this.hostXStride_), i % this.hostXStride_)) + .attr('y', 0) + .style('text-anchor', 'middle') + .attr('transform', 'translate(' + this.chipGridSize_ / 2 + ', -6)') + .attr('class', 'axis'); + + // Draw label on y axis. + svg.selectAll('.yLabel') + .data(ydims) + .enter() + .append('text') + .text((d) => d) + .attr('x', 0) + .attr( + 'y', + (d, i) => parent.getChipYLoc( + Math.floor(i / this.hostYStride_), i % this.hostYStride_)) + .style('text-anchor', 'middle') + .attr('transform', 'translate(-12,' + this.chipGridSize_ / 2 + ')') + .attr('class', 'axis'); + }, + /** + * Draw the UI of host cards. + */ + drawHostCards: function( + svg, data, gridWidth: number, + gridHeight: number, hostToHostMargin: number) { + const border = 1; + const borderColor = 'black'; + let cards = svg.selectAll('.xdim').data(data, (d) => d.xdim); + cards.enter() + .append('rect') + .attr('x', (d) => d.xdim * gridWidth) + .attr('y', (d) => d.ydim * gridHeight) + .attr('rx', 4 * gridWidth / gridHeight) + .attr('ry', 4) + .attr('class', 'hour bordered') + .attr('width', gridWidth - hostToHostMargin) + .attr('height', gridHeight - hostToHostMargin) + .attr('border', border) + .style('fill', 'F0F0F0') + .style('stroke', borderColor) + .style('stroke-width', border) + .merge(cards) + .transition() + .duration(1000); + cards.exit().remove(); + }, + /** + * Draw the UI of node cards. 
+ */ + drawNodeCards: function( + svg, data, nodesPerChip, colorScale, colors) { + let parent = this; + const border = 1; + const borderColor = 'black'; + let cards = svg.selectAll('.xdim').data(data, (d) => d.xdim); + cards.enter() + .append('rect') + .attr('id', (d) => 'rid' + d.rid) + .attr( + 'x', + (d) => parent.getNodeXLoc( + Math.floor(d.xdim / parent.hostXStride_), + d.xdim % parent.hostXStride_, d.nid)) + .attr( + 'y', + (d) => parent.getChipYLoc( + Math.floor(d.ydim / parent.hostYStride_), + d.ydim % parent.hostYStride_)) + .attr('rx', 4 / nodesPerChip) + .attr('ry', 4) + .attr('class', 'hour bordered') + .attr('width', parent.nodeGridWidth_) + .attr('height', parent.nodeGridHeight_) + .attr('border', border) + .style('fill', colors[0]) + .style('stroke', borderColor) + .style('stroke-width', border) + .merge(cards) + .on('mouseover', + function(d) { + // highlight text + d3.select(this).classed('cell-hover', true).style('opacity', 0.5); + parent.selectedChipId = d.cid; + + // Update the tooltip position and value + d3.select('#tooltip') + .style('left', d3.event.pageX + 10 + 'px') + .style('top', d3.event.pageY - 10 + 'px') + .select('#value') + .text(parent.getToolTipText_(d)); + d3.select('#tooltip') + .classed('hidden', false); + }) + .on('mouseout', + function() { + parent.selectedChipId = -1; + d3.select(this) + .classed('cell-hover', false) + .style('opacity', 1.0); + d3.select('#tooltip').classed('hidden', true); + }) + .transition() + .duration(1000) + .style('fill', (d) => colorScale(d.value / d.total)); + cards.exit().remove(); + }, + /** + * Draw the UI of chip to chip links. 
+   */
+  drawLinks: function(linkData) {
+    const self = this;
+    // Nothing to draw without link data or without the link group element.
+    const nothingToDraw = !linkData || linkData.length == 0 || !this.gLink_;
+    if (nothingToDraw) {
+      return;
+    }
+
+    const joined = this.gLink_.selectAll('.link').data(linkData);
+
+    // New links are created hidden, with the arrow marker from defs attached.
+    const created = joined.enter()
+        .append('svg:path')
+        .attr('id', (d) => 'cid' + d.channelId)
+        .attr('stroke-width', 2)
+        .attr('stroke', 'red')
+        .attr('fill', 'none')
+        .attr('marker-end', 'url(#arrow)')
+        .style('visibility', 'hidden');
+
+    // Both new and existing links get their geometry (re)computed.
+    created.merge(joined).attr('d', (d) => self.linkToPath(d));
+
+    // Drop links that no longer have data.
+    joined.exit().remove();
+
+    // Re-apply visibility for the currently selected channel.
+    this.selectedChannelIdChanged_(this.selectedChannelId);
+  },
+  /**
+   * Maps a global core id to its (x, y) position in the topology graph.
+   * The chip id is floor(id / 2) and the node id is the lowest bit of id.
+   * @return [x, y]
+   */
+  coreIdToPos: function(id: number): number[] {
+    const chipId = Math.floor(id / 2);
+    const nodeId = id & 1;
+    const chipX = chipId % this.xDimension_;
+    const chipY = Math.floor(chipId / this.xDimension_);
+    const nodeLeft = this.getNodeXLoc(
+        Math.floor(chipX / this.hostXStride_), chipX % this.hostXStride_,
+        nodeId);
+    const chipTop = this.getChipYLoc(
+        Math.floor(chipY / this.hostYStride_), chipY % this.hostYStride_);
+    // Offset from the top-left corner by half a node width and half a chip
+    // height.
+    return [
+      nodeLeft + this.chipGridSize_ / this.nodesPerChip_ / 2,
+      chipTop + this.chipGridSize_ / 2
+    ];
+  },
+  /**
+   * Builds the svg path for a link from its srcCoreId to its dstCoreId.
+   * @return Path in svg format.
+   */
+  linkToPath: function(link): string {
+    const from = this.coreIdToPos(link.srcCoreId);
+    const to = this.coreIdToPos(link.dstCoreId);
+    return 'M ' + from[0] + ' ' + from[1] + 'L ' + to[0] + ' ' + to[1];
+  },
+  /**
+   * Returns the text to visualize in the tool tips.
+   * @return String to render in tool tips.
+   */
+  getToolTipText_: function(data): string {
+    let parent = this;
+    let res = 'pos: (' + data.ydim + ',' + data.xdim + ')\n';
+    res += 'host: ' + data.host + '\n';
+    res += 'chip id: ' + data.cid + '\n';
+    res += 'node id: ' + data.nid + '\n';
+    res += 'replica id: ' + data.rid + '\n';
+    // Metric details are only appended while a metric is selected.
+    if (parent.selectedMetricIdx >= 0) {
+      res += parent.metrics[parent.selectedMetricIdx].label + ' spends ' +
+          data.value.toFixed(2) + 'us in total, ';
+      const pcnt = data.value / data.total * 100;
+      res += 'taking ' + pcnt.toFixed(2) + '% of a step.';
+    }
+    return res;
+  },
+  /**
+   * Draw the legend of the graph.
+   * @param svg d3 selection the legend groups are appended to.
+   * @param height y coordinate at which the legend rectangles are drawn.
+   * @param legendElementHeight Height of one legend rectangle; its width is
+   *     twice this value.
+   * @param colorScale Scale exposing quantiles(); one legend entry is drawn
+   *     for 0 and for each quantile.
+   * @param colors Fill values, indexed by legend entry.
+   */
+  drawLegend: function(
+      svg: any, height: number, legendElementHeight: number,
+      colorScale: any, colors: number[]) {
+    const legendElementWidth = legendElementHeight * 2;
+    let legend = svg.selectAll('.legend').data(
+        [0].concat(colorScale.quantiles()), (d) => d);
+    let legend_g = legend.enter().append('g').attr('class', 'legend');
+    legend_g.append('rect')
+        .attr('x', (d, i) => legendElementWidth * i)
+        .attr('y', height)
+        .attr('width', legendElementWidth)
+        .attr('height', legendElementHeight)
+        .style('fill', (d, i) => colors[i]);
+    legend_g.append('text')
+        .text((d) => '\u2265 0.' + Math.round(d * 10))
+        .attr('x', (d, i) => legendElementWidth * i)
+        .attr('y', height + legendElementHeight * 2);
+    legend.exit().remove();
+  },
+  /**
+   * Updates the data to be loaded into the topology graph.
+   * Does nothing when any input is missing or idx is out of range.
+   * @param data Pod stats carrying podStatsPerCore, coreIdToReplicaIdMap and
+   *     channelDb.
+   * @param runEnvironment Run environment carrying topology.xDimension.
+   * @param metrics Metric descriptors; metrics[idx].key names the per-core
+   *     field used as the node value.
+   * @param idx Index of the selected metric.
+   */
+  updateAllData: function(
+      data, runEnvironment, metrics, idx) {
+    if (!data || !data.podStatsPerCore || !runEnvironment ||
+        !runEnvironment.topology || !metrics || idx >= metrics.length ||
+        idx < 0) {
+      return;
+    }
+    // xDimension may be serialized as a string; parse it explicitly (as
+    // runEnvironmentChanged_ does) instead of relying on implicit coercion.
+    const xdim = parseInt(runEnvironment.topology.xDimension, 10);
+    // Tolerate a missing replica map: replica ids are then left undefined.
+    const replicaIds = data.coreIdToReplicaIdMap || {};
+    const metricKey = metrics[idx].key;
+
+    const result = Object.keys(data.podStatsPerCore).map((coreId) => {
+      const obj = data.podStatsPerCore[coreId];
+      return {
+        xdim: obj.chipId % xdim,
+        ydim: Math.floor(obj.chipId / xdim),
+        nid: obj.nodeId,
+        cid: obj.chipId,
+        rid: replicaIds[coreId],  // replica id.
+        host: obj.hostName,
+        value: obj[metricKey],
+        total: obj.totalDurationUs
+      };
+    });
+    this.topoData_ = result;
+    this.updateLinkData_(data);
+  },
+  /**
+   * Updates the data to be rendered as links.
+   * Groups channelDb entries by channelId and records the smallest and
+   * largest channel id seen. Leaves the current state untouched when
+   * channelDb is missing or empty.
+   */
+  updateLinkData_: function(data) {
+    if (!data.channelDb || data.channelDb.length == 0) {
+      return;
+    }
+    let links = {};
+    let min = data.channelDb[0].channelId;
+    let max = 0;
+    for (let i = 0; i < data.channelDb.length; i++) {
+      const channel = data.channelDb[i];
+      const cid = channel.channelId;
+      if (!links[cid]) {
+        links[cid] = [];
+      }
+      links[cid].push(channel);
+      min = Math.min(cid, min);
+      max = Math.max(cid, max);
+    }
+    this.linkData_ = links;
+    this.minChannelId = min;
+    this.maxChannelId = max;
+  },
+  /**
+   * Updates the data to be rendered when run environment changed.
+   * Stores the TPU type and the parsed x/y dimensions, and derives the total
+   * core count from them and nodesPerChip_.
+   */
+  runEnvironmentChanged_: function(newData) {
+    if (!newData || !newData.topology) {
+      return;
+    }
+    this.tpuType_ = newData.tpuType;
+    this.xDimension_ = parseInt(newData.topology.xDimension, 10);
+    this.yDimension_ = parseInt(newData.topology.yDimension, 10);
+    this.totalCoreCount_ =
+        this.xDimension_ * this.yDimension_ * this.nodesPerChip_;
+  },
+  /**
+   * Updates the system info when the type of TPU changed.
+   * Sets the host strides and nodes per chip for the known TPU types and
+   * logs a warning for any other type.
+   */
+  updateSystemInfo_: function(tpuType: string) {
+    if (!tpuType) {
+      return;
+    }
+    switch (tpuType) {
+      case 'TPU v2':
+        this.hostXStride_ = 2;
+        this.hostYStride_ = 2;
+        this.nodesPerChip_ = 2;
+        break;
+      case 'TPU v3':
+        this.hostXStride_ = 4;
+        this.hostYStride_ = 2;
+        this.nodesPerChip_ = 2;
+        break;
+      default:
+        console.warn('TPU type: ', tpuType, 'is not supported by pod viewer.');
+        break;
+    }
+  },
+  /**
+   * Redraws the graph when the data to be rendered changed.
+   * Skipped until `ready` is truthy.
+   */
+  updateTopology: function(newData, ready) {
+    if (!ready) {
+      return;
+    }
+    this.topologyGraph(newData);
+  },
+  /**
+   * Redraws the links when link data changed.
+   */
+  updateLinks: function(link, ready) {
+    // Only redraw once the element is ready and link data is present.
+    if (ready && link) {
+      this.drawLinks(this.data_.channelDb);
+    }
+  },
+  /**
+   * Toggles link visibility when the selected channel id changes: hides the
+   * links of the previous channel, shows those of the new one and records
+   * them as the selected channel.
+   */
+  selectedChannelIdChanged_: function(newData, oldData) {
+    const byChannel = this.linkData_;
+    if (!byChannel) {
+      return;
+    }
+    const previous = byChannel[oldData];
+    const current = byChannel[newData];
+    if (previous) {
+      d3.selectAll('#cid' + oldData).style('visibility', 'hidden');
+    }
+    if (current) {
+      d3.selectAll('#cid' + newData).style('visibility', 'visible');
+      this.selectedChannel = byChannel[newData];
+    }
+  },
+  /**
+   * Recolors the topology when the active bar chart element changes, and
+   * clears the metric selection.
+   */
+  activeBarChartEleChanged_: function(newData) {
+    const hasGroups =
+        newData && newData.replicaGroups && newData.replicaGroups.length;
+    if (!hasGroups) {
+      return;
+    }
+    const palette = d3.scaleOrdinal(d3.schemeCategory10)
+                        .domain(d3.range(0, 19));
+    // Every node in a replica group is filled with that group's colour.
+    newData.replicaGroups.forEach((replicaGroup, groupIdx) => {
+      const ids = replicaGroup.replicaIds;
+      for (let k = 0; k < ids.length; k++) {
+        d3.selectAll('#rid' + ids[k])
+            .style('fill', palette(groupIdx % 20));
+      }
+    });
+    this.selectedMetricIdx = -1;
+  },
+  /**
+   * Returns a label for the current metric selection.
+   */
+  getSelectedMetricLabel_: function(metrics, idx) {
+    // Valid indices are 0 .. metrics.length - 1. The bound must be inclusive
+    // (>=), otherwise idx === metrics.length reads metrics[idx].label on
+    // undefined and throws.
+    if (idx < 0 || !metrics || idx >= metrics.length) {
+      return 'Please select a metric';
+    }
+    return 'Color: ' + metrics[idx].label;
+  },
+  /**
+   * Flags the element as ready by setting ready_ to true.
+   * NOTE(review): presumably invoked by Polymer when the element is attached
+   * to the DOM - confirm.
+   */
+  attached: function() {
+    this.ready_ = true;
+  }
+});
+
+} // namespace pod_viewer_topology_graph
diff --git a/tensorboard/plugins/profile/profile_demo.pod_viewer.json b/tensorboard/plugins/profile/profile_demo.pod_viewer.json new file mode 100644 index 0000000000..4b90bd081b --- /dev/null +++ b/tensorboard/plugins/profile/profile_demo.pod_viewer.json @@ -0,0 +1 @@ +{"hloInfoMap":{},"podStatsSequence":{"podStatsMap":[{"allReduceOpDb":[{"dataSize":"30661632","durationUs":10811.507589281251,"name":"all-reduce.2","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":2521.165580375,"name":"all-reduce.1","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":27.263195568548387,"name":"all-reduce","occurrences":992,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"12356608","durationUs":463.57660712500001,"name":"all-reduce.6","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":375.51763399999999,"name":"all-reduce.5","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":374.02147321874997,"name":"all-reduce.4","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"31460352","durationUs":203.90008928125002,"name":"all-reduce.3","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"28456960","durationUs":178.54026781249996,"name":"all-reduce.7","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]}],"channelDb":[{"channelId":"16436","dataSize":"2764800","dstCoreId":7,"durationUs":34.014285000000001,"hloNames":["send.42","recv-done.52"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":14,"utilization":0},{"channelId":"17643","dataSize":"20480","dstCoreId":13,"durationUs":33.602857,"hloNam
es":["recv-done.100","send.83"],"occurrences":1,"replicaId":0,"sendDelayUs":73.25,"srcCoreId":20,"utilization":0},{"channelId":"17764","dataSize":"20480","dstCoreId":13,"durationUs":17.538571999999998,"hloNames":["recv-done.108","send.91"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":20,"utilization":0},{"channelId":"18733","dataSize":"1388544","dstCoreId":13,"durationUs":276.38428599999997,"hloNames":["send.97","recv-done.115"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":12,"utilization":0}],"coreIdToReplicaIdMap":{"1":0,"3":0,"4":0,"6":0,"7":0,"9":0,"10":0,"12":0,"101":0,"103":0,"104":0,"106":0,"107":0,"109":0,"110":0,"112":0,"201":0,"203":0,"204":0,"206":0,"207":0,"209":0,"210":0,"212":0,"301":0,"303":0,"304":0,"306":0,"307":0,"309":0,"310":0,"312":0},"podStatsPerCore":{"1":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":13662.288571999999,"highFlopsComputeUs":207443.10142299999,"hostInfeedDurationUs":1716.472857,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":39666.888588000002,"sendDurationUs":17942.088581,"stepNum":221,"totalDurationUs":397539.37142899999},"3":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":12525.011429,"highFlopsComputeUs":207267.62999799999,"hostInfeedDurationUs":1720.9542859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42163.548598000001,"sendDurationUs":17411.900007,"stepNum":221,"totalDurationUs":397533.00428599998},"4":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":13641.975716999999,"highFlopsComputeUs":207397.042862,"hostInfeedDurationUs":1714.3714279999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40267.061457000003,"sendDurationUs":18224.534286999999,"stepNum":221,"totalDurationUs":397529.65000000002},"6":{"bottleneck":"Send and 
Recv","chipId":1,"crsDurationUs":14221.517142999999,"highFlopsComputeUs":207237.375715,"hostInfeedDurationUs":1720.331428,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":37693.192874,"sendDurationUs":17266.478574000001,"stepNum":221,"totalDurationUs":397522.37714300002},"7":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":28291.208572,"highFlopsComputeUs":209338.76857799999,"hostInfeedDurationUs":1722.7785710000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":23558.318590999999,"sendDurationUs":16176.947163999999,"stepNum":221,"totalDurationUs":397523.881429},"9":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":27233.165712999999,"highFlopsComputeUs":209416.73571499999,"hostInfeedDurationUs":1715.047143,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22646.498597999998,"sendDurationUs":15508.522876999999,"stepNum":221,"totalDurationUs":397536.05571400002},"10":{"bottleneck":"All-Reduce","chipId":0,"crsDurationUs":47407.805716000003,"highFlopsComputeUs":214328.448569,"hostInfeedDurationUs":1722.491428,"hostName":"njsw1:14059","hostOutfeedDurationUs":0.33428600000000003,"nodeId":0,"recvDurationUs":31073.378583999998,"sendDurationUs":9243.6159069999994,"stepNum":221,"totalDurationUs":397533.54999999999},"12":{"bottleneck":"Send and Recv","chipId":0,"crsDurationUs":29543.902859000002,"highFlopsComputeUs":209375.63143499999,"hostInfeedDurationUs":1723.292858,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22063.067186,"sendDurationUs":15378.035719,"stepNum":221,"totalDurationUs":397530.62428599998},"101":{"bottleneck":"Send and 
Recv","chipId":6,"crsDurationUs":15312.845713999999,"highFlopsComputeUs":206435.22714500001,"hostInfeedDurationUs":1721.767143,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37267.852864,"sendDurationUs":21720.582865,"stepNum":221,"totalDurationUs":397528.37142899999},"103":{"bottleneck":"Send and Recv","chipId":6,"crsDurationUs":11385.511428,"highFlopsComputeUs":206367.96714200001,"hostInfeedDurationUs":1721.1814280000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34349.292868999997,"sendDurationUs":23999.150002999999,"stepNum":221,"totalDurationUs":397521.98857099999},"104":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":21573.184282999999,"highFlopsComputeUs":206408.11714700001,"hostInfeedDurationUs":1716.2114280000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":35173.970019,"sendDurationUs":22466.791434999999,"stepNum":221,"totalDurationUs":397536.74428599997},"106":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":22866.147140000001,"highFlopsComputeUs":206441.69428900001,"hostInfeedDurationUs":1721.6771429999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34358.401440000001,"sendDurationUs":21782.570012,"stepNum":221,"totalDurationUs":397516.06857100001},"107":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":6976.4671449999996,"highFlopsComputeUs":206484.33286200001,"hostInfeedDurationUs":1722.4171429999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":44825.472866999997,"sendDurationUs":20205.755723999999,"stepNum":221,"totalDurationUs":397528.058571},"109":{"bottleneck":"Send and 
Recv","chipId":7,"crsDurationUs":10383.294287000001,"highFlopsComputeUs":206421.94142700001,"hostInfeedDurationUs":1721.8,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42431.608587000002,"sendDurationUs":19537.359998,"stepNum":221,"totalDurationUs":397524.17142899998},"110":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6958.8485710000004,"highFlopsComputeUs":206801.57427899999,"hostInfeedDurationUs":1721.7628569999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37036.555727999999,"sendDurationUs":28790.634296,"stepNum":221,"totalDurationUs":397531.248571},"112":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6315.3885739999996,"highFlopsComputeUs":206643.78142399999,"hostInfeedDurationUs":1724.3414290000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":38032.754304000002,"sendDurationUs":26248.458580999999,"stepNum":221,"totalDurationUs":397527.70714299998},"201":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11437.222857999999,"highFlopsComputeUs":207479.62857299999,"hostInfeedDurationUs":1720.052858,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":50580.821446000002,"sendDurationUs":12685.305724,"stepNum":221,"totalDurationUs":397535.06285699998},"203":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11313.951428,"highFlopsComputeUs":207375.24000200001,"hostInfeedDurationUs":1719.8828579999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":43393.255727999996,"sendDurationUs":18422.137151999999,"stepNum":221,"totalDurationUs":397527.107143},"204":{"bottleneck":"Send and 
Recv","chipId":9,"crsDurationUs":14351.392854,"highFlopsComputeUs":207361.78142700001,"hostInfeedDurationUs":1717.3757149999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45239.260018000001,"sendDurationUs":15432.444296,"stepNum":221,"totalDurationUs":397534.654286},"206":{"bottleneck":"Send and Recv","chipId":9,"crsDurationUs":18417.787139,"highFlopsComputeUs":207409.85428500001,"hostInfeedDurationUs":1720.9442859999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34324.372879000002,"sendDurationUs":19130.807158,"stepNum":221,"totalDurationUs":397530.81},"207":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10979.084285000001,"highFlopsComputeUs":209588.235713,"hostInfeedDurationUs":1722.248572,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":41652.098591000002,"sendDurationUs":16386.584304,"stepNum":221,"totalDurationUs":397537.892857},"209":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10668.465715,"highFlopsComputeUs":209670.76285599999,"hostInfeedDurationUs":1718.0285710000001,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33091.415739999997,"sendDurationUs":20989.570013,"stepNum":221,"totalDurationUs":397524.43571400002},"210":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":22569.698573000001,"highFlopsComputeUs":209561.282863,"hostInfeedDurationUs":1723.038571,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":29127.498575000001,"sendDurationUs":17153.287159,"stepNum":221,"totalDurationUs":397524.417143},"212":{"bottleneck":"Send and 
Recv","chipId":8,"crsDurationUs":10958.324285999999,"highFlopsComputeUs":209709.25285700001,"hostInfeedDurationUs":1720.3199999999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33260.837164999997,"sendDurationUs":21612.090024000001,"stepNum":221,"totalDurationUs":397519.06857100001},"301":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6839.6614289999998,"highFlopsComputeUs":206388.21713999999,"hostInfeedDurationUs":1718.6714280000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40407.600012000003,"sendDurationUs":23857.128575999999,"stepNum":221,"totalDurationUs":397523.52000000002},"303":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6154.3114310000001,"highFlopsComputeUs":206478.51285599999,"hostInfeedDurationUs":1720.3128569999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34218.168587,"sendDurationUs":27609.502862000001,"stepNum":221,"totalDurationUs":397536.58000000002},"304":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":12866.675716,"highFlopsComputeUs":206449.47286000001,"hostInfeedDurationUs":1719.551428,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":36298.152870999998,"sendDurationUs":22423.432865999999,"stepNum":221,"totalDurationUs":397535.19857100002},"306":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":11542.1,"highFlopsComputeUs":208336.03572000001,"hostInfeedDurationUs":1717.902857,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":25607.762868000002,"sendDurationUs":30848.608572000001,"stepNum":221,"totalDurationUs":397535.19571399997},"307":{"bottleneck":"Send and 
Recv","chipId":15,"crsDurationUs":20327.537143000001,"highFlopsComputeUs":206528.55142900001,"hostInfeedDurationUs":1721.614286,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":59447.052862999997,"sendDurationUs":14906.371434999999,"stepNum":221,"totalDurationUs":397531.28714299999},"309":{"bottleneck":"Send and Recv","chipId":15,"crsDurationUs":28428.101427000001,"highFlopsComputeUs":199572.87714900001,"hostInfeedDurationUs":1722.391429,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":68760.547139000002,"sendDurationUs":8874.361433,"stepNum":221,"totalDurationUs":397530.28714299999},"310":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":9556.1885719999991,"highFlopsComputeUs":206564.66570799999,"hostInfeedDurationUs":1721.712857,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":44789.454297999997,"sendDurationUs":25511.827150000001,"stepNum":221,"totalDurationUs":397531.29857099999},"312":{"bottleneck":"Send and 
Recv","chipId":11,"crsDurationUs":10039.360000000001,"highFlopsComputeUs":206610.517139,"hostInfeedDurationUs":1719.181429,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":40464.617155,"sendDurationUs":28622.022863999999,"stepNum":221,"totalDurationUs":397527.12571400002}},"stepNum":221},{"allReduceOpDb":[{"dataSize":"30661632","durationUs":11028.761383906251,"name":"all-reduce.2","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":2549.2764285624999,"name":"all-reduce.1","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":26.863336685483873,"name":"all-reduce","occurrences":992,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"12356608","durationUs":471.05571434375003,"name":"all-reduce.6","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":374.18040184374996,"name":"all-reduce.5","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":373.74691959375002,"name":"all-reduce.4","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"31460352","durationUs":203.84754465624999,"name":"all-reduce.3","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"28456960","durationUs":178.48656253125,"name":"all-reduce.7","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]}],"channelDb":[{"channelId":"20078","dataSize":"20480","dstCoreId":23,"durationUs":3.4528569999999998,"hloNames":["send.813","recv-done.160"],"occurrences":1,"replicaId":0,"sendDelayUs":121.395714,"srcCoreId":0,"utilization":0},{"channelId":"15232","dataSize":"1388544","dstCoreId":19,"durationUs":18.525713,"hloNames":["send.33","recv-done.43"],"occurrences":1,"replicaId":0,"sendDelayUs":1.752858,"srcCoreId":26,"utilization":0},{"channelId":"19161","dataSize":"20480","dstCoreId":26,"durationUs":66.484285999999997,"hloNames":["recv-done.178","send.141"],"occurrences":1,"replicaId":0,"sendDelayUs":
465.205714,"srcCoreId":27,"utilization":0},{"channelId":"14533","dataSize":"33792","dstCoreId":4,"durationUs":0.78714200000000001,"hloNames":["send.190","recv-done.24"],"occurrences":1,"replicaId":0,"sendDelayUs":16.741429,"srcCoreId":0,"utilization":0}],"coreIdToReplicaIdMap":{"1":0,"3":0,"4":0,"6":0,"7":0,"9":0,"10":0,"12":0,"101":0,"103":0,"104":0,"106":0,"107":0,"109":0,"110":0,"112":0,"201":0,"203":0,"204":0,"206":0,"207":0,"209":0,"210":0,"212":0,"301":0,"303":0,"304":0,"306":0,"307":0,"309":0,"310":0,"312":0},"podStatsPerCore":{"1":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":14084.704286,"highFlopsComputeUs":207388.11713699999,"hostInfeedDurationUs":1714.6500000000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40092.490015000003,"sendDurationUs":17907.175722,"stepNum":222,"totalDurationUs":398214.19},"3":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":12933.194285,"highFlopsComputeUs":207175.73999900001,"hostInfeedDurationUs":1721.297143,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42453.528593000003,"sendDurationUs":17400.428577999999,"stepNum":222,"totalDurationUs":398206.37},"4":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":14091.925713000001,"highFlopsComputeUs":207233.68285499999,"hostInfeedDurationUs":1714.9614280000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40773.725741000002,"sendDurationUs":18154.350004,"stepNum":222,"totalDurationUs":398215.85999999999},"6":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":14643.508569,"highFlopsComputeUs":207201.06857100001,"hostInfeedDurationUs":1719.775715,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":37772.052881000003,"sendDurationUs":17354.907153,"stepNum":222,"totalDurationUs":398223.51428599999},"7":{"bottleneck":"Send and 
Recv","chipId":4,"crsDurationUs":28561.428573000001,"highFlopsComputeUs":209284.70429699999,"hostInfeedDurationUs":1719.4142859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":23662.600021999999,"sendDurationUs":16392.162879,"stepNum":222,"totalDurationUs":398222.01000000001},"9":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":27548.952856,"highFlopsComputeUs":209323.77714300001,"hostInfeedDurationUs":1714.862858,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":23081.745739999998,"sendDurationUs":15528.412866999999,"stepNum":222,"totalDurationUs":398202.69714300003},"10":{"bottleneck":"All-Reduce","chipId":0,"crsDurationUs":47794.405713,"highFlopsComputeUs":214142.01000000001,"hostInfeedDurationUs":1725.318571,"hostName":"njsw1:14059","hostOutfeedDurationUs":0.33428600000000003,"nodeId":0,"recvDurationUs":31277.588578999999,"sendDurationUs":9243.7616180000005,"stepNum":222,"totalDurationUs":398218.81714300002},"12":{"bottleneck":"Send and Recv","chipId":0,"crsDurationUs":29989.367144,"highFlopsComputeUs":209330.50857500001,"hostInfeedDurationUs":1720.7142859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22356.507182000001,"sendDurationUs":15068.790005999999,"stepNum":222,"totalDurationUs":398221.08714299998},"101":{"bottleneck":"Send and Recv","chipId":6,"crsDurationUs":15768.207146000001,"highFlopsComputeUs":206462.34,"hostInfeedDurationUs":1721.5142860000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37759.097155000003,"sendDurationUs":21392.548583,"stepNum":222,"totalDurationUs":398210.69428599998},"103":{"bottleneck":"Send and 
Recv","chipId":6,"crsDurationUs":11855.951426,"highFlopsComputeUs":206393.64000300001,"hostInfeedDurationUs":1720.547143,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34500.818593000004,"sendDurationUs":23976.541434999999,"stepNum":222,"totalDurationUs":398226.32000000001},"104":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":21747.387143,"highFlopsComputeUs":206452.350003,"hostInfeedDurationUs":1716.8085719999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":34653.311450000001,"sendDurationUs":23122.740008000001,"stepNum":222,"totalDurationUs":398220.43571400002},"106":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":23243.028571999999,"highFlopsComputeUs":206402.74143299999,"hostInfeedDurationUs":1719.9328579999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34642.314304,"sendDurationUs":21864.134290999998,"stepNum":222,"totalDurationUs":398221.83857099997},"107":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":7191.5542859999996,"highFlopsComputeUs":206397.13000100001,"hostInfeedDurationUs":1720.2185710000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45218.691448999998,"sendDurationUs":20616.661442000001,"stepNum":222,"totalDurationUs":398218.45142900001},"109":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":10542.825712,"highFlopsComputeUs":206355.82142600001,"hostInfeedDurationUs":1721.8028569999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42577.165732000001,"sendDurationUs":19943.402869000001,"stepNum":222,"totalDurationUs":398218.48714300001},"110":{"bottleneck":"Send and 
Recv","chipId":3,"crsDurationUs":7101.1871419999998,"highFlopsComputeUs":206774.09428399999,"hostInfeedDurationUs":1720,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37804.437158000001,"sendDurationUs":28580.401439000001,"stepNum":222,"totalDurationUs":398218.88428599999},"112":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6546.7385709999999,"highFlopsComputeUs":206534.630003,"hostInfeedDurationUs":1725.5599999999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":38827.308576000003,"sendDurationUs":25982.980004000001,"stepNum":222,"totalDurationUs":398211.53714299999},"201":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11428.060001,"highFlopsComputeUs":207484.80286,"hostInfeedDurationUs":1719.905714,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":51026.815746,"sendDurationUs":12542.751434,"stepNum":222,"totalDurationUs":398221.40857099998},"203":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11529.891432,"highFlopsComputeUs":207440.33285800001,"hostInfeedDurationUs":1717.9342859999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":43967.371439000002,"sendDurationUs":18111.422860999999,"stepNum":222,"totalDurationUs":398221.57285699999},"204":{"bottleneck":"Send and Recv","chipId":9,"crsDurationUs":14598.571427000001,"highFlopsComputeUs":207388.28285700001,"hostInfeedDurationUs":1716.6614279999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45798.657154,"sendDurationUs":15260.657144999999,"stepNum":222,"totalDurationUs":398219.68142899999},"206":{"bottleneck":"Send and 
Recv","chipId":9,"crsDurationUs":18598.618567000001,"highFlopsComputeUs":207425.13143099999,"hostInfeedDurationUs":1720.98,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34483.765736000001,"sendDurationUs":19299.474289000002,"stepNum":222,"totalDurationUs":398210.20857100002},"207":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":11141.364286,"highFlopsComputeUs":209598.264287,"hostInfeedDurationUs":1725.9857139999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":42046.937171999998,"sendDurationUs":16300.778582000001,"stepNum":222,"totalDurationUs":398209.53857099998},"209":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10844.927143000001,"highFlopsComputeUs":209686.52570299999,"hostInfeedDurationUs":1720.198572,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33322.021460999997,"sendDurationUs":21084.285728999999,"stepNum":222,"totalDurationUs":398225.23714300001},"210":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":22723.787143000001,"highFlopsComputeUs":209638.03713800001,"hostInfeedDurationUs":1721.0342860000001,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":29179.897165999999,"sendDurationUs":17325.542878,"stepNum":222,"totalDurationUs":398215.82857100002},"212":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":11106.59,"highFlopsComputeUs":209698.075713,"hostInfeedDurationUs":1720.4485709999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33466.327159,"sendDurationUs":21666.767158999999,"stepNum":222,"totalDurationUs":398221.23714300001},"301":{"bottleneck":"Send and 
Recv","chipId":14,"crsDurationUs":6939.8599990000002,"highFlopsComputeUs":206484.84713899999,"hostInfeedDurationUs":1719.9400000000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40927.997155999998,"sendDurationUs":24104.938582999999,"stepNum":222,"totalDurationUs":398229.75285699998},"303":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6207.8842869999999,"highFlopsComputeUs":206502.38143000001,"hostInfeedDurationUs":1717.6014279999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34081.265721999996,"sendDurationUs":28601.130002000002,"stepNum":222,"totalDurationUs":398216.57428599999},"304":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":13154.048575000001,"highFlopsComputeUs":206499.264284,"hostInfeedDurationUs":1718.3771429999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37100.294298000001,"sendDurationUs":22281.692856000001,"stepNum":222,"totalDurationUs":398219.09857099998},"306":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":11805.220001,"highFlopsComputeUs":208345.81571600001,"hostInfeedDurationUs":1717.1671429999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":26777.952864999999,"sendDurationUs":30477.042860000001,"stepNum":222,"totalDurationUs":398213.78428600001},"307":{"bottleneck":"Send and Recv","chipId":15,"crsDurationUs":20388.585715000001,"highFlopsComputeUs":206606.38714000001,"hostInfeedDurationUs":1723.625714,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":60240.227149999999,"sendDurationUs":14901.322856000001,"stepNum":222,"totalDurationUs":398215.022857},"309":{"bottleneck":"Send and 
Recv","chipId":15,"crsDurationUs":28490.097142999999,"highFlopsComputeUs":199656.02714399999,"hostInfeedDurationUs":1720.3228570000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":69555.061436000004,"sendDurationUs":8856.7071419999993,"stepNum":222,"totalDurationUs":398216.78999999998},"310":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":9660.9585690000004,"highFlopsComputeUs":206617.80999400001,"hostInfeedDurationUs":1724.1528579999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45693.587157000002,"sendDurationUs":25409.315714,"stepNum":222,"totalDurationUs":398213.79285700002},"312":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":10124.957141000001,"highFlopsComputeUs":206628.12999799999,"hostInfeedDurationUs":1719.7085709999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":41098.437146999997,"sendDurationUs":28642.645713000002,"stepNum":222,"totalDurationUs":398218.39000000001}},"stepNum":222}]},"runEnvironment":{"hostCount":4, "numCoresPerReplica":32,"perCoreBatchSize":8,"replicaCount":1,"topology":{"xDimension":"4","yDimension":"4","zDimension":"0"},"tpuCoreCount":32,"tpuType":"TPU v3"}} diff --git a/tensorboard/plugins/profile/profile_demo.py b/tensorboard/plugins/profile/profile_demo.py index d28c480ef3..99b13df537 100644 --- a/tensorboard/plugins/profile/profile_demo.py +++ b/tensorboard/plugins/profile/profile_demo.py @@ -88,6 +88,9 @@ def dump_data(logdir): shutil.copyfile( 'tensorboard/plugins/profile/profile_demo.memory_viewer.json', os.path.join(run_dir, 'memory_viewer.json')) + shutil.copyfile( + 'tensorboard/plugins/profile/profile_demo.pod_viewer.json', + os.path.join(run_dir, 'pod_viewer.json')) shutil.copyfile( 'tensorboard/plugins/profile/profile_demo.google_chart_demo.json', os.path.join(run_dir, 'google_chart_demo.json')) diff --git a/tensorboard/plugins/profile/profile_plugin.py 
b/tensorboard/plugins/profile/profile_plugin.py index 05810fea05..05e0f09922 100644 --- a/tensorboard/plugins/profile/profile_plugin.py +++ b/tensorboard/plugins/profile/profile_plugin.py @@ -55,6 +55,7 @@ 'input_pipeline_analyzer': 'input_pipeline.json', 'overview_page': 'overview_page.json', 'memory_viewer': 'memory_viewer.json', + 'pod_viewer': 'pod_viewer.json', 'google_chart_demo': 'google_chart_demo.json', } @@ -63,6 +64,7 @@ 'op_profile', 'overview_page', 'memory_viewer', + 'pod_viewer', 'google_chart_demo',]) def process_raw_trace(raw_trace): diff --git a/tensorboard/plugins/profile/tf_profile_dashboard/BUILD b/tensorboard/plugins/profile/tf_profile_dashboard/BUILD index 5151265e79..f96b60e006 100644 --- a/tensorboard/plugins/profile/tf_profile_dashboard/BUILD +++ b/tensorboard/plugins/profile/tf_profile_dashboard/BUILD @@ -19,6 +19,7 @@ tf_web_library( "//tensorboard/plugins/profile/input_pipeline_analyzer", "//tensorboard/plugins/profile/memory_viewer/memory_viewer_dashboard", "//tensorboard/plugins/profile/overview_page", + "//tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard", "//tensorboard/plugins/profile/tf_op_profile", "//tensorboard/plugins/profile/tf_profile_common", "@org_polymer", diff --git a/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html b/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html index dc56e66e77..a49e634d5e 100644 --- a/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html +++ b/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html @@ -31,6 +31,7 @@ + @@ -185,6 +186,11 @@

No profile data was found.

node="[[_activeBufferDetails]]" > +