diff --git a/tensorboard/plugins/profile/BUILD b/tensorboard/plugins/profile/BUILD
index 26dd7ba2b9..0b57017472 100644
--- a/tensorboard/plugins/profile/BUILD
+++ b/tensorboard/plugins/profile/BUILD
@@ -90,6 +90,7 @@ py_binary(
         "profile_demo.google_chart_demo.json",
         "profile_demo.memory_viewer.json",
         "profile_demo.op_profile.json",
+        "profile_demo.pod_viewer.json",
     ],
     srcs_version = "PY2AND3",
     deps = [
diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/BUILD b/tensorboard/plugins/profile/pod_viewer/details_card/BUILD
new file mode 100644
index 0000000000..dd6a0e8b76
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/details_card/BUILD
@@ -0,0 +1,19 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "details_card",
+    srcs = [
+        "details-card.html",
+        "details-card.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:polymer",
+        "//tensorboard/plugins/profile/pod_viewer/pod_viewer_common",
+        "@org_polymer_paper_card",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html
new file mode 100644
index 0000000000..f9d8f44e38
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.html
@@ -0,0 +1,100 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts
new file mode 100644
index 0000000000..99fe0ca3d2
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/details_card/details-card.ts
@@ -0,0 +1,130 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace pod_viewer_details_card {
+
+type DetailNode = podviewer.proto.ChannelInfo | podviewer.proto.PodStatsRecord
+    | podviewer.proto.AllReduceOpInfo;
+
+Polymer({
+  is: 'details-card',
+  properties: {
+    nodes: {
+      type: Array,
+    },
+    _name: {
+      type: String,
+      computed: '_computeName(nodes)',
+    },
+    stepBreakdownLayers: {
+      type: Array,
+      value: () => [
+        {key: 'highFlopsComputeUs', label: 'High flops compute'},
+        {key: 'lowFlopsComputeUs', label: 'Low flops compute'},
+        {key: 'hostInfeedDurationUs', label: 'Infeed'},
+        {key: 'hostOutfeedDurationUs', label: 'Outfeed'},
+        {key: 'crsDurationUs', label: 'All reduce'},
+        {key: 'sendDurationUs', label: 'Send'},
+        {key: 'recvDurationUs', label: 'Recv'},
+      ],
+    },
+  },
+  _isAllReduce(node: DetailNode): node is podviewer.proto.AllReduceOpInfo {
+    return (node as any).replicaGroups != undefined;
+  },
+  _isChannel(node: DetailNode): node is podviewer.proto.ChannelInfo {
+    return (node as any).channelId != undefined;
+  },
+  _isStep(node: DetailNode): node is podviewer.proto.PodStatsRecord {
+    return (node as any).hostName != undefined;
+  },
+  _hasReplicaGroups(node: podviewer.proto.AllReduceOpInfo): boolean {
+    return !!node.replicaGroups && node.replicaGroups.length > 0;
+  },
+  _computeName: function(nodes: Array<DetailNode>): string|undefined {
+    if (!nodes || nodes.length == 0) return;
+    const node = nodes[0];
+    if (this._isChannel(node)) {
+      return 'Channel # ' + (node as any).channelId;
+    } else if (this._isAllReduce(node)) {
+      return (node as any).name;
+    } else if (this._isStep(node)) {
+      return 'Step breakdown of chip ' +
+          (node as any).chipId +
+          ', core ' + (node as any).nodeId;
+    }
+    return;
+  },
+  /**
+   * Converts a number of bytes to MiB (1 MiB = 1048576 bytes).
+   */
+  _bytesToMiB: function(numBytes: number): number {
+    return numBytes / 1048576;
+  },
+  /**
+   * Returns the data size formatted in MiB; undefined if it is missing or 0.
+   */
+  _sizeMiB: function(dataSize: undefined|number): string|undefined {
+    if (!dataSize) return;
+    return this._format(this._bytesToMiB(dataSize));
+  },
+  /**
+   * Returns the formatted link bandwidth in GiB/s: the data size transferred
+   * over the duration between the start of the send operation and the end of
+   * the recv-done operation. Assumes bytes and microseconds, in which case
+   * 1 byte/us = 1e6 / 2^30 GiB/s, hence the 1073.74 divisor.
+   */
+  _bandwidth: function(
+      dataSize: undefined|number, duration: undefined|number):
+      string|undefined {
+    if (!dataSize || !duration) return;
+    return this._format(dataSize / duration / 1073.74);
+  },
+  /**
+   * Returns the chip id for a global core id (two consecutive ids per chip).
+   */
+  _chipId: function(coreId: number): number {
+    return Math.floor(coreId / 2);
+  },
+  /**
+   * Returns the node ordinal (0 or 1) for a global core id.
+   */
+  _nodeId: function(coreId: number): number {
+    return coreId & 1;
+  },
+  /**
+   * Formats a number with two decimal digits; '' for null or undefined.
+   */
+  _format: function(number: undefined|number): string {
+    return number == null ? '' : number.toFixed(2);
+  },
+  /**
+   * Returns the formatted value of one breakdown (selected by key) of a step.
+   */
+  _getStepBreakdownValue:
+      function(node: undefined|podviewer.proto.PodStatsRecord,
+               key: undefined|string): string|undefined {
+    if (!key || !node) return;
+    return this._format(node[key]);
+  },
+  /**
+   * Returns one breakdown as a percentage of the step's total duration.
+   */
+  _getStepBreakdownPct:
+      function(node: undefined|podviewer.proto.PodStatsRecord,
+               key: undefined|string): string|undefined {
+    if (!key || !node || !node.totalDurationUs || node[key] == null) return;
+    return (node[key] / node.totalDurationUs * 100).toFixed(2) + '%';
+  },
+});
+
+}  // namespace pod_viewer_details_card
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/BUILD b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/BUILD
new file mode 100644
index 0000000000..9cedefa214
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/BUILD
@@ -0,0 +1,14 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "pod_viewer_common",
+    srcs = [
+        "pod-viewer-common.html",
+        "proto.ts",
+    ],
+    path = "/pod-viewer",
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/pod-viewer-common.html b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/pod-viewer-common.html
new file mode 100644
index 0000000000..871ad1134e
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/pod-viewer-common.html
@@ -0,0 +1,18 @@
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/proto.ts b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/proto.ts
new file mode 100644
index 0000000000..7b0149535b
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_common/proto.ts
@@ -0,0 +1,180 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the 'License');
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an 'AS IS' BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+module podviewer.proto {
+  /**
+   * Describes the replica groups in an all-reduce op (e.g., all-reduce and
+   * all-to-all).
+   */
+  export interface ReplicaGroup {
+    /**
+     * The ids of the replicas that belong to the same group. The ordering of
+     * the ids matters in some ops (e.g., all-to-all).
+     */
+    replicaIds: Array<number>;
+  }
+
+  /**
+   * Pod system topology, which describes the number of chips in a pod
+   * and the connectivity style.
+   */
+  export interface SystemTopology {
+    /**
+     * The X, Y, and Z dimensions of this topology, carried as strings.
+     * 0 means that dimension does not exist.
+     */
+    xDimension: string;
+    yDimension: string;
+    zDimension?: string;
+  }
+
+  /**
+   * The run environment of a profiling session.
+   */
+  export interface RunEnvironment {
+    /** Number of hosts used. */
+    hostCount?: number;
+    /** The type of TPU used. */
+    tpuType: string;
+    /** The number of TPU cores used. */
+    tpuCoreCount?: number;
+    /** Pod system topology. */
+    topology: SystemTopology;
+  }
+
+  /**
+   * Performance and extra info on all-reduce ops.
+   */
+  export interface AllReduceOpInfo {
+    /** Name of this op. */
+    name: string;
+    /** Number of times this op occurred. */
+    occurrences: number;
+    /**
+     * Time in microseconds spent on this op (averaged across all of its
+     * occurrences).
+     */
+    durationUs: number;
+    /** Byte size of data transferred. */
+    dataSize: number;
+    /** Replica groups. */
+    replicaGroups: Array<ReplicaGroup>;
+  }
+
+  /** There is one PodStatsRecord for each step traced on each TPU node. */
+  export interface PodStatsRecord {
+    /** The host name where the trace was collected. */
+    hostName: string;
+    /** The TPU global chip id where the trace was collected. */
+    chipId: number;
+    /** The TPU node id where the trace was collected. */
+    nodeId: number;
+    /** The step number. */
+    stepNum: number;
+    /** The step duration in micro-seconds. */
+    totalDurationUs: number;
+    /**
+     * The time spent running high flops ops, such as convolution and output
+     * fusion.
+     */
+    highFlopsComputeUs: number;
+    /** The time spent on infeed from host to TPU core in micro-seconds. */
+    hostInfeedDurationUs: number;
+    /** The time spent on outfeed from TPU core to host in micro-seconds. */
+    hostOutfeedDurationUs: number;
+    /** The time spent on send operations. */
+    sendDurationUs: number;
+    /** The time spent on recv operations. */
+    recvDurationUs: number;
+    /**
+     * The time spent on all-reduce in micro-seconds
+     * (used to be cross-replica-sum).
+     */
+    crsDurationUs: number;
+    /** The bottleneck out of the above mentioned metrics. */
+    bottleneck: string;
+  }
+
+  /**
+   * Performance and extra info in a training step across all cores.
+   */
+  export interface PodStatsMap {
+    /** Step number. */
+    stepNum: number;
+    /** A map from core_id to PodStatsRecord. */
+    podStatsPerCore: {[key: number]: PodStatsRecord};
+    /** Send and receive channel info. */
+    channelDb: Array<ChannelInfo>;
+    /**
+     * A map from core ID to program replica id. Replica id map could change
+     * during a profile session, but should stay stable within a step.
+     */
+    coreIdToReplicaIdMap: {[key: number]: number};
+    /** All-reduce op info. */
+    allReduceOpDb: Array<AllReduceOpInfo>;
+  }
+
+  /** A sequence of PodStatsMap, one for each step. */
+  export interface PodStatsSequence {
+    podStatsMap: Array<PodStatsMap>;
+  }
+
+  /** Information about a send and recv channel. */
+  export interface ChannelInfo {
+    /** Id of the channel. */
+    channelId: number;
+    /** Core id of the send op. */
+    srcCoreId: number;
+    /** Core id of the recv op. */
+    dstCoreId: number;
+    /** Byte size of the data transferred. */
+    dataSize: number;
+    /**
+     * Duration from the beginning of the send op to the end of the recv-done
+     * op in microseconds.
+     */
+    durationUs: number;
+    /** Number of occurrences of a channel. */
+    occurrences: number;
+    /** Percentage of the link bandwidth used over the peak link bandwidth. */
+    utilization: number;
+    /** A list of hlo names associated with this channel id. */
+    hloNames: Array<string>;
+    /**
+     * Duration from the beginning of the recv-done to the beginning of send in
+     * microseconds. If the recv-done op starts after the beginning of the send
+     * op, the delay is zero.
+     */
+    sendDelayUs: number;
+    /** The replica_id of the program executing the send and recv ops. */
+    replicaId: number;
+  }
+
+  /** Data input to the pod viewer tool. */
+  export interface PodViewerInputData {
+    /** Pod level stats for each step. */
+    podStatsSequence: PodStatsSequence;
+    /** Job run environment, including number of hosts used, type of TPU used. */
+    runEnvironment: RunEnvironment;
+  }
+
+  /** Layer in stack bar chart. */
+  export interface StackLayer {
+    /** Key used to select the data. */
+    key: string;
+    /** Label to be shown in the UI. */
+    label: string;
+  }
+}
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD
new file mode 100644
index 0000000000..df7fae1f0d
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/BUILD
@@ -0,0 +1,22 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "pod_viewer_dashboard",
+    srcs = [
+        "pod-viewer-dashboard.html",
+        "pod-viewer-dashboard.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:polymer",
+        "//tensorboard/plugins/profile/pod_viewer/details_card",
+        "//tensorboard/plugins/profile/pod_viewer/pod_viewer_common",
+        "//tensorboard/plugins/profile/pod_viewer/stack_bar_chart",
+        "//tensorboard/plugins/profile/pod_viewer/topology_graph",
+        "@org_polymer_paper_slider",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html
new file mode 100644
index 0000000000..26d3cc1f80
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.html
@@ -0,0 +1,144 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts
new file mode 100644
index 0000000000..9fcbf638fe
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard/pod-viewer-dashboard.ts
@@ -0,0 +1,233 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the 'License');
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an 'AS IS' BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace pod_viewer_dashboard {
+
+Polymer({
+  is: 'pod-viewer-dashboard',
+  properties: {
+    /**
+     * @type {?podviewer.proto.PodViewerInputData}
+     */
+    data: {
+      type: Object,
+      observer: '_dataChanged',
+    },
+    /**
+     * Active elements selected to be shown in the details card.
+     */
+    activeDetails: {
+      type: Array,
+      notify: true,
+    },
+    selectedChannel: {
+      type: Array,
+      notify: true,
+      observer: '_selectedChannelChanged',
+    },
+    activeBar: {
+      type: Object,
+      notify: true,
+      observer: '_activeBarChanged',
+    },
+    curStepId: {
+      type: Number,
+      value: 0,
+    },
+    _podStatsMaps: {
+      type: Object,
+      computed: '_computePodStatsMaps(data)',
+    },
+    _maxStepId: {
+      type: Number,
+      computed: '_computeMaxStepId(_podStatsMaps)',
+    },
+    _errorMessage: {
+      type: String,
+      computed: '_computeErrorMessage(_maxStepId)',
+    },
+    _runEnvironment: {
+      type: Object,
+      computed: '_computeRunEnvironment(data)',
+    },
+    _stepBreakdownLayers: {
+      type: Object,
+      value: () => [
+        {key: 'highFlopsComputeUs', label: 'High flops compute'},
+        {key: 'lowFlopsComputeUs', label: 'Low flops compute'},
+        {key: 'hostInfeedDurationUs', label: 'Infeed'},
+        {key: 'hostOutfeedDurationUs', label: 'Outfeed'},
+        {key: 'crsDurationUs', label: 'All reduce'},
+        {key: 'sendDurationUs', label: 'Send'},
+        {key: 'recvDurationUs', label: 'Recv'},
+      ],
+    },
+    _podStatsMap: {
+      type: Object,
+      computed:
+          '_computePodStatsMap(_podStatsMaps, curStepId, _stepBreakdownLayers)',
+    },
+    _stepStats: {
+      type: Array,
+      value: null,
+      computed: '_computeStepStats(_podStatsMap)',
+    },
+    _channelDb: {
+      type: Array,
+      value: null,
+      computed: '_computeChannelDb(_podStatsMap)',
+    },
+    _allReduceDb: {
+      type: Array,
+      value: null,
+      computed: '_computeAllReduceDb(_podStatsMap)',
+    },
+    _channelLayers: {
+      type: Array,
+      value: () => [
+        {key: 'durationUs', label: 'Duration (µs)'},
+      ],
+    },
+    _allReduceLayers: {
+      type: Array,
+      value: () => [
+        {key: 'durationUs', label: 'Duration (µs)'},
+      ],
+    },
+    _stepBreakdownFunc: {
+      type: Object,
+      value: () => (d) => `(${d.chipId}, ${d.nodeId})`,
+    },
+    _channelFunc: {
+      type: Object,
+      value: () => (d) => d.channelId,
+    },
+    _allReduceFunc: {
+      type: Object,
+      value: () => function(d) {
+        if (!d.name) return;
+        const res =
+            d.name.replace(/ll-reduce.|usion.|ll-reduce|usion/, '');
+        return res.length > 1 ? res : res + '0';
+      },
+    },
+  },
+  _computePodStatsMaps(data: podviewer.proto.PodViewerInputData|undefined|null):
+      Array<podviewer.proto.PodStatsMap> {
+    if (!data || !data.podStatsSequence) return [];
+    return data.podStatsSequence.podStatsMap || [];
+  },
+  _computeRunEnvironment(
+      data: podviewer.proto.PodViewerInputData|undefined|null):
+      podviewer.proto.RunEnvironment {
+    return data ? data.runEnvironment : undefined;
+  },
+  _computeMaxStepId(podStatsMaps: Array<podviewer.proto.PodStatsMap>): number {
+    return podStatsMaps.length - 1;
+  },
+  _computeErrorMessage(maxStepId: number): string {
+    if (maxStepId >= 0) return '';
+    return "WARNING: No step time measured. " +
+        "This might happen if your profile duration is too short, " +
+        "try increase profile duration to cover a full step. " +
+        "If you have an inference job or not use TpuEstimator, " +
+        "please skip this tool.";
+  },
+  /**
+   * Derives lowFlopsComputeUs for every core, in place, as the total step
+   * duration minus all other breakdown layers. Returns the same map object.
+   */
+  _populateLowFlopsCompute(podStatsMap: podviewer.proto.PodStatsMap|undefined,
+                           layers: Array<podviewer.proto.StackLayer>):
+      podviewer.proto.PodStatsMap {
+    if (!podStatsMap || !layers) return;
+    let podStatsPerCore = podStatsMap.podStatsPerCore;
+    for (let coreId in podStatsPerCore) {
+      let val = podStatsPerCore[coreId];
+      if (val.hasOwnProperty('lowFlopsComputeUs')) {
+        // Already populated by an earlier call on this (mutated) map.
+        return podStatsMap;
+      }
+      val['lowFlopsComputeUs'] = val.totalDurationUs;
+      for (let j = 0; j < layers.length; j++) {
+        if (layers[j].key == 'lowFlopsComputeUs') {
+          continue;  // Never subtract the value being derived.
+        }
+        // A breakdown absent from the record counts as 0 rather than NaN.
+        val['lowFlopsComputeUs'] -= val[layers[j].key] || 0;
+      }
+    }
+    return podStatsMap;
+  },
+  _computePodStatsMap(podStatsMaps: Array<podviewer.proto.PodStatsMap>,
+                      curStepId: number,
+                      layers: Array<podviewer.proto.StackLayer>):
+      podviewer.proto.PodStatsMap {
+    if (curStepId < 0 || curStepId >= podStatsMaps.length || !layers) return;
+    return this._populateLowFlopsCompute(podStatsMaps[curStepId], layers);
+  },
+  _computeStepStats(podStatsMap: podviewer.proto.PodStatsMap):
+      Array<podviewer.proto.PodStatsRecord>|undefined {
+    if (!podStatsMap || !podStatsMap.podStatsPerCore) return;
+    const obj = podStatsMap.podStatsPerCore;
+    return Object.keys(obj).map((key) => obj[key])
+        .sort((a, b) => a.chipId - b.chipId);
+  },
+  _computeChannelDb(podStatsMap: podviewer.proto.PodStatsMap):
+      Array<podviewer.proto.ChannelInfo>|undefined {
+    if (!podStatsMap || !podStatsMap.channelDb
+        || podStatsMap.channelDb.length <= 0) {
+      return;
+    }
+    return podStatsMap.channelDb.slice()
+        .sort((a, b) => b.durationUs - a.durationUs);
+  },
+  _computeAllReduceDb(podStatsMap: podviewer.proto.PodStatsMap):
+      Array<podviewer.proto.AllReduceOpInfo>|undefined {
+    if (!podStatsMap || !podStatsMap.allReduceOpDb
+        || podStatsMap.allReduceOpDb.length <= 0) {
+      return;
+    }
+    return podStatsMap.allReduceOpDb.slice()
+        .sort((a, b) => b.durationUs - a.durationUs);
+  },
+  _dataChanged(newData: podviewer.proto.PodViewerInputData) {
+    if (!newData) return;
+    this.curStepId = 0;
+  },
+  /**
+   * Updates the input of the details card when the selected channel changes.
+   */
+  _selectedChannelChanged(newChannel: Array<podviewer.proto.ChannelInfo>) {
+    if (newChannel) {
+      this.activeDetails = newChannel;
+    }
+  },
+  /**
+   * The active bar could be one of the PodStatsRecord, ChannelInfo or
+   * AllReduceOpInfo. Reuse the details_card component to show any of these
+   * details.
+   */
+  _activeBarChanged(newBar: any) {
+    if (newBar) {
+      this.activeDetails = [newBar];
+    }
+  },
+  /**
+   * Returns the step number of the current step, or 0 when there is no step.
+   */
+  _getStepNum(podStatsMap: podviewer.proto.PodStatsMap): number {
+    return podStatsMap ? podStatsMap.stepNum : 0;
+  },
+});
+
+}  // namespace pod_viewer_dashboard
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD
new file mode 100644
index 0000000000..24054c1e18
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/BUILD
@@ -0,0 +1,19 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "stack_bar_chart",
+    srcs = [
+        "stack-bar-chart.html",
+        "stack-bar-chart.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:d3",
+        "//tensorboard/components/tf_imports:polymer",
+        "//tensorboard/plugins/profile/pod_viewer/pod_viewer_common",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html
new file mode 100644
index 0000000000..50c05798d5
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.html
@@ -0,0 +1,56 @@
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts
new file mode 100644
index 0000000000..421667ffdf
--- /dev/null
+++ 
b/tensorboard/plugins/profile/pod_viewer/stack_bar_chart/stack-bar-chart.ts
@@ -0,0 +1,188 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the 'License');
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an 'AS IS' BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+namespace pod_viewer_stack_bar_chart {
+
+const BAR_WIDTH = 50;
+const SVG_HEIGHT = 300;
+const SVG_MIN_WIDTH = 1600;
+const SVG_MARGIN = {top: 20, right: 20, bottom: 30, left: 100};
+
+/** Constants for legends. */
+const LEGEND_WIDTH = 150;
+const LEGEND_HEIGHT = 30;
+const ICON_SIZE = 19;
+const LABELS_PER_LANE = 5;
+const LEGEND_MARGIN = 5;
+const YAXIS_TO_LEGEND = 200;
+const LEGEND_TEXT_HEIGHT = 9.5;
+const LEGEND_TEXT_SIZE = '0.32em';
+
+const FONT_SIZE = 14;
+
+Polymer({
+  is: 'stack-bar-chart',
+  properties: {
+    data: {
+      type: Array,
+      value: () => [],
+      observer: '_dataChanged',
+    },
+    activeBar: {
+      type: Object,
+      notify: true,
+    },
+    xDomainFunc: {
+      type: Object,
+    },
+    stackLayers: {
+      type: Array,
+      value: () => [],
+      observer: '_onStackLayersChanged',
+    },
+  },
+  /**
+   * Draws the stacked bar chart; the y domain spans 0 to the tallest stack.
+   */
+  stackBarChart: function(data : Array<any>) {
+    if (!data.length || !this.isAttached || this.stackLayers.length == 0) {
+      return;
+    }
+    d3.select(this.$.chart).selectAll('g > *').remove();
+    d3.select(this.$.chart).select('svg').remove();
+    d3.select(this.$.chart).select('.svg-container').remove();
+    const stackKey = this.stackLayers.map((d) => d.key);
+    const stackLabel = this.stackLayers.map((d) => d.label);
+    const height = SVG_HEIGHT - SVG_MARGIN.top - SVG_MARGIN.bottom;
+    const xScaleRange = data.length * BAR_WIDTH;
+    let xScale = d3.scaleBand().range([0, xScaleRange]).padding(0.4);
+    let yScale = d3.scaleLinear().range([height, 0]);
+    let colorScale = d3.scaleOrdinal(d3.schemeCategory10)
+                         .domain([0, 19]);
+    let svg = d3.select(this.$.chart).append('svg')
+                  .attr('width', Math.max(SVG_MIN_WIDTH,
+                      xScaleRange + SVG_MARGIN.left + SVG_MARGIN.right))
+                  .attr('height', SVG_HEIGHT)
+                  .append('g')
+                  .attr('transform',
+                      'translate(' + SVG_MARGIN.left + ',' + SVG_MARGIN.top + ')');
+    let stack = d3.stack().keys(stackKey).order(d3.stackOrderNone)
+                    .offset(d3.stackOffsetNone);
+    const layers = stack(data);
+    xScale.domain(data.map(this.xDomainFunc));
+    yScale.domain([0, d3.max(layers, (l) => d3.max(l, (d) => d[1]))])
+        .nice();
+    this.drawLayers(svg, layers, xScale, yScale, colorScale);
+    this.drawAxes(svg, xScale, yScale, height);
+    this.drawLegend(svg, stackLabel, colorScale);
+  },
+  /**
+   * Draws one rect per (layer, datum); hovering a rect sets activeBar.
+   */
+  drawLayers: function(svg: any, layers: any, xScale: any, yScale: any,
+      colorScale: any) {
+    let parent = this;
+    let layer = svg.selectAll('.layer').data(layers);
+    layer.enter().append('g').merge(layer)
+        .attr('class', 'layer')
+        .style('fill', (d, i) => colorScale(i))
+        .selectAll('rect').data((d) => d)
+        .enter().append('rect')
+        .attr('width', xScale.bandwidth())
+        .attr('y', (d) => yScale(d[1]))
+        .attr('height', (d) => yScale(d[0]) - yScale(d[1]))
+        .attr('x', (d, i) => xScale(parent.xDomainFunc(d.data)))
+        .on('mouseover',
+            function(d) {
+              d3.select(this).style('opacity', 0.5);
+              parent.activeBar = d.data;
+            })
+        .on('mouseout',
+            function(d) {
+              d3.select(this).style('opacity', 1.0);
+              parent.activeBar = null;
+            });
+  },
+  /**
+   * Draws the x (bottom) and y (left) axes of the chart.
+   */
+  drawAxes: function(svg: any, xScale: any, yScale: any, height: number) {
+    svg.append('g')
+        .attr('class', 'axis axis--x')
+        .style('font-size', FONT_SIZE)
+        .attr('transform', 'translate(0,' + (height + 5) + ')')
+        .call(d3.axisBottom(xScale));
+    svg.append('g')
+        .attr('class', 'axis axis--y')
+        .style('font-size', FONT_SIZE)
+        .attr('transform', 'translate(0,0)')
+        .call(d3.axisLeft(yScale));
+  },
+  /**
+   * Draws the legend: a colored swatch and a label for every stack layer.
+   */
+  drawLegend: function(svg: any, labels: Array<string>, colorScale: any) {
+    let legend = svg.append('g')
+                     .attr('font-family', 'sans-serif')
+                     .attr('font-size', FONT_SIZE)
+                     .attr('text-anchor', 'start')
+                     .selectAll('g')
+                     .data(labels.slice());
+    legend = legend.enter().append('g').merge(legend)
+        .attr('transform',
+            (d, i) => 'translate(' +
+                (i * LEGEND_WIDTH -
+                 Math.floor(i / LABELS_PER_LANE) * LEGEND_WIDTH *
+                 LABELS_PER_LANE) + ',' +
+                Math.floor(i / LABELS_PER_LANE) *
+                LEGEND_HEIGHT + ')'
+        );
+    // `legend` is now the merged selection; the bare update selection is empty.
+    legend.append('rect')
+        .attr('x', YAXIS_TO_LEGEND)
+        .attr('width', ICON_SIZE)
+        .attr('height', ICON_SIZE)
+        .attr('fill', (d, i) => colorScale(i));
+    legend.append('text')
+        .attr('x', YAXIS_TO_LEGEND + LEGEND_MARGIN + ICON_SIZE)
+        .attr('y', LEGEND_TEXT_HEIGHT)
+        .attr('dy', LEGEND_TEXT_SIZE)
+        .text((d) => d);
+  },
+  /**
+   * Redraws the stack bar chart; a no-op when there is no data.
+   */
+  redraw: function(data: Array<any>) {
+    if (!data || data.length == 0) return;
+    this.stackBarChart(data);
+  },
+  /**
+   * Redraws the stack bar chart when the stack layers change.
+   */
+  _onStackLayersChanged:
+      function(newData: Array<podviewer.proto.StackLayer>) {
+    if (!newData || newData.length == 0) return;
+    this.redraw(this.data);
+  },
+  /**
+   * Redraws the stack bar chart when the input data changes.
+   */
+  _dataChanged: function(newData: Array<any>) {
+    if (!newData || newData.length == 0) return;
+    this.redraw(newData);
+  },
+  attached: function() {
+    this.redraw(this.data);
+  },
+});
+
+}  // namespace pod_viewer_stack_bar_chart
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD b/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD
new file mode 100644
index 0000000000..01ce82b097
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/BUILD
@@ -0,0 +1,26 @@
+package(default_visibility = ["//tensorboard:internal"])
+
+load("//tensorboard/defs:web.bzl", "tf_web_library")
+
+licenses(["notice"])  # Apache 2.0
+
+tf_web_library(
+    name = "topology_graph",
+    srcs = [
+        "topology-graph.html",
+        "topology-graph.ts",
+    ],
+    path = "/pod-viewer",
+    deps = [
+        "//tensorboard/components/tf_imports:d3",
+        "//tensorboard/components/tf_imports:polymer",
+        "//tensorboard/plugins/profile/pod_viewer/pod_viewer_common",
+        "@org_polymer_iron_icons",
+        "@org_polymer_paper_icon_button",
+        "@org_polymer_paper_item",
+        "@org_polymer_paper_listbox",
+        "@org_polymer_paper_menu",
+        "@org_polymer_paper_menu_button",
+        "@org_polymer_paper_slider",
+    ],
+)
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html
new file mode 100644
index 0000000000..b810433c70
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.html
@@ -0,0 +1,188 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts
new file mode 100644
index 0000000000..10092bbe18
--- /dev/null
+++ b/tensorboard/plugins/profile/pod_viewer/topology_graph/topology-graph.ts
@@ -0,0 +1,575 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the 'License'); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an 'AS IS' BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +namespace pod_viewer_topology_graph { + +const MAIN_COLORS = [ + '#ffffd9', '#edf8b1', '#c7e9b4', '#7fcdbb', '#41b6c4', '#1d91c0', + '#225ea8', '#253494', '#081d58' +]; +const SVG_WIDTH = 1620; +const SVG_MARGIN = {top: 50, right: 0, bottom: 100, left: 30}; + +const CHIP_GRID_SIZE = 30; +const CHIP_TO_CHIP_MARGIN = 10; +const HOST_TO_CHIP_MARGIN = 15; +const HOST_TO_HOST_MARGIN = 10; + +const HOST_Y_STRIDE = 2; +const NODES_PER_CHIP = 2; + +interface Position { + x: number, + y: number, +}; + +/** Data to render in the node cards. */ +interface TopoData { + /** Index on x-dimension. */ + xdim: number, + /** Index on y-dimension. */ + ydim: number, + /** Node id. */ + nid: number, + /** Chip id. */ + cid: number, + /** Replica id. */ + rid: number, + /** Host name. */ + host: string, + /** Value of the selected metric. */ + value: number, + /** Step total duration. */ + total: number, +}; + +/** Data to render in the host cards. */ +interface HostData { + /** Index on x-dimension. */ + xdim: number, + /** Index on y-dimension. */ + ydim: number, +}; + +/** Links grouped by channel id. 
*/ +interface LinkData { + [key: number]: Array +}; + +Polymer({ + is: 'topology-graph', + properties: { + data: { + type: Object, + }, + runEnvironment: { + type: Object, + }, + metrics: { + type: Array, + value: () => [], + }, + activeBar:{ + type: Object, + observer: '_activeBarChanged', + }, + selectedChannel: { + type: Array, + notify: true, + }, + selectedMetricIdx: { + type: Number, + value: 0, + }, + selectedChannelId: { + type: Number, + value: 0, + observer: '_selectedChannelIdChanged', + }, + _topoData: { + type: Object, + computed: + '_computeTopoData(data, runEnvironment, metrics, selectedMetricIdx)', + }, + _linkData: { + type: Object, + computed: '_computeLinkData(data)', + }, + _minChannelId: { + type: Number, + computed: '_computeMinChannelId(data)', + }, + _maxChannelId: { + type: Number, + value: 0, + computed: '_computeMaxChannelId(data)', + }, + _xDimension: { + type: Number, + computed: '_computeXDimension(runEnvironment)', + }, + _yDimension: { + type: Number, + computed: '_computeYDimension(runEnvironment)', + }, + _totalCoreCount: { + type: Number, + computed: '_computeTotalCoreCount(_xDimension, _yDimension)', + }, + _tpuType: { + type: String, + computed: '_computeTpuType(runEnvironment)', + }, + _hostXStride: { + type: Number, + computed: '_computeHostXStride(_tpuType)', + }, + _hostGridWidth: { + type: Number, + }, + _hostGridHeight: { + type: Number, + }, + _nodeGridHeight: { + type: Number, + }, + _nodeGridWidth: { + type: Number, + }, + _gLink: { + type: Object, + }, + }, + observers: ['drawTopology(_topoData, runEnvironment)'], + /** + * Computes the topoData to be loaded into the topology graph. 
+ */ + _computeTopoData: function( + data: podviewer.proto.PodStatsMap|undefined, + runEnvironment: podviewer.proto.RunEnvironment|undefined, + metrics: Array, + idx: number): Array { + if (!data || !runEnvironment || !runEnvironment.topology || !metrics || + idx >= metrics.length || idx < 0) { + return; + } + const xdim = parseInt(runEnvironment.topology.xDimension, 10); + return Object.keys(data.podStatsPerCore).map((core) => { + const podStats = data.podStatsPerCore[core]; + return { + xdim: podStats.chipId % xdim, + ydim: Math.floor(podStats.chipId / xdim), + nid: podStats.nodeId, + cid: podStats.chipId, + rid: data.coreIdToReplicaIdMap[core], // replica id. + host: podStats.hostName, + value: podStats[metrics[idx].key], + total: podStats.totalDurationUs, + }; + }); + }, + /** + * Compute the data to be rendered as links. + */ + _computeLinkData: function( + data: podviewer.proto.PodStatsMap): LinkData { + if (!data || !data.channelDb || data.channelDb.length == 0) return {}; + let links = {}; + data.channelDb.forEach(function(channel) { + if (!links[channel.channelId]) { + links[channel.channelId] = [channel]; + } else { + links[channel.channelId].push(channel); + } + }); + return links; + }, + /** Compute the min channel id.*/ + _computeMinChannelId: function( + data: podviewer.proto.PodStatsMap): number { + if (!data || !data.channelDb || data.channelDb.length == 0) { + return; + } + return data.channelDb.reduce( + (min, p) => Math.min(min, p.channelId), data.channelDb[0].channelId); + }, + /** Compute the max channel id.*/ + _computeMaxChannelId: function( + data: podviewer.proto.PodStatsMap): number { + if (!data || !data.channelDb || data.channelDb.length == 0) { + return; + } + return data.channelDb.reduce( + (max, p) => Math.max(max, p.channelId), data.channelDb[0].channelId); + }, + _computeTpuType: function(env: podviewer.proto.RunEnvironment): string { + if (!env) return; + return env.tpuType; + }, + _computeXDimension: function(env: 
podviewer.proto.RunEnvironment): number { + if (!env || !env.topology) return; + return parseInt(env.topology.xDimension, 10); + }, + _computeYDimension: function(env: podviewer.proto.RunEnvironment): number { + if (!env || !env.topology) return; + return parseInt(env.topology.yDimension, 10); + }, + _computeTotalCoreCount: function(xdim: number, ydim: number): number { + return xdim * ydim * NODES_PER_CHIP; + }, + _computeHostXStride: function(tpuType: string): number { + return tpuType == 'TPU v3' ? 4 : 2; + }, + /** + * Main function to draw topology graph based on TPU topology. + */ + topologyGraph: function(data: Array) { + d3.select(this.$.tpgraph).selectAll('g > *').remove(); + d3.select(this.$.tpgraph).select('svg').remove(); + d3.select(this.$.tpgraph).select('.svg-container').remove(); + this._hostGridWidth = this.getHostGridSize(this._hostXStride); + this._hostGridHeight = this.getHostGridSize(HOST_Y_STRIDE); + this._nodeGridWidth = CHIP_GRID_SIZE / NODES_PER_CHIP; + this._nodeGridHeight = CHIP_GRID_SIZE; + const hostXDim = this._xDimension / this._hostXStride; + const hostYDim = this._yDimension / HOST_Y_STRIDE; + const colorScale = + d3.scaleQuantile().domain([0, 1.0]).range(MAIN_COLORS); + const chipXDims = Array.from(Array(this._xDimension).keys()); + const chipYDims = Array.from(Array(this._yDimension).keys()); + let svg = d3.select(this.$.tpgraph) + .append('svg') + .attr('width', SVG_WIDTH) + .attr('height', hostYDim * this._hostGridHeight + + SVG_MARGIN.bottom + SVG_MARGIN.top) + .append('g') + .attr('transform', + 'translate(' + SVG_MARGIN.left + ',' + SVG_MARGIN.top + ')'); + const hostData = this.createHostData(hostXDim, hostYDim); + this.drawHostCards( + svg, hostData, this._hostGridWidth, this._hostGridHeight); + this.drawNodeCards(svg, data, colorScale); + + // Creates separate groups, so that the z-index remains in the right order. + this._gLink = svg.append('svg:g').classed('link', true); + + // Add a svg:defs for the arrow head. 
+ svg.append('svg:defs').append('svg:marker') + .attr('id', 'arrow') + .attr('viewBox', '0 -5 10 10') + .attr('markerWidth', 5) + .attr('markerHeight', 5) + .attr('orient', 'auto') + .append('svg:path') + .style('stroke', 'red') + .style('fill', 'red') + .attr('d', 'M0,-5L10,0L0,5'); + this.drawLabels(svg, chipXDims, chipYDims); + const legendYLoc = + this._hostGridHeight * Math.ceil(this._yDimension / HOST_Y_STRIDE) + + HOST_TO_HOST_MARGIN; + this.drawLegend(svg, legendYLoc, colorScale); + }, + /** + * Returns the size of host grid, including the host card size and the margin + * between two hosts. + */ + getHostGridSize(stride: number): number { + return HOST_TO_CHIP_MARGIN * 2 + CHIP_TO_CHIP_MARGIN * (stride - 1) + + CHIP_GRID_SIZE * stride + HOST_TO_HOST_MARGIN; + }, + /** + * Returns the x-axis location for the xChip'th chip of the xHost'th host. + */ + getChipXLoc: function(xHost: number, xChip: number): number { + return xHost * this._hostGridWidth + HOST_TO_CHIP_MARGIN + + xChip * (CHIP_GRID_SIZE + CHIP_TO_CHIP_MARGIN); + }, + /** + * Returns the y-axis location for the yChip'th chip of the yHost'th host. + */ + getChipYLoc: function(yHost: number, yChip: number): number { + return yHost * this._hostGridHeight + HOST_TO_CHIP_MARGIN + + yChip * (CHIP_GRID_SIZE + CHIP_TO_CHIP_MARGIN); + }, + /** + * Returns the x-axis location for the xNode'th node of the xChip'th chip of + * the xHost'th host. + */ + getNodeXLoc: function(xHost: number, xChip: number, xNode: number): number { + return this.getChipXLoc(xHost, xChip) + xNode * this._nodeGridWidth; + }, + /** + * Returns the location for each host in the system. + */ + createHostData: function( + hostXDim: number, hostYDim: number): Array { + let hostData = []; + for (let i = 0; i < hostXDim; i++) { + for (let j = 0; j < hostYDim; j++) { + hostData.push({xdim: i, ydim: j}); + } + } + return hostData; + }, + /** + * Draw the labels on x-axis and y-axis. 
+ */ + drawLabels: function(svg: any, xdims: number[], ydims: number[]) { + // Draw label on x axis. + let xLabel = svg.selectAll('.xLabel').data(xdims); + xLabel.enter().append('text').merge(xLabel) + .text((d) => d) + .attr('x', (d, i) => this.getChipXLoc( + Math.floor(i / this._hostXStride), + i % this._hostXStride)) + .attr('y', 0) + .style('text-anchor', 'middle') + .attr('transform', 'translate(' + CHIP_GRID_SIZE / 2 + ', -6)') + .attr('class', 'axis'); + + // Draw label on y axis. + let yLabel = svg.selectAll('.yLabel').data(ydims); + yLabel.enter().append('text').merge(yLabel) + .text((d) => d) + .attr('x', 0) + .attr('y', (d, i) => this.getChipYLoc( + Math.floor(i / HOST_Y_STRIDE), i % HOST_Y_STRIDE)) + .style('text-anchor', 'middle') + .attr('transform', 'translate(-12,' + CHIP_GRID_SIZE / 2 + ')') + .attr('class', 'axis'); + }, + /** + * Draw the UI of host cards. + */ + drawHostCards: function(svg, data, gridWidth: number, gridHeight: number) { + let cards = svg.selectAll('.xdim').data(data, (d) => d.xdim); + cards.enter().append('rect').merge(cards) + .attr('x', (d) => d.xdim * gridWidth) + .attr('y', (d) => d.ydim * gridHeight) + .attr('rx', 4 * gridWidth / gridHeight) + .attr('ry', 4) + .attr('class', 'hour bordered') + .attr('width', gridWidth - HOST_TO_HOST_MARGIN) + .attr('height', gridHeight - HOST_TO_HOST_MARGIN) + .attr('border', 1) + .style('fill', 'F0F0F0') + .style('stroke', 'black') + .style('stroke-width', 1) + .transition() + .duration(1000); + cards.exit().remove(); + }, + /** + * Draw the UI of node cards. 
+ */ + drawNodeCards: function(svg: any, data: Array, colorScale: any) { + let cards = svg.selectAll('.xdim').data(data, (d) => d.xdim); + let parent = this; + cards.enter().append('rect').merge(cards) + .attr('id', (d) => 'rid' + d.rid) + .attr('x', (d) => { + return this.getNodeXLoc( + Math.floor(d.xdim / this._hostXStride), + d.xdim % this._hostXStride, d.nid); + }) + .attr('y', (d) => { + return this.getChipYLoc( + Math.floor(d.ydim / HOST_Y_STRIDE), + d.ydim % HOST_Y_STRIDE); + }) + .attr('rx', 4 / NODES_PER_CHIP) + .attr('ry', 4) + .attr('class', 'hour bordered') + .attr('width', this._nodeGridWidth) + .attr('height', this._nodeGridHeight) + .attr('border', 1) + .style('stroke', 'black') + .style('stroke-width', 1) + .style('fill', (d) => colorScale(d.value / d.total)) + .on('mouseover', function(d) { + // highlight text + d3.select(this).classed('cell-hover', true).style('opacity', 0.5); + + // Update the tooltip position and value + d3.select(parent.$.tooltip) + .style('left', d3.event.pageX + 10 + 'px') + .style('top', d3.event.pageY - 10 + 'px') + .select('#value') + .text(parent._getToolTipText(d)); + d3.select(parent.$.tooltip).classed('hidden', false); + }) + .on('mouseout', function() { + d3.select(this).classed('cell-hover', false).style('opacity', 1.0); + d3.select(parent.$.tooltip).classed('hidden', true); + }); + cards.exit().remove(); + }, + /** + * Draw the UI of chip to chip links. + */ + drawLinks: function(linkData: Array) { + if (!linkData || linkData.length == 0 || !this._gLink) { + return; + } + + // Handle links; + let links = this._gLink.selectAll('.link').data(linkData); + + // attach the arrow from defs + links.enter().append('svg:path').merge(links) + .attr('id', (d) => 'cid' + d.channelId) + .attr('stroke-width', 2) + .attr('stroke', 'red') + .attr('fill', 'none') + .attr('marker-end', 'url(#arrow)') + .style('visibility', 'hidden') + .attr('d', (d) => this.linkToPath(d)); + + // Handle deleted links. 
+ links.exit().remove(); + this._selectedChannelIdChanged(this.selectedChannelId); + }, + /** + * Given the global core id, returns the (x, y) coordinates in the topology + * graph. + */ + coreIdToPos: function(id: number): Position { + const chipId = Math.floor(id / 2); + const nodeId = id & 1; + const xDim = chipId % this._xDimension; + const yDim = Math.floor(chipId / this._xDimension); + const x = CHIP_GRID_SIZE / NODES_PER_CHIP / 2 + + this.getNodeXLoc( + Math.floor(xDim / this._hostXStride), + xDim % this._hostXStride, nodeId); + const y = this.getChipYLoc( + Math.floor(yDim / HOST_Y_STRIDE), + yDim % HOST_Y_STRIDE) + + CHIP_GRID_SIZE / 2; + return {x: x, y: y}; + }, + /** + * Returns the svg path given the src and dst core and node id. + * @return Path in svg format. + */ + linkToPath: function(link: podviewer.proto.ChannelInfo): string { + const src = this.coreIdToPos(link.srcCoreId); + const dst = this.coreIdToPos(link.dstCoreId); + const path = 'M ' + src.x + ' ' + src.y + 'L ' + dst.x + ' ' + dst.y; + return path; + }, + /** + * Returns the text to visualize in the tool tips. + * @return String to render in tool tips. + */ + _getToolTipText: function(data: TopoData): string { + const label = this.selectedMetricIdx >= 0 ? + this.metrics[this.selectedMetricIdx].label : ''; + const nf = new Intl.NumberFormat(navigator.language, + {style: 'percent', minimumFractionDigits: 2}); + + const res = `pos: (${data.ydim}, ${data.xdim}), + host: ${data.host}, + chip id: ${data.cid}, + core id: ${data.nid}, + replica id: ${data.rid} + ${label ? `${label} spends ${data.value.toFixed(2)}µs in total, + taking ${nf.format(data.value / data.total)} of a step.` : ''}` + return res; + }, + /** + * Draw the legend of the graph. 
+ */ + drawLegend: function(svg: any, height: number, colorScale: any) { + const legendElementWidth = CHIP_GRID_SIZE * 2; + let legend = svg.selectAll('.legend').data( + [0].concat(colorScale.quantiles()), (d) => d); + let legendG = legend.enter().append('g').merge(legend) + .attr('class', 'legend'); + legendG.append('rect') + .attr('x', (d, i) => legendElementWidth * i) + .attr('y', height) + .attr('width', legendElementWidth) + .attr('height', CHIP_GRID_SIZE) + .style('fill', (d, i) => MAIN_COLORS[i]); + legendG.append('text') + .text((d) => '\u2265 0.' + Math.round(d * 10)) + .attr('x', (d, i) => legendElementWidth * i) + .attr('y', height + CHIP_GRID_SIZE * 2); + legend.exit().remove(); + }, + /** + * Redraws the graph when the data to be rendered changed. + */ + drawTopology: function( + topoData: Array, + runEnvironment: podviewer.proto.RunEnvironment) { + if (!topoData || !runEnvironment || !this.isAttached) { + return; + } + this.topologyGraph(topoData); + this.drawLinks(this.data.channelDb); + }, + attached: function() { + this.drawTopology(this._topoData, this.runEnvironment); + }, + /** + * Updates the visible links when the selectedChannelIdChanged. + */ + _selectedChannelIdChanged: function(newData: number, oldData: number) { + if (!this._linkData) return; + if (this._linkData[oldData]) { + d3.select(this.$.tpgraph) + .selectAll('#cid' + oldData).style('visibility', 'hidden'); + } + if (this._linkData[newData]) { + d3.select(this.$.tpgraph) + .selectAll('#cid' + newData).style('visibility', 'visible'); + this.selectedChannel = this._linkData[newData]; + } + }, + /** + * Updates the topology color coding or selected channel id when the + * activeBar changed. + */ + _activeBarChanged: function(newData) { + const colorScale = d3.scaleOrdinal(d3.schemeCategory10) + .domain(d3.range(0, 19)); + if (!newData) return; + if (newData.replicaGroups && newData.replicaGroups.length > 0) { + // Colors the nodes within the same replica group to the same color. 
+ for (let i = 0; i < newData.replicaGroups.length; i++) { + const group = newData.replicaGroups[i].replicaIds; + for (let j = 0; j < group.length; j++) { + d3.select(this.$.tpgraph).selectAll('#rid' + group[j]) + .style('fill', colorScale(i % 20)); + } + } + this.selectedMetricIdx = -1; + } else if (newData.channelId) { + this.selectedChannelId = newData.channelId; + } + }, + /** + * Returns a label for the current metric selection. + */ + _getSelectedMetricLabel: function( + metrics: Array, idx:number): string { + if (idx < 0 || !metrics || idx > metrics.length) { + return 'Please select a metric'; + } + return 'Color: ' + metrics[idx].label; + }, +}); + +} // namespace pod_viewer_topology_graph diff --git a/tensorboard/plugins/profile/profile_demo.pod_viewer.json b/tensorboard/plugins/profile/profile_demo.pod_viewer.json new file mode 100644 index 0000000000..4b90bd081b --- /dev/null +++ b/tensorboard/plugins/profile/profile_demo.pod_viewer.json @@ -0,0 +1 @@ +{"hloInfoMap":{},"podStatsSequence":{"podStatsMap":[{"allReduceOpDb":[{"dataSize":"30661632","durationUs":10811.507589281251,"name":"all-reduce.2","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":2521.165580375,"name":"all-reduce.1","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":27.263195568548387,"name":"all-reduce","occurrences":992,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"12356608","durationUs":463.57660712500001,"name":"all-reduce.6","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":375.51763399999999,"name":"all-reduce.5","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":374.02147321874997,"name":"all-reduce.4","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"31460352","durationUs":203.90008928125002,"name":"all-reduce.3","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"28456960","d
urationUs":178.54026781249996,"name":"all-reduce.7","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]}],"channelDb":[{"channelId":"16436","dataSize":"2764800","dstCoreId":7,"durationUs":34.014285000000001,"hloNames":["send.42","recv-done.52"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":14,"utilization":0},{"channelId":"17643","dataSize":"20480","dstCoreId":13,"durationUs":33.602857,"hloNames":["recv-done.100","send.83"],"occurrences":1,"replicaId":0,"sendDelayUs":73.25,"srcCoreId":20,"utilization":0},{"channelId":"17764","dataSize":"20480","dstCoreId":13,"durationUs":17.538571999999998,"hloNames":["recv-done.108","send.91"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":20,"utilization":0},{"channelId":"18733","dataSize":"1388544","dstCoreId":13,"durationUs":276.38428599999997,"hloNames":["send.97","recv-done.115"],"occurrences":1,"replicaId":0,"sendDelayUs":0,"srcCoreId":12,"utilization":0}],"coreIdToReplicaIdMap":{"1":0,"3":0,"4":0,"6":0,"7":0,"9":0,"10":0,"12":0,"101":0,"103":0,"104":0,"106":0,"107":0,"109":0,"110":0,"112":0,"201":0,"203":0,"204":0,"206":0,"207":0,"209":0,"210":0,"212":0,"301":0,"303":0,"304":0,"306":0,"307":0,"309":0,"310":0,"312":0},"podStatsPerCore":{"1":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":13662.288571999999,"highFlopsComputeUs":207443.10142299999,"hostInfeedDurationUs":1716.472857,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":39666.888588000002,"sendDurationUs":17942.088581,"stepNum":221,"totalDurationUs":397539.37142899999},"3":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":12525.011429,"highFlopsComputeUs":207267.62999799999,"hostInfeedDurationUs":1720.9542859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42163.548598000001,"sendDurationUs":17411.900007,"stepNum":221,"totalDurationUs":397533.00428599998},"4":{"bottleneck":"Send and 
Recv","chipId":1,"crsDurationUs":13641.975716999999,"highFlopsComputeUs":207397.042862,"hostInfeedDurationUs":1714.3714279999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40267.061457000003,"sendDurationUs":18224.534286999999,"stepNum":221,"totalDurationUs":397529.65000000002},"6":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":14221.517142999999,"highFlopsComputeUs":207237.375715,"hostInfeedDurationUs":1720.331428,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":37693.192874,"sendDurationUs":17266.478574000001,"stepNum":221,"totalDurationUs":397522.37714300002},"7":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":28291.208572,"highFlopsComputeUs":209338.76857799999,"hostInfeedDurationUs":1722.7785710000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":23558.318590999999,"sendDurationUs":16176.947163999999,"stepNum":221,"totalDurationUs":397523.881429},"9":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":27233.165712999999,"highFlopsComputeUs":209416.73571499999,"hostInfeedDurationUs":1715.047143,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22646.498597999998,"sendDurationUs":15508.522876999999,"stepNum":221,"totalDurationUs":397536.05571400002},"10":{"bottleneck":"All-Reduce","chipId":0,"crsDurationUs":47407.805716000003,"highFlopsComputeUs":214328.448569,"hostInfeedDurationUs":1722.491428,"hostName":"njsw1:14059","hostOutfeedDurationUs":0.33428600000000003,"nodeId":0,"recvDurationUs":31073.378583999998,"sendDurationUs":9243.6159069999994,"stepNum":221,"totalDurationUs":397533.54999999999},"12":{"bottleneck":"Send and 
Recv","chipId":0,"crsDurationUs":29543.902859000002,"highFlopsComputeUs":209375.63143499999,"hostInfeedDurationUs":1723.292858,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22063.067186,"sendDurationUs":15378.035719,"stepNum":221,"totalDurationUs":397530.62428599998},"101":{"bottleneck":"Send and Recv","chipId":6,"crsDurationUs":15312.845713999999,"highFlopsComputeUs":206435.22714500001,"hostInfeedDurationUs":1721.767143,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37267.852864,"sendDurationUs":21720.582865,"stepNum":221,"totalDurationUs":397528.37142899999},"103":{"bottleneck":"Send and Recv","chipId":6,"crsDurationUs":11385.511428,"highFlopsComputeUs":206367.96714200001,"hostInfeedDurationUs":1721.1814280000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34349.292868999997,"sendDurationUs":23999.150002999999,"stepNum":221,"totalDurationUs":397521.98857099999},"104":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":21573.184282999999,"highFlopsComputeUs":206408.11714700001,"hostInfeedDurationUs":1716.2114280000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":35173.970019,"sendDurationUs":22466.791434999999,"stepNum":221,"totalDurationUs":397536.74428599997},"106":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":22866.147140000001,"highFlopsComputeUs":206441.69428900001,"hostInfeedDurationUs":1721.6771429999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34358.401440000001,"sendDurationUs":21782.570012,"stepNum":221,"totalDurationUs":397516.06857100001},"107":{"bottleneck":"Send and 
Recv","chipId":7,"crsDurationUs":6976.4671449999996,"highFlopsComputeUs":206484.33286200001,"hostInfeedDurationUs":1722.4171429999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":44825.472866999997,"sendDurationUs":20205.755723999999,"stepNum":221,"totalDurationUs":397528.058571},"109":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":10383.294287000001,"highFlopsComputeUs":206421.94142700001,"hostInfeedDurationUs":1721.8,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42431.608587000002,"sendDurationUs":19537.359998,"stepNum":221,"totalDurationUs":397524.17142899998},"110":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6958.8485710000004,"highFlopsComputeUs":206801.57427899999,"hostInfeedDurationUs":1721.7628569999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37036.555727999999,"sendDurationUs":28790.634296,"stepNum":221,"totalDurationUs":397531.248571},"112":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6315.3885739999996,"highFlopsComputeUs":206643.78142399999,"hostInfeedDurationUs":1724.3414290000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":38032.754304000002,"sendDurationUs":26248.458580999999,"stepNum":221,"totalDurationUs":397527.70714299998},"201":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11437.222857999999,"highFlopsComputeUs":207479.62857299999,"hostInfeedDurationUs":1720.052858,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":50580.821446000002,"sendDurationUs":12685.305724,"stepNum":221,"totalDurationUs":397535.06285699998},"203":{"bottleneck":"Send and 
Recv","chipId":13,"crsDurationUs":11313.951428,"highFlopsComputeUs":207375.24000200001,"hostInfeedDurationUs":1719.8828579999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":43393.255727999996,"sendDurationUs":18422.137151999999,"stepNum":221,"totalDurationUs":397527.107143},"204":{"bottleneck":"Send and Recv","chipId":9,"crsDurationUs":14351.392854,"highFlopsComputeUs":207361.78142700001,"hostInfeedDurationUs":1717.3757149999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45239.260018000001,"sendDurationUs":15432.444296,"stepNum":221,"totalDurationUs":397534.654286},"206":{"bottleneck":"Send and Recv","chipId":9,"crsDurationUs":18417.787139,"highFlopsComputeUs":207409.85428500001,"hostInfeedDurationUs":1720.9442859999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34324.372879000002,"sendDurationUs":19130.807158,"stepNum":221,"totalDurationUs":397530.81},"207":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10979.084285000001,"highFlopsComputeUs":209588.235713,"hostInfeedDurationUs":1722.248572,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":41652.098591000002,"sendDurationUs":16386.584304,"stepNum":221,"totalDurationUs":397537.892857},"209":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10668.465715,"highFlopsComputeUs":209670.76285599999,"hostInfeedDurationUs":1718.0285710000001,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33091.415739999997,"sendDurationUs":20989.570013,"stepNum":221,"totalDurationUs":397524.43571400002},"210":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":22569.698573000001,"highFlopsComputeUs":209561.282863,"hostInfeedDurationUs":1723.038571,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":29127.498575000001,"sendDurationUs":17153.287159,"stepNum":221,"totalDurationUs":397524.417143},"212":{"bottleneck":"Send and 
Recv","chipId":8,"crsDurationUs":10958.324285999999,"highFlopsComputeUs":209709.25285700001,"hostInfeedDurationUs":1720.3199999999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33260.837164999997,"sendDurationUs":21612.090024000001,"stepNum":221,"totalDurationUs":397519.06857100001},"301":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6839.6614289999998,"highFlopsComputeUs":206388.21713999999,"hostInfeedDurationUs":1718.6714280000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40407.600012000003,"sendDurationUs":23857.128575999999,"stepNum":221,"totalDurationUs":397523.52000000002},"303":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6154.3114310000001,"highFlopsComputeUs":206478.51285599999,"hostInfeedDurationUs":1720.3128569999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34218.168587,"sendDurationUs":27609.502862000001,"stepNum":221,"totalDurationUs":397536.58000000002},"304":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":12866.675716,"highFlopsComputeUs":206449.47286000001,"hostInfeedDurationUs":1719.551428,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":36298.152870999998,"sendDurationUs":22423.432865999999,"stepNum":221,"totalDurationUs":397535.19857100002},"306":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":11542.1,"highFlopsComputeUs":208336.03572000001,"hostInfeedDurationUs":1717.902857,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":25607.762868000002,"sendDurationUs":30848.608572000001,"stepNum":221,"totalDurationUs":397535.19571399997},"307":{"bottleneck":"Send and 
Recv","chipId":15,"crsDurationUs":20327.537143000001,"highFlopsComputeUs":206528.55142900001,"hostInfeedDurationUs":1721.614286,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":59447.052862999997,"sendDurationUs":14906.371434999999,"stepNum":221,"totalDurationUs":397531.28714299999},"309":{"bottleneck":"Send and Recv","chipId":15,"crsDurationUs":28428.101427000001,"highFlopsComputeUs":199572.87714900001,"hostInfeedDurationUs":1722.391429,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":68760.547139000002,"sendDurationUs":8874.361433,"stepNum":221,"totalDurationUs":397530.28714299999},"310":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":9556.1885719999991,"highFlopsComputeUs":206564.66570799999,"hostInfeedDurationUs":1721.712857,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":44789.454297999997,"sendDurationUs":25511.827150000001,"stepNum":221,"totalDurationUs":397531.29857099999},"312":{"bottleneck":"Send and 
Recv","chipId":11,"crsDurationUs":10039.360000000001,"highFlopsComputeUs":206610.517139,"hostInfeedDurationUs":1719.181429,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":40464.617155,"sendDurationUs":28622.022863999999,"stepNum":221,"totalDurationUs":397527.12571400002}},"stepNum":221},{"allReduceOpDb":[{"dataSize":"30661632","durationUs":11028.761383906251,"name":"all-reduce.2","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":2549.2764285624999,"name":"all-reduce.1","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"2048","durationUs":26.863336685483873,"name":"all-reduce","occurrences":992,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"12356608","durationUs":471.05571434375003,"name":"all-reduce.6","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":374.18040184374996,"name":"all-reduce.5","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"134217728","durationUs":373.74691959375002,"name":"all-reduce.4","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"31460352","durationUs":203.84754465624999,"name":"all-reduce.3","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]},{"dataSize":"28456960","durationUs":178.48656253125,"name":"all-reduce.7","occurrences":32,"replicaGroups":[{"replicaIds":["0"]}]}],"channelDb":[{"channelId":"20078","dataSize":"20480","dstCoreId":23,"durationUs":3.4528569999999998,"hloNames":["send.813","recv-done.160"],"occurrences":1,"replicaId":0,"sendDelayUs":121.395714,"srcCoreId":0,"utilization":0},{"channelId":"15232","dataSize":"1388544","dstCoreId":19,"durationUs":18.525713,"hloNames":["send.33","recv-done.43"],"occurrences":1,"replicaId":0,"sendDelayUs":1.752858,"srcCoreId":26,"utilization":0},{"channelId":"19161","dataSize":"20480","dstCoreId":26,"durationUs":66.484285999999997,"hloNames":["recv-done.178","send.141"],"occurrences":1,"replicaId":0,"sendDelayUs":
465.205714,"srcCoreId":27,"utilization":0},{"channelId":"14533","dataSize":"33792","dstCoreId":4,"durationUs":0.78714200000000001,"hloNames":["send.190","recv-done.24"],"occurrences":1,"replicaId":0,"sendDelayUs":16.741429,"srcCoreId":0,"utilization":0}],"coreIdToReplicaIdMap":{"1":0,"3":0,"4":0,"6":0,"7":0,"9":0,"10":0,"12":0,"101":0,"103":0,"104":0,"106":0,"107":0,"109":0,"110":0,"112":0,"201":0,"203":0,"204":0,"206":0,"207":0,"209":0,"210":0,"212":0,"301":0,"303":0,"304":0,"306":0,"307":0,"309":0,"310":0,"312":0},"podStatsPerCore":{"1":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":14084.704286,"highFlopsComputeUs":207388.11713699999,"hostInfeedDurationUs":1714.6500000000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40092.490015000003,"sendDurationUs":17907.175722,"stepNum":222,"totalDurationUs":398214.19},"3":{"bottleneck":"Send and Recv","chipId":5,"crsDurationUs":12933.194285,"highFlopsComputeUs":207175.73999900001,"hostInfeedDurationUs":1721.297143,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42453.528593000003,"sendDurationUs":17400.428577999999,"stepNum":222,"totalDurationUs":398206.37},"4":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":14091.925713000001,"highFlopsComputeUs":207233.68285499999,"hostInfeedDurationUs":1714.9614280000001,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40773.725741000002,"sendDurationUs":18154.350004,"stepNum":222,"totalDurationUs":398215.85999999999},"6":{"bottleneck":"Send and Recv","chipId":1,"crsDurationUs":14643.508569,"highFlopsComputeUs":207201.06857100001,"hostInfeedDurationUs":1719.775715,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":37772.052881000003,"sendDurationUs":17354.907153,"stepNum":222,"totalDurationUs":398223.51428599999},"7":{"bottleneck":"Send and 
Recv","chipId":4,"crsDurationUs":28561.428573000001,"highFlopsComputeUs":209284.70429699999,"hostInfeedDurationUs":1719.4142859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":23662.600021999999,"sendDurationUs":16392.162879,"stepNum":222,"totalDurationUs":398222.01000000001},"9":{"bottleneck":"Send and Recv","chipId":4,"crsDurationUs":27548.952856,"highFlopsComputeUs":209323.77714300001,"hostInfeedDurationUs":1714.862858,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":23081.745739999998,"sendDurationUs":15528.412866999999,"stepNum":222,"totalDurationUs":398202.69714300003},"10":{"bottleneck":"All-Reduce","chipId":0,"crsDurationUs":47794.405713,"highFlopsComputeUs":214142.01000000001,"hostInfeedDurationUs":1725.318571,"hostName":"njsw1:14059","hostOutfeedDurationUs":0.33428600000000003,"nodeId":0,"recvDurationUs":31277.588578999999,"sendDurationUs":9243.7616180000005,"stepNum":222,"totalDurationUs":398218.81714300002},"12":{"bottleneck":"Send and Recv","chipId":0,"crsDurationUs":29989.367144,"highFlopsComputeUs":209330.50857500001,"hostInfeedDurationUs":1720.7142859999999,"hostName":"njsw1:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":22356.507182000001,"sendDurationUs":15068.790005999999,"stepNum":222,"totalDurationUs":398221.08714299998},"101":{"bottleneck":"Send and Recv","chipId":6,"crsDurationUs":15768.207146000001,"highFlopsComputeUs":206462.34,"hostInfeedDurationUs":1721.5142860000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37759.097155000003,"sendDurationUs":21392.548583,"stepNum":222,"totalDurationUs":398210.69428599998},"103":{"bottleneck":"Send and 
Recv","chipId":6,"crsDurationUs":11855.951426,"highFlopsComputeUs":206393.64000300001,"hostInfeedDurationUs":1720.547143,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34500.818593000004,"sendDurationUs":23976.541434999999,"stepNum":222,"totalDurationUs":398226.32000000001},"104":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":21747.387143,"highFlopsComputeUs":206452.350003,"hostInfeedDurationUs":1716.8085719999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":34653.311450000001,"sendDurationUs":23122.740008000001,"stepNum":222,"totalDurationUs":398220.43571400002},"106":{"bottleneck":"Send and Recv","chipId":2,"crsDurationUs":23243.028571999999,"highFlopsComputeUs":206402.74143299999,"hostInfeedDurationUs":1719.9328579999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34642.314304,"sendDurationUs":21864.134290999998,"stepNum":222,"totalDurationUs":398221.83857099997},"107":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":7191.5542859999996,"highFlopsComputeUs":206397.13000100001,"hostInfeedDurationUs":1720.2185710000001,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45218.691448999998,"sendDurationUs":20616.661442000001,"stepNum":222,"totalDurationUs":398218.45142900001},"109":{"bottleneck":"Send and Recv","chipId":7,"crsDurationUs":10542.825712,"highFlopsComputeUs":206355.82142600001,"hostInfeedDurationUs":1721.8028569999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":42577.165732000001,"sendDurationUs":19943.402869000001,"stepNum":222,"totalDurationUs":398218.48714300001},"110":{"bottleneck":"Send and 
Recv","chipId":3,"crsDurationUs":7101.1871419999998,"highFlopsComputeUs":206774.09428399999,"hostInfeedDurationUs":1720,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37804.437158000001,"sendDurationUs":28580.401439000001,"stepNum":222,"totalDurationUs":398218.88428599999},"112":{"bottleneck":"Send and Recv","chipId":3,"crsDurationUs":6546.7385709999999,"highFlopsComputeUs":206534.630003,"hostInfeedDurationUs":1725.5599999999999,"hostName":"njsw1:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":38827.308576000003,"sendDurationUs":25982.980004000001,"stepNum":222,"totalDurationUs":398211.53714299999},"201":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11428.060001,"highFlopsComputeUs":207484.80286,"hostInfeedDurationUs":1719.905714,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":51026.815746,"sendDurationUs":12542.751434,"stepNum":222,"totalDurationUs":398221.40857099998},"203":{"bottleneck":"Send and Recv","chipId":13,"crsDurationUs":11529.891432,"highFlopsComputeUs":207440.33285800001,"hostInfeedDurationUs":1717.9342859999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":43967.371439000002,"sendDurationUs":18111.422860999999,"stepNum":222,"totalDurationUs":398221.57285699999},"204":{"bottleneck":"Send and Recv","chipId":9,"crsDurationUs":14598.571427000001,"highFlopsComputeUs":207388.28285700001,"hostInfeedDurationUs":1716.6614279999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45798.657154,"sendDurationUs":15260.657144999999,"stepNum":222,"totalDurationUs":398219.68142899999},"206":{"bottleneck":"Send and 
Recv","chipId":9,"crsDurationUs":18598.618567000001,"highFlopsComputeUs":207425.13143099999,"hostInfeedDurationUs":1720.98,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34483.765736000001,"sendDurationUs":19299.474289000002,"stepNum":222,"totalDurationUs":398210.20857100002},"207":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":11141.364286,"highFlopsComputeUs":209598.264287,"hostInfeedDurationUs":1725.9857139999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":42046.937171999998,"sendDurationUs":16300.778582000001,"stepNum":222,"totalDurationUs":398209.53857099998},"209":{"bottleneck":"Send and Recv","chipId":12,"crsDurationUs":10844.927143000001,"highFlopsComputeUs":209686.52570299999,"hostInfeedDurationUs":1720.198572,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33322.021460999997,"sendDurationUs":21084.285728999999,"stepNum":222,"totalDurationUs":398225.23714300001},"210":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":22723.787143000001,"highFlopsComputeUs":209638.03713800001,"hostInfeedDurationUs":1721.0342860000001,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":29179.897165999999,"sendDurationUs":17325.542878,"stepNum":222,"totalDurationUs":398215.82857100002},"212":{"bottleneck":"Send and Recv","chipId":8,"crsDurationUs":11106.59,"highFlopsComputeUs":209698.075713,"hostInfeedDurationUs":1720.4485709999999,"hostName":"njsw3:14059","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":33466.327159,"sendDurationUs":21666.767158999999,"stepNum":222,"totalDurationUs":398221.23714300001},"301":{"bottleneck":"Send and 
Recv","chipId":14,"crsDurationUs":6939.8599990000002,"highFlopsComputeUs":206484.84713899999,"hostInfeedDurationUs":1719.9400000000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":40927.997155999998,"sendDurationUs":24104.938582999999,"stepNum":222,"totalDurationUs":398229.75285699998},"303":{"bottleneck":"Send and Recv","chipId":14,"crsDurationUs":6207.8842869999999,"highFlopsComputeUs":206502.38143000001,"hostInfeedDurationUs":1717.6014279999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":34081.265721999996,"sendDurationUs":28601.130002000002,"stepNum":222,"totalDurationUs":398216.57428599999},"304":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":13154.048575000001,"highFlopsComputeUs":206499.264284,"hostInfeedDurationUs":1718.3771429999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":37100.294298000001,"sendDurationUs":22281.692856000001,"stepNum":222,"totalDurationUs":398219.09857099998},"306":{"bottleneck":"Send and Recv","chipId":10,"crsDurationUs":11805.220001,"highFlopsComputeUs":208345.81571600001,"hostInfeedDurationUs":1717.1671429999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":26777.952864999999,"sendDurationUs":30477.042860000001,"stepNum":222,"totalDurationUs":398213.78428600001},"307":{"bottleneck":"Send and Recv","chipId":15,"crsDurationUs":20388.585715000001,"highFlopsComputeUs":206606.38714000001,"hostInfeedDurationUs":1723.625714,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":60240.227149999999,"sendDurationUs":14901.322856000001,"stepNum":222,"totalDurationUs":398215.022857},"309":{"bottleneck":"Send and 
Recv","chipId":15,"crsDurationUs":28490.097142999999,"highFlopsComputeUs":199656.02714399999,"hostInfeedDurationUs":1720.3228570000001,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":69555.061436000004,"sendDurationUs":8856.7071419999993,"stepNum":222,"totalDurationUs":398216.78999999998},"310":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":9660.9585690000004,"highFlopsComputeUs":206617.80999400001,"hostInfeedDurationUs":1724.1528579999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":0,"recvDurationUs":45693.587157000002,"sendDurationUs":25409.315714,"stepNum":222,"totalDurationUs":398213.79285700002},"312":{"bottleneck":"Send and Recv","chipId":11,"crsDurationUs":10124.957141000001,"highFlopsComputeUs":206628.12999799999,"hostInfeedDurationUs":1719.7085709999999,"hostName":"njsw3:14061","hostOutfeedDurationUs":0,"nodeId":1,"recvDurationUs":41098.437146999997,"sendDurationUs":28642.645713000002,"stepNum":222,"totalDurationUs":398218.39000000001}},"stepNum":222}]},"runEnvironment":{"hostCount":4, "numCoresPerReplica":32,"perCoreBatchSize":8,"replicaCount":1,"topology":{"xDimension":"4","yDimension":"4","zDimension":"0"},"tpuCoreCount":32,"tpuType":"TPU v3"}} diff --git a/tensorboard/plugins/profile/profile_demo.py b/tensorboard/plugins/profile/profile_demo.py index d28c480ef3..99b13df537 100644 --- a/tensorboard/plugins/profile/profile_demo.py +++ b/tensorboard/plugins/profile/profile_demo.py @@ -88,6 +88,9 @@ def dump_data(logdir): shutil.copyfile( 'tensorboard/plugins/profile/profile_demo.memory_viewer.json', os.path.join(run_dir, 'memory_viewer.json')) + shutil.copyfile( + 'tensorboard/plugins/profile/profile_demo.pod_viewer.json', + os.path.join(run_dir, 'pod_viewer.json')) shutil.copyfile( 'tensorboard/plugins/profile/profile_demo.google_chart_demo.json', os.path.join(run_dir, 'google_chart_demo.json')) diff --git a/tensorboard/plugins/profile/profile_plugin.py 
b/tensorboard/plugins/profile/profile_plugin.py index 05810fea05..05e0f09922 100644 --- a/tensorboard/plugins/profile/profile_plugin.py +++ b/tensorboard/plugins/profile/profile_plugin.py @@ -55,6 +55,7 @@ 'input_pipeline_analyzer': 'input_pipeline.json', 'overview_page': 'overview_page.json', 'memory_viewer': 'memory_viewer.json', + 'pod_viewer': 'pod_viewer.json', 'google_chart_demo': 'google_chart_demo.json', } @@ -63,6 +64,7 @@ 'op_profile', 'overview_page', 'memory_viewer', + 'pod_viewer', 'google_chart_demo',]) def process_raw_trace(raw_trace): diff --git a/tensorboard/plugins/profile/tf_profile_dashboard/BUILD b/tensorboard/plugins/profile/tf_profile_dashboard/BUILD index 5151265e79..f96b60e006 100644 --- a/tensorboard/plugins/profile/tf_profile_dashboard/BUILD +++ b/tensorboard/plugins/profile/tf_profile_dashboard/BUILD @@ -19,6 +19,7 @@ tf_web_library( "//tensorboard/plugins/profile/input_pipeline_analyzer", "//tensorboard/plugins/profile/memory_viewer/memory_viewer_dashboard", "//tensorboard/plugins/profile/overview_page", + "//tensorboard/plugins/profile/pod_viewer/pod_viewer_dashboard", "//tensorboard/plugins/profile/tf_op_profile", "//tensorboard/plugins/profile/tf_profile_common", "@org_polymer", diff --git a/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html b/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html index dc56e66e77..359f4d1d8a 100644 --- a/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html +++ b/tensorboard/plugins/profile/tf_profile_dashboard/tf-profile-dashboard.html @@ -31,6 +31,7 @@ + @@ -185,6 +186,11 @@

No profile data was found.

node="[[_activeBufferDetails]]" > +