diff --git a/frontend/app/common/constants/roofline_model_constants.ts b/frontend/app/common/constants/roofline_model_constants.ts
index 980cd03a..e943aa90 100644
--- a/frontend/app/common/constants/roofline_model_constants.ts
+++ b/frontend/app/common/constants/roofline_model_constants.ts
@@ -12,7 +12,7 @@ export const SCATTER_CHART_AXIS = {
minX: 0.00001,
maxX: 100000,
minY: 0.00001,
- maxY: 1000000,
+ maxY: 10000000,
};
/** scatter base options for roofline chart */
@@ -191,6 +191,13 @@ export const DEVICE_INFO = [
unit: 'Flop/byte',
display: false,
},
+ {
+ id: 'time_scale_multiplier',
+ label: 'Time Scale Multiplier',
+ type: 'number',
+ unit: '',
+ display: true,
+ },
];
/**
diff --git a/frontend/app/components/roofline_model/BUILD b/frontend/app/components/roofline_model/BUILD
index e01fa163..787f0f62 100644
--- a/frontend/app/components/roofline_model/BUILD
+++ b/frontend/app/components/roofline_model/BUILD
@@ -19,6 +19,10 @@ xprof_ng_module(
"@npm//@ngrx/store",
"@npm//@types/google.visualization",
"@npm//rxjs",
+ "@org_xprof//frontend/app/common/angular:angular_material_icon",
+ "@org_xprof//frontend/app/common/angular:angular_material_progress_bar",
+ "@org_xprof//frontend/app/common/angular:angular_material_slide_toggle",
+ "@org_xprof//frontend/app/common/angular:angular_material_tooltip",
"@org_xprof//frontend/app/common/classes",
"@org_xprof//frontend/app/common/constants:roofline_model_constants",
"@org_xprof//frontend/app/common/interfaces",
@@ -39,4 +43,5 @@ sass_binary(
src = "roofline_model.scss",
# stack = True,
sourcemap = False,
+ deps = ["@org_xprof//frontend/app/styles:common"],
)
diff --git a/frontend/app/components/roofline_model/roofline_model.ng.html b/frontend/app/components/roofline_model/roofline_model.ng.html
index b4737609..063d3c29 100644
--- a/frontend/app/components/roofline_model/roofline_model.ng.html
+++ b/frontend/app/components/roofline_model/roofline_model.ng.html
@@ -1,4 +1,4 @@
-
+
Device Information
@@ -10,6 +10,19 @@
Device Information
{{info.label}}: {{deviceInfoText(info)}}
+
+
+
+ Apply Dvfs Scaling Factor
+
+ info
+
+
+
+
@@ -30,9 +43,13 @@
Section1: Program-Level Analysis
(6) "Average" shows the average step information by aggregating the operations in the complete steps only.
-
+
+
+
Loading analysis...
+
+
@@ -51,7 +68,11 @@
Section2: Operation-Level Analysis
(7) Ops with zero FLOP (e.g., data formatting ops like reshape, IDLE, etc.) do not show up in the roofline chart.
-
+
+
+
Loading analysis...
+
+
diff --git a/frontend/app/components/roofline_model/roofline_model.scss b/frontend/app/components/roofline_model/roofline_model.scss
index da57955c..c1b8b565 100644
--- a/frontend/app/components/roofline_model/roofline_model.scss
+++ b/frontend/app/components/roofline_model/roofline_model.scss
@@ -1,3 +1,9 @@
+@import 'frontend/app/styles/common';
+
+:host {
+ display: block;
+}
+
.section-container {
margin: 20px 20px 0px;
}
@@ -37,3 +43,21 @@
background-color: #ffcccb;
color: red;
}
+
+.tooltip-icon {
+ transform: scale(0.8);
+}
+
+.control-title {
+ display: flex;
+ flex-direction: row;
+ align-items: center;
+}
+
+.control {
+ display: flex;
+ flex-direction: row;
+ align-items: center;
+ justify-content: left;
+ gap: 10px;
+}
diff --git a/frontend/app/components/roofline_model/roofline_model.ts b/frontend/app/components/roofline_model/roofline_model.ts
index 896594d0..94128571 100644
--- a/frontend/app/components/roofline_model/roofline_model.ts
+++ b/frontend/app/components/roofline_model/roofline_model.ts
@@ -8,7 +8,7 @@ import {getGigaflopsReadableString, setLoadingState} from 'org_xprof/frontend/ap
import {DATA_SERVICE_INTERFACE_TOKEN, DataServiceV2Interface} from 'org_xprof/frontend/app/services/data_service_v2/data_service_v2_interface';
import {setCurrentToolStateAction} from 'org_xprof/frontend/app/store/actions';
import {ReplaySubject} from 'rxjs';
-import {takeUntil} from 'rxjs/operators';
+import {take, takeUntil} from 'rxjs/operators';
import {OperationLevelAnalysis} from './operation_level_analysis/operation_level_analysis';
import {ProgramLevelAnalysis} from './program_level_analysis/program_level_analysis';
@@ -27,6 +27,7 @@ declare interface DeviceIndicators {
hasCmem: boolean;
hasMegacore: boolean;
isGpu: boolean;
+ timeScaleMultiplier: number;
}
type ColumnIdxArr = Array;
@@ -61,6 +62,9 @@ export class RooflineModel implements OnDestroy {
@ViewChild('opLevelAnalysis') opLevelAnalysis?: OperationLevelAnalysis;
host = '';
+ applyScalingFactor = false;
+ loadingAnalysis = false;
+
// Device Information section data
deviceInfoArray: DeviceInfoData[] = [];
// Some critical indicators
@@ -69,6 +73,7 @@ export class RooflineModel implements OnDestroy {
hasCmem: false,
hasMegacore: false,
isGpu: false,
+ timeScaleMultiplier: 1.0,
};
// dataTableRaw from the raw roofline model data
@@ -178,6 +183,20 @@ export class RooflineModel implements OnDestroy {
});
}
+ /**
+  * Re-fetches the roofline model data for the current session/tool/host and
+  * re-parses it. When the scaling toggle is on, the request carries the
+  * `apply_time_scale_multiplier` parameter. `loadingAnalysis` is true while
+  * the request is in flight and is cleared on every terminal path.
+  */
+ updateAnalysis() {
+   this.loadingAnalysis = true;
+   const params = new Map();
+   if (this.applyScalingFactor) {
+     params.set('apply_time_scale_multiplier', this.applyScalingFactor);
+   }
+   // The flag must be reset even when the request fails or completes without
+   // emitting; otherwise the loading indicator would never go away.
+   const stopLoading = () => {
+     this.loadingAnalysis = false;
+   };
+   this.dataService.getData(this.sessionId, this.tool, this.host, params)
+       // takeUntil drops a late response once the component is destroyed.
+       .pipe(take(1), takeUntil(this.destroyed))
+       .subscribe({
+         next: (data) => {
+           try {
+             this.parseData(data as RooflineModelData[]);
+           } finally {
+             // Reset even if parsing throws.
+             stopLoading();
+           }
+         },
+         error: (err: unknown) => {
+           console.error('Failed to update roofline analysis.', err);
+           stopLoading();
+         },
+         complete: stopLoading,
+       });
+ }
+
parseData(data?: RooflineModelData[]) {
if (!google?.visualization) {
console.log('gviz lib is not loaded yet.');
@@ -203,6 +222,11 @@ export class RooflineModel implements OnDestroy {
this.processScatterDataOp();
}
+ /**
+  * Tells whether the parsed time scale multiplier is meaningful: it has to
+  * be positive and different from the neutral value 1.
+  */
+ hasValidTimeScaleMultiplier(): boolean {
+   const {timeScaleMultiplier} = this.deviceIndicators;
+   if (timeScaleMultiplier === 1) {
+     return false;
+   }
+   return timeScaleMultiplier > 0;
+ }
+
/** parse the device information from the original dataset */
parseDeviceInfoData(dataTableRaw: google.visualization.DataTable) {
this.deviceIndicators = {
@@ -211,6 +235,8 @@ export class RooflineModel implements OnDestroy {
hasMegacore: !!Number(dataTableRaw.getTableProperty('megacore')),
isGpu: dataTableRaw.getTableProperty('device_type')
.startsWith(NVIDIA_GPU_TYPE_PREFIX),
+ timeScaleMultiplier:
+ Number(dataTableRaw.getTableProperty('time_scale_multiplier')) || 1,
};
this.deviceInfoArray = DEVICE_INFO.reduce(
@@ -250,13 +276,25 @@ export class RooflineModel implements OnDestroy {
curInfo.context +=
'(if yes, the analysis assumes Megacore where an HLO runs on both TensorCores utilizing the full chip\'s resources so that the rooflines are twice higher)';
curInfo.value = this.deviceIndicators.hasMegacore ? 'Yes' : 'No';
+ } else if (
+ cur.id === 'time_scale_multiplier' &&
+ !this.hasValidTimeScaleMultiplier()) {
+ curInfo.display = false;
}
}
- const value = this.dataTableRaw!.getTableProperty(cur.id);
+ let value = this.dataTableRaw!.getTableProperty(cur.id);
+ value = cur.type === 'number' ? Number(value) : value;
+ if ([
+ 'peak_flop_rate', 'peak_vmem_read_bw', 'peak_vmem_write_bw'
+ ].includes(cur.id)) {
+ curInfo.value = this.applyScalingFactor ?
+ (value * this.deviceIndicators.timeScaleMultiplier).toFixed(2) :
+ value;
+ }
acc.push({
// convert numeric value to numbers, as some ridge numbers will be
// used as axis values in chart
- value: cur.type === 'number' ? Number(value) : value,
+ value,
// put cur at last to overwrite with preprocessed data
...curInfo,
});
@@ -1185,6 +1223,11 @@ export class RooflineModel implements OnDestroy {
}
}
+ /** Flips the scaling-factor switch, then reloads the analysis to match. */
+ toggleScalingFactor() {
+   const enabled = !this.applyScalingFactor;
+   this.applyScalingFactor = enabled;
+   this.updateAnalysis();
+ }
+
ngOnDestroy() {
setLoadingState(false, this.store);
this.destroyed.next();
diff --git a/frontend/app/components/roofline_model/roofline_model_module.ts b/frontend/app/components/roofline_model/roofline_model_module.ts
index f4444fef..b15a6d33 100644
--- a/frontend/app/components/roofline_model/roofline_model_module.ts
+++ b/frontend/app/components/roofline_model/roofline_model_module.ts
@@ -1,5 +1,9 @@
import {CommonModule} from '@angular/common';
import {NgModule} from '@angular/core';
+import {MatIconModule} from '@angular/material/icon';
+import {MatProgressBarModule} from '@angular/material/progress-bar';
+import {MatSlideToggleModule} from '@angular/material/slide-toggle';
+import {MatTooltipModule} from '@angular/material/tooltip';
import {TableModule} from 'org_xprof/frontend/app/components/chart/table/table_module';
import {CategoryFilterModule} from 'org_xprof/frontend/app/components/controls/category_filter/category_filter_module';
import {ExportAsCsvModule} from 'org_xprof/frontend/app/components/controls/export_as_csv/export_as_csv_module';
@@ -20,6 +24,10 @@ import {RooflineModel} from './roofline_model';
StringFilterModule,
ProgramLevelAnalysisModule,
OperationLevelAnalysisModule,
+ MatTooltipModule,
+ MatSlideToggleModule,
+ MatIconModule,
+ MatProgressBarModule,
],
exports: [RooflineModel],
})
diff --git a/frontend/app/services/data_service_v2/data_service_v2.ts b/frontend/app/services/data_service_v2/data_service_v2.ts
index 28210da6..ec7ee0eb 100644
--- a/frontend/app/services/data_service_v2/data_service_v2.ts
+++ b/frontend/app/services/data_service_v2/data_service_v2.ts
@@ -103,7 +103,7 @@ export class DataServiceV2 implements DataServiceV2Interface {
private getHTTPParamsForDataQuery(
run: string, tag: string, host: string,
- parameters: Map = new Map()): HttpParams {
+ parameters: Map = new Map()): HttpParams {
// Update searchparams with the updated run, tag and host.
// In a Single Page App, we need to update the searchparams with the updated
// run, tag and host on tool change for consistency.
@@ -130,7 +130,7 @@ export class DataServiceV2 implements DataServiceV2Interface {
getData(
sessionId: string, tool: string, host: string,
- parameters: Map = new Map()):
+ parameters: Map = new Map()):
Observable {
const params =
this.getHTTPParamsForDataQuery(sessionId, tool, host, parameters);
diff --git a/frontend/app/services/data_service_v2/data_service_v2_interface.ts b/frontend/app/services/data_service_v2/data_service_v2_interface.ts
index 6e582f58..de8217c9 100644
--- a/frontend/app/services/data_service_v2/data_service_v2_interface.ts
+++ b/frontend/app/services/data_service_v2/data_service_v2_interface.ts
@@ -25,7 +25,7 @@ export interface DataServiceV2Interface {
sessionId: string,
tool: string,
host?: string,
- parameters?: Map,
+ parameters?: Map,
ignoreError?: boolean,
): Observable;
diff --git a/plugin/xprof/protobuf/roofline_model.proto b/plugin/xprof/protobuf/roofline_model.proto
index d9b10f28..c74d6650 100644
--- a/plugin/xprof/protobuf/roofline_model.proto
+++ b/plugin/xprof/protobuf/roofline_model.proto
@@ -26,6 +26,7 @@ enum RecordType {
}
// A database of RooflineModel records.
+// Next ID: 17
message RooflineModelDatabase {
// The device type.
optional string device_type = 1;
@@ -63,11 +64,16 @@ message RooflineModelDatabase {
// Error and warning messages for diagnosing profiling issues.
optional tensorflow.profiler.Diagnostics diagnostics = 7;
+ // The weighted average time scale multiplier over all device op metrics.
+ // It is calculated as the total op time if every op ran on the default
+ // pstate, divided by the actual total op time.
+ optional double time_scale_multiplier = 16;
+
reserved 3, 4, 6;
}
// There is one RooflineModelRecord for each HLO operation profiled.
-// Next ID: 44
+// Next ID: 45
message RooflineModelRecord {
// The record type.
optional RecordType record_type = 18;
@@ -187,6 +193,10 @@ message RooflineModelRecord {
// Whether the record is calculated including infeed and outfeed ops.
optional bool include_infeed_outfeed = 26;
+ // Whether the device metrics (e.g. flops utilization) are calculated by
+ // applying the time scale multiplier.
+ optional bool apply_time_scale_multiplier = 44;
+
// Flops for the record
optional uint64 flops = 36;
diff --git a/xprof/convert/BUILD b/xprof/convert/BUILD
index b923dfa9..d4c4cba8 100644
--- a/xprof/convert/BUILD
+++ b/xprof/convert/BUILD
@@ -543,6 +543,7 @@ cc_library(
hdrs = ["op_metrics_to_record.h"],
deps = [
"@com_google_absl//absl/algorithm:container",
+ "@com_google_absl//absl/log:check",
"@com_google_absl//absl/strings",
"@org_xprof//plugin/xprof/protobuf:hardware_types_proto_cc",
"@org_xprof//plugin/xprof/protobuf:op_metrics_proto_cc",
diff --git a/xprof/convert/op_metrics_to_record.h b/xprof/convert/op_metrics_to_record.h
index 27739f10..eac7de8f 100644
--- a/xprof/convert/op_metrics_to_record.h
+++ b/xprof/convert/op_metrics_to_record.h
@@ -20,6 +20,7 @@ limitations under the License.
#include
#include
+#include "absl/log/check.h"
#include "absl/strings/string_view.h"
#include "xla/tsl/profiler/utils/math_utils.h"
#include "plugin/xprof/protobuf/hardware_types.pb.h"
@@ -40,6 +41,20 @@ inline double GigaFlopsPerSecondPerCore(const OpMetrics& metrics) {
metrics.flops(), tsl::profiler::PicoToNano(metrics.time_ps()));
}
+// Flop rate the op would have achieved had it run on the default pstate,
+// i.e. flops / normalized_time_ps.
+// Used to compare with default device peak flop rate to get utilization.
+inline double GigaFlopsPerSecondPerCoreNormalizedOnDvfs(
+    const OpMetrics& metrics) {
+  // If dvfs tracing is not enabled, normalized_time_ps is not set and thus
+  // defaults to 0; fall back to the measured rate so this is a no-op.
+  if (metrics.normalized_time_ps() == 0) {
+    return GigaFlopsPerSecondPerCore(metrics);
+  }
+  // The measured rate is flops / time_ps, so scaling it by
+  // time_ps / normalized_time_ps yields flops / normalized_time_ps. This keeps
+  // the flop rate consistent with the bandwidth metrics, which divide bytes by
+  // normalized_time_ps when the time scale multiplier is applied.
+  return GigaFlopsPerSecondPerCore(metrics) *
+         tsl::profiler::SafeDivide(metrics.time_ps(),
+                                   metrics.normalized_time_ps());
+}
+
inline double GigaModelFlopsPerSecondPerCore(const OpMetrics& metrics) {
// flops and time_ps are accumulated across all occurrences on all cores.
// time_ps is used instead of self_time_ps because flops for an op includes
@@ -157,7 +172,8 @@ static inline double GetMemoryPeakBandwidth(const PerfEnv& perf_env,
template
inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env,
- const RunEnvironment& run_env, Record* record) {
+ const RunEnvironment& run_env, Record* record,
+ bool apply_time_scale_factor = false) {
using ::tensorflow::profiler::MemorySpace;
using ::tensorflow::profiler::PerformanceInfo;
@@ -203,16 +219,19 @@ inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env,
// access as HBM access.
hbm_bytes = metrics.bytes_accessed();
}
+ int64_t device_time_ps = apply_time_scale_factor
+ ? metrics.normalized_time_ps()
+ : metrics.time_ps();
record->set_hbm_bw(tsl::profiler::GibibytesPerSecond(
hbm_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
record->set_cmem_read_bw(tsl::profiler::GibibytesPerSecond(
- cmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
+ cmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps)));
record->set_cmem_write_bw(tsl::profiler::GibibytesPerSecond(
- cmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
+ cmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps)));
record->set_vmem_read_bw(tsl::profiler::GibibytesPerSecond(
- vmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
+ vmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps)));
record->set_vmem_write_bw(tsl::profiler::GibibytesPerSecond(
- vmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps())));
+ vmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps)));
record->set_hbm_operational_intensity(
tsl::profiler::SafeDivide(metrics.flops(), hbm_bytes));
record->set_cmem_read_operational_intensity(
diff --git a/xprof/convert/op_stats_to_roofline_model.cc b/xprof/convert/op_stats_to_roofline_model.cc
index 3638286f..05e55ef4 100644
--- a/xprof/convert/op_stats_to_roofline_model.cc
+++ b/xprof/convert/op_stats_to_roofline_model.cc
@@ -61,8 +61,8 @@ const uint32_t kMaxNumRecords = 1000;
RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
const OpStats& op_stats, const OpMetrics& metrics, RecordType record_type,
uint32_t step_num, uint64_t total_time_ps,
- const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ const RooflineModelDatabase& roofline_model_db, bool include_infeed_outfeed,
+ bool apply_time_scale_multiplier) {
RooflineModelRecord record;
record.set_hlo_name(metrics.name());
record.set_hlo_category(metrics.category());
@@ -90,7 +90,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
// Set the roofline-specific fields.
SetRooflineMetrics(metrics, op_stats.perf_env(), op_stats.run_environment(),
- &record);
+ &record, apply_time_scale_multiplier);
const double cmem_wr_utilization =
roofline_model_db.has_cmem()
? tsl::profiler::SafeDivide(record.cmem_write_bw(),
@@ -111,8 +111,14 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
? tsl::profiler::SafeDivide(record.vmem_write_bw(),
roofline_model_db.peak_vmem_write_bw())
: 0;
- const double flops_utilization = tsl::profiler::SafeDivide(
+ double flops_utilization = tsl::profiler::SafeDivide(
record.measured_flop_rate(), roofline_model_db.peak_flop_rate());
+ if (apply_time_scale_multiplier) {
+ double measured_flop_rate_normalized =
+ GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics);
+ flops_utilization = tsl::profiler::SafeDivide(
+ measured_flop_rate_normalized, roofline_model_db.peak_flop_rate());
+ }
const double hbm_utilization = tsl::profiler::SafeDivide(
record.hbm_bw(), roofline_model_db.peak_hbm_bw());
@@ -130,6 +136,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
record.set_memory_bw_relative_to_hw_limit(max_mem_utilization);
record.set_include_infeed_outfeed(include_infeed_outfeed);
+ record.set_apply_time_scale_multiplier(apply_time_scale_multiplier);
return record;
}
@@ -137,7 +144,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
RooflineModelRecord GenerateRooflineModelProgramRecord(
const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type,
uint32_t step_num, const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier) {
OpMetrics program_metrics;
program_metrics.set_name("Program");
program_metrics.set_category("Program");
@@ -165,7 +172,7 @@ RooflineModelRecord GenerateRooflineModelProgramRecord(
program_metrics.set_time_ps(total_time_ps);
RooflineModelRecord program_record = ConvertOpMetricsToRooflineModelRecord(
op_stats, program_metrics, record_type, step_num, total_time_ps,
- roofline_model_db, include_infeed_outfeed);
+ roofline_model_db, include_infeed_outfeed, apply_time_scale_multiplier);
program_record.set_rank(0);
program_record.set_total_self_time_as_fraction(0.0);
program_record.set_cumulative_total_self_time_as_fraction(0.0);
@@ -176,12 +183,12 @@ tsl::protobuf::RepeatedPtrField
ConvertOpMetricsDbToRooflineModelRecords(
const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type,
uint32_t step_num, const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier) {
tsl::protobuf::RepeatedPtrField roofline_model_records;
RooflineModelRecord* program_record = roofline_model_records.Add();
*program_record = GenerateRooflineModelProgramRecord(
op_stats, db, record_type, step_num, roofline_model_db,
- include_infeed_outfeed);
+ include_infeed_outfeed, apply_time_scale_multiplier);
const RooflineModelRecord* prev_record = program_record;
uint64_t infeed_outfeed_time = 0;
if (!include_infeed_outfeed) {
@@ -203,7 +210,7 @@ ConvertOpMetricsDbToRooflineModelRecords(
RooflineModelRecord* record = roofline_model_records.Add();
*record = ConvertOpMetricsToRooflineModelRecord(
op_stats, *metrics, record_type, step_num, total_time_ps,
- roofline_model_db, include_infeed_outfeed);
+ roofline_model_db, include_infeed_outfeed, apply_time_scale_multiplier);
SetRankAndTimeFractions(total_time_us, *prev_record, record);
prev_record = record;
}
@@ -219,6 +226,9 @@ RooflineModelDatabase InitializeRooflineModelDatabaseFromOpStats(
RooflineModelDatabase roofline_model_db;
const PerfEnv& perf_env = op_stats.perf_env();
roofline_model_db.set_device_type(op_stats.run_environment().device_type());
+ roofline_model_db.set_time_scale_multiplier(tsl::profiler::SafeDivide(
+ op_stats.device_op_metrics_db().normalized_total_op_time_ps(),
+ op_stats.device_op_metrics_db().total_op_time_ps()));
// Set peak flop rate in GFLOPs/s.
roofline_model_db.set_peak_flop_rate(
@@ -256,7 +266,8 @@ RooflineModelDatabase InitializeRooflineModelDatabaseFromOpStats(
}
RooflineModelDatabase ConvertOpStatsToRooflineModel(
- const OpStats& op_stats, bool include_infeed_outfeed) {
+ const OpStats& op_stats, bool include_infeed_outfeed,
+ bool apply_time_scale_multiplier) {
HardwareType hardware_type = op_stats.run_environment().hardware_type();
if (hardware_type != GPU && hardware_type != TPU) {
return RooflineModelDatabase();
@@ -267,11 +278,14 @@ RooflineModelDatabase ConvertOpStatsToRooflineModel(
include_infeed_outfeed);
AddRooflineModelRecordForProfileDuration(op_stats, roofline_model_db,
- include_infeed_outfeed);
+ include_infeed_outfeed,
+ apply_time_scale_multiplier);
AddRooflineModelRecordsForCompleteSteps(op_stats, roofline_model_db,
- include_infeed_outfeed);
+ include_infeed_outfeed,
+ apply_time_scale_multiplier);
AddRooflineModelRecordsPerStep(op_stats, roofline_model_db,
- include_infeed_outfeed);
+ include_infeed_outfeed,
+ apply_time_scale_multiplier);
PopulateStepDiagnostics(op_stats, roofline_model_db.mutable_diagnostics());
return roofline_model_db;
}
@@ -582,6 +596,8 @@ std::unique_ptr GetRooflineModelDataTable(
{"vmem_write_ridge_point",
absl::StrCat(RidgePoint(roofline_model_db.peak_flop_rate(),
roofline_model_db.peak_vmem_write_bw()))},
+ {"time_scale_multiplier",
+ absl::StrCat(roofline_model_db.time_scale_multiplier())},
};
for (const std::vector& property : kCustomProperties) {
diff --git a/xprof/convert/op_stats_to_roofline_model.h b/xprof/convert/op_stats_to_roofline_model.h
index c2eaad3d..4f4ab9cc 100644
--- a/xprof/convert/op_stats_to_roofline_model.h
+++ b/xprof/convert/op_stats_to_roofline_model.h
@@ -38,23 +38,24 @@ using tensorflow::profiler::roofline_model::RooflineModelRecord;
RooflineModelRecord ConvertOpMetricsToRooflineModelRecord(
const OpStats& op_stats, const OpMetrics& metrics, RecordType record_type,
uint32_t step_num, uint64_t total_time_ps,
- const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed);
+ const RooflineModelDatabase& roofline_model_db, bool include_infeed_outfeed,
+ bool apply_time_scale_multiplier = false);
RooflineModelRecord GenerateRooflineModelProgramRecord(
const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type,
uint32_t step_num, const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed);
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier = false);
tsl::protobuf::RepeatedPtrField
ConvertOpMetricsDbToRooflineModelRecords(
const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type,
uint32_t step_num, const RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed);
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier = false);
tensorflow::profiler::roofline_model::RooflineModelDatabase
ConvertOpStatsToRooflineModel(const tensorflow::profiler::OpStats& tf_op_stats,
- bool include_infeed_outfeed);
+ bool include_infeed_outfeed,
+ bool apply_time_scale_multiplier = false);
tensorflow::profiler::roofline_model::RooflineModelDatabase
InitializeRooflineModelDatabaseFromOpStats(const OpStats& op_stats,
@@ -63,30 +64,31 @@ InitializeRooflineModelDatabaseFromOpStats(const OpStats& op_stats,
// duration including incomplete steps.
inline void AddRooflineModelRecordForProfileDuration(
const OpStats& op_stats, RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) {
*roofline_model_db.mutable_roofline_model_record() =
ConvertOpMetricsDbToRooflineModelRecords(
op_stats, op_stats.device_op_metrics_db(), RecordType::ALL,
- /*step_num=*/0, roofline_model_db, include_infeed_outfeed);
+ /*step_num=*/0, roofline_model_db, include_infeed_outfeed,
+ apply_time_scale_multiplier);
}
// Generate RooflineModelRecord for the HLO DB over complete steps only.
inline void AddRooflineModelRecordsForCompleteSteps(
const OpStats& op_stats, RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) {
if (op_stats.has_hlo_metrics_db_complete_steps_only()) {
*roofline_model_db.add_roofline_model_record() =
GenerateRooflineModelProgramRecord(
op_stats, op_stats.hlo_metrics_db_complete_steps_only(),
RecordType::AVERAGE_STEP, /*step_num=*/0, roofline_model_db,
- include_infeed_outfeed);
+ include_infeed_outfeed, apply_time_scale_multiplier);
}
}
// Generate RooflineModelRecords for the per-step DBs.
inline void AddRooflineModelRecordsPerStep(
const OpStats& op_stats, RooflineModelDatabase& roofline_model_db,
- bool include_infeed_outfeed) {
+ bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) {
for (const auto& step_info : op_stats.step_db().step_sequence()) {
*roofline_model_db.add_roofline_model_record() =
GenerateRooflineModelProgramRecord(