diff --git a/frontend/app/common/constants/roofline_model_constants.ts b/frontend/app/common/constants/roofline_model_constants.ts index 980cd03a..e943aa90 100644 --- a/frontend/app/common/constants/roofline_model_constants.ts +++ b/frontend/app/common/constants/roofline_model_constants.ts @@ -12,7 +12,7 @@ export const SCATTER_CHART_AXIS = { minX: 0.00001, maxX: 100000, minY: 0.00001, - maxY: 1000000, + maxY: 10000000, }; /** scatter base options for roofline chart */ @@ -191,6 +191,13 @@ export const DEVICE_INFO = [ unit: 'Flop/byte', display: false, }, + { + id: 'time_scale_multiplier', + label: 'Time Scale Multiplier', + type: 'number', + unit: '', + display: true, + }, ]; /** diff --git a/frontend/app/components/roofline_model/BUILD b/frontend/app/components/roofline_model/BUILD index e01fa163..787f0f62 100644 --- a/frontend/app/components/roofline_model/BUILD +++ b/frontend/app/components/roofline_model/BUILD @@ -19,6 +19,10 @@ xprof_ng_module( "@npm//@ngrx/store", "@npm//@types/google.visualization", "@npm//rxjs", + "@org_xprof//frontend/app/common/angular:angular_material_icon", + "@org_xprof//frontend/app/common/angular:angular_material_progress_bar", + "@org_xprof//frontend/app/common/angular:angular_material_slide_toggle", + "@org_xprof//frontend/app/common/angular:angular_material_tooltip", "@org_xprof//frontend/app/common/classes", "@org_xprof//frontend/app/common/constants:roofline_model_constants", "@org_xprof//frontend/app/common/interfaces", @@ -39,4 +43,5 @@ sass_binary( src = "roofline_model.scss", # stack = True, sourcemap = False, + deps = ["@org_xprof//frontend/app/styles:common"], ) diff --git a/frontend/app/components/roofline_model/roofline_model.ng.html b/frontend/app/components/roofline_model/roofline_model.ng.html index b4737609..063d3c29 100644 --- a/frontend/app/components/roofline_model/roofline_model.ng.html +++ b/frontend/app/components/roofline_model/roofline_model.ng.html @@ -1,4 +1,4 @@ -
+

Device Information

@@ -10,6 +10,19 @@

Device Information

{{info.label}}: {{deviceInfoText(info)}}
+ +
+
+ Apply Dvfs Scaling Factor + + info + +
+ +
@@ -30,9 +43,13 @@

Section1: Program-Level Analysis

(6) "Average" shows the average step information by aggregating the operations in the complete steps only.
-
+
+
+
Loading analysis...
+ +
@@ -51,7 +68,11 @@

Section2: Operation-Level Analysis

(7) Ops with zero FLOP (e.g., data formatting ops like reshape, IDLE, etc.) do not show up in the roofline chart.
-
+
+
+
Loading analysis...
+ +
diff --git a/frontend/app/components/roofline_model/roofline_model.scss b/frontend/app/components/roofline_model/roofline_model.scss index da57955c..c1b8b565 100644 --- a/frontend/app/components/roofline_model/roofline_model.scss +++ b/frontend/app/components/roofline_model/roofline_model.scss @@ -1,3 +1,9 @@ +@import 'frontend/app/styles/common'; + +:host { + display: block; +} + .section-container { margin: 20px 20px 0px; } @@ -37,3 +43,21 @@ background-color: #ffcccb; color: red; } + +.tooltip-icon { + transform: scale(0.8); +} + +.control-title { + display: flex; + flex-direction: row; + align-items: center; +} + +.control { + display: flex; + flex-direction: row; + align-items: center; + justify-content: left; + gap: 10px; +} diff --git a/frontend/app/components/roofline_model/roofline_model.ts b/frontend/app/components/roofline_model/roofline_model.ts index 896594d0..94128571 100644 --- a/frontend/app/components/roofline_model/roofline_model.ts +++ b/frontend/app/components/roofline_model/roofline_model.ts @@ -8,7 +8,7 @@ import {getGigaflopsReadableString, setLoadingState} from 'org_xprof/frontend/ap import {DATA_SERVICE_INTERFACE_TOKEN, DataServiceV2Interface} from 'org_xprof/frontend/app/services/data_service_v2/data_service_v2_interface'; import {setCurrentToolStateAction} from 'org_xprof/frontend/app/store/actions'; import {ReplaySubject} from 'rxjs'; -import {takeUntil} from 'rxjs/operators'; +import {take, takeUntil} from 'rxjs/operators'; import {OperationLevelAnalysis} from './operation_level_analysis/operation_level_analysis'; import {ProgramLevelAnalysis} from './program_level_analysis/program_level_analysis'; @@ -27,6 +27,7 @@ declare interface DeviceIndicators { hasCmem: boolean; hasMegacore: boolean; isGpu: boolean; + timeScaleMultiplier: number; } type ColumnIdxArr = Array; @@ -61,6 +62,9 @@ export class RooflineModel implements OnDestroy { @ViewChild('opLevelAnalysis') opLevelAnalysis?: OperationLevelAnalysis; host = ''; + applyScalingFactor = 
false; + loadingAnalysis = false; + // Device Information section data deviceInfoArray: DeviceInfoData[] = []; // Some critical indicators @@ -69,6 +73,7 @@ export class RooflineModel implements OnDestroy { hasCmem: false, hasMegacore: false, isGpu: false, + timeScaleMultiplier: 1.0, }; // dataTableRaw from the raw roofline model data @@ -178,6 +183,20 @@ export class RooflineModel implements OnDestroy { }); } + updateAnalysis() { + this.loadingAnalysis = true; + const params = new Map(); + if (this.applyScalingFactor) { + params.set('apply_time_scale_multiplier', this.applyScalingFactor); + } + this.dataService.getData(this.sessionId, this.tool, this.host, params) + .pipe(take(1)) + .subscribe((data) => { + this.parseData(data as RooflineModelData[]); + this.loadingAnalysis = false; + }); + } + parseData(data?: RooflineModelData[]) { if (!google?.visualization) { console.log('gviz lib is not loaded yet.'); @@ -203,6 +222,11 @@ export class RooflineModel implements OnDestroy { this.processScatterDataOp(); } + hasValidTimeScaleMultiplier(): boolean { + return this.deviceIndicators.timeScaleMultiplier > 0 && + this.deviceIndicators.timeScaleMultiplier !== 1; + } + /** parse the device information from the original dataset */ parseDeviceInfoData(dataTableRaw: google.visualization.DataTable) { this.deviceIndicators = { @@ -211,6 +235,8 @@ export class RooflineModel implements OnDestroy { hasMegacore: !!Number(dataTableRaw.getTableProperty('megacore')), isGpu: dataTableRaw.getTableProperty('device_type') .startsWith(NVIDIA_GPU_TYPE_PREFIX), + timeScaleMultiplier: + Number(dataTableRaw.getTableProperty('time_scale_multiplier')) || 1, }; this.deviceInfoArray = DEVICE_INFO.reduce( @@ -250,13 +276,25 @@ export class RooflineModel implements OnDestroy { curInfo.context += '(if yes, the analysis assumes Megacore where an HLO runs on both TensorCores utilizing the full chip\'s resources so that the rooflines are twice higher)'; curInfo.value = this.deviceIndicators.hasMegacore 
? 'Yes' : 'No'; + } else if ( + cur.id === 'time_scale_multiplier' && + !this.hasValidTimeScaleMultiplier()) { + curInfo.display = false; } } - const value = this.dataTableRaw!.getTableProperty(cur.id); + let value = this.dataTableRaw!.getTableProperty(cur.id); + value = cur.type === 'number' ? Number(value) : value; + if ([ + 'peak_flop_rate', 'peak_vmem_read_bw', 'peak_vmem_write_bw' + ].includes(cur.id)) { + curInfo.value = this.applyScalingFactor ? + (value * this.deviceIndicators.timeScaleMultiplier).toFixed(2) : + value; + } acc.push({ // convert numeric value to numbers, as some ridge numbers will be // used as axis values in chart - value: cur.type === 'number' ? Number(value) : value, + value, // put cur at last to overwrite with preprocessed data ...curInfo, }); @@ -1185,6 +1223,11 @@ export class RooflineModel implements OnDestroy { } } + toggleScalingFactor() { + this.applyScalingFactor = !this.applyScalingFactor; + this.updateAnalysis(); + } + ngOnDestroy() { setLoadingState(false, this.store); this.destroyed.next(); diff --git a/frontend/app/components/roofline_model/roofline_model_module.ts b/frontend/app/components/roofline_model/roofline_model_module.ts index f4444fef..b15a6d33 100644 --- a/frontend/app/components/roofline_model/roofline_model_module.ts +++ b/frontend/app/components/roofline_model/roofline_model_module.ts @@ -1,5 +1,9 @@ import {CommonModule} from '@angular/common'; import {NgModule} from '@angular/core'; +import {MatIconModule} from '@angular/material/icon'; +import {MatProgressBarModule} from '@angular/material/progress-bar'; +import {MatSlideToggleModule} from '@angular/material/slide-toggle'; +import {MatTooltipModule} from '@angular/material/tooltip'; import {TableModule} from 'org_xprof/frontend/app/components/chart/table/table_module'; import {CategoryFilterModule} from 'org_xprof/frontend/app/components/controls/category_filter/category_filter_module'; import {ExportAsCsvModule} from 
'org_xprof/frontend/app/components/controls/export_as_csv/export_as_csv_module'; @@ -20,6 +24,10 @@ import {RooflineModel} from './roofline_model'; StringFilterModule, ProgramLevelAnalysisModule, OperationLevelAnalysisModule, + MatTooltipModule, + MatSlideToggleModule, + MatIconModule, + MatProgressBarModule, ], exports: [RooflineModel], }) diff --git a/frontend/app/services/data_service_v2/data_service_v2.ts b/frontend/app/services/data_service_v2/data_service_v2.ts index 28210da6..ec7ee0eb 100644 --- a/frontend/app/services/data_service_v2/data_service_v2.ts +++ b/frontend/app/services/data_service_v2/data_service_v2.ts @@ -103,7 +103,7 @@ export class DataServiceV2 implements DataServiceV2Interface { private getHTTPParamsForDataQuery( run: string, tag: string, host: string, - parameters: Map = new Map()): HttpParams { + parameters: Map = new Map()): HttpParams { // Update searchparams with the updated run, tag and host. // In a Single Page App, we need to update the searchparams with the updated // run, tag and host on tool change for consistency. 
@@ -130,7 +130,7 @@ export class DataServiceV2 implements DataServiceV2Interface { getData( sessionId: string, tool: string, host: string, - parameters: Map = new Map()): + parameters: Map = new Map()): Observable { const params = this.getHTTPParamsForDataQuery(sessionId, tool, host, parameters); diff --git a/frontend/app/services/data_service_v2/data_service_v2_interface.ts b/frontend/app/services/data_service_v2/data_service_v2_interface.ts index 6e582f58..de8217c9 100644 --- a/frontend/app/services/data_service_v2/data_service_v2_interface.ts +++ b/frontend/app/services/data_service_v2/data_service_v2_interface.ts @@ -25,7 +25,7 @@ export interface DataServiceV2Interface { sessionId: string, tool: string, host?: string, - parameters?: Map, + parameters?: Map, ignoreError?: boolean, ): Observable; diff --git a/plugin/xprof/protobuf/roofline_model.proto b/plugin/xprof/protobuf/roofline_model.proto index d9b10f28..c74d6650 100644 --- a/plugin/xprof/protobuf/roofline_model.proto +++ b/plugin/xprof/protobuf/roofline_model.proto @@ -26,6 +26,7 @@ enum RecordType { } // A database of RooflineModel records. +// Next ID: 17 message RooflineModelDatabase { // The device type. optional string device_type = 1; @@ -63,11 +64,16 @@ message RooflineModelDatabase { // Error and warning messages for diagnosing profiling issues. optional tensorflow.profiler.Diagnostics diagnostics = 7; + // The weighted average time scale multiplier over all device op metrics. + // It is calculated by the total time of each op if running on default pstate + // divided by actual total op time. + optional double time_scale_multiplier = 16; + reserved 3, 4, 6; } // There is one RooflineModelRecord for each HLO operation profiled. -// Next ID: 44 +// Next ID: 45 message RooflineModelRecord { // The record type. optional RecordType record_type = 18; @@ -187,6 +193,10 @@ message RooflineModelRecord { // Whether the record is calculated including infeed and outfeed ops. 
optional bool include_infeed_outfeed = 26; + // Whether the device metrics (e.g. flops utilization) are calculated by + // applying the time scale multiplier. + optional bool apply_time_scale_multiplier = 44; + // Flops for the record optional uint64 flops = 36; diff --git a/xprof/convert/BUILD b/xprof/convert/BUILD index b923dfa9..d4c4cba8 100644 --- a/xprof/convert/BUILD +++ b/xprof/convert/BUILD @@ -543,6 +543,7 @@ cc_library( hdrs = ["op_metrics_to_record.h"], deps = [ "@com_google_absl//absl/algorithm:container", + "@com_google_absl//absl/log:check", "@com_google_absl//absl/strings", "@org_xprof//plugin/xprof/protobuf:hardware_types_proto_cc", "@org_xprof//plugin/xprof/protobuf:op_metrics_proto_cc", diff --git a/xprof/convert/op_metrics_to_record.h b/xprof/convert/op_metrics_to_record.h index 27739f10..eac7de8f 100644 --- a/xprof/convert/op_metrics_to_record.h +++ b/xprof/convert/op_metrics_to_record.h @@ -20,6 +20,7 @@ limitations under the License. #include #include +#include "absl/log/check.h" #include "absl/strings/string_view.h" #include "xla/tsl/profiler/utils/math_utils.h" #include "plugin/xprof/protobuf/hardware_types.pb.h" @@ -40,6 +41,20 @@ inline double GigaFlopsPerSecondPerCore(const OpMetrics& metrics) { metrics.flops(), tsl::profiler::PicoToNano(metrics.time_ps())); } +// Normalized flop rate if running on default pstate. +// Used to compare with default device peak flop rate to get utilization. +inline double GigaFlopsPerSecondPerCoreNormalizedOnDvfs( + const OpMetrics& metrics) { + // If DVFS tracing is not enabled, normalized_time_ps is unset and defaults + // to 0; in that case return the unnormalized flop rate unchanged. 
+ if (metrics.normalized_time_ps() == 0) { + return GigaFlopsPerSecondPerCore(metrics); + } + return GigaFlopsPerSecondPerCore(metrics) * + (tsl::profiler::SafeDivide(metrics.normalized_time_ps(), + metrics.time_ps())); +} + inline double GigaModelFlopsPerSecondPerCore(const OpMetrics& metrics) { // flops and time_ps are accumulated across all occurrences on all cores. // time_ps is used instead of self_time_ps because flops for an op includes @@ -157,7 +172,8 @@ static inline double GetMemoryPeakBandwidth(const PerfEnv& perf_env, template inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env, - const RunEnvironment& run_env, Record* record) { + const RunEnvironment& run_env, Record* record, + bool apply_time_scale_factor = false) { using ::tensorflow::profiler::MemorySpace; using ::tensorflow::profiler::PerformanceInfo; @@ -203,16 +219,19 @@ inline void SetRooflineMetrics(const OpMetrics& metrics, const PerfEnv perf_env, // access as HBM access. hbm_bytes = metrics.bytes_accessed(); } + int64_t device_time_ps = apply_time_scale_factor + ? 
metrics.normalized_time_ps() + : metrics.time_ps(); record->set_hbm_bw(tsl::profiler::GibibytesPerSecond( hbm_bytes, tsl::profiler::PicoToNano(metrics.time_ps()))); record->set_cmem_read_bw(tsl::profiler::GibibytesPerSecond( - cmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps()))); + cmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps))); record->set_cmem_write_bw(tsl::profiler::GibibytesPerSecond( - cmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps()))); + cmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps))); record->set_vmem_read_bw(tsl::profiler::GibibytesPerSecond( - vmem_read_bytes, tsl::profiler::PicoToNano(metrics.time_ps()))); + vmem_read_bytes, tsl::profiler::PicoToNano(device_time_ps))); record->set_vmem_write_bw(tsl::profiler::GibibytesPerSecond( - vmem_write_bytes, tsl::profiler::PicoToNano(metrics.time_ps()))); + vmem_write_bytes, tsl::profiler::PicoToNano(device_time_ps))); record->set_hbm_operational_intensity( tsl::profiler::SafeDivide(metrics.flops(), hbm_bytes)); record->set_cmem_read_operational_intensity( diff --git a/xprof/convert/op_stats_to_roofline_model.cc b/xprof/convert/op_stats_to_roofline_model.cc index 3638286f..05e55ef4 100644 --- a/xprof/convert/op_stats_to_roofline_model.cc +++ b/xprof/convert/op_stats_to_roofline_model.cc @@ -61,8 +61,8 @@ const uint32_t kMaxNumRecords = 1000; RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( const OpStats& op_stats, const OpMetrics& metrics, RecordType record_type, uint32_t step_num, uint64_t total_time_ps, - const RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + const RooflineModelDatabase& roofline_model_db, bool include_infeed_outfeed, + bool apply_time_scale_multiplier) { RooflineModelRecord record; record.set_hlo_name(metrics.name()); record.set_hlo_category(metrics.category()); @@ -90,7 +90,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( // Set the roofline-specific fields. 
SetRooflineMetrics(metrics, op_stats.perf_env(), op_stats.run_environment(), - &record); + &record, apply_time_scale_multiplier); const double cmem_wr_utilization = roofline_model_db.has_cmem() ? tsl::profiler::SafeDivide(record.cmem_write_bw(), @@ -111,8 +111,14 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( ? tsl::profiler::SafeDivide(record.vmem_write_bw(), roofline_model_db.peak_vmem_write_bw()) : 0; - const double flops_utilization = tsl::profiler::SafeDivide( + double flops_utilization = tsl::profiler::SafeDivide( record.measured_flop_rate(), roofline_model_db.peak_flop_rate()); + if (apply_time_scale_multiplier) { + double measured_flop_rate_normalized = + GigaFlopsPerSecondPerCoreNormalizedOnDvfs(metrics); + flops_utilization = tsl::profiler::SafeDivide( + measured_flop_rate_normalized, roofline_model_db.peak_flop_rate()); + } const double hbm_utilization = tsl::profiler::SafeDivide( record.hbm_bw(), roofline_model_db.peak_hbm_bw()); @@ -130,6 +136,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( record.set_memory_bw_relative_to_hw_limit(max_mem_utilization); record.set_include_infeed_outfeed(include_infeed_outfeed); + record.set_apply_time_scale_multiplier(apply_time_scale_multiplier); return record; } @@ -137,7 +144,7 @@ RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( RooflineModelRecord GenerateRooflineModelProgramRecord( const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type, uint32_t step_num, const RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + bool include_infeed_outfeed, bool apply_time_scale_multiplier) { OpMetrics program_metrics; program_metrics.set_name("Program"); program_metrics.set_category("Program"); @@ -165,7 +172,7 @@ RooflineModelRecord GenerateRooflineModelProgramRecord( program_metrics.set_time_ps(total_time_ps); RooflineModelRecord program_record = ConvertOpMetricsToRooflineModelRecord( op_stats, program_metrics, record_type, step_num, 
total_time_ps, - roofline_model_db, include_infeed_outfeed); + roofline_model_db, include_infeed_outfeed, apply_time_scale_multiplier); program_record.set_rank(0); program_record.set_total_self_time_as_fraction(0.0); program_record.set_cumulative_total_self_time_as_fraction(0.0); @@ -176,12 +183,12 @@ tsl::protobuf::RepeatedPtrField ConvertOpMetricsDbToRooflineModelRecords( const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type, uint32_t step_num, const RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + bool include_infeed_outfeed, bool apply_time_scale_multiplier) { tsl::protobuf::RepeatedPtrField roofline_model_records; RooflineModelRecord* program_record = roofline_model_records.Add(); *program_record = GenerateRooflineModelProgramRecord( op_stats, db, record_type, step_num, roofline_model_db, - include_infeed_outfeed); + include_infeed_outfeed, apply_time_scale_multiplier); const RooflineModelRecord* prev_record = program_record; uint64_t infeed_outfeed_time = 0; if (!include_infeed_outfeed) { @@ -203,7 +210,7 @@ ConvertOpMetricsDbToRooflineModelRecords( RooflineModelRecord* record = roofline_model_records.Add(); *record = ConvertOpMetricsToRooflineModelRecord( op_stats, *metrics, record_type, step_num, total_time_ps, - roofline_model_db, include_infeed_outfeed); + roofline_model_db, include_infeed_outfeed, apply_time_scale_multiplier); SetRankAndTimeFractions(total_time_us, *prev_record, record); prev_record = record; } @@ -219,6 +226,9 @@ RooflineModelDatabase InitializeRooflineModelDatabaseFromOpStats( RooflineModelDatabase roofline_model_db; const PerfEnv& perf_env = op_stats.perf_env(); roofline_model_db.set_device_type(op_stats.run_environment().device_type()); + roofline_model_db.set_time_scale_multiplier(tsl::profiler::SafeDivide( + op_stats.device_op_metrics_db().normalized_total_op_time_ps(), + op_stats.device_op_metrics_db().total_op_time_ps())); // Set peak flop rate in GFLOPs/s. 
roofline_model_db.set_peak_flop_rate( @@ -256,7 +266,8 @@ RooflineModelDatabase InitializeRooflineModelDatabaseFromOpStats( } RooflineModelDatabase ConvertOpStatsToRooflineModel( - const OpStats& op_stats, bool include_infeed_outfeed) { + const OpStats& op_stats, bool include_infeed_outfeed, + bool apply_time_scale_multiplier) { HardwareType hardware_type = op_stats.run_environment().hardware_type(); if (hardware_type != GPU && hardware_type != TPU) { return RooflineModelDatabase(); @@ -267,11 +278,14 @@ RooflineModelDatabase ConvertOpStatsToRooflineModel( include_infeed_outfeed); AddRooflineModelRecordForProfileDuration(op_stats, roofline_model_db, - include_infeed_outfeed); + include_infeed_outfeed, + apply_time_scale_multiplier); AddRooflineModelRecordsForCompleteSteps(op_stats, roofline_model_db, - include_infeed_outfeed); + include_infeed_outfeed, + apply_time_scale_multiplier); AddRooflineModelRecordsPerStep(op_stats, roofline_model_db, - include_infeed_outfeed); + include_infeed_outfeed, + apply_time_scale_multiplier); PopulateStepDiagnostics(op_stats, roofline_model_db.mutable_diagnostics()); return roofline_model_db; } @@ -582,6 +596,8 @@ std::unique_ptr GetRooflineModelDataTable( {"vmem_write_ridge_point", absl::StrCat(RidgePoint(roofline_model_db.peak_flop_rate(), roofline_model_db.peak_vmem_write_bw()))}, + {"time_scale_multiplier", + absl::StrCat(roofline_model_db.time_scale_multiplier())}, }; for (const std::vector& property : kCustomProperties) { diff --git a/xprof/convert/op_stats_to_roofline_model.h b/xprof/convert/op_stats_to_roofline_model.h index c2eaad3d..4f4ab9cc 100644 --- a/xprof/convert/op_stats_to_roofline_model.h +++ b/xprof/convert/op_stats_to_roofline_model.h @@ -38,23 +38,24 @@ using tensorflow::profiler::roofline_model::RooflineModelRecord; RooflineModelRecord ConvertOpMetricsToRooflineModelRecord( const OpStats& op_stats, const OpMetrics& metrics, RecordType record_type, uint32_t step_num, uint64_t total_time_ps, - const 
RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed); + const RooflineModelDatabase& roofline_model_db, bool include_infeed_outfeed, + bool apply_time_scale_multiplier = false); RooflineModelRecord GenerateRooflineModelProgramRecord( const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type, uint32_t step_num, const RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed); + bool include_infeed_outfeed, bool apply_time_scale_multiplier = false); tsl::protobuf::RepeatedPtrField ConvertOpMetricsDbToRooflineModelRecords( const OpStats& op_stats, const OpMetricsDb& db, RecordType record_type, uint32_t step_num, const RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed); + bool include_infeed_outfeed, bool apply_time_scale_multiplier = false); tensorflow::profiler::roofline_model::RooflineModelDatabase ConvertOpStatsToRooflineModel(const tensorflow::profiler::OpStats& tf_op_stats, - bool include_infeed_outfeed); + bool include_infeed_outfeed, + bool apply_time_scale_multiplier = false); tensorflow::profiler::roofline_model::RooflineModelDatabase InitializeRooflineModelDatabaseFromOpStats(const OpStats& op_stats, @@ -63,30 +64,31 @@ InitializeRooflineModelDatabaseFromOpStats(const OpStats& op_stats, // duration including incomplete steps. inline void AddRooflineModelRecordForProfileDuration( const OpStats& op_stats, RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) { *roofline_model_db.mutable_roofline_model_record() = ConvertOpMetricsDbToRooflineModelRecords( op_stats, op_stats.device_op_metrics_db(), RecordType::ALL, - /*step_num=*/0, roofline_model_db, include_infeed_outfeed); + /*step_num=*/0, roofline_model_db, include_infeed_outfeed, + apply_time_scale_multiplier); } // Generate RooflineModelRecord for the HLO DB over complete steps only. 
inline void AddRooflineModelRecordsForCompleteSteps( const OpStats& op_stats, RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) { if (op_stats.has_hlo_metrics_db_complete_steps_only()) { *roofline_model_db.add_roofline_model_record() = GenerateRooflineModelProgramRecord( op_stats, op_stats.hlo_metrics_db_complete_steps_only(), RecordType::AVERAGE_STEP, /*step_num=*/0, roofline_model_db, - include_infeed_outfeed); + include_infeed_outfeed, apply_time_scale_multiplier); } } // Generate RooflineModelRecords for the per-step DBs. inline void AddRooflineModelRecordsPerStep( const OpStats& op_stats, RooflineModelDatabase& roofline_model_db, - bool include_infeed_outfeed) { + bool include_infeed_outfeed, bool apply_time_scale_multiplier = false) { for (const auto& step_info : op_stats.step_db().step_sequence()) { *roofline_model_db.add_roofline_model_record() = GenerateRooflineModelProgramRecord(