
Commit ab1ed89

enhance profiler config
Summary: allow users to specify the profiler schedule
1 parent eb13ba2 commit ab1ed89

2 files changed: 18 additions, 4 deletions

torchtitan/config/job_config.py

14 additions, 0 deletions

@@ -34,6 +34,20 @@ class Profiling:
     profile_freq: int = 10
     """How often to collect profile traces, in iterations"""
 
+    profiler_active: int = 1
+    """
+    The number of steps the profiler is active for in each profiling cycle.
+
+    This is used to configure torch.profiler.schedule.
+    """
+
+    profiler_warmup: int = 3
+    """
+    The number of warmup steps before the active steps in each profiling cycle.
+
+    This is used to configure torch.profiler.schedule.
+    """
+
     enable_memory_snapshot: bool = False
     """Whether to dump memory snapshot"""
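For context, these two new fields split each profiling cycle of profile_freq training steps into the wait, warmup, and active phases of torch.profiler.schedule. A minimal sketch of the resulting schedule, assuming the default values above (the plain variable names here are illustrative, not torchtitan's):

import torch

# Illustrative values: profile_freq is the existing trace interval,
# profiler_warmup / profiler_active are the new config fields.
profile_freq, profiler_warmup, profiler_active = 10, 3, 1

# Steps left over in each cycle are spent idle ("wait").
wait = profile_freq - (profiler_active + profiler_warmup)  # 10 - (1 + 3) = 6

# The profiler cycles through wait -> warmup -> active phases,
# emitting one trace per cycle, i.e. every profile_freq steps.
schedule = torch.profiler.schedule(
    wait=wait, warmup=profiler_warmup, active=profiler_active
)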

torchtitan/tools/profiling.py

4 additions, 4 deletions

@@ -14,9 +14,6 @@
 from torchtitan.config import Profiling as ProfilingConfig
 from torchtitan.tools.logging import logger
 
-# the number of warmup steps before the active step in each profiling cycle
-WARMUP = 3
-
 # how much memory allocation/free ops to record in memory snapshots
 MEMORY_SNAPSHOT_MAX_ENTRIES = 100000

@@ -58,7 +55,10 @@ def trace_handler(prof):
         if not os.path.exists(trace_dir):
             os.makedirs(trace_dir, exist_ok=True)
 
-        warmup, active = WARMUP, 1
+        warmup, active = (
+            profiling_config.profiler_warmup,
+            profiling_config.profiler_active,
+        )
         wait = profile_freq - (active + warmup)
         assert (
             wait >= 0
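Downstream, the computed wait/warmup/active values feed the profiler context that wraps the training loop. A rough sketch of how they could be passed to torch.profiler.profile, assuming a profiling_config object with the fields above (the trace handler and directory handling here are placeholders, not the module's actual implementation):

import os

import torch


def make_profiler(profiling_config, trace_dir: str):
    # Warmup/active now come from the config instead of the hard-coded WARMUP constant.
    warmup = profiling_config.profiler_warmup
    active = profiling_config.profiler_active
    wait = profiling_config.profile_freq - (active + warmup)
    assert wait >= 0, "profile_freq must be >= profiler_warmup + profiler_active"

    def trace_handler(prof):
        # Placeholder handler: export a Chrome trace for each completed cycle.
        os.makedirs(trace_dir, exist_ok=True)
        prof.export_chrome_trace(
            os.path.join(trace_dir, f"trace_step{prof.step_num}.json")
        )

    return torch.profiler.profile(
        activities=[
            torch.profiler.ProfilerActivity.CPU,
            torch.profiler.ProfilerActivity.CUDA,
        ],
        schedule=torch.profiler.schedule(wait=wait, warmup=warmup, active=active),
        on_trace_ready=trace_handler,
    )

Calling prof.step() once per training iteration then yields one trace every profile_freq iterations.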
