Skip to content

Commit 647c26f

Browse files
committed
Switch base: openai#1334 SP4096+DepthRecur+ParallelResid+MuonEqR (1.0897)
1 parent c2da73b commit 647c26f

2 files changed

Lines changed: 662 additions & 206 deletions

File tree

evaluate.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,19 +101,31 @@ def _make_job_command(commit_sha, branch=None):
101101
if LOCAL_VOLUME:
102102
data_setup = """
103103
CACHE_DIR=/mnt/pgolf-data/pgolf-cache
104-
if [ -f "$CACHE_DIR/.download_complete_sp4096" ]; then
105-
echo "Using cached SP4096 data from node-local volume"
104+
if [ -f "$CACHE_DIR/.download_complete" ]; then
105+
echo "Using cached data from node-local volume"
106+
export DATA_PATH=${DATA_PATH:-$CACHE_DIR/datasets/fineweb10B_sp1024}
107+
export TOKENIZER_PATH=${TOKENIZER_PATH:-$CACHE_DIR/tokenizers/fineweb_1024_bpe.model}
106108
else
107-
python data/cached_challenge_fineweb.py --train-shards 80 --variant sp4096
108-
mkdir -p $CACHE_DIR && cp -r data/datasets data/tokenizers $CACHE_DIR/ && touch $CACHE_DIR/.download_complete_sp4096
109+
python data/cached_challenge_fineweb.py --train-shards 80
110+
mkdir -p $CACHE_DIR && cp -r data/datasets data/tokenizers $CACHE_DIR/ && touch $CACHE_DIR/.download_complete
111+
export DATA_PATH=${DATA_PATH:-./data/datasets/fineweb10B_sp1024}
112+
export TOKENIZER_PATH=${TOKENIZER_PATH:-./data/tokenizers/fineweb_1024_bpe.model}
109113
fi
110114
"""
111115
else:
112116
data_setup = """
113-
if [ ! -f "data/datasets/.download_complete_sp4096" ]; then
114-
python data/cached_challenge_fineweb.py --train-shards 80 --variant sp4096
115-
touch data/datasets/.download_complete_sp4096
117+
# Auto-detect vocab size from train_gpt.py (default sp1024, supports sp4096+)
118+
VOCAB=$(grep -oP "VOCAB_SIZE['\"],\\s*\\K[0-9]+" train_gpt.py 2>/dev/null || echo "1024")
119+
[ "$VOCAB" = "" ] && VOCAB=1024
120+
SHARDS=80
121+
[ "$VOCAB" -gt 1024 ] && SHARDS=143
122+
echo "data_setup: vocab=$VOCAB shards=$SHARDS"
123+
if [ ! -f "data/datasets/.download_complete_sp${VOCAB}" ]; then
124+
python data/cached_challenge_fineweb.py --variant sp${VOCAB} --train-shards $SHARDS
125+
touch "data/datasets/.download_complete_sp${VOCAB}"
116126
fi
127+
export DATA_PATH=${DATA_PATH:-./data/datasets/fineweb10B_sp${VOCAB}}
128+
export TOKENIZER_PATH=${TOKENIZER_PATH:-./data/tokenizers/fineweb_${VOCAB}_bpe.model}
117129
"""
118130

119131
clone_setup = f"""

0 commit comments

Comments
 (0)