Skip to content

Commit daaad1f

Browse files
committed
Try: LN Scale + bigram, no VE (bigram compresses well)
1 parent a82f3b6 commit daaad1f

File tree

1 file changed

+2
-2
lines changed
  • records/track_10min_16mb/2026-03-25_11L_ParallelMuon_MLP3x_TTT

1 file changed

+2
-2
lines changed

records/track_10min_16mb/2026-03-25_11L_ParallelMuon_MLP3x_TTT/train_gpt.py

Lines changed: 2 additions and 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,12 @@ class Hyperparameters:
8888
xsa_last_n = int(os.environ.get("XSA_LAST_N", 4))
8989
rope_dims = int(os.environ.get("ROPE_DIMS", 16))
9090
ln_scale = bool(int(os.environ.get("LN_SCALE", "1")))
91-
ve_enabled = bool(int(os.environ.get("VE_ENABLED", "1")))
91+
ve_enabled = bool(int(os.environ.get("VE_ENABLED", "0")))
9292
ve_dim = int(os.environ.get("VE_DIM", 32))
9393
ve_layers = os.environ.get("VE_LAYERS", "9,10")
9494

9595
use_smeargate = bool(int(os.environ.get("USE_SMEARGATE", "1")))
96-
use_bigramhash = bool(int(os.environ.get("USE_BIGRAMHASH", "0")))
96+
use_bigramhash = bool(int(os.environ.get("USE_BIGRAMHASH", "1")))
9797
use_value_residual = bool(int(os.environ.get("USE_VALUE_RESIDUAL", "1")))
9898
use_gated_attention = bool(int(os.environ.get("USE_GATED_ATTENTION", "1")))
9999

0 commit comments

Comments (0)