-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsubmission.json
More file actions
46 lines (46 loc) · 1.57 KB
/
submission.json
File metadata and controls
46 lines (46 loc) · 1.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
{
"name": "Connectome-JEPA",
"author": "Eric B.",
"github_id": "ericdatum",
"date": "2026-03-30",
"track": "non_record_16mb",
"val_bpb": 1.7942,
"val_bpb_std": 0.0028,
"val_bpb_seeds": [1.7955, 1.7909, 1.7961],
"val_loss": 3.0294,
"artifact_bytes": 1967950,
"model_bytes": 1920530,
"code_bytes": 47420,
"params": 3336051,
"training_time_seconds": 600,
"training_steps": 4175,
"hardware": "8x H100 SXM (Modal)",
"summary": "Sparse I/O bottleneck architecture inspired by the C. elegans connectome (300 neurons, 5084 synapses). 6 sparse hops with gated skip connections, 4-head cross-position attention, JEPA + SIGReg training. First JEPA submission and first biologically inspired architecture submission. 1.97MB artifact (12.3% of budget). Ablations show that the sparse I/O bottleneck matters, but the biological topology does not help for language.",
"architecture": {
"type": "connectome-jepa",
"embed_dim": 768,
"vocab_size": 1024,
"num_hops": 6,
"cross_attn_every": 2,
"skip_every": 2,
"head_dim": 128,
"num_attn_heads": 4,
"neurons": 300,
"edges": 5084,
"sensory_neurons": 88,
"motor_neurons": 123,
"connectome_density": 0.0565
},
"training": {
"optimizer": "AdamW",
"learning_rate": 0.001,
"lr_schedule": "cosine",
"weight_decay": 0.1,
"batch_tokens": 524288,
"seq_len": 1024,
"warmup_steps": 100,
"jepa_weight": 0.5,
"sigreg_weight": 0.0001,
"mask_ratio": 0.3
}
}