Skip to content

[MLPerf] Add DLRM-DCNv2 #144

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 38 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
5ca57a7
Add DLRM-DCNv2 example for MLPerf
abheesht17 Aug 18, 2025
090098d
Fix table_stacking arg
abheesht17 Aug 18, 2025
1dd9422
Rename dir
abheesht17 Aug 18, 2025
25233c9
Add blank __init__ file to configs
abheesht17 Aug 18, 2025
3d519d2
Fix imports
abheesht17 Aug 18, 2025
eef6568
Fix imports
abheesht17 Aug 18, 2025
fc77ad4
Fix imports
abheesht17 Aug 18, 2025
0a31e0f
Fix imports
abheesht17 Aug 18, 2025
53e9c23
Fix num_processes
abheesht17 Aug 18, 2025
f22b5ff
Add bash script
abheesht17 Aug 18, 2025
41f2977
Add bash script
abheesht17 Aug 18, 2025
09ca14c
Modify bash script to take in config name
abheesht17 Aug 18, 2025
2a1c759
Add way to load real dataset
abheesht17 Aug 18, 2025
b50a7f5
Add way to load real dataset (1)
abheesht17 Aug 18, 2025
fe8dc41
Add dataset path
abheesht17 Aug 18, 2025
ca297d4
Dataloader fixes (1)
abheesht17 Aug 18, 2025
35c3d61
Dataloader fixes (2)
abheesht17 Aug 18, 2025
9cc9b88
Dataloader fixes (3)
abheesht17 Aug 18, 2025
a0431ba
Feature naming edit
abheesht17 Aug 19, 2025
2b9538f
Feature naming edit
abheesht17 Aug 19, 2025
187ccd5
Feature naming edit
abheesht17 Aug 19, 2025
a98d431
Feature naming edit
abheesht17 Aug 19, 2025
d15957d
Actual dataset loading fixes (1)
abheesht17 Aug 20, 2025
8870c8d
Fix feature spec dtypes
abheesht17 Aug 20, 2025
e971f19
Fix feature spec dtypes
abheesht17 Aug 20, 2025
02c1881
Allow different batch sizes from file batch size
abheesht17 Aug 20, 2025
b5db304
Allow different batch sizes from file batch size (fixes)
abheesht17 Aug 20, 2025
e98bdf9
Fix feature naming
abheesht17 Aug 20, 2025
73ca477
Fix batching
abheesht17 Aug 20, 2025
c2ad8a9
Fix batching
abheesht17 Aug 20, 2025
a080143
Fix dense features
abheesht17 Aug 20, 2025
28b7189
Fix dense features concat
abheesht17 Aug 20, 2025
a47817d
Rename multi_hot_size to feature_list_length
abheesht17 Aug 20, 2025
9a33f09
Rename sparse to lookup
abheesht17 Aug 20, 2025
a66e1c6
Debug
abheesht17 Aug 20, 2025
a56532a
Try out XLA flags
abheesht17 Aug 20, 2025
0a9d00b
Format
abheesht17 Aug 20, 2025
42c4022
Format
abheesht17 Aug 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
Empty file.
166 changes: 166 additions & 0 deletions examples/ml_perf/configs/datasets/dummy_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from keras.utils import Config

# === Dataset ===
dataset_config = Config()
# No file pattern is set by this config.
dataset_config.file_pattern = None

# Features
dataset_config.label = "clicked"
# Thirteen dense feature names: "int-feature-1" through "int-feature-13".
dataset_config.dense = ["int-feature-" + str(index) for index in range(1, 14)]

# Each row below is (feature number, vocabulary_size, feature_list_length).
# A row is expanded into a dict keyed by "name", "vocabulary_size",
# "feature_list_length" and "new_name", where the two names are
# "categorical-feature-<number>" and "cat_<number>" respectively.
dataset_config.lookup = [
    {
        "name": f"categorical-feature-{number}",
        "vocabulary_size": vocabulary_size,
        "feature_list_length": feature_list_length,
        "new_name": f"cat_{number}",
    }
    for number, vocabulary_size, feature_list_length in (
        (14, 40000000, 3),
        (15, 39060, 2),
        (16, 17295, 1),
        (17, 7424, 2),
        (18, 20265, 6),
        (19, 3, 1),
        (20, 7122, 1),
        (21, 1543, 1),
        (22, 63, 1),
        (23, 40000000, 7),
        (24, 3067956, 3),
        (25, 405282, 8),
        (26, 10, 1),
        (27, 2209, 6),
        (28, 11938, 9),
        (29, 155, 5),
        (30, 4, 1),
        (31, 976, 1),
        (32, 14, 1),
        (33, 40000000, 12),
        (34, 40000000, 100),
        (35, 40000000, 27),
        (36, 590152, 10),
        (37, 12973, 3),
        (38, 108, 1),
        (39, 36, 1),
    )
]
Empty file.
19 changes: 19 additions & 0 deletions examples/ml_perf/configs/models/default_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from keras.utils import Config

# === Model ===
# All model hyperparameters are passed to the Config constructor as keyword
# arguments, in the same order in which they were previously assigned.
model_config = Config(
    # Embedding
    embedding_dim=128,
    allow_id_dropping=True,
    embedding_threshold=21000,
    max_ids_per_partition=4096,
    max_unique_ids_per_partition=2048,
    learning_rate=0.005,
    # MLP
    bottom_mlp_dims=[512, 256, 128],
    top_mlp_dims=[1024, 1024, 512, 256, 1],
    # DCN
    num_dcn_layers=3,
    dcn_projection_dim=512,
)
Empty file.
7 changes: 7 additions & 0 deletions examples/ml_perf/configs/training/default_training.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from keras.utils import Config

# === Training Hyperparameters ===
# Default training settings, supplied as constructor keyword arguments.
training_config = Config(
    learning_rate=0.005,
    global_batch_size=128,
    num_epochs=1,
)
16 changes: 16 additions & 0 deletions examples/ml_perf/configs/v6e_16.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from keras.utils import Config

from .datasets.dummy_dataset import dataset_config
from .models.default_model import model_config
from .training.default_training import training_config

# Experiment config for "v6e_16": names the experiment and its output
# directory, and attaches the imported dataset, model and training configs.
config = Config(
    experiment_name="v6e_16",
    model_dir="./v6e_16",
    dataset=dataset_config,
    model=model_config,
    training=training_config,
)

# Freeze once everything is attached (see the Keras `Config.freeze` docs).
config.freeze()
16 changes: 16 additions & 0 deletions examples/ml_perf/configs/v6e_8.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from keras.utils import Config

from .datasets.dummy_dataset import dataset_config
from .models.default_model import model_config
from .training.default_training import training_config

# Experiment config for "v6e_8": names the experiment and its output
# directory, and attaches the imported dataset, model and training configs.
config = Config(
    experiment_name="v6e_8",
    model_dir="./v6e_8",
    dataset=dataset_config,
    model=model_config,
    training=training_config,
)

# Freeze once everything is attached (see the Keras `Config.freeze` docs).
config.freeze()
27 changes: 27 additions & 0 deletions examples/ml_perf/configs/v6e_8_full_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from keras.utils import Config

from .datasets.dummy_dataset import dataset_config
from .models.default_model import model_config
from .training.default_training import training_config

# Experiment config for "v6e_8_full_dataset": attaches the imported dataset,
# model and training configs, then overrides a few dataset/training values.
config = Config(
    experiment_name="v6e_8_full_dataset",
    model_dir="./v6e_8_full_dataset",
    dataset=dataset_config,
    model=model_config,
    training=training_config,
)

# NOTE(review): `config.dataset` / `config.training` are the imported
# module-level objects, so the assignments below presumably modify those
# shared objects for every other importer in the same process — confirm
# this is intended.

# Training and validation currently point at the same file.
config.dataset.file_pattern = (
    "gs://qinyiyan-vm/mlperf-dataset/criteo_merge_balanced_4224/"
    "train-00000-of-01024tfrecord"
)
config.dataset.val_file_pattern = (
    "gs://qinyiyan-vm/mlperf-dataset/criteo_merge_balanced_4224/"
    "train-00000-of-01024tfrecord"
)
# The path which we are reading from already has the batched dataset.
config.dataset.file_batch_size = 4224

# NOTE(review): this sets `batch_size`, whereas the imported training config
# defines `global_batch_size` — confirm which key the training script reads.
config.training.batch_size = 256

# Freeze once everything is set (see the Keras `Config.freeze` docs).
config.freeze()
Loading