
Commit cb738d9

added lm_config.py file for lm_model.py
1 parent 02cb0c1 commit cb738d9

File tree

2 files changed: +33 -4 lines changed

gpt/lm_config.py

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+# Model config
+EMBEDDING_SIZE=8
+BATCH_SIZE=4
+BLOCK_SIZE=8
+STEPS=5000
+LEARNING_RATE=0.0001
+HEAD_COUNT=4
+LAYER_COUNT=3
+DROPOUT=0.2
+
+# Tokenizer Config
+TOKENIZER_NAME="tiktoken"
+TOKENIZER_MODEL="o200k_base"
+
+# Data Config
+TRAIN_DATA_PATH="data/threebody.txt"
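The tokenizer settings above point at the tiktoken library and its "o200k_base" encoding. The loading code itself is not part of this commit, so the helper below is only a minimal sketch, under the assumption that the model resolves TOKENIZER_NAME and TOKENIZER_MODEL roughly like this (run from inside gpt/ so lm_config imports as a plain module):

# Hypothetical loader, not part of this commit: resolves the tokenizer named
# in lm_config.py via tiktoken and counts the tokens in the training data.
import tiktoken

from lm_config import TOKENIZER_MODEL, TOKENIZER_NAME, TRAIN_DATA_PATH

def load_tokenizer():
    # Only the "tiktoken" backend from the config is handled in this sketch.
    if TOKENIZER_NAME == "tiktoken":
        return tiktoken.get_encoding(TOKENIZER_MODEL)  # e.g. "o200k_base"
    raise ValueError(f"unsupported tokenizer: {TOKENIZER_NAME}")

if __name__ == "__main__":
    enc = load_tokenizer()
    with open(TRAIN_DATA_PATH, encoding="utf-8") as f:
        tokens = enc.encode(f.read())
    print(f"{len(tokens)} tokens in {TRAIN_DATA_PATH}")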

gpt/lm_model.py

Lines changed: 17 additions & 4 deletions
@@ -14,6 +14,19 @@
 from tqdm import tqdm
 import argparse
 import optparse
+from .lm_config import *
+
+# Load the default model configuration
+LM_MODEL_CONFIG = [
+    EMBEDDING_SIZE,
+    BATCH_SIZE,
+    BLOCK_SIZE,
+    LEARNING_RATE,
+    STEPS,
+    HEAD_COUNT,
+    LAYER_COUNT,
+    DROPOUT
+]
 
 # Set the device to use for training
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -404,13 +417,13 @@ def useLocal(filename, model_name="tokenizer_models/umb100k-1.model"):
 if __name__ == "__main__":
     parser=argparse.ArgumentParser(
         description="""Train a language model on a dataset and generate text""")
-    parser.add_argument('-t', '--tokenizer', type=str, default="tokenizer", help='Specify the tokenizer to use (default: tokenizer)')
-    parser.add_argument('-m', '--tokenizer_model', type=str, default="tokenizer_models/umb100k-1.model", help='Specify the tokenizer model to use (default: tokenizer_models/umb100k-1.model)')
+    parser.add_argument('-t', '--tokenizer', type=str, default=TOKENIZER_NAME, help=f'Specify the tokenizer to use (default: {TOKENIZER_NAME})')
+    parser.add_argument('-m', '--tokenizer_model', type=str, default=TOKENIZER_MODEL, help=f'Specify the tokenizer model to use (default: {TOKENIZER_MODEL})')
     parser.add_argument('-l', '--load_model', type=str, default="untrained", help='Specify the model to use [model_path] (default: untrained)')
     parser.add_argument('-s', '--save_model', type=str, default="default", help='Specify the model to save the model to [model_path] (default: same as load_model path, no_save: do not save model)')
-    parser.add_argument('-d', '--data', type=str, default="data/threebody.txt", help='Specify the data to use for training (default: data/threebody.txt)')
+    parser.add_argument('-d', '--data', type=str, default=TRAIN_DATA_PATH, help=f'Specify the data to use for training (default: {TRAIN_DATA_PATH})')
     parser.add_argument('--no_train', type=bool, default=False, help='Do not train the model')
-    parser.add_argument('params', nargs='*', default=[8, 4, 8, 1e-3, 5000, 4, 3, 0.1], help='Training parameters for the model [embedding_size, batch_size, block_size, learning_rate, steps, head_count, layer_count, dropout]\n(default: [4, 8, 8, 1e-3, 5000, 4, 3, 0.1])')
+    parser.add_argument('params', nargs='*', default=LM_MODEL_CONFIG, help=f'Training parameters for the model [embedding_size, batch_size, block_size, learning_rate, steps, head_count, layer_count, dropout]\n(default: {LM_MODEL_CONFIG})')
     # python
     args=parser.parse_args()
     print(args)
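With this change the positional params argument defaults to LM_MODEL_CONFIG, so running without extra arguments picks up the values from lm_config.py. How the list is unpacked further down in lm_model.py is outside this diff; the snippet below is only a sketch under that assumption, and it casts each value because arguments supplied on the command line arrive as strings, while the LM_MODEL_CONFIG defaults are already numeric.

# Hypothetical unpacking step (not shown in this diff): map the eight
# positional params onto named hyperparameters, casting because CLI values
# are strings while the LM_MODEL_CONFIG defaults are numbers.
(embedding_size, batch_size, block_size, learning_rate,
 steps, head_count, layer_count, dropout) = args.params
embedding_size = int(embedding_size)
batch_size = int(batch_size)
block_size = int(block_size)
learning_rate = float(learning_rate)
steps = int(steps)
head_count = int(head_count)
layer_count = int(layer_count)
dropout = float(dropout)

Note that the relative import from .lm_config import * means lm_model.py has to be run as a module from the repository root rather than as a standalone script, e.g. something like:

python -m gpt.lm_model -t tiktoken -m o200k_base -d data/threebody.txt 8 4 8 0.0001 5000 4 3 0.2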
