|
 14 |  14 | from tqdm import tqdm
 15 |  15 | import argparse
 16 |  16 | import optparse
    |  17 | +from .lm_config import *
    |  18 | +
    |  19 | +# Load the default model configuration
    |  20 | +LM_MODEL_CONFIG = [
    |  21 | +    EMBEDDING_SIZE,
    |  22 | +    BATCH_SIZE,
    |  23 | +    BLOCK_SIZE,
    |  24 | +    LEARNING_RATE,
    |  25 | +    STEPS,
    |  26 | +    HEAD_COUNT,
    |  27 | +    LAYER_COUNT,
    |  28 | +    DROPOUT
    |  29 | +]
 17 |  30 |
 18 |  31 | # Set the device to use for training
 19 |  32 | device = "cuda" if torch.cuda.is_available() else "cpu"

@@ -404,13 +417,13 @@ def useLocal(filename, model_name="tokenizer_models/umb100k-1.model"):

404 | 417 | if __name__ == "__main__":
405 | 418 |     parser=argparse.ArgumentParser(
406 | 419 |         description="""Train a language model on a dataset and generate text""")
407 |     | -    parser.add_argument('-t', '--tokenizer', type=str, default="tokenizer", help='Specify the tokenizer to use (default: tokenizer)')
408 |     | -    parser.add_argument('-m', '--tokenizer_model', type=str, default="tokenizer_models/umb100k-1.model", help='Specify the tokenizer model to use (default: tokenizer_models/umb100k-1.model)')
    | 420 | +    parser.add_argument('-t', '--tokenizer', type=str, default=TOKENIZER_NAME, help=f'Specify the tokenizer to use (default: {TOKENIZER_NAME})')
    | 421 | +    parser.add_argument('-m', '--tokenizer_model', type=str, default=TOKENIZER_MODEL, help=f'Specify the tokenizer model to use (default: {TOKENIZER_MODEL})')
409 | 422 |     parser.add_argument('-l', '--load_model', type=str, default="untrained", help='Specify the model to use [model_path] (default: untrained)')
410 | 423 |     parser.add_argument('-s', '--save_model', type=str, default="default", help='Specify the model to save the model to [model_path] (default: same as load_model path, no_save: do not save model)')
411 |     | -    parser.add_argument('-d', '--data', type=str, default="data/threebody.txt", help='Specify the data to use for training (default: data/threebody.txt)')
    | 424 | +    parser.add_argument('-d', '--data', type=str, default=TRAIN_DATA_PATH, help=f'Specify the data to use for training (default: {TRAIN_DATA_PATH})')
412 | 425 |     parser.add_argument('--no_train', type=bool, default=False, help='Do not train the model')
413 |     | -    parser.add_argument('params', nargs='*', default=[8, 4, 8, 1e-3, 5000, 4, 3, 0.1], help='Training parameters for the model [embedding_size, batch_size, block_size, learning_rate, steps, head_count, layer_count, dropout]\n(default: [4, 8, 8, 1e-3, 5000, 4, 3, 0.1])')
    | 426 | +    parser.add_argument('params', nargs='*', default=LM_MODEL_CONFIG, help=f'Training parameters for the model [embedding_size, batch_size, block_size, learning_rate, steps, head_count, layer_count, dropout]\n(default: {LM_MODEL_CONFIG})')
414 | 427 |     # python
415 | 428 |     args=parser.parse_args()
416 | 429 |     print(args)
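
The starred import at new line 17 pulls its constants from a sibling lm_config.py module that is not part of this diff. A minimal sketch of what that module would need to define, with values copied from the hardcoded defaults that the removed lines used (the real file may differ):

    # lm_config.py -- hypothetical sketch; values mirror the old hardcoded
    # argparse defaults removed in this diff, so CLI behavior stays unchanged.

    # Tokenizer and data settings (old -t / -m / -d defaults)
    TOKENIZER_NAME = "tokenizer"
    TOKENIZER_MODEL = "tokenizer_models/umb100k-1.model"
    TRAIN_DATA_PATH = "data/threebody.txt"

    # Model hyperparameters, in the order the positional `params` argument expects:
    # [embedding_size, batch_size, block_size, learning_rate, steps,
    #  head_count, layer_count, dropout]
    EMBEDDING_SIZE = 8
    BATCH_SIZE = 4
    BLOCK_SIZE = 8
    LEARNING_RATE = 1e-3
    STEPS = 5000
    HEAD_COUNT = 4
    LAYER_COUNT = 3
    DROPOUT = 0.1

One caveat worth noting: because `params` uses nargs='*' with no type= converter, values typed on the command line arrive as strings, while the LM_MODEL_CONFIG defaults stay numeric, so downstream code presumably casts each parameter before use.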
|
|