From 46a7bd9a9c3a8fb12752e5a75a24087b642d16ed Mon Sep 17 00:00:00 2001 From: lovepro240 Date: Wed, 6 Nov 2024 13:18:57 +0900 Subject: [PATCH] first Modify_lovepro240: lower MAX_EXAMPLES_IN_SHARD from 500000 to 100000 --- preprocess.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/preprocess.py b/preprocess.py index 4c3b06e..b62dd18 100644 --- a/preprocess.py +++ b/preprocess.py @@ -11,7 +11,8 @@ MAX_INTERNAL_PATHS = 0 MAX_PATH_LENGTH = 0 MAX_RELATIVE_PATH_LENGTH = 0 -MAX_EXAMPLES_IN_SHARD = 500000 +#MAX_EXAMPLES_IN_SHARD = 500000 +MAX_EXAMPLES_IN_SHARD = 100000 def save_dictionaries(dataset_name, subtok_to_count, node_to_count, max_contexts, max_internal_paths, max_path_length, max_path_width, max_relative_path_length, max_child_id):