Skip to content

Commit 0b58962

Browse files
authored
Cp download parallel (#477)
* add more info on what chunk is downloading, and make chunk folder for each file (#469) * Download iso chunk folder (#470) * add more info on what chunk is downloading, and make chunk folder for each file * fix bug * comments * add .cache/sparsezoo/neuralmagic/ * Multiple download bug (#476) * src/sparsezoo/utils/download.py * revert readme
1 parent 3e9e322 commit 0b58962

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

src/sparsezoo/utils/download.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from pathlib import Path
2626
from queue import Queue
2727
from typing import Any, Callable, Dict, Optional
28+
from uuid import uuid4
2829

2930
import requests
3031
from tqdm import tqdm
@@ -103,15 +104,20 @@ def get_chunk_download_path(self, path: str) -> str:
103104
stub = path.split(os.path.sep)[-3]
104105
path = "_".join(path.split(os.path.sep)[-2:])
105106
file_name_as_folder = path.replace(".", "_")
107+
file_id = str(uuid4())[:4]
106108

109+
# Note: parallel download may cause multiple processes to download
110+
# the same file
107111
# save the chunks on a different folder than the root model folder
112+
# ~/.cache/sparsezoo/neuralmagic/chunks/stub/file_id/tokenizer_json/{chunk1, ...} # noqa
108113
return os.path.join(
109114
str(Path.home()),
110115
".cache",
111116
"sparsezoo",
112117
"neuralmagic",
113118
"chunks",
114119
stub,
120+
file_id,
115121
file_name_as_folder,
116122
)
117123

@@ -410,7 +416,7 @@ def combine_chunks_and_delete(self, download_path: str, progress_bar: tqdm) -> N
410416
combined_file.write(data)
411417
progress_bar.update(len(data))
412418

413-
shutil.rmtree(self.chunk_download_path)
419+
shutil.rmtree(os.path.dirname(self.chunk_download_path))
414420

415421
def get_chunk_file_path(self, file_range: str) -> str:
416422
"""

0 commit comments

Comments
 (0)