-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathdata_structure.py
186 lines (146 loc) · 5.29 KB
/
data_structure.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#!/usr/bin/env python
# coding: utf-8
"""
data_structure.py: Main utilities about how to handle the data structure
"""
from typing import List
########
# Imports
########
import os.path
import warnings
########
# Main data folder
########
# Root directory that every dataset path below is built from.
DATA_FOLDER = os.path.normpath("data")
########
# DCM dataset
########
# Directories of the DCM dataset.
DCM_FOLDER = os.path.join(DATA_FOLDER, "dcm_dataset.git")
DCM_IMAGES_FOLDER = os.path.join(DCM_FOLDER, "images")
DCM_ANNOTATIONS_FOLDER = os.path.join(DCM_FOLDER, "groundtruth")
# Subset listing files, all stored directly inside the DCM folder.
DCM_TRAIN_FILE, DCM_VALIDATION_FILE, DCM_TEST_FILE = [
    os.path.join(DCM_FOLDER, listing)
    for listing in ("train.txt", "val.txt", "test.txt")
]
# Lookup table from a short key to the corresponding DCM path.
DCM_FILENAMES = {
    "folder": DCM_FOLDER,
    "images": DCM_IMAGES_FOLDER,
    "annotations": DCM_ANNOTATIONS_FOLDER,
    "train": DCM_TRAIN_FILE,
    "validation": DCM_VALIDATION_FILE,
    "test": DCM_TEST_FILE,
}
def ASSERT_DCM() -> None:
    """Check that the DCM dataset is laid out as expected on disk.

    The "folder", "images" and "annotations" entries of ``DCM_FILENAMES``
    must be existing directories, and the "train", "validation" and "test"
    entries must be existing regular files.

    Raises:
        AssertionError: If one of those paths is missing or is not of the
            expected kind. The error is raised explicitly instead of through
            an ``assert`` statement so the check still runs under
            ``python -O``.
    """
    for key in ("folder", "images", "annotations"):
        path = DCM_FILENAMES[key]
        # isdir() is False for a missing path, so it also covers exists().
        if not os.path.isdir(path):
            raise AssertionError(
                "DCM '{}' path is missing or is not a directory: {}".format(
                    key, path
                )
            )
    for key in ("train", "validation", "test"):
        path = DCM_FILENAMES[key]
        # isfile() is False for a missing path, so it also covers exists().
        if not os.path.isfile(path):
            raise AssertionError(
                "DCM '{}' path is missing or is not a file: {}".format(
                    key, path
                )
            )
def DCM_GET_FILES_LIST(subset_filename: str) -> List[str]:
    """Read the entries listed in a subset file.

    Args:
        subset_filename (str): Path of the subset file to read.

    Returns:
        List[str]: One entry per line, with trailing whitespace removed.
            Lines that are empty once stripped are left out.
    """
    with open(subset_filename) as handle:
        stripped = [raw.rstrip() for raw in handle.readlines()]
    return [entry for entry in stripped if entry != ""]
def DCM_IMAGE_PATH_FROM_NAME(filename: str) -> str:
    """Build the path of a DCM image from its name.

    Args:
        filename (str): Image name, without the ".jpg" extension.

    Returns:
        str: ``<DCM images folder>/<filename>.jpg``.
    """
    jpg_name = filename + ".jpg"
    return os.path.join(DCM_FILENAMES["images"], jpg_name)
def DCM_ANNOTATIONS_PATH_FROM_NAME(filename: str) -> str:
    """Build the path of a DCM annotations file from an image name.

    Args:
        filename (str): Image name, without any extension.

    Returns:
        str: ``<DCM annotations folder>/<filename>.txt``.
    """
    txt_name = filename + ".txt"
    return os.path.join(DCM_FILENAMES["annotations"], txt_name)
def DCM_READ_ANNOTATIONS(annot_path: str) -> List[List[int]]:
    """Parse an annotations file into a list of integer records.

    Each non-blank line must hold five whitespace-separated integers:
    ``class_id x1_frame y1_frame x2_frame y2_frame``.

    Args:
        annot_path (str): Path of the annotations file to read.

    Returns:
        List[List[int]]: One
            ``[class_id, x1_frame, y1_frame, x2_frame, y2_frame]`` list per
            non-blank line, in file order.

    Raises:
        ValueError: If a non-blank line does not hold exactly five fields,
            or if a field is not a valid integer.
    """
    annotations_list_int = []
    with open(annot_path) as annot:
        for line_number, line in enumerate(annot, start=1):
            # split() without an argument splits on any run of whitespace and
            # drops the trailing newline, so repeated spaces or tabs between
            # fields no longer produce empty fields.
            fields = line.split()
            if not fields:
                # Blank line (e.g. a trailing empty line): nothing to parse.
                continue
            if len(fields) != 5:
                raise ValueError(
                    "{}:{}: expected 5 fields, got {}".format(
                        annot_path, line_number, len(fields)
                    )
                )
            annotations_list_int.append([int(field) for field in fields])
    return annotations_list_int
########
# DCM cropped frames
########
# Directories of the "cropped DCM" dataset.
DCM_CROPPED_FOLDER = os.path.join(DATA_FOLDER, "dcm_cropped")
DCM_CROPPED_IMAGES_FOLDER = os.path.join(DCM_CROPPED_FOLDER, "images")
# Subset listing files, all stored directly inside the cropped folder.
(
    DCM_CROPPED_TRAIN_FILE,
    DCM_CROPPED_VALIDATION_FILE,
    DCM_CROPPED_TEST_FILE,
) = [
    os.path.join(DCM_CROPPED_FOLDER, listing)
    for listing in ("train.txt", "validation.txt", "test.txt")
]
# Lookup table from a short key to the corresponding "cropped DCM" path.
DCM_CROPPED_FILENAMES = {
    "folder": DCM_CROPPED_FOLDER,
    "images": DCM_CROPPED_IMAGES_FOLDER,
    "train": DCM_CROPPED_TRAIN_FILE,
    "validation": DCM_CROPPED_VALIDATION_FILE,
    "test": DCM_CROPPED_TEST_FILE,
}
def ASSERT_DCM_CROPPED() -> None:
    """Check that the "cropped DCM" dataset is laid out as expected on disk.

    The "folder" and "images" entries of ``DCM_CROPPED_FILENAMES`` must be
    existing directories, and the "train", "validation" and "test" entries
    must be existing regular files.

    Raises:
        AssertionError: If one of those paths is missing or is not of the
            expected kind. The error is raised explicitly instead of through
            an ``assert`` statement so the check still runs under
            ``python -O``.
    """
    for key in ("folder", "images"):
        path = DCM_CROPPED_FILENAMES[key]
        # isdir() is False for a missing path, so it also covers exists().
        if not os.path.isdir(path):
            raise AssertionError(
                "Cropped DCM '{}' path is missing or is not a directory: {}"
                .format(key, path)
            )
    for key in ("train", "validation", "test"):
        path = DCM_CROPPED_FILENAMES[key]
        # isfile() is False for a missing path, so it also covers exists().
        if not os.path.isfile(path):
            raise AssertionError(
                "Cropped DCM '{}' path is missing or is not a file: {}"
                .format(key, path)
            )
def CREATE_DCM_CROPPED() -> None:
    """Make sure the "cropped DCM" folder exists, creating it if needed.

    When the path already exists it is asserted to be a directory and a
    warning is emitted; the folder is then created with ``exist_ok=True``.
    """
    cropped_root = DCM_CROPPED_FILENAMES["folder"]
    already_present = os.path.exists(cropped_root)
    if already_present:
        assert os.path.isdir(cropped_root)
        warnings.warn("The \"cropped DCM\" folder already exists")
    os.makedirs(cropped_root, exist_ok=True)
def DCM_CROPPED_IMAGE_PATH_FROM_NAME(filename: str) -> str:
    """Build the path of a "cropped DCM" image from its name.

    As a side effect, the directory that will contain the image is created
    if it does not exist yet.

    Args:
        filename (str): Image name, without the ".jpg" extension.

    Returns:
        str: ``<cropped DCM images folder>/<filename>.jpg``.
    """
    jpg_name = filename + ".jpg"
    target = os.path.join(DCM_CROPPED_FILENAMES["images"], jpg_name)
    parent_dir = os.path.dirname(target)
    os.makedirs(parent_dir, exist_ok=True)
    return target