Skip to content

Commit b9a2c63

Browse files
updates
1 parent ce70e18 commit b9a2c63

File tree

10 files changed

+91
-39
lines changed

10 files changed

+91
-39
lines changed

ML/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828
from torch.hub import *
2929
import torchtext.functional as F
3030
import warnings
31+
import torch.multiprocessing
3132

32-
warnings.filterwarnings("ignore")
3333
print(torch.__version__, torchvision.__version__, torchtext.__version__)
34+
torch.multiprocessing.set_sharing_strategy("file_system")
35+
warnings.filterwarnings("ignore")
3436
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"
3537
# os.environ["WANDB_SILENT"] = "true"
3638
PROJECT_NAME = "NLP-Disaster Tweets"

ML/dataset/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Import all of the files in the dataset loading process such as `loader`, `main_loaders`, `valid_loaders`"""
2+
13
from ML.dataset.loader import *
24
from ML.dataset.main_loaders import *
35
from ML.dataset.valid_loaders import *

ML/dataset/loader.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,21 @@
11
from ML import *
22

3+
"""This file contains the main loader class, which is inherited from in `main_loaders` and `valid_loaders`"""
4+
35

46
class Loader(Dataset):
5-
def __init__(self, path: str, transform: bool = None) -> None:
7+
def __init__(self, path: str, transform: torchtext.transforms) -> None:
8+
"""initialization of the Loader class
9+
10+
Keyword arguments:
11+
path -- path of the .csv file to load
12+
transform -- the transformation to be applied to the data
13+
Return: None
14+
"""
615
self.path = path
716
self.transform = transform
817
self.data: pd.DataFrame = pd.read_csv(self.path)
918

1019
def __len__(self) -> int:
20+
"""returns the length of the dataset"""
1121
return len(self.data)

ML/dataset/main_loaders.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
from ML import *
22
from ML.dataset.loader import *
33

4+
"""Contains the main dataloader used to load the training and testing data"""
5+
46

57
class Main_DL(Loader):
68
def __init__(
@@ -11,6 +13,15 @@ def __init__(
1113
batch_size: int = 32,
1214
**kwargs,
1315
) -> None:
16+
"""initialization of the Main Dataloader which inherits from the `Loader` class
17+
18+
Keyword arguments:
19+
train -- bool, if the data is for training or testing
20+
test_split -- float between 0 and 1
21+
seed -- int, seed used to keep results reproducible
22+
batch_size -- int, the size of the batches
23+
Return: None
24+
"""
1425
super().__init__(**kwargs)
1526
self.X = self.data["text"].to_numpy()
1627
self.y = self.data["target"].to_numpy()
@@ -30,6 +41,7 @@ def __init__(
3041
self.get_batches()
3142

3243
def get_batches(self) -> None:
44+
"""create the batches for training"""
3345
X = self.X_train if self.train else self.X_test
3446
y = self.y_train if self.train else self.y_test
3547
X_batches = []
@@ -53,6 +65,12 @@ def get_batches(self) -> None:
5365
self.y_test = np.array(y_batches)
5466

5567
def __getitem__(self, index) -> Tuple[torch.tensor, torch.tensor]:
68+
"""get a specific item using a specific index
69+
70+
Keyword arguments:
71+
index -- the index of the item to retrieve
72+
Return: Tuple
73+
"""
5674
if self.train:
5775
return (
5876
self.X_train[index],
@@ -64,4 +82,8 @@ def __getitem__(self, index) -> Tuple[torch.tensor, torch.tensor]:
6482
)
6583

6684
def __len__(self) -> int:
85+
"""get the length / no. of batches of the dataset
86+
87+
Return: Int
88+
"""
6789
return len(self.y_train) if self.train else len(self.y_test)

ML/dataset/valid_loaders.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
from ML import *
22
from ML.dataset.loader import *
33

4+
"""Contains the validation dataloader used to load the validation data"""
5+
46

57
class Valid_Loader(Loader):
68
def __init__(self, *args) -> None:
9+
"""Initialization of the Valid Loader which inherits from the Loader class"""
710
super().__init__(*args)
811
self.data["id"].dropna(inplace=True)
912
self.X = self.data["text"].to_numpy()
1013
self.ids = self.data["id"].to_numpy()
1114
print(len(self.X), len(self.ids))
1215

1316
def __getitem__(self, index) -> np.array:
17+
"""get a specific item according to the index given
18+
19+
Keyword arguments:
20+
index -- The index of the item
21+
Return: Tuple
22+
"""
1423
return (self.ids[index], [self.transform(self.X[index])])

ML/helper_functions/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1+
from ML.helper_functions.clearcache import *
12
from ML.helper_functions.load_data import *
23
from ML.helper_functions.test import *
34
from ML.helper_functions.train import *
4-
from ML.helper_functions.transformations.transformer import *
5+
from ML.helper_functions.transformations import *

ML/helper_functions/test.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@ def test(self) -> Dict:
4343
f1_tot += f1score
4444
l_tot += loss.item()
4545
n += 1
46-
print(loss.item(), l_tot, l_tot / n)
4746
return {
4847
f"{self.name} precision": p_tot / n,
4948
f"{self.name} recall": r_tot / n,
@@ -55,10 +54,10 @@ def test(self) -> Dict:
5554
def make_predictions(self, run_name: str, epoch: int) -> pd.DataFrame:
5655
ids = []
5756
target = []
58-
for i, X in enumerate(self.valid_dataloader):
57+
for _id, X in self.valid_dataloader:
5958
X = F.to_tensor(X, padding_value=1).to("cuda")
6059
pred = torch.argmax(torch.softmax(self.model(X), dim=1), dim=1).squeeze().cpu().item()
61-
ids.append(i)
60+
ids.append(_id.item())
6261
target.append(pred)
6362
if run_name not in os.listdir("./ML/predictions/"):
6463
os.mkdir(f"./ML/predictions/{run_name}")

ML/helper_functions/train.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,3 +72,7 @@ def train(self, run_name: str) -> None:
7272
self.model.train()
7373
wandb.save()
7474
wandb.finish()
75+
self.save_model(run_name)
76+
77+
def save_model(self, run_name: str) -> None:
78+
pass

run.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,38 @@
11
from ML import *
22

3-
train_data_loader, test_data_loader, valid_data_loader = Load_Data(
4-
Main_DL,
5-
Valid_Loader,
6-
[
7-
"/media/user/Main/Programmer-RD-AI/Programming/Learning/JS/NLP-Disaster-Tweets/ML/data/train.csv",
8-
32,
9-
Transformer().transform(),
10-
],
11-
[
12-
"/media/user/Main/Programmer-RD-AI/Programming/Learning/JS/NLP-Disaster-Tweets/ML/data/test.csv",
13-
1,
14-
],
15-
0.125,
16-
42,
17-
).ld()
18-
model = TL().to(device)
19-
learning_rate = 1e-5
20-
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
21-
criterion = nn.CrossEntropyLoss()
22-
config = {
23-
"model": model,
24-
"criterion": criterion,
25-
"optimizer": optimizer,
26-
"learning_rate": learning_rate,
27-
}
28-
Train(
29-
model, 10, config, train_data_loader, test_data_loader, valid_data_loader, criterion, optimizer
30-
).train("wit_randomize")
3+
lrs = [1e-0, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
4+
for lr in lrs:
5+
train_data_loader, test_data_loader, valid_data_loader = Load_Data(
6+
Main_DL,
7+
Valid_Loader,
8+
[
9+
"/media/user/Main/Programmer-RD-AI/Programming/Learning/JS/NLP-Disaster-Tweets/ML/data/train.csv",
10+
32,
11+
Transformer().transform(),
12+
],
13+
[
14+
"/media/user/Main/Programmer-RD-AI/Programming/Learning/JS/NLP-Disaster-Tweets/ML/data/test.csv",
15+
1,
16+
],
17+
0.25,
18+
42,
19+
).ld()
20+
model = TL().to(device)
21+
optimizer = optim.Adam(model.parameters(), lr=lr)
22+
criterion = nn.CrossEntropyLoss()
23+
config = {
24+
"model": model,
25+
"criterion": criterion,
26+
"optimizer": optimizer,
27+
"learning_rate": lr,
28+
}
29+
Train(
30+
model,
31+
5,
32+
config,
33+
train_data_loader,
34+
test_data_loader,
35+
valid_data_loader,
36+
criterion,
37+
optimizer,
38+
).train(f"{lr}")

test.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +0,0 @@
1-
from torchvision import transforms
2-
3-
import torch
4-
5-
print(torch.rand(1).item())

0 commit comments

Comments
 (0)