created db

AlexDerhacobian · AlexDerhacobian · commit 1684f6c3dedc · 2022-05-18T15:21:07.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -1 +1,2 @@
 ezmode/__pycache__/*
+*.egg-info
diff --git a/ezmode/data.py b/ezmode/data.py
@@ -153,102 +153,6 @@ def get_user_actions(self, start_idx, end_idx, with_labels = True):
 
         return data
 
-    '''
-    Returns mapping from class index to corresponding importance weight
-    '''
-    def get_imp_weights(self,
-            rare_class, 
-            binary = True, 
-            exp_factor = 1):
-
-        self.get_class_dict(binary = binary, rare_class = rare_class)
-        weights = {}
-
-        if (binary):
-            num_exs_pos = len(self.train_df[self.train_df['label'] == 1])
-            cls_weight_pos = (1 / num_exs_pos) ** exp_factor
-            weights[1] = cls_weight_pos
-
-            num_exs_neg = len(self.train_df[self.train_df['label'] == 0])
-            cls_weight_neg = (1 / num_exs_neg) ** exp_factor
-            weights[0] = cls_weight_neg
-
-
-        '''
-        TODO ELSE NON-BINARY CASE
-        '''
-
-        total_weights = np.sum([weights[cls] for cls in self.train_df['label']])
-        for cls_idx in weights.keys():
-            weights[cls_idx] /= total_weights
-
-        return weights
-        
-
-    '''
-    Return train data as df, if out is a path, write data to path
-    '''
-    def get_early_stopping_val_data(self, 
-            root = True, 
-            zero_index = True, 
-            shuffle = True, 
-            out = None, 
-            binary = False, 
-            rare_class = None):
-
-        val_data = []
-        with self.engine.connect() as con: 
-            rs = con.execute(
-                    'SELECT annotations.image_base_path, '
-                    'annotations.xmin, '
-                    'annotations.ymin, '
-                    'annotations.xmax, '
-                    'annotations.ymax, '
-                    'annotations.label '
-                    'FROM annotations '
-                    'INNER JOIN labels ON annotations.label=labels.label '
-                    'WHERE annotations.split=\'val_es\'')
-            for row in rs:
-                image_base_path = str(row[0])
-
-                xmin = int(row[1])
-                ymin = int(row[2])
-                xmax = int(row[3])
-                ymax = int(row[4])
-
-                label = int(row[5])
-
-                if (root):
-                    image_base_path = os.path.join(self.root, image_base_path)
-
-                val_data.append([image_base_path, xmin, ymin, xmax, ymax, label])
-
-        val_df = pd.DataFrame(data = val_data, columns = ['image', 'xmin', 'ymin', 'xmax', 'ymax', 'label'])
-
-        #Shuffle training data
-        if (shuffle):
-
-            print("shuffling")
-            val_df = val_df.sample(frac = 1).reset_index(drop = True)
-
-        #Zero index the data labels for binary classification tasks
-        if (binary and rare_class != None):
-            
-            binary_labels = []
-
-            for label in val_df['label'].to_numpy():
-                if (label == rare_class):
-                    binary_labels.append(1)
-                else:
-                    binary_labels.append(0)
-
-            val_df['label'] = binary_labels
-
-        #Zero index the data labels for N-way classification tasks
-        if (out != None):
-            val_df.to_csv(out)
-
-        return val_df
    
     '''
     Return train data as df, if out is a path, write data to path
diff --git a/ezmode/infer.py b/ezmode/infer.py
@@ -1,3 +1,4 @@
+import subprocess
 import ezmode
 import pandas as pd
 from ezmode import data
@@ -13,25 +14,16 @@ def __init__(self,
         self.model = model
         self.gpus = gpus
 
-    def write_bash_scripts(self, script = None):
+    def deploy(self, script = None):
         self.dataloader.init_metadata()
         val_csv = self.dataloader.get_val_data()
         val_data = pd.read_csv(val_csv)
 
         videos = val_data['vid_base_path'].tolist()
         num_videos = len(videos)
 
-        if (script == None):
-            infer_script_fname = os.path.join('/'.join(ezmode.__file__.split('/')[:-1]), 'infer_script.py')
-        else:
-            infer_script_fname = script
-
-        #assert(os.path.exists(infer_script_fname))
-
-        bash_dest = os.path.join(self.dataloader.working_dir, 'run_infer.sh')
-
         processes = []
-        
+
         videos_left = num_videos 
 
         for gpu in range(self.gpus):
@@ -56,11 +48,7 @@ def write_bash_scripts(self, script = None):
                     f'--num_videos {num_videos} '
                     f'--to_process {to_process} \n')
 
+        infer_jobs = [subprocess.Popen(process, shell = True) for process in processes]  # start every queued command without waiting
+        for infer_job in infer_jobs:
+            infer_job.wait()  # block until this job exits before returning
 
-
-        if (os.path.exists(bash_dest)):
-            os.remove(bash_dest)
-
-        f = open(bash_dest, "a")
-        f.writelines(processes)
-        f.close()
diff --git a/ezmode/select.py b/ezmode/select.py
@@ -8,7 +8,7 @@
 num_labeled = 0
 true_pos = 0
 
-class Selector:
+class SelectEngine:
     def __init__(self, 
             dataloader, 
             rare_class, 
diff --git a/ezmode/train.py b/ezmode/train.py
@@ -13,8 +13,9 @@
 from torchvision import transforms
 from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
 from .data import DataLoader
+from .utils import FRCNNDataLoader
 
-class Trainer:
+class TrainEngine:
     def __init__(self, 
             model_backbone, 
             dataloader,
@@ -29,61 +30,26 @@ def __init__(self,
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
             ])
 
-    def acc(self, labels, pred):
-
-        acc = (labels == pred.argmax(-1)).float().mean().item()
-        return acc
-
-    def run(self, model, lr, train_data, nb_epochs):
+    def run(self, model, loader, lr, nb_epochs):
+        model.train()
         optimizer = torch.optim.SGD(
                 filter(lambda p: p.requires_grad, model.parameters()),
-                lr,
-                momentum=0.9,
-                weight_decay=1e-4
-                )
-
-        for epoch in range(nb_epochs): 
-            print("Training on Epoch={}".format(epoch+1))
-            for i in tqdm.tqdm(range(len(train_data))):
-                cur_row = train_data.iloc[i]
-                img_path = cur_row['image']
-                print(img_path)
-                assert(os.path.exists(img_path))
-                im = cv2.imread(img_path)
-                inp = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
-                inp = self.transform(inp)
-                inp = inp.unsqueeze(0)
-
-                boxes = torch.tensor([[cur_row['xmin'], cur_row['ymin'], cur_row['xmax'], cur_row['ymax']]]).type(torch.FloatTensor).cuda()
-                labels = torch.tensor([cur_row['label']], dtype=torch.int64).cuda()
-                image_id = torch.tensor([1], dtype=torch.int64).cuda()
-
-                bbox_area =  (cur_row['xmax'] - cur_row['xmin']) * (cur_row['ymax'] - cur_row['ymin'])
-                bbox_area = torch.tensor(bbox_area).cuda()
-
-                iscrowd = torch.zeros((1,), dtype=torch.int64).cuda()
-
-
-                target = {
-                        'boxes': boxes, 
-                        'labels': labels, 
-                        'image_id': image_id, 
-                        'area': bbox_area, 
-                        'iscrowd': iscrowd
-                        }
-                inp = inp.cuda()
-                model.cuda()
-                outp, detections = model(inp, [target])
-                loss_classifier = outp['loss_classifier']
-                loss_box_reg = outp['loss_box_reg']
-                loss_objectness = outp['loss_objectness']
-                loss_rpn_box_reg = outp['loss_rpn_box_reg']
-
-                print(outp)
-                losses = sum(loss for loss in outp.values())
-                optimizer.zero_grad()
-                losses.backward()
-                optimizer.step()
+                lr = lr,
+                momentum=0.9, weight_decay=1e-4
+        )
+        for epoch in range(nb_epochs):
+            self.train_epoch(model, optimizer, loader)
+
+    def train_epoch(self, model, optimizer, loader):
+        # One pass over loader: forward, sum the loss values, backprop, step.
+        for _, (target, inp) in enumerate(loader):
+            gpu_inp = inp.cuda(non_blocking=True)
+            loss_dict, _detection = model(gpu_inp, [target])
+            # First model output is treated as a mapping; its values are summed.
+            total_loss = sum(loss_dict.values())
+            optimizer.zero_grad()
+            total_loss.backward()
+            optimizer.step()
 
 
     def load_model(self, model_backbone, model_path, num_classes):
@@ -113,25 +79,23 @@ def save_model(self,
             model, 
             lr, 
             nb_epochs):
-        print("Saving trained Faster-RCNN model...")
 
         dest = os.path.join(self.dataloader.round_working_dir, 'model_lr={}_epochs={}_backbone={}.pth'.format(lr, nb_epochs, self.model_backbone))
         torch.save(model.state_dict(), dest)
 
-        print("Done! Saved to {}".format(dest))
+        print("Done Training! Model saved to {}".format(dest))
         return dest
 
-    def train(self, lr, nb_epochs):
+    def train(self, lr, nb_epochs, batch_size):
 
         train_data = self.dataloader.get_train_data()
+        dset = FRCNNDataLoader(train_data)
+        loader = torch.utils.data.DataLoader(dataset = dset, batch_size = batch_size, shuffle=True)
 
         num_classes = self.dataloader.get_num_classes()
-        print(num_classes)
-
         model = self.load_model(self.model_backbone, self.model_path, num_classes)
 
-        self.run(model, lr, train_data, nb_epochs)  
+        self.run(model, loader, lr, nb_epochs)  
 
         model_dest = self.save_model(model, lr, nb_epochs)
-
         return model_dest
diff --git a/ezmode/train_batch.py b/ezmode/train_batch.py