From 50a229e0f2ce8e38aed4bf6ed796d69037c748d1 Mon Sep 17 00:00:00 2001
From: Apple
Date: Sun, 22 Sep 2019 03:09:03 +0900
Subject: [PATCH 1/2] add multi-GPU torch-summary capability

---
 torchsummary/torchsummary.py | 138 ++++++++++++++++++-----------------
 1 file changed, 73 insertions(+), 65 deletions(-)

diff --git a/torchsummary/torchsummary.py b/torchsummary/torchsummary.py
index cbe18e3..f9e9a27 100644
--- a/torchsummary/torchsummary.py
+++ b/torchsummary/torchsummary.py
@@ -41,75 +41,83 @@ def hook(module, input, output):
         ):
             hooks.append(module.register_forward_hook(hook))
 
-    device = device.lower()
-    assert device in [
-        "cuda",
-        "cpu",
-    ], "Input device is not valid, please specify 'cuda' or 'cpu'"
-
-    if device == "cuda" and torch.cuda.is_available():
-        dtype = torch.cuda.FloatTensor
-    else:
-        dtype = torch.FloatTensor
+    if isinstance(device, str):
+        device = device.lower()
+    # parse helper returning a torch.device; the argument may be a torch.device
+    # object, a device string ('cuda:1'), or an integer device index (1)
+    device = torch._C._nn._parse_to(device)[0]
 
     # multiple inputs to the network
     if isinstance(input_size, tuple):
         input_size = [input_size]
 
     # batch_size of 2 for batchnorm
-    x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
-    # print(type(x[0]))
-
-    # create properties
-    summary = OrderedDict()
-    hooks = []
-
-    # register hook
-    model.apply(register_hook)
-
-    # make a forward pass
-    # print(x.shape)
-    model(*x)
-
-    # remove these hooks
-    for h in hooks:
-        h.remove()
-
-    print("----------------------------------------------------------------")
-    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
-    print(line_new)
-    print("================================================================")
-    total_params = 0
-    total_output = 0
-    trainable_params = 0
-    for layer in summary:
-        # input_shape, output_shape, trainable, nb_params
-        line_new = "{:>20} {:>25} {:>15}".format(
-            layer,
-            str(summary[layer]["output_shape"]),
-            "{0:,}".format(summary[layer]["nb_params"]),
-        )
-        total_params += summary[layer]["nb_params"]
-        total_output += np.prod(summary[layer]["output_shape"])
-        if "trainable" in summary[layer]:
-            if summary[layer]["trainable"] == True:
-                trainable_params += summary[layer]["nb_params"]
-        print(line_new)
+    try:
+        if device == torch.device('cuda'):
+            if torch.cuda.is_available():
+                x = [torch.rand(2, *in_size).to('cuda') for in_size in input_size]
+            else:
+                raise Exception("No CUDA-capable device detected.")
+        elif not (device == torch.device('cpu') or device == torch.device('cpu:0')):
+            with torch.cuda.device(device):
+                if torch.cuda.is_available():
+                    x = [torch.rand(2, *in_size).to(device) for in_size in input_size]
+    except RuntimeError:
+        raise Exception("Specified device either doesn't exist or is not CUDA-capable.") from None
+    else:
+        if device == torch.device('cpu') or device == torch.device('cpu:0'):
+            x = [torch.rand(2, *in_size).to('cpu') for in_size in input_size]
+    # print(type(x[0]))
+
+    # create properties
+    summary = OrderedDict()
+    hooks = []
 
-    # assume 4 bytes/number (float on cuda).
-    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
-    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
-    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
-    total_size = total_params_size + total_output_size + total_input_size
-
-    print("================================================================")
-    print("Total params: {0:,}".format(total_params))
-    print("Trainable params: {0:,}".format(trainable_params))
-    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
-    print("----------------------------------------------------------------")
-    print("Input size (MB): %0.2f" % total_input_size)
-    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
-    print("Params size (MB): %0.2f" % total_params_size)
-    print("Estimated Total Size (MB): %0.2f" % total_size)
-    print("----------------------------------------------------------------")
-    # return summary
+    # register hook
+    model.apply(register_hook)
+
+    # make a forward pass
+    # print(x.shape)
+    model(*x)
+
+    # remove these hooks
+    for h in hooks:
+        h.remove()
+
+    print("----------------------------------------------------------------")
+    line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
+    print(line_new)
+    print("================================================================")
+    total_params = 0
+    total_output = 0
+    trainable_params = 0
+    for layer in summary:
+        # input_shape, output_shape, trainable, nb_params
+        line_new = "{:>20} {:>25} {:>15}".format(
+            layer,
+            str(summary[layer]["output_shape"]),
+            "{0:,}".format(summary[layer]["nb_params"]),
+        )
+        total_params += summary[layer]["nb_params"]
+        total_output += np.prod(summary[layer]["output_shape"])
+        if "trainable" in summary[layer]:
+            if summary[layer]["trainable"] == True:
+                trainable_params += summary[layer]["nb_params"]
+        print(line_new)
+
+    # assume 4 bytes/number (float on cuda).
+    total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
+    total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
+    total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
+    total_size = total_params_size + total_output_size + total_input_size
+
+    print("================================================================")
+    print("Total params: {0:,}".format(total_params))
+    print("Trainable params: {0:,}".format(trainable_params))
+    print("Non-trainable params: {0:,}".format(total_params - trainable_params))
+    print("----------------------------------------------------------------")
+    print("Input size (MB): %0.2f" % total_input_size)
+    print("Forward/backward pass size (MB): %0.2f" % total_output_size)
+    print("Params size (MB): %0.2f" % total_params_size)
+    print("Estimated Total Size (MB): %0.2f" % total_size)
+    print("----------------------------------------------------------------")
+    # return summary
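
The private torch._C._nn._parse_to helper backs Tensor.to's argument handling. The normalization the patch relies on can be sketched with public API only; normalize_device below is a hypothetical stand-in, assuming a bare int is meant as a CUDA index, as with Tensor.to(1):

    import torch

    def normalize_device(device):
        # A bare integer is interpreted as a CUDA device index,
        # as Tensor.to(1) would do; other forms pass through torch.device.
        if isinstance(device, int):
            return torch.device('cuda', device)
        return torch.device(device)

    print(normalize_device('cuda:1'))             # cuda:1
    print(normalize_device(torch.device('cpu')))  # cpu
    print(normalize_device(1))                    # cuda:1
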
From a52f625b1172b58e91585666c013e2baa61d2978 Mon Sep 17 00:00:00 2001
From: Apple
Date: Sun, 22 Sep 2019 03:16:03 +0900
Subject: [PATCH 2/2] multi-gpu compatible: torch-summary for cuda:1 and so on

---
 torchsummary/torchsummary.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchsummary/torchsummary.py b/torchsummary/torchsummary.py
index f9e9a27..62efe3f 100644
--- a/torchsummary/torchsummary.py
+++ b/torchsummary/torchsummary.py
@@ -104,7 +104,7 @@ def hook(module, input, output):
             trainable_params += summary[layer]["nb_params"]
         print(line_new)
 
-    # assume 4 bytes/number (float on cuda). 
+    # assume 4 bytes/number (float on cuda).
     total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
     total_output_size = abs(2. * total_output * 4. / (1024 ** 2.))  # x2 for gradients
     total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
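
With both patches applied, the summary can be generated on a specific GPU. A usage sketch, assuming torchsummary's existing summary(model, input_size, batch_size=-1, device=...) signature, torchvision installed, and a machine with at least two CUDA devices:

    import torch
    import torchvision.models as models
    from torchsummary import summary

    model = models.vgg16().to('cuda:1')

    # all three device forms are now accepted
    summary(model, (3, 224, 224), device='cuda:1')                # device string
    summary(model, (3, 224, 224), device=torch.device('cuda:1'))  # torch.device object
    summary(model, (3, 224, 224), device=1)                       # bare device index
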