Merge pull request #15 from mrmartin/cpu-capable
fix: works with gpu = -1
chuanli11 committed Apr 5, 2016
2 parents db5f549 + b0733ba commit 37c582d
Showing 3 changed files with 111 additions and 61 deletions.
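
Every change in this commit follows the same pattern: tensors that were previously moved to the GPU unconditionally via :cuda() (cudnn backend) or :cl() (clnn/OpenCL backend) are now converted only when params.gpu >= 0, so they remain ordinary CPU tensors when the tool is run with gpu = -1. A minimal sketch of that guard, assuming a params table with gpu and backend fields as used throughout this repository (the helper name is hypothetical):

-- Illustrative sketch only; a helper named to_device does not exist in the repository.
-- params.gpu < 0 means "run on the CPU"; params.backend selects cudnn (CUDA) or clnn (OpenCL).
local function to_device(tensor, params)
  if params.gpu >= 0 then
    if params.backend == 'cudnn' then
      return tensor:cuda()  -- CUDA tensor for the cudnn backend
    else
      return tensor:cl()    -- OpenCL tensor for the clnn backend
    end
  end
  return tensor             -- gpu = -1: leave the tensor on the CPU
end
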
62 changes: 45 additions & 17 deletions mylib/mrf.lua
@@ -22,10 +22,12 @@ function MRFMM:implement(mode, target_mrf, tensor_target_mrf, target_mrfnorm, so
self.padH = padH or self.padW
self.bias = torch.Tensor(nOutputPlane):fill(0)
self.backend = backend
if self.backend == 'cudnn' then
self.bias = self.bias:cuda()
else
self.bias = self.bias:cl()
if params.gpu >= 0 then
if self.backend == 'cudnn' then
self.bias = self.bias:cuda()
else
self.bias = self.bias:cl()
end
end
self.gradTO = torch.Tensor(input_size[1], input_size[2], input_size[3])
self.gradTO_confident = torch.Tensor(input_size[2], input_size[3])
@@ -116,10 +118,12 @@ function MRFMM:updateGradInput(input, gradOutput)
source_mrfnorm = torch.sqrt(torch.sum(torch.cmul(source_mrf, source_mrf), 2)):resize(1, y:nElement(), x:nElement())
end
local tensor_source_mrfnorm = torch.repeatTensor(source_mrfnorm, self.gpu_chunck_size_1, 1, 1)
if self.backend == 'cudnn' then
tensor_source_mrfnorm = tensor_source_mrfnorm:cuda()
else
tensor_source_mrfnorm = tensor_source_mrfnorm:cl()
if params.gpu >= 0 then
if self.backend == 'cudnn' then
tensor_source_mrfnorm = tensor_source_mrfnorm:cuda()
else
tensor_source_mrfnorm = tensor_source_mrfnorm:cl()
end
end
local nOutputPlane_all = self.nOutputPlane -- hacked for memory safety
local num_chunk = math.ceil(nOutputPlane_all / self.gpu_chunck_size_1)
@@ -139,18 +143,38 @@ function MRFMM:updateGradInput(input, gradOutput)

if self.mode == 'memory' then
-- local timer_IO = torch.Timer()
if self.backend == 'cudnn' then
self.weight = self.weight:cuda()
else
self.weight = self.weight:cl()
if params.gpu >= 0 then
if self.backend == 'cudnn' then
self.weight = self.weight:cuda()
else
self.weight = self.weight:cl()
end
end
-- t_io = t_io + timer_IO:time().real
end
self.nOutputPlane = i_end - i_start + 1

-- local timer_CONV = torch.Timer()
local temp = input.nn.SpatialConvolutionMM_updateOutput(self, input)
--local temp = input.nn.SpatialConvolutionMM_updateOutput(self, input)
-- t_conv = t_conv + timer_CONV:time().real
local subBias = self.bias:sub(i_start, i_end)
if params.gpu < 0 then
self.finput = torch.Tensor()
self.fgradInput = torch.Tensor()
end

input.THNN.SpatialConvolutionMM_updateOutput(
input:cdata(),
self.output:cdata(),
self.weight:cdata(),
subBias:cdata(),
self.finput:cdata(),
self.fgradInput:cdata(),
self.kW, self.kH,
self.dW, self.dH,
self.padW, self.padH
)
local temp = self.output

-- normalize w.r.t source_mrfnorm
if i_chunk < num_chunk then
@@ -202,10 +226,14 @@ function MRFMM:updateGradInput(input, gradOutput)
-- local t_syn = timer_SYN:time().real

if gradOutput:size()[1] == input:size()[1] then
if self.backend == 'cudnn' then
self.gradInput = gradOutput:clone() + self.gradTO:cuda() * self.strength * (-1)
if params.gpu >= 0 then
if self.backend == 'cudnn' then
self.gradInput = gradOutput:clone() + self.gradTO:cuda() * self.strength * (-1)
else
self.gradInput = gradOutput:clone() + self.gradTO:cl() * self.strength * (-1)
end
else
self.gradInput = gradOutput:clone() + self.gradTO:cl() * self.strength * (-1)
self.gradInput = gradOutput:clone() + self.gradTO * self.strength * (-1)
end
else
self.gradInput = self.gradTO * self.strength * (-1)
@@ -229,4 +257,4 @@ function MRFMM:type(type)
self.finput = torch.Tensor()
self.fgradInput = torch.Tensor()
return parent.type(self,type)
end
end
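
The most involved change in mrf.lua is in MRFMM:updateGradInput: the old input.nn.SpatialConvolutionMM_updateOutput(self, input) helper is replaced by a direct call to the THNN binding, with the bias sliced to the current chunk and, on the CPU path, freshly allocated finput/fgradInput work buffers. A condensed, commented sketch of that call, following the shape of the diff above (variable names as in the module):

-- On the CPU (gpu = -1) the THNN convolution needs plain Tensor work buffers;
-- on the GPU the buffers created by MRFMM:type() are reused.
if params.gpu < 0 then
  self.finput = torch.Tensor()
  self.fgradInput = torch.Tensor()
end
local subBias = self.bias:sub(i_start, i_end)   -- bias entries for the current output chunk
input.THNN.SpatialConvolutionMM_updateOutput(
  input:cdata(), self.output:cdata(),
  self.weight:cdata(), subBias:cdata(),
  self.finput:cdata(), self.fgradInput:cdata(),
  self.kW, self.kH, self.dW, self.dH, self.padW, self.padH)
local temp = self.output                        -- convolution response for this chunk
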
56 changes: 33 additions & 23 deletions syn_CNNMRF_wrapper.lua
@@ -88,10 +88,12 @@ local function main(params)
for i_s = -params.target_num_scale, params.target_num_scale do
local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s))
local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear')
if params.backend == 'cudnn' then
target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
else
target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
else
target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
end
end
table.insert(target_images_caffe, target_image_rt_s_caffe)
end
@@ -174,10 +176,14 @@ local function main(params)
-- print('*****************************************************')
-- print(string.format('process source image'));
-- print('*****************************************************')
if params.backend == 'cudnn' then
net:forward(pyramid_source_image_caffe[cur_res]:cuda())
if params.gpu >= 0 then
if params.backend == 'cudnn' then
net:forward(pyramid_source_image_caffe[cur_res]:cuda())
else
net:forward(pyramid_source_image_caffe[cur_res]:cl())
end
else
net:forward(pyramid_source_image_caffe[cur_res]:cl())
net:forward(pyramid_source_image_caffe[cur_res])
end
local source_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then
@@ -280,7 +286,7 @@ local function main(params)
cltorch.setDevice(params.gpu + 1)
end
else
params.backend = 'nn-cpu'
params.backend = 'nn'
end

if params.backend == 'cudnn' then
@@ -324,12 +330,13 @@ local function main(params)
else
error('Invalid init type')
end
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
end
end

-----------------------------------------------------
-- add a tv layer
-----------------------------------------------------
@@ -378,12 +385,13 @@ local function main(params)
print('network has been built.')
else
input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone()
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
end
end

end

print('*****************************************************')
@@ -399,10 +407,12 @@ local function main(params)
end

local mask = torch.Tensor(input_image:size()):fill(1)
if params.backend == 'cudnn' then
mask = mask:cuda()
else
mask = mask:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
mask = mask:cuda()
else
mask = mask:cl()
end
end

y = net:forward(input_image)
@@ -495,4 +505,4 @@ end
return {
run_test = run_test,
main = main
}
}
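
In syn_CNNMRF_wrapper.lua the CPU fallback itself also changes: with gpu = -1 the wrapper now sets params.backend = 'nn' (replacing the old 'nn-cpu' label), and the pyramid images, input_image, and mask are moved to the GPU only when params.gpu >= 0, so net:forward receives ordinary float tensors on the CPU. A hedged sketch of the device/backend selection this implies; only cltorch.setDevice(params.gpu + 1) and the 'nn' fallback are visible in the diff, while the require lines and the cutorch branch are assumptions:

-- Assumed overall shape of the backend selection (only partially visible in this diff).
if params.gpu >= 0 then
  if params.backend == 'cudnn' then
    require 'cutorch'; require 'cunn'      -- assumption: CUDA packages loaded here
    cutorch.setDevice(params.gpu + 1)      -- assumption: mirrors the cltorch call below
  else
    require 'cltorch'; require 'clnn'      -- assumption: OpenCL packages loaded here
    cltorch.setDevice(params.gpu + 1)      -- visible in the diff
  end
else
  params.backend = 'nn'                    -- visible in the diff: CPU runs use plain nn modules
end
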
54 changes: 33 additions & 21 deletions transfer_CNNMRF_wrapper.lua
@@ -166,10 +166,12 @@ local function main(params)
for i_s = -params.target_num_scale, params.target_num_scale do
local max_sz = math.floor(math.max(target_image_rt_caffe:size()[2], target_image_rt_caffe:size()[3]) * torch.pow(params.target_step_scale, i_s))
local target_image_rt_s_caffe = image.scale(target_image_rt_caffe, max_sz, 'bilinear')
if params.backend == 'cudnn' then
target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
else
target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
target_image_rt_s_caffe = target_image_rt_s_caffe:cuda()
else
target_image_rt_s_caffe = target_image_rt_s_caffe:cl()
end
end
table.insert(target_images_caffe, target_image_rt_s_caffe)
end
@@ -252,10 +254,14 @@ local function main(params)
-- print('*****************************************************')
-- print(string.format('process source image'));
-- print('*****************************************************')
if params.backend == 'cudnn' then
net:forward(pyramid_source_image_caffe[cur_res]:cuda())
if params.gpu >= 0 then
if params.backend == 'cudnn' then
net:forward(pyramid_source_image_caffe[cur_res]:cuda())
else
net:forward(pyramid_source_image_caffe[cur_res]:cl())
end
else
net:forward(pyramid_source_image_caffe[cur_res]:cl())
net:forward(pyramid_source_image_caffe[cur_res])
end
local source_feature_map = net:get(mrf_layers[id_mrf] - 1).output:float()
if params.mrf_patch_size[id_mrf] > source_feature_map:size()[2] or params.mrf_patch_size[id_mrf] > source_feature_map:size()[3] then
@@ -358,7 +364,7 @@ local function main(params)
cltorch.setDevice(params.gpu + 1)
end
else
params.backend = 'nn-cpu'
params.backend = 'nn'
end

if params.backend == 'cudnn' then
@@ -402,10 +408,12 @@ local function main(params)
else
error('Invalid init type')
end
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
end
end

-----------------------------------------------------
@@ -465,10 +473,12 @@ local function main(params)
print('network has been built.')
else
input_image = image.scale(input_image:float(), pyramid_source_image_caffe[i_res]:size()[3], pyramid_source_image_caffe[i_res]:size()[2], 'bilinear'):clone()
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
input_image = input_image:cuda()
else
input_image = input_image:cl()
end
end

-- -- update content layers
@@ -492,10 +502,12 @@ local function main(params)
end

local mask = torch.Tensor(input_image:size()):fill(1)
if params.backend == 'cudnn' then
mask = mask:cuda()
else
mask = mask:cl()
if params.gpu >= 0 then
if params.backend == 'cudnn' then
mask = mask:cuda()
else
mask = mask:cl()
end
end

y = net:forward(input_image)
@@ -587,4 +599,4 @@ end
return {
run_test = run_test,
main = main
}
}
