sentient-codebot
diff --git a/.vscode/launch.json b/.vscode/launch.json
Lines changed: 1 addition & 1 deletion
diff --git a/group_operations.py b/group_operations.py
Lines changed: 5 additions & 3 deletions
@@ -65,7 +65,7 @@
                 "--experiment_name",
                 "SPRITES_SASBD_444",
                 "--cfg_json",
-                "configs/mmnist/rim_sasbd.json",
+                "configs/mmnist/altscoff_sasbd.json",
                 "--dataset_dir",
                 "data",
                 "--decode_hidden",
 
@@ -454,7 +454,8 @@ def forward(self, input, h):
             
         Outputs:
             `hnext`: [N, num_hidden, single_hidden_size],
-            `attn`: [N, num_OFs, n_templates] (num_bloccks==k==num_object_files)
+            `attn_sm`: [N, num_hidden, num_rules] from softmax
+            `attn_gsm`: [N, num_OFs, n_templates] (num_blocks==k==num_object_files) from gumbel_softmax
         """
 
         #self.blockify_params()
@@ -484,15 +485,16 @@ def forward(self, input, h):
         else:
             write_key = self.rule_embeddings # [1, num_rules, kdim]
 
-        att = torch.nn.functional.gumbel_softmax(torch.matmul(h_read, write_key.permute(0, 2, 1)),  tau=0.5, hard=True)    # Shape: [N*num_hidden, 1, num_rules]
+        att_logits = torch.matmul(h_read, write_key.permute(0, 2, 1))
+        att = torch.nn.functional.gumbel_softmax(att_logits,  tau=0.5, hard=True)    # Shape: [N*num_hidden, 1, num_rules]
 
         #print('hnext shape before att', hnext.shape)
         hnext = torch.bmm(att, hnext)   # [N*num_hidden, 1, num_rules], [N*num_hidden, num_rules, hidden_size] -> [N*num_hidden, 1, hidden_size]
         hnext = hnext.mean(dim=1) # [N*num_hidden, hidden_size]
         hnext = hnext.reshape((bs, self.num_hidden, self.hidden_size)) # [N, num_hidden, hidden_size]
         #print('shapes', hnext.shape, cnext.shape)
 
-        return hnext, att.data.reshape(bs,self.num_hidden,self.num_rules)
+        return hnext, nn.Softmax(-1)(att_logits).data.reshape(bs,self.num_hidden,self.num_rules), att.data.reshape(bs,self.num_hidden,self.num_rules)
 
 class SharedBlockLSTM(nn.Module):
     """Dynamic sharing of parameters between blocks(RIM's)