-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaffine_grid_sampler_mask_values.py
137 lines (112 loc) · 5.1 KB
/
affine_grid_sampler_mask_values.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import torch
from torch.nn.functional import grid_sample
from typing import List
import math
# from torchvision.transforms.v2.functional._geometry import _get_inverse_affine_matrix
def _get_inverse_affine_matrix(
center: List[float], angle: float, translate: List[float], scale: float, shear: List[float], inverted: bool = True
) -> List[float]:
# Helper method to compute inverse matrix for affine transformation
# Pillow requires inverse affine transformation matrix:
# Affine matrix is : M = T * C * RotateScaleShear * C^-1
#
# where T is translation matrix: [1, 0, tx | 0, 1, ty | 0, 0, 1]
# C is translation matrix to keep center: [1, 0, cx | 0, 1, cy | 0, 0, 1]
# RotateScaleShear is rotation with scale and shear matrix
#
# RotateScaleShear(a, s, (sx, sy)) =
# = R(a) * S(s) * SHy(sy) * SHx(sx)
# = [ s*cos(a - sy)/cos(sy), s*(-cos(a - sy)*tan(sx)/cos(sy) - sin(a)), 0 ]
# [ s*sin(a - sy)/cos(sy), s*(-sin(a - sy)*tan(sx)/cos(sy) + cos(a)), 0 ]
# [ 0 , 0 , 1 ]
# where R is a rotation matrix, S is a scaling matrix, and SHx and SHy are the shears:
# SHx(s) = [1, -tan(s)] and SHy(s) = [1 , 0]
# [0, 1 ] [-tan(s), 1]
#
# Thus, the inverse is M^-1 = C * RotateScaleShear^-1 * C^-1 * T^-1
rot = math.radians(angle)
sx = math.radians(shear[0])
sy = math.radians(shear[1])
cx, cy = center
tx, ty = translate
# Cached results
cos_sy = math.cos(sy)
tan_sx = math.tan(sx)
rot_minus_sy = rot - sy
cx_plus_tx = cx + tx
cy_plus_ty = cy + ty
# Rotate Scale Shear (RSS) without scaling
a = math.cos(rot_minus_sy) / cos_sy
b = -(a * tan_sx + math.sin(rot))
c = math.sin(rot_minus_sy) / cos_sy
d = math.cos(rot) - c * tan_sx
if inverted:
# Inverted rotation matrix with scale and shear
# det([[a, b], [c, d]]) == 1, since det(rotation) = 1 and det(shear) = 1
matrix = [d / scale, -b / scale, 0.0, -c / scale, a / scale, 0.0]
# Apply inverse of translation and of center translation: RSS^-1 * C^-1 * T^-1
# and then apply center translation: C * RSS^-1 * C^-1 * T^-1
matrix[2] += cx - matrix[0] * cx_plus_tx - matrix[1] * cy_plus_ty
matrix[5] += cy - matrix[3] * cx_plus_tx - matrix[4] * cy_plus_ty
else:
matrix = [a * scale, b * scale, 0.0, c * scale, d * scale, 0.0]
# Apply inverse of center translation: RSS * C^-1
# and then apply translation and center : T * C * RSS * C^-1
matrix[2] += cx_plus_tx - matrix[0] * cx - matrix[1] * cy
matrix[5] += cy_plus_ty - matrix[3] * cx - matrix[4] * cy
return matrix
from torch import Tensor
# from torchvision.transforms._functional_tensor import _gen_affine_grid
def _gen_affine_grid(
theta: Tensor,
w: int,
h: int,
ow: int,
oh: int,
) -> Tensor:
# https://github.com/pytorch/pytorch/blob/74b65c32be68b15dc7c9e8bb62459efbfbde33d8/aten/src/ATen/native/
# AffineGridGenerator.cpp#L18
# Difference with AffineGridGenerator is that:
# 1) we normalize grid values after applying theta
# 2) we can normalize by other image size, such that it covers "extend" option like in PIL.Image.rotate
d = 0.5
base_grid = torch.empty(1, oh, ow, 3, dtype=theta.dtype, device=theta.device)
x_grid = torch.linspace(-ow * 0.5 + d, ow * 0.5 + d - 1, steps=ow, device=theta.device, dtype=theta.dtype)
base_grid[..., 0].copy_(x_grid)
y_grid = torch.linspace(-oh * 0.5 + d, oh * 0.5 + d - 1, steps=oh, device=theta.device, dtype=theta.dtype).unsqueeze_(-1)
base_grid[..., 1].copy_(y_grid)
base_grid[..., 2].fill_(1)
rescaled_theta = theta.transpose(1, 2) / torch.tensor([0.5 * w, 0.5 * h], dtype=theta.dtype, device=theta.device)
output_grid = base_grid.view(1, oh * ow, 3).bmm(rescaled_theta)
return output_grid.view(1, oh, ow, 2)
# Repro script: sample an image plus an all-ones mask channel through
# grid_sample and report mask values that land just below 1.0.
dtype = torch.float32
# dtype = torch.float64

# angle = 56.0
mode = "bilinear"
# mode = "nearest"

# Image with channel 0 set to one and channels 1 and 2 set to zero.
orig_img = torch.ones(1, 3, 46, 46, dtype=dtype)
orig_img[:, 1, ...] = 0.0
orig_img[:, 2, ...] = 0.0

# Append an all-ones mask as an extra channel so it is resampled with the image.
mask = torch.ones((orig_img.shape[0], 1, orig_img.shape[2], orig_img.shape[3]), dtype=orig_img.dtype, device=orig_img.device)
img = torch.cat((orig_img, mask), dim=1)

# Alternative grid: a rotation built with the affine helpers defined in this file.
# center_f = [0.0, 0.0]
# matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
# w, h = img.shape[-1], img.shape[-2]
# ow, oh = w, h
# dtype = img.dtype if torch.is_floating_point(img) else torch.float32
# theta = torch.tensor(matrix, dtype=dtype, device=img.device).reshape(1, 2, 3)
# grid = _gen_affine_grid(theta, w=w, h=h, ow=ow, oh=oh)

# img is laid out (N, C, H, W): height comes before width.
n, c, h, w = img.shape
# Random sampling locations; torch.rand draws from [0, 1).
grid = torch.rand(n, h, w, 2, dtype=dtype)

out = grid_sample(img, grid, mode=mode, padding_mode="zeros", align_corners=False)

# Split the resampled mask (last channel) from the image channels.
mask = out[:, -1:, :, :]  # N * 1 * H * W
out = out[:, :-1, :, :]  # N * C * H * W

torch.set_printoptions(precision=16)
print("Unique:", mask.unique())

mask = mask[0, 0, ...]
# Select mask values that are almost, but not exactly, 1.0.
m = (mask > 0.999) & (mask < 1.0)
print(mask[m])
print(torch.where(m))

# m = mask[0, 0, ...].clone()
# m[m < 0.999] = 0
# print("m", m)