@@ -49,7 +49,7 @@ class RandomPerspective(BaseImagePreprocessingLayer):
49
49
def __init__ (
50
50
self ,
51
51
factor = 1.0 ,
52
- scale = 0.3 ,
52
+ scale = 1.0 ,
53
53
interpolation = "bilinear" ,
54
54
fill_value = 0.0 ,
55
55
seed = None ,
@@ -103,6 +103,10 @@ def get_random_transformation(self, data, training=True, seed=None):
103
103
batch_size = 1
104
104
else :
105
105
batch_size = images_shape [0 ]
106
+ height , width = (
107
+ images .shape [self .height_axis ],
108
+ images .shape [self .width_axis ],
109
+ )
106
110
107
111
seed = seed or self ._get_seed_generator (self .backend ._backend )
108
112
@@ -122,32 +126,42 @@ def get_random_transformation(self, data, training=True, seed=None):
122
126
apply_perspective = random_threshold < transformation_probability
123
127
124
128
perspective_factor = self .backend .random .uniform (
125
- minval = - self . scale ,
126
- maxval = self .scale ,
127
- shape = [ batch_size , 4 ] ,
129
+ shape = ( batch_size , 4 , 2 ) ,
130
+ minval = - 0.5 * self .scale ,
131
+ maxval = 0.5 * self . scale ,
128
132
seed = seed ,
129
133
dtype = self .compute_dtype ,
130
134
)
131
135
136
+ start_points = self .backend .convert_to_tensor (
137
+ [
138
+ [
139
+ [0.0 , 0.0 ],
140
+ [width - 1 , 0.0 ],
141
+ [0.0 , height - 1 ],
142
+ [width - 1 , height - 1 ],
143
+ ]
144
+ ],
145
+ dtype = self .compute_dtype ,
146
+ )
147
+
148
+ start_points = self .backend .numpy .repeat (
149
+ start_points , batch_size , axis = 0
150
+ )
151
+ end_points = start_points + start_points * perspective_factor
152
+
132
153
return {
133
154
"apply_perspective" : apply_perspective ,
134
- "perspective_factor" : perspective_factor ,
155
+ "start_points" : start_points ,
156
+ "end_points" : end_points ,
135
157
"input_shape" : images_shape ,
136
158
}
137
159
138
160
def transform_images (self , images , transformation , training = True ):
139
161
images = self .backend .cast (images , self .compute_dtype )
140
162
if training and transformation is not None :
141
- apply_perspective = transformation ["apply_perspective" ]
142
- perspective_images = self ._perspective_inputs (
143
- images , transformation
144
- )
145
-
146
- images = self .backend .numpy .where (
147
- apply_perspective [:, None , None , None ],
148
- perspective_images ,
149
- images ,
150
- )
163
+ images = self ._perspective_inputs (images , transformation )
164
+ images = self .backend .cast (images , self .compute_dtype )
151
165
return images
152
166
153
167
def _perspective_inputs (self , inputs , transformation ):
@@ -159,63 +173,36 @@ def _perspective_inputs(self, inputs, transformation):
159
173
if unbatched :
160
174
inputs = self .backend .numpy .expand_dims (inputs , axis = 0 )
161
175
162
- perspective_factor = self . backend . core . convert_to_tensor (
163
- transformation ["perspective_factor" ], dtype = self . compute_dtype
164
- )
165
- outputs = self .backend .image .affine_transform (
176
+ start_points = transformation [ "start_points" ]
177
+ end_points = transformation ["end_points" ]
178
+
179
+ outputs = self .backend .image .perspective_transform (
166
180
inputs ,
167
- transform = self ._get_perspective_matrix (perspective_factor ),
181
+ start_points ,
182
+ end_points ,
168
183
interpolation = self .interpolation ,
169
- fill_mode = "constant" ,
170
184
fill_value = self .fill_value ,
171
185
data_format = self .data_format ,
172
186
)
173
187
188
+ apply_perspective = transformation ["apply_perspective" ]
189
+ outputs = self .backend .numpy .where (
190
+ apply_perspective [:, None , None , None ],
191
+ outputs ,
192
+ inputs ,
193
+ )
194
+
174
195
if unbatched :
175
196
outputs = self .backend .numpy .squeeze (outputs , axis = 0 )
176
197
return outputs
177
198
178
- def _get_perspective_matrix (self , perspectives ):
179
- perspectives = self .backend .core .convert_to_tensor (
180
- perspectives , dtype = self .compute_dtype
181
- )
182
- num_perspectives = self .backend .shape (perspectives )[0 ]
183
- return self .backend .numpy .concatenate (
184
- [
185
- self .backend .numpy .ones (
186
- (num_perspectives , 1 ), dtype = self .compute_dtype
187
- )
188
- + perspectives [:, :1 ],
189
- perspectives [:, :1 ],
190
- perspectives [:, 2 :3 ],
191
- perspectives [:, 1 :2 ],
192
- self .backend .numpy .ones (
193
- (num_perspectives , 1 ), dtype = self .compute_dtype
194
- )
195
- + perspectives [:, 1 :2 ],
196
- perspectives [:, 3 :4 ],
197
- self .backend .numpy .zeros ((num_perspectives , 2 )),
198
- ],
199
- axis = 1 ,
200
- )
201
-
202
- def _get_transformed_coordinates (self , x , y , transform ):
203
- a0 , a1 , a2 , b0 , b1 , b2 , c0 , c1 = self .backend .numpy .split (
204
- transform , 8 , axis = - 1
205
- )
206
-
207
- x_transformed = (a1 * (y - b2 ) - b1 * (x - a2 )) / (a1 * b0 - a0 * b1 )
208
- y_transformed = (b0 * (x - a2 ) - a0 * (y - b2 )) / (a1 * b0 - a0 * b1 )
209
-
210
- return x_transformed , y_transformed
211
-
212
199
def transform_bounding_boxes (
213
200
self ,
214
201
bounding_boxes ,
215
202
transformation ,
216
203
training = True ,
217
204
):
218
- if training :
205
+ if training and transformation is not None :
219
206
if backend_utils .in_tf_graph ():
220
207
self .backend .set_backend ("tensorflow" )
221
208
@@ -233,26 +220,33 @@ def transform_bounding_boxes(
233
220
)
234
221
235
222
boxes = bounding_boxes ["boxes" ]
236
-
237
223
x0 , y0 , x1 , y1 = self .backend .numpy .split (boxes , 4 , axis = - 1 )
238
224
239
- perspective_factor = transformation ["perspective_factor" ]
240
- transform = self ._get_perspective_matrix (perspective_factor )
225
+ start_points = transformation ["start_points" ]
226
+ end_points = transformation ["end_points" ]
227
+ transform = self .backend .image .compute_homography_matrix (
228
+ start_points , end_points
229
+ )
241
230
transform = self .backend .numpy .expand_dims (transform , axis = 1 )
242
231
transform = self .backend .cast (transform , dtype = self .compute_dtype )
243
232
244
- x_1 , y_1 = self ._get_transformed_coordinates (x0 , y0 , transform )
245
- x_2 , y_2 = self ._get_transformed_coordinates (x1 , y1 , transform )
246
- x_3 , y_3 = self ._get_transformed_coordinates (x0 , y1 , transform )
247
- x_4 , y_4 = self ._get_transformed_coordinates (x1 , y0 , transform )
233
+ corners = [
234
+ self ._get_transformed_coordinates (x , y , transform )
235
+ for x , y in [(x0 , y0 ), (x1 , y1 ), (x0 , y1 ), (x1 , y0 )]
236
+ ]
237
+ x_corners , y_corners = zip (* corners )
248
238
249
- xs = self .backend .numpy .concatenate ([ x_1 , x_2 , x_3 , x_4 ] , axis = - 1 )
250
- ys = self .backend .numpy .concatenate ([ y_1 , y_2 , y_3 , y_4 ] , axis = - 1 )
239
+ xs = self .backend .numpy .stack ( x_corners , axis = - 1 )
240
+ ys = self .backend .numpy .stack ( y_corners , axis = - 1 )
251
241
252
- min_x = self .backend .numpy .min (xs , axis = - 1 )
253
- max_x = self .backend .numpy .max (xs , axis = - 1 )
254
- min_y = self .backend .numpy .min (ys , axis = - 1 )
255
- max_y = self .backend .numpy .max (ys , axis = - 1 )
242
+ min_x , max_x = (
243
+ self .backend .numpy .min (xs , axis = - 1 ),
244
+ self .backend .numpy .max (xs , axis = - 1 ),
245
+ )
246
+ min_y , max_y = (
247
+ self .backend .numpy .min (ys , axis = - 1 ),
248
+ self .backend .numpy .max (ys , axis = - 1 ),
249
+ )
256
250
257
251
min_x = self .backend .numpy .expand_dims (min_x , axis = - 1 )
258
252
max_x = self .backend .numpy .expand_dims (max_x , axis = - 1 )
@@ -280,8 +274,43 @@ def transform_bounding_boxes(
280
274
bounding_box_format = "xyxy" ,
281
275
)
282
276
277
+ self .backend .reset ()
278
+
283
279
return bounding_boxes
284
280
281
+ def _get_transformed_coordinates (
282
+ self , x_coords , y_coords , transformation_matrix
283
+ ):
284
+ backend = self .backend
285
+
286
+ batch_size = backend .shape (transformation_matrix )[0 ]
287
+
288
+ homogeneous_transform = backend .numpy .concatenate (
289
+ [transformation_matrix , backend .numpy .ones ((batch_size , 1 , 1 ))],
290
+ axis = - 1 ,
291
+ )
292
+ homogeneous_transform = backend .numpy .reshape (
293
+ homogeneous_transform , (batch_size , 3 , 3 )
294
+ )
295
+
296
+ inverse_transform = backend .linalg .inv (homogeneous_transform )
297
+
298
+ ones_column = backend .numpy .ones_like (x_coords )
299
+ homogeneous_coords = backend .numpy .concatenate (
300
+ [x_coords , y_coords , ones_column ], axis = - 1
301
+ )
302
+
303
+ homogeneous_coords = backend .numpy .moveaxis (homogeneous_coords , - 1 , - 2 )
304
+ transformed_coords = backend .numpy .matmul (
305
+ inverse_transform , homogeneous_coords
306
+ )
307
+ transformed_coords = backend .numpy .moveaxis (transformed_coords , - 1 , - 2 )
308
+
309
+ x_transformed = transformed_coords [..., 0 ] / transformed_coords [..., 2 ]
310
+ y_transformed = transformed_coords [..., 1 ] / transformed_coords [..., 2 ]
311
+
312
+ return x_transformed , y_transformed
313
+
285
314
def transform_labels (self , labels , transformation , training = True ):
286
315
return labels
287
316
0 commit comments