diff --git a/CMT.py b/CMT.py index 8ca0d96..dac8665 100644 --- a/CMT.py +++ b/CMT.py @@ -1,7 +1,7 @@ import cv2 import itertools from numpy import array, zeros, vstack, hstack, math, nan, argsort, median, \ - argmax, isnan, append + argmax, isnan, append import scipy.cluster import scipy.spatial import time @@ -12,372 +12,458 @@ class CMT(object): - DETECTOR = 'BRISK' - DESCRIPTOR = 'BRISK' - DESC_LENGTH = 512 - MATCHER = 'BruteForce-Hamming' - THR_OUTLIER = 20 - THR_CONF = 0.75 - THR_RATIO = 0.8 + DETECTOR = 'BRISK' + DESCRIPTOR = 'BRISK' + DESC_LENGTH = 512 + MATCHER = 'BruteForce-Hamming' + THR_OUTLIER = 20 + THR_CONF = 0.75 + THR_RATIO = 0.8 - estimate_scale = True - estimate_rotation = True + estimate_scale = True + estimate_rotation = True - def initialise(self, im_gray0, tl, br): + def initialise(self, im_gray0, tl, br): - # Initialise detector, descriptor, matcher - self.detector = cv2.FeatureDetector_create(self.DETECTOR) - self.descriptor = cv2.DescriptorExtractor_create(self.DESCRIPTOR) - self.matcher = cv2.DescriptorMatcher_create(self.MATCHER) + # Initialise detector, descriptor, matcher + self.detector = cv2.FeatureDetector_create(self.DETECTOR) + self.descriptor = cv2.DescriptorExtractor_create(self.DESCRIPTOR) + self.matcher = cv2.DescriptorMatcher_create(self.MATCHER) - # Get initial keypoints in whole image - keypoints_cv = self.detector.detect(im_gray0) + # Get initial keypoints in whole image + keypoints_cv = self.detector.detect(im_gray0) - # Remember keypoints that are in the rectangle as selected keypoints - ind = util.in_rect(keypoints_cv, tl, br) - selected_keypoints_cv = list(itertools.compress(keypoints_cv, ind)) - selected_keypoints_cv, self.selected_features = self.descriptor.compute(im_gray0, selected_keypoints_cv) - selected_keypoints = util.keypoints_cv_to_np(selected_keypoints_cv) - num_selected_keypoints = len(selected_keypoints_cv) + # Remember keypoints that are in the rectangle as selected keypoints + ind = util.in_rect(keypoints_cv, tl, br) + selected_keypoints_cv = list(itertools.compress(keypoints_cv, ind)) + selected_keypoints_cv, self.selected_features = self.descriptor.compute( + im_gray0, selected_keypoints_cv) + selected_keypoints = util.keypoints_cv_to_np(selected_keypoints_cv) + num_selected_keypoints = len(selected_keypoints_cv) - if num_selected_keypoints == 0: - raise Exception('No keypoints found in selection') + if num_selected_keypoints == 0: + raise Exception('No keypoints found in selection') - # Remember keypoints that are not in the rectangle as background keypoints - background_keypoints_cv = list(itertools.compress(keypoints_cv, ~ind)) - background_keypoints_cv, background_features = self.descriptor.compute(im_gray0, background_keypoints_cv) - _ = util.keypoints_cv_to_np(background_keypoints_cv) + # Remember keypoints that are not in the rectangle as background + # keypoints + background_keypoints_cv = list(itertools.compress(keypoints_cv, ~ind)) + background_keypoints_cv, background_features = self.descriptor.compute( + im_gray0, background_keypoints_cv) + _ = util.keypoints_cv_to_np(background_keypoints_cv) - # Assign each keypoint a class starting from 1, background is 0 - self.selected_classes = array(range(num_selected_keypoints)) + 1 - background_classes = zeros(len(background_keypoints_cv)) + # Assign each keypoint a class starting from 1, background is 0 + self.selected_classes = array(range(num_selected_keypoints)) + 1 + background_classes = zeros(len(background_keypoints_cv)) - # Stack background features and selected 
features into database - self.features_database = vstack((background_features, self.selected_features)) + # Stack background features and selected features into database + self.features_database = vstack( + (background_features, self.selected_features)) - # Same for classes - self.database_classes = hstack((background_classes, self.selected_classes)) + # Same for classes + self.database_classes = hstack( + (background_classes, self.selected_classes)) - # Get all distances between selected keypoints in squareform - pdist = scipy.spatial.distance.pdist(selected_keypoints) - self.squareform = scipy.spatial.distance.squareform(pdist) + # Get all distances between selected keypoints in squareform + pdist = scipy.spatial.distance.pdist(selected_keypoints) + self.squareform = scipy.spatial.distance.squareform(pdist) - # Get all angles between selected keypoints - angles = np.empty((num_selected_keypoints, num_selected_keypoints)) - for k1, i1 in zip(selected_keypoints, range(num_selected_keypoints)): - for k2, i2 in zip(selected_keypoints, range(num_selected_keypoints)): + # Get all angles between selected keypoints + angles = np.empty((num_selected_keypoints, num_selected_keypoints)) + for k1, i1 in zip(selected_keypoints, range(num_selected_keypoints)): + for k2, i2 in zip( + selected_keypoints, range(num_selected_keypoints)): - # Compute vector from k1 to k2 - v = k2 - k1 + # Compute vector from k1 to k2 + v = k2 - k1 - # Compute angle of this vector with respect to x axis - angle = math.atan2(v[1], v[0]) + # Compute angle of this vector with respect to x axis + angle = math.atan2(v[1], v[0]) - # Store angle - angles[i1, i2] = angle + # Store angle + angles[i1, i2] = angle - self.angles = angles + self.angles = angles - # Find the center of selected keypoints - center = np.mean(selected_keypoints, axis=0) + # Find the center of selected keypoints + center = np.mean(selected_keypoints, axis=0) - # Remember the rectangle coordinates relative to the center - self.center_to_tl = np.array(tl) - center - self.center_to_tr = np.array([br[0], tl[1]]) - center - self.center_to_br = np.array(br) - center - self.center_to_bl = np.array([tl[0], br[1]]) - center + # Remember the rectangle coordinates relative to the center + self.center_to_tl = np.array(tl) - center + self.center_to_tr = np.array([br[0], tl[1]]) - center + self.center_to_br = np.array(br) - center + self.center_to_bl = np.array([tl[0], br[1]]) - center - # Calculate springs of each keypoint - self.springs = selected_keypoints - center + # Calculate springs of each keypoint + self.springs = selected_keypoints - center - # Set start image for tracking - self.im_prev = im_gray0 + # Set start image for tracking + self.im_prev = im_gray0 - # Make keypoints 'active' keypoints - self.active_keypoints = np.copy(selected_keypoints) + # Make keypoints 'active' keypoints + self.active_keypoints = np.copy(selected_keypoints) - # Attach class information to active keypoints - self.active_keypoints = hstack((selected_keypoints, self.selected_classes[:, None])) + # Attach class information to active keypoints + self.active_keypoints = hstack( + (selected_keypoints, + self.selected_classes[ + :, + None])) - # Remember number of initial keypoints - self.num_initial_keypoints = len(selected_keypoints_cv) + # Remember number of initial keypoints + self.num_initial_keypoints = len(selected_keypoints_cv) - def estimate(self, keypoints): + def estimate(self, keypoints): - center = array((nan, nan)) - scale_estimate = nan - med_rot = nan + center = array((nan, 
nan)) + scale_estimate = nan + med_rot = nan - # At least 2 keypoints are needed for scale - if keypoints.size > 1: + # At least 2 keypoints are needed for scale + if keypoints.size > 1: - # Extract the keypoint classes - keypoint_classes = keypoints[:, 2].squeeze().astype(np.int) + # Extract the keypoint classes + keypoint_classes = keypoints[:, 2].squeeze().astype(np.int) - # Retain singular dimension - if keypoint_classes.size == 1: - keypoint_classes = keypoint_classes[None] + # Retain singular dimension + if keypoint_classes.size == 1: + keypoint_classes = keypoint_classes[None] - # Sort - ind_sort = argsort(keypoint_classes) - keypoints = keypoints[ind_sort] - keypoint_classes = keypoint_classes[ind_sort] + # Sort + ind_sort = argsort(keypoint_classes) + keypoints = keypoints[ind_sort] + keypoint_classes = keypoint_classes[ind_sort] - # Get all combinations of keypoints - all_combs = array([val for val in itertools.product(range(keypoints.shape[0]), repeat=2)]) + # Get all combinations of keypoints + all_combs = array( + [val for val in itertools.product(range(keypoints.shape[0]), + repeat=2)]) - # But exclude comparison with itself - all_combs = all_combs[all_combs[:, 0] != all_combs[:, 1], :] + # But exclude comparison with itself + all_combs = all_combs[all_combs[:, 0] != all_combs[:, 1], :] - # Measure distance between allcombs[0] and allcombs[1] - ind1 = all_combs[:, 0] - ind2 = all_combs[:, 1] + # Measure distance between allcombs[0] and allcombs[1] + ind1 = all_combs[:, 0] + ind2 = all_combs[:, 1] - class_ind1 = keypoint_classes[ind1] - 1 - class_ind2 = keypoint_classes[ind2] - 1 + class_ind1 = keypoint_classes[ind1] - 1 + class_ind2 = keypoint_classes[ind2] - 1 - duplicate_classes = class_ind1 == class_ind2 + duplicate_classes = class_ind1 == class_ind2 - if not all(duplicate_classes): - ind1 = ind1[~duplicate_classes] - ind2 = ind2[~duplicate_classes] + if not all(duplicate_classes): + ind1 = ind1[~duplicate_classes] + ind2 = ind2[~duplicate_classes] - class_ind1 = class_ind1[~duplicate_classes] - class_ind2 = class_ind2[~duplicate_classes] + class_ind1 = class_ind1[~duplicate_classes] + class_ind2 = class_ind2[~duplicate_classes] - pts_allcombs0 = keypoints[ind1, :2] - pts_allcombs1 = keypoints[ind2, :2] + pts_allcombs0 = keypoints[ind1, :2] + pts_allcombs1 = keypoints[ind2, :2] - # This distance might be 0 for some combinations, - # as it can happen that there is more than one keypoint at a single location - dists = util.L2norm(pts_allcombs0 - pts_allcombs1) + # This distance might be 0 for some combinations, + # as it can happen that there is more than one keypoint at a + # single location + dists = util.L2norm(pts_allcombs0 - pts_allcombs1) - original_dists = self.squareform[class_ind1, class_ind2] + original_dists = self.squareform[class_ind1, class_ind2] - scalechange = dists / original_dists + scalechange = dists / original_dists - # Compute angles - angles = np.empty((pts_allcombs0.shape[0])) + # Compute angles + angles = np.empty((pts_allcombs0.shape[0])) - v = pts_allcombs1 - pts_allcombs0 - angles = np.arctan2(v[:, 1], v[:, 0]) - - original_angles = self.angles[class_ind1, class_ind2] + v = pts_allcombs1 - pts_allcombs0 + angles = np.arctan2(v[:, 1], v[:, 0]) - angle_diffs = angles - original_angles + original_angles = self.angles[class_ind1, class_ind2] - # Fix long way angles - long_way_angles = np.abs(angle_diffs) > math.pi + angle_diffs = angles - original_angles - angle_diffs[long_way_angles] = angle_diffs[long_way_angles] - 
np.sign(angle_diffs[long_way_angles]) * 2 * math.pi + # Fix long way angles + long_way_angles = np.abs(angle_diffs) > math.pi - scale_estimate = median(scalechange) - if not self.estimate_scale: - scale_estimate = 1; + angle_diffs[long_way_angles] = angle_diffs[ + long_way_angles] - np.sign( + angle_diffs[long_way_angles]) * 2 * math.pi - med_rot = median(angle_diffs) - if not self.estimate_rotation: - med_rot = 0; + scale_estimate = median(scalechange) + if not self.estimate_scale: + scale_estimate = 1 - keypoint_class = keypoints[:, 2].astype(np.int) - votes = keypoints[:, :2] - scale_estimate * (util.rotate(self.springs[keypoint_class - 1], med_rot)) + med_rot = median(angle_diffs) + if not self.estimate_rotation: + med_rot = 0 - # Remember all votes including outliers - self.votes = votes + keypoint_class = keypoints[:, 2].astype(np.int) + votes = keypoints[ + :, :2] - scale_estimate * (util.rotate( + self.springs[keypoint_class - 1], med_rot)) - # Compute pairwise distance between votes - pdist = scipy.spatial.distance.pdist(votes) + # Remember all votes including outliers + self.votes = votes - # Compute linkage between pairwise distances - linkage = scipy.cluster.hierarchy.linkage(pdist) + # Compute pairwise distance between votes + pdist = scipy.spatial.distance.pdist(votes) - # Perform hierarchical distance-based clustering - T = scipy.cluster.hierarchy.fcluster(linkage, self.THR_OUTLIER, criterion='distance') + # Compute linkage between pairwise distances + linkage = scipy.cluster.hierarchy.linkage(pdist) - # Count votes for each cluster - cnt = np.bincount(T) # Dummy 0 label remains - - # Get largest class - Cmax = argmax(cnt) + # Perform hierarchical distance-based clustering + T = scipy.cluster.hierarchy.fcluster( + linkage, + self.THR_OUTLIER, + criterion='distance') - # Identify inliers (=members of largest class) - inliers = T == Cmax - # inliers = med_dists < THR_OUTLIER + # Count votes for each cluster + cnt = np.bincount(T) # Dummy 0 label remains - # Remember outliers - self.outliers = keypoints[~inliers, :] + # Get largest class + Cmax = argmax(cnt) - # Stop tracking outliers - keypoints = keypoints[inliers, :] + # Identify inliers (=members of largest class) + inliers = T == Cmax + # inliers = med_dists < THR_OUTLIER - # Remove outlier votes - votes = votes[inliers, :] + # Remember outliers + self.outliers = keypoints[~inliers, :] - # Compute object center - center = np.mean(votes, axis=0) + # Stop tracking outliers + keypoints = keypoints[inliers, :] - return (center, scale_estimate, med_rot, keypoints) + # Remove outlier votes + votes = votes[inliers, :] - def process_frame(self, im_gray): + # Compute object center + center = np.mean(votes, axis=0) - tracked_keypoints, _ = util.track(self.im_prev, im_gray, self.active_keypoints) - (center, scale_estimate, rotation_estimate, tracked_keypoints) = self.estimate(tracked_keypoints) + return (center, scale_estimate, med_rot, keypoints) - # Detect keypoints, compute descriptors - keypoints_cv = self.detector.detect(im_gray) - keypoints_cv, features = self.descriptor.compute(im_gray, keypoints_cv) + def process_frame(self, im_gray): - # Create list of active keypoints - active_keypoints = zeros((0, 3)) + tracked_keypoints, _ = util.track( + self.im_prev, im_gray, self.active_keypoints) + (center, + scale_estimate, + rotation_estimate, + tracked_keypoints) = self.estimate(tracked_keypoints) - # Get the best two matches for each feature - matches_all = self.matcher.knnMatch(features, self.features_database, 2) - # Get all 
matches for selected features - if not any(isnan(center)): - selected_matches_all = self.matcher.knnMatch(features, self.selected_features, len(self.selected_features)) + # Detect keypoints, compute descriptors + keypoints_cv = self.detector.detect(im_gray) + keypoints_cv, features = self.descriptor.compute(im_gray, keypoints_cv) + # Create list of active keypoints + active_keypoints = zeros((0, 3)) - # For each keypoint and its descriptor - if len(keypoints_cv) > 0: - transformed_springs = scale_estimate * util.rotate(self.springs, -rotation_estimate) - for i in range(len(keypoints_cv)): + # Get the best two matches for each feature + matches_all = self.matcher.knnMatch( + features, + self.features_database, + 2) + # Get all matches for selected features + if not any(isnan(center)): + selected_matches_all = self.matcher.knnMatch( + features, self.selected_features, len( + self.selected_features)) - # Retrieve keypoint location - location = np.array(keypoints_cv[i].pt) + # For each keypoint and its descriptor + if len(keypoints_cv) > 0: + transformed_springs = scale_estimate * \ + util.rotate(self.springs, -rotation_estimate) + for i in range(len(keypoints_cv)): - # First: Match over whole image - # Compute distances to all descriptors - matches = matches_all[i] - distances = np.array([m.distance for m in matches]) + # Retrieve keypoint location + location = np.array(keypoints_cv[i].pt) - # Convert distances to confidences, do not weight - combined = 1 - distances / self.DESC_LENGTH - - classes = self.database_classes - - # Get best and second best index - bestInd = matches[0].trainIdx - secondBestInd = matches[1].trainIdx - - # Compute distance ratio according to Lowe - ratio = (1 - combined[0]) / (1 - combined[1]) - - # Extract class of best match - keypoint_class = classes[bestInd] - - # If distance ratio is ok and absolute distance is ok and keypoint class is not background - if ratio < self.THR_RATIO and combined[0] > self.THR_CONF and keypoint_class != 0: - - # Add keypoint to active keypoints - new_kpt = append(location, keypoint_class) - active_keypoints = append(active_keypoints, array([new_kpt]), axis=0) - - # In a second step, try to match difficult keypoints - # If structural constraints are applicable - if not any(isnan(center)): - - # Compute distances to initial descriptors - matches = selected_matches_all[i] - distances = np.array([m.distance for m in matches]) - # Re-order the distances based on indexing - idxs = np.argsort(np.array([m.trainIdx for m in matches])) - distances = distances[idxs] - - # Convert distances to confidences - confidences = 1 - distances / self.DESC_LENGTH - - # Compute the keypoint location relative to the object center - relative_location = location - center - - # Compute the distances to all springs - displacements = util.L2norm(transformed_springs - relative_location) - - # For each spring, calculate weight - weight = displacements < self.THR_OUTLIER # Could be smooth function - - combined = weight * confidences - - classes = self.selected_classes - - # Sort in descending order - sorted_conf = argsort(combined)[::-1] # reverse - - # Get best and second best index - bestInd = sorted_conf[0] - secondBestInd = sorted_conf[1] - - # Compute distance ratio according to Lowe - ratio = (1 - combined[bestInd]) / (1 - combined[secondBestInd]) - - # Extract class of best match - keypoint_class = classes[bestInd] - - # If distance ratio is ok and absolute distance is ok and keypoint class is not background - if ratio < self.THR_RATIO and combined[bestInd] > 
self.THR_CONF and keypoint_class != 0: - - # Add keypoint to active keypoints - new_kpt = append(location, keypoint_class) - - # Check whether same class already exists - if active_keypoints.size > 0: - same_class = np.nonzero(active_keypoints[:, 2] == keypoint_class) - active_keypoints = np.delete(active_keypoints, same_class, axis=0) - - active_keypoints = append(active_keypoints, array([new_kpt]), axis=0) - - # If some keypoints have been tracked - if tracked_keypoints.size > 0: - - # Extract the keypoint classes - tracked_classes = tracked_keypoints[:, 2] - - # If there already are some active keypoints - if active_keypoints.size > 0: - - # Add all tracked keypoints that have not been matched - associated_classes = active_keypoints[:, 2] - missing = ~np.in1d(tracked_classes, associated_classes) - active_keypoints = append(active_keypoints, tracked_keypoints[missing, :], axis=0) - - # Else use all tracked keypoints - else: - active_keypoints = tracked_keypoints - - # Update object state estimate - _ = active_keypoints - self.center = center - self.scale_estimate = scale_estimate - self.rotation_estimate = rotation_estimate - self.tracked_keypoints = tracked_keypoints - self.active_keypoints = active_keypoints - self.im_prev = im_gray - self.keypoints_cv = keypoints_cv - _ = time.time() - - self.tl = (nan, nan) - self.tr = (nan, nan) - self.br = (nan, nan) - self.bl = (nan, nan) - - self.bb = array([nan, nan, nan, nan]) - - self.has_result = False - if not any(isnan(self.center)) and self.active_keypoints.shape[0] > self.num_initial_keypoints / 10: - self.has_result = True - - tl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tl[None, :], rotation_estimate).squeeze()) - tr = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_tr[None, :], rotation_estimate).squeeze()) - br = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_br[None, :], rotation_estimate).squeeze()) - bl = util.array_to_int_tuple(center + scale_estimate * util.rotate(self.center_to_bl[None, :], rotation_estimate).squeeze()) - - min_x = min((tl[0], tr[0], br[0], bl[0])) - min_y = min((tl[1], tr[1], br[1], bl[1])) - max_x = max((tl[0], tr[0], br[0], bl[0])) - max_y = max((tl[1], tr[1], br[1], bl[1])) - - self.tl = tl - self.tr = tr - self.bl = bl - self.br = br - - self.bb = np.array([min_x, min_y, max_x - min_x, max_y - min_y]) + # First: Match over whole image + # Compute distances to all descriptors + matches = matches_all[i] + distances = np.array([m.distance for m in matches]) + + # Convert distances to confidences, do not weight + combined = 1 - distances / self.DESC_LENGTH + + classes = self.database_classes + + # Get best and second best index + bestInd = matches[0].trainIdx + secondBestInd = matches[1].trainIdx + + # Compute distance ratio according to Lowe + ratio = (1 - combined[0]) / (1 - combined[1]) + + # Extract class of best match + keypoint_class = classes[bestInd] + + # If distance ratio is ok and absolute distance is ok and + # keypoint class is not background + if ratio < self.THR_RATIO and combined[ + 0] > self.THR_CONF and keypoint_class != 0: + + # Add keypoint to active keypoints + new_kpt = append(location, keypoint_class) + active_keypoints = append( + active_keypoints, + array( + [new_kpt]), + axis=0) + + # In a second step, try to match difficult keypoints + # If structural constraints are applicable + if not any(isnan(center)): + + # Compute distances to initial descriptors + matches = selected_matches_all[i] + 
distances = np.array([m.distance for m in matches]) + # Re-order the distances based on indexing + idxs = np.argsort(np.array([m.trainIdx for m in matches])) + distances = distances[idxs] + + # Convert distances to confidences + confidences = 1 - distances / self.DESC_LENGTH + + # Compute the keypoint location relative to the object + # center + relative_location = location - center + + # Compute the distances to all springs + displacements = util.L2norm( + transformed_springs - + relative_location) + + # For each spring, calculate weight + # Could be smooth function + weight = displacements < self.THR_OUTLIER + + combined = weight * confidences + + classes = self.selected_classes + + # Sort in descending order + sorted_conf = argsort(combined)[::-1] # reverse + + # Get best and second best index + bestInd = sorted_conf[0] + secondBestInd = sorted_conf[1] + + # Compute distance ratio according to Lowe + ratio = (1 - combined[bestInd]) / \ + (1 - combined[secondBestInd]) + + # Extract class of best match + keypoint_class = classes[bestInd] + + # If distance ratio is ok and absolute distance is ok and + # keypoint class is not background + if ratio < self.THR_RATIO and combined[ + bestInd] > self.THR_CONF and keypoint_class != 0: + + # Add keypoint to active keypoints + new_kpt = append(location, keypoint_class) + + # Check whether same class already exists + if active_keypoints.size > 0: + same_class = np.nonzero( + active_keypoints[ + :, + 2] == keypoint_class) + active_keypoints = np.delete( + active_keypoints, + same_class, + axis=0) + + active_keypoints = append( + active_keypoints, + array( + [new_kpt]), + axis=0) + + # If some keypoints have been tracked + if tracked_keypoints.size > 0: + + # Extract the keypoint classes + tracked_classes = tracked_keypoints[:, 2] + + # If there already are some active keypoints + if active_keypoints.size > 0: + + # Add all tracked keypoints that have not been matched + associated_classes = active_keypoints[:, 2] + missing = ~np.in1d(tracked_classes, associated_classes) + active_keypoints = append( + active_keypoints, + tracked_keypoints[ + missing, + :], + axis=0) + + # Else use all tracked keypoints + else: + active_keypoints = tracked_keypoints + + # Update object state estimate + _ = active_keypoints + self.center = center + self.scale_estimate = scale_estimate + self.rotation_estimate = rotation_estimate + self.tracked_keypoints = tracked_keypoints + self.active_keypoints = active_keypoints + self.im_prev = im_gray + self.keypoints_cv = keypoints_cv + _ = time.time() + + self.tl = (nan, nan) + self.tr = (nan, nan) + self.br = (nan, nan) + self.bl = (nan, nan) + + self.bb = array([nan, nan, nan, nan]) + + self.has_result = False + if not any(isnan(self.center)) and self.active_keypoints.shape[ + 0] > self.num_initial_keypoints / 10: + self.has_result = True + + tl = util.array_to_int_tuple( + center + + scale_estimate * + util.rotate( + self.center_to_tl[ + None, + :], + rotation_estimate).squeeze()) + tr = util.array_to_int_tuple( + center + + scale_estimate * + util.rotate( + self.center_to_tr[ + None, + :], + rotation_estimate).squeeze()) + br = util.array_to_int_tuple( + center + + scale_estimate * + util.rotate( + self.center_to_br[ + None, + :], + rotation_estimate).squeeze()) + bl = util.array_to_int_tuple( + center + + scale_estimate * + util.rotate( + self.center_to_bl[ + None, + :], + rotation_estimate).squeeze()) + + min_x = min((tl[0], tr[0], br[0], bl[0])) + min_y = min((tl[1], tr[1], br[1], bl[1])) + max_x = max((tl[0], tr[0], br[0], 
bl[0])) + max_y = max((tl[1], tr[1], br[1], bl[1])) + + self.tl = tl + self.tr = tr + self.bl = bl + self.br = br + + self.bb = np.array([min_x, min_y, max_x - min_x, max_y - min_y]) diff --git a/run.py b/run.py index 125bd4e..21ddbec 100755 --- a/run.py +++ b/run.py @@ -17,16 +17,61 @@ parser = argparse.ArgumentParser(description='Track an object.') parser.add_argument('inputpath', nargs='?', help='The input path.') -parser.add_argument('--challenge', dest='challenge', action='store_true', help='Enter challenge mode.') -parser.add_argument('--preview', dest='preview', action='store_const', const=True, default=None, help='Force preview') -parser.add_argument('--no-preview', dest='preview', action='store_const', const=False, default=None, help='Disable preview') -parser.add_argument('--no-scale', dest='estimate_scale', action='store_false', help='Disable scale estimation') -parser.add_argument('--with-rotation', dest='estimate_rotation', action='store_true', help='Enable rotation estimation') -parser.add_argument('--bbox', dest='bbox', help='Specify initial bounding box.') -parser.add_argument('--pause', dest='pause', action='store_true', help='Specify initial bounding box.') -parser.add_argument('--output-dir', dest='output', help='Specify a directory for output data.') -parser.add_argument('--quiet', dest='quiet', action='store_true', help='Do not show graphical output (Useful in combination with --output-dir ).') -parser.add_argument('--skip', dest='skip', action='store', default=None, help='Skip the first n frames', type=int) +parser.add_argument( + '--challenge', + dest='challenge', + action='store_true', + help='Enter challenge mode.') +parser.add_argument( + '--preview', + dest='preview', + action='store_const', + const=True, + default=None, + help='Force preview') +parser.add_argument( + '--no-preview', + dest='preview', + action='store_const', + const=False, + default=None, + help='Disable preview') +parser.add_argument( + '--no-scale', + dest='estimate_scale', + action='store_false', + help='Disable scale estimation') +parser.add_argument( + '--with-rotation', + dest='estimate_rotation', + action='store_true', + help='Enable rotation estimation') +parser.add_argument( + '--bbox', + dest='bbox', + help='Specify initial bounding box.') +parser.add_argument( + '--pause', + dest='pause', + action='store_true', + help='Specify initial bounding box.') +parser.add_argument( + '--output-dir', + dest='output', + help='Specify a directory for output data.') +parser.add_argument( + '--quiet', + dest='quiet', + action='store_true', + help='Do not show graphical output \ + (Useful in combination with --output-dir ).') +parser.add_argument( + '--skip', + dest='skip', + action='store', + default=None, + help='Skip the first n frames', + type=int) args = parser.parse_args() @@ -34,201 +79,219 @@ CMT.estimate_rotation = args.estimate_rotation if args.pause: - pause_time = 0 + pause_time = 0 else: - pause_time = 10 + pause_time = 10 if args.output is not None: - if not os.path.exists(args.output): - os.mkdir(args.output) - elif not os.path.isdir(args.output): - raise Exception(args.output + ' exists, but is not a directory') + if not os.path.exists(args.output): + os.mkdir(args.output) + elif not os.path.isdir(args.output): + raise Exception(args.output + ' exists, but is not a directory') if args.challenge: - with open('images.txt') as f: - images = [line.strip() for line in f] + with open('images.txt') as f: + images = [line.strip() for line in f] - init_region = np.genfromtxt('region.txt', 
delimiter=',') - num_frames = len(images) + init_region = np.genfromtxt('region.txt', delimiter=',') + num_frames = len(images) - results = empty((num_frames, 4)) - results[:] = nan + results = empty((num_frames, 4)) + results[:] = nan - results[0, :] = init_region + results[0, :] = init_region - frame = 0 + frame = 0 - im0 = cv2.imread(images[frame]) - im_gray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) - im_draw = np.copy(im0) + im0 = cv2.imread(images[frame]) + im_gray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) + im_draw = np.copy(im0) - tl, br = (util.array_to_int_tuple(init_region[:2]), util.array_to_int_tuple(init_region[:2] + init_region[2:4])) + tl, br = (util.array_to_int_tuple(init_region[:2]), util.array_to_int_tuple( + init_region[:2] + init_region[2:4])) - try: - CMT.initialise(im_gray0, tl, br) - while frame < num_frames: - im = cv2.imread(images[frame]) - im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) - CMT.process_frame(im_gray) - results[frame, :] = CMT.bb + try: + CMT.initialise(im_gray0, tl, br) + while frame < num_frames: + im = cv2.imread(images[frame]) + im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + CMT.process_frame(im_gray) + results[frame, :] = CMT.bb - # Advance frame number - frame += 1 - except: - pass # Swallow errors + # Advance frame number + frame += 1 + except: + pass # Swallow errors - np.savetxt('output.txt', results, delimiter=',') + np.savetxt('output.txt', results, delimiter=',') else: - # Clean up - cv2.destroyAllWindows() - - preview = args.preview - - if args.inputpath is not None: - - # If a path to a file was given, assume it is a single video file - if os.path.isfile(args.inputpath): - cap = cv2.VideoCapture(args.inputpath) - - #Skip first frames if required - if args.skip is not None: - cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, args.skip) - - - # Otherwise assume it is a format string for reading images - else: - cap = util.FileVideoCapture(args.inputpath) - - #Skip first frames if required - if args.skip is not None: - cap.frame = 1 + args.skip - - # By default do not show preview in both cases - if preview is None: - preview = False - - else: - # If no input path was specified, open camera device - cap = cv2.VideoCapture(0) - if preview is None: - preview = True - - # Check if videocapture is working - if not cap.isOpened(): - print 'Unable to open video input.' 
- sys.exit(1) - - while preview: - status, im = cap.read() - cv2.imshow('Preview', im) - k = cv2.waitKey(10) - if not k == -1: - break - - # Read first frame - status, im0 = cap.read() - im_gray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) - im_draw = np.copy(im0) - - if args.bbox is not None: - # Try to disassemble user specified bounding box - values = args.bbox.split(',') - try: - values = [int(v) for v in values] - except: - raise Exception('Unable to parse bounding box') - if len(values) != 4: - raise Exception('Bounding box must have exactly 4 elements') - bbox = np.array(values) - - # Convert to point representation, adding singleton dimension - bbox = util.bb2pts(bbox[None, :]) - - # Squeeze - bbox = bbox[0, :] - - tl = bbox[:2] - br = bbox[2:4] - else: - # Get rectangle input from user - (tl, br) = util.get_rect(im_draw) - - print 'using', tl, br, 'as init bb' - - - CMT.initialise(im_gray0, tl, br) - - frame = 1 - while True: - # Read image - status, im = cap.read() - if not status: - break - im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) - im_draw = np.copy(im) - - tic = time.time() - CMT.process_frame(im_gray) - toc = time.time() - - # Display results - - # Draw updated estimate - if CMT.has_result: - - cv2.line(im_draw, CMT.tl, CMT.tr, (255, 0, 0), 4) - cv2.line(im_draw, CMT.tr, CMT.br, (255, 0, 0), 4) - cv2.line(im_draw, CMT.br, CMT.bl, (255, 0, 0), 4) - cv2.line(im_draw, CMT.bl, CMT.tl, (255, 0, 0), 4) - - util.draw_keypoints(CMT.tracked_keypoints, im_draw, (255, 255, 255)) - # this is from simplescale - util.draw_keypoints(CMT.votes[:, :2], im_draw) # blue - util.draw_keypoints(CMT.outliers[:, :2], im_draw, (0, 0, 255)) - - if args.output is not None: - # Original image - cv2.imwrite('{0}/input_{1:08d}.png'.format(args.output, frame), im) - # Output image - cv2.imwrite('{0}/output_{1:08d}.png'.format(args.output, frame), im_draw) - - # Keypoints - with open('{0}/keypoints_{1:08d}.csv'.format(args.output, frame), 'w') as f: - f.write('x y\n') - np.savetxt(f, CMT.tracked_keypoints[:, :2], fmt='%.2f') - - # Outlier - with open('{0}/outliers_{1:08d}.csv'.format(args.output, frame), 'w') as f: - f.write('x y\n') - np.savetxt(f, CMT.outliers, fmt='%.2f') - - # Votes - with open('{0}/votes_{1:08d}.csv'.format(args.output, frame), 'w') as f: - f.write('x y\n') - np.savetxt(f, CMT.votes, fmt='%.2f') - - # Bounding box - with open('{0}/bbox_{1:08d}.csv'.format(args.output, frame), 'w') as f: - f.write('x y\n') - # Duplicate entry tl is not a mistake, as it is used as a drawing instruction - np.savetxt(f, np.array((CMT.tl, CMT.tr, CMT.br, CMT.bl, CMT.tl)), fmt='%.2f') - - if not args.quiet: - cv2.imshow('main', im_draw) - - # Check key input - k = cv2.waitKey(pause_time) - key = chr(k & 255) - if key == 'q': - break - if key == 'd': - import ipdb; ipdb.set_trace() - - # Remember image - im_prev = im_gray - - # Advance frame number - frame += 1 - - print '{5:04d}: center: {0:.2f},{1:.2f} scale: {2:.2f}, active: {3:03d}, {4:04.0f}ms'.format(CMT.center[0], CMT.center[1], CMT.scale_estimate, CMT.active_keypoints.shape[0], 1000 * (toc - tic), frame) + # Clean up + cv2.destroyAllWindows() + + preview = args.preview + + if args.inputpath is not None: + + # If a path to a file was given, assume it is a single video file + if os.path.isfile(args.inputpath): + cap = cv2.VideoCapture(args.inputpath) + + # Skip first frames if required + if args.skip is not None: + cap.set(cv2.cv.CV_CAP_PROP_POS_FRAMES, args.skip) + + # Otherwise assume it is a format string for reading images + else: + cap = 
util.FileVideoCapture(args.inputpath) + + # Skip first frames if required + if args.skip is not None: + cap.frame = 1 + args.skip + + # By default do not show preview in both cases + if preview is None: + preview = False + + else: + # If no input path was specified, open camera device + cap = cv2.VideoCapture(0) + if preview is None: + preview = True + + # Check if videocapture is working + if not cap.isOpened(): + print 'Unable to open video input.' + sys.exit(1) + + while preview: + status, im = cap.read() + cv2.imshow('Preview', im) + k = cv2.waitKey(10) + if not k == -1: + break + + # Read first frame + status, im0 = cap.read() + im_gray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY) + im_draw = np.copy(im0) + + if args.bbox is not None: + # Try to disassemble user specified bounding box + values = args.bbox.split(',') + try: + values = [int(v) for v in values] + except: + raise Exception('Unable to parse bounding box') + if len(values) != 4: + raise Exception('Bounding box must have exactly 4 elements') + bbox = np.array(values) + + # Convert to point representation, adding singleton dimension + bbox = util.bb2pts(bbox[None, :]) + + # Squeeze + bbox = bbox[0, :] + + tl = bbox[:2] + br = bbox[2:4] + else: + # Get rectangle input from user + (tl, br) = util.get_rect(im_draw) + + print 'using', tl, br, 'as init bb' + + CMT.initialise(im_gray0, tl, br) + + frame = 1 + while True: + # Read image + status, im = cap.read() + if not status: + break + im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + im_draw = np.copy(im) + + tic = time.time() + CMT.process_frame(im_gray) + toc = time.time() + + # Display results + + # Draw updated estimate + if CMT.has_result: + + cv2.line(im_draw, CMT.tl, CMT.tr, (255, 0, 0), 4) + cv2.line(im_draw, CMT.tr, CMT.br, (255, 0, 0), 4) + cv2.line(im_draw, CMT.br, CMT.bl, (255, 0, 0), 4) + cv2.line(im_draw, CMT.bl, CMT.tl, (255, 0, 0), 4) + + util.draw_keypoints(CMT.tracked_keypoints, im_draw, (255, 255, 255)) + # this is from simplescale + util.draw_keypoints(CMT.votes[:, :2], im_draw) # blue + util.draw_keypoints(CMT.outliers[:, :2], im_draw, (0, 0, 255)) + + if args.output is not None: + # Original image + cv2.imwrite('{0}/input_{1:08d}.png'.format(args.output, frame), im) + # Output image + cv2.imwrite( + '{0}/output_{1:08d}.png'.format(args.output, frame), im_draw) + + # Keypoints + with open('{0}/keypoints_{1:08d}.csv'.format(args.output, + frame), 'w') as f: + f.write('x y\n') + np.savetxt(f, CMT.tracked_keypoints[:, :2], fmt='%.2f') + + # Outlier + with open('{0}/outliers_{1:08d}.csv'.format(args.output, + frame), 'w') as f: + f.write('x y\n') + np.savetxt(f, CMT.outliers, fmt='%.2f') + + # Votes + with open('{0}/votes_{1:08d}.csv'.format(args.output, + frame), 'w') as f: + f.write('x y\n') + np.savetxt(f, CMT.votes, fmt='%.2f') + + # Bounding box + with open('{0}/bbox_{1:08d}.csv'.format(args.output, + frame), 'w') as f: + f.write('x y\n') + # Duplicate entry tl is not a mistake, as it is used as a + # drawing instruction + np.savetxt( + f, + np.array( + (CMT.tl, + CMT.tr, + CMT.br, + CMT.bl, + CMT.tl)), + fmt='%.2f') + + if not args.quiet: + cv2.imshow('main', im_draw) + + # Check key input + k = cv2.waitKey(pause_time) + key = chr(k & 255) + if key == 'q': + break + if key == 'd': + import ipdb + ipdb.set_trace() + + # Remember image + im_prev = im_gray + + # Advance frame number + frame += 1 + + print '{5:04d}: center: {0:.2f},{1:.2f} scale: {2:.2f},\ + active: {3:03d}, {4:04.0f}ms'.format(CMT.center[0], CMT.center[1], + CMT.scale_estimate, + 
CMT.active_keypoints.shape[0], + 1000 * (toc - tic), frame) diff --git a/util.py b/util.py index c0ef844..797d32a 100644 --- a/util.py +++ b/util.py @@ -6,189 +6,204 @@ class FileVideoCapture(object): - def __init__(self, path): - self.path = path - self.frame = 1 - - def isOpened(self): - im = cv2.imread(self.path.format(self.frame)) - return im != None - - def read(self): - im = cv2.imread(self.path.format(self.frame)) - status = im != None - if status: - self.frame += 1 - return status, im + def __init__(self, path): + self.path = path + self.frame = 1 + + def isOpened(self): + im = cv2.imread(self.path.format(self.frame)) + return im is not None + + def read(self): + im = cv2.imread(self.path.format(self.frame)) + status = im is not None + if status: + self.frame += 1 + return status, im + def squeeze_pts(X): - X = X.squeeze() - if len(X.shape) == 1: - X = np.array([X]) - return X + X = X.squeeze() + if len(X.shape) == 1: + X = np.array([X]) + return X + def array_to_int_tuple(X): - return (int(X[0]), int(X[1])) + return (int(X[0]), int(X[1])) + def L2norm(X): - return np.sqrt((X ** 2).sum(axis=1)) + return np.sqrt((X ** 2).sum(axis=1)) current_pos = None tl = None br = None + def get_rect(im, title='get_rect'): - global current_pos - global tl - global br - global released_once + global current_pos + global tl + global br + global released_once - current_pos = None - tl = None - br = None - released_once = False + current_pos = None + tl = None + br = None + released_once = False - cv2.namedWindow(title) - cv2.moveWindow(title, 100, 100) + cv2.namedWindow(title) + cv2.moveWindow(title, 100, 100) - def onMouse(event, x, y, flags, param): - global current_pos - global tl - global br - global released_once + def onMouse(event, x, y, flags, param): + global current_pos + global tl + global br + global released_once - current_pos = (x, y) + current_pos = (x, y) - if tl is not None and not (flags & cv2.EVENT_FLAG_LBUTTON): - released_once = True + if tl is not None and not (flags & cv2.EVENT_FLAG_LBUTTON): + released_once = True - if flags & cv2.EVENT_FLAG_LBUTTON: - if tl is None: - tl = current_pos - elif released_once: - br = current_pos + if flags & cv2.EVENT_FLAG_LBUTTON: + if tl is None: + tl = current_pos + elif released_once: + br = current_pos - cv2.setMouseCallback(title, onMouse) - cv2.imshow(title, im) + cv2.setMouseCallback(title, onMouse) + cv2.imshow(title, im) - while br is None: - im_draw = np.copy(im) + while br is None: + im_draw = np.copy(im) - if tl is not None: - cv2.rectangle(im_draw, tl, current_pos, (255, 0, 0)) + if tl is not None: + cv2.rectangle(im_draw, tl, current_pos, (255, 0, 0)) - cv2.imshow(title, im_draw) - _ = cv2.waitKey(10) + cv2.imshow(title, im_draw) + _ = cv2.waitKey(10) - cv2.destroyWindow(title) + cv2.destroyWindow(title) + + return (tl, br) - return (tl, br) def in_rect(keypoints, tl, br): - if type(keypoints) is list: - keypoints = keypoints_cv_to_np(keypoints) + if isinstance(keypoints, list): + keypoints = keypoints_cv_to_np(keypoints) + + x = keypoints[:, 0] + y = keypoints[:, 1] - x = keypoints[:, 0] - y = keypoints[:, 1] + C1 = x > tl[0] + C2 = y > tl[1] + C3 = x < br[0] + C4 = y < br[1] - C1 = x > tl[0] - C2 = y > tl[1] - C3 = x < br[0] - C4 = y < br[1] + result = C1 & C2 & C3 & C4 - result = C1 & C2 & C3 & C4 + return result - return result def keypoints_cv_to_np(keypoints_cv): - keypoints = np.array([k.pt for k in keypoints_cv]) - return keypoints + keypoints = np.array([k.pt for k in keypoints_cv]) + return keypoints + def 
find_nearest_keypoints(keypoints, pos, number=1): - if type(pos) is tuple: - pos = np.array(pos) - if type(keypoints) is list: - keypoints = keypoints_cv_to_np(keypoints) + if isinstance(pos, tuple): + pos = np.array(pos) + if isinstance(keypoints, list): + keypoints = keypoints_cv_to_np(keypoints) + + pos_to_keypoints = np.sqrt(np.power(keypoints - pos, 2).sum(axis=1)) + ind = np.argsort(pos_to_keypoints) + return ind[:number] - pos_to_keypoints = np.sqrt(np.power(keypoints - pos, 2).sum(axis=1)) - ind = np.argsort(pos_to_keypoints) - return ind[:number] def draw_keypoints(keypoints, im, color=(255, 0, 0)): - - for k in keypoints: - radius = 3 # int(k.size / 2) - center = (int(k[0]), int(k[1])) - # Draw circle - cv2.circle(im, center, radius, color) + for k in keypoints: + radius = 3 # int(k.size / 2) + center = (int(k[0]), int(k[1])) + + # Draw circle + cv2.circle(im, center, radius, color) + def track(im_prev, im_gray, keypoints, THR_FB=20): - if type(keypoints) is list: - keypoints = keypoints_cv_to_np(keypoints) + if isinstance(keypoints, list): + keypoints = keypoints_cv_to_np(keypoints) - num_keypoints = keypoints.shape[0] + num_keypoints = keypoints.shape[0] - # Status of tracked keypoint - True means successfully tracked - status = [False] * num_keypoints + # Status of tracked keypoint - True means successfully tracked + status = [False] * num_keypoints - # If at least one keypoint is active - if num_keypoints > 0: - # Prepare data for opencv: - # Add singleton dimension - # Use only first and second column - # Make sure dtype is float32 - pts = keypoints[:, None, :2].astype(np.float32) + # If at least one keypoint is active + if num_keypoints > 0: + # Prepare data for opencv: + # Add singleton dimension + # Use only first and second column + # Make sure dtype is float32 + pts = keypoints[:, None, :2].astype(np.float32) - # Calculate forward optical flow for prev_location - nextPts, status, _ = cv2.calcOpticalFlowPyrLK(im_prev, im_gray, pts, None) + # Calculate forward optical flow for prev_location + nextPts, status, _ = cv2.calcOpticalFlowPyrLK( + im_prev, im_gray, pts, None) - # Calculate backward optical flow for prev_location - pts_back, _, _ = cv2.calcOpticalFlowPyrLK(im_gray, im_prev, nextPts, None) + # Calculate backward optical flow for prev_location + pts_back, _, _ = cv2.calcOpticalFlowPyrLK( + im_gray, im_prev, nextPts, None) - # Remove singleton dimension - pts_back = squeeze_pts(pts_back) - pts = squeeze_pts(pts) - nextPts = squeeze_pts(nextPts) - status = status.squeeze() + # Remove singleton dimension + pts_back = squeeze_pts(pts_back) + pts = squeeze_pts(pts) + nextPts = squeeze_pts(nextPts) + status = status.squeeze() - # Calculate forward-backward error - fb_err = np.sqrt(np.power(pts_back - pts, 2).sum(axis=1)) + # Calculate forward-backward error + fb_err = np.sqrt(np.power(pts_back - pts, 2).sum(axis=1)) - # Set status depending on fb_err and lk error - large_fb = fb_err > THR_FB - status = ~large_fb & status.astype(np.bool) + # Set status depending on fb_err and lk error + large_fb = fb_err > THR_FB + status = ~large_fb & status.astype(np.bool) - nextPts = nextPts[status, :] - keypoints_tracked = keypoints[status, :] - keypoints_tracked[:, :2] = nextPts + nextPts = nextPts[status, :] + keypoints_tracked = keypoints[status, :] + keypoints_tracked[:, :2] = nextPts + + else: + keypoints_tracked = np.array([]) + return keypoints_tracked, status - else: - keypoints_tracked = np.array([]) - return keypoints_tracked, status def rotate(pt, rad): - if(rad == 0): - 
return pt - - pt_rot = np.empty(pt.shape) + if(rad == 0): + return pt + + pt_rot = np.empty(pt.shape) - s, c = [f(rad) for f in (math.sin, math.cos)] + s, c = [f(rad) for f in (math.sin, math.cos)] - pt_rot[:, 0] = c * pt[:, 0] - s * pt[:, 1] - pt_rot[:, 1] = s * pt[:, 0] + c * pt[:, 1] + pt_rot[:, 0] = c * pt[:, 0] - s * pt[:, 1] + pt_rot[:, 1] = s * pt[:, 0] + c * pt[:, 1] + + return pt_rot - return pt_rot def br(bbs): - result = hstack((bbs[:, [0]] + bbs[:, [2]] - 1, bbs[:, [1]] + bbs[:, [3]] - 1)) + result = hstack( + (bbs[:, [0]] + bbs[:, [2]] - 1, bbs[:, [1]] + bbs[:, [3]] - 1)) + + return result - return result def bb2pts(bbs): - pts = hstack((bbs[:, :2], br(bbs))) + pts = hstack((bbs[:, :2], br(bbs))) - return pts + return pts
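
Note on usage: this change is a whitespace/formatting cleanup and does not alter the tracker's public interface. For orientation, below is a minimal driver sketch of the CMT class touched by this diff, mirroring what run.py already does. It is an illustrative sketch, not part of the change: the video path and the initial rectangle are placeholder assumptions, and the instantiation of the CMT class follows run.py's existing usage.

    # Minimal sketch of driving the tracker (assumes the Python 2 / OpenCV 2.4
    # era API used throughout this repository). 'video.avi' and the initial
    # rectangle below are placeholders, not values from the codebase.
    import cv2
    import CMT

    tracker = CMT.CMT()
    tracker.estimate_scale = True       # class options as defined in CMT.py
    tracker.estimate_rotation = True

    cap = cv2.VideoCapture('video.avi')             # placeholder input path
    status, im0 = cap.read()                        # first frame initialises the model
    im_gray0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)

    tl, br = (10, 10), (100, 100)                   # placeholder top-left / bottom-right corners
    tracker.initialise(im_gray0, tl, br)

    while True:
        status, im = cap.read()
        if not status:
            break
        im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
        tracker.process_frame(im_gray)

        # bb is [x, y, width, height]; has_result is False when the track is lost
        if tracker.has_result:
            print(tracker.bb)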