Training LCHF needs another 8 GB of RAM...

meiqua · meiqua · commit b6bf3a97acf1 · 2018-08-30T15:22:29.000+08:00
diff --git a/LCHF_test.py b/LCHF_test.py
@@ -119,11 +119,11 @@ def nms(dets, thresh):
 if scene_ids:
     scene_ids_curr = set(scene_ids_curr).intersection(scene_ids)
 
-# mode = 'render_train'
-mode = 'test'
+mode = 'render_train'
+# mode = 'test'
 
 base_path = join(dp['base_path'], 'LCHF')
-train_from_radius = 1000
+train_from_radius = 500
 if mode == 'render_train':
     start_time = time.time()
     visual = True
@@ -160,7 +160,7 @@ def nms(dets, thresh):
             # Sample views
             views, views_level = view_sampler.sample_views(min_n_views, radius,
                                                            azimuth_range, elev_range,
-                                                           tilt_range=(0, 2*math.pi), tilt_step=0.1*math.pi)
+                                                           tilt_range=(-math.pi/2, math.pi/2), tilt_step=0.2*math.pi)
             print('Sampled views: ' + str(len(views)))
 
             # Render the object model from all the views
@@ -210,41 +210,37 @@ def nms(dets, thresh):
                 cols = depth.shape[1]
                 # have read rgb, depth, pose, obj_bb, obj_id, bbox, mask here
 
-                # 5 box
-                for i in range(5):
-                    j = (i - (i%2))/2
-
-                    # offset, width, height, depth
-                    offset1 = [int(i%2*cols/2), int(j*rows/2), int(cols / 2), int(rows / 2)]
-                    if i == 4:
-                        offset1 = [int(cols / 4), int(rows / 4), int(cols / 2), int(rows / 2), t[2]]
-
-                    rgb1 = rgb[offset1[1]:(offset1[1] + offset1[3]), offset1[0]:(offset1[0] + offset1[2]), :]
-                    depth1 = depth[offset1[1]:(offset1[1] + offset1[3]), offset1[0]:(offset1[0] + offset1[2])]
-
-                    visualized = False
-                    if visualized:
-                        rgb_ = np.copy(rgb)
-                        cv2.rectangle(rgb_, (offset1[0], offset1[1]),
-                                      (offset1[0] + offset1[2], offset1[1] + offset1[3]), (0, 0, 255), 1)
-                        cv2.imshow('rgb', rgb_)
-                        cv2.imshow('rgb1', rgb1)
-                        cv2.waitKey(0)
-
-                    LCHF_linemod_feat = cxxLCHF_pybind.Linemod_feature(rgb1, depth1)
-                    if LCHF_linemod_feat.constructEmbedding():  # extract template OK
-                        LCHF_linemod_feat.constructResponse()  # extract response map for simi func
-                    else:
-                        # print('points not enough')
-                        continue  # no enough points for template extraction, pass
-
-                    LCHF_linemod_feats.append(LCHF_linemod_feat)  # record feature
-
-                    LCHF_info = cxxLCHF_pybind.Info()
-                    LCHF_info.rpy = (rotationMatrixToEulerAngles(R)).astype(np.float32)  # make sure consistent
-                    LCHF_info.t = (np.array(offset1)).astype(np.float32)
-                    LCHF_info.id = str(obj_id)
-                    LCHF_infos.append(LCHF_info)  # record info
+                # 5x5 cm patch (50x50 px), stride 10 px, assume 1pix = 1mm in around 500mm depth
+                stride = 10
+                for row in range(0, rows - 50, stride):
+                    for col in range(0, cols - 50, stride):
+                        offset1 = [col, row, 50, 50]
+                        rgb1 = rgb[offset1[1]:(offset1[1] + offset1[3]), offset1[0]:(offset1[0] + offset1[2]), :]
+                        depth1 = depth[offset1[1]:(offset1[1] + offset1[3]), offset1[0]:(offset1[0] + offset1[2])]
+
+                        visualized = False
+                        if visualized:
+                            rgb_ = np.copy(rgb)
+                            cv2.rectangle(rgb_, (offset1[0], offset1[1]),
+                                          (offset1[0] + offset1[2], offset1[1] + offset1[3]), (0, 0, 255), 1)
+                            cv2.imshow('rgb', rgb_)
+                            cv2.imshow('rgb1', rgb1)
+                            cv2.waitKey(0)
+
+                        LCHF_linemod_feat = cxxLCHF_pybind.Linemod_feature(rgb1, depth1)
+                        if LCHF_linemod_feat.constructEmbedding():  # extract template OK
+                            LCHF_linemod_feat.constructResponse()  # extract response map for simi func
+                        else:
+                            # print('points not enough')
+                            continue  # no enough points for template extraction, pass
+
+                        LCHF_linemod_feats.append(LCHF_linemod_feat)  # record feature
+
+                        LCHF_info = cxxLCHF_pybind.Info()
+                        LCHF_info.rpy = (rotationMatrixToEulerAngles(R)).astype(np.float32)  # make sure consistent
+                        LCHF_info.t = (np.array(offset1)).astype(np.float32)
+                        LCHF_info.id = str(obj_id)
+                        LCHF_infos.append(LCHF_info)  # record info
 
                 del rgb, depth, mask
 
@@ -258,7 +254,6 @@ def nms(dets, thresh):
     forest = cxxLCHF_pybind.lchf_model_train(LCHF_linemod_feats, LCHF_infos)
     cxxLCHF_pybind.lchf_model_saveForest(forest, base_path)
 
-
     elapsed_time = time.time() - start_time
     print('train time: {}\n'.format(elapsed_time))
 
@@ -307,7 +302,7 @@ def nms(dets, thresh):
 
             rows = depth.shape[0]
             cols = depth.shape[1]
-            stride = 3
+            stride = 5
 
             # should be max_bbox * render_depth/max_scene_depth
             width = 50  # bigger is OK, top left corner should align obj
@@ -345,47 +340,66 @@ def nms(dets, thresh):
             start_time = time.time()
             print('forest predict time: {}'.format(elapsed_time))
 
-            # voting isn't working well, and
-            # should meanshift the leaf first
-            num_x_bins = int(cols/20)
-            num_y_bins = int(rows/20)
+            steps = 10
+            num_x_bins = int(cols/steps)
+            num_y_bins = int(rows/steps)
             num_angle_bins = 10
 
+            print('x_bins: {}, y_bins: {}'.format(num_x_bins, num_y_bins))
+
             votes = np.zeros(shape=(num_x_bins, num_y_bins, num_angle_bins, num_angle_bins, num_angle_bins),
                              dtype=np.float32)
 
+            voted_ids = {}
+
             for scene_i in range(len(leaf_of_trees_of_scene)):
                 trees_of_scene = leaf_of_trees_of_scene[scene_i]
                 roi = rois[scene_i]
+
                 for tree_i in range(len(trees_of_scene)):
                     leaf_i = trees_of_scene[tree_i]
-                    leaf_map = leaf_feats_map[tree_i]
-                    predicted_ids = leaf_map[leaf_i]
-                    for id_ in predicted_ids:
-                        info = LCHF_infos[id_]
-                        offset = info.t
-                        offset_x = offset[0] * train_from_radius / roi[4]
-                        offset_y = offset[1] * train_from_radius / roi[4]
-
-                        x = int((roi[0] - offset_x) / 20)
-                        y = int((roi[1] - offset_y) / 20)
-                        theta0 = int(info.rpy[0] / 2 / 3.14 * num_angle_bins)
-                        theta1 = int(info.rpy[1] / 2 / 3.14 * num_angle_bins)
-                        theta2 = int(info.rpy[2] / 2 / 3.14 * num_angle_bins)
-
-                        # votes[x-1:x+1, y-1:y+1, theta0-1:theta0+1, theta1-1:theta1+1, theta2-1:theta2+1] \
-                        #     += 1.0/len(predicted_ids)/len(trees_of_scene)
-                        votes[x, y, theta0, theta1, theta2] \
-                            += 1.0/len(predicted_ids)/len(trees_of_scene)
+
+                    # reuse the cached votes if this (tree_i, leaf_i) pair has already voted
+                    if (tree_i, leaf_i) in voted_ids:
+                        votes += voted_ids[(tree_i, leaf_i)]
+                    else:
+                        # leaf_i votes
+                        votes_local = np.zeros(
+                            shape=(num_x_bins, num_y_bins, num_angle_bins, num_angle_bins, num_angle_bins),
+                            dtype=np.float32)
+
+                        leaf_map = leaf_feats_map[tree_i]
+                        predicted_ids = leaf_map[leaf_i]
+                        for id_ in predicted_ids:
+                            info = LCHF_infos[id_]
+                            offset = info.t
+                            offset_x = offset[0] * train_from_radius / roi[4]
+                            offset_y = offset[1] * train_from_radius / roi[4]
+
+                            x = int((roi[0] - offset_x) / steps)
+                            y = int((roi[1] - offset_y) / steps)
+                            theta0 = int(info.rpy[0] / 2 / 3.14 * num_angle_bins)
+                            theta1 = int(info.rpy[1] / 2 / 3.14 * num_angle_bins)
+                            theta2 = int(info.rpy[2] / 2 / 3.14 * num_angle_bins)
+
+                            # votes[x-1:x+1, y-1:y+1, theta0-1:theta0+1, theta1-1:theta1+1, theta2-1:theta2+1] \
+                            #     += 1.0/len(predicted_ids)/len(trees_of_scene)
+                            votes_local[x, y, theta0, theta1, theta2] \
+                                += 1.0 / len(predicted_ids) / len(trees_of_scene)
+                            votes += votes_local
+
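+                            # cache this leaf's votes under (tree_i, leaf_i); NOTE(review): x/y bins above depend on roi - confirm reuse across rois is intended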
+                            voted_ids[(tree_i, leaf_i)] = votes_local
 
             votes_sort_idx = np.dstack(np.unravel_index(np.argsort(votes.ravel()), votes.shape))
 
-            top10 = 100
+            top10 = 10
+            if top10>votes_sort_idx.shape[1]:
+                top10 = votes_sort_idx.shape[1]
+
+            print('top {}'.format(top10))
             for i in range(1, top10):
-                if 19 > votes_sort_idx[0, -i, 0] > 1 and 19 > votes_sort_idx[0, -i, 1] > 1:
-                    cv2.circle(rgb, (votes_sort_idx[0, -i, 1]*20, votes_sort_idx[0, -i, 0]*20), 2, (0, 0, 255), -1)
-                    print('votes_sort_idx: {}, votes: {}'.format(votes_sort_idx[0, -i, :],
-                                                                 votes[tuple(votes_sort_idx[0, -i, :])]))
+                    cv2.circle(rgb, (votes_sort_idx[0, -i, 0]*steps, votes_sort_idx[0, -i, 1]*steps), 4, (0, 255-i*2, 0), -1)
 
             elapsed_time = time.time() - start_time
             print('voting time: {}'.format(elapsed_time))
diff --git a/cxxLCHF/CMakeLists.txt b/cxxLCHF/CMakeLists.txt
@@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8)
 set (CMAKE_CXX_STANDARD 14)
 project(cxxLCHF_pybind)
 
-SET(CMAKE_BUILD_TYPE "Debug")
-#SET(CMAKE_BUILD_TYPE "Release")
+#SET(CMAKE_BUILD_TYPE "Debug")
+SET(CMAKE_BUILD_TYPE "Release")
 SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb -fPIC")
 SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -fPIC")
 
diff --git a/cxxLCHF/lchf.cpp b/cxxLCHF/lchf.cpp
@@ -719,28 +719,40 @@ float Linemod_feature::similarity(const Linemod_feature &other) const{
     float score = 0;
     auto& rgb_res = other.embedding.rgb_response;
 
-//    cv::Mat templ = cv::Mat::zeros(500, 500, CV_8UC1);
-//    cv::Mat templ2 = cv::Mat::zeros(500, 500, CV_8UC1);
-    for(auto element: embedding.rgb_embedding){
+    auto get_depth = [](const cv::Mat& depth, int y, int x){
+        int ker_size = 5;
+        int x_tl = x - ker_size/2;
+        if(x_tl<0) x_tl = 0;
+        int y_tl = y - ker_size/2;
+        if(y_tl<0) y_tl = 0;
+
+        int width = ker_size;
+        if(width>depth.cols-x_tl) width = depth.cols-x_tl;
+        int height = ker_size;
+        if(height>depth.rows-y_tl) height = depth.rows-y_tl;
 
-//        templ.at<uchar>(element.y+100, element.x+100) = 255;
+        cv::Rect roi(x_tl, y_tl, width, height);
+        int ave_depth = int(cv::sum(depth(roi))[0]/cv::countNonZero(depth(roi)));
+
+        return ave_depth;
+    };
+
+    for(auto element: embedding.rgb_embedding){
 
         if(other.embedding.center_dep>0 && embedding.center_dep>0){
             int normalize_x = element.x*embedding.center_dep/other.embedding.center_dep;
             int normalize_y = element.y*embedding.center_dep/other.embedding.center_dep;
 
-//            templ2.at<uchar>(normalize_y+100, normalize_x+100) = 255;
-
             if(element.y>=depth.rows || element.x>=depth.cols ||
                     normalize_y>=other.depth.rows || normalize_x>=other.depth.cols){
                 continue;
             }
 
-//            int z_1 = embedding.center_dep-get_depth(depth, element.y, element.x);
-//            int z_2 = other.embedding.center_dep-get_depth(other.depth, normalize_y,normalize_x);
+            int z_1 = embedding.center_dep-get_depth(depth, element.y, element.x);
+            int z_2 = other.embedding.center_dep-get_depth(other.depth, normalize_y,normalize_x);
 
-//            bool valid = std::abs(z_1-z_2) < embedding.z_check;
-//            if(valid)
+            bool valid = std::abs(z_1-z_2) < embedding.z_check;
+            if(valid)
             {
                 auto response = rgb_res[element.label];
                 score += response.at<uchar>(normalize_y,normalize_x);
@@ -749,11 +761,6 @@ float Linemod_feature::similarity(const Linemod_feature &other) const{
         }
     }
 
-//    std::cout << embedding.rgb_embedding.size() << std::endl;
-//    cv::imshow("t1", templ);
-//    cv::imshow("t2", templ2);
-//    cv::waitKey(0);
-
     auto& dep_res = other.embedding.dep_response;
     for(auto element: embedding.depth_embedding){
         if(other.embedding.center_dep>0 && embedding.center_dep>0){
@@ -765,11 +772,11 @@ float Linemod_feature::similarity(const Linemod_feature &other) const{
                 continue;
             }
 
-//            int z_1 = embedding.center_dep-get_depth(depth, element.y, element.x);
-//            int z_2 = other.embedding.center_dep-get_depth(other.depth, normalize_y,normalize_x);
+            int z_1 = embedding.center_dep-get_depth(depth, element.y, element.x);
+            int z_2 = other.embedding.center_dep-get_depth(other.depth, normalize_y,normalize_x);
 
-//            bool valid = std::abs(z_1-z_2) < embedding.z_check;
-//            if(valid)
+            bool valid = std::abs(z_1-z_2) < embedding.z_check;
+            if(valid)
             {
                 auto response = dep_res[element.label];
                 score += response.at<uchar>(normalize_y,normalize_x);
diff --git a/cxxLCHF/lchf.h b/cxxLCHF/lchf.h
@@ -26,7 +26,7 @@ class Linemod_embedding {
         distance_threshold(2000),
         difference_threshold(50),
         extract_threshold(2),
-        z_check(100){}
+        z_check(200){}
     float weak_threshold, strong_threshold;
     int num_features, distance_threshold, difference_threshold, extract_threshold;
     class element {
diff --git a/cxxLCHF/test.cpp b/cxxLCHF/test.cpp
@@ -163,20 +163,36 @@ void API_test(){
     std::vector<std::vector<int>> rois;
     for(int x=0; x<cols-width-2*stride; x+=stride){
         for(int y=0; y<rows-height-2*stride; y+=stride){
-            std::vector<int> roi = {x, y, width, height, dep_x, dep_y};
+
+            int dep_value = depth.at<ushort>(y+dep_y, x+dep_x);
+            if(dep_value==0) continue;
+            std::vector<int> roi = {x, y, width, height, dep_value};
             rois.push_back(roi);
         }
     }
-
     auto scene_feats = lchf_model::get_feats_from_scene(rgb, depth, rois);
-
     auto leaf_of_trees_of_scene = lchf_model::predict(forest, feats, scene_feats);
+    auto leaf_feats_map = lchf_model::getLeaf_feats_map(forest);
+
+    std::map<int, double> bg_prob;
+    for(int scene_iter=0; scene_iter<leaf_of_trees_of_scene.size(); scene_iter++){
+        auto& trees_of_scene = leaf_of_trees_of_scene[scene_iter];
+        auto& roi = rois[scene_iter];
+        for(int tree_iter=0; tree_iter<trees_of_scene.size(); tree_iter++){
+            auto& leaf_iter = trees_of_scene[tree_iter];
+            auto& leaf_map = leaf_feats_map[tree_iter];
+            auto& predicted_ids = leaf_map[leaf_iter];
+
+            for(auto id: predicted_ids){
+                if(bg_prob.find(id) == bg_prob.end()){
+                    bg_prob[id] = 1.0/predicted_ids.size()
+                            /trees_of_scene.size()/leaf_of_trees_of_scene.size();
+                }else{
+                    bg_prob[id] += 1.0/predicted_ids.size()
+                            /trees_of_scene.size()/leaf_of_trees_of_scene.size();
+                }
+            }
 
-    auto first_one = leaf_of_trees_of_scene[0];
-    for(auto& leaf_of_trees: leaf_of_trees_of_scene){
-        if(leaf_of_trees!=first_one){
-            std::cout << "found it!" << std::endl;
-            first_one = leaf_of_trees;
         }
     }
 }
@@ -209,21 +225,21 @@ void simi_test(){
 //                std::cout << "\nself simi(should be 100): " << simi << std::endl;
 //            }
 
-            { // result is around 75, 55-95
-                cv::Mat rgb_2, depth_2;
-                pyrDown(rgb(bbox), rgb_2);
+//            { // result is around 75, 55-95
+//                cv::Mat rgb_2, depth_2;
+//                pyrDown(rgb(bbox), rgb_2);
 
-                imshow("rgb_2", rgb_2);
+//                imshow("rgb_2", rgb_2);
 
-                pyrDown(depth(bbox), depth_2);
-                depth_2 *= 2;
+//                pyrDown(depth(bbox), depth_2);
+//                depth_2 *= 2;
 
-                Linemod_feature f_2(rgb_2, depth_2);
-                f_2.constructResponse();
-                float simi = features[features.size()-1]
-                                        .similarity(f_2);
-                std::cout << "\ndiff depth simi: " << simi << std::endl;
-            }
+//                Linemod_feature f_2(rgb_2, depth_2);
+//                f_2.constructResponse();
+//                float simi = features[features.size()-1]
+//                                        .similarity(f_2);
+//                std::cout << "\ndiff depth simi: " << simi << std::endl;
+//            }
 
 //            if(features.size() > 1){
 //                float simi = features[features.size()-2]
@@ -235,7 +251,7 @@ void simi_test(){
     }
 }
 int main(){
-
+//    API_test();
     simi_test();
     //    google::protobuf::ShutdownProtobufLibrary();
     cout << "end" << endl;