configure dynamic shape tensorrt

felixhjh · felixhjh · commit ccfaad36a67f · 2022-01-13T07:59:18.000Z
diff --git a/examples/Pipeline/PaddleOCR/ocr/config.yml b/examples/Pipeline/PaddleOCR/ocr/config.yml
@@ -37,7 +37,7 @@ op:
             model_config: ocr_det_model
 
             #Fetch结果列表，以client_config中fetch_var的alias_name为准
-            fetch_list: ["concat_1.tmp_0"]
+            fetch_list: ["save_infer_model/scale_0.tmp_1"]
             
             # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
             device_type: 0
@@ -53,6 +53,9 @@ op:
 
             #ir_optim
             ir_optim: True
+            
+            #开启tensorrt后，进行优化的子图包含的最少节点数
+            #min_subgraph_size: 13
     rec:
         #并发数，is_thread_op=True时，为线程并发；否则为进程并发
         concurrency: 3
@@ -73,7 +76,7 @@ op:
             model_config: ocr_rec_model
 
             #Fetch结果列表，以client_config中fetch_var的alias_name为准
-            fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] 
+            fetch_list: ["save_infer_model/scale_0.tmp_1"]
             # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
             device_type: 0
 
@@ -88,3 +91,6 @@ op:
 
             #ir_optim
             ir_optim: True
+
+            #开启tensorrt后，进行优化的子图包含的最少节点数
+            #min_subgraph_size: 3
diff --git a/examples/Pipeline/PaddleOCR/ocr/web_service.py b/examples/Pipeline/PaddleOCR/ocr/web_service.py
@@ -40,6 +40,37 @@ def init_op(self):
             "min_size": 3
         })
 
+    def set_dynamic_shape_info(self):  # fills self.dynamic_shape_info with min/max/opt shape dicts keyed by tensor name
+        min_input_shape = {  # stored under "min_input_shape" below
+            "x": [1, 3, 50, 50],  # "x" is the feed key returned by preprocess()
+            "conv2d_182.tmp_0": [1, 1, 20, 20],  # presumably an intermediate tensor of the model - TODO confirm
+            "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20],
+            "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20]
+        }
+        max_input_shape = {  # stored under "max_input_shape" below; same keys as min_input_shape
+            "x": [1, 3, 1536, 1536],
+            "conv2d_182.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960],
+            "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960],
+        }
+        opt_input_shape = {  # stored under "opt_input_shape" below; same keys as min_input_shape
+            "x": [1, 3, 960, 960],
+            "conv2d_182.tmp_0": [3, 96, 240, 240],
+            "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240],
+            "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240],
+            "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240],
+            "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240],
+        }
+        self.dynamic_shape_info = {  # these three keys are the ones load_model_config() indexes for config.set_trt_dynamic_shape_info
+            "min_input_shape": min_input_shape,
+            "max_input_shape": max_input_shape,
+            "opt_input_shape": opt_input_shape,
+        }    
+
     def preprocess(self, input_dicts, data_id, log_id):
         (_, input_dict), = input_dicts.items()
         imgs = []
@@ -52,11 +83,11 @@ def preprocess(self, input_dicts, data_id, log_id):
             det_img = self.det_preprocess(self.im)
             _, self.new_h, self.new_w = det_img.shape
             imgs.append(det_img[np.newaxis, :].copy())
-        return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
+        return {"x": np.concatenate(imgs, axis=0)}, False, None, ""
 
     def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
         #        print(fetch_dict)
-        det_out = fetch_dict["concat_1.tmp_0"]
+        det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]
         ratio_list = [
             float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
         ]
@@ -71,6 +102,25 @@ def init_op(self):
         self.ocr_reader = OCRReader()
         self.get_rotate_crop_image = GetRotateCropImage()
         self.sorted_boxes = SortedBoxes()
+    
+    def set_dynamic_shape_info(self):  # fills self.dynamic_shape_info with min/max/opt shape dicts keyed by tensor name
+        min_input_shape = {  # stored under "min_input_shape" below
+            "x": [1, 3, 32, 10],  # "x" matches the feed key built in preprocess()
+            "lstm_1.tmp_0": [1, 1, 128]  # presumably an intermediate tensor of the model - TODO confirm
+        }
+        max_input_shape = {  # stored under "max_input_shape" below; same keys as min_input_shape
+            "x": [50, 3, 32, 1000],
+            "lstm_1.tmp_0": [500, 50, 128]
+        }
+        opt_input_shape = {  # stored under "opt_input_shape" below; same keys as min_input_shape
+            "x": [6, 3, 32, 100],
+            "lstm_1.tmp_0": [25, 5, 128]
+        }
+        self.dynamic_shape_info = {  # these three keys are the ones load_model_config() indexes for config.set_trt_dynamic_shape_info
+            "min_input_shape": min_input_shape,
+            "max_input_shape": max_input_shape,
+            "opt_input_shape": opt_input_shape,
+        }
 
     def preprocess(self, input_dicts, data_id, log_id):
         (_, input_dict), = input_dicts.items()
@@ -143,7 +193,7 @@ def preprocess(self, input_dicts, data_id, log_id):
             for id, img in enumerate(img_list):
                 norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
                 imgs[id] = norm_img
-            feed = {"image": imgs.copy()}
+            feed = {"x": imgs.copy()}
             feed_list.append(feed)
         #_LOGGER.info("feed_list : {}".format(feed_list))
 
diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py
@@ -88,7 +88,9 @@ def load_model_config(self,
                           mkldnn_op_list=None,
                           mkldnn_bf16_op_list=None,
                           use_feed_fetch_ops=False,
-                          use_ascend_cl=False):
+                          use_ascend_cl=False,
+                          min_subgraph_size=3,
+                          dynamic_shape_info={}):
         """
         Load model configs and create the paddle predictor by Paddle Inference API.
    
@@ -102,6 +104,9 @@ def load_model_config(self,
             ir_optim: open calculation chart optimization, False default.
             use_trt: use nvidia TensorRT optimization, False default
             use_lite: use Paddle-Lite Engint, False default
+            min_subgraph_size: min_subgraph_size passed to the TensorRT engine, 3 default
+            dynamic_shape_info: dict holding 'min_input_shape', 'max_input_shape' and
+                'opt_input_shape' for TensorRT dynamic shape; {} default (not applied)
             use_xpu: run predict on Baidu Kunlun, False default
             precision: precision mode, "fp32" default
             use_calib: use TensorRT calibration, False default
@@ -211,9 +216,13 @@ def load_model_config(self,
                     precision_mode=precision_type,
                     workspace_size=1 << 20,
                     max_batch_size=32,
-                    min_subgraph_size=3,
+                    min_subgraph_size=min_subgraph_size,
                     use_static=False,
                     use_calib_mode=False)
+
+                if len(dynamic_shape_info):
+                     config.set_trt_dynamic_shape_info(
+                         dynamic_shape_info['min_input_shape'], dynamic_shape_info['max_input_shape'], dynamic_shape_info['opt_input_shape'])       
         # set lite
         if use_lite:
             config.enable_lite_engine(
diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py
@@ -50,7 +50,9 @@ def __init__(self,
                  use_mkldnn=False,
                  mkldnn_cache_capacity=0,
                  mkldnn_op_list=None,
-                 mkldnn_bf16_op_list=None):
+                 mkldnn_bf16_op_list=None,
+                 min_subgraph_size=3,
+                 dynamic_shape_info={}):
         """
         Initialization of localservicehandler
 
@@ -92,6 +94,8 @@ def __init__(self,
         self._mkldnn_cache_capacity = 0
         self._mkldnn_op_list = None
         self._mkldnn_bf16_op_list = None
+        self.min_subgraph_size = 3
+        self.dynamic_shape_info = {}
 
         if device_type == -1:
             # device_type is not set, determined by `devices`, 
@@ -120,6 +124,8 @@ def __init__(self,
             self._use_gpu = True
             devices = [int(x) for x in devices.split(",")]
             self._use_trt = True
+            self.min_subgraph_size = min_subgraph_size
+            self.dynamic_shape_info = dynamic_shape_info
         elif device_type == 3:
             # ARM CPU
             self._device_name = "arm"
@@ -176,14 +182,14 @@ def __init__(self,
             "mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
             "client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
             "mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
-            "mkldnn_bf16_op_list:{}, use_ascend_cl:{}".format(
+            "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}".format(
                 model_config, self._device_name, self._use_gpu, self._use_trt,
                 self._use_lite, self._use_xpu, device_type, self._devices,
                 self._mem_optim, self._ir_optim, self._use_profile,
                 self._thread_num, self._client_type, self._fetch_names,
                 self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
                 self._mkldnn_op_list, self._mkldnn_bf16_op_list,
-                self._use_ascend_cl))
+                self._use_ascend_cl, self.min_subgraph_size))
 
     def get_fetch_list(self):
         return self._fetch_names
@@ -240,7 +246,9 @@ def get_client(self, concurrency_idx):
                 mkldnn_cache_capacity=self._mkldnn_cache_capacity,
                 mkldnn_op_list=self._mkldnn_op_list,
                 mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
-                use_ascend_cl=self._use_ascend_cl)
+                use_ascend_cl=self._use_ascend_cl,
+                min_subgraph_size=self.min_subgraph_size,
+                dynamic_shape_info=self.dynamic_shape_info)
         return self._local_predictor_client
 
     def get_client_config(self):
diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py
@@ -116,6 +116,11 @@ def __init__(self,
         self._for_close_op_lock = threading.Lock()
         self._succ_init_op = False
         self._succ_close_op = False
+        self.dynamic_shape_info = {} 
+        self.set_dynamic_shape_info()
+    
+    def set_dynamic_shape_info(self):  # hook called from __init__ right after self.dynamic_shape_info = {}
+        pass  # intentionally empty: dynamic_shape_info stays {} unless this method is redefined (presumably by op subclasses - TODO confirm)
 
     # for feed/fetch dict cehck
     @staticmethod
@@ -182,6 +187,7 @@ def init_from_dict(self, conf):
         self.mkldnn_cache_capacity = 0
         self.mkldnn_op_list = None
         self.mkldnn_bf16_op_list = None
+        self.min_subgraph_size = 3
 
         if self._server_endpoints is None:
             server_endpoints = conf.get("server_endpoints", [])
@@ -212,6 +218,8 @@ def init_from_dict(self, conf):
                         "mkldnn_op_list")
                     self.mkldnn_bf16_op_list = local_service_conf.get(
                         "mkldnn_bf16_op_list")
+                    self.min_subgraph_size = local_service_conf.get(
+                        "min_subgraph_size")
 
                     if self.model_config is None:
                         self.with_serving = False
@@ -233,7 +241,9 @@ def init_from_dict(self, conf):
                                 mkldnn_cache_capacity=self.
                                 mkldnn_cache_capacity,
                                 mkldnn_op_list=self.mkldnn_bf16_op_list,
-                                mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
+                                mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
+                                min_subgraph_size=self.min_subgraph_size,
+                                dynamic_shape_info=self.dynamic_shape_info)
                             service_handler.prepare_server()  # get fetch_list
                             serivce_ports = service_handler.get_port_list()
                             self._server_endpoints = [
@@ -261,7 +271,9 @@ def init_from_dict(self, conf):
                                 mkldnn_cache_capacity=self.
                                 mkldnn_cache_capacity,
                                 mkldnn_op_list=self.mkldnn_op_list,
-                                mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
+                                mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
+                                min_subgraph_size=self.min_subgraph_size,
+                                dynamic_shape_info=self.dynamic_shape_info)
                             if self._client_config is None:
                                 self._client_config = service_handler.get_client_config(
                                 )
@@ -766,7 +778,9 @@ def start_with_process(self):
                       self.ir_optim, self.precision, self.use_mkldnn,
                       self.mkldnn_cache_capacity, self.mkldnn_op_list,
                       self.mkldnn_bf16_op_list, self.is_jump_op(),
-                      self.get_output_channels_of_jump_ops()))
+                      self.get_output_channels_of_jump_ops(),
+                      self.min_subgraph_size,
+                      self.dynamic_shape_info))
             p.daemon = True
             p.start()
             process.append(p)
@@ -803,7 +817,9 @@ def start_with_thread(self):
                       self.ir_optim, self.precision, self.use_mkldnn,
                       self.mkldnn_cache_capacity, self.mkldnn_op_list,
                       self.mkldnn_bf16_op_list, self.is_jump_op(),
-                      self.get_output_channels_of_jump_ops()))
+                      self.get_output_channels_of_jump_ops(),
+                      self.min_subgraph_size,
+                      self.dynamic_shape_info))
             # When a process exits, it attempts to terminate
             # all of its daemonic child processes.
             t.daemon = True
@@ -1264,7 +1280,7 @@ def _run(self, concurrency_idx, input_channel, output_channels,
              is_thread_op, trace_buffer, model_config, workdir, thread_num,
              device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
              mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
-             is_jump_op, output_channels_of_jump_ops):
+             is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
         """
         _run() is the entry function of OP process / thread model.When client 
         type is local_predictor in process mode, the CUDA environment needs to 
@@ -1316,7 +1332,9 @@ def _run(self, concurrency_idx, input_channel, output_channels,
                     use_mkldnn=use_mkldnn,
                     mkldnn_cache_capacity=mkldnn_cache_capacity,
                     mkldnn_op_list=mkldnn_op_list,
-                    mkldnn_bf16_op_list=mkldnn_bf16_op_list)
+                    mkldnn_bf16_op_list=mkldnn_bf16_op_list,
+                    min_subgraph_size=min_subgraph_size,
+                    dynamic_shape_info=dynamic_shape_info)
 
                 _LOGGER.info("Init cuda env in process {}".format(
                     concurrency_idx))
diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py
@@ -260,6 +260,7 @@ def _init_ops(self, op_conf):
                 "use_calib": False,
                 "use_mkldnn": False,
                 "mkldnn_cache_capacity": 0,
+                "min_subgraph_size": 3,
             },
         }
         for op in self._used_op:

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -260,6 +260,7 @@ def _init_ops(self, op_conf):` |
| `260` | `260` | `"use_calib": False,` |
| `261` | `261` | `"use_mkldnn": False,` |
| `262` | `262` | `"mkldnn_cache_capacity": 0,` |
| | `263` | `+ "min_subgraph_size": 3,` |
| `263` | `264` | `},` |
| `264` | `265` | `}` |
| `265` | `266` | `for op in self._used_op:` |