Skip to content

Commit ccfaad3

Browse files
committed
configure dynamic shape tensorrt
1 parent 27d9e17 commit ccfaad3

File tree

6 files changed

+109
-17
lines changed

6 files changed

+109
-17
lines changed

examples/Pipeline/PaddleOCR/ocr/config.yml

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ op:
3737
model_config: ocr_det_model
3838

3939
#Fetch结果列表,以client_config中fetch_var的alias_name为准
40-
fetch_list: ["concat_1.tmp_0"]
40+
fetch_list: ["save_infer_model/scale_0.tmp_1"]
4141

4242
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
4343
device_type: 0
@@ -53,6 +53,9 @@ op:
5353

5454
#ir_optim
5555
ir_optim: True
56+
57+
#开启tensorrt后,进行优化的子图包含的最少节点数
58+
#min_subgraph_size: 13
5659
rec:
5760
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
5861
concurrency: 3
@@ -73,7 +76,7 @@ op:
7376
model_config: ocr_rec_model
7477

7578
#Fetch结果列表,以client_config中fetch_var的alias_name为准
76-
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
79+
fetch_list: ["save_infer_model/scale_0.tmp_1"]
7780
# device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu
7881
device_type: 0
7982

@@ -88,3 +91,6 @@ op:
8891

8992
#ir_optim
9093
ir_optim: True
94+
95+
#开启tensorrt后,进行优化的子图包含的最少节点数
96+
#min_subgraph_size: 3

examples/Pipeline/PaddleOCR/ocr/web_service.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,37 @@ def init_op(self):
4040
"min_size": 3
4141
})
4242

43+
def set_dynamic_shape_info(self):
    """Set the TensorRT dynamic-shape ranges for this op's model.

    Fills ``self.dynamic_shape_info`` with three dicts keyed by tensor
    name: ``min_input_shape``, ``max_input_shape`` and
    ``opt_input_shape``.  Each maps a tensor name to a 4-element shape
    list.  ``"x"`` is the feed name this op produces in ``preprocess``.
    The remaining keys are presumably intermediate tensors of the
    detection model that need explicit ranges as well -- verify the
    names against the exported model before changing them.

    Returns:
        None.  The result is stored on ``self.dynamic_shape_info``.
    """
    # NOTE(review): shapes look like NCHW -- confirm against the model.
    # Smallest shape each tensor may take at runtime.
    min_input_shape = {
        "x": [1, 3, 50, 50],
        "conv2d_182.tmp_0": [1, 1, 20, 20],
        "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20],
        "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20],
        "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20],
        "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20],
    }
    # Largest shape each tensor may take at runtime.
    max_input_shape = {
        "x": [1, 3, 1536, 1536],
        "conv2d_182.tmp_0": [20, 200, 960, 960],
        "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960],
        "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960],
        "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960],
        "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960],
    }
    # Shape the engine should be tuned for.
    opt_input_shape = {
        "x": [1, 3, 960, 960],
        "conv2d_182.tmp_0": [3, 96, 240, 240],
        "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240],
        "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240],
        "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240],
        "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240],
    }
    self.dynamic_shape_info = {
        "min_input_shape": min_input_shape,
        "max_input_shape": max_input_shape,
        "opt_input_shape": opt_input_shape,
    }
73+
4374
def preprocess(self, input_dicts, data_id, log_id):
4475
(_, input_dict), = input_dicts.items()
4576
imgs = []
@@ -52,11 +83,11 @@ def preprocess(self, input_dicts, data_id, log_id):
5283
det_img = self.det_preprocess(self.im)
5384
_, self.new_h, self.new_w = det_img.shape
5485
imgs.append(det_img[np.newaxis, :].copy())
55-
return {"image": np.concatenate(imgs, axis=0)}, False, None, ""
86+
return {"x": np.concatenate(imgs, axis=0)}, False, None, ""
5687

5788
def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
5889
# print(fetch_dict)
59-
det_out = fetch_dict["concat_1.tmp_0"]
90+
det_out = fetch_dict["save_infer_model/scale_0.tmp_1"]
6091
ratio_list = [
6192
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
6293
]
@@ -71,6 +102,25 @@ def init_op(self):
71102
self.ocr_reader = OCRReader()
72103
self.get_rotate_crop_image = GetRotateCropImage()
73104
self.sorted_boxes = SortedBoxes()
105+
106+
def set_dynamic_shape_info(self):
    """Set the TensorRT dynamic-shape ranges for this op's model.

    Fills ``self.dynamic_shape_info`` with three dicts keyed by tensor
    name: ``min_input_shape``, ``max_input_shape`` and
    ``opt_input_shape``.  ``"x"`` is the feed name this op produces in
    ``preprocess``.  ``"lstm_1.tmp_0"`` is presumably an intermediate
    tensor of the recognition model -- verify the name against the
    exported model before changing it.

    Returns:
        None.  The result is stored on ``self.dynamic_shape_info``.
    """
    # NOTE(review): "x" looks like NCHW with a fixed height of 32 and a
    # variable width -- confirm against the model.
    # Smallest shape each tensor may take at runtime.
    min_input_shape = {
        "x": [1, 3, 32, 10],
        "lstm_1.tmp_0": [1, 1, 128],
    }
    # Largest shape each tensor may take at runtime.
    max_input_shape = {
        "x": [50, 3, 32, 1000],
        "lstm_1.tmp_0": [500, 50, 128],
    }
    # Shape the engine should be tuned for.
    opt_input_shape = {
        "x": [6, 3, 32, 100],
        "lstm_1.tmp_0": [25, 5, 128],
    }
    self.dynamic_shape_info = {
        "min_input_shape": min_input_shape,
        "max_input_shape": max_input_shape,
        "opt_input_shape": opt_input_shape,
    }
74124

75125
def preprocess(self, input_dicts, data_id, log_id):
76126
(_, input_dict), = input_dicts.items()
@@ -143,7 +193,7 @@ def preprocess(self, input_dicts, data_id, log_id):
143193
for id, img in enumerate(img_list):
144194
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
145195
imgs[id] = norm_img
146-
feed = {"image": imgs.copy()}
196+
feed = {"x": imgs.copy()}
147197
feed_list.append(feed)
148198
#_LOGGER.info("feed_list : {}".format(feed_list))
149199

python/paddle_serving_app/local_predict.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,9 @@ def load_model_config(self,
8888
mkldnn_op_list=None,
8989
mkldnn_bf16_op_list=None,
9090
use_feed_fetch_ops=False,
91-
use_ascend_cl=False):
91+
use_ascend_cl=False,
92+
min_subgraph_size=3,
93+
dynamic_shape_info={}):
9294
"""
9395
Load model configs and create the paddle predictor by Paddle Inference API.
9496
@@ -102,6 +104,9 @@ def load_model_config(self,
102104
ir_optim: open calculation chart optimization, False default.
103105
use_trt: use nvidia TensorRT optimization, False default
104106
use_lite: use Paddle-Lite Engine, False default
107+
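min_subgraph_size: minimum number of nodes a subgraph must contain to be optimized by TensorRT, 3 default
108+
dynamic_shape_info: TensorRT dynamic shape settings, a dict with the keys 'min_input_shape',
109+
'max_input_shape' and 'opt_input_shape'; {} default (dynamic shape is not configured)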
105110
use_xpu: run predict on Baidu Kunlun, False default
106111
precision: precision mode, "fp32" default
107112
use_calib: use TensorRT calibration, False default
@@ -211,9 +216,13 @@ def load_model_config(self,
211216
precision_mode=precision_type,
212217
workspace_size=1 << 20,
213218
max_batch_size=32,
214-
min_subgraph_size=3,
219+
min_subgraph_size=min_subgraph_size,
215220
use_static=False,
216221
use_calib_mode=False)
222+
223+
if len(dynamic_shape_info):
224+
config.set_trt_dynamic_shape_info(
225+
dynamic_shape_info['min_input_shape'], dynamic_shape_info['max_input_shape'], dynamic_shape_info['opt_input_shape'])
217226
# set lite
218227
if use_lite:
219228
config.enable_lite_engine(

python/pipeline/local_service_handler.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,9 @@ def __init__(self,
5050
use_mkldnn=False,
5151
mkldnn_cache_capacity=0,
5252
mkldnn_op_list=None,
53-
mkldnn_bf16_op_list=None):
53+
mkldnn_bf16_op_list=None,
54+
min_subgraph_size=3,
55+
dynamic_shape_info={}):
5456
"""
5557
Initialization of localservicehandler
5658
@@ -92,6 +94,8 @@ def __init__(self,
9294
self._mkldnn_cache_capacity = 0
9395
self._mkldnn_op_list = None
9496
self._mkldnn_bf16_op_list = None
97+
self.min_subgraph_size = 3
98+
self.dynamic_shape_info = {}
9599

96100
if device_type == -1:
97101
# device_type is not set, determined by `devices`,
@@ -120,6 +124,8 @@ def __init__(self,
120124
self._use_gpu = True
121125
devices = [int(x) for x in devices.split(",")]
122126
self._use_trt = True
127+
self.min_subgraph_size = min_subgraph_size
128+
self.dynamic_shape_info = dynamic_shape_info
123129
elif device_type == 3:
124130
# ARM CPU
125131
self._device_name = "arm"
@@ -176,14 +182,14 @@ def __init__(self,
176182
"mem_optim:{}, ir_optim:{}, use_profile:{}, thread_num:{}, "
177183
"client_type:{}, fetch_names:{}, precision:{}, use_mkldnn:{}, "
178184
"mkldnn_cache_capacity:{}, mkldnn_op_list:{}, "
179-
"mkldnn_bf16_op_list:{}, use_ascend_cl:{}".format(
185+
"mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}".format(
180186
model_config, self._device_name, self._use_gpu, self._use_trt,
181187
self._use_lite, self._use_xpu, device_type, self._devices,
182188
self._mem_optim, self._ir_optim, self._use_profile,
183189
self._thread_num, self._client_type, self._fetch_names,
184190
self._precision, self._use_mkldnn, self._mkldnn_cache_capacity,
185191
self._mkldnn_op_list, self._mkldnn_bf16_op_list,
186-
self._use_ascend_cl))
192+
self._use_ascend_cl, self.min_subgraph_size))
187193

188194
def get_fetch_list(self):
189195
return self._fetch_names
@@ -240,7 +246,9 @@ def get_client(self, concurrency_idx):
240246
mkldnn_cache_capacity=self._mkldnn_cache_capacity,
241247
mkldnn_op_list=self._mkldnn_op_list,
242248
mkldnn_bf16_op_list=self._mkldnn_bf16_op_list,
243-
use_ascend_cl=self._use_ascend_cl)
249+
use_ascend_cl=self._use_ascend_cl,
250+
min_subgraph_size=self.min_subgraph_size,
251+
dynamic_shape_info=self.dynamic_shape_info)
244252
return self._local_predictor_client
245253

246254
def get_client_config(self):

python/pipeline/operator.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,11 @@ def __init__(self,
116116
self._for_close_op_lock = threading.Lock()
117117
self._succ_init_op = False
118118
self._succ_close_op = False
119+
self.dynamic_shape_info = {}
120+
self.set_dynamic_shape_info()
121+
122+
def set_dynamic_shape_info(self):
    """Hook for subclasses to supply TensorRT dynamic-shape settings.

    ``__init__`` calls this right after setting
    ``self.dynamic_shape_info`` to ``{}``.  The default does nothing,
    so the dict stays empty.  An override should assign a dict with the
    keys ``min_input_shape``, ``max_input_shape`` and
    ``opt_input_shape`` to ``self.dynamic_shape_info``.

    Returns:
        None.
    """
    return None
119124

120125
# for feed/fetch dict check
121126
@staticmethod
@@ -182,6 +187,7 @@ def init_from_dict(self, conf):
182187
self.mkldnn_cache_capacity = 0
183188
self.mkldnn_op_list = None
184189
self.mkldnn_bf16_op_list = None
190+
self.min_subgraph_size = 3
185191

186192
if self._server_endpoints is None:
187193
server_endpoints = conf.get("server_endpoints", [])
@@ -212,6 +218,8 @@ def init_from_dict(self, conf):
212218
"mkldnn_op_list")
213219
self.mkldnn_bf16_op_list = local_service_conf.get(
214220
"mkldnn_bf16_op_list")
221+
self.min_subgraph_size = local_service_conf.get(
222+
"min_subgraph_size")
215223

216224
if self.model_config is None:
217225
self.with_serving = False
@@ -233,7 +241,9 @@ def init_from_dict(self, conf):
233241
mkldnn_cache_capacity=self.
234242
mkldnn_cache_capacity,
235243
mkldnn_op_list=self.mkldnn_bf16_op_list,
236-
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
244+
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
245+
min_subgraph_size=self.min_subgraph_size,
246+
dynamic_shape_info=self.dynamic_shape_info)
237247
service_handler.prepare_server() # get fetch_list
238248
serivce_ports = service_handler.get_port_list()
239249
self._server_endpoints = [
@@ -261,7 +271,9 @@ def init_from_dict(self, conf):
261271
mkldnn_cache_capacity=self.
262272
mkldnn_cache_capacity,
263273
mkldnn_op_list=self.mkldnn_op_list,
264-
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list)
274+
mkldnn_bf16_op_list=self.mkldnn_bf16_op_list,
275+
min_subgraph_size=self.min_subgraph_size,
276+
dynamic_shape_info=self.dynamic_shape_info)
265277
if self._client_config is None:
266278
self._client_config = service_handler.get_client_config(
267279
)
@@ -766,7 +778,9 @@ def start_with_process(self):
766778
self.ir_optim, self.precision, self.use_mkldnn,
767779
self.mkldnn_cache_capacity, self.mkldnn_op_list,
768780
self.mkldnn_bf16_op_list, self.is_jump_op(),
769-
self.get_output_channels_of_jump_ops()))
781+
self.get_output_channels_of_jump_ops(),
782+
self.min_subgraph_size,
783+
self.dynamic_shape_info))
770784
p.daemon = True
771785
p.start()
772786
process.append(p)
@@ -803,7 +817,9 @@ def start_with_thread(self):
803817
self.ir_optim, self.precision, self.use_mkldnn,
804818
self.mkldnn_cache_capacity, self.mkldnn_op_list,
805819
self.mkldnn_bf16_op_list, self.is_jump_op(),
806-
self.get_output_channels_of_jump_ops()))
820+
self.get_output_channels_of_jump_ops(),
821+
self.min_subgraph_size,
822+
self.dynamic_shape_info))
807823
# When a process exits, it attempts to terminate
808824
# all of its daemonic child processes.
809825
t.daemon = True
@@ -1264,7 +1280,7 @@ def _run(self, concurrency_idx, input_channel, output_channels,
12641280
is_thread_op, trace_buffer, model_config, workdir, thread_num,
12651281
device_type, devices, mem_optim, ir_optim, precision, use_mkldnn,
12661282
mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list,
1267-
is_jump_op, output_channels_of_jump_ops):
1283+
is_jump_op, output_channels_of_jump_ops, min_subgraph_size, dynamic_shape_info):
12681284
"""
12691285
_run() is the entry function of OP process / thread model.When client
12701286
type is local_predictor in process mode, the CUDA environment needs to
@@ -1316,7 +1332,9 @@ def _run(self, concurrency_idx, input_channel, output_channels,
13161332
use_mkldnn=use_mkldnn,
13171333
mkldnn_cache_capacity=mkldnn_cache_capacity,
13181334
mkldnn_op_list=mkldnn_op_list,
1319-
mkldnn_bf16_op_list=mkldnn_bf16_op_list)
1335+
mkldnn_bf16_op_list=mkldnn_bf16_op_list,
1336+
min_subgraph_size=min_subgraph_size,
1337+
dynamic_shape_info=dynamic_shape_info)
13201338

13211339
_LOGGER.info("Init cuda env in process {}".format(
13221340
concurrency_idx))

python/pipeline/pipeline_server.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,7 @@ def _init_ops(self, op_conf):
260260
"use_calib": False,
261261
"use_mkldnn": False,
262262
"mkldnn_cache_capacity": 0,
263+
"min_subgraph_size": 3,
263264
},
264265
}
265266
for op in self._used_op:

0 commit comments

Comments
 (0)