diff --git a/CNN/Dockerfile b/CNN/Dockerfile
new file mode 100644
index 0000000..f3d02f8
--- /dev/null
+++ b/CNN/Dockerfile
@@ -0,0 +1,5 @@
+FROM helmuthva/jetson-xavier-tensorflow-serving-base
+
+EXPOSE 8500
+
+CMD ["tensorflow_model_server", "--port=8500", "--model_config_file=/models/model/models.config", "--grpc_channel_arguments=grpc.max_send_message_length=52428800,grpc.max_receive_message_length=52428800", "--grpc_max_threads=1000"]
\ No newline at end of file
diff --git a/CNN/raw_image_classification_inference.py b/CNN/cnn_raw_image_classification_inference.py
similarity index 100%
rename from CNN/raw_image_classification_inference.py
rename to CNN/cnn_raw_image_classification_inference.py
diff --git a/CNN/coral-edgetpu-tflite-raw_image_classification_inference.py b/CNN/coral-edgetpu-tflite-raw_image_classification_inference.py
new file mode 100644
index 0000000..584f103
--- /dev/null
+++ b/CNN/coral-edgetpu-tflite-raw_image_classification_inference.py
@@ -0,0 +1,91 @@
+import argparse
+import time
+import os
+import numpy as np
+
+from PIL import Image
+
+from model import classify
+import tflite_runtime.interpreter as tflite
+import platform
+
+EDGETPU_SHARED_LIB = {
+    'Linux': 'libedgetpu.so.1',
+    'Darwin': 'libedgetpu.1.dylib',
+    'Windows': 'edgetpu.dll'
+}[platform.system()]
+
+
+def load_labels(path, encoding='utf-8'):
+    with open(path, 'r', encoding=encoding) as f:
+        lines = f.readlines()
+        if not lines:
+            return {}
+
+        if lines[0].split(' ', maxsplit=1)[0].isdigit():
+            pairs = [line.split(' ', maxsplit=1) for line in lines]
+            return {int(index): label.strip() for index, label in pairs}
+        else:
+            return {index: line.strip() for index, line in enumerate(lines)}
+
+
+def make_interpreter(model_file):
+    model_file, *device = model_file.split('@')
+    return tflite.Interpreter(
+        model_path=model_file,
+        experimental_delegates=[
+            tflite.load_delegate(EDGETPU_SHARED_LIB,
+                                 {'device': device[0]} if device else {})
+        ])
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument(
+        '-m', '--model', required=True, help='File path of .tflite file.')
+    parser.add_argument(
+        '-l', '--labels', help='File path of labels file.')
+    parser.add_argument(
+        '-k', '--top_k', type=int, default=1,
+        help='Max number of classification results')
+    parser.add_argument(
+        '-t', '--threshold', type=float, default=0.0,
+        help='Classification score threshold')
+    args = parser.parse_args()
+
+    labels = load_labels(args.labels) if args.labels else {}
+
+    model_load_time = time.time()
+    interpreter = make_interpreter(args.model)
+    interpreter.allocate_tensors()
+    model_load_time = time.time() - model_load_time
+
+    dataset_load_time = time.time()
+    image = Image.open('./dataset/imagenet/imagenet_1000_raw/n02782093_1.JPEG')
+    dataset_load_time = time.time() - dataset_load_time
+
+    size = classify.input_size(interpreter)
+    image = image.convert('RGB').resize(size, Image.ANTIALIAS)
+    classify.set_input(interpreter, image)
+    start = time.perf_counter()
+    interpreter.invoke()
+    iter_times = time.perf_counter() - start
+    classes = classify.get_output(interpreter, args.top_k, args.threshold)
+    for klass in classes:
+        accuracy = klass.score  # score of the last returned class (top-1 with the default -k 1)
+
+    print('***** TF-Lite metrics *****')
+    print('accuracy =', accuracy)
+    print('model_load_time =', model_load_time)
+    print('dataset_load_time =', dataset_load_time)
+    print('inference_time =', iter_times)
+    print('IPS =', 1 / (model_load_time + dataset_load_time + iter_times))
+    print('IPS(inf) =', 1 / iter_times)
+
+
+if __name__ == '__main__':
+    main()
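Note: the Coral script above loads a label file but never applies it to the classification results. A minimal sketch of that mapping, assuming the classify helper returns entries with id and score fields as in the standard Coral example code:

for klass in classes:
    label = labels.get(klass.id, klass.id)  # fall back to the raw class id when no label file was given
    print(f'{label}: {klass.score:.5f}')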
diff --git a/CNN/dataset/dataset_download.sh b/CNN/dataset/dataset_download.sh
index 2105943..7b6b6e3 100644
--- a/CNN/dataset/dataset_download.sh
+++ b/CNN/dataset/dataset_download.sh
@@ -18,7 +18,7 @@ mkdir coco_2017
 curl -O https://edge-inference.s3.us-west-2.amazonaws.com/CNN/dataset/coco_2017/val_dataset.py
 python3 val_dataset.py
 mv val_dataset.py ./coco_2017
-unzip -q coco2017val.zip -d ../model/yolo_v5/datasets && rm coco2017val.zip
+unzip -q coco2017val.zip -d ./ && rm coco2017val.zip
 
 
 #object detection video dataset
diff --git a/CNN/docker_build_tf_serving.sh b/CNN/docker_build_tf_serving.sh
new file mode 100644
index 0000000..82955f6
--- /dev/null
+++ b/CNN/docker_build_tf_serving.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+docker build -t edge-tf-serving .
\ No newline at end of file
diff --git a/CNN/docker_run_tf_serving.sh b/CNN/docker_run_tf_serving.sh
new file mode 100644
index 0000000..62f5bed
--- /dev/null
+++ b/CNN/docker_run_tf_serving.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+docker run --rm \
+    --device /dev/nvhost-ctrl \
+    --device /dev/nvhost-ctrl-gpu \
+    --device /dev/nvhost-prof-gpu \
+    --device /dev/nvmap \
+    --device /dev/nvhost-gpu \
+    --device /dev/nvhost-as-gpu \
+    -p 8500:8500 \
+    -v ~/edge-inference/CNN/model/:/models/model/ \
+    edge-tf-serving:latest
\ No newline at end of file
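Note: with the container from docker_run_tf_serving.sh listening on gRPC port 8500, a served model can be queried roughly as sketched below. The model name comes from models.config further down; the input key 'input_1', the 224x224 input shape, and the device IP are assumptions that depend on the exported SavedModel signature and the target board.

import grpc
import numpy as np
import tensorflow as tf
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc

channel = grpc.insecure_channel('192.168.0.32:8500')  # assumed edge device IP, gRPC port from the Dockerfile
stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)

request = predict_pb2.PredictRequest()
request.model_spec.name = 'mobilenet_v1'               # model name declared in models.config
request.model_spec.signature_name = 'serving_default'
batch = np.zeros((1, 224, 224, 3), dtype=np.float32)   # placeholder for a preprocessed image batch
request.inputs['input_1'].CopyFrom(tf.make_tensor_proto(batch))  # 'input_1' is an assumed input tensor key

result = stub.Predict(request, timeout=10.0)
print(result.outputs)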
diff --git a/CNN/edge_inference_request_scheduler.py b/CNN/edge_inference_request_scheduler.py
new file mode 100644
index 0000000..4bbb62d
--- /dev/null
+++ b/CNN/edge_inference_request_scheduler.py
@@ -0,0 +1,217 @@
+import argparse
+import requests
+import roundrobin
+from numpy import random
+import time
+from threading import Thread
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--edge', default=None, type=str)
+
+######### temporary test arguments #########
+parser.add_argument('--reqs', default='mobilenet,10', type=str)
+parser.add_argument('--random', action='store_true')
+
+temp_args = parser.parse_args()
+
+inference_requests = temp_args.reqs.split(',')
+inference_random_flag = temp_args.random
+####################################
+
+args = parser.parse_args()
+
+edges_to_inference = args.edge
+
+
+# Only this section has to be edited to add a model or a device: set each device's IP address, its service ports, and the models loaded on it.
+edges_info = {'nvidia-xavier2': {'ip_addr': '192.168.0.32',
+                                 'ports': [5001, 5002],
+                                 'models': ['mobilenet', 'mobilenet_v2', 'inception_v3', 'yolo_v5']
+                                 },
+              'nvidia-tx2': {'ip_addr': '192.168.0.22',
+                             'ports': [5001],
+                             'models': ['mobilenet', 'mobilenet_v2', 'inception_v3', 'yolo_v5']
+                             },
+              'nvidia-nano1': {'ip_addr': '192.168.0.41',
+                               'ports': [5001],
+                               'models': ['mobilenet']
+                               }
+              }
+
+
+# If --edge is not given, requests go to every registered device. Requested devices must be registered in edges_info. Input format: 'a,b,...'
+edges_register = list(edges_info.keys())
+
+if edges_to_inference is None:
+    edges_to_inference = edges_register
+else:
+    edges_to_inference = edges_to_inference.split(',')
+
+for edge in edges_to_inference:
+    if edge not in edges_register:
+        print(f'--edge arg must be in {edges_register}')
+        exit(1)
+
+print(f'Edges to inference: {edges_to_inference}')
+
+
+# Models that can be served by the selected edge devices
+models_to_inference = []
+
+for edge_name in edges_to_inference:
+    edge_info = edges_info.get(edge_name)
+    models = edge_info.get('models')
+    models_to_inference.extend(models)
+
+models_to_inference = set(models_to_inference)
+
+print(f'Models to inference: {models_to_inference}')
+
+
+# Current scheduling policy: for each model, register the edge devices that can serve it, then spread incoming requests over those devices in round-robin order.
+# Known issue: if every model receives only a single request, all requests may land on the same device -> needs more thought.
+model_edge_info = {}
+
+for edge in edges_to_inference:
+    edge_info = edges_info.get(edge)
+    for model in edge_info.get('models'):
+        if model not in model_edge_info.keys():
+            model_edge_info[model] = []
+
+        model_edge_info[model].append((edge, 1))
+
+print(f'model-edge dataset: {model_edge_info}')
+
+for model in model_edge_info.keys():
+    dataset = model_edge_info.get(model)
+    model_edge_info[model] = roundrobin.smooth(dataset)
+
+
+# Build a dictionary so each edge device's ports can also be drawn in round-robin order
+edge_port_info = {}
+
+for edge in edges_to_inference:
+    if edge not in edge_port_info.keys():
+        edge_port_info[edge] = []
+
+    edge_info = edges_info.get(edge)
+    for port in edge_info.get('ports'):
+        edge_port_info[edge].append((port, 1))
+
+print(f'edge-port dataset: {edge_port_info}')
+
+for edge in edge_port_info.keys():
+    dataset = edge_port_info.get(edge)
+    edge_port_info[edge] = roundrobin.smooth(dataset)
+
+
+# Returns an edge device name for a model; each model keeps a list of devices and they are handed out in round-robin order.
+def get_edge_by_model_rr(model):
+    if model in model_edge_info.keys():
+        return model_edge_info.get(model)()
+    else:
+        return None
+
+
+# Returns a port for an edge device; each device keeps a list of ports and they are handed out in round-robin order.
+def get_port_by_edge_rr(edge):
+    if edge in edge_port_info.keys():
+        return edge_port_info.get(edge)()
+    else:
+        return None
+
+
+# Sends one inference request. Arguments: target edge device, model, and request order (1-based). The device and model must be registered in edges_info above.
+def model_request(edge, model, order):
+    if edge not in edges_to_inference:
+        print(f'[{order}] edge must be in {edges_to_inference} / input value: {edge}')
+        return
+
+    if model not in models_to_inference:
+        print(f'[{order}] model must be in {models_to_inference} / input value: {model}')
+        return
+
+    edge_info = edges_info.get(edge)
+
+    edge_ip_addr = edge_info.get('ip_addr')
+    port = get_port_by_edge_rr(edge)
+    url = f'http://{edge_ip_addr}:{port}/{model}'
+
+    req_processing_start_time = time.time()
+    res = requests.get(url)
+    processing_time = time.time() - req_processing_start_time
+
+    inference_time = res.text.split(':')[1]
+    inference_time = inference_time.split('\n')[0]
+    inference_time_results[order-1] = float(inference_time)
+    request_time_results[order-1] = float(processing_time)
+
+    print(f'[{order}:{edge}({port})/{model}] total request time: {processing_time}\n{res.text}')
+    return
+
+
+### Incoming requests: temporary test code ###
+requests_list = []
+for idx in range(0, len(inference_requests), 2):
+    model = inference_requests[idx]
+    inference_num = int(inference_requests[idx+1])
+
+    for _ in range(inference_num):
+        requests_list.append(model)
+
+if inference_random_flag:
+    random.shuffle(requests_list)
+##############################
+
+
+# Hand each request to a device; threads are used so several requests can be in flight at once.
+threads = []
+order = 0
+
+inference_time_results = [0 for _ in range(len(requests_list))]
+request_time_results = [0 for _ in range(len(requests_list))]
+request_sleep_time = 1 / len(requests_list)  # sleep so the requests are spread evenly over one second
+
+for req in requests_list:
+    edge_to_inference = get_edge_by_model_rr(req)
+    if edge_to_inference is None:
+        print(f'{req} cannot be served by the selected edge devices')
+        continue
+
+    order += 1
+    th = Thread(target=model_request, args=(edge_to_inference, req, order))
+    th.start()
+    threads.append(th)
+    time.sleep(request_sleep_time)
+
+for th in threads:
+    th.join()
+
+
+# Print the inference request results (min, median, max, average)
+inference_time_results.sort()
+len_inference_time_results = len(inference_time_results)
+request_time_results.sort()
+len_request_time_results = len(request_time_results)
+
+total_inference_time = sum(inference_time_results)
+avg_inference_time = total_inference_time / len_inference_time_results
+min_inference_time = inference_time_results[0]
+mid_inference_time = inference_time_results[int(len_inference_time_results / 2)]
+max_inference_time = inference_time_results[-1]
+
+total_request_time = sum(request_time_results)
+avg_request_time = total_request_time / len_request_time_results
+min_request_time = request_time_results[0]
+mid_request_time = request_time_results[int(len_request_time_results / 2)]
+max_request_time = request_time_results[-1]
+
+print(f'average inference time: {avg_inference_time}')
+print(f'min inference time: {min_inference_time}')
+print(f'median inference time: {mid_inference_time}')
+print(f'max inference time: {max_inference_time}\n')
+
+print(f'average response time: {avg_request_time}')
+print(f'min response time: {min_request_time}')
+print(f'median response time: {mid_request_time}')
+print(f'max response time: {max_request_time}\n')
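Note: the scheduler relies on roundrobin.smooth, which takes (key, weight) pairs and returns a callable that yields keys in smoothed weighted round-robin order. A small standalone sketch of the behaviour used above:

import roundrobin

# Two devices serving the same model with equal weight, as in model_edge_info.
get_edge = roundrobin.smooth([('nvidia-xavier2', 1), ('nvidia-tx2', 1)])
print([get_edge() for _ in range(4)])  # alternates, e.g. ['nvidia-xavier2', 'nvidia-tx2', 'nvidia-xavier2', 'nvidia-tx2']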
diff --git a/CNN/edge_inference_request_server.py b/CNN/edge_inference_request_server.py
new file mode 100644
index 0000000..2b1125c
--- /dev/null
+++ b/CNN/edge_inference_request_server.py
@@ -0,0 +1,174 @@
+import argparse
+
+from model.yolov5.models.common import DetectMultiBackend
+from model.yolov5.utils.dataloaders import LoadImages
+from model.yolov5.utils.general import (Profile, check_img_size)
+import torch
+from flask import Flask
+import tensorflow as tf
+import numpy as np
+import shutil
+import os
+import time
+from tensorflow.keras.applications import (
+    mobilenet,
+    mobilenet_v2,
+    inception_v3
+)
+
+
+gpus = tf.config.experimental.list_physical_devices('GPU')
+if gpus:
+    tf.config.experimental.set_virtual_device_configuration(
+        gpus[0],
+        [tf.config.experimental.VirtualDeviceConfiguration(
+            memory_limit=0.6 * 1024)])  # cap TensorFlow at roughly 600 MB of GPU memory
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--model', default='mobilenet,mobilenet_v2,inception_v3,yolo_v5', type=str)
+parser.add_argument('--hostname', default='0.0.0.0', type=str)
+parser.add_argument('--port', default=5001, type=int)
+args = parser.parse_args()
+models_to_load = args.model.split(',')
+hostname = args.hostname
+port = args.port
+
+
+# mobilenet, mobilenet v2, inception v3
+models = {
+    'mobilenet': mobilenet,
+    'mobilenet_v2': mobilenet_v2,
+    'inception_v3': inception_v3
+}
+
+models_detail = {
+    'mobilenet': mobilenet.MobileNet(weights='imagenet'),
+    'mobilenet_v2': mobilenet_v2.MobileNetV2(weights='imagenet'),
+    'inception_v3': inception_v3.InceptionV3(weights='imagenet')
+}
+
+
+def preprocess_image(image_path, model, target_size):
+    test_image = tf.keras.preprocessing.image.load_img(image_path, target_size=target_size)
+    test_image_array = tf.keras.preprocessing.image.img_to_array(test_image, dtype=np.int32)
+    return model.preprocess_input(test_image_array[tf.newaxis, ...])
+
+
+def save_model(model, saved_model_dir):
+    model = models_detail[model]
+    shutil.rmtree(saved_model_dir, ignore_errors=True)
+    model.save(saved_model_dir, include_optimizer=False, save_format='tf')
+
+print('\npreprocessing images...')
+
+mobilenetv1_image_path = './dataset/imagenet/imagenet_1000_raw/n02782093_1.JPEG'
+mobilenetv2_image_path = './dataset/imagenet/imagenet_1000_raw/n04404412_1.JPEG'
+inceptionv3_image_path = './dataset/imagenet/imagenet_1000_raw/n13040303_1.JPEG'
+
+mobilenetv1_test_image_preprocessed = preprocess_image(mobilenetv1_image_path, mobilenet, [224, 224])
+mobilenetv2_test_image_preprocessed = preprocess_image(mobilenetv2_image_path, mobilenet_v2, [224, 224])
+inceptionv3_test_image_preprocessed = preprocess_image(inceptionv3_image_path, inception_v3, [299, 299])
+
+print('image preprocessing completed!\n')
+
+print('\nsaving and loading models...')
+
+loaded_models = {}
+
+for model_name in models_to_load:
+    model_names = models_detail.keys()
+    if model_name in model_names:
+        model_path = f'{model_name}_saved_model'
+        if not os.path.isdir(model_path):
+            print('\nmodel save!\n')
+            save_model(model_name, model_path)
+        loaded_models[model_name] = tf.keras.models.load_model(model_path)
+    else:
+        continue
+
+print('saving and loading models completed!\n')
+
+
+# Yolo v5
+if 'yolo_v5' in models_to_load:
+    yolov5_image_path = './dataset/imagenet/imagenet_1000_raw/n02782093_1.JPEG'
+    weights = './model/yolov5/yolov5s_saved_model'
+    data = './model/yolov5/coco.yaml'  # dataset.yaml path
+    imgsz = (640, 640)  # inference size (height, width)
+    conf_thres = 0.25  # confidence threshold
+    iou_thres = 0.45  # NMS IOU threshold
+    max_det = 1000  # maximum detections per image
+    vid_stride = 1  # video frame-rate stride
+
+    yolo_model = DetectMultiBackend(weights, data=data)
+    stride, names, pt = yolo_model.stride, yolo_model.names, yolo_model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+    # Dataloader
+    bs = 1  # batch_size
+    yolov5_dataset = LoadImages(yolov5_image_path, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+
+    yolo_model.warmup(imgsz=(1 if pt or yolo_model.triton else bs, 3, *imgsz))  # warmup
+    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
+    ims = [im for path, im, im0s, vid_cap, s in yolov5_dataset]
+    im = ims[0]
+
+    im = torch.from_numpy(im).to(yolo_model.device)
+    im = im.half() if yolo_model.fp16 else im.float()  # uint8 to fp16/32
+    im /= 255  # 0 - 255 to 0.0 - 1.0
+    if len(im.shape) == 3:
+        im = im[None]  # expand for batch dim
+
+
+app = Flask(__name__)
+
+
+@app.route('/mobilenet')
+def mobilenetv1():
+    inference_start_time = time.time()
+    result = loaded_models['mobilenet'].predict(mobilenetv1_test_image_preprocessed)
+    inference_end_time = time.time()
+
+    inference_time = inference_end_time - inference_start_time
+
+    return f'mobilenetv1 inference success\ninference time:{inference_time}\n'
+
+
+@app.route('/mobilenet_v2')
+def mobilenetv2():
+    inference_start_time = time.time()
+    result = loaded_models['mobilenet_v2'].predict(mobilenetv2_test_image_preprocessed)
+    inference_end_time = time.time()
+
+    inference_time = inference_end_time - inference_start_time
+
+    # print(result)
+
+    return f'mobilenetv2 inference success\ninference time:{inference_time}\n'
+
+
+@app.route('/inception_v3')
+def inceptionv3():
+    inference_start_time = time.time()
+    result = loaded_models['inception_v3'].predict(inceptionv3_test_image_preprocessed)
+    inference_end_time = time.time()
+
+    inference_time = inference_end_time - inference_start_time
+
+    # print(result)
+
+    return f'inceptionv3 inference success\ninference time:{inference_time}\n'
+
+
+@app.route('/yolo_v5')
+def yolov5():
+    inference_start_time = time.time()
+    result = yolo_model(im)
+    inference_end_time = time.time()
+    inference_time = inference_end_time - inference_start_time
+
+    return f'yolov5 inference success\ninference time:{inference_time}\n'
+
+
+app.run(host=hostname, port=port, threaded=False)
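Note: the scheduler parses the plain-text reply of these Flask routes, which has the form '<model> inference success\ninference time:<seconds>\n'. A minimal client against one of the routes, with the IP and port taken from edges_info in the scheduler:

import requests

res = requests.get('http://192.168.0.32:5001/mobilenet')
inference_time = float(res.text.split(':')[1].split('\n')[0])  # same parsing the scheduler uses
print(inference_time)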
diff --git a/CNN/model/models.config b/CNN/model/models.config
new file mode 100644
index 0000000..8453ba4
--- /dev/null
+++ b/CNN/model/models.config
@@ -0,0 +1,27 @@
+# models.config
+
+model_config_list {
+  config {
+    name: "mobilenet_v1"
+    base_path: "/models/model/mobilenet_v1/"
+    model_platform: "tensorflow"
+  }
+
+  config {
+    name: "mobilenet_v2"
+    base_path: "/models/model/mobilenet_v2/"
+    model_platform: "tensorflow"
+  }
+
+  config {
+    name: "inception_v3"
+    base_path: "/models/model/inception_v3/"
+    model_platform: "tensorflow"
+  }
+
+#  config {
+#    name: "yolo_v5"
+#    base_path: "/models/model/yolo_v5/"
+#    model_platform: "tensorflow"
+#  }
+}
\ No newline at end of file
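Note: tensorflow_model_server expects every base_path in models.config to contain numbered version subdirectories (for example /models/model/mobilenet_v1/1/). A sketch of exporting the Keras models into that layout, reusing the tensorflow.keras.applications constructors from edge_inference_request_server.py; the local ./model/ directory is what docker_run_tf_serving.sh mounts at /models/model/:

from tensorflow.keras.applications import mobilenet, mobilenet_v2, inception_v3

exports = {
    'mobilenet_v1': mobilenet.MobileNet(weights='imagenet'),
    'mobilenet_v2': mobilenet_v2.MobileNetV2(weights='imagenet'),
    'inception_v3': inception_v3.InceptionV3(weights='imagenet'),
}
for name, model in exports.items():
    # "1" is the model version directory that TF Serving scans for under base_path
    model.save(f'./model/{name}/1', include_optimizer=False, save_format='tf')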
diff --git a/CNN/object_detect_raw_image_classification_inference.py b/CNN/object_detect_raw_image_classification_inference.py
new file mode 100644
index 0000000..88b128b
--- /dev/null
+++ b/CNN/object_detect_raw_image_classification_inference.py
@@ -0,0 +1,107 @@
+import argparse
+import os
+import platform
+import sys
+from pathlib import Path
+
+import torch
+
+FILE = Path(__file__).resolve()
+ROOT = FILE.parents[0]  # YOLOv5 root directory
+if str(ROOT) not in sys.path:
+    sys.path.append(str(ROOT))  # add ROOT to PATH
+ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
+
+from model.yolov5.models.common import DetectMultiBackend
+from model.yolov5.utils.dataloaders import IMG_FORMATS, VID_FORMATS, LoadImages, LoadScreenshots, LoadStreams
+from model.yolov5.utils.general import (LOGGER, Profile, check_file, check_img_size, check_imshow, check_requirements, colorstr, cv2,
+                                        increment_path, non_max_suppression, print_args, scale_boxes, strip_optimizer, xyxy2xywh)
+from model.yolov5.utils.plots import Annotator, colors, save_one_box
+from model.yolov5.utils.torch_utils import select_device, smart_inference_mode
+
+
+@smart_inference_mode()
+def run(
+        weights=ROOT / 'yolov5s.pt',  # model path or triton URL
+        source=ROOT / 'data/images',  # file/dir/URL/glob/screen/0(webcam)
+        data=ROOT / 'data/coco128.yaml',  # dataset.yaml path
+        imgsz=(640, 640),  # inference size (height, width)
+        conf_thres=0.25,  # confidence threshold
+        iou_thres=0.45,  # NMS IOU threshold
+        max_det=1000,  # maximum detections per image
+        device='',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
+        view_img=False,  # show results
+        vid_stride=1,  # video frame-rate stride
+):
+    source = str(source)
+
+    # Load model
+    # device = select_device(device)
+    model = DetectMultiBackend(weights, data=data)
+    stride, names, pt = model.stride, model.names, model.pt
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
+
+    # Dataloader
+    bs = 1  # batch_size
+    dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)
+    vid_path, vid_writer = [None] * bs, [None] * bs
+
+    # Run inference
+    model.warmup(imgsz=(1 if pt or model.triton else bs, 3, *imgsz))  # warmup
+    seen, windows, dt = 0, [], (Profile(), Profile(), Profile())
+    for path, im, im0s, vid_cap, s in dataset:
+        with dt[0]:
+            im = torch.from_numpy(im).to(model.device)
+            im = im.half() if model.fp16 else im.float()  # uint8 to fp16/32
+            im /= 255  # 0 - 255 to 0.0 - 1.0
+            if len(im.shape) == 3:
+                im = im[None]  # expand for batch dim
+
+        # Inference
+        with dt[1]:
+            pred = model(im)
+
+        # NMS
+        with dt[2]:
+            pred = non_max_suppression(pred, conf_thres, iou_thres, max_det=max_det)
+
+        # Process predictions
+        for i, det in enumerate(pred):  # per image
+            seen += 1
+            p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
+            p = Path(p)  # to Path
+
+            # Print time (inference-only)
+            LOGGER.info(f"{s}{'' if len(det) else '(no detections), '}{dt[1].dt * 1E3:.1f}ms")
+
+    # Print results
+    t = tuple(x.t / seen * 1E3 for x in dt)  # speeds per image
+    LOGGER.info(f'Speed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {(1, 3, *imgsz)}' % t)
+
+
+def parse_opt():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path or triton URL')
+    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob/screen/0(webcam)')
+    parser.add_argument('--data', type=str, default=ROOT / 'data/coco128.yaml', help='(optional) dataset.yaml path')
+    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
+    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
+    parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold')
+    parser.add_argument('--max-det', type=int, default=1000, help='maximum detections per image')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--view-img', action='store_true', help='show results')
+    parser.add_argument('--vid-stride', type=int, default=1, help='video frame-rate stride')
+    opt = parser.parse_args()
+    opt.imgsz *= 2 if len(opt.imgsz) == 1 else 1  # expand
+    print_args(vars(opt))
+    return opt
+
+
+def main(opt):
+    run(**vars(opt))
+
+
+if __name__ == "__main__":
+    opt = parse_opt()
+    main(opt)
diff --git a/CNN/roundrobin_poisson_inference_request.py b/CNN/roundrobin_poisson_inference_request.py
index 0024789..1f2abb4 100644
--- a/CNN/roundrobin_poisson_inference_request.py
+++ b/CNN/roundrobin_poisson_inference_request.py
@@ -98,9 +98,13 @@ def ModelRequest(model, data):
     res = requests.post(url, data, headers)
     response = json.loads(res.text)['predictions']
 
-    end_time = time.time() - inference_start
+    inference_end = time.time()
+    end_time = inference_end - inference_start
+
+    key = model + " " + str(inference_start) + " " + str(inference_end)
+
+    print(key, end_time)
 
-    key = model + " " + str(inference_start)
     time_tracking[key] = end_time
 
     return response
@@ -109,17 +113,17 @@ def ModelRequest(model, data):
 get_weighted_smooth = roundrobin.smooth(models)
 model_sequence = [get_weighted_smooth() for _ in range(MAX)]
 RoundPerEvent = 10
-TotalEvent = 10
+TotalEvent = 1
 poisson_distribution = random.poisson(RoundPerEvent, TotalEvent)
+sleep_val = 1
 
 if __name__ == "__main__":
     request_start = time.time()
     threads = []
     for events in poisson_distribution:
-        print('request', events)
         event_start = time.time()
-
+        print('request', events)
         for model_idx in range(events):
             model = model_sequence[model_idx % len(model_sequence)]
             th = Thread(target=ModelRequest, args=(model, datas[model]))
@@ -130,13 +134,12 @@ def ModelRequest(model, data):
             # ModelRequest(model, datas[model])
 
         while True:
-            if time.time() - event_start >=1:
+            if time.time() - event_start >= sleep_val:
                 break
 
     for thread in threads:
        thread.join()
 
     request_end = time.time() - request_start
-
     print("Return value:", poisson_distribution)
     print("Length of return value:", len(poisson_distribution))
     print("total request time", request_end)
diff --git a/CNN/run_tf_serving_container.sh b/CNN/run_tf_serving_container.sh
new file mode 100644
index 0000000..c891448
--- /dev/null
+++ b/CNN/run_tf_serving_container.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+is_dockerfile=$(ls Dockerfile 2> /dev/null)
+
+if [ -z "$is_dockerfile" ]
+then
+    echo "Dockerfile does not exist!"
+    exit 1
+fi
+
+image_name="edge-tf-serving"
+image_id=$(docker images -aq "$image_name")
+
+if [ -z "$image_id" ]
+then
+    docker build -t edge-tf-serving .
+fi
+
+docker run --rm \
+    --device /dev/nvhost-ctrl \
+    --device /dev/nvhost-ctrl-gpu \
+    --device /dev/nvhost-prof-gpu \
+    --device /dev/nvmap \
+    --device /dev/nvhost-gpu \
+    --device /dev/nvhost-as-gpu \
+    -p 8500:8500 \
+    -v ~/edge-inference/CNN/model/:/models/model/ \
+    edge-tf-serving:latest
\ No newline at end of file
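Note: roundrobin_poisson_inference_request.py draws the number of requests per round from a Poisson distribution, so RoundPerEvent is the expected number of requests issued in each one-second round and TotalEvent is the number of rounds. A standalone sketch of that request shaping, with a no-op in place of the actual HTTP call:

import time
from numpy import random

RoundPerEvent = 10  # expected requests per round
TotalEvent = 1      # number of rounds
sleep_val = 1       # round length in seconds

for events in random.poisson(RoundPerEvent, TotalEvent):
    round_start = time.time()
    print('request', events)
    for _ in range(events):
        pass  # placeholder for ModelRequest(model, datas[model])
    while time.time() - round_start < sleep_val:  # pad the round out to one second, as the script does
        pass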