Skip to content

Commit ff9c730

Browse files
committed
Merge branch 'dev' of github.com:m5stack/StackFlow into dev
2 parents be01d29 + 9f05b13 commit ff9c730

11 files changed

+340
-0
lines changed
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"mode":"qwen2.5-0.5B-Int4-ax630c",
3+
"type":"llm",
4+
"homepage":"https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct-GPTQ-Int4",
5+
"capabilities":[
6+
"text_generation",
7+
"chat"
8+
],
9+
"input_type":[
10+
"llm.utf-8",
11+
"llm.utf-8.stream",
12+
"llm.chat_completion",
13+
"llm.chat_completion.stream"
14+
],
15+
"output_type":[
16+
"llm.utf-8",
17+
"llm.utf-8.stream"
18+
],
19+
"mode_param":{
20+
"tokenizer_type":2,
21+
"filename_tokenizer_model":"http://localhost:8080",
22+
"filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin",
23+
"filename_post_axmodel":"qwen2_post.axmodel",
24+
"template_filename_axmodel":"qwen2_p128_l%d_together.axmodel",
25+
"b_use_topk":false,
26+
"b_bos":false,
27+
"b_eos":false,
28+
"axmodel_num":24,
29+
"tokens_embed_num":151936,
30+
"tokens_embed_size":896,
31+
"b_use_mmap_load_embed":true,
32+
"b_dynamic_load_axmodel_layer":false,
33+
"ext_scripts":["tokenizer_qwen2.5-0.5B-Int4-ax630c.py"]
34+
}
35+
}
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"mode":"qwen2.5-1.5B-Int4-ax630c",
3+
"type":"llm",
4+
"homepage":"https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GPTQ-Int4",
5+
"capabilities":[
6+
"text_generation",
7+
"chat"
8+
],
9+
"input_type":[
10+
"llm.utf-8",
11+
"llm.utf-8.stream",
12+
"llm.chat_completion",
13+
"llm.chat_completion.stream"
14+
],
15+
"output_type":[
16+
"llm.utf-8",
17+
"llm.utf-8.stream"
18+
],
19+
"mode_param":{
20+
"tokenizer_type":2,
21+
"filename_tokenizer_model":"http://localhost:8080",
22+
"filename_tokens_embed":"model.embed_tokens.weight.bfloat16.bin",
23+
"filename_post_axmodel":"qwen2_post.axmodel",
24+
"template_filename_axmodel":"qwen2_p128_l%d_together.axmodel",
25+
"b_use_topk":false,
26+
"b_bos":false,
27+
"b_eos":false,
28+
"axmodel_num":28,
29+
"tokens_embed_num":151936,
30+
"tokens_embed_size":1536,
31+
"b_use_mmap_load_embed":true,
32+
"b_dynamic_load_axmodel_layer":false,
33+
"ext_scripts":["tokenizer_qwen2.5-1.5B-Int4-ax630c.py"]
34+
}
35+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from transformers import AutoTokenizer, PreTrainedTokenizerFast
2+
from http.server import HTTPServer, BaseHTTPRequestHandler
3+
import json
4+
import argparse
5+
6+
class Tokenizer_Http():
    """Thin wrapper around a HuggingFace tokenizer for serving over HTTP.

    Loads the tokenizer for ``model_id`` and exposes chat-template
    encoding, plain decoding, and the special-token ids/strings.
    """

    def __init__(self, model_id):
        # model_id may be a hub id or a local directory path.
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def encode(self, prompt, content):
        """Render *prompt* with system message *content* through the chat
        template and return the resulting list of token ids."""
        chat = [
            {"role": "system", "content": content},
            {"role": "user", "content": prompt},
        ]
        rendered = self.tokenizer.apply_chat_template(
            chat, tokenize=False, add_generation_prompt=True)
        print(rendered)  # debug: show the exact templated text being encoded
        return self.tokenizer.encode(rendered)

    def decode(self, token_ids):
        """Return the text decoded from *token_ids*."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        # May be None for models without an explicit BOS token.
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        # May be None for models without an explicit EOS token.
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        return self.tokenizer.eos_token
43+
44+
class Request(BaseHTTPRequestHandler):
    """HTTP front-end for the module-level ``tokenizer`` instance.

    GET  /bos_id, /eos_id            -> {"bos_id": ...} / {"eos_id": ...}
    POST /encode  {"text": ...}      -> {"token_ids": [...]}
    POST /decode  {"token_ids": ...} -> {"text": "..."}

    NOTE(review): relies on the globals ``tokenizer`` and ``args`` that
    the __main__ section of this file creates.
    """
    timeout = 5
    server_version = 'Apache'

    def do_GET(self):
        """Serve the special-token id endpoints."""
        print(self.path)
        # Status and headers go out before the path is inspected, so an
        # unknown path still gets a 200 with the literal body 'error'.
        self.send_response(200)
        self.send_header("type", "get")
        self.end_headers()

        if self.path == '/bos_id':
            token_id = tokenizer.bos_id
            # -1 tells the client the model has no such token.
            msg = json.dumps({'bos_id': -1 if token_id is None else token_id})
        elif self.path == '/eos_id':
            token_id = tokenizer.eos_id
            msg = json.dumps({'eos_id': -1 if token_id is None else token_id})
        else:
            msg = 'error'

        print(msg)
        self.wfile.write(str(msg).encode())  # reply body as bytes

    def do_POST(self):
        """Serve the encode/decode endpoints."""
        # Read exactly content-length bytes of the request body.
        raw = self.rfile.read(int(self.headers['content-length']))
        body = raw.decode()

        self.send_response(200)
        self.send_header("type", "post")
        self.end_headers()

        if self.path == '/encode':
            payload = json.loads(body)
            # The system prompt comes from the --content CLI argument.
            token_ids = tokenizer.encode(payload['text'], args.content)
            if token_ids is None:
                msg = json.dumps({'token_ids': -1})
            else:
                msg = json.dumps({'token_ids': token_ids})
        elif self.path == '/decode':
            payload = json.loads(body)
            decoded = tokenizer.decode(payload['token_ids'])
            if decoded is None:
                msg = json.dumps({'text': ""})
            else:
                msg = json.dumps({'text': decoded})
        else:
            msg = 'error'
        print(msg)
        self.wfile.write(str(msg).encode())  # reply body as bytes
112+
113+
114+
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    parser.add_argument('--model_id', type=str, default='qwen2.5_coder_tokenizer')
    parser.add_argument('--content', type=str, default='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.')
    args = parser.parse_args()

    # Module-level globals consumed by the Request handler above.
    tokenizer = Tokenizer_Http(args.model_id)

    # 'localhost' is equivalent to '127.0.0.1'.
    host = (args.host, args.port)
    print('http://%s:%s' % host)
    # Bind the handler class to the address and serve until interrupted.
    server = HTTPServer(host, Request)
    server.serve_forever()
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
from transformers import AutoTokenizer, PreTrainedTokenizerFast
2+
from http.server import HTTPServer, BaseHTTPRequestHandler
3+
import json
4+
import argparse
5+
6+
class Tokenizer_Http():
    """Thin wrapper around a HuggingFace tokenizer for serving over HTTP.

    Loads the tokenizer for ``model_id`` and exposes chat-template
    encoding, plain decoding, and the special-token ids/strings.
    """

    def __init__(self, model_id):
        # model_id may be a hub id or a local directory path.
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)

    def encode(self, prompt, content):
        """Render *prompt* with system message *content* through the chat
        template and return the resulting list of token ids."""
        chat = [
            {"role": "system", "content": content},
            {"role": "user", "content": prompt},
        ]
        rendered = self.tokenizer.apply_chat_template(
            chat, tokenize=False, add_generation_prompt=True)
        print(rendered)  # debug: show the exact templated text being encoded
        return self.tokenizer.encode(rendered)

    def decode(self, token_ids):
        """Return the text decoded from *token_ids*."""
        return self.tokenizer.decode(token_ids)

    @property
    def bos_id(self):
        # May be None for models without an explicit BOS token.
        return self.tokenizer.bos_token_id

    @property
    def eos_id(self):
        # May be None for models without an explicit EOS token.
        return self.tokenizer.eos_token_id

    @property
    def bos_token(self):
        return self.tokenizer.bos_token

    @property
    def eos_token(self):
        return self.tokenizer.eos_token
43+
44+
class Request(BaseHTTPRequestHandler):
    """HTTP front-end for the module-level ``tokenizer`` instance.

    GET  /bos_id, /eos_id            -> {"bos_id": ...} / {"eos_id": ...}
    POST /encode  {"text": ...}      -> {"token_ids": [...]}
    POST /decode  {"token_ids": ...} -> {"text": "..."}

    NOTE(review): relies on the globals ``tokenizer`` and ``args`` that
    the __main__ section of this file creates.
    """
    timeout = 5
    server_version = 'Apache'

    def do_GET(self):
        """Serve the special-token id endpoints."""
        print(self.path)
        # Status and headers go out before the path is inspected, so an
        # unknown path still gets a 200 with the literal body 'error'.
        self.send_response(200)
        self.send_header("type", "get")
        self.end_headers()

        if self.path == '/bos_id':
            token_id = tokenizer.bos_id
            # -1 tells the client the model has no such token.
            msg = json.dumps({'bos_id': -1 if token_id is None else token_id})
        elif self.path == '/eos_id':
            token_id = tokenizer.eos_id
            msg = json.dumps({'eos_id': -1 if token_id is None else token_id})
        else:
            msg = 'error'

        print(msg)
        self.wfile.write(str(msg).encode())  # reply body as bytes

    def do_POST(self):
        """Serve the encode/decode endpoints."""
        # Read exactly content-length bytes of the request body.
        raw = self.rfile.read(int(self.headers['content-length']))
        body = raw.decode()

        self.send_response(200)
        self.send_header("type", "post")
        self.end_headers()

        if self.path == '/encode':
            payload = json.loads(body)
            # The system prompt comes from the --content CLI argument.
            token_ids = tokenizer.encode(payload['text'], args.content)
            if token_ids is None:
                msg = json.dumps({'token_ids': -1})
            else:
                msg = json.dumps({'token_ids': token_ids})
        elif self.path == '/decode':
            payload = json.loads(body)
            decoded = tokenizer.decode(payload['token_ids'])
            if decoded is None:
                msg = json.dumps({'text': ""})
            else:
                msg = json.dumps({'text': decoded})
        else:
            msg = 'error'
        print(msg)
        self.wfile.write(str(msg).encode())  # reply body as bytes
112+
113+
114+
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument('--host', type=str, default='localhost')
    parser.add_argument('--port', type=int, default=8080)
    parser.add_argument('--model_id', type=str, default='qwen2.5_coder_tokenizer')
    parser.add_argument('--content', type=str, default='You are Qwen, created by Alibaba Cloud. You are a helpful assistant.')
    args = parser.parse_args()

    # Module-level globals consumed by the Request handler above.
    tokenizer = Tokenizer_Http(args.model_id)

    # 'localhost' is equivalent to '127.0.0.1'.
    host = (args.host, args.port)
    print('http://%s:%s' % host)
    # Bind the handler class to the address and serve until interrupted.
    server = HTTPServer(host, Request)
    server.serve_forever()

projects/llm_framework/main_whisper/mode_whisper-base.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode": "whisper-base",
33
"type": "asr",
4+
"homepage":"https://huggingface.co/openai/whisper-base",
45
"capabilities": [
56
"Automatic_Speech_Recognition",
67
"English",

projects/llm_framework/main_whisper/mode_whisper-tiny.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode": "whisper-tiny",
33
"type": "asr",
4+
"homepage":"https://huggingface.co/openai/whisper-tiny",
45
"capabilities": [
56
"Automatic_Speech_Recognition",
67
"English",

projects/llm_framework/main_yolo/mode_yolo11n-hand-pose.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode":"yolo11n-hand-pose",
33
"type":"cv",
4+
"homepage":"https://github.com/ultralytics/ultralytics",
45
"capabilities":[
56
"Pose"
67
],

projects/llm_framework/main_yolo/mode_yolo11n-pose.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode":"yolo11n-pose",
33
"type":"cv",
4+
"homepage":"https://github.com/ultralytics/ultralytics",
45
"capabilities":[
56
"Pose"
67
],

projects/llm_framework/main_yolo/mode_yolo11n-seg.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode":"yolo11s-seg",
33
"type":"cv",
4+
"homepage":"https://github.com/ultralytics/ultralytics",
45
"capabilities":[
56
"Segmentation"
67
],

projects/llm_framework/main_yolo/mode_yolo11n.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
{
22
"mode":"yolo11n",
33
"type":"cv",
4+
"homepage":"https://github.com/ultralytics/ultralytics",
45
"capabilities":[
56
"Detection"
67
],

projects/llm_framework/tools/llm_pack.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,8 +345,10 @@ def create_bin_deb(package_name, version, src_folder, revision = 'm5stack1'):
345345
'llm-model-silero-vad':[create_data_deb,'llm-model-silero-vad', '0.3', src_folder, revision],
346346
'llm-model-qwen2.5-0.5B-prefill-20e':[create_data_deb,'llm-model-qwen2.5-0.5B-prefill-20e', data_version, src_folder, revision],
347347
'llm-model-qwen2.5-0.5B-p256-ax630c':[create_data_deb,'llm-model-qwen2.5-0.5B-p256-ax630c', '0.4', src_folder, revision],
348+
'llm-model-qwen2.5-0.5B-Int4-ax630c':[create_data_deb,'llm-model-qwen2.5-0.5B-Int4-ax630c', '0.4', src_folder, revision],
348349
'llm-model-qwen2.5-1.5B-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-ax630c', '0.3', src_folder, revision],
349350
'llm-model-qwen2.5-1.5B-p256-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-p256-ax630c', '0.4', src_folder, revision],
351+
'llm-model-qwen2.5-1.5B-Int4-ax630c':[create_data_deb,'llm-model-qwen2.5-1.5B-Int4-ax630c', '0.4', src_folder, revision],
350352
'llm-model-qwen2.5-coder-0.5B-ax630c':[create_data_deb,'llm-model-qwen2.5-coder-0.5B-ax630c', data_version, src_folder, revision],
351353
'llm-model-llama3.2-1B-prefill-ax630c':[create_data_deb,'llm-model-llama3.2-1B-prefill-ax630c', data_version, src_folder, revision],
352354
'llm-model-llama3.2-1B-p256-ax630c':[create_data_deb,'llm-model-llama3.2-1B-p256-ax630c', '0.4', src_folder, revision],

0 commit comments

Comments
 (0)