1 change: 1 addition & 0 deletions src/emd/models/llms/__init__.py
@@ -6,6 +6,7 @@
deepseek,
baichuan,
jina,
kimi,
txgemma,
medgemma
)
46 changes: 46 additions & 0 deletions src/emd/models/llms/kimi.py
@@ -0,0 +1,46 @@
from .. import Model
from ..engines import ktransformers_engine
from ..services import (
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
)
from ..frameworks import fastapi_framework
from ..instances import (
g6e24xlarge_instance,
g6e48xlarge_instance,
local_instance
)
from emd.models.utils.constants import ModelType
from ..model_series import KIMI_SERIES

Model.register(
dict(
model_id="Kimi-K2-Instruct-Q4_K_M-GGUF",
supported_engines=[ktransformers_engine],
supported_instances=[
g6e24xlarge_instance, # 4 GPUs, 96 vCPU, 768GB RAM - Minimum viable
g6e48xlarge_instance, # 8 GPUs, 192 vCPU, 1536GB RAM - Optimal
local_instance # Local deployment (600GB+ RAM required)
],
supported_services=[
sagemaker_service,
sagemaker_async_service,
ecs_service,
local_service
],
supported_frameworks=[
fastapi_framework
],
allow_china_region=True,
huggingface_model_id="KVCache-ai/Kimi-K2-Instruct-GGUF",
huggingface_model_download_kwargs=dict(allow_patterns=["*Q4_K_M*"]),
require_huggingface_token=False,
application_scenario="Agentic AI, tool use, reasoning, coding, autonomous problem-solving",
description="Kimi K2 1T parameter MoE model with 32B activated parameters in GGUF Q4_K_M format. Optimized for KTransformers deployment with 600GB+ RAM requirement. Achieves 10-14 TPS performance.",
model_type=ModelType.LLM,
model_series=KIMI_SERIES,
need_prepare_model=False
)
)
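Note on the registration above: emd model definitions are declarative, so another quantization of the same checkpoint could be added by reusing the same engine, service, instance, and framework objects. A minimal sketch, assuming a hypothetical Q8_0 artifact in the same Hugging Face repository (the model_id and allow_patterns below are illustrative and not part of this PR):

from .. import Model
from ..engines import ktransformers_engine
from ..services import sagemaker_service, local_service
from ..frameworks import fastapi_framework
from ..instances import g6e48xlarge_instance, local_instance
from emd.models.utils.constants import ModelType
from ..model_series import KIMI_SERIES

# Hypothetical Q8_0 variant registered with the same building blocks as the
# Q4_K_M entry above; only the model_id and the download pattern differ.
Model.register(
    dict(
        model_id="Kimi-K2-Instruct-Q8_0-GGUF",
        supported_engines=[ktransformers_engine],
        supported_instances=[g6e48xlarge_instance, local_instance],
        supported_services=[sagemaker_service, local_service],
        supported_frameworks=[fastapi_framework],
        huggingface_model_id="KVCache-ai/Kimi-K2-Instruct-GGUF",
        huggingface_model_download_kwargs=dict(allow_patterns=["*Q8_0*"]),
        model_type=ModelType.LLM,
        model_series=KIMI_SERIES,
        need_prepare_model=False,
    )
)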
6 changes: 6 additions & 0 deletions src/emd/models/model_series.py
@@ -151,3 +151,9 @@
description="Baichuan Intelligent Technology.",
reference_link="https://github.com/baichuan-inc"
)

KIMI_SERIES = ModelSeries(
model_series_name=ModelSeriesType.KIMI,
description="Kimi K2 is a state-of-the-art mixture-of-experts (MoE) language model with 32 billion activated parameters and 1 trillion total parameters. Trained with the Muon optimizer, Kimi K2 achieves exceptional performance across frontier knowledge, reasoning, and coding tasks while being meticulously optimized for agentic capabilities.",
reference_link="https://github.com/moonshotai/Kimi-K2"
)
1 change: 1 addition & 0 deletions src/emd/models/utils/constants.py
@@ -234,3 +234,4 @@ class ModelSeriesType(ConstantBase):
DEEPSEEK_REASONING_MODEL = "deepseek reasoning model"
DEEPSEEK_v3 = "deepseek v3"
BAICHUAN = "baichuan"
KIMI = "kimi"