Team-SEBAF · YUDINDIN1005 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026 · Feb 24, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -25,6 +25,8 @@ jobs:
           python -m pip install -e .
           python -m pip install pytest ruff
           python -m pip install pytesseract
+          python -m pip install openai-whisper
+          python -m pip install ffmpeg-python
 
       - name: Lint (ruff)
         run: ruff check src tests

diff --git a/src/ansimon_ai/stt/__init__.py b/src/ansimon_ai/stt/__init__.py
@@ -1,10 +1,12 @@
 from .types import STTResult, STTSegment
 from .base import STTEngine
 from .mock import MockSTT
+from .whisper_stt import WhisperSTT
 
 __all__ = [
     "STTResult",
     "STTSegment",
     "STTEngine",
     "MockSTT",
+    "WhisperSTT",
 ]
diff --git a/src/ansimon_ai/stt/whisper_stt.py b/src/ansimon_ai/stt/whisper_stt.py
@@ -0,0 +1,49 @@
+from .base import STTEngine
+from .types import STTResult, STTSegment
+
+
+class WhisperSTT(STTEngine):
+    """Speech-to-text engine that transcribes audio with an OpenAI Whisper model.
+
+    Args:
+        model_size: Name passed to ``whisper.load_model`` (default ``"base"``).
+        language: Language code passed to Whisper's ``transcribe`` and used as
+            the fallback for ``STTResult.language`` (default ``"ko"``).
+
+    Raises:
+        ImportError: If the ``whisper`` package (openai-whisper) is not installed.
+    """
+
+    def __init__(self, model_size: str = "base", language: str = "ko"):
+        # Imported lazily: the package __init__ re-exports this class, so a
+        # module-level import would make every ``ansimon_ai.stt`` import
+        # (including MockSTT-only users) require the Whisper dependency.
+        try:
+            import whisper
+        except ImportError as err:
+            raise ImportError(
+                "WhisperSTT requires the 'openai-whisper' package"
+            ) from err
+
+        self.model = whisper.load_model(model_size)
+        self.language = language
+        self.engine_name = f"whisper-{model_size}"
+
+    def transcribe(self, audio_path: str) -> STTResult:
+        """Transcribe the audio file at ``audio_path`` into an ``STTResult``.
+
+        Each entry of Whisper's ``segments`` output becomes one ``STTSegment``
+        carrying its ``start``, ``end`` and ``text`` values unchanged.
+        """
+        result = self.model.transcribe(audio_path, language=self.language)
+        segments = [
+            STTSegment(start=seg["start"], end=seg["end"], text=seg["text"])
+            for seg in result["segments"]
+        ]
+        return STTResult(
+            full_text=result["text"],
+            segments=segments,
+            # Fall back to the requested language if Whisper omits the key.
+            language=result.get("language", self.language),
+            engine=self.engine_name,
+        )
diff --git a/tests/stt/test_whisper_stt_tags.py b/tests/stt/test_whisper_stt_tags.py
@@ -0,0 +1,28 @@
+import os
+import pytest
+from ansimon_ai.stt import WhisperSTT
+from ansimon_ai.structuring.tag_patterns import extract_tags_from_structuring_input
+
+# Path to a real audio sample, supplied via the WHISPER_TEST_AUDIO environment
+# variable. There is deliberately no machine-specific default path: when the
+# variable is unset or does not point to a file, the test below is skipped.
+SAMPLE_AUDIO = os.environ.get("WHISPER_TEST_AUDIO", "")
+
+
+@pytest.mark.skipif(not os.path.isfile(SAMPLE_AUDIO), reason="샘플 오디오 파일이 존재하지 않음")
+def test_whisper_stt_and_tag_extraction():
+    """Transcribe the sample audio and run tag extraction on the result."""
+    stt = WhisperSTT()
+    stt_result = stt.transcribe(SAMPLE_AUDIO)
+    assert stt_result.full_text, "전체 텍스트가 비어 있지 않아야 함"
+    assert stt_result.segments, "segment가 1개 이상 있어야 함"
+
+    # Tag extraction over the whole transcript must yield a list.
+    tags = extract_tags_from_structuring_input(stt_result)
+    assert isinstance(tags, list)
+
+    # Tag extraction must also work when the result holds a single segment.
+    for seg in stt_result.segments:
+        seg_result = stt_result.model_copy(update={"segments": [seg]})
+        seg_tags = extract_tags_from_structuring_input(seg_result)
+        assert isinstance(seg_tags, list)