Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ jobs:
python -m pip install -e .
python -m pip install pytest ruff
python -m pip install pytesseract
python -m pip install openai-whisper
python -m pip install ffmpeg-python

- name: Lint (ruff)
run: ruff check src tests
Expand Down
2 changes: 2 additions & 0 deletions src/ansimon_ai/stt/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from .types import STTResult, STTSegment
from .base import STTEngine
from .mock import MockSTT
from .whisper_stt import WhisperSTT

__all__ = [
"STTResult",
"STTSegment",
"STTEngine",
"MockSTT",
"WhisperSTT",
]
25 changes: 25 additions & 0 deletions src/ansimon_ai/stt/whisper_stt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from .base import STTEngine
from .types import STTResult, STTSegment
import whisper

class WhisperSTT(STTEngine):
    """Speech-to-text engine that delegates to an OpenAI Whisper model.

    The model is loaded once in the constructor and reused for every
    ``transcribe`` call.
    """

    def __init__(self, model_size: str = "base", *, language: str = "ko"):
        """Load the Whisper model and record the engine configuration.

        Args:
            model_size: Model name handed to ``whisper.load_model``.
            language: Language code passed to every transcription call.
                Defaults to ``"ko"``, the value that used to be hard-coded.
        """
        self.model = whisper.load_model(model_size)
        self.engine_name = f"whisper-{model_size}"
        self.language = language

    def transcribe(self, audio_path: str) -> STTResult:
        """Transcribe the audio file at ``audio_path``.

        Args:
            audio_path: Path of the audio file handed to the Whisper model.

        Returns:
            An ``STTResult`` holding the full text, one ``STTSegment`` per
            entry of the model's ``"segments"`` list, the language reported
            by the model, and this engine's name.
        """
        result = self.model.transcribe(audio_path, language=self.language)
        segments = [
            STTSegment(
                start=seg["start"],
                end=seg["end"],
                text=seg["text"],
            )
            for seg in result["segments"]
        ]
        return STTResult(
            full_text=result["text"],
            segments=segments,
            # Fall back to the configured language (not a fixed "ko") when
            # the model result carries no "language" key.
            language=result.get("language", self.language),
            engine=self.engine_name,
        )
20 changes: 20 additions & 0 deletions tests/stt/test_whisper_stt_tags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
import pytest
from ansimon_ai.stt import WhisperSTT
from ansimon_ai.structuring.tag_patterns import extract_tags_from_structuring_input

# Audio file used by the Whisper integration test below. Override it with the
# WHISPER_TEST_AUDIO environment variable; the fallback is a fixed local path,
# and the test is skipped when the resolved file does not exist.
SAMPLE_AUDIO = os.getenv("WHISPER_TEST_AUDIO", "D:/Project/AnsimOn/call_sample2.m4a")

@pytest.mark.skipif(not os.path.exists(SAMPLE_AUDIO), reason="샘플 오디오 파일이 존재하지 않음")
def test_whisper_stt_and_tag_extraction():
    """Run Whisper on the sample audio and feed the result to tag extraction.

    Checks that the transcription has text and at least one segment, and
    that tag extraction returns a list both for the whole result and for a
    copy of the result narrowed to each single segment.
    """
    engine = WhisperSTT()
    transcript = engine.transcribe(SAMPLE_AUDIO)
    assert transcript.full_text, "전체 텍스트가 비어 있지 않아야 함"
    assert transcript.segments, "segment가 1개 이상 있어야 함"

    # Whole-transcript extraction.
    assert isinstance(extract_tags_from_structuring_input(transcript), list)

    # Per-segment extraction: same result object, segments replaced by one.
    for segment in transcript.segments:
        single_segment_result = transcript.model_copy(update={"segments": [segment]})
        assert isinstance(
            extract_tags_from_structuring_input(single_segment_result), list
        )