This repository was archived on 2025-09-28. You can view files and clone it, but you cannot make any changes to its state, such as pushing or creating new issues, pull requests, or comments.
transcript_api/transcriptapi/stt.py

57 lines
2 KiB
Python
Raw Normal View History

2024-01-31 16:11:59 +01:00
# Copyright (c) 2024 Julian Müller (ChaoticByte)
from io import BytesIO as _BytesIO
from pathlib import Path as _Path
from typing import Iterator as _Iterator

from faster_whisper import WhisperModel as _WhisperModel

from .msg import ComponentLogger as _ComponentLogger
class STT:
    '''Speech-to-text helper built on a lazily created WhisperModel.

    The constructor only validates and stores the configuration. The model
    object is created on the first call to init(), which transcribe() calls
    itself, and is reused afterwards.
    '''

    def __init__(self, model_path: _Path, n_threads: int = 4, use_int8: bool = True, logger: _ComponentLogger = _ComponentLogger("STT")):
        '''Validate and store the configuration; the model is not created here.

        model_path -- pathlib.Path of the model; it is expanded and resolved,
                      and the resulting path must exist
        n_threads  -- non-negative int, later passed to the model as cpu_threads
        use_int8   -- True selects compute type "int8", False selects "default"
        logger     -- ComponentLogger that receives the debug messages

        NOTE: the default logger is created once, when the class body is
        executed, so every instance relying on the default shares that
        single logger object.

        Raises AssertionError when an argument fails validation. The checks
        are plain assert statements, so they are skipped under `python -O`.
        '''
        assert isinstance(model_path, _Path), "model_path must be a pathlib.Path"
        # Exact type check: unlike isinstance(), this also rejects bool
        # (which is a subclass of int).
        assert type(n_threads) is int and n_threads >= 0, "n_threads must be an int >= 0"
        assert type(use_int8) is bool, "use_int8 must be a bool"
        assert isinstance(logger, _ComponentLogger), "logger must be a ComponentLogger"
        self.logger = logger
        self.model_path = model_path.expanduser().resolve()
        assert self.model_path.exists(), f"model path does not exist: {self.model_path}"
        self.n_threads = n_threads
        self.compute_type = "int8" if use_int8 else "default"
        # Created on demand by init().
        self._model = None

    def init(self):
        '''Create the WhisperModel if it does not exist yet; otherwise do nothing.'''
        if self._model is not None:
            return
        self.logger.debug("Initializing ...")
        self._model = _WhisperModel(
            str(self.model_path),
            device="cpu",
            cpu_threads=self.n_threads,
            compute_type=self.compute_type,
            local_files_only=True)
        self.logger.debug("Initialized.")

    def transcribe(self, audio: bytes) -> _Iterator[str]:
        '''Transcribe audio and yield the text of each segment.

        audio -- the audio data as bytes; it is handed to the model wrapped
                 in an in-memory binary stream

        This method is a generator: nothing in its body runs, including the
        argument check and the model initialization, until the returned
        iterator is advanced for the first time.

        Yields the text of every segment in the order the model produces
        them. Leading spaces are stripped from the segment whose id is 1.

        Raises AssertionError (on first iteration) if audio is not bytes.
        '''
        assert type(audio) is bytes, "audio must be bytes"
        self.init()
        # A freshly constructed BytesIO is already positioned at offset 0.
        with _BytesIO(audio) as bio:
            self.logger.debug("Transcribing audio ...")
            segments, _ = self._model.transcribe(
                bio,
                beam_size=5,  # beam size -> performance/quality
                vad_filter=True)  # remove silence
            for s in segments:
                self.logger.debug(f"... segment #{s.id}")
                # Only the segment with id 1 has its leading spaces removed.
                yield s.text.lstrip(" ") if s.id == 1 else s.text