commit cc818946b7fcd93212b295b9df9c984b534ed8fd Author: Julian Müller (ChaoticByte) Date: Wed Jan 31 16:11:59 2024 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..044373f --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.venv/ +__pycache__/ +env.sh diff --git a/app.py b/app.py new file mode 100644 index 0000000..410e824 --- /dev/null +++ b/app.py @@ -0,0 +1,6 @@ +#!/usr/bin/env python3 +# Copyright (c) 2024 Julian Müller (ChaoticByte) + +from transcriptapi.server import get_app + +app = get_app() diff --git a/env_example.sh b/env_example.sh new file mode 100644 index 0000000..565c34e --- /dev/null +++ b/env_example.sh @@ -0,0 +1,3 @@ + +export ACCESS_CONTROL_ALLOW_ORIGIN="*" +export API_STT_MODEL="/Models/faster-whisper-medium" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..810a97d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +faster-whisper +sanic diff --git a/transcriptapi/__init__.py b/transcriptapi/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/transcriptapi/env.py b/transcriptapi/env.py new file mode 100644 index 0000000..334fcb2 --- /dev/null +++ b/transcriptapi/env.py @@ -0,0 +1,17 @@ +# Copyright (c) 2024 Julian Müller (ChaoticByte) + +from os import environ as _environ +from pathlib import Path as _Path +from .msg import ComponentLogger as _ComponentLogger + +_logger = _ComponentLogger("Environment", print_timestamp=False) + +try: + ACCESS_CONTROL_ALLOW_ORIGIN = str(_environ["ACCESS_CONTROL_ALLOW_ORIGIN"]) + API_STT_MODEL = _Path(_environ["API_STT_MODEL"]) +except KeyError as e: + _logger.critical(f"Missing {e}") + exit(1) +except Exception as e: + _logger.critical(f"An exception occured: {e}") + exit(1) diff --git a/transcriptapi/msg.py b/transcriptapi/msg.py new file mode 100644 index 0000000..edee99c --- /dev/null +++ b/transcriptapi/msg.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024 Julian Müller (ChaoticByte) + +from datetime import datetime as 
from datetime import datetime as _datetime
from os import environ as _environ
from sys import stderr as _stderr
from sys import stdout as _stdout
from typing import Any as _Any


class ComponentLogger:
    '''Small levelled logger that tags every line with a component name.

    Messages are written with print(): debug/info go to stdout,
    warning/error/critical go to stderr. A message is emitted only when
    its level index is greater than or equal to the logger's level.
    '''

    # Index into this list is the numeric level (0 = DEBUG ... 4 = CRITICAL).
    LEVELS = [
        "DEBUG",
        "INFO",
        "WARN",
        "ERROR",
        "CRITICAL",
    ]

    def __init__(self, component: str, level: int = 1, print_timestamp: bool = True):
        '''Create a logger for one component.

        component:       name printed in square brackets on every line
        level:           minimum level index to emit (default 1 = INFO);
                         may be overridden by environment variable LOGLEVEL
        print_timestamp: prefix each line with the local time and UTC offset

        LOGLEVEL is matched against LEVELS by name, ignoring case and
        surrounding whitespace. An unrecognised value is ignored and the
        level argument stays in effect.
        '''
        assert isinstance(component, str)
        # Exact type check on purpose: bool is a subclass of int.
        assert type(level) is int
        assert isinstance(print_timestamp, bool)
        self.component = component
        self.level = level
        env_level = _environ.get("LOGLEVEL")
        if env_level is not None:
            # Normalise so that e.g. LOGLEVEL=debug is not silently dropped.
            env_level = env_level.strip().upper()
            if env_level in self.LEVELS:
                self.level = self.LEVELS.index(env_level)
        self.print_timestamp = print_timestamp

    def _log(self, msg: _Any, level: int, file=_stdout):
        '''Print msg to file if level reaches the configured threshold.'''
        assert type(level) is int
        if level < self.level:
            return
        prefix = f"[{self.component}] [{self.LEVELS[level]}]"
        if self.print_timestamp:
            t = _datetime.now().astimezone().strftime(r'%Y-%m-%d %H:%M:%S %z')
            prefix = f"[{t}] {prefix}"
        print(f"{prefix} {msg}", file=file)

    def debug(self, msg: _Any):
        '''Log msg at DEBUG level to stdout.'''
        self._log(msg, 0)

    def info(self, msg: _Any):
        '''Log msg at INFO level to stdout.'''
        self._log(msg, 1)

    def warning(self, msg: _Any):
        '''Log msg at WARN level to stderr.'''
        self._log(msg, 2, file=_stderr)

    def error(self, msg: _Any):
        '''Log msg at ERROR level to stderr.'''
        self._log(msg, 3, file=_stderr)

    def critical(self, msg: _Any):
        '''Log msg at CRITICAL level to stderr.'''
        self._log(msg, 4, file=_stderr)
# ---- transcriptapi/server.py ----

from os import getpid as _getpid

from sanic import Sanic as _Sanic
from sanic import empty as _empty
from sanic import Request as _Request

from . import env as _env

from .msg import ComponentLogger as _ComponentLogger
from .stt import STT as _STT


def get_app() -> _Sanic:
    '''Create the "TranscriptAPI" Sanic app with its routes and hooks.

    Routes:
      GET  /ping  -> empty 200 response
      POST /      -> transcribes the uploaded file field "audio" and
                     streams the segment texts back as text/plain;
                     empty 400 response if the field is missing or empty
    '''
    app = _Sanic("TranscriptAPI")

    @app.get("/ping")
    async def ping(_):
        return _empty(status=200)

    @app.post('/')
    async def transcribe(request: _Request):
        # A request without an "audio" part must yield 400, not an
        # AttributeError on None (which would surface as a 500).
        files = request.files
        upload = files.get("audio") if files else None
        if upload is None or not upload.body:
            return _empty(400)
        resp = await request.respond(content_type="text/plain")
        # NOTE(review): STT.transcribe is a plain (synchronous) generator, so
        # the transcription work runs inside this coroutine between sends.
        # Presumably that stalls other requests on the same worker - confirm.
        for s in app.ctx.stt.transcribe(upload.body):
            await resp.send(s)
        await resp.eof()

    @app.before_server_start
    async def setup_stt(app):
        # One STT instance per server process; the logger name carries the PID.
        app.ctx.stt = _STT(_env.API_STT_MODEL, logger=_ComponentLogger(f"{_getpid()}/STT"))

    @app.after_server_start
    async def init_stt(app):
        # Create the model up front instead of on the first request.
        app.ctx.stt.init()

    @app.on_response
    async def middleware(_, response):
        response.headers["Access-Control-Allow-Origin"] = _env.ACCESS_CONTROL_ALLOW_ORIGIN

    return app


# ---- transcriptapi/stt.py ----
# Copyright (c) 2024 Julian Müller (ChaoticByte)

from io import BytesIO as _BytesIO
from pathlib import Path as _Path
from typing import Iterator as _Iterator

from faster_whisper import WhisperModel as _WhisperModel

from .msg import ComponentLogger as _ComponentLogger


class STT:
    '''Speech-to-text wrapper around a faster-whisper WhisperModel on CPU.

    The model is created lazily by init(); transcribe() calls init()
    itself, so an explicit init() is only needed to create it early.
    '''

    def __init__(self, model_path: _Path, n_threads: int = 4, use_int8: bool = True, logger: _ComponentLogger = _ComponentLogger("STT")):
        '''Validate the arguments and store the configuration.

        model_path: path to the model; expanded, resolved and must exist
        n_threads:  passed to WhisperModel as cpu_threads (must be >= 0)
        use_int8:   compute_type "int8" if True, otherwise "default"
        logger:     ComponentLogger used for debug output
        '''
        assert isinstance(model_path, _Path)
        assert type(n_threads) is int and n_threads >= 0
        assert isinstance(use_int8, bool)
        assert isinstance(logger, _ComponentLogger)
        self.logger = logger
        self.model_path = model_path.expanduser().resolve()
        assert self.model_path.exists()
        self.n_threads = n_threads
        self.compute_type = "int8" if use_int8 else "default"
        self._model = None

    def init(self):
        '''Create the WhisperModel once; later calls do nothing.'''
        if self._model is not None:
            return
        self.logger.debug("Initializing ...")
        self._model = _WhisperModel(
            str(self.model_path),
            device="cpu",
            cpu_threads=self.n_threads,
            compute_type=self.compute_type,
            local_files_only=True)
        self.logger.debug("Initialized.")

    def transcribe(self, audio: bytes) -> _Iterator[str]:
        '''Transcribes audio and yields the segment strings.

        This is a generator: nothing (including the type check on audio)
        runs until the first item is requested. Leading spaces are
        stripped from the text of the segment whose id is 1.
        '''
        assert isinstance(audio, bytes)
        with _BytesIO(audio) as bio:
            self.init()
            self.logger.debug("Transcribing audio ...")
            segments, _ = self._model.transcribe(
                bio,
                beam_size=5,  # beam size -> performance/quality
                vad_filter=True)  # remove silence
            for s in segments:
                self.logger.debug(f"... segment #{s.id}")
                # NOTE(review): presumably segment ids start at 1, so this
                # trims only the very first segment - confirm.
                yield s.text.lstrip(" ") if s.id == 1 else s.text