Initial commit

2024-01-31 16:11:59 +01:00 · 2024-01-31 16:11:59 +01:00 · cc818946b7
commit cc818946b7
9 changed files with 185 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+.venv/
+__pycache__/
+env.sh
--- a/app.py
+++ b/app.py
@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+# Copyright (c) 2024 Julian Müller (ChaoticByte)
+
+from transcriptapi.server import get_app
+
+# Module-level Sanic application built by transcriptapi.server.get_app().
+# NOTE(review): presumably loaded by the server runner as "app:app" - confirm.
+app = get_app()
--- a/env_example.sh
+++ b/env_example.sh
@ -0,0 +1,3 @@
+
+# Value sent in the Access-Control-Allow-Origin header of responses
+export ACCESS_CONTROL_ALLOW_ORIGIN="*"
+# Filesystem path of the local faster-whisper model; it must exist
+export API_STT_MODEL="/Models/faster-whisper-medium"
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+faster-whisper
+sanic
--- a/transcriptapi/__init__.py
+++ b/transcriptapi/__init__.py
--- a/transcriptapi/env.py
+++ b/transcriptapi/env.py
@ -0,0 +1,17 @@
+# Copyright (c) 2024 Julian Müller (ChaoticByte)
+
+from os import environ as _environ
+from pathlib import Path as _Path
+from .msg import ComponentLogger as _ComponentLogger
+
+# Logger for configuration problems; timestamps are disabled for its messages
+_logger = _ComponentLogger("Environment", print_timestamp=False)
+
+# Read the required settings once, at import time. Any failure is fatal:
+# it is logged at CRITICAL level and the process exits with status 1.
+try:
+    # Value for the Access-Control-Allow-Origin response header
+    ACCESS_CONTROL_ALLOW_ORIGIN = str(_environ["ACCESS_CONTROL_ALLOW_ORIGIN"])
+    # Path of the speech-to-text model
+    API_STT_MODEL = _Path(_environ["API_STT_MODEL"])
+except KeyError as e:
+    # str() of a KeyError is the quoted name of the missing variable
+    _logger.critical(f"Missing {e}")
+    # The exit() builtin is only installed by the site module and is meant
+    # for interactive use, so raise SystemExit directly instead
+    raise SystemExit(1)
+except Exception as e:
+    _logger.critical(f"An exception occured: {e}")
+    raise SystemExit(1)
--- a/transcriptapi/msg.py
+++ b/transcriptapi/msg.py
@ -0,0 +1,54 @@
+# Copyright (c) 2024 Julian Müller (ChaoticByte)
+
+from datetime import datetime as _datetime
+from os import environ as _environ
+from sys import stderr as _stderr
+from sys import stdout as _stdout
+from typing import Any as _Any
+
+class ComponentLogger:
+    '''Minimal print-based logger that tags every message with a component name.
+
+    Messages below the configured level are dropped. debug() and info() print
+    to stdout; warning(), error() and critical() print to stderr.
+    '''
+
+    # Level names; the index of a name in this list is its numeric level
+    LEVELS = [
+        "DEBUG",
+        "INFO",
+        "WARN",
+        "ERROR",
+        "CRITICAL"
+    ]
+
+    def __init__(self, component: str, level: int = 1, print_timestamp: bool = True):
+        '''Create a logger for one component.
+
+        component: label printed in front of every message
+        level: lowest numeric level (index into LEVELS) that gets printed
+        print_timestamp: prefix every message with the local date and time
+
+        level may be overridden by the environment variable LOGLEVEL, which
+        must hold one of the names in LEVELS (case-insensitive, surrounding
+        whitespace is ignored). Any other value is ignored.
+        '''
+        assert type(component) == str
+        assert type(level) == int
+        assert type(print_timestamp) == bool
+        self.component = component
+        self.level = level
+        # Normalize the value so that e.g. LOGLEVEL=debug is honoured
+        # instead of being silently ignored
+        loglevel_ = _environ.get("LOGLEVEL", "").strip().upper()
+        if loglevel_ in self.LEVELS:
+            self.level = self.LEVELS.index(loglevel_)
+        self.print_timestamp = print_timestamp
+
+    # NOTE: the default for file is evaluated once, when the method is defined
+    def _log(self, msg: _Any, level: int, file = _stdout):
+        '''Print msg to file if level is at least the configured level.'''
+        assert type(level) == int
+        if level < self.level:
+            return
+        prefix = f"[{self.component}] [{self.LEVELS[level]}]"
+        if self.print_timestamp:
+            # Local time including the numeric UTC offset
+            t = _datetime.now().astimezone().strftime(r'%Y-%m-%d %H:%M:%S %z')
+            prefix = f"[{t}] {prefix}"
+        print(f"{prefix} {msg}", file=file)
+
+    def debug(self, msg: _Any):
+        '''Log msg at level DEBUG (0) to stdout.'''
+        self._log(msg, 0)
+
+    def info(self, msg: _Any):
+        '''Log msg at level INFO (1) to stdout.'''
+        self._log(msg, 1)
+
+    def warning(self, msg: _Any):
+        '''Log msg at level WARN (2) to stderr.'''
+        self._log(msg, 2, file=_stderr)
+
+    def error(self, msg: _Any):
+        '''Log msg at level ERROR (3) to stderr.'''
+        self._log(msg, 3, file=_stderr)
+
+    def critical(self, msg: _Any):
+        '''Log msg at level CRITICAL (4) to stderr.'''
+        self._log(msg, 4, file=_stderr)
--- a/transcriptapi/server.py
+++ b/transcriptapi/server.py
@ -0,0 +1,44 @@
+# Copyright (c) 2024 Julian Müller (ChaoticByte)
+
+from os import getpid as _getpid
+
+from sanic import Sanic as _Sanic
+from sanic import empty as _empty
+from sanic import Request as _Request
+
+from . import env as _env
+
+from .msg import ComponentLogger as _ComponentLogger
+from .stt import STT as _STT
+
+
+def get_app() -> _Sanic:
+    '''Build the "TranscriptAPI" Sanic application.
+
+    Routes:
+      GET  /ping  answers with an empty 200 response
+      POST /      transcribes the uploaded form file "audio" and streams the
+                  text back as text/plain, one chunk per segment; answers
+                  with an empty 400 response if the file is missing or empty
+
+    Returns the configured application object.
+    '''
+    app = _Sanic("TranscriptAPI")
+
+    @app.get("/ping")
+    async def ping(_):
+        return _empty(status=200)
+
+    @app.post('/')
+    async def transcribe(request: _Request):
+        # Without an uploaded "audio" file there is nothing to look up, so
+        # guard against None before touching .body
+        files = request.files
+        upload = files.get("audio") if files else None
+        if upload is None or len(upload.body) < 1:
+            return _empty(400)
+        # Stream the segments to the client as they are produced
+        resp = await request.respond(content_type="text/plain")
+        for s in app.ctx.stt.transcribe(upload.body):
+            await resp.send(s)
+        await resp.eof()
+
+    @app.before_server_start
+    async def setup_stt(app):
+        # The logger name carries the process id
+        app.ctx.stt = _STT(_env.API_STT_MODEL, logger=_ComponentLogger(f"{_getpid()}/STT"))
+
+    @app.after_server_start
+    async def init_stt(app):
+        # Load the model right away rather than on the first transcription
+        app.ctx.stt.init()
+
+    @app.on_response
+    async def middleware(_, response):
+        # Add the configured Access-Control-Allow-Origin header to the response
+        response.headers["Access-Control-Allow-Origin"] = _env.ACCESS_CONTROL_ALLOW_ORIGIN
+
+    return app
--- a/transcriptapi/stt.py
+++ b/transcriptapi/stt.py
@ -0,0 +1,56 @@
+# Copyright (c) 2024 Julian Müller (ChaoticByte)
+
+from io import BytesIO as _BytesIO
+from pathlib import Path as _Path
+from typing import Iterator as _Iterator
+
+from faster_whisper import WhisperModel as _WhisperModel
+
+from .msg import ComponentLogger as _ComponentLogger
+
+
+class STT:
+    '''Speech-to-text wrapper around a local faster-whisper model, run on the CPU.'''
+
+    # NOTE: the default logger is created once, when the class body is executed
+    def __init__(self, model_path: _Path, n_threads: int = 4, use_int8: bool = True, logger: _ComponentLogger = _ComponentLogger("STT")):
+        '''Validate and store the settings; the model itself is loaded by init().
+
+        model_path: path of the model; "~" is expanded, the path is resolved
+            and it must exist
+        n_threads: value passed to the model as cpu_threads (must be >= 0)
+        use_int8: use compute type "int8" if True, "default" otherwise
+        logger: logger used for progress messages
+        '''
+        assert isinstance(model_path, _Path)
+        assert type(n_threads) == int and n_threads >= 0
+        assert type(use_int8) == bool
+        assert isinstance(logger, _ComponentLogger)
+        self.logger = logger
+        self.model_path = model_path.expanduser().resolve()
+        assert self.model_path.exists()
+        self.n_threads = n_threads
+        self.compute_type = "int8" if use_int8 else "default"
+        # Filled in by init()
+        self._model = None
+
+    def init(self):
+        '''Load the model unless it has already been loaded.'''
+        if self._model is not None:
+            return
+        self.logger.debug("Initializing ...")
+        self._model = _WhisperModel(
+            str(self.model_path),
+            device="cpu",
+            cpu_threads=self.n_threads,
+            compute_type=self.compute_type,
+            local_files_only=True)
+        self.logger.debug("Initialized.")
+
+    def transcribe(self, audio: bytes) -> _Iterator[str]:
+        '''Transcribes audio and yields the text of each segment.
+
+        This is a generator: nothing is executed (not even the type check or
+        the model initialization) until iteration starts.
+
+        audio: the complete audio file as bytes
+        '''
+        assert type(audio) == bytes
+        # A freshly created BytesIO is already positioned at offset 0
+        with _BytesIO(audio) as bio:
+            self.init()
+            self.logger.debug("Transcribing audio ...")
+            segments, _ = self._model.transcribe(
+                bio,
+                beam_size=5, # beam size -> performance/quality
+                vad_filter=True) # remove silence
+            for s in segments:
+                self.logger.debug(f"... segment #{s.id}")
+                # Drop the leading spaces of the segment with id 1
+                # NOTE(review): presumably ids start at 1 - confirm against faster-whisper
+                if s.id == 1:
+                    text = s.text.lstrip(" ")
+                else:
+                    text = s.text
+                yield text