diff --git a/.gitignore b/.gitignore
index 64a0616..35db8fd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
 tmp/
-vendor/*
 .venv/
 .vscode/
diff --git a/README.md b/README.md
index 9329fdf..7bedd55 100644
--- a/README.md
+++ b/README.md
@@ -1,48 +1,40 @@
 # audio-summarize
 
-An audio summarizer that glues together ffmpeg, whisper.cpp and BART.
+An audio summarizer that glues together [faster-whisper](https://github.com/SYSTRAN/faster-whisper) and [BART](https://huggingface.co/facebook/bart-large-cnn).
 
 ## Dependencies
 
 - Python 3 (tested: 3.12)
-- ffmpeg
-- git
-- make
-- c/c++ compiler (on Ubuntu, installing `build-essential` does the trick)
 
 ## Setup
 
-Create a virtual environment for python and activate it:
+Create a virtual environment for python, activate it, and install the required python packages:
 
 ```bash
 python3 -m venv .venv
 source .venv/bin/activate
-```
-
-Run setup.sh
-
-```bash
-./setup.sh
+pip3 install -r requirements.txt
 ```
 
 ## Run
 
-1. You need a whisper.cpp compatible model file (-> https://huggingface.co/ggerganov/whisper.cpp)
-2. In your terminal, make shure you have your python venv activated
-3. Run audio-summarize.py
+1. In your terminal, make sure you have your python venv activated
+2. Run audio-summarize.py
 
 ### Usage
 
 ```
-./audio-summarize.py -m filepath -i filepath -o filepath
-                     [--summin n] [--summax n] [--segmax n]
+./audio-summarize.py -i filepath -o filepath
+                     [--summin n] [--summax n] [--segmax n]
+                     [--lang lang] [-m name]
 
 options:
   -h, --help  show this help message and exit
   --summin n  The minimum lenght of a segment summary [10, min: 5]
  --summax n  The maximum lenght of a segment summary [90, min: 5]
  --segmax n  The maximum number of tokens per segment [375, 5 - 500]
-  -m filepath The path to a whisper.cpp-compatible model file
+  --lang lang The language of the audio source ['en']
+  -m name     The name of the whisper model to be used ['small.en']
   -i filepath The path to the media file
   -o filepath Where to save the output text to
 ```
@@ -50,16 +42,14 @@ options:
 Example:
 
 ```bash
-./audio-summarize.py -m ./tmp/whisper_ggml-small.en-q5_1.bin -i ./tmp/test.webm -o ./tmp/output.txt
+./audio-summarize.py -i ./tmp/test.webm -o ./tmp/output.txt
 ```
 
 ## How does it work?
 
 To summarize a media file, the program executes the following steps:
 
-1. Convert the media file with [ffmpeg](https://www.ffmpeg.org/) to a mono 16kHz 16bit-PCM wav file
-2. Transcribe that wav file using [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
-3. Clean up the transcript (newlines, whitespaces at the beginning and end)
-4. Semantically split up the transcript into segments using [semantic-text-splitter](https://github.com/benbrandt/text-splitter) and the tokenizer for BART
-5. Summarize each segment using BART ([`facebook/bart-large-cnn`](https://huggingface.co/facebook/bart-large-cnn))
-6. Write the results to a text file
+1. Convert and transcribe the media file using [faster-whisper](https://github.com/SYSTRAN/faster-whisper), which uses [ffmpeg](https://www.ffmpeg.org/) and [ctranslate2](https://github.com/OpenNMT/CTranslate2/) under the hood
+2. Semantically split up the transcript into segments using [semantic-text-splitter](https://github.com/benbrandt/text-splitter) and the tokenizer for BART
+3. Summarize each segment using BART ([`facebook/bart-large-cnn`](https://huggingface.co/facebook/bart-large-cnn))
+4. Write the results to a text file
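Note: the four README steps above correspond to a short end-to-end pipeline. A minimal sketch, assuming the same faster-whisper, semantic-text-splitter and transformers APIs that the patched audio-summarize.py below uses (file paths, model name and length limits are illustrative placeholders):

```python
from faster_whisper import WhisperModel
from semantic_text_splitter import TextSplitter
from tokenizers import Tokenizer
from transformers import pipeline

# 1. Convert and transcribe the media file (faster-whisper decodes it itself)
model = WhisperModel("small.en", device="auto", compute_type="int8")
segments, _ = model.transcribe("./tmp/test.webm", language="en")
text = "".join(s.text for s in segments).strip()

# 2. Split the transcript into semantic segments sized for BART's input window
tokenizer = Tokenizer.from_pretrained("facebook/bart-large-cnn")
splitter = TextSplitter.from_huggingface_tokenizer(tokenizer, (300, 375))
chunks = splitter.chunks(text)

# 3. Summarize each segment with BART
summ = pipeline("summarization", model="facebook/bart-large-cnn")
summaries = [summ(c, max_length=90, min_length=10,
                  do_sample=False)[0]["summary_text"].strip()
             for c in chunks]

# 4. Write the results to a text file
with open("./tmp/output.txt", "w") as f:
    f.write("\n".join(summaries))
```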
diff --git a/audio-summarize.py b/audio-summarize.py
index 9c49ab6..4bbaaae 100755
--- a/audio-summarize.py
+++ b/audio-summarize.py
@@ -10,55 +10,39 @@ warnings.simplefilter(action='ignore', category=FutureWarning)
 
 from argparse import ArgumentParser
 from pathlib import Path
-from subprocess import check_call, DEVNULL
-from tempfile import TemporaryDirectory
 from typing import List
 
+from faster_whisper import WhisperModel
 from semantic_text_splitter import TextSplitter
 from tokenizers import Tokenizer
 from transformers import pipeline
 
-# Some constant variables
+
+# Transcription
+
+def transcribe(model_name: str, audio_file: str, language: str) -> str:
+    '''Transcribe the media using faster-whisper'''
+    t_chunks = []
+    print("* Loading model ", end="", flush=True)
+    model = WhisperModel(model_name, device="auto", compute_type="int8")
+    segments, _ = model.transcribe(audio_file, language=language, beam_size=5, condition_on_previous_text=False)
+    print()
+    print("* Transcribing audio ", end="", flush=True)
+    for s in segments:
+        print(".", end="", flush=True)
+        t_chunks.append(s.text)
+    print()
+    t = "".join(t_chunks)
+    return t
+
+
+# NLP
 
 NLP_MODEL = "facebook/bart-large-cnn"
-root_dir = Path(__file__).parent
-whisper_cpp_binary = (root_dir / "vendor" / "whisper.cpp" / "main").__str__()
-
-# Steps
-
-def convert_audio(media_file: str, output_file: str):
-    '''Convert media to mono 16kHz pcm_s16le wav using ffmpeg'''
-    check_call([
-        "ffmpeg",
-        "-hide_banner",
-        "-loglevel", "error",
-        "-i", media_file,
-        "-ac", "1",
-        "-ar", "16000",
-        "-c:a", "pcm_s16le",
-        output_file])
-
-def transcribe(model_file: str, audio_file: str, output_file: str):
-    '''Transcribe audio file using whisper.cpp'''
-    check_call([
-        whisper_cpp_binary,
-        "-m", model_file,
-        "--max-context", "64",
-        "--beam-size", "5",
-        "--no-prints",
-        "--no-timestamps",
-        "--output-txt",
-        "--output-file", output_file[:-4],  # strip '.txt' file ending
-        audio_file], stdout=DEVNULL)
-
-def cleanup_text(t: str) -> str:
-    t = t.replace("\n", "")
-    t = t.replace("\r", "")
-    t = t.strip()
-    return t
 
 def split_text(t: str, max_tokens: int) -> List[str]:
     '''Split text into semantic segments'''
+    print("* Splitting up transcript into semantic segments")
     tokenizer = Tokenizer.from_pretrained(NLP_MODEL)
     splitter = TextSplitter.from_huggingface_tokenizer(
         tokenizer, (int(max_tokens*0.8), max_tokens))
@@ -67,13 +51,17 @@
 def summarize(chunks: List[str], summary_min: int, summary_max: int) -> str:
     '''Summarize all segments (chunks) using a language model'''
+    print("* Summarizing transcript segments ", end="", flush=True)
     chunks_summarized = []
     summ = pipeline("summarization", model=NLP_MODEL)
     for c in chunks:
+        print(".", end="", flush=True)
         chunks_summarized.append(
             summ(c, max_length=summary_max, min_length=summary_min,
                  do_sample=False)[0]['summary_text'].strip())
+    print()
     return "\n".join(chunks_summarized)
 
+
 # Main
 
 if __name__ == "__main__":
@@ -82,7 +70,8 @@ if __name__ == "__main__":
     argp.add_argument("--summin", metavar="n", type=int, default=10, help="The minimum lenght of a segment summary [10, min: 5]")
     argp.add_argument("--summax", metavar="n", type=int, default=90, help="The maximum lenght of a segment summary [90, min: 5]")
     argp.add_argument("--segmax", metavar="n", type=int, default=375, help="The maximum number of tokens per segment [375, 5 - 500]")
-    argp.add_argument("-m", required=True, metavar="filepath", type=Path, help="The path to a whisper.cpp-compatible model file")
+    argp.add_argument("--lang", metavar="lang", type=str, default="en", help="The language of the audio source ['en']")
+    argp.add_argument("-m", metavar="name", type=str, default="small.en", help="The name of the whisper model to be used ['small.en']")
     argp.add_argument("-i", required=True, metavar="filepath", type=Path, help="The path to the media file")
     argp.add_argument("-o", required=True, metavar="filepath", type=Path, help="Where to save the output text to")
     args = argp.parse_args()
@@ -90,21 +79,9 @@ if __name__ == "__main__":
     args.summin = max(5, args.summin)
     args.summax = max(5, args.summax)
     args.segmax = max(5, min(args.segmax, 500))
-    # create tmpdir
-    with TemporaryDirectory(suffix="as") as d:
-        converted_audio_path = (Path(d) / "audio.wav").__str__()
-        transcript_path = (Path(d) / "transcript.txt").__str__()
-        # convert using ffmpeg
-        print("* Converting media to the correct format ...")
-        convert_audio(args.i.__str__(), converted_audio_path)
-        # transcribe
-        print("* Transcribing audio ...")
-        transcribe(args.m.__str__(), converted_audio_path, transcript_path)
-        # read transcript
-        text = Path(transcript_path).read_text()
-        # cleanup text & summarize
-        print("* Summarizing transcript ...")
-        text = cleanup_text(text)
+    # transcribe
+    text = transcribe(args.m, args.i, args.lang).strip()
+    # split up into semantic segments & summarize
     chunks = split_text(text, args.segmax)
     summary = summarize(chunks, args.summin, args.summax)
     print(f"\n{summary}\n")
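Two behavioural details of the patched transcribe() are worth noting: the -m argument now takes a model name instead of a file path, and WhisperModel.transcribe() returns a lazy generator, which is why the per-segment progress dots print while transcription is actually running. A small sketch of both, assuming faster-whisper's usual model resolution; the download_root argument value "./models" and the file path are illustrative placeholders:

```python
from faster_whisper import WhisperModel

# A size name such as "small.en" is resolved to a converted CTranslate2
# model downloaded from the Hugging Face Hub on first use and cached;
# a local directory containing a converted model also works.
model = WhisperModel("small.en", device="auto", compute_type="int8")

# Optionally pin where downloaded models are stored (placeholder path).
model = WhisperModel("small.en", device="auto", compute_type="int8",
                     download_root="./models")

# transcribe() returns almost immediately; the audio is decoded and
# transcribed lazily as the segment generator is consumed.
segments, info = model.transcribe("./tmp/test.webm", language="en",
                                  beam_size=5)
for segment in segments:
    print(segment.text)
```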
diff --git a/requirements.txt b/requirements.txt
index b71f2e6..793ed1e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+faster-whisper
 semantic-text-splitter
 torch
 transformers
diff --git a/setup.sh b/setup.sh
deleted file mode 100755
index f632d4d..0000000
--- a/setup.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env bash
-
-# init
-oldcwd=$(pwd)
-function cleanup {
-    cd ${oldcwd}
-}
-trap cleanup EXIT
-
-export root_dir=$(realpath $(dirname $0))
-export vendor_dir=${root_dir}/vendor
-
-# Prepare installation of dependencies
-
-mkdir -p ${vendor_dir}
-cd ${vendor_dir}
-
-# Install whisper.cpp
-
-if [ ! -d ./whisper.cpp ]; then
-    git clone -b v1.6.2 https://github.com/ggerganov/whisper.cpp.git
-fi
-cd whisper.cpp
-make
-cd ${vendor_dir}
-
-# Install python packages
-
-if ! python3 -m pip install -r "${root_dir}/requirements.txt"; then
-    echo
-    echo "Make shure to run this script in a python virtual environment!"
-fi
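With setup.sh gone there is no clone-and-compile step left; every dependency now arrives via pip. A quick sanity check, run inside the activated venv, that the pure-pip stack is importable; the module names are the import names of the packages in requirements.txt, plus tokenizers, which transformers is assumed to pull in transitively:

```python
import importlib

# Import names corresponding to the pip packages in requirements.txt.
for module in ("faster_whisper", "semantic_text_splitter",
               "tokenizers", "torch", "transformers"):
    importlib.import_module(module)
print("All dependencies are importable; no whisper.cpp build required.")
```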