13 changed files with 40 additions and 74 deletions
--- a/README.md
+++ b/README.md
@@ -8,18 +8,16 @@ A frontend for large language models like [🐨 Koala](https://bair.berkeley.edu

 - Python 3.10
 - The pip packages listed in `requirements.txt`
-- An AI model in the ggml format (should be quantized)
+- A Koala model in the ggml format (should be quantized)

-For memory and disk requirements for the different models, see [llama.cpp - Memory/Disk Requirements](https://github.com/ggerganov/llama.cpp#memorydisk-requirements)
+The 7B-Model of Koala, `q4_0`-quantized, requires approx. 5 GB of RAM.

 ## Supported Models

 - [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
-- [🦙 Vicuna v.0](https://lmsys.org/blog/2023-03-30-vicuna/)
-- [🦙 Vicuna v.1.1](https://lmsys.org/blog/2023-03-30-vicuna/)
-- [🦁 Manticore Chat](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
+- [🦙 Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/)

-(see `./profiles/`)
+see `./profiles/`

 ## Usage

@@ -44,7 +42,7 @@ python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT]

 The following command-line options are available:

-* `--profile`: Path to the profile file for the model.
+* `--profile`: Path to the profile file for the model. Defaults to `./profiles/koala.json`.
 * `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`.
 * `--port`: Specifies the port number to listen on. Defaults to `8080`.
 * `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`.
--- a/api-server.py
+++ b/api-server.py
@@ -4,8 +4,6 @@
 from argparse import ArgumentParser
 from os import environ

-from llama_cpp.server.app import create_app
-
 import uvicorn

 if __name__ == "__main__":
@@ -15,7 +13,10 @@ if __name__ == "__main__":
    ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
    ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331)
    args = ap.parse_args()
+    # Set environment variable before importing api server
    environ["MODEL"] = args.model
+    # Import api server
+    from llama_cpp.server.app import create_app
    # Run
    app = create_app()
    uvicorn.run(app, host=args.host, port=args.port)
--- a/frontend-server.py
+++ b/frontend-server.py
@@ -9,9 +9,10 @@ import uvicorn
 from frontend.app import app

 if __name__ == "__main__":
+    koala_profile_path = Path(__file__).parent / "profiles" / "koala.json"
    # CLI
    ap = ArgumentParser()
-    ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model", type=Path, required=True)
+    ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model (default: ./profiles/koala.json)", type=Path, default=koala_profile_path)
    ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
    ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080)
    ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331")
@@ -33,7 +34,6 @@ if __name__ == "__main__":
            "conversation_prefix": profile["conversation_prefix"],
            "user_keyword": profile["user_keyword"],
            "assistant_keyword": profile["assistant_keyword"],
-            "separator": profile["separator"],
            "stop_sequences": profile["stop_sequences"]
        }
    }
--- a/frontend/static/main.js
+++ b/frontend/static/main.js
@@ -111,10 +111,10 @@ fetch("/config")
    function addMessage(message, role) {
        if (role == Roles.USER) {
            conversation.push(
-                frontend_config.profile.user_keyword + " "
-                + message + frontend_config.profile.separator + frontend_config.profile.assistant_keyword);
+                " " + frontend_config.profile.user_keyword + " "
+                + message + " " + frontend_config.profile.assistant_keyword);
        }
-        else { conversation.push(message + frontend_config.profile.separator); }
+        else { conversation.push(message); }
        // UI
        let messageRoleElem = document.createElement("div");
        messageRoleElem.classList.add("message-type");
@@ -181,7 +181,7 @@ fetch("/config")
            let prompt = conversation.join("");
            let settings = getSettings();
            apiCompletion(prompt, settings).then(r => {
-                addMessage(r.trim(), Roles.ASSISTANT);
+                addMessage(r, Roles.ASSISTANT);
                enableInput();
            });
        }
--- a/frontend/static/style.css
+++ b/frontend/static/style.css
@@ -11,7 +11,6 @@
    --icon-button-fill: #ffffff;
    --send-icon-button-fill: #29c76d;
    --color: #fafafa;
-    --color2: #bbbbbb;
    --border-radius: .5rem;
 }

@@ -70,38 +69,19 @@ input[type="number"] {
 }

 .messages {
-    gap: 1rem;
+    gap: 1.1rem;
    margin-bottom: 1rem;
    overflow-y: scroll;
    max-height: 89vh;
-    align-items: center;
-    flex-grow: 2;
 }

 .message {
    display: flex;
-    flex-direction: column;
-    flex-wrap: wrap;
-    gap: 1rem;
-}
-
-.message-type {
-    color: var(--color2);
-    text-align: center;
-}
-
-.message-text {
-    white-space: pre-wrap;
-    padding: .5rem .8rem;
+    flex-direction: row;
+    gap: .5rem;
+    padding: .5rem;
    border-radius: var(--border-radius);
-}
-
-.message-bg-assistant > .message-text {
-    background: var(--background2);
-}
-
-.message-bg-user > .message-text {
-    background: var(--background3);
+    max-width: fit-content;
 }

 button {
@@ -134,6 +114,19 @@ button:hover {
    width: 100%;
 }

+.message-bg-assistant {
+    background: var(--background2);
+}
+
+.message-bg-user {
+    background: var(--background3);
+}
+
+.message-type {
+    min-width: 3.5rem;
+    padding-left: .1rem;
+}
+
 .input-container {
    margin-top: auto;
    flex-direction: row;
--- a/misc/screenshot.png
+++ b/misc/screenshot.png
--- a/profiles/empty.json
+++ b/profiles/empty.json
@@ -1,8 +0,0 @@
-{
-    "name": "None",
-    "conversation_prefix": "",
-    "user_keyword": "",
-    "assistant_keyword": "",
-    "separator": "",
-    "stop_sequences": []
-}
--- a/profiles/koala.json
+++ b/profiles/koala.json
@@ -3,6 +3,5 @@
    "conversation_prefix": "BEGINNING OF CONVERSATION: ",
    "user_keyword": "USER:",
    "assistant_keyword": "GPT:",
-    "separator": " ",
    "stop_sequences": ["</s>"]
 }
--- a/profiles/manticore-chat.json
+++ b/profiles/manticore-chat.json
@@ -1,8 +0,0 @@
-{
-    "name": "Manticore",
-    "conversation_prefix": "",
-    "user_keyword": "USER:",
-    "assistant_keyword": "ASSISTANT:",
-    "separator": "\n",
-    "stop_sequences": ["</s>", "<unk>", "### USER:", "USER:"]
-}
--- a/profiles/vicuna-v0.json
+++ b/profiles/vicuna-v0.json
@@ -1,8 +0,0 @@
-{
-    "name": "Vicuna v0",
-    "conversation_prefix": "A chat between a curious human and a helpful AI assistant.\n\n",
-    "user_keyword": "### Human:",
-    "assistant_keyword": "### Assistant:",
-    "separator": "\n",
-    "stop_sequences": ["### Human:"]
-}
--- a/profiles/vicuna-v1.1.json
+++ b/profiles/vicuna-v1.1.json
@@ -1,8 +0,0 @@
-{
-    "name": "Vicuna v1.1",
-    "conversation_prefix": "A chat between a curious user and a helpful AI assistant.\n\n",
-    "user_keyword": "USER:",
-    "assistant_keyword": "ASSISTANT:",
-    "separator": "\n",
-    "stop_sequences": ["</s>"]
-}
--- a/profiles/vicuna.json
+++ b/profiles/vicuna.json
@@ -0,0 +1,7 @@
+{
+    "name": "Vicuna",
+    "conversation_prefix": "A chat between a curious user and a helpful AI assistant. ",
+    "user_keyword": "### Human:",
+    "assistant_keyword": "### Assistant:",
+    "stop_sequences": ["### Human:"]
+}
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
-llama-cpp-python[server]==0.1.56
+llama-cpp-python[server]==0.1.50
 uvicorn==0.22.0
 sanic==23.3.0