diff --git a/README.md b/README.md index 1db57b3..12f40c6 100644 --- a/README.md +++ b/README.md @@ -8,16 +8,18 @@ A frontend for large language models like [🐨 Koala](https://bair.berkeley.edu - Python 3.10 - The pip packages listed in `requirements.txt` -- A Koala model in the ggml format (should be quantized) +- An AI model in the ggml format (should be quantized) -The 7B-Model of Koala, `q4_0`-quantized, requires approx. 5 GB of RAM. +For memory and disk requirements for the different models, see [llama.cpp - Memory/Disk Requirements](https://github.com/ggerganov/llama.cpp#memorydisk-requirements) ## Supported Models - [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) -- [🦙 Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/) +- [🦙 Vicuna v.0](https://lmsys.org/blog/2023-03-30-vicuna/) +- [🦙 Vicuna v.1.1](https://lmsys.org/blog/2023-03-30-vicuna/) +- [🦁 Manticore Chat](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg) -see `./profiles/` +(see `./profiles/`) ## Usage @@ -42,7 +44,7 @@ python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT] The following command-line options are available: -* `--profile`: Path to the profile file for the model. Defaults to `./profiles/koala.json`. +* `--profile`: Path to the profile file for the model. * `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`. * `--port`: Specifies the port number to listen on. Defaults to `8080`. * `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`. 
diff --git a/api-server.py b/api-server.py index c836a5e..f9fab7d 100644 --- a/api-server.py +++ b/api-server.py @@ -4,6 +4,8 @@ from argparse import ArgumentParser from os import environ +from llama_cpp.server.app import create_app + import uvicorn if __name__ == "__main__": @@ -13,10 +15,7 @@ if __name__ == "__main__": ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost") ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331) args = ap.parse_args() - # Set environment variable before importing api server environ["MODEL"] = args.model - # Import api server - from llama_cpp.server.app import create_app # Run app = create_app() uvicorn.run(app, host=args.host, port=args.port) diff --git a/frontend-server.py b/frontend-server.py index e631058..383e110 100644 --- a/frontend-server.py +++ b/frontend-server.py @@ -9,10 +9,9 @@ import uvicorn from frontend.app import app if __name__ == "__main__": - koala_profile_path = Path(__file__).parent / "profiles" / "koala.json" # CLI ap = ArgumentParser() - ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model (default: ./profiles/koala.json)", type=Path, default=koala_profile_path) + ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model", type=Path, required=True) ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost") ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080) ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331") @@ -34,6 +33,7 @@ if __name__ == "__main__": "conversation_prefix": profile["conversation_prefix"], "user_keyword": profile["user_keyword"], "assistant_keyword": profile["assistant_keyword"], + "separator": profile["separator"], "stop_sequences": 
profile["stop_sequences"] } } diff --git a/frontend/static/main.js b/frontend/static/main.js index ebfc0ef..f579035 100644 --- a/frontend/static/main.js +++ b/frontend/static/main.js @@ -111,10 +111,10 @@ fetch("/config") function addMessage(message, role) { if (role == Roles.USER) { conversation.push( - " " + frontend_config.profile.user_keyword + " " - + message + " " + frontend_config.profile.assistant_keyword); + frontend_config.profile.user_keyword + " " + + message + frontend_config.profile.separator + frontend_config.profile.assistant_keyword); } - else { conversation.push(message); } + else { conversation.push(message + frontend_config.profile.separator); } // UI let messageRoleElem = document.createElement("div"); messageRoleElem.classList.add("message-type"); @@ -181,7 +181,7 @@ fetch("/config") let prompt = conversation.join(""); let settings = getSettings(); apiCompletion(prompt, settings).then(r => { - addMessage(r, Roles.ASSISTANT); + addMessage(r.trim(), Roles.ASSISTANT); enableInput(); }); } diff --git a/frontend/static/style.css b/frontend/static/style.css index 791f096..6a58b20 100644 --- a/frontend/static/style.css +++ b/frontend/static/style.css @@ -11,6 +11,7 @@ --icon-button-fill: #ffffff; --send-icon-button-fill: #29c76d; --color: #fafafa; + --color2: #bbbbbb; --border-radius: .5rem; } @@ -69,19 +70,38 @@ input[type="number"] { } .messages { - gap: 1.1rem; + gap: 1rem; margin-bottom: 1rem; overflow-y: scroll; max-height: 89vh; + align-items: center; + flex-grow: 2; } .message { display: flex; - flex-direction: row; - gap: .5rem; - padding: .5rem; + flex-direction: column; + flex-wrap: wrap; + gap: 1rem; +} + +.message-type { + color: var(--color2); + text-align: center; +} + +.message-text { + white-space: pre-wrap; + padding: .5rem .8rem; border-radius: var(--border-radius); - max-width: fit-content; +} + +.message-bg-assistant > .message-text { + background: var(--background2); +} + +.message-bg-user > .message-text { + background: 
var(--background3); } button { @@ -114,19 +134,6 @@ button:hover { width: 100%; } -.message-bg-assistant { - background: var(--background2); -} - -.message-bg-user { - background: var(--background3); -} - -.message-type { - min-width: 3.5rem; - padding-left: .1rem; -} - .input-container { margin-top: auto; flex-direction: row; diff --git a/misc/screenshot.png b/misc/screenshot.png index 9651407..0d97f75 100644 Binary files a/misc/screenshot.png and b/misc/screenshot.png differ diff --git a/profiles/empty.json b/profiles/empty.json new file mode 100644 index 0000000..a52526e --- /dev/null +++ b/profiles/empty.json @@ -0,0 +1,8 @@ +{ + "name": "None", + "conversation_prefix": "", + "user_keyword": "", + "assistant_keyword": "", + "separator": "", + "stop_sequences": [] +} \ No newline at end of file diff --git a/profiles/koala.json b/profiles/koala.json index 78790f9..056b372 100644 --- a/profiles/koala.json +++ b/profiles/koala.json @@ -3,5 +3,6 @@ "conversation_prefix": "BEGINNING OF CONVERSATION: ", "user_keyword": "USER:", "assistant_keyword": "GPT:", + "separator": " ", "stop_sequences": [""] } \ No newline at end of file diff --git a/profiles/manticore-chat.json b/profiles/manticore-chat.json new file mode 100644 index 0000000..ffc7450 --- /dev/null +++ b/profiles/manticore-chat.json @@ -0,0 +1,8 @@ +{ + "name": "Manticore", + "conversation_prefix": "", + "user_keyword": "USER:", + "assistant_keyword": "ASSISTANT:", + "separator": "\n", + "stop_sequences": ["", "", "### USER:", "USER:"] +} \ No newline at end of file diff --git a/profiles/vicuna-v0.json b/profiles/vicuna-v0.json new file mode 100644 index 0000000..17e0844 --- /dev/null +++ b/profiles/vicuna-v0.json @@ -0,0 +1,8 @@ +{ + "name": "Vicuna v0", + "conversation_prefix": "A chat between a curious human and a helpful AI assistant.\n\n", + "user_keyword": "### Human:", + "assistant_keyword": "### Assistant:", + "separator": "\n", + "stop_sequences": ["### Human:"] +} \ No newline at end of file diff 
--git a/profiles/vicuna-v1.1.json b/profiles/vicuna-v1.1.json new file mode 100644 index 0000000..c5b5b08 --- /dev/null +++ b/profiles/vicuna-v1.1.json @@ -0,0 +1,8 @@ +{ + "name": "Vicuna v1.1", + "conversation_prefix": "A chat between a curious user and a helpful AI assistant.\n\n", + "user_keyword": "USER:", + "assistant_keyword": "ASSISTANT:", + "separator": "\n", + "stop_sequences": ["</s>"] +} \ No newline at end of file diff --git a/profiles/vicuna.json b/profiles/vicuna.json deleted file mode 100644 index 2f47f50..0000000 --- a/profiles/vicuna.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "name": "Vicuna", - "conversation_prefix": "A chat between a curious user and a helpful AI assistant. ", - "user_keyword": "### Human:", - "assistant_keyword": "### Assistant:", - "stop_sequences": ["### Human:"] -} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 63dd6d0..977cf56 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -llama-cpp-python[server]==0.1.50 +llama-cpp-python[server]==0.1.56 uvicorn==0.22.0 sanic==23.3.0 \ No newline at end of file