Compare commits
15 commits
Author | SHA1 | Date | |
---|---|---|---|
74e59a9fd4 | |||
2a2241ce08 | |||
f4abe93735 | |||
faed129586 | |||
abb8054892 | |||
de194bead6 | |||
2a46750ee9 | |||
ae0058bdee | |||
bd44e45801 | |||
![]() |
5cfa6a7b0a | ||
8c29a31598 | |||
345d0cfc5c | |||
ea2f59f94e | |||
060d522f6c | |||
![]() |
1718520de9 |
13 changed files with 74 additions and 40 deletions
12
README.md
12
README.md
|
@ -8,16 +8,18 @@ A frontend for large language models like [🐨 Koala](https://bair.berkeley.edu
|
||||||
|
|
||||||
- Python 3.10
|
- Python 3.10
|
||||||
- The pip packages listed in `requirements.txt`
|
- The pip packages listed in `requirements.txt`
|
||||||
- A Koala model in the ggml format (should be quantized)
|
- An AI model in the ggml format (should be quantized)
|
||||||
|
|
||||||
The 7B-Model of Koala, `q4_0`-quantized, requires approx. 5 GB of RAM.
|
For the memory and disk requirements of the different models, see [llama.cpp - Memory/Disk Requirements](https://github.com/ggerganov/llama.cpp#memorydisk-requirements).
|
||||||
|
|
||||||
## Supported Models
|
## Supported Models
|
||||||
|
|
||||||
- [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
|
- [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
|
||||||
- [🦙 Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/)
|
- [🦙 Vicuna v0](https://lmsys.org/blog/2023-03-30-vicuna/)
|
||||||
|
- [🦙 Vicuna v1.1](https://lmsys.org/blog/2023-03-30-vicuna/)
|
||||||
|
- [🦁 Manticore Chat](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
|
||||||
|
|
||||||
see `./profiles/`
|
(see `./profiles/`)
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
@ -42,7 +44,7 @@ python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT]
|
||||||
|
|
||||||
The following command-line options are available:
|
The following command-line options are available:
|
||||||
|
|
||||||
* `--profile`: Path to the profile file for the model. Defaults to `./profiles/koala.json`.
|
* `--profile`: Path to the profile file for the model. Required; there is no default.
|
||||||
* `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`.
|
* `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`.
|
||||||
* `--port`: Specifies the port number to listen on. Defaults to `8080`.
|
* `--port`: Specifies the port number to listen on. Defaults to `8080`.
|
||||||
* `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`.
|
* `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`.
|
||||||
|
|
|
@ -4,6 +4,8 @@
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from os import environ
|
from os import environ
|
||||||
|
|
||||||
|
from llama_cpp.server.app import create_app
|
||||||
|
|
||||||
import uvicorn
|
import uvicorn
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
@ -13,10 +15,7 @@ if __name__ == "__main__":
|
||||||
ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
|
ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
|
||||||
ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331)
|
ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331)
|
||||||
args = ap.parse_args()
|
args = ap.parse_args()
|
||||||
# Set environment variable before importing api server
|
|
||||||
environ["MODEL"] = args.model
|
environ["MODEL"] = args.model
|
||||||
# Import api server
|
|
||||||
from llama_cpp.server.app import create_app
|
|
||||||
# Run
|
# Run
|
||||||
app = create_app()
|
app = create_app()
|
||||||
uvicorn.run(app, host=args.host, port=args.port)
|
uvicorn.run(app, host=args.host, port=args.port)
|
||||||
|
|
|
@ -9,10 +9,9 @@ import uvicorn
|
||||||
from frontend.app import app
|
from frontend.app import app
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
koala_profile_path = Path(__file__).parent / "profiles" / "koala.json"
|
|
||||||
# CLI
|
# CLI
|
||||||
ap = ArgumentParser()
|
ap = ArgumentParser()
|
||||||
ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model (default: ./profiles/koala.json)", type=Path, default=koala_profile_path)
|
ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model", type=Path, required=True)
|
||||||
ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
|
ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
|
||||||
ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080)
|
ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080)
|
||||||
ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331")
|
ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331")
|
||||||
|
@ -34,6 +33,7 @@ if __name__ == "__main__":
|
||||||
"conversation_prefix": profile["conversation_prefix"],
|
"conversation_prefix": profile["conversation_prefix"],
|
||||||
"user_keyword": profile["user_keyword"],
|
"user_keyword": profile["user_keyword"],
|
||||||
"assistant_keyword": profile["assistant_keyword"],
|
"assistant_keyword": profile["assistant_keyword"],
|
||||||
|
"separator": profile["separator"],
|
||||||
"stop_sequences": profile["stop_sequences"]
|
"stop_sequences": profile["stop_sequences"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -111,10 +111,10 @@ fetch("/config")
|
||||||
function addMessage(message, role) {
|
function addMessage(message, role) {
|
||||||
if (role == Roles.USER) {
|
if (role == Roles.USER) {
|
||||||
conversation.push(
|
conversation.push(
|
||||||
" " + frontend_config.profile.user_keyword + " "
|
frontend_config.profile.user_keyword + " "
|
||||||
+ message + " " + frontend_config.profile.assistant_keyword);
|
+ message + frontend_config.profile.separator + frontend_config.profile.assistant_keyword);
|
||||||
}
|
}
|
||||||
else { conversation.push(message); }
|
else { conversation.push(message + frontend_config.profile.separator); }
|
||||||
// UI
|
// UI
|
||||||
let messageRoleElem = document.createElement("div");
|
let messageRoleElem = document.createElement("div");
|
||||||
messageRoleElem.classList.add("message-type");
|
messageRoleElem.classList.add("message-type");
|
||||||
|
@ -181,7 +181,7 @@ fetch("/config")
|
||||||
let prompt = conversation.join("");
|
let prompt = conversation.join("");
|
||||||
let settings = getSettings();
|
let settings = getSettings();
|
||||||
apiCompletion(prompt, settings).then(r => {
|
apiCompletion(prompt, settings).then(r => {
|
||||||
addMessage(r, Roles.ASSISTANT);
|
addMessage(r.trim(), Roles.ASSISTANT);
|
||||||
enableInput();
|
enableInput();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
--icon-button-fill: #ffffff;
|
--icon-button-fill: #ffffff;
|
||||||
--send-icon-button-fill: #29c76d;
|
--send-icon-button-fill: #29c76d;
|
||||||
--color: #fafafa;
|
--color: #fafafa;
|
||||||
|
--color2: #bbbbbb;
|
||||||
--border-radius: .5rem;
|
--border-radius: .5rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -69,19 +70,38 @@ input[type="number"] {
|
||||||
}
|
}
|
||||||
|
|
||||||
.messages {
|
.messages {
|
||||||
gap: 1.1rem;
|
gap: 1rem;
|
||||||
margin-bottom: 1rem;
|
margin-bottom: 1rem;
|
||||||
overflow-y: scroll;
|
overflow-y: scroll;
|
||||||
max-height: 89vh;
|
max-height: 89vh;
|
||||||
|
align-items: center;
|
||||||
|
flex-grow: 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message {
|
.message {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: row;
|
flex-direction: column;
|
||||||
gap: .5rem;
|
flex-wrap: wrap;
|
||||||
padding: .5rem;
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-type {
|
||||||
|
color: var(--color2);
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-text {
|
||||||
|
white-space: pre-wrap;
|
||||||
|
padding: .5rem .8rem;
|
||||||
border-radius: var(--border-radius);
|
border-radius: var(--border-radius);
|
||||||
max-width: fit-content;
|
}
|
||||||
|
|
||||||
|
.message-bg-assistant > .message-text {
|
||||||
|
background: var(--background2);
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-bg-user > .message-text {
|
||||||
|
background: var(--background3);
|
||||||
}
|
}
|
||||||
|
|
||||||
button {
|
button {
|
||||||
|
@ -114,19 +134,6 @@ button:hover {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message-bg-assistant {
|
|
||||||
background: var(--background2);
|
|
||||||
}
|
|
||||||
|
|
||||||
.message-bg-user {
|
|
||||||
background: var(--background3);
|
|
||||||
}
|
|
||||||
|
|
||||||
.message-type {
|
|
||||||
min-width: 3.5rem;
|
|
||||||
padding-left: .1rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.input-container {
|
.input-container {
|
||||||
margin-top: auto;
|
margin-top: auto;
|
||||||
flex-direction: row;
|
flex-direction: row;
|
||||||
|
|
Binary file not shown.
Before: Size: 117 KiB | After: Size: 129 KiB
8
profiles/empty.json
Normal file
8
profiles/empty.json
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"name": "None",
|
||||||
|
"conversation_prefix": "",
|
||||||
|
"user_keyword": "",
|
||||||
|
"assistant_keyword": "",
|
||||||
|
"separator": "",
|
||||||
|
"stop_sequences": []
|
||||||
|
}
|
|
@ -3,5 +3,6 @@
|
||||||
"conversation_prefix": "BEGINNING OF CONVERSATION: ",
|
"conversation_prefix": "BEGINNING OF CONVERSATION: ",
|
||||||
"user_keyword": "USER:",
|
"user_keyword": "USER:",
|
||||||
"assistant_keyword": "GPT:",
|
"assistant_keyword": "GPT:",
|
||||||
|
"separator": " ",
|
||||||
"stop_sequences": ["</s>"]
|
"stop_sequences": ["</s>"]
|
||||||
}
|
}
|
8
profiles/manticore-chat.json
Normal file
8
profiles/manticore-chat.json
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"name": "Manticore",
|
||||||
|
"conversation_prefix": "",
|
||||||
|
"user_keyword": "USER:",
|
||||||
|
"assistant_keyword": "ASSISTANT:",
|
||||||
|
"separator": "\n",
|
||||||
|
"stop_sequences": ["</s>", "<unk>", "### USER:", "USER:"]
|
||||||
|
}
|
8
profiles/vicuna-v0.json
Normal file
8
profiles/vicuna-v0.json
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"name": "Vicuna v0",
|
||||||
|
"conversation_prefix": "A chat between a curious human and a helpful AI assistant.\n\n",
|
||||||
|
"user_keyword": "### Human:",
|
||||||
|
"assistant_keyword": "### Assistant:",
|
||||||
|
"separator": "\n",
|
||||||
|
"stop_sequences": ["### Human:"]
|
||||||
|
}
|
8
profiles/vicuna-v1.1.json
Normal file
8
profiles/vicuna-v1.1.json
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
{
|
||||||
|
"name": "Vicuna v1.1",
|
||||||
|
"conversation_prefix": "A chat between a curious user and a helpful AI assistant.\n\n",
|
||||||
|
"user_keyword": "USER:",
|
||||||
|
"assistant_keyword": "ASSISTANT:",
|
||||||
|
"separator": "\n",
|
||||||
|
"stop_sequences": ["</s>"]
|
||||||
|
}
|
|
@ -1,7 +0,0 @@
|
||||||
{
|
|
||||||
"name": "Vicuna",
|
|
||||||
"conversation_prefix": "A chat between a curious user and a helpful AI assistant. ",
|
|
||||||
"user_keyword": "### Human:",
|
|
||||||
"assistant_keyword": "### Assistant:",
|
|
||||||
"stop_sequences": ["### Human:"]
|
|
||||||
}
|
|
|
@ -1,3 +1,3 @@
|
||||||
llama-cpp-python[server]==0.1.50
|
llama-cpp-python[server]==0.1.56
|
||||||
uvicorn==0.22.0
|
uvicorn==0.22.0
|
||||||
sanic==23.3.0
|
sanic==23.3.0
|
Reference in a new issue