diff --git a/README.md b/README.md
index 12f40c6..1db57b3 100644
--- a/README.md
+++ b/README.md
@@ -8,18 +8,16 @@ A frontend for large language models like [🐨 Koala](https://bair.berkeley.edu
 
 - Python 3.10
 - The pip packages listed in `requirements.txt`
-- An AI model in the ggml format (should be quantized)
+- A Koala model in the ggml format (should be quantized)
 
-For memory and disk requirements for the different models, see [llama.cpp - Memory/Disk Requirements](https://github.com/ggerganov/llama.cpp#memorydisk-requirements)
+The 7B model of Koala, `q4_0`-quantized, requires approx. 5 GB of RAM.
 
 ## Supported Models
 
 - [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
-- [🦙 Vicuna v.0](https://lmsys.org/blog/2023-03-30-vicuna/)
-- [🦙 Vicuna v.1.1](https://lmsys.org/blog/2023-03-30-vicuna/)
-- [🦁 Manticore Chat](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
+- [🦙 Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/)
 
-(see `./profiles/`)
+See `./profiles/`.
 
 ## Usage
@@ -44,7 +42,7 @@ python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT]
 
 The following command-line options are available:
 
-* `--profile`: Path to the profile file for the model.
+* `--profile`: Path to the profile file for the model. Defaults to `./profiles/koala.json`.
 * `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`.
 * `--port`: Specifies the port number to listen on. Defaults to `8080`.
 * `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`.
diff --git a/api-server.py b/api-server.py
index f9fab7d..c836a5e 100644
--- a/api-server.py
+++ b/api-server.py
@@ -4,8 +4,6 @@
 from argparse import ArgumentParser
 from os import environ
 
-from llama_cpp.server.app import create_app
-
 import uvicorn
 
 if __name__ == "__main__":
@@ -15,7 +13,10 @@ if __name__ == "__main__":
     ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
     ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331)
     args = ap.parse_args()
+    # Set the environment variable before importing the API server
    environ["MODEL"] = args.model
+    # Import the API server now that MODEL is set
+    from llama_cpp.server.app import create_app
     # Run
     app = create_app()
     uvicorn.run(app, host=args.host, port=args.port)
diff --git a/frontend-server.py b/frontend-server.py
index 383e110..e631058 100644
--- a/frontend-server.py
+++ b/frontend-server.py
@@ -9,9 +9,10 @@ import uvicorn
 from frontend.app import app
 
 if __name__ == "__main__":
+    koala_profile_path = Path(__file__).parent / "profiles" / "koala.json"
     # CLI
     ap = ArgumentParser()
-    ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model", type=Path, required=True)
+    ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model (default: ./profiles/koala.json)", type=Path, default=koala_profile_path)
     ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
     ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080)
     ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331")
@@ -33,7 +34,6 @@
             "conversation_prefix": profile["conversation_prefix"],
             "user_keyword": profile["user_keyword"],
             "assistant_keyword": profile["assistant_keyword"],
-            "separator": profile["separator"],
             "stop_sequences": profile["stop_sequences"]
         }
     }
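Note on the `frontend-server.py` hunks above: besides the new `--profile` default, they narrow the profile schema, since the `separator` key is no longer read and a profile only needs `conversation_prefix`, `user_keyword`, `assistant_keyword`, and `stop_sequences`. A minimal sketch of how a profile is consumed after this change, assuming a standard-library `json` load and only the key names visible in the diff (the final `print` is illustrative):

```python
import json
from pathlib import Path

# Default profile path, matching the new default in frontend-server.py.
profile_path = Path(__file__).parent / "profiles" / "koala.json"
profile = json.loads(profile_path.read_text())

# Build the settings block the way frontend-server.py does after this
# change; the "separator" key is intentionally no longer read.
settings = {
    "conversation_prefix": profile["conversation_prefix"],
    "user_keyword": profile["user_keyword"],
    "assistant_keyword": profile["assistant_keyword"],
    "stop_sequences": profile["stop_sequences"],
}
print(settings)
```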
diff --git a/frontend/static/index.html b/frontend/static/index.html
index 23d3b6b..b13e19a 100644
--- a/frontend/static/index.html
+++ b/frontend/static/index.html
@@ -18,57 +18,49 @@
-
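For reference, the ordering constraint behind the `api-server.py` change: `llama_cpp.server.app` appears to read the `MODEL` environment variable when it is imported, so the import has to happen after `environ["MODEL"]` is assigned. A minimal sketch of the resulting pattern, with the `-m/--model` flag taken from the README usage line:

```python
from argparse import ArgumentParser
from os import environ

import uvicorn

if __name__ == "__main__":
    ap = ArgumentParser()
    ap.add_argument("-m", "--model", help="Path to the quantized ggml model", type=str, required=True)
    args = ap.parse_args()

    # Must run before the import below; importing llama_cpp.server.app
    # earlier would configure the server without a model path.
    environ["MODEL"] = args.model
    from llama_cpp.server.app import create_app

    uvicorn.run(create_app(), host="localhost", port=7331)
```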