diff --git a/README.md b/README.md
index 50f997d..12f40c6 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Eucalyptus Chat
 
-A frontend for [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) running on CPU with [llama.cpp](https://github.com/ggerganov/llama.cpp), using the API server library provided by [llama-cpp-python](https://github.com/abetlen/llama-cpp-python).
+A frontend for large language models like [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) or [🦙 Vicuna](https://lmsys.org/blog/2023-03-30-vicuna/) running on CPU with [llama.cpp](https://github.com/ggerganov/llama.cpp), using the API server library provided by [llama-cpp-python](https://github.com/abetlen/llama-cpp-python).
 
 ![](misc/screenshot.png)
 
@@ -8,22 +8,33 @@ A frontend for [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) running
 
 - Python 3.10
 - The pip packages listed in `requirements.txt`
-- A Koala model in the ggml format (should be quantized)
+- An AI model in the ggml format (should be quantized)
 
-The 7B-Model, `q4_0`-quantized, requires approx. 5 GB of RAM.
+For the memory and disk requirements of the different models, see [llama.cpp - Memory/Disk Requirements](https://github.com/ggerganov/llama.cpp#memorydisk-requirements).
+
+## Supported Models
+
+- [🐨 Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
+- [🦙 Vicuna v.0](https://lmsys.org/blog/2023-03-30-vicuna/)
+- [🦙 Vicuna v.1.1](https://lmsys.org/blog/2023-03-30-vicuna/)
+- [🦁 Manticore Chat](https://huggingface.co/openaccess-ai-collective/manticore-13b-chat-pyg)
+
+(profile files for these models can be found in `./profiles/`)
 
 ## Usage
 
 To use Eucalyptus locally, start both the API-Server (`api-server.py`) and the Frontend-Server (`frontend-server.py`).
 The default URL of the Frontend-Server is http://localhost:8080.
 
+You must choose the profile that matches the model you use. See [Supported Models](#supported-models) and [Frontend Server CLI Arguments](#frontend-server-cli-arguments).
+
 ### API Server CLI Arguments
 
 The following command-line arguments are available:
 
 * `-m` or `--model`: Specifies the path to the model file. This is required and must be provided.
-* `--host`: Specifies the address to listen on. By default, it listens on localhost.
-* `--port`: Specifies the port number to listen on. The default value is 7331.
+* `--host`: Specifies the address to listen on. By default, it listens on `localhost`.
+* `--port`: Specifies the port number to listen on. The default value is `7331`.
 
 ```bash
 python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT]
@@ -33,12 +44,13 @@ python3 api-server.py [-h] -m MODEL [--host HOST] [--port PORT]
 ### Frontend Server CLI Arguments
 
 The following command-line options are available:
 
-* `--host`: Specifies the IP address or hostname to listen on. Defaults to "localhost".
-* `--port`: Specifies the port number to listen on. Defaults to 8080.
-* `--api`: Specifies the URL of the API server. Defaults to http://localhost:7331.
+* `--profile`: Specifies the path to the profile file for the model. This is required and must be provided.
+* `--host`: Specifies the IP address or hostname to listen on. Defaults to `localhost`.
+* `--port`: Specifies the port number to listen on. Defaults to `8080`.
+* `--api`: Specifies the URL of the API server. Defaults to `http://localhost:7331`.
 ```bash
-python3 frontend-server.py [-h] [--host HOST] [--port PORT] [--api API]
+python3 frontend-server.py [-h] [--profile PROFILE] [--host HOST] [--port PORT] [--api API]
 ```
 
 ## Third-Party Licenses
diff --git a/api-server.py b/api-server.py
index c836a5e..f9fab7d 100644
--- a/api-server.py
+++ b/api-server.py
@@ -4,6 +4,8 @@
 from argparse import ArgumentParser
 from os import environ
 
+from llama_cpp.server.app import create_app
+
 import uvicorn
 
 if __name__ == "__main__":
@@ -13,10 +15,7 @@ if __name__ == "__main__":
     ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
     ap.add_argument("--port", help="Port to listen on (default: 7331)", type=int, default=7331)
     args = ap.parse_args()
-    # Set environment variable before importing api server
     environ["MODEL"] = args.model
-    # Import api server
-    from llama_cpp.server.app import create_app
     # Run
     app = create_app()
     uvicorn.run(app, host=args.host, port=args.port)
diff --git a/frontend-server.py b/frontend-server.py
index 2e1e7c3..383e110 100644
--- a/frontend-server.py
+++ b/frontend-server.py
@@ -2,6 +2,8 @@
 # Copyright (c) 2023 Julian Müller (ChaoticByte)
 
 from argparse import ArgumentParser
+from json import load
+from pathlib import Path
 
 import uvicorn
 from frontend.app import app
@@ -9,11 +11,32 @@ from frontend.app import app
 if __name__ == "__main__":
     # CLI
     ap = ArgumentParser()
+    ap.add_argument("--profile", help="Path to a profile file that includes settings for a specific model", type=Path, required=True)
     ap.add_argument("--host", help="Address to listen on (default: localhost)", type=str, default="localhost")
     ap.add_argument("--port", help="Port to listen on (default: 8080)", type=int, default=8080)
     ap.add_argument("--api", help="URL of the API Server (default: 'http://localhost:7331')", type=str, default="http://localhost:7331")
     args = ap.parse_args()
+    # Read profile
+    with args.profile.open("r") as pf:
+        profile = load(pf)
+    # Check profile
+    assert "name" in profile
+    assert "conversation_prefix" in profile
+    assert "user_keyword" in profile
+    assert "assistant_keyword" in profile
+    assert "separator" in profile
+    assert "stop_sequences" in profile
     # Pass frontend config to the app
-    app.config.frontend_config = {"api_url": args.api.rstrip("/")}
+    app.config.frontend_config = {
+        "api_url": args.api.rstrip("/"),
+        "profile": {
+            "name": profile["name"],
+            "conversation_prefix": profile["conversation_prefix"],
+            "user_keyword": profile["user_keyword"],
+            "assistant_keyword": profile["assistant_keyword"],
+            "separator": profile["separator"],
+            "stop_sequences": profile["stop_sequences"]
+        }
+    }
     # Run
     uvicorn.run(app, host=args.host, port=args.port)
diff --git a/frontend/static/index.html b/frontend/static/index.html
index 34eae86..23d3b6b 100644
--- a/frontend/static/index.html
+++ b/frontend/static/index.html
@@ -18,45 +18,57 @@
[The body of this hunk was garbled in extraction: the HTML tags were stripped, leaving only diff markers and the text labels "Settings" and "max_tokens". The hunk reworked the settings-panel markup in `frontend/static/index.html`.]
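The profile passed to the frontend via `--profile` is a JSON file that must define every key checked in `frontend-server.py` above. A minimal sketch of such a file, where the key names come from the code but the values are purely illustrative placeholders rather than the contents of any file in `./profiles/`:

```json
{
    "name": "Koala",
    "conversation_prefix": "BEGINNING OF CONVERSATION: ",
    "user_keyword": "USER:",
    "assistant_keyword": "GPT:",
    "separator": " ",
    "stop_sequences": ["USER:"]
}
```

Judging by the key names, the frontend uses these values to assemble the model-specific prompt, with `stop_sequences` marking where generation should end.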
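With a model and a profile in place, the two servers are started as the README's Usage section describes; a hypothetical invocation, in which the model and profile paths are placeholders:

```bash
# Start the API server with a quantized ggml model (path is a placeholder)
python3 api-server.py -m ./models/model-q4_0.bin &

# Start the frontend with a matching profile, pointing it at the API server
python3 frontend-server.py --profile ./profiles/koala.json --api http://localhost:7331
```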