# AUTARCH LLM Configuration Template
# Hardware: AMD Radeon RX 6700 XT (12GB VRAM)
# Optimized for: GPU inference with ROCm/HIP support
#
# This configuration is optimized for AMD GPUs using ROCm.
# The RX 6700 XT has 12GB VRAM, excellent for 7B-13B models.
# Requires ROCm drivers and PyTorch with ROCm support.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp requires HIP/ROCm build for AMD GPU support
# Build with: CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python
model_path =
n_ctx = 8192
n_threads = 8
n_gpu_layers = -1
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 4096
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# ROCm uses 'cuda' device identifier in PyTorch
model_path =
device = cuda
torch_dtype = float16
load_in_8bit = false
load_in_4bit = false
trust_remote_code = false
max_tokens = 4096
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - 12GB VRAM allows running 13B models at float16
# - For 33B+ models, enable load_in_4bit = true
# - ROCm support requires specific PyTorch version:
#   pip install torch --index-url https://download.pytorch.org/whl/rocm5.6
# - llama.cpp needs HIP build for GPU acceleration
# - If GPU not detected, falls back to CPU (check ROCm installation)
# - n_ctx = 8192 works well with 12GB VRAM