# AUTARCH LLM Configuration Template
# Hardware: AMD Radeon RX 6700 XT (12GB VRAM)
# Optimized for: GPU inference with ROCm/HIP support
#
# This configuration is optimized for AMD GPUs using ROCm.
# The RX 6700 XT has 12GB VRAM, excellent for 7B-13B models.
# Requires ROCm drivers and PyTorch with ROCm support.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp requires HIP/ROCm build for AMD GPU support
# Build with: CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python

# Filesystem path to the .gguf model; intentionally empty until a model is installed
model_path =
# Context window size in tokens; 8192 works well with 12GB VRAM (see Notes below)
n_ctx = 8192
# CPU threads used for the non-offloaded portion of inference
n_threads = 8
# NOTE(review): -1 presumably means "offload all layers to GPU" (llama.cpp convention) — confirm
n_gpu_layers = -1
# Sampling parameters
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
# Maximum tokens to generate per completion
max_tokens = 4096
# NOTE(review): -1 presumably selects a random seed per run — confirm against loader
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# ROCm uses 'cuda' device identifier in PyTorch

# Filesystem path to the HuggingFace model directory; intentionally empty until installed
model_path =
# Keep as 'cuda' even on AMD: ROCm builds of PyTorch expose the GPU under the cuda device name
device = cuda
# float16 halves memory vs float32; 12GB VRAM fits up to 13B models (see Notes below)
torch_dtype = float16
# Quantization toggles; enable load_in_4bit for 33B+ models (see Notes below)
load_in_8bit = false
load_in_4bit = false
# Security: leave false unless the model repository is trusted
trust_remote_code = false
# Maximum tokens to generate per completion
max_tokens = 4096
# Sampling parameters
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - 12GB VRAM allows running 13B models at float16
# - For 33B+ models, enable load_in_4bit = true
# - ROCm support requires specific PyTorch version:
#   pip install torch --index-url https://download.pytorch.org/whl/rocm5.6
# - llama.cpp needs HIP build for GPU acceleration
# - If GPU not detected, falls back to CPU (check ROCm installation)
# - n_ctx = 8192 works well with 12GB VRAM