# AUTARCH LLM Configuration Template
# Hardware: AMD Radeon RX 6700 XT (12GB VRAM)
# Optimized for: GPU inference with ROCm/HIP support
#
# This configuration is optimized for AMD GPUs using ROCm.
# The RX 6700 XT has 12GB VRAM, excellent for 7B-13B models.
# Requires ROCm drivers and PyTorch with ROCm support.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp requires HIP/ROCm build for AMD GPU support
# Build with: CMAKE_ARGS="-DLLAMA_HIPBLAS=on" pip install llama-cpp-python

# Filesystem path to the .gguf model; intentionally empty until a model is installed
model_path =
# Context window size in tokens; 8192 works well with 12GB VRAM (see Notes below)
n_ctx = 8192
# CPU threads used for the non-offloaded portion of inference
n_threads = 8
# NOTE(review): -1 presumably means "offload all layers to GPU" (llama.cpp convention) — confirm
n_gpu_layers = -1
# Sampling parameters
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
# Maximum tokens to generate per completion
max_tokens = 4096
# NOTE(review): -1 presumably selects a random seed per run — confirm against loader
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# ROCm uses 'cuda' device identifier in PyTorch

# Filesystem path to the HuggingFace model directory; intentionally empty until installed
model_path =
# Keep as 'cuda' even on AMD: ROCm builds of PyTorch expose the GPU under the cuda device name
device = cuda
# float16 halves memory vs float32; 12GB VRAM fits up to 13B models (see Notes below)
torch_dtype = float16
# Quantization toggles; enable load_in_4bit for 33B+ models (see Notes below)
load_in_8bit = false
load_in_4bit = false
# Security: leave false unless the model repository is trusted
trust_remote_code = false
# Maximum tokens to generate per completion
max_tokens = 4096
# Sampling parameters
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - 12GB VRAM allows running 13B models at float16
# - For 33B+ models, enable load_in_4bit = true
# - ROCm support requires specific PyTorch version:
#   pip install torch --index-url https://download.pytorch.org/whl/rocm5.6
# - llama.cpp needs HIP build for GPU acceleration
# - If GPU not detected, falls back to CPU (check ROCm installation)
# - n_ctx = 8192 works well with 12GB VRAM