# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus with ARM Mali-G610 MP4 GPU
# Status: EXPERIMENTAL - Mali GPU support for LLMs is limited
#
# WARNING: This configuration is experimental!
# The Mali-G610 GPU has limited LLM support. Most frameworks
# fall back to CPU. This config attempts to leverage what GPU
# acceleration is available.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp OpenCL backend may provide some acceleration
# Build with: CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
# Requires: libclblast-dev, opencl-headers, ocl-icd-opencl-dev
# NOTE(review): recent llama.cpp releases removed the CLBlast backend in
# favor of Vulkan (CMAKE_ARGS="-DGGML_VULKAN=on") -- confirm which backend
# your installed llama-cpp-python version supports.
# Filesystem path to the .gguf model (intentionally blank; set before use)
model_path =
# Context window size, in tokens
n_ctx = 2048
# CPU worker threads
n_threads = 4
# Layers offloaded to the GPU; 0 disables offload (see notes at end of file)
n_gpu_layers = 8
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
# Maximum tokens to generate per request
max_tokens = 1024
# -1 selects a random seed
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: PyTorch has experimental Vulkan backend for mobile GPUs
# This is highly experimental and may not work
# HF model path or identifier (intentionally blank; set before use)
model_path =
device = cpu
torch_dtype = float32
load_in_8bit = false
# 4-bit quantization reduces memory pressure (see notes at end of file)
load_in_4bit = true
trust_remote_code = false
# Maximum tokens to generate per request
max_tokens = 1024
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# EXPERIMENTAL NOTES:
#
# Mali-G610 GPU Support Status:
# - OpenCL: Partial support via CLBlast, may accelerate some layers
# - Vulkan: PyTorch vulkan backend is experimental
# - Direct Mali: No native support in major LLM frameworks
#
# To enable OpenCL acceleration for llama.cpp:
# 1. Install dependencies:
#    sudo apt install libclblast-dev opencl-headers ocl-icd-opencl-dev
# 2. Install Mali OpenCL driver (if available for your distro)
# 3. Rebuild llama-cpp-python with CLBlast:
#    CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python --force-reinstall
#
# n_gpu_layers = 8: Offloads only some layers (conservative)
# - Increase if stable, decrease if crashes
# - Set to 0 if OpenCL not working
#
# For transformers:
# - load_in_4bit = true reduces memory pressure
# - CPU inference is the reliable fallback
#
# Performance Expectations:
# - Best case: 20-30% speedup over pure CPU
# - Likely case: Similar to CPU or unstable
# - Use orangepi5plus_cpu.conf for stable operation