# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus with ARM Mali-G610 MP4 GPU
# Status: EXPERIMENTAL - Mali GPU support for LLMs is limited
#
# WARNING: This configuration is experimental!
# The Mali-G610 GPU has limited LLM support. Most frameworks
# fall back to CPU. This config attempts to leverage what GPU
# acceleration is available.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp OpenCL backend may provide some acceleration
# Build with: CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
# Requires: libclblast-dev, opencl-headers, ocl-icd-opencl-dev
# NOTE(review): recent llama.cpp releases removed the CLBlast backend in
# favor of Vulkan (CMAKE_ARGS="-DGGML_VULKAN=on") -- confirm which backend
# your installed llama-cpp-python version supports.
# Filesystem path to the .gguf model (intentionally blank; set before use)
model_path =
# Context window size, in tokens
n_ctx = 2048
# CPU worker threads
n_threads = 4
# Layers offloaded to the GPU; 0 disables offload (see notes at end of file)
n_gpu_layers = 8
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
# Maximum tokens to generate per request
max_tokens = 1024
# -1 selects a random seed
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: PyTorch has experimental Vulkan backend for mobile GPUs
# This is highly experimental and may not work
# HF model path or identifier (intentionally blank; set before use)
model_path =
device = cpu
torch_dtype = float32
load_in_8bit = false
# 4-bit quantization reduces memory pressure (see notes at end of file)
load_in_4bit = true
trust_remote_code = false
# Maximum tokens to generate per request
max_tokens = 1024
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# EXPERIMENTAL NOTES:
#
# Mali-G610 GPU Support Status:
# - OpenCL: Partial support via CLBlast, may accelerate some layers
# - Vulkan: PyTorch vulkan backend is experimental
# - Direct Mali: No native support in major LLM frameworks
#
# To enable OpenCL acceleration for llama.cpp:
# 1. Install dependencies:
#    sudo apt install libclblast-dev opencl-headers ocl-icd-opencl-dev
# 2. Install Mali OpenCL driver (if available for your distro)
# 3. Rebuild llama-cpp-python with CLBlast:
#    CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python --force-reinstall
#
# n_gpu_layers = 8: Offloads only some layers (conservative)
# - Increase if stable, decrease if crashes
# - Set to 0 if OpenCL not working
#
# For transformers:
# - load_in_4bit = true reduces memory pressure
# - CPU inference is the reliable fallback
#
# Performance Expectations:
# - Best case: 20-30% speedup over pure CPU
# - Likely case: Similar to CPU or unstable
# - Use orangepi5plus_cpu.conf for stable operation