# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus with ARM Mali-G610 MP4 GPU
# Status: EXPERIMENTAL - Mali GPU support for LLMs is limited
#
# WARNING: This configuration is experimental!
# The Mali-G610 GPU has limited LLM support. Most frameworks
# fall back to CPU. This config attempts to leverage what GPU
# acceleration is available.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp OpenCL backend may provide some acceleration
# Build with: CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
# Requires: libclblast-dev, opencl-headers, ocl-icd-opencl-dev
model_path =
n_ctx = 2048
n_threads = 4
n_gpu_layers = 8
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 1024
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: PyTorch has experimental Vulkan backend for mobile GPUs
# This is highly experimental and may not work
model_path =
device = cpu
torch_dtype = float32
load_in_8bit = false
load_in_4bit = true
trust_remote_code = false
max_tokens = 1024
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# EXPERIMENTAL NOTES:
#
# Mali-G610 GPU Support Status:
# - OpenCL: Partial support via CLBlast, may accelerate some layers
# - Vulkan: PyTorch vulkan backend is experimental
# - Direct Mali: No native support in major LLM frameworks
#
# To enable OpenCL acceleration for llama.cpp:
# 1. Install dependencies:
#    sudo apt install libclblast-dev opencl-headers ocl-icd-opencl-dev
# 2. Install Mali OpenCL driver (if available for your distro)
# 3. Rebuild llama-cpp-python with CLBlast:
#    CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python --force-reinstall
#
# n_gpu_layers = 8: Offloads only some layers (conservative)
# - Increase if stable, decrease if crashes
# - Set to 0 if OpenCL not working
#
# For transformers:
# - load_in_4bit = true reduces memory pressure
# - CPU inference is the reliable fallback
#
# Performance Expectations:
# - Best case: 20-30% speedup over pure CPU
# - Likely case: Similar to CPU or unstable
# - Use orangepi5plus_cpu.conf for stable operation