# File: Autarch/.config/orangepi5plus_cpu.conf
# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus (RK3588 SoC, 8-core ARM, 16GB RAM)
# Optimized for: CPU-only inference on ARM64
#
# This configuration is optimized for the Orange Pi 5 Plus running
# CPU-only inference. The RK3588 has 4x Cortex-A76 + 4x Cortex-A55 cores.
# Best with quantized GGUF models (Q4_K_M or Q5_K_M).
[llama]
# GGUF Model Settings (llama.cpp)
# Recommended: Use Q4_K_M or Q5_K_M quantized models
# Path to the .gguf model file; intentionally empty in this template — must be set before use
model_path =
# Context window size in tokens; 2048 balances memory usage and capability
n_ctx = 2048
# CPU worker threads; 4 targets only the RK3588's four fast Cortex-A76 cores
# (better performance than spreading across all 8 cores — see notes at end of file)
n_threads = 4
# Number of layers offloaded to GPU; 0 = pure CPU inference
n_gpu_layers = 0
# Sampling temperature; higher values produce more random output
temperature = 0.7
# Nucleus (top-p) sampling cutoff, range 0-1
top_p = 0.9
# Sample only from the k most likely next tokens
top_k = 40
# Penalty multiplier for repeated tokens; values > 1.0 discourage repetition
repeat_penalty = 1.1
# Maximum tokens generated per response; 1024 keeps generation times reasonable
max_tokens = 1024
# RNG seed; -1 conventionally requests a random seed per run (llama.cpp convention — verify against consumer)
seed = -1
[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: CPU inference is slow with transformers, GGUF recommended
# Path to the HuggingFace model directory; intentionally empty in this template — must be set before use
model_path =
# Inference device; cpu only on this board configuration
device = cpu
# Tensor precision for loading; float32 is slow on CPU but works (see notes at end of file)
torch_dtype = float32
# 8-bit quantized loading (bitsandbytes); disabled here
load_in_8bit = false
# 4-bit quantized loading (bitsandbytes); disabled here
load_in_4bit = false
# Refuse to execute custom Python code shipped inside model repositories (safer default)
trust_remote_code = false
# Maximum tokens generated per response; mirrors the [llama] setting
max_tokens = 1024
# Sampling temperature; higher values produce more random output
temperature = 0.7
# Nucleus (top-p) sampling cutoff, range 0-1
top_p = 0.9
# Sample only from the k most likely next tokens
top_k = 40
# Penalty multiplier for repeated tokens; values > 1.0 discourage repetition
repetition_penalty = 1.1
# Notes:
# - n_threads = 4 uses only the fast A76 cores (better perf than all 8)
# - n_ctx = 2048 balances memory usage and capability
# - n_gpu_layers = 0 for pure CPU inference
# - Strongly recommend GGUF Q4_K_M models for best speed
# - 7B Q4 models use ~4GB RAM, leaving room for system
# - max_tokens = 1024 keeps generation times reasonable
# - For transformers: CPU with float32 is slow but works
# - Avoid 13B+ models unless heavily quantized