# AUTARCH — security platform with web dashboard, Flask blueprints,
# autonomous AI agent, and multi-backend LLM support.
# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus (RK3588 SoC, 8-core ARM, 16GB RAM)
# Optimized for: CPU-only inference on ARM64
#
# This configuration is optimized for the Orange Pi 5 Plus running
# CPU-only inference. The RK3588 has 4x Cortex-A76 + 4x Cortex-A55 cores.
# Best with quantized GGUF models (Q4_K_M or Q5_K_M).

[llama]
# GGUF Model Settings (llama.cpp)
# Recommended: Use Q4_K_M or Q5_K_M quantized models

# Absolute path to the .gguf model file (empty = not yet configured)
model_path =
# Context window in tokens; 2048 balances memory usage and capability
n_ctx = 2048
# 4 threads targets the fast Cortex-A76 cores only (see Notes below)
n_threads = 4
# 0 = pure CPU inference, no layers offloaded to a GPU
n_gpu_layers = 0
# Sampling parameters
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
# Upper bound on tokens generated per response
max_tokens = 1024
# -1 = use a random seed on each run
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: CPU inference is slow with transformers, GGUF recommended

# Path to the HuggingFace model directory (empty = not yet configured)
model_path =
# CPU-only board — no GPU backend available
device = cpu
# float32 is the safe default dtype for CPU inference
torch_dtype = float32
# 8-/4-bit quantized loading; kept false for CPU-only inference
load_in_8bit = false
load_in_4bit = false
# Keep false unless the model repository is fully trusted
trust_remote_code = false
# Generation settings (note: key is repetition_penalty here,
# but repeat_penalty in [llama] — the two backends differ)
max_tokens = 1024
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - n_threads = 4 uses only the fast A76 cores (better perf than all 8)
# - n_ctx = 2048 balances memory usage and capability
# - n_gpu_layers = 0 for pure CPU inference
# - Strongly recommend GGUF Q4_K_M models for best speed
# - 7B Q4 models use ~4GB RAM, leaving room for system
# - max_tokens = 1024 keeps generation times reasonable
# - For transformers: CPU with float32 is slow but works
# - Avoid 13B+ models unless heavily quantized