# Full security platform with web dashboard, 16 Flask blueprints, 26 modules,
# autonomous AI agent, WebUSB hardware support, and Archon Android companion app.
# Includes Hash Toolkit, debug console, anti-stalkerware shield,
# Metasploit/RouterSploit integration, WireGuard VPN, OSINT reconnaissance,
# and multi-backend LLM support.
# AUTARCH LLM Configuration Template
# Hardware: NVIDIA GeForce RTX 4070 Mobile (8GB VRAM)
# Optimized for: GPU inference with good VRAM management
#
# This configuration balances performance and memory usage for mobile RTX 4070.
# The 4070 Mobile has 8GB VRAM, suitable for 7B models at full precision
# or 13B models with quantization.

[llama]
# GGUF Model Settings (llama.cpp)
model_path =
n_ctx = 8192
n_threads = 8
n_gpu_layers = -1
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 4096
seed = -1

|
[transformers]
# SafeTensors Model Settings (HuggingFace)
model_path =
device = cuda
torch_dtype = float16
load_in_8bit = false
load_in_4bit = false
trust_remote_code = false
max_tokens = 4096
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

|
# Notes:
# - n_gpu_layers = -1 offloads all layers to GPU
# - For 13B+ models, enable load_in_4bit = true
# - float16 is optimal for RTX 4070
# - n_ctx = 8192 uses ~2GB VRAM overhead
# - Reduce n_ctx to 4096 if running out of VRAM