# Autarch/.config/nvidia_4070_mobile.conf
# AUTARCH LLM Configuration Template
# Hardware: NVIDIA GeForce RTX 4070 Mobile (8GB VRAM)
# Optimized for: GPU inference with good VRAM management
#
# This configuration balances performance and memory usage for mobile RTX 4070.
# The 4070 Mobile has 8GB VRAM, suitable for 7B models with light
# quantization (e.g. Q5/Q6 GGUF) or 13B models with 4-bit quantization.

[llama]
# GGUF Model Settings (llama.cpp)
# Path to the .gguf model file (required; fill in before use)
model_path =
n_ctx = 8192
n_threads = 8
n_gpu_layers = -1
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 4096
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Path to the model directory or HF model ID (required; fill in before use)
model_path =
device = cuda
torch_dtype = float16
load_in_8bit = false
load_in_4bit = false
trust_remote_code = false
max_tokens = 4096
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - n_gpu_layers = -1 offloads all layers to GPU
# - For 13B+ models, enable load_in_4bit = true
# - float16 is optimal for RTX 4070
# - n_ctx = 8192 uses ~2GB VRAM overhead
# - Reduce n_ctx to 4096 if running out of VRAM