Autarch Will Control The Internet
This commit is contained in:
41
.config/nvidia_4070_mobile.conf
Normal file
41
.config/nvidia_4070_mobile.conf
Normal file
@@ -0,0 +1,41 @@
# AUTARCH LLM Configuration Template
# Hardware: NVIDIA GeForce RTX 4070 Mobile (8GB VRAM)
# Optimized for: GPU inference with good VRAM management
#
# This configuration balances performance and memory usage for the mobile RTX 4070.
# The 4070 Mobile has 8GB VRAM, suitable for 7B models at full precision
# or 13B models with quantization.

[llama]
# GGUF Model Settings (llama.cpp)
model_path =
n_ctx = 8192
n_threads = 8
n_gpu_layers = -1
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 4096
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
model_path =
device = cuda
torch_dtype = float16
load_in_8bit = false
load_in_4bit = false
trust_remote_code = false
max_tokens = 4096
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# Notes:
# - n_gpu_layers = -1 offloads all layers to GPU
# - For 13B+ models, enable load_in_4bit = true
# - float16 is optimal for RTX 4070
# - n_ctx = 8192 uses ~2GB VRAM overhead
# - Reduce n_ctx to 4096 if running out of VRAM
Reference in New Issue
Block a user