Autarch Will Control The Internet
This commit is contained in:
67
.config/orangepi5plus_mali.conf
Normal file
67
.config/orangepi5plus_mali.conf
Normal file
@@ -0,0 +1,67 @@
# AUTARCH LLM Configuration Template
# Hardware: Orange Pi 5 Plus with ARM Mali-G610 MP4 GPU
# Status: EXPERIMENTAL - Mali GPU support for LLMs is limited
#
# WARNING: This configuration is experimental!
# The Mali-G610 GPU has limited LLM support. Most frameworks
# fall back to CPU. This config attempts to leverage what GPU
# acceleration is available.

[llama]
# GGUF Model Settings (llama.cpp)
# Note: llama.cpp OpenCL backend may provide some acceleration
# Build with: CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python
# Requires: libclblast-dev, opencl-headers, ocl-icd-opencl-dev
model_path =
n_ctx = 2048
n_threads = 4
n_gpu_layers = 8
temperature = 0.7
top_p = 0.9
top_k = 40
repeat_penalty = 1.1
max_tokens = 1024
seed = -1

[transformers]
# SafeTensors Model Settings (HuggingFace)
# Note: PyTorch has experimental Vulkan backend for mobile GPUs
# This is highly experimental and may not work
model_path =
device = cpu
torch_dtype = float32
load_in_8bit = false
load_in_4bit = true
trust_remote_code = false
max_tokens = 1024
temperature = 0.7
top_p = 0.9
top_k = 40
repetition_penalty = 1.1

# EXPERIMENTAL NOTES:
#
# Mali-G610 GPU Support Status:
# - OpenCL: Partial support via CLBlast, may accelerate some layers
# - Vulkan: PyTorch vulkan backend is experimental
# - Direct Mali: No native support in major LLM frameworks
#
# To enable OpenCL acceleration for llama.cpp:
# 1. Install dependencies:
#    sudo apt install libclblast-dev opencl-headers ocl-icd-opencl-dev
# 2. Install Mali OpenCL driver (if available for your distro)
# 3. Rebuild llama-cpp-python with CLBlast:
#    CMAKE_ARGS="-DLLAMA_CLBLAST=on" pip install llama-cpp-python --force-reinstall
#
# n_gpu_layers = 8: Offloads only some layers (conservative)
#   - Increase if stable, decrease if crashes
#   - Set to 0 if OpenCL not working
#
# For transformers:
#   - load_in_4bit = true reduces memory pressure
#   - CPU inference is the reliable fallback
#
# Performance Expectations:
#   - Best case: 20-30% speedup over pure CPU
#   - Likely case: Similar to CPU or unstable
#   - Use orangepi5plus_cpu.conf for stable operation
Reference in New Issue
Block a user