{# Autarch/web/templates/llm_settings.html — LLM backend settings page #}
{% extends "base.html" %}
{% block title %}LLM Settings - AUTARCH{% endblock %}
{% block content %}
<div class="page-header" style="display:flex;align-items:center;gap:1rem;flex-wrap:wrap">
<h1>LLM Settings</h1>
<a href="{{ url_for('settings.index') }}" class="btn btn-sm" style="margin-left:auto">&larr; Back to Settings</a>
</div>
<!-- Backend Selector + Load Status -->
<div class="section">
<h2>Active Backend</h2>
<div class="tab-bar" id="llm-tab-bar">
<button class="tab {% if llm_backend == 'local' or llm_backend == 'transformers' %}active{% endif %}"
onclick="llmTab('local')">Local Model</button>
<button class="tab {% if llm_backend == 'claude' %}active{% endif %}"
onclick="llmTab('claude')">Claude</button>
<button class="tab {% if llm_backend == 'openai' %}active{% endif %}"
onclick="llmTab('openai')">OpenAI</button>
<button class="tab {% if llm_backend == 'huggingface' %}active{% endif %}"
onclick="llmTab('huggingface')">HuggingFace</button>
</div>
<p style="font-size:0.8rem;color:var(--text-secondary);margin-top:0.5rem">
Configured backend: <strong style="color:var(--accent)">{{ llm_backend }}</strong>
— select a tab, fill in settings, and click <em>Save &amp; Activate</em>, then <em>Load Model</em> to initialise.
</p>
<!-- Load / Status bar -->
<div style="display:flex;align-items:center;gap:0.75rem;flex-wrap:wrap;margin-top:0.9rem;
padding:0.65rem 0.9rem;border-radius:var(--radius);border:1px solid var(--border);
background:var(--bg-card)">
<div id="llm-status-dot" style="width:10px;height:10px;border-radius:50%;
background:var(--text-muted);flex-shrink:0" title="Not loaded"></div>
<span id="llm-status-text" style="font-size:0.83rem;color:var(--text-secondary);flex:1">
Not loaded — click <strong>Load Model</strong> to initialise the current backend.
</span>
<button id="btn-llm-load" class="btn btn-primary btn-sm" onclick="loadLLM()">
Load Model
</button>
<button class="btn btn-sm" onclick="debugOpen()"
title="Open debug console to see detailed load output">
Debug Log
</button>
</div>
<p style="font-size:0.75rem;color:var(--text-muted);margin-top:0.4rem">
Local GGUF models may take 10–60 s to load depending on size.
The page will wait — check the Debug Log for live output.
</p>
</div>
<!-- ══════════════════════════════════════════════════════════════════ -->
<!-- LOCAL MODEL TAB -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<div id="tab-local" class="llm-tab-panel {% if llm_backend != 'local' and llm_backend != 'transformers' %}hidden{% endif %}">
<!-- Model Folder + File Picker -->
<div class="section">
<h2>Model Selection</h2>
<div class="form-group">
<label for="model-dir">Models Folder</label>
<div style="display:flex;gap:0.5rem">
<input type="text" id="model-dir" value="{{ default_models_dir }}" style="flex:1" placeholder="/path/to/models">
<button class="btn btn-sm btn-primary" onclick="scanModels()">Scan</button>
</div>
<p style="font-size:0.78rem;color:var(--text-secondary);margin-top:0.3rem">
Scans for .gguf, .ggml, .bin files and SafeTensors model directories.
</p>
</div>
<div id="model-list-wrap" style="display:none">
<label>Available Models</label>
<div id="model-list" style="margin-top:0.4rem;max-height:240px;overflow-y:auto;border:1px solid var(--border);border-radius:4px"></div>
</div>
<div class="form-group" style="margin-top:0.75rem">
<label for="selected-model-path">Selected Model Path</label>
<input type="text" id="selected-model-path"
value="{{ llama.model_path if llm_backend == 'local' else transformers.model_path }}"
placeholder="Click a model above or enter path manually" style="font-family:monospace;font-size:0.82rem">
</div>
<div class="form-group">
<label class="checkbox-label">
<input type="checkbox" id="safetensors-chk"
{% if llm_backend == 'transformers' %}checked{% endif %}
onchange="llmToggleSafetensors(this.checked)">
SafeTensors / Transformers mode (uncheck for llama.cpp GGUF)
</label>
</div>
</div>
<!-- llama.cpp Parameters -->
<div id="llamacpp-params" class="section {% if llm_backend == 'transformers' %}hidden{% endif %}">
<h2>llama.cpp Parameters</h2>
<form id="form-local" method="POST" action="{{ url_for('settings.update_llm') }}" class="settings-form">
<input type="hidden" name="backend" value="local">
<input type="hidden" id="llama-model-hidden" name="model_path" value="{{ llama.model_path }}">
<input type="hidden" id="gpu-backend-input" name="gpu_backend" value="{{ llama.get('gpu_backend', 'cpu') }}">
<!-- ── GPU / Compute Backend Selector ─────────────────────────── -->
<h3 style="margin-top:0;font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">GPU / Compute Backend</h3>
<div class="gpu-presets" id="gpu-presets">
<div class="gpu-card {% if llama.get('gpu_backend','cpu') == 'cuda' %}gpu-selected{% endif %}"
data-backend="cuda" onclick="selectGpuPreset('cuda')">
<div class="gpu-card-icon" style="color:#76b900">&#x26A1;</div>
<div class="gpu-card-name">CUDA</div>
<div class="gpu-card-ram" style="color:#76b900">7.5 GB VRAM</div>
<div class="gpu-card-chip">NVIDIA GPU</div>
<div class="gpu-card-badge" style="background:rgba(118,185,0,0.12);border-color:rgba(118,185,0,0.3);color:#76b900">n_gpu_layers = -1</div>
</div>
<div class="gpu-card {% if llama.get('gpu_backend','cpu') == 'vulkan' %}gpu-selected{% endif %}"
data-backend="vulkan" onclick="selectGpuPreset('vulkan')">
<div class="gpu-card-icon" style="color:#0071c5">&#x25C6;</div>
<div class="gpu-card-name">Intel · Vulkan</div>
<div class="gpu-card-ram" style="color:#0071c5">15 GB RAM</div>
<div class="gpu-card-chip">Intel GPU / iGPU</div>
<div class="gpu-card-badge" style="background:rgba(0,113,197,0.12);border-color:rgba(0,113,197,0.3);color:#5ba4e8">n_gpu_layers = -1</div>
</div>
<div class="gpu-card {% if llama.get('gpu_backend','cpu') != 'cuda' and llama.get('gpu_backend','cpu') != 'vulkan' %}gpu-selected{% endif %}"
data-backend="cpu" onclick="selectGpuPreset('cpu')">
<div class="gpu-card-icon" style="color:var(--text-secondary)">&#x1F5A5;</div>
<div class="gpu-card-name">CPU Only</div>
<div class="gpu-card-ram" style="color:var(--text-secondary)">24 GB RAM</div>
<div class="gpu-card-chip">Any CPU</div>
<div class="gpu-card-badge" style="background:rgba(128,128,128,0.1);border-color:rgba(128,128,128,0.25);color:#888">n_gpu_layers = 0</div>
</div>
</div>
<!-- Install hint shown after selecting CUDA/Vulkan -->
<div id="gpu-hint" class="gpu-hint" style="display:none"></div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em;margin-top:1.25rem">Context &amp; Threading</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="llama-n-ctx">Context Size (n_ctx)</label>
<input type="number" id="llama-n-ctx" name="n_ctx" value="{{ llama.n_ctx }}" min="128" max="131072" step="128">
<small>Token window. Higher = more memory.</small>
</div>
<div class="form-group">
<label for="llama-n-threads">CPU Threads</label>
<input type="number" id="llama-n-threads" name="n_threads" value="{{ llama.n_threads }}" min="1" max="128">
<small>Match physical cores.</small>
</div>
<div class="form-group">
<label for="llama-n-gpu">GPU Layers</label>
<input type="number" id="llama-n-gpu" name="n_gpu_layers" value="{{ llama.n_gpu_layers }}" min="-1">
<small>-1 = all, 0 = CPU only.</small>
</div>
<div class="form-group">
<label for="llama-n-batch">Batch Size (n_batch)</label>
<input type="number" id="llama-n-batch" name="n_batch" value="{{ llama.get('n_batch', 512) }}" min="1" max="4096">
<small>Prompt processing batch.</small>
</div>
</div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">Sampling / Generation</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="llama-temp">Temperature</label>
<input type="number" id="llama-temp" name="temperature" value="{{ llama.temperature }}" step="0.05" min="0" max="2">
<small>0 = deterministic, 1 = creative.</small>
</div>
<div class="form-group">
<label for="llama-top-p">Top-P</label>
<input type="number" id="llama-top-p" name="top_p" value="{{ llama.top_p }}" step="0.05" min="0" max="1">
<small>Nucleus sampling threshold.</small>
</div>
<div class="form-group">
<label for="llama-top-k">Top-K</label>
<input type="number" id="llama-top-k" name="top_k" value="{{ llama.top_k }}" min="0" max="200">
<small>0 = disabled.</small>
</div>
<div class="form-group">
<label for="llama-repeat">Repeat Penalty</label>
<input type="number" id="llama-repeat" name="repeat_penalty" value="{{ llama.repeat_penalty }}" step="0.05" min="1" max="2">
<small>Penalises repeated tokens.</small>
</div>
<div class="form-group">
<label for="llama-max-tok">Max Tokens</label>
<input type="number" id="llama-max-tok" name="max_tokens" value="{{ llama.max_tokens }}" min="1" max="131072">
<small>Max new tokens generated.</small>
</div>
<div class="form-group">
<label for="llama-seed">Seed</label>
<input type="number" id="llama-seed" name="seed" value="{{ llama.seed }}">
<small>-1 = random.</small>
</div>
</div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">RoPE &amp; Mirostat</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="llama-rope">RoPE Scaling</label>
<select id="llama-rope" name="rope_scaling_type">
<option value="0" {% if llama.get('rope_scaling_type', '0') == '0' %}selected{% endif %}>None (0)</option>
<option value="1" {% if llama.get('rope_scaling_type', '0') == '1' %}selected{% endif %}>Linear (1)</option>
<option value="2" {% if llama.get('rope_scaling_type', '0') == '2' %}selected{% endif %}>YaRN (2)</option>
</select>
<small>Extends context via RoPE.</small>
</div>
<div class="form-group">
<label for="llama-mirostat">Mirostat Mode</label>
<select id="llama-mirostat" name="mirostat_mode">
<option value="0" {% if llama.get('mirostat_mode', '0') == '0' %}selected{% endif %}>Off (0)</option>
<option value="1" {% if llama.get('mirostat_mode', '0') == '1' %}selected{% endif %}>v1 (1)</option>
<option value="2" {% if llama.get('mirostat_mode', '0') == '2' %}selected{% endif %}>v2 (2)</option>
</select>
<small>Replaces top-p/k sampling.</small>
</div>
<div class="form-group">
<label for="llama-m-tau">Mirostat Tau</label>
<input type="number" id="llama-m-tau" name="mirostat_tau" value="{{ llama.get('mirostat_tau', 5.0) }}" step="0.5" min="0">
<small>Target entropy (5.0 default).</small>
</div>
<div class="form-group">
<label for="llama-m-eta">Mirostat Eta</label>
<input type="number" id="llama-m-eta" name="mirostat_eta" value="{{ llama.get('mirostat_eta', 0.1) }}" step="0.01" min="0">
<small>Learning rate (0.1 default).</small>
</div>
</div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">Performance Flags</h3>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-bottom:1rem">
<label class="checkbox-label">
<input type="checkbox" name="flash_attn" {% if llama.get('flash_attn', False) %}checked{% endif %}>
Flash Attention (faster on supported hardware)
</label>
</div>
<button type="submit" class="btn btn-primary" onclick="syncLocalPath('llama')">Save &amp; Activate llama.cpp</button>
</form>
</div>
<!-- Transformers / SafeTensors Parameters -->
<div id="transformers-params" class="section {% if llm_backend != 'transformers' %}hidden{% endif %}">
<h2>Transformers / SafeTensors Parameters</h2>
<form id="form-transformers" method="POST" action="{{ url_for('settings.update_llm') }}" class="settings-form">
<input type="hidden" name="backend" value="transformers">
<input type="hidden" id="tf-model-hidden" name="model_path" value="{{ transformers.model_path }}">
<h3 style="margin-top:0;font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">Device &amp; Precision</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="tf-device">Device</label>
<select id="tf-device" name="device">
<option value="auto" {% if transformers.device == 'auto' %}selected{% endif %}>auto</option>
<option value="cpu" {% if transformers.device == 'cpu' %}selected{% endif %}>cpu</option>
<option value="cuda" {% if transformers.device == 'cuda' %}selected{% endif %}>cuda</option>
<option value="cuda:0" {% if transformers.device == 'cuda:0' %}selected{% endif %}>cuda:0</option>
<option value="cuda:1" {% if transformers.device == 'cuda:1' %}selected{% endif %}>cuda:1</option>
<option value="mps" {% if transformers.device == 'mps' %}selected{% endif %}>mps (Apple)</option>
<option value="xpu" {% if transformers.device == 'xpu' %}selected{% endif %}>xpu (Intel)</option>
</select>
</div>
<div class="form-group">
<label for="tf-dtype">Torch Dtype</label>
<select id="tf-dtype" name="torch_dtype">
<option value="auto" {% if transformers.torch_dtype == 'auto' %}selected{% endif %}>auto</option>
<option value="float16" {% if transformers.torch_dtype == 'float16' %}selected{% endif %}>float16</option>
<option value="bfloat16" {% if transformers.torch_dtype == 'bfloat16' %}selected{% endif %}>bfloat16</option>
<option value="float32" {% if transformers.torch_dtype == 'float32' %}selected{% endif %}>float32</option>
</select>
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-bottom:0.75rem">
<label class="checkbox-label">
<input type="checkbox" name="load_in_8bit" {% if transformers.load_in_8bit %}checked{% endif %}>
8-bit quantization (bitsandbytes)
</label>
<label class="checkbox-label">
<input type="checkbox" name="load_in_4bit" {% if transformers.load_in_4bit %}checked{% endif %}>
4-bit quantization (bitsandbytes)
</label>
<label class="checkbox-label">
<input type="checkbox" name="llm_int8_enable_fp32_cpu_offload"
{% if transformers.llm_int8_enable_fp32_cpu_offload %}checked{% endif %}>
FP32 CPU Offload
<small style="color:var(--text-muted);margin-left:0.3rem">(enables partial GPU+CPU loading for 8-bit)</small>
</label>
<label class="checkbox-label">
<input type="checkbox" name="trust_remote_code" {% if transformers.trust_remote_code %}checked{% endif %}>
Trust Remote Code
</label>
</div>
<div class="form-group" style="max-width:340px;margin-bottom:0.75rem">
<label for="tf-device-map">Device Map</label>
<input type="text" id="tf-device-map" name="device_map"
value="{{ transformers.get('device_map', 'auto') }}"
placeholder="auto">
<small>
<code>auto</code> (default) — let transformers decide &nbsp;|&nbsp;
<code>cpu</code> — CPU only &nbsp;|&nbsp;
<code>cuda:0</code> — single GPU. Enable FP32 CPU Offload above if 8-bit gives a "dispatched on CPU" error.
</small>
</div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">Tokenizer</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="tf-pad-side">Padding Side</label>
<select id="tf-pad-side" name="padding_side">
<option value="left" {% if transformers.get('padding_side', 'left') == 'left' %}selected{% endif %}>left</option>
<option value="right" {% if transformers.get('padding_side', 'left') == 'right' %}selected{% endif %}>right</option>
</select>
<small>left = causal LM standard.</small>
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-bottom:0.75rem">
<label class="checkbox-label">
<input type="checkbox" name="use_fast_tokenizer"
{% if transformers.get('use_fast_tokenizer', True) %}checked{% endif %}>
Use Fast Tokenizer (Rust-backed)
</label>
</div>
<h3 style="font-size:0.9rem;color:var(--text-secondary);text-transform:uppercase;letter-spacing:0.05em">Generation</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="tf-max-tok">Max New Tokens</label>
<input type="number" id="tf-max-tok" name="max_tokens" value="{{ transformers.max_tokens }}" min="1" max="131072">
</div>
<div class="form-group">
<label for="tf-temp">Temperature</label>
<input type="number" id="tf-temp" name="temperature" value="{{ transformers.temperature }}" step="0.05" min="0" max="2">
</div>
<div class="form-group">
<label for="tf-top-p">Top-P</label>
<input type="number" id="tf-top-p" name="top_p" value="{{ transformers.top_p }}" step="0.05" min="0" max="1">
</div>
<div class="form-group">
<label for="tf-top-k">Top-K</label>
<input type="number" id="tf-top-k" name="top_k" value="{{ transformers.top_k }}" min="0" max="200">
<small>0 = disabled.</small>
</div>
<div class="form-group">
<label for="tf-rep-pen">Repetition Penalty</label>
<input type="number" id="tf-rep-pen" name="repetition_penalty" value="{{ transformers.repetition_penalty }}" step="0.05" min="1" max="2">
</div>
<div class="form-group">
<label for="tf-beams">Num Beams</label>
<input type="number" id="tf-beams" name="num_beams" value="{{ transformers.get('num_beams', 1) }}" min="1" max="16">
<small>1 = greedy/sampling.</small>
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-bottom:1rem">
<label class="checkbox-label">
<input type="checkbox" name="do_sample"
{% if transformers.get('do_sample', True) %}checked{% endif %}>
Do Sample (enables temp/top-p/top-k)
</label>
</div>
<button type="submit" class="btn btn-primary" onclick="syncLocalPath('tf')">Save &amp; Activate Transformers</button>
</form>
</div>
</div><!-- end tab-local -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<!-- CLAUDE TAB -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<div id="tab-claude" class="llm-tab-panel {% if llm_backend != 'claude' %}hidden{% endif %}">
<div class="section">
<h2>Claude API</h2>
<p style="font-size:0.82rem;color:var(--text-secondary)">
Requires an <a href="https://console.anthropic.com" target="_blank" rel="noopener">Anthropic account</a>.
Get your API key from the console.
</p>
<form method="POST" action="{{ url_for('settings.update_llm') }}" class="settings-form">
<input type="hidden" name="backend" value="claude">
<div class="form-group">
<label for="claude-key">API Key</label>
<input type="password" id="claude-key" name="api_key" value="{{ claude.api_key }}" placeholder="sk-ant-api03-...">
<small>Stored in autarch_settings.conf — keep it safe.</small>
</div>
<div class="form-group">
<label for="claude-model">Model</label>
<select id="claude-model" name="model">
<optgroup label="Claude 4.6">
<option value="claude-opus-4-6" {% if claude.model == 'claude-opus-4-6' %}selected{% endif %}>claude-opus-4-6 (most capable)</option>
<option value="claude-sonnet-4-6" {% if claude.model == 'claude-sonnet-4-6' %}selected{% endif %}>claude-sonnet-4-6 (balanced)</option>
</optgroup>
<optgroup label="Claude 4.5">
<option value="claude-opus-4-5" {% if claude.model == 'claude-opus-4-5' %}selected{% endif %}>claude-opus-4-5</option>
<option value="claude-sonnet-4-5" {% if claude.model == 'claude-sonnet-4-5' %}selected{% endif %}>claude-sonnet-4-5</option>
<option value="claude-haiku-4-5-20251001" {% if claude.model == 'claude-haiku-4-5-20251001' %}selected{% endif %}>claude-haiku-4-5 (fastest)</option>
</optgroup>
<optgroup label="Claude 3.5 / 3">
<option value="claude-3-5-sonnet-20241022" {% if claude.model == 'claude-3-5-sonnet-20241022' %}selected{% endif %}>claude-3-5-sonnet-20241022</option>
<option value="claude-3-5-haiku-20241022" {% if claude.model == 'claude-3-5-haiku-20241022' %}selected{% endif %}>claude-3-5-haiku-20241022</option>
<option value="claude-3-opus-20240229" {% if claude.model == 'claude-3-opus-20240229' %}selected{% endif %}>claude-3-opus-20240229</option>
</optgroup>
</select>
</div>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="claude-max-tok">Max Tokens</label>
<input type="number" id="claude-max-tok" name="max_tokens" value="{{ claude.max_tokens }}" min="1" max="200000">
</div>
<div class="form-group">
<label for="claude-temp">Temperature</label>
<input type="number" id="claude-temp" name="temperature" value="{{ claude.temperature }}" step="0.05" min="0" max="1">
<small>0–1. Claude default is 1.</small>
</div>
<div class="form-group">
<label for="claude-top-p">Top-P</label>
<input type="number" id="claude-top-p" name="top_p" value="{{ claude.get('top_p', 1.0) }}" step="0.05" min="0" max="1">
<small>Use with lower temp.</small>
</div>
<div class="form-group">
<label for="claude-top-k">Top-K</label>
<input type="number" id="claude-top-k" name="top_k" value="{{ claude.get('top_k', 0) }}" min="0">
<small>0 = disabled.</small>
</div>
</div>
<button type="submit" class="btn btn-primary">Save &amp; Activate Claude</button>
</form>
</div>
</div><!-- end tab-claude -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<!-- OPENAI TAB -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<div id="tab-openai" class="llm-tab-panel {% if llm_backend != 'openai' %}hidden{% endif %}">
<div class="section">
<h2>OpenAI API</h2>
<p style="font-size:0.82rem;color:var(--text-secondary)">
Also compatible with any OpenAI-format endpoint: LiteLLM, Ollama (<code>/v1</code>), vLLM, LocalAI, etc.
Just set the Base URL to your local server.
</p>
<form method="POST" action="{{ url_for('settings.update_llm') }}" class="settings-form">
<input type="hidden" name="backend" value="openai">
<div class="form-group">
<label for="oai-key">API Key</label>
<input type="password" id="oai-key" name="api_key" value="{{ openai.api_key }}" placeholder="sk-...">
<small>Leave blank for local servers (Ollama, vLLM, etc.).</small>
</div>
<div class="form-group">
<label for="oai-base-url">Base URL</label>
<input type="text" id="oai-base-url" name="base_url" value="{{ openai.base_url }}" placeholder="https://api.openai.com/v1">
<small>Change to point to local LLM servers.</small>
</div>
<div class="form-group">
<label for="oai-model">Model</label>
<input type="text" id="oai-model" name="model" value="{{ openai.model }}" placeholder="gpt-4o" list="oai-model-list">
<datalist id="oai-model-list">
<option value="gpt-4o">
<option value="gpt-4o-mini">
<option value="gpt-4-turbo">
<option value="gpt-3.5-turbo">
<option value="o1-preview">
<option value="o1-mini">
<option value="o3-mini">
</datalist>
<small>Type any model ID, or pick from suggestions.</small>
</div>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="oai-max-tok">Max Tokens</label>
<input type="number" id="oai-max-tok" name="max_tokens" value="{{ openai.max_tokens }}" min="1" max="128000">
</div>
<div class="form-group">
<label for="oai-temp">Temperature</label>
<input type="number" id="oai-temp" name="temperature" value="{{ openai.temperature }}" step="0.1" min="0" max="2">
<small>0–2. 1 = default.</small>
</div>
<div class="form-group">
<label for="oai-top-p">Top-P</label>
<input type="number" id="oai-top-p" name="top_p" value="{{ openai.top_p }}" step="0.05" min="0" max="1">
</div>
<div class="form-group">
<label for="oai-freq-pen">Frequency Penalty</label>
<input type="number" id="oai-freq-pen" name="frequency_penalty" value="{{ openai.frequency_penalty }}" step="0.1" min="-2" max="2">
<small>Reduce repetition.</small>
</div>
<div class="form-group">
<label for="oai-pres-pen">Presence Penalty</label>
<input type="number" id="oai-pres-pen" name="presence_penalty" value="{{ openai.presence_penalty }}" step="0.1" min="-2" max="2">
<small>Encourage new topics.</small>
</div>
</div>
<button type="submit" class="btn btn-primary">Save &amp; Activate OpenAI</button>
</form>
</div>
</div><!-- end tab-openai -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<!-- HUGGINGFACE TAB -->
<!-- ══════════════════════════════════════════════════════════════════ -->
<div id="tab-huggingface" class="llm-tab-panel {% if llm_backend != 'huggingface' %}hidden{% endif %}">
<div class="section">
<h2>HuggingFace Inference API</h2>
<!-- Token / Login -->
<h3>Account</h3>
<div class="form-group">
<label for="hf-token">HuggingFace Token</label>
<div style="display:flex;gap:0.5rem">
<input type="password" id="hf-token" value="{{ huggingface.api_key }}" placeholder="hf_..." style="flex:1">
<button class="btn btn-sm btn-primary" type="button" onclick="hfVerifyToken()">Verify Token</button>
</div>
<small>Get yours at <a href="https://huggingface.co/settings/tokens" target="_blank" rel="noopener">huggingface.co/settings/tokens</a></small>
</div>
<div id="hf-account-info" style="display:none;padding:0.5rem 0.75rem;border-radius:4px;background:rgba(0,255,65,0.08);border:1px solid var(--success);font-size:0.82rem;margin-bottom:0.75rem"></div>
<form id="form-hf" method="POST" action="{{ url_for('settings.update_llm') }}" class="settings-form">
<input type="hidden" name="backend" value="huggingface">
<input type="hidden" id="hf-api-key-hidden" name="api_key" value="{{ huggingface.api_key }}">
<!-- Model -->
<h3>Model</h3>
<div class="form-group">
<label for="hf-model">Model ID</label>
<div style="display:flex;gap:0.5rem">
<input type="text" id="hf-model" name="model" value="{{ huggingface.model }}"
placeholder="mistralai/Mistral-7B-Instruct-v0.3" style="flex:1">
<a href="https://huggingface.co/models?pipeline_tag=text-generation&sort=trending" target="_blank"
rel="noopener" class="btn btn-sm">Browse HF</a>
</div>
<small>Full model repo ID (e.g. <code>meta-llama/Llama-3.1-8B-Instruct</code>).</small>
</div>
<div class="form-group">
<label for="hf-provider">Inference Provider</label>
<select id="hf-provider" name="provider">
<option value="auto" {% if huggingface.provider == 'auto' %}selected{% endif %}>auto (HF selects best)</option>
<option value="cerebras" {% if huggingface.provider == 'cerebras' %}selected{% endif %}>Cerebras</option>
<option value="together-ai" {% if huggingface.provider == 'together-ai' %}selected{% endif %}>Together AI</option>
<option value="sambanova" {% if huggingface.provider == 'sambanova' %}selected{% endif %}>SambaNova</option>
<option value="novita" {% if huggingface.provider == 'novita' %}selected{% endif %}>Novita</option>
<option value="cohere" {% if huggingface.provider == 'cohere' %}selected{% endif %}>Cohere</option>
<option value="fireworks-ai" {% if huggingface.provider == 'fireworks-ai' %}selected{% endif %}>Fireworks AI</option>
<option value="hf-inference" {% if huggingface.provider == 'hf-inference' %}selected{% endif %}>HF Inference (default)</option>
</select>
<small>Provider used for serverless inference. Some require separate accounts.</small>
</div>
<div class="form-group">
<label for="hf-endpoint">Custom Endpoint URL (optional)</label>
<input type="text" id="hf-endpoint" name="endpoint" value="{{ huggingface.endpoint }}"
placeholder="https://your-endpoint.endpoints.huggingface.cloud">
<small>Overrides provider. Use for Dedicated Inference Endpoints.</small>
</div>
<!-- Generation Parameters -->
<h3>Generation Parameters</h3>
<div style="display:grid;grid-template-columns:repeat(auto-fill,minmax(180px,1fr));gap:0.75rem 1rem">
<div class="form-group">
<label for="hf-max-tok">Max New Tokens</label>
<input type="number" id="hf-max-tok" name="max_tokens" value="{{ huggingface.max_tokens }}" min="1" max="32768">
</div>
<div class="form-group">
<label for="hf-temp">Temperature</label>
<input type="number" id="hf-temp" name="temperature" value="{{ huggingface.temperature }}" step="0.05" min="0.01" max="2">
</div>
<div class="form-group">
<label for="hf-top-p">Top-P</label>
<input type="number" id="hf-top-p" name="top_p" value="{{ huggingface.top_p }}" step="0.05" min="0" max="1">
</div>
<div class="form-group">
<label for="hf-top-k">Top-K</label>
<input type="number" id="hf-top-k" name="top_k" value="{{ huggingface.top_k }}" min="0" max="200">
<small>0 = disabled.</small>
</div>
<div class="form-group">
<label for="hf-rep-pen">Repetition Penalty</label>
<input type="number" id="hf-rep-pen" name="repetition_penalty" value="{{ huggingface.repetition_penalty }}" step="0.05" min="1" max="2">
</div>
<div class="form-group">
<label for="hf-seed">Seed</label>
<input type="number" id="hf-seed" name="seed" value="{{ huggingface.seed }}">
<small>-1 = random.</small>
</div>
</div>
<div style="display:flex;gap:1.5rem;flex-wrap:wrap;margin-bottom:0.75rem">
<label class="checkbox-label">
<input type="checkbox" name="do_sample" {% if huggingface.do_sample %}checked{% endif %}>
Do Sample
</label>
</div>
<div class="form-group">
<label for="hf-stop">Stop Sequences (comma-separated)</label>
<input type="text" id="hf-stop" name="stop_sequences" value="{{ huggingface.stop_sequences }}"
placeholder="&lt;|endoftext|&gt;, &lt;|im_end|&gt;">
<small>Tokens that will stop generation.</small>
</div>
<button type="submit" class="btn btn-primary">Save &amp; Activate HuggingFace</button>
</form>
</div>
</div><!-- end tab-huggingface -->
<style>
/* Page-local styles for the LLM settings page (tab panels + GPU preset cards). */
/* Used by the llmTab()/llmToggleSafetensors() JS to show/hide panels. */
.hidden { display: none !important; }
/* Helper text rendered under form inputs. */
.form-group small { display:block; font-size:0.72rem; color:var(--text-muted); margin-top:0.2rem; }
/* ── GPU Preset Cards ───────────────────────────────────────────────────── */
/* Three-column grid of clickable compute-backend cards (CUDA / Vulkan / CPU). */
.gpu-presets {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 0.75rem;
margin-bottom: 0.75rem;
}
/* Stack the preset cards vertically on narrow viewports. */
@media (max-width: 600px) { .gpu-presets { grid-template-columns: 1fr; } }
.gpu-card {
border: 1px solid var(--border);
border-radius: var(--radius);
padding: 0.85rem 1rem;
cursor: pointer;
display: flex;
flex-direction: column;
gap: 0.18rem;
transition: border-color 0.15s, box-shadow 0.15s, background 0.15s;
user-select: none;
background: var(--bg-card);
}
.gpu-card:hover {
border-color: var(--accent);
box-shadow: 0 0 12px rgba(0,255,65,0.07);
background: rgba(0,255,65,0.03);
}
/* .gpu-selected is toggled by selectGpuPreset() to mark the active backend. */
.gpu-card.gpu-selected {
border-color: var(--accent);
box-shadow: 0 0 0 2px rgba(0,255,65,0.18);
background: rgba(0,255,65,0.04);
}
.gpu-card-icon { font-size: 1.3rem; line-height: 1; margin-bottom: 0.15rem; }
.gpu-card-name { font-size: 0.95rem; font-weight: 700; color: var(--text-primary); }
.gpu-card-ram { font-size: 0.88rem; font-weight: 700; }
.gpu-card-chip { font-size: 0.72rem; color: var(--text-muted); }
/* Small monospace pill showing the n_gpu_layers value for the preset. */
.gpu-card-badge {
display: inline-block;
margin-top: 0.35rem;
padding: 1px 7px;
border-radius: 3px;
font-size: 0.68rem;
font-family: monospace;
border: 1px solid;
font-weight: 600;
align-self: flex-start;
}
/* Install hint shown below the cards after selecting CUDA/Vulkan. */
.gpu-hint {
font-size: 0.78rem;
color: var(--text-secondary);
padding: 0.45rem 0.75rem;
border-left: 3px solid var(--accent);
background: rgba(0,255,65,0.04);
border-radius: 0 4px 4px 0;
margin-bottom: 0.75rem;
}
</style>
<script>
// ── Load Model ────────────────────────────────────────────────────────────────
// POST /settings/llm/load and reflect the outcome in the status bar.
// Updates three elements: the Load button, the coloured status dot, and the
// status text. Expects a JSON reply {ok, backend, model_name} or {ok, error}.
function loadLLM() {
  var btn = document.getElementById('btn-llm-load');
  var dot = document.getElementById('llm-status-dot');
  var text = document.getElementById('llm-status-text');

  // Re-enable the button and restore its label (shared by every outcome).
  function resetButton() {
    btn.disabled = false;
    btn.textContent = 'Load Model';
  }

  // Enter the "loading" state: button disabled, amber dot, waiting message.
  btn.disabled = true;
  btn.textContent = 'Loading…';
  dot.style.background = '#f59e0b'; // amber = in-progress
  dot.title = 'Loading…';
  text.innerHTML = '<em>Initialising — please wait. Check the Debug Log window for live output…</em>';

  fetch('/settings/llm/load', {method: 'POST'})
    .then(function(r) {
      // Surface non-2xx responses (e.g. a 500 serving an HTML error page)
      // as a readable message instead of an opaque JSON parse error.
      if (!r.ok) { throw new Error('HTTP ' + r.status + ' ' + r.statusText); }
      return r.json();
    })
    .then(function(d) {
      resetButton();
      if (d.ok) {
        dot.style.background = 'var(--success, #34c759)';
        dot.title = 'Loaded';
        text.innerHTML = '&#x2713; <strong style="color:var(--success,#34c759)">'
          + escapeHtml(d.backend) + '</strong> ready &mdash; '
          + escapeHtml(d.model_name);
      } else {
        dot.style.background = 'var(--danger, #ff3b30)';
        dot.title = 'Error';
        text.innerHTML = '&#x2715; <strong style="color:var(--danger,#ff3b30)">Load failed:</strong> '
          + escapeHtml(d.error || 'Unknown error')
          + ' &mdash; <em>check Debug Log for details</em>';
      }
    })
    .catch(function(e) {
      // Network failure, non-2xx status, or malformed JSON all land here.
      resetButton();
      dot.style.background = 'var(--danger, #ff3b30)';
      dot.title = 'Error';
      text.textContent = 'Request failed: ' + e.message;
    });
}
// ── Tab switching ─────────────────────────────────────────────────────────────
// Show the settings pane for `name` and mark the matching tab button active.
// The buttons in #llm-tab-bar appear in the same order as `tabs`.
function llmTab(name) {
  var tabs = ['local', 'claude', 'openai', 'huggingface'];
  tabs.forEach(function(id) {
    var pane = document.getElementById('tab-' + id);
    pane.classList.toggle('hidden', id !== name);
  });
  var buttons = document.querySelectorAll('#llm-tab-bar .tab');
  Array.prototype.forEach.call(buttons, function(btn, idx) {
    btn.classList.toggle('active', tabs[idx] === name);
  });
}
// SafeTensors toggle: show the llama.cpp parameter panel for GGUF models and
// the transformers panel for SafeTensors models (exactly one is visible).
function llmToggleSafetensors(isST) {
  var llamaPanel = document.getElementById('llamacpp-params');
  var tfPanel = document.getElementById('transformers-params');
  llamaPanel.classList.toggle('hidden', isST);
  tfPanel.classList.toggle('hidden', !isST);
}
// Copy the currently selected model path into the hidden form field of the
// llama.cpp form (target === 'llama') or the transformers form (otherwise).
function syncLocalPath(target) {
  var path = document.getElementById('selected-model-path').value;
  var hiddenId = (target === 'llama') ? 'llama-model-hidden' : 'tf-model-hidden';
  document.getElementById(hiddenId).value = path;
}
// Scan models folder: POST the folder path to /settings/llm/scan-models and
// render the returned model list. Clicking a row selects that model — it
// fills the hidden selected-model-path field, syncs the SafeTensors checkbox
// and parameter panels, and highlights the row.
// NOTE(review): uses the implicit global `event` to find the clicked button
// (the inline onclick passes no argument). This works in browsers but is
// deprecated — confirm whether the template can pass `this` explicitly.
function scanModels() {
  var folder = document.getElementById('model-dir').value.trim();
  if (!folder) { alert('Enter a folder path first.'); return; }
  var btn = event.target;
  // Single place to restore the button, used by both success and error paths.
  var resetBtn = function() { btn.textContent = 'Scan'; btn.disabled = false; };
  btn.textContent = 'Scanning...';
  btn.disabled = true;
  fetch('/settings/llm/scan-models', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({folder: folder})
  })
  .then(function(r) {
    // Fail fast on HTTP errors so a 500 HTML page does not surface as a
    // confusing JSON parse error.
    if (!r.ok) { throw new Error('HTTP ' + r.status + ' ' + r.statusText); }
    return r.json();
  })
  .then(function(d) {
    resetBtn();
    if (!d.ok) { alert('Scan error: ' + d.error); return; }
    var wrap = document.getElementById('model-list-wrap');
    var list = document.getElementById('model-list');
    list.innerHTML = '';
    if (!d.models.length) {
      list.innerHTML = '<div style="padding:0.75rem;color:var(--text-secondary);font-size:0.82rem">No supported model files found.</div>';
    } else {
      d.models.forEach(function(m) {
        var row = document.createElement('div');
        row.style.cssText = 'display:flex;align-items:center;gap:0.75rem;padding:0.4rem 0.75rem;cursor:pointer;border-bottom:1px solid var(--border);font-size:0.82rem';
        row.onmouseenter = function() { this.style.background = 'var(--hover)'; };
        row.onmouseleave = function() { this.style.background = ''; };
        // m.name is escaped; m.size_mb is assumed numeric from the server —
        // TODO(review) confirm the scan endpoint never returns it as a string.
        var badge = m.type === 'safetensors' ? '<span style="background:#5a3f9a;color:#fff;border-radius:3px;padding:1px 5px;font-size:0.7rem">ST</span>' :
          '<span style="background:#1a6e3c;color:#fff;border-radius:3px;padding:1px 5px;font-size:0.7rem">GGUF</span>';
        row.innerHTML = badge + '<span style="flex:1;font-family:monospace">' + escapeHtml(m.name) + '</span>'
          + '<span style="color:var(--text-secondary)">' + m.size_mb + ' MB</span>';
        row.onclick = function() {
          document.getElementById('selected-model-path').value = m.path;
          // Auto-toggle SafeTensors checkbox to match the chosen file type.
          var isST = m.type === 'safetensors';
          document.getElementById('safetensors-chk').checked = isST;
          llmToggleSafetensors(isST);
          // Highlight selected row, clearing any previous selection.
          list.querySelectorAll('div').forEach(function(r2) { r2.style.background = ''; r2.style.outline = ''; });
          this.style.background = 'var(--hover)';
          this.style.outline = '1px solid var(--accent)';
        };
        list.appendChild(row);
      });
    }
    wrap.style.display = '';
  })
  .catch(function(e) { resetBtn(); alert('Request failed: ' + e.message); });
}
// ── GPU Preset Selector ───────────────────────────────────────────────────────
// Per-backend defaults applied by selectGpuPreset():
//   n_gpu_layers — llama.cpp layer-offload count (-1 = offload all layers)
//   n_batch      — llama.cpp prompt batch size
//   hint         — install/build note shown under the cards ('' = hide hint)
const _GPU_PRESETS = {
cuda: {
n_gpu_layers: -1, n_batch: 512,
hint: '⚡ CUDA selected — all layers offloaded to NVIDIA GPU (n_gpu_layers = -1). ' +
'Requires llama-cpp-python compiled with CUDA: ' +
'CMAKE_ARGS="-DGGML_CUDA=on" pip install llama-cpp-python --force-reinstall',
},
vulkan: {
n_gpu_layers: -1, n_batch: 512,
hint: '◆ Vulkan (Intel) selected — all layers offloaded via Vulkan API (n_gpu_layers = -1). ' +
'Requires llama-cpp-python compiled with Vulkan: ' +
'CMAKE_ARGS="-DGGML_VULKAN=on" pip install llama-cpp-python --force-reinstall',
},
cpu: {
n_gpu_layers: 0, n_batch: 256,
hint: '', // no hint needed — always works
},
};
// Apply a GPU preset card selection: highlight the chosen card, persist the
// backend name in the hidden input, copy the preset's llama.cpp values into
// the form fields, and show/hide the build hint for that backend.
function selectGpuPreset(backend) {
  var p = _GPU_PRESETS[backend];
  // Guard against an unknown backend (e.g. a stale/mistyped data-backend
  // attribute) — previously this crashed with a TypeError on p.n_gpu_layers.
  if (!p) { return; }
  // Update card highlight
  document.querySelectorAll('.gpu-card').forEach(function(c) {
    c.classList.toggle('gpu-selected', c.dataset.backend === backend);
  });
  // Store selection
  document.getElementById('gpu-backend-input').value = backend;
  // Apply preset values
  document.getElementById('llama-n-gpu').value = p.n_gpu_layers;
  document.getElementById('llama-n-batch').value = p.n_batch;
  // Show / hide hint
  var hint = document.getElementById('gpu-hint');
  if (p.hint) {
    hint.textContent = p.hint;
    hint.style.display = '';
  } else {
    hint.style.display = 'none';
  }
}
// Initialise hint on page load if a non-CPU preset is already saved
(function() {
  var backend = document.getElementById('gpu-backend-input').value;
  if (!backend || backend === 'cpu') { return; }
  var preset = _GPU_PRESETS[backend];
  if (preset && preset.hint) {
    var hintEl = document.getElementById('gpu-hint');
    hintEl.textContent = preset.hint;
    hintEl.style.display = '';
  }
})();
// ── HuggingFace token verify
// POST the token to /settings/llm/hf-verify and display which account it
// belongs to. Also mirrors the token into the hidden api-key field so it is
// submitted with the settings form.
// NOTE(review): uses the implicit global `event` to find the clicked button
// (deprecated; works in browsers) — confirm the inline handler can pass `this`.
function hfVerifyToken() {
  var token = document.getElementById('hf-token').value.trim();
  if (!token) { alert('Enter a token first.'); return; }
  document.getElementById('hf-api-key-hidden').value = token;
  var btn = event.target;
  btn.textContent = 'Verifying...';
  btn.disabled = true;
  fetch('/settings/llm/hf-verify', {
    method: 'POST',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({token: token})
  })
  .then(function(r) {
    // Surface HTTP-level failures instead of a cryptic JSON parse error.
    if (!r.ok) { throw new Error('HTTP ' + r.status + ' ' + r.statusText); }
    return r.json();
  })
  .then(function(d) {
    btn.textContent = 'Verify Token';
    btn.disabled = false;
    var info = document.getElementById('hf-account-info');
    info.style.display = '';
    if (d.ok) {
      // Bug fix: clear the error styling a previous failed attempt may have
      // set — it used to persist, rendering the success message in red.
      info.style.borderColor = '';
      info.style.background = '';
      info.textContent = '✓ Logged in as: ' + d.username + (d.email ? ' (' + d.email + ')' : '');
    } else {
      info.style.borderColor = 'var(--danger)';
      info.style.background = 'rgba(255,59,48,0.08)';
      info.textContent = '✕ Invalid token: ' + d.error;
    }
  })
  .catch(function(e) { btn.textContent = 'Verify Token'; btn.disabled = false; alert('Request failed: ' + e.message); });
}
</script>
{% endblock %}