# Autarch/core/model_router.py

"""
AUTARCH Model Router
Manages concurrent SLM/LAM/SAM model instances for autonomous operation.

Model Tiers:
  SLM (Small Language Model) — Fast classification, routing, yes/no decisions
  SAM (Small Action Model)   — Quick tool execution, simple automated responses
  LAM (Large Action Model)   — Complex multi-step agent tasks, strategic planning
"""

import json
import logging
import threading
from typing import Optional, Dict, Any
from enum import Enum

from .config import get_config

_logger = logging.getLogger('autarch.model_router')


class ModelTier(Enum):
    """Model tiers the router can host.

    Each member's value doubles as the name of the tier's config section
    (the router passes ``tier.value`` to the config when reading settings).
    """
    SLM = 'slm'  # Small Language Model: fast classification, routing, yes/no decisions
    SAM = 'sam'  # Small Action Model: quick tool execution, simple automated responses
    LAM = 'lam'  # Large Action Model: complex multi-step agent tasks, strategic planning


# Fallback chain: for each requested tier, the tiers to try next (in order)
# when that tier is not loaded or its generation fails. Chains only move
# towards LAM; LAM itself has nothing to fall back to.
# Consumed by ModelRouter.generate, which prepends the requested tier.
_FALLBACK = {
    ModelTier.SLM: [ModelTier.SAM, ModelTier.LAM],
    ModelTier.SAM: [ModelTier.LAM],
    ModelTier.LAM: [],
}


class _TierConfigProxy:
    """Config wrapper that serves one model tier's values as a backend section.

    A tier section carries its own ``backend``, ``model_path`` and llama tuning
    keys. For ``backend=local`` those values are served in place of the
    ``[llama]`` section; for ``backend=transformers`` the tier's ``model_path``
    is served in place of the ``[transformers]`` one. Every other lookup is
    forwarded to the wrapped config unchanged.
    """

    # Marker returned by _override() when no override exists for a key.
    _ABSENT = object()

    def __init__(self, base_config, tier_name: str):
        self._base = base_config
        self._tier = tier_name
        # section name -> {key: raw value} served instead of the base value
        self._overrides: Dict[str, Dict[str, str]] = {}
        self._build_overrides()

    def _build_overrides(self):
        """Fill the override table from the tier's own config section."""
        read = self._base.get
        tier = self._tier

        backend = read(tier, 'backend', 'local')
        model_path = read(tier, 'model_path', '')
        llama_values = {
            'model_path': model_path,
            'n_ctx': read(tier, 'n_ctx', '2048'),
            'n_gpu_layers': read(tier, 'n_gpu_layers', '-1'),
            'n_threads': read(tier, 'n_threads', '4'),
        }

        # claude and huggingface are API-based, so they get no path override.
        if backend == 'local':
            self._overrides['llama'] = llama_values
        elif backend == 'transformers':
            self._overrides['transformers'] = {'model_path': model_path}

    def _override(self, section: str, key: str):
        """Return the override for section/key, or ``_ABSENT`` if there is none."""
        return self._overrides.get(section, {}).get(key, self._ABSENT)

    def get(self, section: str, key: str, fallback=None):
        """Return the raw override if present, otherwise the base config's value."""
        value = self._override(section, key)
        if value is self._ABSENT:
            return self._base.get(section, key, fallback)
        return value

    def get_int(self, section: str, key: str, fallback: int = 0) -> int:
        """Return the override as an int (``fallback`` if it does not parse),
        otherwise defer to the base config."""
        value = self._override(section, key)
        if value is self._ABSENT:
            return self._base.get_int(section, key, fallback)
        try:
            return int(value)
        except (ValueError, TypeError):
            return fallback

    def get_float(self, section: str, key: str, fallback: float = 0.0) -> float:
        """Return the override as a float (``fallback`` if it does not parse),
        otherwise defer to the base config."""
        value = self._override(section, key)
        if value is self._ABSENT:
            return self._base.get_float(section, key, fallback)
        try:
            return float(value)
        except (ValueError, TypeError):
            return fallback

    def get_bool(self, section: str, key: str, fallback: bool = False) -> bool:
        """Return the override as a bool, otherwise defer to the base config.

        An override is true only when its lowercased text is one of
        ``true``/``1``/``yes``/``on``; ``fallback`` is not consulted for overrides.
        """
        value = self._override(section, key)
        if value is self._ABSENT:
            return self._base.get_bool(section, key, fallback)
        return str(value).lower() in ('true', '1', 'yes', 'on')

    # The llama/transformers builders are invoked through the Config class with
    # this proxy as ``self``, so lookups they make go through the getters above.
    def get_llama_settings(self) -> dict:
        """Build llama settings via Config's implementation, bound to this proxy."""
        from .config import Config
        return Config.get_llama_settings(self)

    def get_transformers_settings(self) -> dict:
        """Build transformers settings via Config's implementation, bound to this proxy."""
        from .config import Config
        return Config.get_transformers_settings(self)

    # API-backed settings have no tier override and come straight from the base.
    def get_claude_settings(self) -> dict:
        """Return the base config's Claude settings."""
        return self._base.get_claude_settings()

    def get_huggingface_settings(self) -> dict:
        """Return the base config's HuggingFace settings."""
        return self._base.get_huggingface_settings()


class ModelRouter:
    """Manages up to 3 concurrent LLM instances (SLM, SAM, LAM).

    Each tier can use a different backend (local GGUF, transformers, Claude API,
    HuggingFace). The router handles loading, unloading, fallback, and
    thread-safe access.

    Locking:
      * ``_load_lock`` serialises ``load_tier`` so one model is (re)loaded at
        a time.
      * ``_locks[tier]`` is held while that tier's instance generates and while
        it is unloaded, so a model is never freed in the middle of a
        generation.
    """

    # Tier values ``classify`` is allowed to return.
    _CLASSIFY_TIERS = ('sam', 'lam')

    def __init__(self, config=None):
        """Create an empty router.

        Args:
            config: Config object to read tier settings from; the global config
                (``get_config()``) is used when omitted.
        """
        self.config = config or get_config()
        self._instances: Dict[ModelTier, Any] = {}
        self._locks: Dict[ModelTier, threading.Lock] = {
            tier: threading.Lock() for tier in ModelTier
        }
        self._load_lock = threading.Lock()

    @property
    def status(self) -> Dict[str, dict]:
        """Return load status of all tiers.

        Returns:
            ``{tier_name: {'loaded', 'model_name', 'backend', 'enabled',
            'model_path'}}``; ``model_name`` is ``None`` for tiers that are not
            loaded.
        """
        result = {}
        for tier in ModelTier:
            inst = self._instances.get(tier)
            settings = self.config.get_tier_settings(tier.value)
            loaded = inst is not None and inst.is_loaded
            result[tier.value] = {
                'loaded': loaded,
                'model_name': inst.model_name if loaded else None,
                'backend': settings['backend'],
                'enabled': settings['enabled'],
                'model_path': settings['model_path'],
            }
        return result

    def load_tier(self, tier: ModelTier, verbose: bool = False) -> bool:
        """Load a single tier's model. Thread-safe.

        Any instance already loaded for the tier is unloaded first, so a failed
        reload leaves the tier empty.

        Returns:
            True if the model was loaded; False if the tier is disabled, a
            local backend has no ``model_path``, or loading raised.
        """
        settings = self.config.get_tier_settings(tier.value)

        if not settings['enabled']:
            _logger.info(f"[Router] Tier {tier.value} is disabled, skipping")
            return False

        if not settings['model_path'] and settings['backend'] == 'local':
            _logger.warning(f"[Router] No model_path configured for {tier.value}")
            return False

        with self._load_lock:
            # Unload existing if any (no-op when the tier is empty).
            self.unload_tier(tier)

            try:
                inst = self._create_instance(tier, verbose)
                self._instances[tier] = inst
                _logger.info(f"[Router] Loaded {tier.value}: {inst.model_name}")
                return True
            except Exception as e:
                _logger.error(f"[Router] Failed to load {tier.value}: {e}")
                return False

    def unload_tier(self, tier: ModelTier):
        """Unload a tier's model and free resources.

        The instance is removed from the router immediately, then unloaded
        under the tier's lock so an in-flight generation finishes first.
        Errors from the backend are logged, not raised.
        """
        inst = self._instances.pop(tier, None)
        if inst is None:
            return
        try:
            with self._locks[tier]:
                inst.unload_model()
            _logger.info(f"[Router] Unloaded {tier.value}")
        except Exception as e:
            _logger.error(f"[Router] Error unloading {tier.value}: {e}")

    def load_all(self, verbose: bool = False) -> Dict[str, bool]:
        """Load all enabled tiers. Returns {tier_name: success}."""
        return {tier.value: self.load_tier(tier, verbose) for tier in ModelTier}

    def unload_all(self):
        """Unload all tiers."""
        # Snapshot the keys: unload_tier mutates the dict while we iterate.
        for tier in list(self._instances):
            self.unload_tier(tier)

    def get_instance(self, tier: ModelTier):
        """Get the LLM instance for a tier (may be None if not loaded)."""
        return self._instances.get(tier)

    def is_tier_loaded(self, tier: ModelTier) -> bool:
        """Check if a tier has a loaded model."""
        inst = self._instances.get(tier)
        return inst is not None and inst.is_loaded

    @staticmethod
    def _parse_classification(response: str) -> Optional[Dict[str, Any]]:
        """Extract and normalise the classification JSON from a model response.

        Returns None when the response contains no ``{...}`` span. Raises
        ``ValueError`` (from ``json.loads``) when the span is not valid JSON.
        Missing fields are filled with defaults and an unrecognised ``tier`` is
        replaced by ``'sam'`` so callers always get the documented keys.
        """
        start = response.find('{')
        end = response.rfind('}')
        if start < 0 or end <= start:
            return None

        parsed = json.loads(response[start:end + 1])
        if not isinstance(parsed, dict):
            return None

        result = dict(parsed)
        tier = str(result.get('tier', '')).strip().lower()
        result['tier'] = tier if tier in ModelRouter._CLASSIFY_TIERS else 'sam'
        result.setdefault('category', 'defense')
        result.setdefault('urgency', 'medium')
        result.setdefault('reasoning', '')
        return result

    def classify(self, text: str) -> Dict[str, Any]:
        """Use SLM to classify/triage an event or task.

        Returns: {'tier': 'sam'|'lam', 'category': str, 'urgency': str, 'reasoning': str}

        Tries SLM first, then SAM, then LAM, moving on whenever a tier is not
        loaded, raises, or answers without parseable JSON. If every tier
        fails, a default classification (tier 'sam') is returned; this method
        does not raise.
        """
        classify_prompt = f"""Classify this event/task for autonomous handling.
Respond with ONLY a JSON object, no other text:
{{"tier": "sam" or "lam", "category": "defense|offense|counter|analyze|osint|simulate", "urgency": "high|medium|low", "reasoning": "brief explanation"}}

Event: {text}"""

        for tier in [ModelTier.SLM, *_FALLBACK[ModelTier.SLM]]:
            inst = self._instances.get(tier)
            if inst is None:
                continue
            try:
                with self._locks[tier]:
                    # Re-check under the lock: the tier may have been unloaded
                    # while we were waiting for it.
                    if not inst.is_loaded:
                        continue
                    response = inst.generate(classify_prompt, max_tokens=200, temperature=0.1)
                parsed = self._parse_classification(response)
            except Exception as e:
                _logger.warning(f"[Router] Classification failed on {tier.value}: {e}")
                continue

            if parsed is not None:
                return parsed
            _logger.warning(f"[Router] No JSON object in classification response from {tier.value}")

        # Default if all tiers fail
        return {'tier': 'sam', 'category': 'defense', 'urgency': 'medium',
                'reasoning': 'Default classification (no model available)'}

    def generate(self, tier: ModelTier, prompt: str, **kwargs) -> str:
        """Generate with a specific tier, falling back to higher tiers on failure.

        Fallback chain: SLM -> SAM -> LAM, SAM -> LAM

        Args:
            tier: Tier to try first.
            prompt: Prompt passed to the instance's ``generate``.
            **kwargs: Forwarded unchanged to the instance's ``generate``.

        Raises:
            LLMError: when no tier in the chain is loaded or all of them fail;
                the last backend error (if any) is attached as ``__cause__``.
        """
        last_error: Optional[Exception] = None

        for t in [tier, *_FALLBACK.get(tier, [])]:
            inst = self._instances.get(t)
            if inst is None:
                continue
            try:
                with self._locks[t]:
                    # Re-check under the lock: the tier may have been unloaded
                    # while we were waiting for it.
                    if not inst.is_loaded:
                        continue
                    return inst.generate(prompt, **kwargs)
            except Exception as e:
                _logger.warning(f"[Router] Generate failed on {t.value}: {e}")
                last_error = e

        from .llm import LLMError
        raise LLMError(
            f"All tiers exhausted for generation (started at {tier.value})"
        ) from last_error

    def _create_instance(self, tier: ModelTier, verbose: bool = False):
        """Create and load an LLM instance from tier config.

        The instance is given a ``_TierConfigProxy`` so it reads the tier's
        model settings rather than the global backend section.

        Raises:
            LLMError: if the tier's ``backend`` is not one of ``local``,
                ``transformers``, ``claude`` or ``huggingface``.
        """
        # Imported lazily, as in the rest of this module.
        from .llm import LLM, TransformersLLM, ClaudeLLM, HuggingFaceLLM, LLMError

        section = tier.value
        backend = self.config.get(section, 'backend', 'local')
        proxy = _TierConfigProxy(self.config, section)

        if verbose:
            model_path = self.config.get(section, 'model_path', '')
            _logger.info(f"[Router] Creating {tier.value} instance: backend={backend}, model={model_path}")

        backend_classes = {
            'local': LLM,
            'transformers': TransformersLLM,
            'claude': ClaudeLLM,
            'huggingface': HuggingFaceLLM,
        }
        llm_class = backend_classes.get(backend)
        if llm_class is None:
            raise LLMError(f"Unknown backend '{backend}' for tier {tier.value}")

        inst = llm_class(proxy)
        inst.load_model(verbose=verbose)
        return inst


# Singleton
_router_instance = None
# Guards creation and replacement of the singleton so concurrent callers
# cannot end up with two different routers.
_router_lock = threading.Lock()


def get_model_router() -> ModelRouter:
    """Get the global ModelRouter instance, creating it on first use.

    Creation is guarded by a lock, so concurrent first calls all receive the
    same router.
    """
    global _router_instance
    # Work on a local reference so a concurrent reset cannot make us return None.
    router = _router_instance
    if router is None:
        with _router_lock:
            router = _router_instance
            if router is None:
                router = _router_instance = ModelRouter()
    return router


def reset_model_router():
    """Reset the global ModelRouter (unloads all models).

    The singleton is detached under the lock and unloaded afterwards, so a
    slow unload does not block callers that need a fresh router.
    """
    global _router_instance
    with _router_lock:
        router, _router_instance = _router_instance, None
    if router is not None:
        router.unload_all()
v2.0 — Re-integrate autonomy framework from Linux non-public build

Add multi-model autonomous threat response system (SLM/SAM/LAM):
- ModelRouter: concurrent model tiers with fallback chains
- RulesEngine: condition-action automation with 11 condition/action types
- AutonomyDaemon: background threat monitoring and rule dispatch
- Web UI: 4-tab dashboard (Dashboard, Rules, Activity Log, Models)
- Config: [slm], [sam], [lam], [autonomy] settings sections

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 00:51:18 -08:00
			`"""`
			`AUTARCH Model Router`
			`Manages concurrent SLM/LAM/SAM model instances for autonomous operation.`

			`Model Tiers:`
			`SLM (Small Language Model) — Fast classification, routing, yes/no decisions`
			`SAM (Small Action Model) — Quick tool execution, simple automated responses`
			`LAM (Large Action Model) — Complex multi-step agent tasks, strategic planning`
			`"""`

			`import json`
			`import logging`
			`import threading`
			`from typing import Optional, Dict, Any`
			`from enum import Enum`

			`from .config import get_config`

			`_logger = logging.getLogger('autarch.model_router')`


			`class ModelTier(Enum):`
			`SLM = 'slm'`
			`SAM = 'sam'`
			`LAM = 'lam'`


			`# Fallback chain: if a tier fails, try the next one`
			`_FALLBACK = {`
			`ModelTier.SLM: [ModelTier.SAM, ModelTier.LAM],`
			`ModelTier.SAM: [ModelTier.LAM],`
			`ModelTier.LAM: [],`
			`}`


			`class _TierConfigProxy:`
			`"""Proxies Config but overrides the backend section for a specific model tier.`

			`When a tier says backend=local with model_path=X, this proxy makes the LLM`
			`class (which reads [llama]) see the tier's model_path/n_ctx/etc instead.`
			`"""`

			`def __init__(self, base_config, tier_name: str):`
			`self._base = base_config`
			`self._tier = tier_name`
			`self._overrides: Dict[str, Dict[str, str]] = {}`
			`self._build_overrides()`

			`def _build_overrides(self):`
			`backend = self._base.get(self._tier, 'backend', 'local')`
			`model_path = self._base.get(self._tier, 'model_path', '')`
			`n_ctx = self._base.get(self._tier, 'n_ctx', '2048')`
			`n_gpu_layers = self._base.get(self._tier, 'n_gpu_layers', '-1')`
			`n_threads = self._base.get(self._tier, 'n_threads', '4')`

			`if backend == 'local':`
			`self._overrides['llama'] = {`
			`'model_path': model_path,`
			`'n_ctx': n_ctx,`
			`'n_gpu_layers': n_gpu_layers,`
			`'n_threads': n_threads,`
			`}`
			`elif backend == 'transformers':`
			`self._overrides['transformers'] = {`
			`'model_path': model_path,`
			`}`
			`# claude and huggingface are API-based — no path override needed`

			`def get(self, section: str, key: str, fallback=None):`
			`overrides = self._overrides.get(section, {})`
			`if key in overrides:`
			`return overrides[key]`
			`return self._base.get(section, key, fallback)`

			`def get_int(self, section: str, key: str, fallback: int = 0) -> int:`
			`overrides = self._overrides.get(section, {})`
			`if key in overrides:`
			`try:`
			`return int(overrides[key])`
			`except (ValueError, TypeError):`
			`return fallback`
			`return self._base.get_int(section, key, fallback)`

			`def get_float(self, section: str, key: str, fallback: float = 0.0) -> float:`
			`overrides = self._overrides.get(section, {})`
			`if key in overrides:`
			`try:`
			`return float(overrides[key])`
			`except (ValueError, TypeError):`
			`return fallback`
			`return self._base.get_float(section, key, fallback)`

			`def get_bool(self, section: str, key: str, fallback: bool = False) -> bool:`
			`overrides = self._overrides.get(section, {})`
			`if key in overrides:`
			`val = str(overrides[key]).lower()`
			`return val in ('true', '1', 'yes', 'on')`
			`return self._base.get_bool(section, key, fallback)`

			`# Delegate all settings getters to base (they call self.get internally)`
			`def get_llama_settings(self) -> dict:`
			`from .config import Config`
			`return Config.get_llama_settings(self)`

			`def get_transformers_settings(self) -> dict:`
			`from .config import Config`
			`return Config.get_transformers_settings(self)`

			`def get_claude_settings(self) -> dict:`
			`return self._base.get_claude_settings()`

			`def get_huggingface_settings(self) -> dict:`
			`return self._base.get_huggingface_settings()`


			`class ModelRouter:`
			`"""Manages up to 3 concurrent LLM instances (SLM, SAM, LAM).`

			`Each tier can use a different backend (local GGUF, transformers, Claude API,`
			`HuggingFace). The router handles loading, unloading, fallback, and thread-safe`
			`access.`
			`"""`

			`def __init__(self, config=None):`
			`self.config = config or get_config()`
			`self._instances: Dict[ModelTier, Any] = {}`
			`self._locks: Dict[ModelTier, threading.Lock] = {`
			`tier: threading.Lock() for tier in ModelTier`
			`}`
			`self._load_lock = threading.Lock()`

			`@property`
			`def status(self) -> Dict[str, dict]:`
			`"""Return load status of all tiers."""`
			`result = {}`
			`for tier in ModelTier:`
			`inst = self._instances.get(tier)`
			`settings = self.config.get_tier_settings(tier.value)`
			`result[tier.value] = {`
			`'loaded': inst is not None and inst.is_loaded,`
			`'model_name': inst.model_name if inst and inst.is_loaded else None,`
			`'backend': settings['backend'],`
			`'enabled': settings['enabled'],`
			`'model_path': settings['model_path'],`
			`}`
			`return result`

			`def load_tier(self, tier: ModelTier, verbose: bool = False) -> bool:`
			`"""Load a single tier's model. Thread-safe."""`
			`settings = self.config.get_tier_settings(tier.value)`

			`if not settings['enabled']:`
			`_logger.info(f"[Router] Tier {tier.value} is disabled, skipping")`
			`return False`

			`if not settings['model_path'] and settings['backend'] == 'local':`
			`_logger.warning(f"[Router] No model_path configured for {tier.value}")`
			`return False`

			`with self._load_lock:`
			`# Unload existing if any`
			`if tier in self._instances:`
			`self.unload_tier(tier)`

			`try:`
			`inst = self._create_instance(tier, verbose)`
			`self._instances[tier] = inst`
			`_logger.info(f"[Router] Loaded {tier.value}: {inst.model_name}")`
			`return True`
			`except Exception as e:`
			`_logger.error(f"[Router] Failed to load {tier.value}: {e}")`
			`return False`

			`def unload_tier(self, tier: ModelTier):`
			`"""Unload a tier's model and free resources."""`
			`inst = self._instances.pop(tier, None)`
			`if inst:`
			`try:`
			`inst.unload_model()`
			`_logger.info(f"[Router] Unloaded {tier.value}")`
			`except Exception as e:`
			`_logger.error(f"[Router] Error unloading {tier.value}: {e}")`

			`def load_all(self, verbose: bool = False) -> Dict[str, bool]:`
			`"""Load all enabled tiers. Returns {tier_name: success}."""`
			`results = {}`
			`for tier in ModelTier:`
			`results[tier.value] = self.load_tier(tier, verbose)`
			`return results`

			`def unload_all(self):`
			`"""Unload all tiers."""`
			`for tier in list(self._instances.keys()):`
			`self.unload_tier(tier)`

			`def get_instance(self, tier: ModelTier):`
			`"""Get the LLM instance for a tier (may be None if not loaded)."""`
			`return self._instances.get(tier)`

			`def is_tier_loaded(self, tier: ModelTier) -> bool:`
			`"""Check if a tier has a loaded model."""`
			`inst = self._instances.get(tier)`
			`return inst is not None and inst.is_loaded`

			`def classify(self, text: str) -> Dict[str, Any]:`
			`"""Use SLM to classify/triage an event or task.`

			`Returns: {'tier': 'sam'\|'lam', 'category': str, 'urgency': str, 'reasoning': str}`

			`Falls back to SAM tier if SLM is not loaded.`
			`"""`
			`classify_prompt = f"""Classify this event/task for autonomous handling.`
			`Respond with ONLY a JSON object, no other text:`
			`{{"tier": "sam" or "lam", "category": "defense\|offense\|counter\|analyze\|osint\|simulate", "urgency": "high\|medium\|low", "reasoning": "brief explanation"}}`

			`Event: {text}"""`

			`# Try SLM first, then fallback`
			`for tier in [ModelTier.SLM, ModelTier.SAM, ModelTier.LAM]:`
			`inst = self._instances.get(tier)`
			`if inst and inst.is_loaded:`
			`try:`
			`with self._locks[tier]:`
			`response = inst.generate(classify_prompt, max_tokens=200, temperature=0.1)`
			`# Parse JSON from response`
			`response = response.strip()`
			`# Find JSON in response`
			`start = response.find('{')`
			`end = response.rfind('}')`
			`if start >= 0 and end > start:`
			`return json.loads(response[start:end + 1])`
			`except Exception as e:`
			`_logger.warning(f"[Router] Classification failed on {tier.value}: {e}")`
			`continue`

			`# Default if all tiers fail`
			`return {'tier': 'sam', 'category': 'defense', 'urgency': 'medium',`
			`'reasoning': 'Default classification (no model available)'}`

			`def generate(self, tier: ModelTier, prompt: str, **kwargs) -> str:`
			`"""Generate with a specific tier, falling back to higher tiers on failure.`

			`Fallback chain: SLM -> SAM -> LAM, SAM -> LAM`
			`"""`
			`chain = [tier] + _FALLBACK.get(tier, [])`

			`for t in chain:`
			`inst = self._instances.get(t)`
			`if inst and inst.is_loaded:`
			`try:`
			`with self._locks[t]:`
			`return inst.generate(prompt, **kwargs)`
			`except Exception as e:`
			`_logger.warning(f"[Router] Generate failed on {t.value}: {e}")`
			`continue`

			`from .llm import LLMError`
			`raise LLMError(f"All tiers exhausted for generation (started at {tier.value})")`

			`def _create_instance(self, tier: ModelTier, verbose: bool = False):`
			`"""Create an LLM instance from tier config."""`
			`from .llm import LLM, TransformersLLM, ClaudeLLM, HuggingFaceLLM`

			`section = tier.value`
			`backend = self.config.get(section, 'backend', 'local')`
			`proxy = _TierConfigProxy(self.config, section)`

			`if verbose:`
			`model_path = self.config.get(section, 'model_path', '')`
			`_logger.info(f"[Router] Creating {tier.value} instance: backend={backend}, model={model_path}")`

			`if backend == 'local':`
			`inst = LLM(proxy)`
			`elif backend == 'transformers':`
			`inst = TransformersLLM(proxy)`
			`elif backend == 'claude':`
			`inst = ClaudeLLM(proxy)`
			`elif backend == 'huggingface':`
			`inst = HuggingFaceLLM(proxy)`
			`else:`
			`from .llm import LLMError`
			`raise LLMError(f"Unknown backend '{backend}' for tier {tier.value}")`

			`inst.load_model(verbose=verbose)`
			`return inst`


			`# Singleton`
			`_router_instance = None`


			`def get_model_router() -> ModelRouter:`
			`"""Get the global ModelRouter instance."""`
			`global _router_instance`
			`if _router_instance is None:`
			`_router_instance = ModelRouter()`
			`return _router_instance`


			`def reset_model_router():`
			`"""Reset the global ModelRouter (unloads all models)."""`
			`global _router_instance`
			`if _router_instance is not None:`
			`_router_instance.unload_all()`
			`_router_instance = None`