"""
AUTARCH Model Router
Manages concurrent SLM/LAM/SAM model instances for autonomous operation.

Model Tiers:
  SLM (Small Language Model) — Fast classification, routing, yes/no decisions
  SAM (Small Action Model)   — Quick tool execution, simple automated responses
  LAM (Large Action Model)   — Complex multi-step agent tasks, strategic planning
"""

import json
import logging
import threading
from typing import Optional, Dict, Any
from enum import Enum

from .config import get_config

_logger = logging.getLogger('autarch.model_router')


class ModelTier(Enum):
    """Model tiers the router can host.

    Each member's value is the lowercase tier name; the router uses it as the
    config section name for that tier and as the key in status/result dicts.
    """
    # Small Language Model: fast classification, routing, yes/no decisions
    SLM = 'slm'
    # Small Action Model: quick tool execution, simple automated responses
    SAM = 'sam'
    # Large Action Model: complex multi-step agent tasks, strategic planning
    LAM = 'lam'


# Fallback chain: for each requested tier, the tiers to try next (in list
# order) when that tier is not loaded or its generate() call raises.
# LAM is the last resort and therefore has no fallback.
_FALLBACK = {
    ModelTier.SLM: [ModelTier.SAM, ModelTier.LAM],
    ModelTier.SAM: [ModelTier.LAM],
    ModelTier.LAM: [],
}


class _TierConfigProxy:
    """Proxies Config but overrides the backend section for a specific model tier.

    When a tier says backend=local with model_path=X, this proxy makes the LLM
    class (which reads [llama]) see the tier's model_path/n_ctx/etc instead.
    For backend=transformers only ``model_path`` in [transformers] is
    overridden. Every other section/key, and every Config attribute that is
    not explicitly wrapped here, is forwarded to the base config unchanged.

    Args:
        base_config: The real config object. It must provide ``get``,
            ``get_int``, ``get_float`` and ``get_bool`` taking
            ``(section, key, fallback)``.
        tier_name: Config section of the tier (e.g. ``'slm'``).
    """

    # Sentinel distinguishing "no override" from an override whose value is
    # falsy (an empty model_path is still an override).
    _MISSING = object()

    # Attributes owned by the proxy itself. __getattr__ must never try to
    # resolve these through self._base, otherwise a half-initialised instance
    # (e.g. during copy/unpickle) would recurse forever.
    _OWN_ATTRS = ('_base', '_tier', '_overrides')

    def __init__(self, base_config, tier_name: str):
        self._base = base_config
        self._tier = tier_name
        # {section: {key: value}} consulted before the base config.
        self._overrides: Dict[str, Dict[str, str]] = {}
        self._build_overrides()

    def __getattr__(self, name: str):
        """Forward any attribute the proxy does not define to the base config.

        Only called when normal attribute lookup fails, so the explicit
        getters below always take precedence.

        Raises:
            AttributeError: For dunder names, for the proxy's own attributes
                when they are not set yet, or when the base config lacks
                ``name``.
        """
        if name.startswith('__') or name in self._OWN_ATTRS:
            raise AttributeError(name)
        return getattr(self._base, name)

    def _build_overrides(self):
        """Read the tier's section and populate the backend-section overrides."""
        backend = self._base.get(self._tier, 'backend', 'local')
        model_path = self._base.get(self._tier, 'model_path', '')
        n_ctx = self._base.get(self._tier, 'n_ctx', '2048')
        n_gpu_layers = self._base.get(self._tier, 'n_gpu_layers', '-1')
        n_threads = self._base.get(self._tier, 'n_threads', '4')

        if backend == 'local':
            self._overrides['llama'] = {
                'model_path': model_path,
                'n_ctx': n_ctx,
                'n_gpu_layers': n_gpu_layers,
                'n_threads': n_threads,
            }
        elif backend == 'transformers':
            self._overrides['transformers'] = {
                'model_path': model_path,
            }
        # claude and huggingface are API-based — no path override needed

    def _override(self, section: str, key: str):
        """Return the override for section/key, or ``_MISSING`` if there is none."""
        return self._overrides.get(section, {}).get(key, self._MISSING)

    def get(self, section: str, key: str, fallback=None):
        """Return the override for section/key if present, else the base value."""
        value = self._override(section, key)
        if value is not self._MISSING:
            return value
        return self._base.get(section, key, fallback)

    def get_int(self, section: str, key: str, fallback: int = 0) -> int:
        """Like ``get`` but as int; an unparsable override yields ``fallback``."""
        value = self._override(section, key)
        if value is self._MISSING:
            return self._base.get_int(section, key, fallback)
        try:
            return int(value)
        except (ValueError, TypeError):
            return fallback

    def get_float(self, section: str, key: str, fallback: float = 0.0) -> float:
        """Like ``get`` but as float; an unparsable override yields ``fallback``."""
        value = self._override(section, key)
        if value is self._MISSING:
            return self._base.get_float(section, key, fallback)
        try:
            return float(value)
        except (ValueError, TypeError):
            return fallback

    def get_bool(self, section: str, key: str, fallback: bool = False) -> bool:
        """Like ``get`` but as bool.

        An override is true only when it reads 'true', '1', 'yes' or 'on'
        (case-insensitive); any other override value is False.
        """
        value = self._override(section, key)
        if value is self._MISSING:
            return self._base.get_bool(section, key, fallback)
        return str(value).lower() in ('true', '1', 'yes', 'on')

    # The llama/transformers settings builders are invoked unbound with the
    # proxy as ``self`` so that their lookups go through the overriding
    # getters above (this relies on them reading values via self.get*()).
    def get_llama_settings(self) -> dict:
        """Build llama settings via Config's implementation, seen through the proxy."""
        from .config import Config
        return Config.get_llama_settings(self)

    def get_transformers_settings(self) -> dict:
        """Build transformers settings via Config's implementation, seen through the proxy."""
        from .config import Config
        return Config.get_transformers_settings(self)

    # API-based backends have no per-tier overrides; use the base directly.
    def get_claude_settings(self) -> dict:
        """Return the base config's Claude settings unchanged."""
        return self._base.get_claude_settings()

    def get_huggingface_settings(self) -> dict:
        """Return the base config's HuggingFace settings unchanged."""
        return self._base.get_huggingface_settings()


class ModelRouter:
    """Manages up to 3 concurrent LLM instances (SLM, SAM, LAM).

    Each tier can use a different backend (local GGUF, transformers, Claude API,
    HuggingFace). The router handles loading, unloading, fallback, and
    serialises generation per tier with one lock per tier.
    """

    def __init__(self, config=None):
        """Create a router with no models loaded.

        Args:
            config: Config object to read tier settings from; the global
                config from ``get_config()`` is used when omitted.
        """
        self.config = config or get_config()
        # Loaded LLM instance per tier; a tier is absent until load_tier succeeds.
        self._instances: Dict[ModelTier, Any] = {}
        # One lock per tier so a model instance serves one generate() at a time.
        self._locks: Dict[ModelTier, threading.Lock] = {
            tier: threading.Lock() for tier in ModelTier
        }
        # Serialises load_tier() calls across all tiers.
        self._load_lock = threading.Lock()

    @property
    def status(self) -> Dict[str, dict]:
        """Return load status of all tiers.

        Returns:
            ``{tier_name: {'loaded', 'model_name', 'backend', 'enabled',
            'model_path'}}`` where ``model_name`` is None unless loaded.
        """
        result = {}
        for tier in ModelTier:
            inst = self._instances.get(tier)
            settings = self.config.get_tier_settings(tier.value)
            result[tier.value] = {
                'loaded': inst is not None and inst.is_loaded,
                'model_name': inst.model_name if inst and inst.is_loaded else None,
                'backend': settings['backend'],
                'enabled': settings['enabled'],
                'model_path': settings['model_path'],
            }
        return result

    def load_tier(self, tier: ModelTier, verbose: bool = False) -> bool:
        """Load a single tier's model, replacing any instance already loaded.

        Loads are serialised by a single lock. Failures are logged, not raised.

        Args:
            tier: Tier to load.
            verbose: Passed to the backend's ``load_model`` and enables an
                extra log line describing the instance being created.

        Returns:
            True if the model was loaded; False if the tier is disabled, a
            local backend has no model_path, or loading raised.
        """
        settings = self.config.get_tier_settings(tier.value)

        if not settings['enabled']:
            _logger.info(f"[Router] Tier {tier.value} is disabled, skipping")
            return False

        if not settings['model_path'] and settings['backend'] == 'local':
            _logger.warning(f"[Router] No model_path configured for {tier.value}")
            return False

        with self._load_lock:
            # Unload existing if any
            if tier in self._instances:
                self.unload_tier(tier)

            try:
                inst = self._create_instance(tier, verbose)
                self._instances[tier] = inst
                _logger.info(f"[Router] Loaded {tier.value}: {inst.model_name}")
                return True
            except Exception as e:
                _logger.error(f"[Router] Failed to load {tier.value}: {e}")
                return False

    def unload_tier(self, tier: ModelTier):
        """Unload a tier's model and free resources.

        The instance is removed from the router first, so the tier counts as
        not loaded even if the backend's ``unload_model`` raises (the error is
        logged and swallowed). No-op when the tier has no instance.
        """
        # NOTE(review): this does not take the per-tier lock, so it can run
        # while another thread is inside generate()/classify() on the same
        # instance — confirm the backends tolerate that.
        inst = self._instances.pop(tier, None)
        if inst:
            try:
                inst.unload_model()
                _logger.info(f"[Router] Unloaded {tier.value}")
            except Exception as e:
                _logger.error(f"[Router] Error unloading {tier.value}: {e}")

    def load_all(self, verbose: bool = False) -> Dict[str, bool]:
        """Load all enabled tiers. Returns {tier_name: success}."""
        return {tier.value: self.load_tier(tier, verbose) for tier in ModelTier}

    def unload_all(self):
        """Unload all tiers."""
        # Snapshot the keys: unload_tier mutates the dict while we iterate.
        for tier in list(self._instances.keys()):
            self.unload_tier(tier)

    def get_instance(self, tier: ModelTier):
        """Get the LLM instance for a tier (may be None if not loaded)."""
        return self._instances.get(tier)

    def is_tier_loaded(self, tier: ModelTier) -> bool:
        """Check if a tier has a loaded model."""
        inst = self._instances.get(tier)
        return inst is not None and inst.is_loaded

    @staticmethod
    def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
        """Return the first JSON object embedded in ``text``, or None.

        Every ``{`` is tried in turn as the start of an object, so prose
        before or after the JSON (including stray braces, or an echoed prompt
        template that is not valid JSON) does not prevent parsing.
        """
        decoder = json.JSONDecoder()
        pos = text.find('{')
        while pos != -1:
            try:
                obj, _end = decoder.raw_decode(text, pos)
            except ValueError:
                # Not a valid object starting here; try the next brace.
                # (json.JSONDecodeError is a ValueError subclass.)
                obj = None
            if isinstance(obj, dict):
                return obj
            pos = text.find('{', pos + 1)
        return None

    def classify(self, text: str) -> Dict[str, Any]:
        """Use SLM to classify/triage an event or task.

        Tiers are tried in the order SLM, SAM, LAM; a tier is skipped when it
        is not loaded, when generation raises, or when its response contains
        no parsable JSON object.

        Args:
            text: Description of the event/task to classify.

        Returns:
            The JSON object produced by the model, expected to look like
            ``{'tier': 'sam'|'lam', 'category': str, 'urgency': str,
            'reasoning': str}`` (the content is returned as parsed, it is not
            validated). If no tier yields an object, a default classification
            with tier 'sam', category 'defense' and urgency 'medium'.
        """
        classify_prompt = f"""Classify this event/task for autonomous handling.
Respond with ONLY a JSON object, no other text:
{{"tier": "sam" or "lam", "category": "defense|offense|counter|analyze|osint|simulate", "urgency": "high|medium|low", "reasoning": "brief explanation"}}

Event: {text}"""

        # Try SLM first, then fallback
        for tier in [ModelTier.SLM, ModelTier.SAM, ModelTier.LAM]:
            inst = self._instances.get(tier)
            if not (inst and inst.is_loaded):
                continue
            try:
                # Hold the tier lock only for the model call itself.
                with self._locks[tier]:
                    response = inst.generate(classify_prompt, max_tokens=200, temperature=0.1)
                parsed = self._extract_json_object(response)
            except Exception as e:
                _logger.warning(f"[Router] Classification failed on {tier.value}: {e}")
                continue
            if parsed is not None:
                return parsed
            _logger.warning(
                f"[Router] Classification failed on {tier.value}: no JSON object in response"
            )

        # Default if all tiers fail
        return {'tier': 'sam', 'category': 'defense', 'urgency': 'medium',
                'reasoning': 'Default classification (no model available)'}

    def generate(self, tier: ModelTier, prompt: str, **kwargs) -> str:
        """Generate with a specific tier, falling back to higher tiers on failure.

        Fallback chain: SLM -> SAM -> LAM, SAM -> LAM

        Args:
            tier: Preferred tier.
            prompt: Prompt passed to the instance's ``generate``.
            **kwargs: Forwarded unchanged to the instance's ``generate``.

        Returns:
            The output of the first tier in the chain that is loaded and
            generates without raising.

        Raises:
            LLMError: If no tier in the chain produced a result.
        """
        chain = [tier] + _FALLBACK.get(tier, [])

        for t in chain:
            inst = self._instances.get(t)
            if inst and inst.is_loaded:
                try:
                    with self._locks[t]:
                        return inst.generate(prompt, **kwargs)
                except Exception as e:
                    _logger.warning(f"[Router] Generate failed on {t.value}: {e}")
                    continue

        from .llm import LLMError
        raise LLMError(f"All tiers exhausted for generation (started at {tier.value})")

    def _create_instance(self, tier: ModelTier, verbose: bool = False):
        """Create and load an LLM instance from the tier's config section.

        The backend class receives a ``_TierConfigProxy`` so it sees the
        tier's own model settings instead of the global backend section.

        Raises:
            LLMError: If the tier's backend name is not recognised. Whatever
                the backend's constructor or ``load_model`` raises propagates.
        """
        # Imported lazily, as in the rest of this module.
        from .llm import LLM, TransformersLLM, ClaudeLLM, HuggingFaceLLM

        section = tier.value
        backend = self.config.get(section, 'backend', 'local')
        proxy = _TierConfigProxy(self.config, section)

        if verbose:
            model_path = self.config.get(section, 'model_path', '')
            _logger.info(f"[Router] Creating {tier.value} instance: backend={backend}, model={model_path}")

        backend_classes = {
            'local': LLM,
            'transformers': TransformersLLM,
            'claude': ClaudeLLM,
            'huggingface': HuggingFaceLLM,
        }
        llm_cls = backend_classes.get(backend)
        if llm_cls is None:
            from .llm import LLMError
            raise LLMError(f"Unknown backend '{backend}' for tier {tier.value}")

        inst = llm_cls(proxy)
        inst.load_model(verbose=verbose)
        return inst


# Process-wide router, created lazily by get_model_router()
_router_instance = None


def get_model_router() -> ModelRouter:
    """Return the shared ModelRouter, constructing it on first use."""
    global _router_instance
    router = _router_instance
    if router is None:
        # First call (or first call after a reset): build and remember it.
        router = ModelRouter()
        _router_instance = router
    return router


def reset_model_router():
    """Drop the shared ModelRouter, unloading every model it holds first.

    Does nothing beyond clearing the reference when no router exists yet.
    """
    global _router_instance
    current = _router_instance
    if current is not None:
        current.unload_all()
    # Cleared only after a successful unload, so the next
    # get_model_router() call builds a fresh router.
    _router_instance = None