# core/hal_analyst.py

"""
AUTARCH HAL Analyst
Automatically analyzes tool output via the loaded LLM.

When a defensive/analysis tool produces output, this module sends it to
the active LLM backend for analysis. HAL identifies issues, explains
what the user is looking at, and optionally suggests fixes.

Usage:
    from core.hal_analyst import analyze_output
    result = analyze_output('log_analyzer', log_text, context='syslog')
"""

import json
import logging
import time
from typing import Optional

_log = logging.getLogger('autarch.hal_analyst')

# Offensive-tool blueprints: these categories are meant to be left out of
# automatic analysis.
EXCLUDED_BLUEPRINTS = {
    'offense',
    'loadtest',
    'phishmail',
    'social_eng',
    'hack_hijack',
    'c2_framework',
    'deauth',
    'pineapple',
    'exploit_dev',
    'sms_forge',
    'rcs_tools',
    'starlink_hack',
    'iphone_exploit',
}

# Prompt templates keyed by analysis category. Each template is rendered with
# str.format(tool_name=..., context=..., output=...); a template may use any
# subset of those fields ('log_analysis' and 'counter' only use {output}).
#
# The structure labels are deliberately plain text (no ** bold markers): the
# system prompt sent alongside these templates by analyze_output() tells the
# model not to use markdown, so the example structure must not show any.
ANALYSIS_PROMPTS = {
    'default': (
        "You are HAL, the AUTARCH security analyst. Analyze the following tool output. "
        "Be concise but thorough. Structure your response as:\n"
        "1. Summary: One sentence about what this data shows\n"
        "2. Findings: List any issues, anomalies, or notable items\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Recommendation: What the user should do (if anything)\n\n"
        "Tool: {tool_name}\n"
        "Context: {context}\n\n"
        "--- OUTPUT ---\n{output}\n--- END ---"
    ),
    'log_analysis': (
        "You are HAL, the AUTARCH security analyst. Analyze these system logs for security issues. "
        "Look for: failed login attempts, privilege escalation, suspicious processes, "
        "unusual network connections, file permission changes, service failures, "
        "and any indicators of compromise.\n\n"
        "Be specific about line numbers or timestamps where issues appear.\n\n"
        "Structure your response as:\n"
        "1. Summary: What these logs show\n"
        "2. Issues Found: Specific problems with details\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Fix: Exact commands or steps to resolve each issue\n\n"
        "--- LOGS ---\n{output}\n--- END ---"
    ),
    'network': (
        "You are HAL, the AUTARCH network security analyst. Analyze this network data. "
        "Look for: suspicious connections, unusual ports, potential backdoors, "
        "ARP anomalies, rogue devices, and any signs of intrusion.\n\n"
        "Structure your response as:\n"
        "1. Summary: Network status overview\n"
        "2. Findings: Suspicious items with details\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Action: Commands to investigate or fix issues\n\n"
        "Tool: {tool_name}\n\n"
        "--- DATA ---\n{output}\n--- END ---"
    ),
    'defense': (
        "You are HAL, the AUTARCH defensive security analyst. "
        "Analyze ONLY the specific output provided below. Do NOT expand scope beyond what was tested. "
        "If this is a single check (firewall only, SSH only, etc.), only comment on that one check. "
        "Do NOT perform or suggest a full system audit unless the output contains multiple checks.\n\n"
        "Keep your response short and focused on the actual data shown.\n\n"
        "Structure:\n"
        "1. Summary (one sentence)\n"
        "2. Issues found (if any)\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Fix commands (only for issues found in THIS output)\n\n"
        "Tool: {tool_name}\nContext: {context}\n\n"
        "--- OUTPUT ---\n{output}\n--- END ---"
    ),
    'counter': (
        "You are HAL, the AUTARCH threat analyst. Analyze this threat detection output. "
        "Look for active compromises, persistent threats, backdoors, rootkits, "
        "and indicators of compromise.\n\n"
        "Be urgent and specific about any active threats found.\n\n"
        "Structure your response as:\n"
        "1. Summary: Threat landscape\n"
        "2. Active Threats: Any confirmed or suspected compromises\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Immediate Action: Steps to contain and remediate\n\n"
        "--- DATA ---\n{output}\n--- END ---"
    ),
    'android': (
        "You are HAL, the AUTARCH mobile security analyst. Analyze this Android device output. "
        "Look for: suspicious apps, dangerous permissions, stalkerware indicators, "
        "root detection, SELinux status, unusual processes, and security misconfigurations.\n\n"
        "Structure your response as:\n"
        "1. Summary: What this data shows\n"
        "2. Findings: Issues or notable items\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Fix: Exact adb or device commands to resolve issues\n\n"
        "Tool: {tool_name}\nContext: {context}\n\n"
        "--- OUTPUT ---\n{output}\n--- END ---"
    ),
    'analyze': (
        "You are HAL, the AUTARCH forensics analyst. Analyze this forensic data. "
        "Look for malware indicators, suspicious strings, anomalous file properties, "
        "and any signs of tampering or malicious content.\n\n"
        "Structure your response as:\n"
        "1. Summary: What this data represents\n"
        "2. Findings: Notable or suspicious items\n"
        "3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"
        "4. Recommendation: Further analysis or actions needed\n\n"
        "Tool: {tool_name}\n\n"
        "--- DATA ---\n{output}\n--- END ---"
    ),
}


def is_llm_available() -> bool:
    """Report whether an LLM backend exists and says it is loaded.

    Any failure while importing or querying the backend (including the
    core.llm module being unavailable) is treated as "not available".
    """
    try:
        from core.llm import get_llm
        backend = get_llm()
        if backend is None:
            return False
        return backend.is_loaded
    except Exception:
        return False


# Severity labels, most severe first. Used as the scan order when a response
# has no explicit "Risk Level: X" line, so the worst level mentioned wins.
_RISK_LEVELS = ('CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'CLEAN')

# Literal substrings that mark a response as containing fix instructions.
_FIX_MARKERS = ('```', 'sshd_config', 'Fix:', 'fix:', 'Command:', 'settings put ')

# Command names that count as a fix when they appear as a whole word followed
# by a space (so 'adapt ' no longer counts as 'apt ').
_FIX_COMMAND_PATTERN = (
    r'(?<![\w-])'
    r'(?:sudo|systemctl|iptables|chmod|chown|apt|ufw|adb|fastboot|dumpsys) '
)

# 'pm' and 'am' are also ordinary English tokens ("3 pm", "I am"), so they
# only count when followed by one of their shell subcommands.
_ANDROID_SHELL_PATTERN = (
    r'(?<![\w-])(?:'
    r'pm (?:list|path|install|uninstall|clear|enable|disable|grant|revoke|hide|unhide|suspend)'
    r'|am (?:start|startservice|stopservice|force-stop|kill|kill-all|broadcast|instrument)'
    r')\b'
)


def _extract_risk_level(response: str) -> str:
    """Return the lowercase risk level named in *response*, or 'unknown'."""
    import re

    text = response.upper()
    levels = '|'.join(_RISK_LEVELS)

    # Prefer the level stated right after the "Risk Level" label that every
    # prompt template asks for; prose such as "no critical issues" elsewhere
    # in the response must not override it.
    labelled = re.search(
        r'RISK\s+LEVEL\W{0,12}(?:IS\W{1,4})?(' + levels + r')\b', text
    )
    if labelled:
        return labelled.group(1).lower()

    # No labelled line: fall back to the most severe level mentioned as a
    # whole word ('ALLOW' / 'FOLLOWING' must not count as 'LOW').
    for level in _RISK_LEVELS:
        if re.search(r'\b' + level + r'\b', text):
            return level.lower()
    return 'unknown'


def _has_fix_commands(response: str) -> bool:
    """Return True when *response* appears to contain fix commands."""
    import re

    if any(marker in response for marker in _FIX_MARKERS):
        return True
    return bool(
        re.search(_FIX_COMMAND_PATTERN, response)
        or re.search(_ANDROID_SHELL_PATTERN, response)
    )


def analyze_output(
    tool_name: str,
    output: str,
    context: str = '',
    category: str = 'default',
    max_output_chars: int = 8000,
) -> dict:
    """Send tool output to the loaded LLM for analysis.

    Args:
        tool_name: Name of the tool that produced the output.
        output: The raw output text to analyze.
        context: Additional context (e.g., 'syslog', 'auth.log', 'ARP table').
            Rendered as 'general' when empty.
        category: Key into ANALYSIS_PROMPTS; unknown categories fall back to
            the 'default' template.
        max_output_chars: Output longer than this is cut to this many
            characters and a truncation note is appended.

    Returns:
        dict with keys:
            available (bool): Whether a loaded LLM was found. Stays True even
                if the chat call itself fails afterwards.
            analysis (str): The LLM's analysis text, or a human-readable
                message explaining why no analysis was produced.
            risk_level (str): Lowercase level extracted from the response
                ('clean', 'low', 'medium', 'high', 'critical'), or 'unknown'
                when none was found or no analysis ran.
            has_fixes (bool): Whether the analysis appears to contain fix
                commands.
            tool_name (str): Echo back the tool name.

    This function does not raise for backend problems: import errors and
    chat failures are logged and reported through the 'analysis' field.
    """
    result = {
        'available': False,
        'analysis': '',
        'risk_level': 'unknown',
        'has_fixes': False,
        'tool_name': tool_name,
    }

    if not output or not output.strip():
        result['analysis'] = 'No output to analyze.'
        return result

    # Check LLM. The import is local so this module can be imported even when
    # the LLM layer is missing or broken.
    try:
        from core.llm import get_llm
        llm = get_llm()
        if not llm or not llm.is_loaded:
            result['analysis'] = 'No LLM loaded — enable a model in LLM Settings to get AI analysis.'
            return result
    except Exception as e:
        _log.debug("[HAL] LLM not available: %s", e)
        result['analysis'] = f'LLM not available: {e}'
        return result

    result['available'] = True

    # Truncate output if too long. The note reports the original length,
    # which is read before `output` is rebound.
    if len(output) > max_output_chars:
        output = output[:max_output_chars] + f'\n\n... [truncated — {len(output)} chars total]'

    # Select prompt template. Templates ignore keyword fields they do not use.
    prompt_template = ANALYSIS_PROMPTS.get(category, ANALYSIS_PROMPTS['default'])
    prompt = prompt_template.format(
        tool_name=tool_name,
        output=output,
        context=context or 'general',
    )

    # Detect current OS so the model only suggests commands for this host.
    import platform as _plat
    _os_name = _plat.system()
    _os_detail = _plat.platform()

    # Send to LLM
    try:
        _log.info(
            "[HAL] Analyzing output from %s (%d chars, category=%s)",
            tool_name, len(output), category,
        )
        start = time.time()
        response = llm.chat(prompt, system_prompt=(
            "You are HAL, the AI security analyst for the AUTARCH platform. "
            f"This system is running {_os_name} ({_os_detail}). "
            "ONLY suggest commands for this operating system. "
            "If the tool output is from the WRONG platform (e.g. Windows scan results on a Linux host), "
            "immediately tell the user they ran the wrong scan and point them to the correct one. "
            "Do NOT use markdown formatting. Plain text only. No ** or ## or ``` or bullet points. "
            "Be specific, cite evidence from the data, and provide exact commands to fix issues."
        ))
        elapsed = time.time() - start
        _log.info("[HAL] Analysis complete in %.1fs (%d chars)", elapsed, len(response))

        result['analysis'] = response
        result['risk_level'] = _extract_risk_level(response)
        result['has_fixes'] = _has_fix_commands(response)

    except Exception as e:
        _log.error("[HAL] Analysis failed: %s", e, exc_info=True)
        result['analysis'] = f'Analysis failed: {e}'

    return result


def extract_fix_commands(analysis: str) -> list:
    """Extract actionable commands from an analysis response.

    Two sources are recognised, scanned line by line:

    * Lines inside ``` fenced code blocks. Blank lines and lines starting
      with '#' (treated as comments there) are skipped, and a leading '$ '
      shell prompt is removed. A fence that is never closed (for example a
      response cut off mid-block) still contributes its lines.
    * Lines outside code blocks that start with 'sudo ', '$ ' or '# ' and
      are longer than five characters. Leading '$', '#' and space characters
      are stripped; a 'sudo ' prefix is kept as part of the command.

    Args:
        analysis: The analysis text returned by the LLM.

    Returns:
        A list of command strings in the order they appear; empty when
        *analysis* is empty or contains no recognisable commands.
    """
    if not analysis:
        return []

    commands = []
    in_code_block = False

    for line in analysis.split('\n'):
        stripped = line.strip()

        # A fence line only toggles state; any language tag after ``` is ignored.
        if stripped.startswith('```'):
            in_code_block = not in_code_block
            continue

        if in_code_block:
            if not stripped or stripped.startswith('#'):
                continue
            # Drop a copied shell prompt so the result is directly runnable,
            # matching how '$ ' lines are handled outside code blocks.
            if stripped.startswith('$ '):
                stripped = stripped[2:].strip()
            if stripped:
                commands.append(stripped)
            continue

        # Inline commands: sudo invocations or lines carrying a shell prompt.
        if stripped.startswith(('sudo ', '$ ', '# ')) and len(stripped) > 5:
            cmd = stripped.lstrip('$# ').strip()
            if cmd:
                commands.append(cmd)

    return commands
AUTARCH v1.9 — remote monitoring, SSH manager, daemon, vault, cleanup - Add Remote Monitoring Station with PIAP device profile system - Add SSH/SSHD manager with fail2ban integration - Add privileged daemon architecture for safe root operations - Add encrypted vault, HAL memory, HAL auto-analyst - Add network security suite, module creator, codex training - Add start.sh launcher script and GTK3 desktop launcher - Remove Output/ build artifacts, installer files, loose docs - Update .gitignore for runtime data and build artifacts - Update README for v1.9 with new launch method, screenshots, and features Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> 2026-03-24 06:59:06 -07:00			`"""`
			`AUTARCH HAL Analyst`
			`Automatically analyzes tool output via the loaded LLM.`

			`When a defensive/analysis tool produces output, this module sends it to`
			`the active LLM backend for analysis. HAL identifies issues, explains`
			`what the user is looking at, and optionally suggests fixes.`

			`Usage:`
			`from core.hal_analyst import analyze_output`
			`result = analyze_output('log_analyzer', log_text, context='syslog')`
			`"""`

			`import json`
			`import logging`
			`import time`
			`from typing import Optional`

			`_log = logging.getLogger('autarch.hal_analyst')`

			`# Categories that should NOT get auto-analysis (offensive tools)`
			`EXCLUDED_BLUEPRINTS = {`
			`'offense', 'loadtest', 'phishmail', 'social_eng', 'hack_hijack',`
			`'c2_framework', 'deauth', 'pineapple', 'exploit_dev', 'sms_forge',`
			`'rcs_tools', 'starlink_hack', 'iphone_exploit',`
			`}`

			`# Prompts tailored per tool category`
			`ANALYSIS_PROMPTS = {`
			`'default': (`
			`"You are HAL, the AUTARCH security analyst. Analyze the following tool output. "`
			`"Be concise but thorough. Structure your response as:\n"`
			`"1. Summary: One sentence about what this data shows\n"`
			`"2. Findings: List any issues, anomalies, or notable items\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Recommendation: What the user should do (if anything)\n\n"`
			`"Tool: {tool_name}\n"`
			`"Context: {context}\n\n"`
			`"--- OUTPUT ---\n{output}\n--- END ---"`
			`),`
			`'log_analysis': (`
			`"You are HAL, the AUTARCH security analyst. Analyze these system logs for security issues. "`
			`"Look for: failed login attempts, privilege escalation, suspicious processes, "`
			`"unusual network connections, file permission changes, service failures, "`
			`"and any indicators of compromise.\n\n"`
			`"Be specific about line numbers or timestamps where issues appear.\n\n"`
			`"Structure your response as:\n"`
			`"1. Summary: What these logs show\n"`
			`"2. Issues Found: Specific problems with details\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Fix: Exact commands or steps to resolve each issue\n\n"`
			`"--- LOGS ---\n{output}\n--- END ---"`
			`),`
			`'network': (`
			`"You are HAL, the AUTARCH network security analyst. Analyze this network data. "`
			`"Look for: suspicious connections, unusual ports, potential backdoors, "`
			`"ARP anomalies, rogue devices, and any signs of intrusion.\n\n"`
			`"Structure your response as:\n"`
			`"1. Summary: Network status overview\n"`
			`"2. Findings: Suspicious items with details\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Action: Commands to investigate or fix issues\n\n"`
			`"Tool: {tool_name}\n\n"`
			`"--- DATA ---\n{output}\n--- END ---"`
			`),`
			`'defense': (`
			`"You are HAL, the AUTARCH defensive security analyst. "`
			`"Analyze ONLY the specific output provided below. Do NOT expand scope beyond what was tested. "`
			`"If this is a single check (firewall only, SSH only, etc.), only comment on that one check. "`
			`"Do NOT perform or suggest a full system audit unless the output contains multiple checks.\n\n"`
			`"Keep your response short and focused on the actual data shown.\n\n"`
			`"Structure:\n"`
			`"1. Summary (one sentence)\n"`
			`"2. Issues found (if any)\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Fix commands (only for issues found in THIS output)\n\n"`
			`"Tool: {tool_name}\nContext: {context}\n\n"`
			`"--- OUTPUT ---\n{output}\n--- END ---"`
			`),`
			`'counter': (`
			`"You are HAL, the AUTARCH threat analyst. Analyze this threat detection output. "`
			`"Look for active compromises, persistent threats, backdoors, rootkits, "`
			`"and indicators of compromise.\n\n"`
			`"Be urgent and specific about any active threats found.\n\n"`
			`"Structure your response as:\n"`
			`"1. Summary: Threat landscape\n"`
			`"2. Active Threats: Any confirmed or suspected compromises\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Immediate Action: Steps to contain and remediate\n\n"`
			`"--- DATA ---\n{output}\n--- END ---"`
			`),`
			`'android': (`
			`"You are HAL, the AUTARCH mobile security analyst. Analyze this Android device output. "`
			`"Look for: suspicious apps, dangerous permissions, stalkerware indicators, "`
			`"root detection, SELinux status, unusual processes, and security misconfigurations.\n\n"`
			`"Structure your response as:\n"`
			`"1. Summary: What this data shows\n"`
			`"2. Findings: Issues or notable items\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Fix: Exact adb or device commands to resolve issues\n\n"`
			`"Tool: {tool_name}\nContext: {context}\n\n"`
			`"--- OUTPUT ---\n{output}\n--- END ---"`
			`),`
			`'analyze': (`
			`"You are HAL, the AUTARCH forensics analyst. Analyze this forensic data. "`
			`"Look for malware indicators, suspicious strings, anomalous file properties, "`
			`"and any signs of tampering or malicious content.\n\n"`
			`"Structure your response as:\n"`
			`"1. Summary: What this data represents\n"`
			`"2. Findings: Notable or suspicious items\n"`
			`"3. Risk Level: CLEAN / LOW / MEDIUM / HIGH / CRITICAL\n"`
			`"4. Recommendation: Further analysis or actions needed\n\n"`
			`"Tool: {tool_name}\n\n"`
			`"--- DATA ---\n{output}\n--- END ---"`
			`),`
			`}`


			`def is_llm_available() -> bool:`
			`"""Check if any LLM backend is loaded and ready."""`
			`try:`
			`from core.llm import get_llm`
			`llm = get_llm()`
			`return llm is not None and llm.is_loaded`
			`except Exception:`
			`return False`


			`def analyze_output(`
			`tool_name: str,`
			`output: str,`
			`context: str = '',`
			`category: str = 'default',`
			`max_output_chars: int = 8000,`
			`) -> dict:`
			`"""Send tool output to the loaded LLM for analysis.`

			`Args:`
			`tool_name: Name of the tool that produced the output`
			`output: The raw output text to analyze`
			`context: Additional context (e.g., 'syslog', 'auth.log', 'ARP table')`
			`category: Analysis category for prompt selection`
			`max_output_chars: Truncate output to this length to fit context windows`

			`Returns:`
			`dict with keys:`
			`available (bool): Whether LLM was available`
			`analysis (str): The LLM's analysis text`
			`risk_level (str): Extracted risk level (CLEAN/LOW/MEDIUM/HIGH/CRITICAL)`
			`has_fixes (bool): Whether the analysis contains fix commands`
			`tool_name (str): Echo back the tool name`
			`"""`
			`result = {`
			`'available': False,`
			`'analysis': '',`
			`'risk_level': 'unknown',`
			`'has_fixes': False,`
			`'tool_name': tool_name,`
			`}`

			`if not output or not output.strip():`
			`result['analysis'] = 'No output to analyze.'`
			`return result`

			`# Check LLM`
			`try:`
			`from core.llm import get_llm`
			`llm = get_llm()`
			`if not llm or not llm.is_loaded:`
			`result['analysis'] = 'No LLM loaded — enable a model in LLM Settings to get AI analysis.'`
			`return result`
			`except Exception as e:`
			`_log.debug(f"[HAL] LLM not available: {e}")`
			`result['analysis'] = f'LLM not available: {e}'`
			`return result`

			`result['available'] = True`

			`# Truncate output if too long`
			`if len(output) > max_output_chars:`
			`output = output[:max_output_chars] + f'\n\n... [truncated — {len(output)} chars total]'`

			`# Select prompt template`
			`prompt_template = ANALYSIS_PROMPTS.get(category, ANALYSIS_PROMPTS['default'])`
			`prompt = prompt_template.format(`
			`tool_name=tool_name,`
			`output=output,`
			`context=context or 'general',`
			`)`

			`# Detect current OS for context`
			`import platform as _plat`
			`_os_name = _plat.system()`
			`_os_detail = _plat.platform()`

			`# Send to LLM`
			`try:`
			`_log.info(f"[HAL] Analyzing output from {tool_name} ({len(output)} chars, category={category})")`
			`start = time.time()`
			`response = llm.chat(prompt, system_prompt=(`
			`"You are HAL, the AI security analyst for the AUTARCH platform. "`
			`f"This system is running {_os_name} ({_os_detail}). "`
			`"ONLY suggest commands for this operating system. "`
			`"If the tool output is from the WRONG platform (e.g. Windows scan results on a Linux host), "`
			`"immediately tell the user they ran the wrong scan and point them to the correct one. "`
			"Do NOT use markdown formatting. Plain text only. No ** or ## or ``` or bullet points. "
			`"Be specific, cite evidence from the data, and provide exact commands to fix issues."`
			`))`
			`elapsed = time.time() - start`
			`_log.info(f"[HAL] Analysis complete in {elapsed:.1f}s ({len(response)} chars)")`

			`result['analysis'] = response`

			`# Extract risk level from response`
			`for level in ['CRITICAL', 'HIGH', 'MEDIUM', 'LOW', 'CLEAN']:`
			`if level in response.upper():`
			`result['risk_level'] = level.lower()`
			`break`

			`# Check if response contains fix commands`
			`result['has_fixes'] = any(x in response for x in [`
			'```', 'sudo ', 'systemctl ', 'iptables ', 'chmod ', 'chown ',
			`'apt ', 'ufw ', 'sshd_config', 'Fix:', 'fix:', 'Command:',`
			`'adb ', 'fastboot ', 'pm ', 'am ', 'dumpsys ', 'settings put ',`
			`])`

			`except Exception as e:`
			`_log.error(f"[HAL] Analysis failed: {e}", exc_info=True)`
			`result['analysis'] = f'Analysis failed: {e}'`

			`return result`


			`def extract_fix_commands(analysis: str) -> list:`
			`"""Extract actionable commands from an analysis response.`

			`Looks for commands in code blocks or after 'Fix:' / 'Command:' markers.`
			`Returns a list of command strings.`
			`"""`
			`commands = []`
			`in_code_block = False`
			`code_block = []`

			`for line in analysis.split('\n'):`
			`stripped = line.strip()`

			`# Code blocks`
			if stripped.startswith('```'):
			`if in_code_block:`
			`if code_block:`
			`commands.extend(code_block)`
			`code_block = []`
			`in_code_block = not in_code_block`
			`continue`

			`if in_code_block:`
			`if stripped and not stripped.startswith('#'):`
			`code_block.append(stripped)`
			`continue`

			`# Inline commands after markers`
			`if stripped.startswith(('sudo ', '$ ', '# ')) and len(stripped) > 5:`
			`cmd = stripped.lstrip('$# ').strip()`
			`if cmd:`
			`commands.append(cmd)`

			`return commands`