"""AUTARCH Log Correlator

Syslog ingestion, pattern matching, anomaly detection, alert rules,
timeline correlation, and mini-SIEM functionality.
"""

DESCRIPTION = "Log correlation & anomaly detection (mini-SIEM)"
AUTHOR = "darkHal"
VERSION = "1.0"
CATEGORY = "defense"

import os
import re
import json
import time
import threading
from pathlib import Path
from datetime import datetime, timezone
from collections import Counter, defaultdict
from typing import Dict, List, Optional, Any

try:
    from core.paths import get_data_dir
except ImportError:
    def get_data_dir():
        """Fallback data directory: ``<parent of this file's directory>/data``."""
        return str(Path(__file__).parent.parent / 'data')


def _as_text(value: Any) -> str:
    """Return *value* as a string, mapping ``None`` to the empty string.

    Parsed JSON logs can carry non-string fields (numbers, nested objects);
    everything downstream concatenates, slices and regex-searches these
    fields, so they are coerced at the point of use.
    """
    if value is None:
        return ''
    return value if isinstance(value, str) else str(value)


# ── Built-in Detection Rules ────────────────────────────────────────────────

DEFAULT_RULES = [
    {
        'id': 'brute_force_ssh',
        'name': 'SSH Brute Force',
        'pattern': r'(Failed password|authentication failure).*ssh',
        'severity': 'high',
        'threshold': 5,
        'window_seconds': 60,
        'description': 'Multiple failed SSH login attempts'
    },
    {
        'id': 'brute_force_web',
        'name': 'Web Login Brute Force',
        'pattern': r'(401|403).*POST.*(login|auth|signin)',
        'severity': 'high',
        'threshold': 10,
        'window_seconds': 60,
        'description': 'Multiple failed web login attempts'
    },
    {
        'id': 'sql_injection',
        'name': 'SQL Injection Attempt',
        'pattern': r"(UNION\s+SELECT|OR\s+1\s*=\s*1|DROP\s+TABLE|'--|\bSLEEP\()",
        'severity': 'critical',
        'threshold': 1,
        'window_seconds': 0,
        'description': 'SQL injection pattern detected'
    },
    {
        # NOTE(review): the source text of this rule was lost after
        # "'pattern': r'(" (everything up to the parse_line signature is
        # missing). The pattern and the remaining fields below are a
        # reconstruction, and any built-in rules that followed this one could
        # not be recovered — restore them from version control.
        'id': 'xss_attempt',
        'name': 'XSS Attempt',
        'pattern': r'(<script|javascript:|onerror\s*=|onload\s*=)',
        'severity': 'high',
        'threshold': 1,
        'window_seconds': 0,
        'description': 'Cross-site scripting pattern detected'
    },
]


# ── Log Parser ──────────────────────────────────────────────────────────────

class LogParser:
    """Parse raw log lines into dictionaries with a common set of keys.

    NOTE(review): the class header and the three regex constants were lost
    from the source text. They are reconstructed from how ``parse_line`` uses
    them (five groups for syslog, six for Apache/Nginx, a whole-line check for
    JSON); confirm the exact patterns against the original file.
    """

    # Groups: 1=timestamp, 2=host, 3=program, 4=pid (optional), 5=message
    SYSLOG_RE = re.compile(
        r'^(\w{3}\s+\d{1,2}\s+\d{2}:\d{2}:\d{2})\s+(\S+)\s+'
        r'([^\s\[:]+)(?:\[(\d+)\])?:\s*(.*)$'
    )
    # Groups: 1=client, 2=timestamp, 3=method, 4=path, 5=status, 6=size
    APACHE_RE = re.compile(
        r'^(\S+)\s+\S+\s+\S+\s+\[([^\]]+)\]\s+'
        r'"(\w+)\s+(\S+)[^"]*"\s+(\d{3})\s+(\d+|-)'
    )
    # A stripped line that looks like a single JSON object
    JSON_LOG_RE = re.compile(r'^\{.*\}$')

    @staticmethod
    def parse_line(line: str) -> Optional[Dict]:
        """Parse a single log line.

        Formats are tried in order: JSON object, syslog, Apache/Nginx access
        log, then a generic fallback that keeps the line as the message.

        Args:
            line: One raw log line; surrounding whitespace is ignored.

        Returns:
            A dict with at least ``format``, ``timestamp``, ``message`` and
            ``raw`` keys, or ``None`` when the line is blank.
        """
        line = line.strip()
        if not line:
            return None

        # Try JSON format
        if LogParser.JSON_LOG_RE.match(line):
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                data = None
            if isinstance(data, dict):
                return {
                    'format': 'json',
                    'timestamp': data.get('timestamp', data.get('time', data.get('@timestamp', ''))),
                    'source': data.get('source', data.get('host', '')),
                    'program': data.get('program', data.get('service', data.get('logger', ''))),
                    # Coerced to text: rule matching and search treat the
                    # message as a string.
                    'message': _as_text(data.get('message', data.get('msg', str(data)))),
                    'level': data.get('level', data.get('severity', 'info')),
                    'raw': line
                }

        # Try syslog format
        m = LogParser.SYSLOG_RE.match(line)
        if m:
            return {
                'format': 'syslog',
                'timestamp': m.group(1),
                'source': m.group(2),
                'program': m.group(3),
                'pid': m.group(4),
                'message': m.group(5),
                'raw': line
            }

        # Try Apache/Nginx format
        m = LogParser.APACHE_RE.match(line)
        if m:
            size = m.group(6)
            return {
                'format': 'apache',
                'timestamp': m.group(2),
                'source': m.group(1),
                'method': m.group(3),
                'path': m.group(4),
                'status': int(m.group(5)),
                # Access logs write "-" when no body was sent
                'size': int(size) if size.isdigit() else 0,
                'message': line,
                'raw': line
            }

        # Generic fallback
        return {
            'format': 'unknown',
            'timestamp': '',
            'message': line,
            'raw': line
        }


# ── Log Correlator Engine ────────────────────────────────────────────────────

class LogCorrelator:
    """Log correlation and anomaly detection engine.

    Holds ingested log entries and generated alerts in memory, matches every
    entry against the built-in and custom regex rules, and persists custom
    rules and the most recent alerts as JSON under the module data directory.
    """

    def __init__(self):
        self.data_dir = os.path.join(get_data_dir(), 'log_correlator')
        os.makedirs(self.data_dir, exist_ok=True)

        self.rules: List[Dict] = list(DEFAULT_RULES)
        self.alerts: List[Dict] = []
        self.logs: List[Dict] = []
        self.sources: Dict[str, Dict] = {}
        # rule id -> wall-clock times of recent matches (threshold rules only)
        self._rule_hits: Dict[str, List[float]] = defaultdict(list)
        self._lock = threading.Lock()
        self._load_custom_rules()
        self._load_alerts()

    @staticmethod
    def _normalize_rule(raw: Any) -> Optional[Dict]:
        """Return a usable copy of a rule loaded from disk, or ``None``.

        A rule needs string ``id`` and ``pattern`` fields; the other fields
        read during detection are filled with defaults so a hand-edited rules
        file cannot crash rule matching later.
        """
        if not isinstance(raw, dict):
            return None
        rule_id = raw.get('id')
        pattern = raw.get('pattern')
        if not isinstance(rule_id, str) or not isinstance(pattern, str):
            return None
        try:
            threshold = int(raw.get('threshold', 1))
            window_seconds = int(raw.get('window_seconds', 0))
        except (TypeError, ValueError):
            return None
        return {
            **raw,
            'name': raw.get('name', rule_id),
            'severity': raw.get('severity', 'medium'),
            'threshold': threshold,
            'window_seconds': window_seconds,
            'description': raw.get('description', '')
        }

    def _load_custom_rules(self):
        """Append valid rules from ``custom_rules.json`` to the rule list.

        Loading is best-effort: an unreadable or malformed file is ignored,
        as are entries that are unusable or reuse an id already in use.
        """
        rules_file = os.path.join(self.data_dir, 'custom_rules.json')
        if not os.path.exists(rules_file):
            return
        try:
            with open(rules_file, encoding='utf-8') as f:
                custom = json.load(f)
        except (OSError, ValueError):
            return
        if not isinstance(custom, list):
            return
        known_ids = {r['id'] for r in self.rules}
        for raw in custom:
            rule = self._normalize_rule(raw)
            if rule is None or rule['id'] in known_ids:
                continue
            known_ids.add(rule['id'])
            self.rules.append(rule)

    def _save_custom_rules(self):
        """Write every non-built-in rule to ``custom_rules.json``."""
        # Only save non-default rules
        default_ids = {r['id'] for r in DEFAULT_RULES}
        custom = [r for r in self.rules if r['id'] not in default_ids]
        rules_file = os.path.join(self.data_dir, 'custom_rules.json')
        with open(rules_file, 'w', encoding='utf-8') as f:
            json.dump(custom, f, indent=2)

    def _load_alerts(self):
        """Load previously saved alerts; a missing or bad file is ignored."""
        alerts_file = os.path.join(self.data_dir, 'alerts.json')
        if not os.path.exists(alerts_file):
            return
        try:
            with open(alerts_file, encoding='utf-8') as f:
                loaded = json.load(f)
        except (OSError, ValueError):
            return
        if isinstance(loaded, list):
            self.alerts = [a for a in loaded if isinstance(a, dict)]

    def _save_alerts(self):
        """Persist the 1000 most recent alerts to ``alerts.json``."""
        alerts_file = os.path.join(self.data_dir, 'alerts.json')
        with open(alerts_file, 'w', encoding='utf-8') as f:
            json.dump(self.alerts[-1000:], f, indent=2)

    # ── Log Ingestion ────────────────────────────────────────────────────

    def _ingest_lines(self, lines, source: str):
        """Parse and store each line, running detection on every entry.

        Args:
            lines: Any iterable of raw log lines.
            source: Label stored on each entry as ``source_file``.

        Returns:
            ``(lines_parsed, alerts_generated)``.
        """
        parsed = 0
        alerts_generated = 0
        for line in lines:
            entry = LogParser.parse_line(line)
            if not entry:
                continue
            entry['source_file'] = source
            self.logs.append(entry)
            parsed += 1
            # Run detection rules
            alerts_generated += len(self._check_rules(entry))
        return parsed, alerts_generated

    def ingest_file(self, filepath: str, source_name: Optional[str] = None) -> Dict:
        """Ingest log file for analysis.

        Args:
            filepath: Path of the log file to read.
            source_name: Label for the source; defaults to the file name.

        Returns:
            ``{'ok': True, 'source', 'lines_parsed', 'alerts_generated'}`` on
            success, otherwise ``{'ok': False, 'error': <message>}``.
        """
        if not os.path.exists(filepath):
            return {'ok': False, 'error': 'File not found'}

        source = source_name or Path(filepath).name
        try:
            # Undecodable bytes are dropped rather than failing the import
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                parsed, alerts_generated = self._ingest_lines(f, source)

            self.sources[source] = {
                'file': filepath,
                'lines': parsed,
                'ingested': datetime.now(timezone.utc).isoformat()
            }

            if alerts_generated:
                self._save_alerts()
        except OSError as e:
            return {'ok': False, 'error': str(e)}

        return {
            'ok': True,
            'source': source,
            'lines_parsed': parsed,
            'alerts_generated': alerts_generated
        }

    def ingest_text(self, text: str, source_name: str = 'paste') -> Dict:
        """Ingest log text directly.

        Args:
            text: One or more log lines separated by newlines.
            source_name: Label stored on each entry as ``source_file``.

        Returns:
            ``{'ok': True, 'source', 'lines_parsed', 'alerts_generated'}``.
        """
        parsed, alerts_generated = self._ingest_lines(
            text.strip().splitlines(), source_name
        )

        if alerts_generated:
            self._save_alerts()

        return {
            'ok': True,
            'source': source_name,
            'lines_parsed': parsed,
            'alerts_generated': alerts_generated
        }

    # ── Detection ────────────────────────────────────────────────────────

    def _check_rules(self, entry: Dict) -> List[Dict]:
        """Check log entry against detection rules.

        A rule with ``threshold > 1`` and ``window_seconds > 0`` only alerts
        once that many matches were seen within the window; the window is
        measured in wall-clock time at ingestion, not from log timestamps.
        Every other matching rule alerts immediately. Rules whose pattern is
        not a valid regex are skipped.

        Args:
            entry: A parsed log entry as produced by ``LogParser.parse_line``.

        Returns:
            The alerts raised for this entry (also appended to
            ``self.alerts``).
        """
        new_alerts = []
        message_text = _as_text(entry.get('message', ''))
        raw_text = _as_text(entry.get('raw', ''))
        haystack = message_text + ' ' + raw_text
        now = time.time()

        for rule in self.rules:
            try:
                if not re.search(rule['pattern'], haystack, re.I):
                    continue
            except re.error:
                continue

            rule_id = rule['id']

            # Threshold check
            if rule.get('threshold', 1) > 1 and rule.get('window_seconds', 0) > 0:
                with self._lock:
                    self._rule_hits[rule_id].append(now)
                    # Clean old hits
                    window = rule['window_seconds']
                    self._rule_hits[rule_id] = [
                        t for t in self._rule_hits[rule_id] if now - t <= window
                    ]
                    if len(self._rule_hits[rule_id]) < rule['threshold']:
                        continue

            alert = {
                'timestamp': datetime.now(timezone.utc).isoformat(),
                'rule_id': rule_id,
                'rule_name': rule.get('name', rule_id),
                'severity': rule.get('severity', 'medium'),
                'description': rule.get('description', ''),
                'source': entry.get('source_file', ''),
                'log_entry': message_text[:200],
                'raw': raw_text[:300]
            }
            self.alerts.append(alert)
            new_alerts.append(alert)

        return new_alerts

    # ── Rule Management ──────────────────────────────────────────────────

    def add_rule(self, rule_id: str, name: str, pattern: str,
                 severity: str = 'medium', threshold: int = 1,
                 window_seconds: int = 0, description: str = '') -> Dict:
        """Add custom detection rule.

        The rule is rejected when the pattern is not a valid regex or when
        ``rule_id`` is already used: a duplicate of a built-in id would never
        be persisted and could not be removed again.

        Returns:
            ``{'ok': True, 'rule': <rule>}`` or ``{'ok': False, 'error': ...}``.
        """
        # Validate regex
        try:
            re.compile(pattern)
        except re.error as e:
            return {'ok': False, 'error': f'Invalid regex: {e}'}

        if any(r['id'] == rule_id for r in self.rules):
            return {'ok': False, 'error': f'Rule id already exists: {rule_id}'}

        rule = {
            'id': rule_id,
            'name': name,
            'pattern': pattern,
            'severity': severity,
            'threshold': threshold,
            'window_seconds': window_seconds,
            'description': description
        }
        self.rules.append(rule)
        self._save_custom_rules()
        return {'ok': True, 'rule': rule}

    def remove_rule(self, rule_id: str) -> Dict:
        """Remove a custom rule; built-in rules cannot be removed."""
        default_ids = {r['id'] for r in DEFAULT_RULES}
        if rule_id in default_ids:
            return {'ok': False, 'error': 'Cannot remove built-in rule'}

        before = len(self.rules)
        self.rules = [r for r in self.rules if r['id'] != rule_id]
        if len(self.rules) < before:
            self._save_custom_rules()
            return {'ok': True}
        return {'ok': False, 'error': 'Rule not found'}

    def get_rules(self) -> List[Dict]:
        """List all detection rules, each flagged with a ``builtin`` boolean."""
        default_ids = {r['id'] for r in DEFAULT_RULES}
        return [{**r, 'builtin': r['id'] in default_ids} for r in self.rules]

    # ── Analysis ─────────────────────────────────────────────────────────

    def search_logs(self, query: str, source: Optional[str] = None,
                    limit: int = 100) -> List[Dict]:
        """Search ingested logs, newest first.

        Args:
            query: Case-insensitive substring looked up in message + raw line.
            source: When given, only entries from this ``source_file``.
            limit: Maximum number of entries returned; ``<= 0`` returns none.
        """
        if limit <= 0:
            return []

        needle = query.lower()
        results = []
        for entry in reversed(self.logs):
            if source and entry.get('source_file') != source:
                continue
            text = _as_text(entry.get('message', '')) + _as_text(entry.get('raw', ''))
            if needle in text.lower():
                results.append(entry)
                if len(results) >= limit:
                    break
        return results

    def get_stats(self) -> Dict:
        """Get correlator statistics (totals and top-10 breakdowns)."""
        severity_counts = Counter(a.get('severity', '') for a in self.alerts)
        rule_counts = Counter(a.get('rule_id', '') for a in self.alerts)
        source_counts = Counter(e.get('source_file', '') for e in self.logs)

        return {
            'total_logs': len(self.logs),
            'total_alerts': len(self.alerts),
            'sources': len(self.sources),
            'rules': len(self.rules),
            'alerts_by_severity': dict(severity_counts),
            'top_rules': dict(rule_counts.most_common(10)),
            'top_sources': dict(source_counts.most_common(10))
        }

    def get_alerts(self, severity: Optional[str] = None, limit: int = 100) -> List[Dict]:
        """Get the most recent alerts with optional severity filtering.

        ``limit <= 0`` returns an empty list (a plain ``[-limit:]`` slice
        would return every alert for ``limit == 0``).
        """
        if limit <= 0:
            return []
        alerts = self.alerts
        if severity:
            alerts = [a for a in alerts if a.get('severity') == severity]
        return alerts[-limit:]

    def clear_alerts(self):
        """Clear all alerts and persist the now-empty list."""
        self.alerts.clear()
        self._save_alerts()

    def clear_logs(self):
        """Clear ingested logs and the record of ingested sources."""
        self.logs.clear()
        self.sources.clear()

    def get_sources(self) -> Dict:
        """Get ingested log sources (the live mapping, not a copy)."""
        return self.sources

    def get_timeline(self, hours: int = 24) -> List[Dict]:
        """Get alert timeline grouped by hour.

        Alerts are bucketed by the first 13 characters of their timestamp
        (``YYYY-MM-DDTHH``). ``hours`` caps the number of most recent
        *non-empty* hour buckets returned; ``<= 0`` returns none.
        """
        if hours <= 0:
            return []

        timeline = defaultdict(
            lambda: {'count': 0, 'critical': 0, 'high': 0, 'medium': 0, 'low': 0}
        )
        for alert in self.alerts:
            ts = _as_text(alert.get('timestamp', ''))[:13]  # YYYY-MM-DDTHH
            bucket = timeline[ts]
            bucket['count'] += 1
            sev = alert.get('severity', 'low')
            # .get() so severities outside the four preset ones still count
            bucket[sev] = bucket.get(sev, 0) + 1

        return [{'hour': k, **v} for k, v in sorted(timeline.items())[-hours:]]


# ── Singleton ────────────────────────────────────────────────────────────────

_instance = None


def get_log_correlator() -> LogCorrelator:
    """Return the shared ``LogCorrelator``, creating it on first use."""
    global _instance
    if _instance is None:
        _instance = LogCorrelator()
    return _instance
# ── CLI Interface ────────────────────────────────────────────────────────────

def _prompt(text: str = '') -> Optional[str]:
    """Read one line from the user; return ``None`` on EOF or Ctrl-C.

    Lets the menu loop exit cleanly instead of dying with a traceback when
    stdin is closed or the user interrupts a prompt.
    """
    try:
        return input(text)
    except (EOFError, KeyboardInterrupt):
        return None


def run():
    """CLI entry point for Log Correlator module.

    Shows a numbered menu in a loop until the user picks ``0`` or input ends
    (EOF / Ctrl-C). Each option calls the shared correlator engine and prints
    a short summary of the result.
    """
    engine = get_log_correlator()

    while True:
        # Refreshed every pass so the header reflects the previous action
        stats = engine.get_stats()
        print(f"\n{'='*60}")
        print(f" Log Correlator ({stats['total_logs']} logs, {stats['total_alerts']} alerts)")
        print(f"{'='*60}")
        print()
        print(" 1 — Ingest Log File")
        print(" 2 — Paste Log Text")
        print(" 3 — Search Logs")
        print(" 4 — View Alerts")
        print(" 5 — Manage Rules")
        print(" 6 — View Stats")
        print(" 7 — Alert Timeline")
        print(" 8 — Clear Alerts")
        print(" 0 — Back")
        print()

        choice = _prompt(" > ")
        if choice is None:
            print()
            break
        choice = choice.strip()

        if choice == '0':
            break
        elif choice == '1':
            filepath = (_prompt(" Log file path: ") or '').strip()
            if filepath:
                result = engine.ingest_file(filepath)
                if result['ok']:
                    print(f" Parsed {result['lines_parsed']} lines, "
                          f"{result['alerts_generated']} alerts generated")
                else:
                    print(f" Error: {result['error']}")
        elif choice == '2':
            print(" Paste log lines (blank line to finish):")
            lines = []
            while True:
                line = _prompt()
                # A blank line or end of input finishes the paste
                if not line:
                    break
                lines.append(line)
            if lines:
                result = engine.ingest_text('\n'.join(lines))
                print(f" Parsed {result['lines_parsed']} lines, "
                      f"{result['alerts_generated']} alerts")
        elif choice == '3':
            query = (_prompt(" Search query: ") or '').strip()
            if query:
                results = engine.search_logs(query)
                print(f" {len(results)} matches:")
                for r in results[:10]:
                    print(f" [{r.get('source_file', '?')}] {str(r.get('message', ''))[:80]}")
        elif choice == '4':
            sev = (_prompt(" Severity filter (blank=all): ") or '').strip() or None
            alerts = engine.get_alerts(severity=sev)
            for a in alerts[-15:]:
                # .get(): alerts restored from disk may lack some keys
                severity = str(a.get('severity', '?'))
                rule_name = a.get('rule_name', '?')
                log_entry = str(a.get('log_entry', ''))
                print(f" [{severity:<8}] {rule_name}: {log_entry[:60]}")
        elif choice == '5':
            rules = engine.get_rules()
            for r in rules:
                builtin = ' (built-in)' if r.get('builtin') else ''
                print(f" {r['id']}: {r.get('name', r['id'])} [{r.get('severity', '?')}]{builtin}")
        elif choice == '6':
            print(f" Logs: {stats['total_logs']}")
            print(f" Alerts: {stats['total_alerts']}")
            print(f" Sources: {stats['sources']}")
            print(f" Rules: {stats['rules']}")
            if stats['alerts_by_severity']:
                print(f" By severity: {stats['alerts_by_severity']}")
        elif choice == '7':
            timeline = engine.get_timeline()
            for t in timeline[-12:]:
                # Bar is capped at 40 characters; the exact count follows it
                bar = '#' * min(t['count'], 40)
                print(f" {t['hour']} | {bar} ({t['count']})")
        elif choice == '8':
            engine.clear_alerts()
            print(" Alerts cleared")