"""AUTARCH Cloud Security Scanner AWS/Azure/GCP bucket enumeration, IAM misconfiguration detection, exposed service scanning, and cloud resource discovery. """ DESCRIPTION = "Cloud infrastructure security scanning" AUTHOR = "darkHal" VERSION = "1.0" CATEGORY = "offense" import os import re import json import time import threading from pathlib import Path from typing import Dict, List, Optional, Any try: from core.paths import get_data_dir except ImportError: def get_data_dir(): return str(Path(__file__).parent.parent / 'data') try: import requests HAS_REQUESTS = True except ImportError: HAS_REQUESTS = False # ── Cloud Provider Endpoints ───────────────────────────────────────────────── AWS_REGIONS = [ 'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2', 'eu-west-1', 'eu-west-2', 'eu-central-1', 'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1', ] COMMON_BUCKET_NAMES = [ 'backup', 'backups', 'data', 'dev', 'staging', 'prod', 'production', 'logs', 'assets', 'media', 'uploads', 'images', 'static', 'public', 'private', 'internal', 'config', 'configs', 'db', 'database', 'archive', 'old', 'temp', 'tmp', 'test', 'debug', 'admin', 'www', 'web', 'api', 'app', 'mobile', 'docs', 'documents', 'reports', 'export', 'import', 'share', 'shared', ] METADATA_ENDPOINTS = { 'aws': 'http://169.254.169.254/latest/meta-data/', 'gcp': 'http://metadata.google.internal/computeMetadata/v1/', 'azure': 'http://169.254.169.254/metadata/instance?api-version=2021-02-01', 'digitalocean': 'http://169.254.169.254/metadata/v1/', } # ── Cloud Scanner ──────────────────────────────────────────────────────────── class CloudScanner: """Cloud infrastructure security scanner.""" def __init__(self): self.data_dir = os.path.join(get_data_dir(), 'cloud_scan') os.makedirs(self.data_dir, exist_ok=True) self.results: List[Dict] = [] self._jobs: Dict[str, Dict] = {} # ── S3 Bucket Enumeration ──────────────────────────────────────────── def enum_s3_buckets(self, keyword: str, prefixes: List[str] = None, suffixes: List[str] = None) -> str: """Enumerate S3 buckets with naming permutations. Returns job_id.""" if not HAS_REQUESTS: return '' job_id = f's3enum_{int(time.time())}' self._jobs[job_id] = { 'type': 's3_enum', 'status': 'running', 'found': [], 'checked': 0, 'total': 0 } def _enum(): prefixes_list = prefixes or ['', 'dev-', 'staging-', 'prod-', 'test-', 'backup-'] suffixes_list = suffixes or ['', '-backup', '-data', '-assets', '-logs', '-dev', '-staging', '-prod', '-public', '-private'] bucket_names = set() for pfx in prefixes_list: for sfx in suffixes_list: bucket_names.add(f'{pfx}{keyword}{sfx}') # Add common patterns for common in COMMON_BUCKET_NAMES: bucket_names.add(f'{keyword}-{common}') bucket_names.add(f'{common}-{keyword}') self._jobs[job_id]['total'] = len(bucket_names) found = [] for name in bucket_names: try: # Check S3 bucket url = f'https://{name}.s3.amazonaws.com' resp = requests.head(url, timeout=5, allow_redirects=True) self._jobs[job_id]['checked'] += 1 if resp.status_code == 200: # Try listing list_resp = requests.get(url, timeout=5) listable = ' str: """Enumerate Google Cloud Storage buckets. Returns job_id.""" if not HAS_REQUESTS: return '' job_id = f'gcsenum_{int(time.time())}' self._jobs[job_id] = { 'type': 'gcs_enum', 'status': 'running', 'found': [], 'checked': 0, 'total': 0 } def _enum(): names = set() for suffix in ['', '-data', '-backup', '-assets', '-staging', '-prod', '-dev', '-logs']: names.add(f'{keyword}{suffix}') self._jobs[job_id]['total'] = len(names) found = [] for name in names: try: url = f'https://storage.googleapis.com/{name}' resp = requests.head(url, timeout=5) self._jobs[job_id]['checked'] += 1 if resp.status_code in (200, 403): found.append({ 'bucket': name, 'provider': 'gcp', 'url': url, 'status': resp.status_code, 'public': resp.status_code == 200 }) except Exception: self._jobs[job_id]['checked'] += 1 self._jobs[job_id]['found'] = found self._jobs[job_id]['status'] = 'complete' threading.Thread(target=_enum, daemon=True).start() return job_id # ── Azure Blob Enumeration ─────────────────────────────────────────── def enum_azure_blobs(self, keyword: str) -> str: """Enumerate Azure Blob Storage containers. Returns job_id.""" if not HAS_REQUESTS: return '' job_id = f'azureenum_{int(time.time())}' self._jobs[job_id] = { 'type': 'azure_enum', 'status': 'running', 'found': [], 'checked': 0, 'total': 0 } def _enum(): # Storage account names accounts = [keyword, f'{keyword}storage', f'{keyword}data', f'{keyword}backup', f'{keyword}dev', f'{keyword}prod'] containers = ['$web', 'data', 'backup', 'uploads', 'assets', 'logs', 'public', 'media', 'images'] total = len(accounts) * len(containers) self._jobs[job_id]['total'] = total found = [] for account in accounts: for container in containers: try: url = f'https://{account}.blob.core.windows.net/{container}?restype=container&comp=list' resp = requests.get(url, timeout=5) self._jobs[job_id]['checked'] += 1 if resp.status_code == 200: found.append({ 'account': account, 'container': container, 'provider': 'azure', 'url': url, 'status': resp.status_code, 'public': True }) elif resp.status_code == 403: found.append({ 'account': account, 'container': container, 'provider': 'azure', 'url': url, 'status': 403, 'exists': True, 'public': False }) except Exception: self._jobs[job_id]['checked'] += 1 self._jobs[job_id]['found'] = found self._jobs[job_id]['status'] = 'complete' threading.Thread(target=_enum, daemon=True).start() return job_id # ── Exposed Services ───────────────────────────────────────────────── def scan_exposed_services(self, target: str) -> Dict: """Check for commonly exposed cloud services on a target.""" if not HAS_REQUESTS: return {'ok': False, 'error': 'requests not available'} services = [] checks = [ ('/server-status', 'Apache Status'), ('/nginx_status', 'Nginx Status'), ('/.env', 'Environment File'), ('/.git/config', 'Git Config'), ('/.aws/credentials', 'AWS Credentials'), ('/wp-config.php.bak', 'WordPress Config Backup'), ('/phpinfo.php', 'PHP Info'), ('/debug', 'Debug Endpoint'), ('/actuator', 'Spring Actuator'), ('/actuator/env', 'Spring Env'), ('/api/swagger.json', 'Swagger/OpenAPI Spec'), ('/.well-known/security.txt', 'Security Policy'), ('/robots.txt', 'Robots.txt'), ('/sitemap.xml', 'Sitemap'), ('/graphql', 'GraphQL Endpoint'), ('/console', 'Console'), ('/admin', 'Admin Panel'), ('/wp-admin', 'WordPress Admin'), ('/phpmyadmin', 'phpMyAdmin'), ] for path, name in checks: try: url = f'{target.rstrip("/")}{path}' resp = requests.get(url, timeout=5, allow_redirects=False) if resp.status_code == 200: # Check content for sensitive data sensitive = False body = resp.text[:2000].lower() sensitive_indicators = [ 'password', 'secret', 'access_key', 'private_key', 'database', 'db_host', 'smtp_pass', 'api_key' ] if any(ind in body for ind in sensitive_indicators): sensitive = True services.append({ 'path': path, 'name': name, 'url': url, 'status': resp.status_code, 'size': len(resp.content), 'sensitive': sensitive, 'content_type': resp.headers.get('content-type', '') }) except Exception: pass return { 'ok': True, 'target': target, 'services': services, 'count': len(services) } # ── Metadata SSRF Check ────────────────────────────────────────────── def check_metadata_access(self) -> Dict: """Check if cloud metadata service is accessible (SSRF indicator).""" results = {} for provider, url in METADATA_ENDPOINTS.items(): try: headers = {} if provider == 'gcp': headers['Metadata-Flavor'] = 'Google' resp = requests.get(url, headers=headers, timeout=3) results[provider] = { 'accessible': resp.status_code == 200, 'status': resp.status_code, 'content_preview': resp.text[:200] if resp.status_code == 200 else '' } except Exception: results[provider] = {'accessible': False, 'error': 'Connection failed'} return {'ok': True, 'metadata': results} # ── Subdomain / DNS Enumeration for Cloud ──────────────────────────── def enum_cloud_subdomains(self, domain: str) -> Dict: """Check for cloud-specific subdomains.""" if not HAS_REQUESTS: return {'ok': False, 'error': 'requests not available'} cloud_prefixes = [ 'aws', 's3', 'ec2', 'lambda', 'api', 'cdn', 'azure', 'blob', 'cloud', 'gcp', 'storage', 'dev', 'staging', 'prod', 'admin', 'internal', 'vpn', 'mail', 'smtp', 'imap', 'ftp', 'ssh', 'db', 'database', 'redis', 'elastic', 'kibana', 'grafana', 'prometheus', 'jenkins', 'gitlab', 'docker', 'k8s', 'kubernetes', 'consul', 'vault', 'traefik', ] found = [] import socket for prefix in cloud_prefixes: subdomain = f'{prefix}.{domain}' try: ip = socket.gethostbyname(subdomain) found.append({ 'subdomain': subdomain, 'ip': ip, 'cloud_hint': self._identify_cloud_ip(ip) }) except socket.gaierror: pass return {'ok': True, 'domain': domain, 'subdomains': found, 'count': len(found)} def _identify_cloud_ip(self, ip: str) -> str: """Try to identify cloud provider from IP.""" # Rough range checks octets = ip.split('.') if len(octets) == 4: first = int(octets[0]) if first in (3, 18, 52, 54, 35): return 'AWS' elif first in (20, 40, 52, 104, 13): return 'Azure' elif first in (34, 35, 104, 142): return 'GCP' return 'Unknown' # ── Job Management ─────────────────────────────────────────────────── def get_job(self, job_id: str) -> Optional[Dict]: return self._jobs.get(job_id) def list_jobs(self) -> List[Dict]: return [{'id': k, **v} for k, v in self._jobs.items()] # ── Save Results ───────────────────────────────────────────────────── def save_results(self, name: str, results: Dict) -> Dict: """Save scan results.""" filepath = os.path.join(self.data_dir, f'{name}.json') with open(filepath, 'w') as f: json.dump(results, f, indent=2) return {'ok': True, 'path': filepath} # ── Singleton ──────────────────────────────────────────────────────────────── _instance = None def get_cloud_scanner() -> CloudScanner: global _instance if _instance is None: _instance = CloudScanner() return _instance # ── CLI Interface ──────────────────────────────────────────────────────────── def run(): """CLI entry point for Cloud Security module.""" if not HAS_REQUESTS: print(" Error: requests library required") return scanner = get_cloud_scanner() while True: print(f"\n{'='*60}") print(f" Cloud Security Scanner") print(f"{'='*60}") print() print(" 1 — Enumerate S3 Buckets (AWS)") print(" 2 — Enumerate GCS Buckets (Google)") print(" 3 — Enumerate Azure Blobs") print(" 4 — Scan Exposed Services") print(" 5 — Check Metadata Access (SSRF)") print(" 6 — Cloud Subdomain Enum") print(" 0 — Back") print() choice = input(" > ").strip() if choice == '0': break elif choice == '1': kw = input(" Target keyword: ").strip() if kw: job_id = scanner.enum_s3_buckets(kw) print(f" Scanning... (job: {job_id})") while True: job = scanner.get_job(job_id) if job['status'] == 'complete': for b in job['found']: status = 'PUBLIC+LISTABLE' if b.get('listable') else \ ('PUBLIC' if b.get('public') else 'EXISTS') print(f" [{status}] {b['bucket']}") if not job['found']: print(" No buckets found") break time.sleep(1) elif choice == '4': target = input(" Target URL: ").strip() if target: result = scanner.scan_exposed_services(target) for s in result['services']: flag = ' [SENSITIVE]' if s.get('sensitive') else '' print(f" {s['path']}: {s['name']}{flag}") elif choice == '5': result = scanner.check_metadata_access() for provider, info in result['metadata'].items(): status = 'ACCESSIBLE' if info.get('accessible') else 'blocked' print(f" {provider}: {status}") elif choice == '6': domain = input(" Target domain: ").strip() if domain: result = scanner.enum_cloud_subdomains(domain) for s in result['subdomains']: print(f" {s['subdomain']} → {s['ip']} ({s['cloud_hint']})")