Autarch/modules/cloud_scan.py

449 lines
18 KiB
Python
Raw Normal View History

"""AUTARCH Cloud Security Scanner
AWS/Azure/GCP bucket enumeration, IAM misconfiguration detection, exposed
service scanning, and cloud resource discovery.
"""
DESCRIPTION = "Cloud infrastructure security scanning"
AUTHOR = "darkHal"
VERSION = "1.0"
CATEGORY = "offense"
import os
import re
import json
import time
import threading
from pathlib import Path
from typing import Dict, List, Optional, Any
try:
from core.paths import get_data_dir
except ImportError:
def get_data_dir():
return str(Path(__file__).parent.parent / 'data')
try:
import requests
HAS_REQUESTS = True
except ImportError:
HAS_REQUESTS = False
# ── Cloud Provider Endpoints ─────────────────────────────────────────────────
AWS_REGIONS = [
'us-east-1', 'us-east-2', 'us-west-1', 'us-west-2',
'eu-west-1', 'eu-west-2', 'eu-central-1',
'ap-southeast-1', 'ap-southeast-2', 'ap-northeast-1',
]
COMMON_BUCKET_NAMES = [
'backup', 'backups', 'data', 'dev', 'staging', 'prod', 'production',
'logs', 'assets', 'media', 'uploads', 'images', 'static', 'public',
'private', 'internal', 'config', 'configs', 'db', 'database',
'archive', 'old', 'temp', 'tmp', 'test', 'debug', 'admin',
'www', 'web', 'api', 'app', 'mobile', 'docs', 'documents',
'reports', 'export', 'import', 'share', 'shared',
]
METADATA_ENDPOINTS = {
'aws': 'http://169.254.169.254/latest/meta-data/',
'gcp': 'http://metadata.google.internal/computeMetadata/v1/',
'azure': 'http://169.254.169.254/metadata/instance?api-version=2021-02-01',
'digitalocean': 'http://169.254.169.254/metadata/v1/',
}
# ── Cloud Scanner ────────────────────────────────────────────────────────────
class CloudScanner:
"""Cloud infrastructure security scanner."""
def __init__(self):
self.data_dir = os.path.join(get_data_dir(), 'cloud_scan')
os.makedirs(self.data_dir, exist_ok=True)
self.results: List[Dict] = []
self._jobs: Dict[str, Dict] = {}
# ── S3 Bucket Enumeration ────────────────────────────────────────────
def enum_s3_buckets(self, keyword: str, prefixes: List[str] = None,
suffixes: List[str] = None) -> str:
"""Enumerate S3 buckets with naming permutations. Returns job_id."""
if not HAS_REQUESTS:
return ''
job_id = f's3enum_{int(time.time())}'
self._jobs[job_id] = {
'type': 's3_enum', 'status': 'running',
'found': [], 'checked': 0, 'total': 0
}
def _enum():
prefixes_list = prefixes or ['', 'dev-', 'staging-', 'prod-', 'test-', 'backup-']
suffixes_list = suffixes or ['', '-backup', '-data', '-assets', '-logs', '-dev',
'-staging', '-prod', '-public', '-private']
bucket_names = set()
for pfx in prefixes_list:
for sfx in suffixes_list:
bucket_names.add(f'{pfx}{keyword}{sfx}')
# Add common patterns
for common in COMMON_BUCKET_NAMES:
bucket_names.add(f'{keyword}-{common}')
bucket_names.add(f'{common}-{keyword}')
self._jobs[job_id]['total'] = len(bucket_names)
found = []
for name in bucket_names:
try:
# Check S3 bucket
url = f'https://{name}.s3.amazonaws.com'
resp = requests.head(url, timeout=5, allow_redirects=True)
self._jobs[job_id]['checked'] += 1
if resp.status_code == 200:
# Try listing
list_resp = requests.get(url, timeout=5)
listable = '<ListBucketResult' in list_resp.text
found.append({
'bucket': name, 'provider': 'aws',
'url': url, 'status': resp.status_code,
'listable': listable, 'public': True
})
elif resp.status_code == 403:
found.append({
'bucket': name, 'provider': 'aws',
'url': url, 'status': 403,
'listable': False, 'public': False,
'exists': True
})
except Exception:
self._jobs[job_id]['checked'] += 1
self._jobs[job_id]['found'] = found
self._jobs[job_id]['status'] = 'complete'
threading.Thread(target=_enum, daemon=True).start()
return job_id
# ── GCS Bucket Enumeration ───────────────────────────────────────────
def enum_gcs_buckets(self, keyword: str) -> str:
"""Enumerate Google Cloud Storage buckets. Returns job_id."""
if not HAS_REQUESTS:
return ''
job_id = f'gcsenum_{int(time.time())}'
self._jobs[job_id] = {
'type': 'gcs_enum', 'status': 'running',
'found': [], 'checked': 0, 'total': 0
}
def _enum():
names = set()
for suffix in ['', '-data', '-backup', '-assets', '-staging', '-prod', '-dev', '-logs']:
names.add(f'{keyword}{suffix}')
self._jobs[job_id]['total'] = len(names)
found = []
for name in names:
try:
url = f'https://storage.googleapis.com/{name}'
resp = requests.head(url, timeout=5)
self._jobs[job_id]['checked'] += 1
if resp.status_code in (200, 403):
found.append({
'bucket': name, 'provider': 'gcp',
'url': url, 'status': resp.status_code,
'public': resp.status_code == 200
})
except Exception:
self._jobs[job_id]['checked'] += 1
self._jobs[job_id]['found'] = found
self._jobs[job_id]['status'] = 'complete'
threading.Thread(target=_enum, daemon=True).start()
return job_id
# ── Azure Blob Enumeration ───────────────────────────────────────────
def enum_azure_blobs(self, keyword: str) -> str:
"""Enumerate Azure Blob Storage containers. Returns job_id."""
if not HAS_REQUESTS:
return ''
job_id = f'azureenum_{int(time.time())}'
self._jobs[job_id] = {
'type': 'azure_enum', 'status': 'running',
'found': [], 'checked': 0, 'total': 0
}
def _enum():
# Storage account names
accounts = [keyword, f'{keyword}storage', f'{keyword}data',
f'{keyword}backup', f'{keyword}dev', f'{keyword}prod']
containers = ['$web', 'data', 'backup', 'uploads', 'assets',
'logs', 'public', 'media', 'images']
total = len(accounts) * len(containers)
self._jobs[job_id]['total'] = total
found = []
for account in accounts:
for container in containers:
try:
url = f'https://{account}.blob.core.windows.net/{container}?restype=container&comp=list'
resp = requests.get(url, timeout=5)
self._jobs[job_id]['checked'] += 1
if resp.status_code == 200:
found.append({
'account': account, 'container': container,
'provider': 'azure', 'url': url,
'status': resp.status_code, 'public': True
})
elif resp.status_code == 403:
found.append({
'account': account, 'container': container,
'provider': 'azure', 'url': url,
'status': 403, 'exists': True, 'public': False
})
except Exception:
self._jobs[job_id]['checked'] += 1
self._jobs[job_id]['found'] = found
self._jobs[job_id]['status'] = 'complete'
threading.Thread(target=_enum, daemon=True).start()
return job_id
# ── Exposed Services ─────────────────────────────────────────────────
def scan_exposed_services(self, target: str) -> Dict:
"""Check for commonly exposed cloud services on a target."""
if not HAS_REQUESTS:
return {'ok': False, 'error': 'requests not available'}
services = []
checks = [
('/server-status', 'Apache Status'),
('/nginx_status', 'Nginx Status'),
('/.env', 'Environment File'),
('/.git/config', 'Git Config'),
('/.aws/credentials', 'AWS Credentials'),
('/wp-config.php.bak', 'WordPress Config Backup'),
('/phpinfo.php', 'PHP Info'),
('/debug', 'Debug Endpoint'),
('/actuator', 'Spring Actuator'),
('/actuator/env', 'Spring Env'),
('/api/swagger.json', 'Swagger/OpenAPI Spec'),
('/.well-known/security.txt', 'Security Policy'),
('/robots.txt', 'Robots.txt'),
('/sitemap.xml', 'Sitemap'),
('/graphql', 'GraphQL Endpoint'),
('/console', 'Console'),
('/admin', 'Admin Panel'),
('/wp-admin', 'WordPress Admin'),
('/phpmyadmin', 'phpMyAdmin'),
]
for path, name in checks:
try:
url = f'{target.rstrip("/")}{path}'
resp = requests.get(url, timeout=5, allow_redirects=False)
if resp.status_code == 200:
# Check content for sensitive data
sensitive = False
body = resp.text[:2000].lower()
sensitive_indicators = [
'password', 'secret', 'access_key', 'private_key',
'database', 'db_host', 'smtp_pass', 'api_key'
]
if any(ind in body for ind in sensitive_indicators):
sensitive = True
services.append({
'path': path, 'name': name,
'url': url, 'status': resp.status_code,
'size': len(resp.content),
'sensitive': sensitive,
'content_type': resp.headers.get('content-type', '')
})
except Exception:
pass
return {
'ok': True,
'target': target,
'services': services,
'count': len(services)
}
# ── Metadata SSRF Check ──────────────────────────────────────────────
def check_metadata_access(self) -> Dict:
"""Check if cloud metadata service is accessible (SSRF indicator)."""
results = {}
for provider, url in METADATA_ENDPOINTS.items():
try:
headers = {}
if provider == 'gcp':
headers['Metadata-Flavor'] = 'Google'
resp = requests.get(url, headers=headers, timeout=3)
results[provider] = {
'accessible': resp.status_code == 200,
'status': resp.status_code,
'content_preview': resp.text[:200] if resp.status_code == 200 else ''
}
except Exception:
results[provider] = {'accessible': False, 'error': 'Connection failed'}
return {'ok': True, 'metadata': results}
# ── Subdomain / DNS Enumeration for Cloud ────────────────────────────
def enum_cloud_subdomains(self, domain: str) -> Dict:
"""Check for cloud-specific subdomains."""
if not HAS_REQUESTS:
return {'ok': False, 'error': 'requests not available'}
cloud_prefixes = [
'aws', 's3', 'ec2', 'lambda', 'api', 'cdn',
'azure', 'blob', 'cloud', 'gcp', 'storage',
'dev', 'staging', 'prod', 'admin', 'internal',
'vpn', 'mail', 'smtp', 'imap', 'ftp', 'ssh',
'db', 'database', 'redis', 'elastic', 'kibana',
'grafana', 'prometheus', 'jenkins', 'gitlab', 'docker',
'k8s', 'kubernetes', 'consul', 'vault', 'traefik',
]
found = []
import socket
for prefix in cloud_prefixes:
subdomain = f'{prefix}.{domain}'
try:
ip = socket.gethostbyname(subdomain)
found.append({
'subdomain': subdomain,
'ip': ip,
'cloud_hint': self._identify_cloud_ip(ip)
})
except socket.gaierror:
pass
return {'ok': True, 'domain': domain, 'subdomains': found, 'count': len(found)}
def _identify_cloud_ip(self, ip: str) -> str:
"""Try to identify cloud provider from IP."""
# Rough range checks
octets = ip.split('.')
if len(octets) == 4:
first = int(octets[0])
if first in (3, 18, 52, 54, 35):
return 'AWS'
elif first in (20, 40, 52, 104, 13):
return 'Azure'
elif first in (34, 35, 104, 142):
return 'GCP'
return 'Unknown'
# ── Job Management ───────────────────────────────────────────────────
def get_job(self, job_id: str) -> Optional[Dict]:
return self._jobs.get(job_id)
def list_jobs(self) -> List[Dict]:
return [{'id': k, **v} for k, v in self._jobs.items()]
# ── Save Results ─────────────────────────────────────────────────────
def save_results(self, name: str, results: Dict) -> Dict:
"""Save scan results."""
filepath = os.path.join(self.data_dir, f'{name}.json')
with open(filepath, 'w') as f:
json.dump(results, f, indent=2)
return {'ok': True, 'path': filepath}
# ── Singleton ────────────────────────────────────────────────────────────────
_instance = None
def get_cloud_scanner() -> CloudScanner:
global _instance
if _instance is None:
_instance = CloudScanner()
return _instance
# ── CLI Interface ────────────────────────────────────────────────────────────
def run():
"""CLI entry point for Cloud Security module."""
if not HAS_REQUESTS:
print(" Error: requests library required")
return
scanner = get_cloud_scanner()
while True:
print(f"\n{'='*60}")
print(f" Cloud Security Scanner")
print(f"{'='*60}")
print()
print(" 1 — Enumerate S3 Buckets (AWS)")
print(" 2 — Enumerate GCS Buckets (Google)")
print(" 3 — Enumerate Azure Blobs")
print(" 4 — Scan Exposed Services")
print(" 5 — Check Metadata Access (SSRF)")
print(" 6 — Cloud Subdomain Enum")
print(" 0 — Back")
print()
choice = input(" > ").strip()
if choice == '0':
break
elif choice == '1':
kw = input(" Target keyword: ").strip()
if kw:
job_id = scanner.enum_s3_buckets(kw)
print(f" Scanning... (job: {job_id})")
while True:
job = scanner.get_job(job_id)
if job['status'] == 'complete':
for b in job['found']:
status = 'PUBLIC+LISTABLE' if b.get('listable') else \
('PUBLIC' if b.get('public') else 'EXISTS')
print(f" [{status}] {b['bucket']}")
if not job['found']:
print(" No buckets found")
break
time.sleep(1)
elif choice == '4':
target = input(" Target URL: ").strip()
if target:
result = scanner.scan_exposed_services(target)
for s in result['services']:
flag = ' [SENSITIVE]' if s.get('sensitive') else ''
print(f" {s['path']}: {s['name']}{flag}")
elif choice == '5':
result = scanner.check_metadata_access()
for provider, info in result['metadata'].items():
status = 'ACCESSIBLE' if info.get('accessible') else 'blocked'
print(f" {provider}: {status}")
elif choice == '6':
domain = input(" Target domain: ").strip()
if domain:
result = scanner.enum_cloud_subdomains(domain)
for s in result['subdomains']:
print(f" {s['subdomain']}{s['ip']} ({s['cloud_hint']})")