Autarch/modules/webapp_scanner.py
DigiJ 2322f69516 v2.2.0 — Full arsenal expansion: 16 new security modules
Add WiFi Audit, API Fuzzer, Cloud Scanner, Threat Intel, Log Correlator,
Steganography, Anti-Forensics, BLE Scanner, Forensics, RFID/NFC, Malware
Sandbox, Password Toolkit, Web Scanner, Report Engine, Net Mapper, and
C2 Framework. Each module includes CLI interface, Flask routes, and web
UI template. Also includes Go DNS server source + binary, IP Capture
service, SYN Flood, Gone Fishing mail server, and hack hijack modules
from v2.0 work.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-03 05:20:39 -08:00

725 lines
30 KiB
Python

"""AUTARCH Web Application Scanner
Directory bruteforce, subdomain enumeration, vulnerability scanning (SQLi, XSS),
header analysis, technology fingerprinting, SSL/TLS audit, and crawler.
"""
# Module metadata constants.
# NOTE(review): presumably read by the AUTARCH module loader/menu to list this
# module — confirm against the loader before renaming or re-typing any of them.
DESCRIPTION = "Web application vulnerability scanner"
AUTHOR = "darkHal"
VERSION = "1.0"
CATEGORY = "offense"
import hashlib
import json
import os
import re
import socket
import ssl
import subprocess
import threading
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Any, Set
from urllib.parse import urlparse, urljoin, quote, parse_qsl
try:
    from core.paths import find_tool, get_data_dir
except ImportError:
    # core.paths is unavailable: provide minimal stand-ins with the same
    # names so the rest of the module can call them unconditionally.
    import shutil

    def find_tool(name):
        """Return the path of executable *name* found on PATH, or None."""
        return shutil.which(name)

    def get_data_dir():
        """Return the 'data' directory located two levels above this file."""
        module_dir = Path(__file__).parent
        return str(module_dir.parent / 'data')
# requests is optional: the flag below records whether it could be imported,
# and network-facing methods consult it before doing any HTTP work.
_HAS_REQUESTS = True
try:
    import requests
    from requests.exceptions import RequestException
except ImportError:
    _HAS_REQUESTS = False
# ── Tech Fingerprints ─────────────────────────────────────────────────────────
# Technology name -> marker strings, grouped by where they are looked for:
# response headers ('Name: value' fragments), response body, and cookie names.
# Consumed by WebAppScanner._fingerprint_tech.
TECH_SIGNATURES = {
    'WordPress': {
        'headers': [],
        'body': ['wp-content', 'wp-includes', 'wp-json'],
        'cookies': ['wordpress_'],
    },
    'Drupal': {
        'headers': ['X-Drupal-'],
        'body': ['Drupal.settings', 'sites/default'],
        'cookies': ['SESS'],
    },
    'Joomla': {
        'headers': [],
        'body': ['/media/jui/', 'com_content'],
        'cookies': [],
    },
    'Laravel': {
        'headers': [],
        'body': ['laravel_session'],
        'cookies': ['laravel_session'],
    },
    'Django': {
        'headers': [],
        'body': ['csrfmiddlewaretoken', '__admin__'],
        'cookies': ['csrftoken', 'sessionid'],
    },
    'Express': {
        'headers': ['X-Powered-By: Express'],
        'body': [],
        'cookies': ['connect.sid'],
    },
    'ASP.NET': {
        'headers': ['X-AspNet-Version', 'X-Powered-By: ASP.NET'],
        'body': ['__VIEWSTATE', '__EVENTVALIDATION'],
        'cookies': ['ASP.NET_SessionId'],
    },
    'PHP': {
        'headers': ['X-Powered-By: PHP'],
        'body': ['.php'],
        'cookies': ['PHPSESSID'],
    },
    'Nginx': {
        'headers': ['Server: nginx'],
        'body': [],
        'cookies': [],
    },
    'Apache': {
        'headers': ['Server: Apache'],
        'body': [],
        'cookies': [],
    },
    'IIS': {
        'headers': ['Server: Microsoft-IIS'],
        'body': [],
        'cookies': [],
    },
    'Cloudflare': {
        'headers': ['Server: cloudflare', 'cf-ray'],
        'body': [],
        'cookies': ['__cfduid'],
    },
    'React': {
        'headers': [],
        'body': ['react-root', '_reactRootContainer', 'data-reactroot'],
        'cookies': [],
    },
    'Angular': {
        'headers': [],
        'body': ['ng-app', 'ng-controller', 'angular.min.js'],
        'cookies': [],
    },
    'Vue.js': {
        'headers': [],
        'body': ['vue.min.js', 'v-bind:', 'v-if=', '__vue__'],
        'cookies': [],
    },
    'jQuery': {
        'headers': [],
        'body': ['jquery.min.js', 'jquery-'],
        'cookies': [],
    },
    'Bootstrap': {
        'headers': [],
        'body': ['bootstrap.min.css', 'bootstrap.min.js'],
        'cookies': [],
    },
}
# Response headers whose presence is reported by the quick scan.
SECURITY_HEADERS = [
    # Content and framing controls
    'Content-Security-Policy',
    'X-Content-Type-Options',
    'X-Frame-Options',
    'X-XSS-Protection',
    # Transport and referrer handling
    'Strict-Transport-Security',
    'Referrer-Policy',
    'Permissions-Policy',
    # Cross-origin isolation
    'Cross-Origin-Opener-Policy',
    'Cross-Origin-Resource-Policy',
    'Cross-Origin-Embedder-Policy',
]
# Built-in path list for the directory bruteforce, used when the caller
# supplies no wordlist. Entries are requested in the order given here.
DIR_WORDLIST_SMALL = [
    'admin', 'login', 'wp-admin', 'administrator',
    'phpmyadmin', 'cpanel', 'dashboard', 'api',
    'backup', 'config', 'db', 'debug',
    'dev', 'docs', 'dump', 'env',
    'git', 'hidden', 'include', 'internal',
    'log', 'logs', 'old', 'panel',
    'private', 'secret', 'server-status', 'shell',
    'sql', 'staging', 'status', 'temp',
    'test', 'tmp', 'upload', 'uploads',
    'wp-content', 'wp-includes',
    '.env', '.git', '.htaccess', '.htpasswd',
    'robots.txt', 'sitemap.xml', 'crossdomain.xml', 'web.config',
    'composer.json', 'package.json', '.svn', '.DS_Store',
    'cgi-bin', 'server-info',
    'info.php', 'phpinfo.php', 'xmlrpc.php', 'wp-login.php',
    '.well-known', 'favicon.ico', 'humans.txt',
]
# SQL injection probe strings, appended to a parameter's original value.
SQLI_PAYLOADS = [
    "'",
    '"',
    "' OR '1'='1",
    '" OR "1"="1',
    "' OR 1=1--",
    '" OR 1=1--',
    "'; DROP TABLE--",
    "1' AND '1'='1",
    '1 AND 1=1',
    '1 UNION SELECT NULL--',
    "' UNION SELECT NULL,NULL--",
    "1'; WAITFOR DELAY '0:0:5'--",
    "1' AND SLEEP(5)--",
]
# Reflected-XSS probe strings; a hit is an unmodified echo in the response.
XSS_PAYLOADS = [
    # <script> injection, plain and attribute-breaking variants
    '<script>alert(1)</script>',
    '"><script>alert(1)</script>',
    "'><script>alert(1)</script>",
    # Event-handler based
    '<img src=x onerror=alert(1)>',
    '<svg onload=alert(1)>',
    '"><img src=x onerror=alert(1)>',
    # URL scheme and body handler
    'javascript:alert(1)',
    '<body onload=alert(1)>',
]
# Database error fragments searched for in responses to SQLi probes.
# Order matters: the first fragment found is the one reported as evidence.
SQL_ERRORS = [
    'sql syntax',
    'mysql_fetch',
    'mysql_num_rows',
    'mysql_query',
    'pg_query',
    'pg_exec',
    'sqlite3',
    'SQLSTATE',
    'ORA-',
    'Microsoft OLE DB',
    'Unclosed quotation mark',
    'ODBC Microsoft Access',
    'JET Database',
    'Microsoft SQL Server',
    'java.sql.SQLException',
    'PostgreSQL query failed',
    'supplied argument is not a valid MySQL',
    'unterminated quoted string',
]
# ── Scanner Service ───────────────────────────────────────────────────────────
class WebAppScanner:
    """Web application vulnerability scanner.

    Bundles a quick header/technology/TLS scan, a threaded directory
    bruteforce, subdomain enumeration (CT logs + DNS), error-based SQLi and
    reflected-XSS probes, and a same-host crawler. Every public method
    returns a plain ``dict``; failures are reported through an ``'error'``
    key rather than by raising.
    """

    # Labels tried by subdomain_enum when the caller supplies no wordlist.
    _DEFAULT_SUBDOMAINS = (
        'www', 'mail', 'ftp', 'admin', 'api', 'dev',
        'staging', 'test', 'blog', 'shop', 'app', 'cdn',
        'ns1', 'ns2', 'mx', 'smtp', 'imap', 'pop',
        'vpn', 'remote', 'portal', 'webmail', 'secure',
        'beta', 'demo', 'docs', 'git', 'jenkins', 'ci',
        'grafana', 'kibana', 'prometheus', 'monitor',
        'status', 'support', 'help', 'forum', 'wiki',
        'internal', 'intranet', 'proxy', 'gateway',
    )

    def __init__(self):
        """Create the results directory and initialise job/session state."""
        self._data_dir = os.path.join(get_data_dir(), 'webapp_scanner')
        self._results_dir = os.path.join(self._data_dir, 'results')
        os.makedirs(self._results_dir, exist_ok=True)
        self._active_jobs: Dict[str, dict] = {}
        self._session = None

    def _get_session(self):
        """Return the shared ``requests.Session``, creating it on first use.

        Raises:
            RuntimeError: if the ``requests`` library is not installed.
        """
        if not _HAS_REQUESTS:
            raise RuntimeError('requests library required')
        if self._session is None:
            session = requests.Session()
            session.headers.update({
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/120.0.0.0 Safari/537.36',
            })
            # Targets frequently present self-signed or broken certificates.
            session.verify = False
            # Publish only once fully configured.
            self._session = session
        return self._session

    # ── Quick Scan ────────────────────────────────────────────────────────
    def quick_scan(self, url: str) -> dict:
        """Run a quick scan — headers, tech fingerprint, basic checks.

        Args:
            url: Target URL; ``https://`` is assumed when no scheme is given.

        Returns:
            Dict with ``ok``, ``url``, ``scan_time``, ``headers``,
            ``security_headers``, ``technologies``, ``server``,
            ``status_code``, ``redirects`` and ``ssl``. When a step raises,
            ``ok`` is False and ``error`` holds the message; fields filled
            before the failure are kept.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}
        url = self._normalize_url(url)
        results = {
            'ok': True,
            'url': url,
            'scan_time': datetime.now(timezone.utc).isoformat(),
            'headers': {},
            'security_headers': {},
            'technologies': [],
            'server': '',
            'status_code': 0,
            'redirects': [],
            'ssl': {},
        }
        try:
            sess = self._get_session()
            resp = sess.get(url, timeout=10, allow_redirects=True)
            results['status_code'] = resp.status_code
            results['headers'] = dict(resp.headers)
            results['server'] = resp.headers.get('Server', '')
            # Track redirects
            results['redirects'] = [
                {'url': hop.url, 'status': hop.status_code}
                for hop in resp.history
            ]
            # Security headers
            results['security_headers'] = self._check_security_headers(resp.headers)
            # Technology fingerprint
            results['technologies'] = self._fingerprint_tech(resp)
            # SSL check
            parsed = urlparse(url)
            if parsed.scheme.lower() == 'https':
                results['ssl'] = self._check_ssl(parsed.hostname, parsed.port or 443)
        except Exception as e:
            results['ok'] = False
            results['error'] = str(e)
        return results

    # ── Directory Bruteforce ──────────────────────────────────────────────
    def dir_bruteforce(self, url: str, wordlist: Optional[List[str]] = None,
                       extensions: Optional[List[str]] = None,
                       threads: int = 10, timeout: float = 5.0) -> dict:
        """Start a background directory bruteforce scan.

        Args:
            url: Base URL to probe.
            wordlist: Paths to try; defaults to ``DIR_WORDLIST_SMALL``.
            extensions: Suffixes appended to every path; defaults to ``['']``.
            threads: Number of paths probed concurrently per batch.
            timeout: Per-request timeout in seconds.

        Returns:
            ``{'ok': True, 'job_id': ...}``; poll ``get_job_status`` with the
            id. ``{'ok': False, 'error': ...}`` if ``requests`` is missing.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}
        url = self._normalize_url(url).rstrip('/')
        # Private copies: the worker outlives this call.
        words = list(wordlist) if wordlist else list(DIR_WORDLIST_SMALL)
        exts = list(extensions) if extensions else ['']
        batch_size = max(1, threads)

        # Two scans started within the same second must not share an id.
        base_id = f'dirbust_{int(time.time())}'
        job_id = base_id
        suffix = 1
        while job_id in self._active_jobs:
            job_id = f'{base_id}_{suffix}'
            suffix += 1

        holder = {'done': False, 'found': [], 'tested': 0,
                  'total': len(words) * len(exts)}
        self._active_jobs[job_id] = holder

        def do_scan():
            state_lock = threading.Lock()
            try:
                sess = self._get_session()

                def test_path(path):
                    for ext in exts:
                        full_path = f'{path}{ext}' if ext else path
                        test_url = f'{url}/{full_path}'
                        hit = None
                        try:
                            r = sess.get(test_url, timeout=timeout,
                                         allow_redirects=False)
                            if r.status_code not in (404, 403, 500):
                                hit = {
                                    'path': '/' + full_path,
                                    'status': r.status_code,
                                    'size': len(r.content),
                                    'content_type': r.headers.get('Content-Type', ''),
                                }
                        except Exception:
                            # Best effort: an unreachable path still counts
                            # as tested so progress reaches the total.
                            pass
                        # Counter and result list are shared by all workers.
                        with state_lock:
                            holder['tested'] += 1
                            if hit is not None:
                                holder['found'].append(hit)

                batch = []
                for word in words:
                    worker = threading.Thread(target=test_path, args=(word,),
                                              daemon=True)
                    batch.append(worker)
                    worker.start()
                    if len(batch) >= batch_size:
                        for worker in batch:
                            worker.join(timeout=timeout + 5)
                        batch.clear()
                for worker in batch:
                    worker.join(timeout=timeout + 5)
            finally:
                # Always mark completion so pollers never wait forever.
                holder['done'] = True

        threading.Thread(target=do_scan, daemon=True).start()
        return {'ok': True, 'job_id': job_id}

    # ── Subdomain Enumeration ─────────────────────────────────────────────
    def subdomain_enum(self, domain: str, wordlist: Optional[List[str]] = None,
                       use_ct: bool = True) -> dict:
        """Enumerate subdomains via DNS bruteforce and CT logs.

        Args:
            domain: Apex domain; surrounding whitespace/dots are stripped and
                the name is lower-cased before use.
            wordlist: Labels to resolve; a built-in list is used when empty.
            use_ct: Also query crt.sh when ``requests`` is available.

        Returns:
            ``{'ok', 'domain', 'subdomains', 'count'}``; ``ct_error`` is added
            when the crt.sh lookup failed. ``{'ok': False, 'error': ...}`` for
            an empty domain.
        """
        domain = (domain or '').strip().lower().strip('.')
        if not domain:
            return {'ok': False, 'error': 'domain required'}
        dotted = '.' + domain
        found: Set[str] = set()
        ct_error = None

        # Certificate Transparency logs
        if use_ct and _HAS_REQUESTS:
            try:
                resp = requests.get(
                    'https://crt.sh/',
                    params={'q': f'%.{domain}', 'output': 'json'},
                    timeout=15)
                if resp.status_code == 200:
                    for entry in resp.json():
                        for name in (entry.get('name_value') or '').split('\n'):
                            name = name.strip().lower()
                            # Wildcard certificates are not hostnames.
                            if name.startswith('*.'):
                                name = name[2:]
                            if name.endswith(dotted):
                                found.add(name)
            except Exception as e:
                # CT is a best-effort source; keep going with DNS.
                ct_error = str(e)

        # DNS bruteforce
        for label in (wordlist or self._DEFAULT_SUBDOMAINS):
            fqdn = f'{label}.{domain}'.lower()
            try:
                socket.getaddrinfo(fqdn, None)
            except (socket.gaierror, UnicodeError):
                # UnicodeError: label is not encodable as a DNS name.
                continue
            found.add(fqdn)

        names = sorted(found)
        result = {'ok': True, 'domain': domain, 'subdomains': names,
                  'count': len(names)}
        if ct_error is not None:
            result['ct_error'] = ct_error
        return result

    # ── Vulnerability Scanning ────────────────────────────────────────────
    def vuln_scan(self, url: str, scan_sqli: bool = True,
                  scan_xss: bool = True) -> dict:
        """Scan for SQL injection and XSS vulnerabilities.

        Fetches *url*, collects same-host links that carry query parameters
        (plus *url* itself when it has a query string) and probes every
        parameter of at most 20 of them.

        Returns:
            ``{'ok', 'url', 'findings', 'urls_tested'}`` or
            ``{'ok': False, 'error': ...}`` when the page cannot be fetched.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}
        url = self._normalize_url(url)
        findings = []
        sess = self._get_session()
        # Crawl to find forms and parameters
        try:
            resp = sess.get(url, timeout=10)
            body = resp.text
        except Exception as e:
            return {'ok': False, 'error': str(e)}

        # Find URLs with parameters
        param_urls = self._extract_param_urls(body, url)
        # The target's own query string is a test candidate too.
        if urlparse(url).query and url not in param_urls:
            param_urls.insert(0, url)
        targets = param_urls[:20]  # Limit to prevent abuse

        # Test each URL with parameters
        for test_url in targets:
            params = self._parse_query(urlparse(test_url).query)
            for param_name, param_val in params.items():
                if scan_sqli:
                    findings.extend(
                        self._test_sqli(sess, test_url, param_name, param_val))
                if scan_xss:
                    findings.extend(
                        self._test_xss(sess, test_url, param_name, param_val))
        return {
            'ok': True,
            'url': url,
            'findings': findings,
            'urls_tested': len(targets),
        }

    @staticmethod
    def _parse_query(query: str) -> Dict[str, str]:
        """Decode a URL query string into a ``{name: value}`` dict.

        Values are percent-decoded so they can be handed to ``requests`` via
        ``params=`` without being encoded twice. For repeated names the last
        value wins.
        """
        return dict(parse_qsl(query, keep_blank_values=True))

    def _test_sqli(self, sess, url: str, param: str, original_val: str) -> List[dict]:
        """Test a parameter for error-based SQL injection.

        Returns a list with at most one finding for *param*.
        """
        findings = []
        parsed = urlparse(url)
        base_params = self._parse_query(parsed.query)
        target = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'

        # Signatures already present in the untouched page are not evidence.
        try:
            baseline = sess.get(target, params=base_params, timeout=5).text.lower()
        except Exception:
            baseline = ''
        signatures = [sig for sig in SQL_ERRORS if sig.lower() not in baseline]

        for payload in SQLI_PAYLOADS[:6]:  # Limit payloads
            test_params = dict(base_params)
            test_params[param] = original_val + payload
            try:
                body = sess.get(target, params=test_params, timeout=5).text.lower()
            except Exception:
                continue
            for error_sig in signatures:
                if error_sig.lower() in body:
                    findings.append({
                        'type': 'sqli',
                        'severity': 'high',
                        'url': url,
                        'parameter': param,
                        'payload': payload,
                        'evidence': error_sig,
                        'description': f'SQL injection (error-based) in parameter "{param}"',
                    })
                    return findings  # One finding per param is enough
        return findings

    def _test_xss(self, sess, url: str, param: str, original_val: str) -> List[dict]:
        """Test a parameter for reflected XSS.

        Returns a list with at most one finding for *param*.
        """
        findings = []
        parsed = urlparse(url)
        base_params = self._parse_query(parsed.query)
        target = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
        for payload in XSS_PAYLOADS[:4]:
            test_params = dict(base_params)
            test_params[param] = payload
            try:
                reflected = payload in sess.get(target, params=test_params,
                                                timeout=5).text
            except Exception:
                continue
            if reflected:
                findings.append({
                    'type': 'xss',
                    'severity': 'high',
                    'url': url,
                    'parameter': param,
                    'payload': payload,
                    'description': f'Reflected XSS in parameter "{param}"',
                })
                return findings
        return findings

    def _extract_param_urls(self, html: str, base_url: str) -> List[str]:
        """Extract same-host URLs with query parameters from HTML.

        Returns the absolute URLs sorted, so the caller's 20-URL cut-off
        picks the same URLs on every run.
        """
        base_host = urlparse(base_url).netloc
        urls = set()
        # href/src/action attributes (attribute names are case-insensitive)
        for match in re.finditer(
                r'(?:href|src|action)=["\']([^"\']+\?[^"\']+)["\']', html, re.I):
            # '&' is normally written '&amp;' inside attribute values.
            link = match.group(1).replace('&amp;', '&')
            try:
                full = urljoin(base_url, link)
                same_host = urlparse(full).netloc == base_host
            except ValueError:
                continue  # malformed link, e.g. unbalanced IPv6 bracket
            if same_host:
                urls.add(full)
        return sorted(urls)

    # ── Security Headers ──────────────────────────────────────────────────
    def _check_security_headers(self, headers) -> dict:
        """Check for presence and values of security headers.

        Returns a dict keyed by header name with ``present``, ``value`` and
        ``rating`` (``good``, ``weak`` or ``missing``).
        """
        results = {}
        for h in SECURITY_HEADERS:
            value = headers.get(h, '')
            results[h] = {
                'present': bool(value),
                'value': value,
                'rating': 'good' if value else 'missing',
            }
        # Specific checks
        csp = headers.get('Content-Security-Policy', '')
        if csp and ("'unsafe-inline'" in csp or "'unsafe-eval'" in csp):
            results['Content-Security-Policy']['rating'] = 'weak'
        hsts = headers.get('Strict-Transport-Security', '')
        if hsts:
            max_age = re.search(r'max-age\s*=\s*"?(\d+)', hsts, re.I)
            # max-age is mandatory; absent or under one year is weak.
            if max_age is None or int(max_age.group(1)) < 31536000:
                results['Strict-Transport-Security']['rating'] = 'weak'
        return results

    # ── Technology Fingerprinting ─────────────────────────────────────────
    def _fingerprint_tech(self, resp) -> List[str]:
        """Identify technologies from response.

        Header and body signatures are case-insensitive substring matches.
        Cookie signatures must match the start of a cookie name exactly, so
        a short marker such as ``SESS`` does not hit ``PHPSESSID``.
        """
        headers_lc = '\n'.join(
            f'{k}: {v}' for k, v in resp.headers.items()).lower()
        body_lc = resp.text[:50000].lower()  # Only check first 50KB
        cookie_names = list(resp.cookies.keys()) if resp.cookies else []

        techs = []
        for tech, sigs in TECH_SIGNATURES.items():
            if (any(sig.lower() in headers_lc for sig in sigs['headers'])
                    or any(sig.lower() in body_lc for sig in sigs['body'])
                    or any(name.startswith(sig)
                           for sig in sigs['cookies']
                           for name in cookie_names)):
                techs.append(tech)
        return techs

    # ── SSL/TLS Audit ─────────────────────────────────────────────────────
    def _check_ssl(self, hostname: str, port: int = 443) -> dict:
        """Check SSL/TLS configuration.

        Connects once without verification to read the negotiated protocol
        and cipher, then once with default verification to validate the
        certificate.

        Returns:
            Dict with ``valid``, ``issuer``, ``subject``, ``expires``,
            ``protocol``, ``cipher`` and ``issues``; ``error`` is added when
            a connection fails for a non-certificate reason.
        """
        result = {
            'valid': False,
            'issuer': '',
            'subject': '',
            'expires': '',
            'protocol': '',
            'cipher': '',
            'issues': [],
        }
        if not hostname:
            result['error'] = 'hostname required'
            return result
        try:
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            with socket.create_connection((hostname, port), timeout=5) as raw:
                with ctx.wrap_socket(raw, server_hostname=hostname) as s:
                    result['protocol'] = s.version() or ''
                    cipher = s.cipher()
                    result['cipher'] = cipher[0] if cipher else ''
            # Check for weak protocols before the verified attempt, so the
            # finding is kept even if that attempt fails.
            if result['protocol'] in ('TLSv1', 'TLSv1.1', 'SSLv3'):
                result['issues'].append(f'Weak protocol: {result["protocol"]}')
            # Try with verification
            ctx2 = ssl.create_default_context()
            try:
                with socket.create_connection((hostname, port), timeout=5) as raw2:
                    with ctx2.wrap_socket(raw2, server_hostname=hostname) as s2:
                        cert = s2.getpeercert()
                result['valid'] = True
                result['issuer'] = dict(x[0] for x in cert.get('issuer', []))
                result['subject'] = dict(x[0] for x in cert.get('subject', []))
                result['expires'] = cert.get('notAfter', '')
            except ssl.SSLCertVerificationError as e:
                result['issues'].append(f'Certificate validation failed: {e}')
        except Exception as e:
            result['error'] = str(e)
        return result

    # ── Crawler ───────────────────────────────────────────────────────────
    def crawl(self, url: str, max_pages: int = 50, depth: int = 3) -> dict:
        """Spider a website and build a sitemap.

        Breadth-first crawl restricted to the start URL's host. Query strings
        and fragments are dropped from discovered links.

        Args:
            url: Start URL.
            max_pages: Maximum number of fetch attempts (failures included).
            depth: Maximum link distance from the start URL.

        Returns:
            ``{'ok', 'url', 'pages_crawled', 'pages'}``; each page has
            ``url``, ``status``, ``content_type``, ``size``, ``title``,
            ``forms`` and ``links_out``.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}
        url = self._normalize_url(url)
        base_domain = urlparse(url).netloc
        # URLs already queued or fetched; keeps the queue duplicate-free.
        seen: Set[str] = {url}
        queue = deque([(url, 0)] if depth >= 0 else [])
        pages = []
        attempted = 0
        sess = self._get_session()
        while queue and attempted < max_pages:
            current_url, current_depth = queue.popleft()
            attempted += 1
            try:
                r = sess.get(current_url, timeout=5, allow_redirects=True)
                text = r.text
                page = {
                    'url': current_url,
                    'status': r.status_code,
                    'content_type': r.headers.get('Content-Type', ''),
                    'size': len(r.content),
                    'title': '',
                    'forms': 0,
                    'links_out': 0,
                }
                # Extract title
                title_match = re.search(r'<title[^>]*>([^<]+)</title>', text, re.I)
                if title_match:
                    page['title'] = title_match.group(1).strip()
                # Count forms
                page['forms'] = len(re.findall(r'<form', text, re.I))
                # Extract links for further crawling
                outlinks = 0
                for link in re.findall(r'href=["\']([^"\']+)["\']', text):
                    try:
                        parsed = urlparse(urljoin(current_url, link))
                    except ValueError:
                        continue  # malformed link; keep the page
                    if parsed.scheme not in ('http', 'https'):
                        continue  # mailto:, javascript:, tel:, ...
                    if parsed.netloc != base_domain:
                        outlinks += 1
                        continue
                    clean = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
                    if current_depth < depth and clean not in seen:
                        seen.add(clean)
                        queue.append((clean, current_depth + 1))
                page['links_out'] = outlinks
                pages.append(page)
            except Exception:
                # Unreachable page: skip it, keep crawling.
                continue
        return {
            'ok': True,
            'url': url,
            'pages_crawled': len(pages),
            'pages': pages,
        }

    # ── Job Management ────────────────────────────────────────────────────
    def get_job_status(self, job_id: str) -> dict:
        """Return a progress snapshot for a directory-bruteforce job.

        A finished job is removed once its final status has been returned,
        so a later call with the same id reports ``Job not found``.
        """
        holder = self._active_jobs.get(job_id)
        if not holder:
            return {'ok': False, 'error': 'Job not found'}
        result = {
            'ok': True,
            'done': holder['done'],
            'tested': holder['tested'],
            'total': holder['total'],
            # Copy: worker threads may still be appending to the live list.
            'found': list(holder['found']),
        }
        if holder['done']:
            self._active_jobs.pop(job_id, None)
        return result

    # ── Helpers ───────────────────────────────────────────────────────────
    @staticmethod
    def _normalize_url(url: str) -> str:
        """Strip whitespace and prepend ``https://`` when no scheme is given.

        The scheme test is case-insensitive, so ``HTTP://host`` is left alone.
        """
        url = url.strip()
        if not url.lower().startswith(('http://', 'https://')):
            url = 'https://' + url
        return url
# ── Singleton ─────────────────────────────────────────────────────────────────
# Process-wide scanner instance and the lock guarding its creation.
_instance = None
_lock = threading.Lock()


def get_webapp_scanner() -> WebAppScanner:
    """Return the shared WebAppScanner, constructing it on the first call."""
    global _instance
    # Fast path: already built, no locking needed.
    if _instance is not None:
        return _instance
    with _lock:
        # Re-check under the lock in case another thread built it meanwhile.
        if _instance is None:
            _instance = WebAppScanner()
    return _instance
# ── CLI ───────────────────────────────────────────────────────────────────────
def run():
    """Interactive CLI for Web Application Scanner.

    Loops over a text menu until the user selects ``0``. Each option prompts
    for a target, calls the matching scanner method and prints a summary.
    Errors reported by the scanner are printed instead of being ignored.
    """
    svc = get_webapp_scanner()
    while True:
        print("\n╔═══════════════════════════════════════╗")
        print("║ WEB APPLICATION SCANNER ║")
        print("╠═══════════════════════════════════════╣")
        print("║ 1 — Quick Scan (headers + tech) ║")
        print("║ 2 — Directory Bruteforce ║")
        print("║ 3 — Subdomain Enumeration ║")
        print("║ 4 — Vulnerability Scan (SQLi/XSS) ║")
        print("║ 5 — Crawl / Spider ║")
        print("║ 0 — Back ║")
        print("╚═══════════════════════════════════════╝")
        choice = input("\n Select: ").strip()
        if choice == '0':
            break
        elif choice == '1':
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Scanning...")
            r = svc.quick_scan(url)
            if r.get('error'):
                print(f" [!] Error: {r['error']}")
                if not r.get('status_code'):
                    # Nothing was fetched, so there is nothing to show.
                    continue
            print(f"\n Status: {r.get('status_code')}")
            print(f" Server: {r.get('server', 'unknown')}")
            if r.get('technologies'):
                print(f" Technologies: {', '.join(r['technologies'])}")
            if r.get('security_headers'):
                print(" Security Headers:")
                for h, info in r['security_headers'].items():
                    mark = '\033[92m✓\033[0m' if info['present'] else '\033[91m✗\033[0m'
                    print(f" {mark} {h}")
            if r.get('ssl'):
                ssl_info = r['ssl']
                print(f" SSL: {'Valid' if ssl_info.get('valid') else 'INVALID'} "
                      f"({ssl_info.get('protocol', '?')})")
                for issue in ssl_info.get('issues', []):
                    print(f" [!] {issue}")
        elif choice == '2':
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Starting directory bruteforce...")
            r = svc.dir_bruteforce(url)
            job_id = r.get('job_id')
            if not job_id:
                print(f" [!] Error: {r.get('error', 'scan could not be started')}")
                continue
            while True:
                time.sleep(2)
                s = svc.get_job_status(job_id)
                if not s.get('ok'):
                    # Job vanished; stop polling rather than raise KeyError.
                    print(f"\n [!] Error: {s.get('error', 'job status unavailable')}")
                    break
                print(f" [{s['tested']}/{s['total']}] Found: {len(s['found'])}", end='\r')
                if s['done']:
                    print()
                    for item in s['found']:
                        print(f" [{item['status']}] {item['path']} ({item['size']} bytes)")
                    break
        elif choice == '3':
            domain = input(" Domain: ").strip()
            if not domain:
                continue
            print(" Enumerating subdomains...")
            r = svc.subdomain_enum(domain)
            if r.get('error'):
                print(f" [!] Error: {r['error']}")
                continue
            print(f"\n Found {r.get('count', 0)} subdomains:")
            for sub in r.get('subdomains', []):
                print(f" {sub}")
        elif choice == '4':
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Scanning for vulnerabilities...")
            r = svc.vuln_scan(url)
            if r.get('error'):
                # A failed scan is not the same as a clean one.
                print(f" [!] Error: {r['error']}")
            elif r.get('findings'):
                print(f"\n Found {len(r['findings'])} potential vulnerabilities:")
                for f in r['findings']:
                    print(f" [{f['severity'].upper()}] {f['type'].upper()}: {f['description']}")
                    print(f" Parameter: {f.get('parameter', '?')}, Payload: {f.get('payload', '?')}")
            else:
                print(" No vulnerabilities found in tested parameters.")
        elif choice == '5':
            url = input(" URL: ").strip()
            if not url:
                continue
            raw_limit = input(" Max pages (default 50): ").strip()
            try:
                max_pages = int(raw_limit) if raw_limit else 50
            except ValueError:
                print(" [!] Invalid number, using 50.")
                max_pages = 50
            print(" Crawling...")
            r = svc.crawl(url, max_pages=max_pages)
            if r.get('error'):
                print(f" [!] Error: {r['error']}")
                continue
            print(f"\n Crawled {r.get('pages_crawled', 0)} pages:")
            for page in r.get('pages', []):
                print(f" [{page['status']}] {page['url']}"
                      f" ({page['size']} bytes, {page['forms']} forms)")