Add WiFi Audit, API Fuzzer, Cloud Scanner, Threat Intel, Log Correlator, Steganography, Anti-Forensics, BLE Scanner, Forensics, RFID/NFC, Malware Sandbox, Password Toolkit, Web Scanner, Report Engine, Net Mapper, and C2 Framework. Each module includes CLI interface, Flask routes, and web UI template. Also includes Go DNS server source + binary, IP Capture service, SYN Flood, Gone Fishing mail server, and hack hijack modules from v2.0 work. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
725 lines
30 KiB
Python
725 lines
30 KiB
Python
"""AUTARCH Web Application Scanner
|
|
|
|
Directory bruteforce, subdomain enumeration, vulnerability scanning (SQLi, XSS),
|
|
header analysis, technology fingerprinting, SSL/TLS audit, and crawler.
|
|
"""
|
|
|
|
# Module metadata consumed by the AUTARCH module registry/loader.
DESCRIPTION = "Web application vulnerability scanner"
AUTHOR = "darkHal"
VERSION = "1.0"
CATEGORY = "offense"  # offensive-tooling category
|
|
|
import os
|
|
import re
|
|
import json
|
|
import time
|
|
import ssl
|
|
import socket
|
|
import hashlib
|
|
import threading
|
|
import subprocess
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse, urljoin, quote
|
|
from dataclasses import dataclass, field
|
|
from typing import Dict, List, Optional, Any, Set
|
|
from datetime import datetime, timezone
|
|
|
|
# Prefer the project's path helpers; fall back to self-contained stubs so
# this module also works when imported outside the AUTARCH package.
try:
    from core.paths import find_tool, get_data_dir
except ImportError:
    import shutil

    def find_tool(name):
        # Resolve an external tool binary on PATH (same contract as core.paths).
        return shutil.which(name)

    def get_data_dir():
        # Default data root: ../data relative to this module file.
        return str(Path(__file__).parent.parent / 'data')
|
|
|
|
# Optional dependency: every HTTP-based feature requires `requests`.
# Code paths check _HAS_REQUESTS and degrade gracefully when it is absent.
try:
    import requests
    from requests.exceptions import RequestException
    _HAS_REQUESTS = True
except ImportError:
    _HAS_REQUESTS = False
|
|
|
|
|
|
# ── Tech Fingerprints ─────────────────────────────────────────────────────────

# Technology-detection signatures.  Each entry maps a technology name to
# case-insensitive substrings searched in response headers (as "Name: value"
# lines), the response body, and cookie names.  Any single match attributes
# the technology (see WebAppScanner._fingerprint_tech).
TECH_SIGNATURES = {
    'WordPress': {'headers': [], 'body': ['wp-content', 'wp-includes', 'wp-json'], 'cookies': ['wordpress_']},
    'Drupal': {'headers': ['X-Drupal-'], 'body': ['Drupal.settings', 'sites/default'], 'cookies': ['SESS']},
    'Joomla': {'headers': [], 'body': ['/media/jui/', 'com_content'], 'cookies': []},
    'Laravel': {'headers': [], 'body': ['laravel_session'], 'cookies': ['laravel_session']},
    'Django': {'headers': [], 'body': ['csrfmiddlewaretoken', '__admin__'], 'cookies': ['csrftoken', 'sessionid']},
    'Express': {'headers': ['X-Powered-By: Express'], 'body': [], 'cookies': ['connect.sid']},
    'ASP.NET': {'headers': ['X-AspNet-Version', 'X-Powered-By: ASP.NET'], 'body': ['__VIEWSTATE', '__EVENTVALIDATION'], 'cookies': ['ASP.NET_SessionId']},
    'PHP': {'headers': ['X-Powered-By: PHP'], 'body': ['.php'], 'cookies': ['PHPSESSID']},
    'Nginx': {'headers': ['Server: nginx'], 'body': [], 'cookies': []},
    'Apache': {'headers': ['Server: Apache'], 'body': [], 'cookies': []},
    'IIS': {'headers': ['Server: Microsoft-IIS'], 'body': [], 'cookies': []},
    'Cloudflare': {'headers': ['Server: cloudflare', 'cf-ray'], 'body': [], 'cookies': ['__cfduid']},
    'React': {'headers': [], 'body': ['react-root', '_reactRootContainer', 'data-reactroot'], 'cookies': []},
    'Angular': {'headers': [], 'body': ['ng-app', 'ng-controller', 'angular.min.js'], 'cookies': []},
    'Vue.js': {'headers': [], 'body': ['vue.min.js', 'v-bind:', 'v-if=', '__vue__'], 'cookies': []},
    'jQuery': {'headers': [], 'body': ['jquery.min.js', 'jquery-'], 'cookies': []},
    'Bootstrap': {'headers': [], 'body': ['bootstrap.min.css', 'bootstrap.min.js'], 'cookies': []},
}
|
|
|
|
# Response headers audited by WebAppScanner._check_security_headers;
# each is reported as present/missing with a quality rating.
SECURITY_HEADERS = [
    'Content-Security-Policy',
    'X-Content-Type-Options',
    'X-Frame-Options',
    'X-XSS-Protection',
    'Strict-Transport-Security',
    'Referrer-Policy',
    'Permissions-Policy',
    'Cross-Origin-Opener-Policy',
    'Cross-Origin-Resource-Policy',
    'Cross-Origin-Embedder-Policy',
]
|
|
|
|
# Common directories for bruteforce — default wordlist used by
# WebAppScanner.dir_bruteforce when the caller supplies none.  Mixes
# admin panels, VCS/config leftovers, and well-known metadata files.
DIR_WORDLIST_SMALL = [
    'admin', 'login', 'wp-admin', 'administrator', 'phpmyadmin', 'cpanel',
    'dashboard', 'api', 'backup', 'config', 'db', 'debug', 'dev', 'docs',
    'dump', 'env', 'git', 'hidden', 'include', 'internal', 'log', 'logs',
    'old', 'panel', 'private', 'secret', 'server-status', 'shell', 'sql',
    'staging', 'status', 'temp', 'test', 'tmp', 'upload', 'uploads',
    'wp-content', 'wp-includes', '.env', '.git', '.htaccess', '.htpasswd',
    'robots.txt', 'sitemap.xml', 'crossdomain.xml', 'web.config',
    'composer.json', 'package.json', '.svn', '.DS_Store',
    'cgi-bin', 'server-info', 'info.php', 'phpinfo.php', 'xmlrpc.php',
    'wp-login.php', '.well-known', 'favicon.ico', 'humans.txt',
]
|
|
|
|
# SQLi test payloads — quote breakers, boolean tautologies, UNION probes
# and time-delay payloads.  Only a prefix of this list is sent per
# parameter (see WebAppScanner._test_sqli).
SQLI_PAYLOADS = [
    "'", "\"", "' OR '1'='1", "\" OR \"1\"=\"1",
    "' OR 1=1--", "\" OR 1=1--", "'; DROP TABLE--",
    "1' AND '1'='1", "1 AND 1=1", "1 UNION SELECT NULL--",
    "' UNION SELECT NULL,NULL--", "1'; WAITFOR DELAY '0:0:5'--",
    "1' AND SLEEP(5)--",
]
|
|
|
|
# XSS test payloads — script tags, attribute breakouts and event-handler
# vectors.  A finding is raised when a payload is reflected verbatim
# (see WebAppScanner._test_xss).
XSS_PAYLOADS = [
    '<script>alert(1)</script>',
    '"><script>alert(1)</script>',
    "'><script>alert(1)</script>",
    '<img src=x onerror=alert(1)>',
    '<svg onload=alert(1)>',
    '"><img src=x onerror=alert(1)>',
    "javascript:alert(1)",
    '<body onload=alert(1)>',
]
|
|
|
|
# SQL error signatures — database error strings (MySQL, PostgreSQL,
# SQLite, Oracle, MSSQL/Access, JDBC) matched case-insensitively in
# response bodies to detect error-based SQL injection.
SQL_ERRORS = [
    'sql syntax', 'mysql_fetch', 'mysql_num_rows', 'mysql_query',
    'pg_query', 'pg_exec', 'sqlite3', 'SQLSTATE',
    'ORA-', 'Microsoft OLE DB', 'Unclosed quotation mark',
    'ODBC Microsoft Access', 'JET Database', 'Microsoft SQL Server',
    'java.sql.SQLException', 'PostgreSQL query failed',
    'supplied argument is not a valid MySQL', 'unterminated quoted string',
]
|
|
|
|
|
|
# ── Scanner Service ───────────────────────────────────────────────────────────
|
|
|
|
class WebAppScanner:
    """Web application vulnerability scanner.

    Offers quick reconnaissance (headers, technology fingerprint, SSL/TLS),
    directory bruteforcing, subdomain enumeration, basic SQLi/XSS probing
    and a same-origin crawler.  Long-running jobs execute on background
    daemon threads; poll them with :meth:`get_job_status`.
    """

    def __init__(self):
        # Results are persisted under <data_root>/webapp_scanner/results.
        self._data_dir = os.path.join(get_data_dir(), 'webapp_scanner')
        self._results_dir = os.path.join(self._data_dir, 'results')
        os.makedirs(self._results_dir, exist_ok=True)
        # job_id -> mutable progress holder for background scans.
        self._active_jobs: Dict[str, dict] = {}
        self._session = None  # lazily created shared requests.Session

    def _get_session(self):
        """Return the shared HTTP session, creating it on first use.

        Raises:
            RuntimeError: if the ``requests`` library is not installed.
        """
        if not _HAS_REQUESTS:
            raise RuntimeError('requests library required')
        if not self._session:
            self._session = requests.Session()
            self._session.headers.update({
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/120.0.0.0 Safari/537.36',
            })
            # Scan targets frequently use self-signed certificates; skip TLS
            # verification and silence the per-request urllib3 warnings that
            # would otherwise flood the console.
            self._session.verify = False
            try:
                requests.packages.urllib3.disable_warnings()
            except Exception:
                pass  # best-effort; warning noise is not fatal
        return self._session

    # ── Quick Scan ────────────────────────────────────────────────────────

    def quick_scan(self, url: str) -> dict:
        """Run a quick scan — headers, tech fingerprint, basic checks.

        Returns a dict with status code, response headers, security-header
        audit, detected technologies, redirect chain and (for https) an
        SSL/TLS summary.  ``ok`` is False when the request itself failed,
        in which case ``error`` carries the reason.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}
        url = self._normalize_url(url)
        results = {
            'ok': True,  # consistent result contract with the other scans
            'url': url,
            'scan_time': datetime.now(timezone.utc).isoformat(),
            'headers': {},
            'security_headers': {},
            'technologies': [],
            'server': '',
            'status_code': 0,
            'redirects': [],
            'ssl': {},
        }

        try:
            sess = self._get_session()
            resp = sess.get(url, timeout=10, allow_redirects=True)
            results['status_code'] = resp.status_code
            results['headers'] = dict(resp.headers)
            results['server'] = resp.headers.get('Server', '')

            # Record the redirect chain that led to the final URL.
            for r in resp.history:
                results['redirects'].append({
                    'url': r.url,
                    'status': r.status_code,
                })

            results['security_headers'] = self._check_security_headers(resp.headers)
            results['technologies'] = self._fingerprint_tech(resp)

            # Audit the TLS endpoint only for https targets.
            parsed = urlparse(url)
            if parsed.scheme == 'https':
                results['ssl'] = self._check_ssl(parsed.hostname, parsed.port or 443)

        except Exception as e:
            results['ok'] = False
            results['error'] = str(e)

        return results

    # ── Directory Bruteforce ──────────────────────────────────────────────

    def dir_bruteforce(self, url: str, wordlist: List[str] = None,
                       extensions: List[str] = None,
                       threads: int = 10, timeout: float = 5.0) -> dict:
        """Start a background directory bruteforce and return a job id.

        Args:
            url: base URL; paths are appended after the trailing slash.
            wordlist: paths to probe (defaults to DIR_WORDLIST_SMALL).
            extensions: suffixes tried per word ('' = the bare word).
            threads: worker-thread batch size.
            timeout: per-request timeout in seconds.

        Poll :meth:`get_job_status` with the returned ``job_id``.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}

        url = self._normalize_url(url).rstrip('/')
        if not wordlist:
            wordlist = DIR_WORDLIST_SMALL
        if not extensions:
            extensions = ['']

        job_id = f'dirbust_{int(time.time())}'
        holder = {'done': False, 'found': [], 'tested': 0,
                  'total': len(wordlist) * len(extensions)}
        self._active_jobs[job_id] = holder

        def do_scan():
            sess = self._get_session()
            results_lock = threading.Lock()

            def test_path(path):
                for ext in extensions:
                    full_path = f'{path}{ext}' if ext else path
                    test_url = f'{url}/{full_path}'
                    entry = None
                    try:
                        r = sess.get(test_url, timeout=timeout,
                                     allow_redirects=False)
                        # 404/403/500 responses are treated as "not found".
                        if r.status_code not in (404, 403, 500):
                            entry = {
                                'path': '/' + full_path,
                                'status': r.status_code,
                                'size': len(r.content),
                                'content_type': r.headers.get('Content-Type', ''),
                            }
                    except Exception:
                        pass  # network error still counts as tested
                    # All holder mutations happen under the lock: worker
                    # threads update 'tested' and 'found' concurrently.
                    with results_lock:
                        holder['tested'] += 1
                        if entry is not None:
                            holder['found'].append(entry)

            # Launch workers in batches of `threads`, joining each batch
            # before starting the next to cap concurrency.
            threads_list = []
            for word in wordlist:
                t = threading.Thread(target=test_path, args=(word,), daemon=True)
                threads_list.append(t)
                t.start()
                if len(threads_list) >= threads:
                    for t in threads_list:
                        t.join(timeout=timeout + 5)
                    threads_list.clear()
            for t in threads_list:
                t.join(timeout=timeout + 5)
            holder['done'] = True

        threading.Thread(target=do_scan, daemon=True).start()
        return {'ok': True, 'job_id': job_id}

    # ── Subdomain Enumeration ─────────────────────────────────────────────

    def subdomain_enum(self, domain: str, wordlist: List[str] = None,
                       use_ct: bool = True) -> dict:
        """Enumerate subdomains via DNS bruteforce and CT logs.

        Args:
            domain: apex domain, e.g. ``example.com``.
            wordlist: labels to resolve; a built-in list is used if omitted.
            use_ct: also query the crt.sh Certificate Transparency index.
        """
        found = []

        # Passive source: Certificate Transparency logs via crt.sh.
        if use_ct and _HAS_REQUESTS:
            try:
                resp = requests.get(
                    f'https://crt.sh/?q=%.{domain}&output=json',
                    timeout=15)
                if resp.status_code == 200:
                    for entry in resp.json():
                        name = entry.get('name_value', '')
                        # One cert entry may list several SANs, newline-separated.
                        for sub in name.split('\n'):
                            sub = sub.strip().lower()
                            if sub.endswith('.' + domain) and sub not in found:
                                found.append(sub)
            except Exception:
                pass  # CT lookup is best-effort

        # Active source: DNS resolution of common labels.
        if not wordlist:
            wordlist = ['www', 'mail', 'ftp', 'admin', 'api', 'dev',
                        'staging', 'test', 'blog', 'shop', 'app', 'cdn',
                        'ns1', 'ns2', 'mx', 'smtp', 'imap', 'pop',
                        'vpn', 'remote', 'portal', 'webmail', 'secure',
                        'beta', 'demo', 'docs', 'git', 'jenkins', 'ci',
                        'grafana', 'kibana', 'prometheus', 'monitor',
                        'status', 'support', 'help', 'forum', 'wiki',
                        'internal', 'intranet', 'proxy', 'gateway']

        for sub in wordlist:
            fqdn = f'{sub}.{domain}'
            try:
                socket.getaddrinfo(fqdn, None)
                if fqdn not in found:
                    found.append(fqdn)
            except socket.gaierror:
                pass  # label does not resolve

        return {'ok': True, 'domain': domain, 'subdomains': sorted(set(found)),
                'count': len(set(found))}

    # ── Vulnerability Scanning ────────────────────────────────────────────

    def vuln_scan(self, url: str, scan_sqli: bool = True,
                  scan_xss: bool = True) -> dict:
        """Scan for SQL injection and XSS vulnerabilities.

        Fetches the landing page, harvests same-origin URLs carrying query
        parameters, and probes each parameter with SQLi/XSS payloads.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}

        url = self._normalize_url(url)
        findings = []
        sess = self._get_session()

        # Fetch the landing page to harvest parameterised links.
        try:
            resp = sess.get(url, timeout=10)
            body = resp.text
        except Exception as e:
            return {'ok': False, 'error': str(e)}

        param_urls = self._extract_param_urls(body, url)

        # Test each parameterised URL (capped to prevent abuse).
        for test_url in param_urls[:20]:
            params = self._split_query(urlparse(test_url).query)

            for param_name, param_val in params.items():
                if scan_sqli:
                    findings.extend(self._test_sqli(sess, test_url, param_name, param_val))
                if scan_xss:
                    findings.extend(self._test_xss(sess, test_url, param_name, param_val))

        return {
            'ok': True,
            'url': url,
            'findings': findings,
            'urls_tested': len(param_urls[:20]),
        }

    def _test_sqli(self, sess, url: str, param: str, original_val: str) -> List[dict]:
        """Probe one query parameter for error-based SQL injection.

        Appends each payload to the original value and looks for database
        error signatures in the response.  Returns at most one finding.
        """
        findings = []
        parsed = urlparse(url)
        base_params = self._split_query(parsed.query)

        for payload in SQLI_PAYLOADS[:6]:  # cap request volume per parameter
            test_params = base_params.copy()
            test_params[param] = original_val + payload
            try:
                test_url = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
                r = sess.get(test_url, params=test_params, timeout=5)
                body = r.text.lower()

                for error_sig in SQL_ERRORS:
                    if error_sig.lower() in body:
                        findings.append({
                            'type': 'sqli',
                            'severity': 'high',
                            'url': url,
                            'parameter': param,
                            'payload': payload,
                            'evidence': error_sig,
                            'description': f'SQL injection (error-based) in parameter "{param}"',
                        })
                        return findings  # one finding per param is enough
            except Exception:
                continue  # network error: try the next payload

        return findings

    def _test_xss(self, sess, url: str, param: str, original_val: str) -> List[dict]:
        """Probe one query parameter for reflected XSS.

        Replaces the parameter value with each payload; an unencoded
        reflection in the response body is reported as a finding.
        """
        findings = []
        parsed = urlparse(url)
        base_params = self._split_query(parsed.query)

        for payload in XSS_PAYLOADS[:4]:  # cap request volume per parameter
            test_params = base_params.copy()
            test_params[param] = payload
            try:
                test_url = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
                r = sess.get(test_url, params=test_params, timeout=5)
                if payload in r.text:
                    findings.append({
                        'type': 'xss',
                        'severity': 'high',
                        'url': url,
                        'parameter': param,
                        'payload': payload,
                        'description': f'Reflected XSS in parameter "{param}"',
                    })
                    return findings  # one finding per param is enough
            except Exception:
                continue

        return findings

    def _extract_param_urls(self, html: str, base_url: str) -> List[str]:
        """Extract same-origin URLs with query parameters from HTML."""
        urls = set()
        # href/src/action attribute values that contain a query string.
        for match in re.finditer(r'(?:href|src|action)=["\']([^"\']+\?[^"\']+)["\']', html):
            u = match.group(1)
            full = urljoin(base_url, u)
            # Stay on the same origin as the scan target.
            if urlparse(full).netloc == urlparse(base_url).netloc:
                urls.add(full)
        return list(urls)

    # ── Security Headers ──────────────────────────────────────────────────

    def _check_security_headers(self, headers) -> dict:
        """Check presence and quality of standard security headers.

        Returns {header: {'present', 'value', 'rating'}} where rating is
        'good', 'weak' or 'missing'.
        """
        results = {}
        for h in SECURITY_HEADERS:
            value = headers.get(h, '')
            results[h] = {
                'present': bool(value),
                'value': value,
                'rating': 'good' if value else 'missing',
            }

        # CSP allowing inline/eval script is only a weak mitigation.
        csp = headers.get('Content-Security-Policy', '')
        if csp:
            if "'unsafe-inline'" in csp or "'unsafe-eval'" in csp:
                results['Content-Security-Policy']['rating'] = 'weak'

        # HSTS max-age below one year (31536000 s) is considered weak.
        hsts = headers.get('Strict-Transport-Security', '')
        if hsts:
            if 'max-age' in hsts:
                try:
                    age = int(re.search(r'max-age=(\d+)', hsts).group(1))
                    if age < 31536000:
                        results['Strict-Transport-Security']['rating'] = 'weak'
                except Exception:
                    pass

        return results

    # ── Technology Fingerprinting ─────────────────────────────────────────

    def _fingerprint_tech(self, resp) -> List[str]:
        """Identify technologies from response headers, body and cookies."""
        # Lowercase each haystack once instead of per signature.
        headers_str = '\n'.join(f'{k}: {v}' for k, v in resp.headers.items()).lower()
        body = resp.text[:50000].lower()  # only check the first 50 KB
        cookies_str = (' '.join(resp.cookies.keys()) if resp.cookies else '').lower()

        techs = []
        for tech, sigs in TECH_SIGNATURES.items():
            hit = (any(h.lower() in headers_str for h in sigs['headers'])
                   or any(b.lower() in body for b in sigs['body'])
                   or any(c.lower() in cookies_str for c in sigs['cookies']))
            if hit:
                techs.append(tech)

        return techs

    # ── SSL/TLS Audit ─────────────────────────────────────────────────────

    def _check_ssl(self, hostname: str, port: int = 443) -> dict:
        """Check SSL/TLS configuration of a host.

        Performs two handshakes: an unverified one so protocol/cipher can
        be read even from hosts with bad certificates, then a verified one
        to test certificate-chain validity.
        """
        result = {
            'valid': False,
            'issuer': '',
            'subject': '',
            'expires': '',
            'protocol': '',
            'cipher': '',
            'issues': [],
        }
        try:
            # Handshake 1: no verification, gather protocol/cipher.
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            with ctx.wrap_socket(socket.socket(), server_hostname=hostname) as s:
                s.settimeout(5)
                s.connect((hostname, port))
                result['protocol'] = s.version()
                result['cipher'] = s.cipher()[0] if s.cipher() else ''

            # Handshake 2: default verification, validate the cert chain.
            ctx2 = ssl.create_default_context()
            try:
                with ctx2.wrap_socket(socket.socket(), server_hostname=hostname) as s2:
                    s2.settimeout(5)
                    s2.connect((hostname, port))
                    cert = s2.getpeercert()
                    result['valid'] = True
                    # getpeercert() returns RDNs as tuples of 1-element
                    # (key, value) tuples; flatten into a dict.
                    result['issuer'] = dict(x[0] for x in cert.get('issuer', []))
                    result['subject'] = dict(x[0] for x in cert.get('subject', []))
                    result['expires'] = cert.get('notAfter', '')
            except ssl.SSLCertVerificationError as e:
                result['issues'].append(f'Certificate validation failed: {e}')

            # Flag deprecated protocol versions.
            if result['protocol'] in ('TLSv1', 'TLSv1.1', 'SSLv3'):
                result['issues'].append(f'Weak protocol: {result["protocol"]}')

        except Exception as e:
            result['error'] = str(e)

        return result

    # ── Crawler ───────────────────────────────────────────────────────────

    def crawl(self, url: str, max_pages: int = 50, depth: int = 3) -> dict:
        """Spider a website breadth-first and build a sitemap.

        Args:
            url: start URL.
            max_pages: hard cap on pages visited.
            depth: maximum link depth from the start URL.
        """
        if not _HAS_REQUESTS:
            return {'ok': False, 'error': 'requests library required'}

        url = self._normalize_url(url)
        base_domain = urlparse(url).netloc
        visited: Set[str] = set()
        pages = []
        queue = [(url, 0)]  # BFS frontier of (url, depth) pairs
        sess = self._get_session()

        while queue and len(visited) < max_pages:
            current_url, current_depth = queue.pop(0)
            if current_url in visited or current_depth > depth:
                continue
            visited.add(current_url)

            try:
                r = sess.get(current_url, timeout=5, allow_redirects=True)
                page = {
                    'url': current_url,
                    'status': r.status_code,
                    'content_type': r.headers.get('Content-Type', ''),
                    'size': len(r.content),
                    'title': '',
                    'forms': 0,
                    'links_out': 0,
                }
                # Extract <title> text, if any.
                title_match = re.search(r'<title[^>]*>([^<]+)</title>', r.text, re.I)
                if title_match:
                    page['title'] = title_match.group(1).strip()

                # Count forms (rough indicator of attack surface).
                page['forms'] = len(re.findall(r'<form', r.text, re.I))

                # Queue same-origin links (query stripped) for further
                # crawling; count everything else as an outbound link.
                links = re.findall(r'href=["\']([^"\']+)["\']', r.text)
                outlinks = 0
                for link in links:
                    full_link = urljoin(current_url, link)
                    parsed = urlparse(full_link)
                    if parsed.netloc == base_domain:
                        clean = f'{parsed.scheme}://{parsed.netloc}{parsed.path}'
                        if clean not in visited:
                            queue.append((clean, current_depth + 1))
                    else:
                        outlinks += 1
                page['links_out'] = outlinks
                pages.append(page)

            except Exception:
                continue  # unreachable page: move on

        return {
            'ok': True,
            'url': url,
            'pages_crawled': len(pages),
            'pages': pages,
        }

    # ── Job Management ────────────────────────────────────────────────────

    def get_job_status(self, job_id: str) -> dict:
        """Return progress for a background job.

        Note: a finished job is removed from the registry on read, so its
        final status can be fetched exactly once.
        """
        holder = self._active_jobs.get(job_id)
        if not holder:
            return {'ok': False, 'error': 'Job not found'}
        result = {
            'ok': True,
            'done': holder['done'],
            'tested': holder['tested'],
            'total': holder['total'],
            'found': holder['found'],
        }
        if holder['done']:
            self._active_jobs.pop(job_id, None)
        return result

    # ── Helpers ───────────────────────────────────────────────────────────

    @staticmethod
    def _split_query(query: str) -> dict:
        """Split a raw query string into a name -> value dict.

        Values are deliberately NOT URL-decoded so injected payloads are
        replayed byte-for-byte in follow-up requests.  Segments without
        '=' are dropped.
        """
        if not query:
            return {}
        return dict(p.split('=', 1) for p in query.split('&') if '=' in p)

    @staticmethod
    def _normalize_url(url: str) -> str:
        """Strip whitespace and default the scheme to https."""
        url = url.strip()
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        return url
|
|
|
|
|
|
# ── Singleton ─────────────────────────────────────────────────────────────────

_instance = None          # process-wide WebAppScanner, created on first use
_lock = threading.Lock()  # guards first-time creation in get_webapp_scanner()
|
|
|
|
|
|
def get_webapp_scanner() -> WebAppScanner:
    """Return the process-wide WebAppScanner, creating it on first call.

    Uses double-checked locking so the common already-created path avoids
    taking the lock.
    """
    global _instance
    if _instance is not None:
        return _instance
    with _lock:
        # Re-check inside the lock: another thread may have won the race.
        if _instance is None:
            _instance = WebAppScanner()
    return _instance
|
|
|
|
|
|
# ── CLI ───────────────────────────────────────────────────────────────────────
|
|
|
|
def run():
    """Interactive CLI for Web Application Scanner.

    Presents a numbered menu and dispatches to the singleton scanner
    service; loops until the user selects 0 (Back).
    """
    svc = get_webapp_scanner()

    while True:
        print("\n╔═══════════════════════════════════════╗")
        print("║ WEB APPLICATION SCANNER ║")
        print("╠═══════════════════════════════════════╣")
        print("║ 1 — Quick Scan (headers + tech) ║")
        print("║ 2 — Directory Bruteforce ║")
        print("║ 3 — Subdomain Enumeration ║")
        print("║ 4 — Vulnerability Scan (SQLi/XSS) ║")
        print("║ 5 — Crawl / Spider ║")
        print("║ 0 — Back ║")
        print("╚═══════════════════════════════════════╝")

        choice = input("\n Select: ").strip()

        if choice == '0':
            break
        elif choice == '1':
            # Quick scan: headers, technologies, security headers, SSL.
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Scanning...")
            r = svc.quick_scan(url)
            print(f"\n Status: {r.get('status_code')}")
            print(f" Server: {r.get('server', 'unknown')}")
            if r.get('technologies'):
                print(f" Technologies: {', '.join(r['technologies'])}")
            if r.get('security_headers'):
                print(" Security Headers:")
                for h, info in r['security_headers'].items():
                    # Green check when present, red cross when missing.
                    mark = '\033[92m✓\033[0m' if info['present'] else '\033[91m✗\033[0m'
                    print(f" {mark} {h}")
            if r.get('ssl'):
                ssl_info = r['ssl']
                print(f" SSL: {'Valid' if ssl_info.get('valid') else 'INVALID'} "
                      f"({ssl_info.get('protocol', '?')})")
                for issue in ssl_info.get('issues', []):
                    print(f" [!] {issue}")
        elif choice == '2':
            # Directory bruteforce: start background job, poll until done.
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Starting directory bruteforce...")
            r = svc.dir_bruteforce(url)
            if r.get('job_id'):
                while True:
                    time.sleep(2)
                    s = svc.get_job_status(r['job_id'])
                    print(f" [{s['tested']}/{s['total']}] Found: {len(s['found'])}", end='\r')
                    if s['done']:
                        print()
                        for item in s['found']:
                            print(f" [{item['status']}] {item['path']} ({item['size']} bytes)")
                        break
        elif choice == '3':
            # Subdomain enumeration (CT logs + DNS bruteforce).
            domain = input(" Domain: ").strip()
            if not domain:
                continue
            print(" Enumerating subdomains...")
            r = svc.subdomain_enum(domain)
            print(f"\n Found {r['count']} subdomains:")
            for sub in r.get('subdomains', []):
                print(f" {sub}")
        elif choice == '4':
            # SQLi/XSS probe of parameterised URLs found on the page.
            url = input(" URL: ").strip()
            if not url:
                continue
            print(" Scanning for vulnerabilities...")
            r = svc.vuln_scan(url)
            if r.get('findings'):
                print(f"\n Found {len(r['findings'])} potential vulnerabilities:")
                for f in r['findings']:
                    print(f" [{f['severity'].upper()}] {f['type'].upper()}: {f['description']}")
                    print(f" Parameter: {f.get('parameter', '?')}, Payload: {f.get('payload', '?')}")
            else:
                print(" No vulnerabilities found in tested parameters.")
        elif choice == '5':
            # Same-origin crawler / sitemap builder.
            url = input(" URL: ").strip()
            if not url:
                continue
            max_pages = int(input(" Max pages (default 50): ").strip() or '50')
            print(" Crawling...")
            r = svc.crawl(url, max_pages=max_pages)
            print(f"\n Crawled {r.get('pages_crawled', 0)} pages:")
            for page in r.get('pages', []):
                print(f" [{page['status']}] {page['url']}"
                      f" ({page['size']} bytes, {page['forms']} forms)")