"""
AUTARCH Report Generator
Generate HTML reports for scan results
"""
import html
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Optional
class ReportGenerator:
    """Generate HTML reports for OSINT scan results.

    Every ``generate_*`` method renders its input into the base template,
    writes the result to a timestamped ``.html`` file inside ``output_dir``
    and returns that file's path as a string.  Values taken from scan data
    are HTML-escaped before they are embedded in the report.
    """

    # Known severity labels, most severe first; the position drives sorting.
    _SEVERITY_ORDER = ('CRITICAL', 'HIGH', 'MEDIUM', 'LOW')

    # Path separators and control characters are mapped to '_' so that a
    # caller-supplied name can never address a different directory.
    _UNSAFE_FILENAME_CHARS = str.maketrans(
        dict.fromkeys('/\\' + ''.join(map(chr, range(32))), '_')
    )

    def __init__(self, output_dir: Optional[str] = None):
        """Initialize report generator.

        Args:
            output_dir: Directory to save reports. When falsy, the directory
                returned by ``core.paths.get_reports_dir`` is used.
        """
        if output_dir:
            self.output_dir = Path(output_dir)
        else:
            # Imported lazily so the project dependency is only needed when
            # no explicit directory was given.
            from core.paths import get_reports_dir
            self.output_dir = Path(get_reports_dir())
        self.output_dir.mkdir(parents=True, exist_ok=True)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _get_html_template(self) -> str:
        """Get base HTML template with ``{title}`` and ``{content}`` fields."""
        return '\n{title}\n{content}\n'

    @staticmethod
    def _esc(value) -> str:
        """Return ``value`` converted to text and HTML-escaped."""
        return html.escape(str(value))

    @classmethod
    def _safe_filename_part(cls, value, replace_dots: bool = False) -> str:
        """Turn ``value`` into text that is safe inside a single file name.

        Args:
            value: Caller-supplied name (username, target, ...).
            replace_dots: When true, ``.`` becomes ``-`` (used for targets).
        """
        text = str(value)
        if replace_dots:
            text = text.replace('.', '-')
        return text.translate(cls._UNSAFE_FILENAME_CHARS)

    @classmethod
    def _severity_rank(cls, severity: str) -> int:
        """Return the sort position of ``severity``; unknown labels go last."""
        try:
            return cls._SEVERITY_ORDER.index(severity)
        except ValueError:
            return len(cls._SEVERITY_ORDER)

    @staticmethod
    def _issue_severity(issue: Dict) -> str:
        """Return the upper-cased severity of ``issue`` (``LOW`` if absent)."""
        return str(issue.get('severity') or 'LOW').upper()

    @staticmethod
    def _cvss_score(cve: Dict):
        """Return the CVE's ``cvss`` value, treating a missing/None score as 0."""
        score = cve.get('cvss', 0)
        return 0 if score is None else score

    @staticmethod
    def _cvss_severity(score) -> str:
        """Map a CVSS score to CRITICAL (>=9), HIGH (>=7), MEDIUM (>=4) or LOW."""
        if score >= 9.0:
            return 'CRITICAL'
        if score >= 7.0:
            return 'HIGH'
        if score >= 4.0:
            return 'MEDIUM'
        return 'LOW'

    @staticmethod
    def _severity_summary(counts: Dict) -> str:
        """Render the four per-severity counters shown at the top of a report."""
        return (
            f"\n{counts['CRITICAL']}\nCritical"
            f"\n{counts['HIGH']}\nHigh"
            f"\n{counts['MEDIUM']}\nMedium"
            f"\n{counts['LOW']}\nLow\n"
        )

    def _host_row(self, host: Dict) -> str:
        """Render one host (ip, hostname, OS guess, ports, services) as a row."""
        esc = self._esc
        ports = host.get('ports', [])
        ports_str = ', '.join(str(p.get('port', '')) for p in ports)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # same scan always renders the same text.
        services_str = ', '.join(
            str(name) for name in dict.fromkeys(p.get('service', '') for p in ports)
        )
        return (
            f"\n| {esc(host.get('ip', ''))} |"
            f"\n{esc(host.get('hostname', '-'))} |"
            f"\n{esc(host.get('os_guess', '-'))} |"
            f"\n{esc(ports_str or '-')} |"
            f"\n{esc(services_str or '-')} |\n"
        )

    def _write_report(self, stem: str, title: str, content: str) -> str:
        """Fill the template, write ``<stem>_<timestamp>.html`` and return its path."""
        document = self._get_html_template().format(
            title=self._esc(title),
            content=content,
        )
        filename = f"{stem}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.html"
        filepath = self.output_dir / filename
        filepath.write_text(document, encoding='utf-8')
        return str(filepath)

    # ------------------------------------------------------------------
    # Public report builders
    # ------------------------------------------------------------------

    def generate_username_report(
        self,
        username: str,
        results: List[Dict],
        total_checked: int,
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for username scan.

        Args:
            username: The username that was scanned.
            results: List of found profile dictionaries.
            total_checked: Total sites checked.
            scan_time: Total scan time in seconds. Accepted for interface
                compatibility; it is not rendered in the report.

        Returns:
            Path to generated report file.
        """
        esc = self._esc

        def confidence(entry):
            return entry.get('confidence', 0)

        # Categorize results
        accessible = [r for r in results if r.get('status') != 'restricted']
        restricted = [r for r in results if r.get('status') == 'restricted']
        high_conf = [r for r in accessible if confidence(r) >= 80]
        med_conf = [r for r in accessible if 60 <= confidence(r) < 80]
        confirmed = high_conf + med_conf

        # Group confirmed profiles by category, in the order they were found
        by_category = {}
        for r in accessible:
            if confidence(r) >= 60:
                by_category.setdefault(r.get('category', 'other'), []).append(r)

        # Build stats section
        stats_html = (
            f"\n{esc(total_checked)}\nSites Checked"
            f"\n{len(results)}\nTotal Found"
            f"\n{len(high_conf)}\nHigh Confidence"
            f"\n{len(med_conf)}\nMedium Confidence"
            f"\n{len(restricted)}\nRestricted\n"
        )

        # Confirmed profiles, highest confidence first
        confirmed_rows = ''.join(
            f"\n| {esc(r.get('name', 'Unknown'))}"
            f"{' [tracker]' if r.get('is_tracker') else ''} |"
            f"\n{esc(r.get('url', ''))} |"
            f"\n{esc(r.get('category', 'other'))} |"
            f"\n{esc(confidence(r))}% |\n"
            for r in sorted(confirmed, key=lambda x: -confidence(x))
        ) or '| No confirmed profiles found |\n'

        # Category breakdown, largest category first
        category_rows = ''.join(
            f"\n| {esc(cat)} |\n{len(items)} |\n"
            for cat, items in sorted(by_category.items(), key=lambda x: -len(x[1]))
        ) or '| No categories |\n'

        # Restricted section (only the first 30 entries are listed)
        restricted_rows = ''.join(
            f"\n| {esc(r.get('name', 'Unknown'))} |"
            f"\n{esc(r.get('url', ''))} |"
            f"\n{esc(r.get('category', 'other'))} |"
            "\nRestricted |\n"
            for r in restricted[:30]
        ) or '| None |\n'

        # Build full content
        content = (
            f"\n{stats_html}"
            f"\nConfirmed Profiles ({len(confirmed)})"
            "\n| Site |\nURL |\nCategory |\nConfidence |"
            f"\n{confirmed_rows}"
            "\nBy Category"
            "\n| Category |\nCount |"
            f"\n{category_rows}"
            f"\nRestricted Access ({len(restricted)})"
            "\nThese sites returned 403/401 errors - the profile may exist"
            " but requires authentication."
            "\n| Site |\nURL |\nCategory |\nStatus |"
            f"\n{restricted_rows}\n"
        )

        return self._write_report(
            self._safe_filename_part(username),
            f"AUTARCH Report - {username}",
            content,
        )

    def generate_geoip_report(self, results: List[Dict]) -> str:
        """Generate HTML report for GEO IP lookups.

        Args:
            results: List of GEO IP lookup result dictionaries. An entry that
                has an ``error`` key is rendered as an error row.

        Returns:
            Path to generated report file.
        """
        esc = self._esc
        row_parts = []
        for r in results:
            if 'error' in r:
                row_parts.append(
                    f"\n| {esc(r.get('target', 'Unknown'))} |"
                    f"\nError: {esc(r['error'])} |\n"
                )
                continue
            map_link = 'View Map' if r.get('map_osm') else '-'
            row_parts.append(
                f"\n| {esc(r.get('target', '-'))} |"
                f"\n{esc(r.get('ipv4', '-'))} |"
                f"\n{esc(r.get('country_code', '-'))} |"
                f"\n{esc(r.get('region', '-'))} |"
                f"\n{esc(r.get('city', '-'))} |"
                f"\n{esc(r.get('isp', '-'))} |"
                f"\n{map_link} |\n"
            )
        rows = ''.join(row_parts)

        content = (
            "\nGEO IP Results"
            "\n| Target |\nIPv4 |\nCountry |\nRegion |\nCity |\nISP |\nMap |"
            f"\n{rows}\n"
        )
        return self._write_report('geoip', "AUTARCH GEO IP Report", content)

    def generate_security_audit_report(
        self,
        system_info: Dict,
        issues: List[Dict],
        score: int
    ) -> str:
        """Generate HTML report for security audit.

        Args:
            system_info: System information dictionary.
            issues: List of security issues found.
            score: Security score 0-100.

        Returns:
            Path to generated report file.

        NOTE(review): the rendered content only covers ``issues``;
        ``system_info`` and ``score`` are accepted but do not appear in the
        output -- confirm whether they should be rendered.
        """
        esc = self._esc

        # Issues by severity (labels outside the known set are not counted)
        severity_counts = dict.fromkeys(self._SEVERITY_ORDER, 0)
        for issue in issues:
            sev = self._issue_severity(issue)
            if sev in severity_counts:
                severity_counts[sev] += 1

        # Issues table, most severe first; unknown severities sort last
        # instead of aborting the report.
        ordered = sorted(
            issues,
            key=lambda item: self._severity_rank(self._issue_severity(item)),
        )
        issue_rows = ''.join(
            f"\n| {esc(self._issue_severity(issue))} |"
            f"\n{esc(issue.get('title', ''))} |"
            f"\n{esc(issue.get('description', ''))} |"
            f"\n{esc(issue.get('recommendation', ''))} |\n"
            for issue in ordered
        ) or '| No issues found |\n'

        content = (
            f"{self._severity_summary(severity_counts)}"
            # Blank line left by the (empty) score gauge section.
            "\n"
            f"\nSecurity Issues ({len(issues)})"
            "\n| Severity |\nIssue |\nDescription |\nRecommendation |"
            f"\n{issue_rows}\n"
        )
        return self._write_report('audit', "AUTARCH Security Audit Report", content)

    def generate_network_scan_report(
        self,
        target: str,
        hosts: List[Dict],
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for network scan.

        Args:
            target: Target subnet/IP.
            hosts: List of host dictionaries with ports/services.
            scan_time: Total scan time in seconds. Accepted for interface
                compatibility; it is not rendered in the report.

        Returns:
            Path to generated report file.
        """
        all_services = {
            p.get('service', 'unknown')
            for h in hosts
            for p in h.get('ports', [])
        }

        host_rows = ''.join(self._host_row(h) for h in hosts) or '| No hosts found |\n'

        content = (
            f"\n{len(all_services)}\nUnique Services"
            f"\nHost Map ({len(hosts)} hosts)"
            "\n| IP Address |\nHostname |\nOS |\nOpen Ports |\nServices |"
            f"\n{host_rows}\n"
        )
        return self._write_report(
            f"network_{self._safe_filename_part(target, replace_dots=True)}",
            f"AUTARCH Network Scan - {target}",
            content,
        )

    def generate_vulnerability_report(
        self,
        target: str,
        correlations: List[Dict],
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for vulnerability scan.

        Args:
            target: Target IP/hostname.
            correlations: List of service-CVE correlation dicts.
            scan_time: Total scan time in seconds. Accepted for interface
                compatibility; it is not rendered in the report.

        Returns:
            Path to generated report file.
        """
        esc = self._esc

        severity_counts = dict.fromkeys(self._SEVERITY_ORDER, 0)
        for corr in correlations:
            for cve in corr.get('cves', []):
                severity_counts[self._cvss_severity(self._cvss_score(cve))] += 1

        # Per-service CVE sections, each sorted by descending CVSS score
        section_parts = []
        for corr in correlations:
            svc = corr.get('service', {})
            cves = corr.get('cves', [])
            svc_label = (
                f"{svc.get('service', 'unknown')}:{svc.get('version', '?')}"
                f" on port {svc.get('port', '?')}"
            )
            row_parts = []
            for cve in sorted(cves, key=lambda x: -self._cvss_score(x)):
                score = self._cvss_score(cve)
                # Truncate before escaping so an entity is never cut in half.
                description = str(cve.get('description') or '')[:200]
                row_parts.append(
                    f"\n| {esc(cve.get('id', ''))} |"
                    f"\n{self._cvss_severity(score)} ({esc(score)}) |"
                    f"\n{esc(description)} |\n"
                )
            cve_rows = ''.join(row_parts) or '| No CVEs found |\n'
            section_parts.append(
                f"\n{esc(svc_label)} ({len(cves)} CVEs)"
                "\n| CVE ID | Severity | Description |"
                f"\n{cve_rows}\n"
            )
        service_sections = ''.join(section_parts)

        content = f"{self._severity_summary(severity_counts)}{service_sections}\n"
        return self._write_report(
            f"vulns_{self._safe_filename_part(target, replace_dots=True)}",
            f"AUTARCH Vulnerability Report - {target}",
            content,
        )

    def generate_pentest_report(
        self,
        target: str,
        network_data: Optional[List[Dict]] = None,
        vuln_data: Optional[List[Dict]] = None,
        exploit_data: Optional[List[Dict]] = None,
        audit_data: Optional[Dict] = None
    ) -> str:
        """Generate combined pentest report.

        Args:
            target: Target IP/hostname.
            network_data: Network map host list (optional).
            vuln_data: Vulnerability correlations (optional).
            exploit_data: Exploit suggestions (optional).
            audit_data: Security audit data with 'system_info', 'issues',
                'score' (optional). Only 'issues' and 'score' are rendered.

        Returns:
            Path to generated report file.
        """
        esc = self._esc

        # Executive summary: one "- item" line per data set that was supplied
        summary_items = []
        if network_data:
            summary_items.append(f"{len(network_data)} hosts discovered")
        if vuln_data:
            total_cves = sum(len(c.get('cves', [])) for c in vuln_data)
            summary_items.append(
                f"{total_cves} vulnerabilities identified across {len(vuln_data)} services"
            )
        if exploit_data:
            summary_items.append(f"{len(exploit_data)} potential exploit paths identified")
        if audit_data:
            summary_items.append(
                f"Security score: {esc(audit_data.get('score', 'N/A'))}/100"
            )
        summary = (
            ''.join(f"- {item}\n" for item in summary_items)
            or '- No data collected\n'
        )
        sections = [f"\nExecutive Summary\n{summary}\n"]

        # Network map section
        if network_data:
            net_rows = ''.join(self._host_row(h) for h in network_data)
            sections.append(
                f"\nNetwork Map ({len(network_data)} hosts)"
                "\n| IP | Hostname | OS | Ports | Services |"
                f"\n{net_rows}\n"
            )

        # Vulnerabilities section
        if vuln_data:
            vuln_parts = []
            for corr in vuln_data:
                svc = corr.get('service', {})
                ranked = sorted(corr.get('cves', []), key=lambda x: -self._cvss_score(x))
                for cve in ranked:
                    score = self._cvss_score(cve)
                    description = str(cve.get('description') or '')[:150]
                    vuln_parts.append(
                        f"\n| {esc(svc.get('service', ''))}:{esc(svc.get('port', ''))} |"
                        f"\n{esc(cve.get('id', ''))} |"
                        f"\n{self._cvss_severity(score)} ({esc(score)}) |"
                        f"\n{esc(description)} |\n"
                    )
            vuln_rows = ''.join(vuln_parts)
            sections.append(
                "\nVulnerabilities"
                "\n| Service | CVE | Severity | Description |"
                f"\n{vuln_rows}\n"
            )

        # Exploit suggestions section
        if exploit_data:
            exploit_rows = ''.join(
                f"\n| {i} |"
                f"\n{esc(exp.get('module', ''))} |"
                f"\n{esc(exp.get('target', ''))} |"
                f"\n{esc(exp.get('cve', '-'))} |"
                f"\n{esc(exp.get('reasoning', ''))} |\n"
                for i, exp in enumerate(exploit_data, 1)
            )
            sections.append(
                f"\nExploit Suggestions ({len(exploit_data)})"
                "\n| # | Module | Target | CVE | Reasoning |"
                f"\n{exploit_rows}\n"
            )

        # Security audit section
        if audit_data:
            score = audit_data.get('score', 0)
            audit_issue_rows = ''.join(
                f"\n| {esc(self._issue_severity(issue))} |"
                f"\n{esc(issue.get('title', ''))} |"
                f"\n{esc(issue.get('description', ''))} |\n"
                for issue in audit_data.get('issues', [])
            ) or '| No issues |\n'
            sections.append(
                f"\nSecurity Audit (Score: {esc(score)}/100)"
                "\n| Severity | Issue | Description |"
                f"\n{audit_issue_rows}\n"
            )

        content = f"\n{''.join(sections)}\n"
        return self._write_report(
            f"pentest_{self._safe_filename_part(target, replace_dots=True)}",
            f"AUTARCH Pentest Report - {target}",
            content,
        )
def get_report_generator(output_dir: str = None) -> ReportGenerator:
    """Create and return a new ReportGenerator.

    Args:
        output_dir: Optional output directory, forwarded unchanged to the
            ReportGenerator constructor.

    Returns:
        The newly constructed ReportGenerator instance.
    """
    generator = ReportGenerator(output_dir)
    return generator