"""
AUTARCH Report Generator
Generate HTML reports for scan results
"""

import json
import os
import re
from collections import Counter
from datetime import datetime
from html import escape
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple

# Severity labels in display/sort order (most severe first).
_SEVERITY_ORDER = ('CRITICAL', 'HIGH', 'MEDIUM', 'LOW')
_SEVERITY_RANK = {name: rank for rank, name in enumerate(_SEVERITY_ORDER)}

# Anything that is not a word character, dot or dash is replaced in filenames,
# so scan targets/usernames can never inject path separators.
_UNSAFE_FILENAME_CHARS = re.compile(r'[^\w.-]+')

# Maximum number of restricted-access rows listed in a username report.
_MAX_RESTRICTED_ROWS = 30

# Base page. Literal CSS braces are doubled because the template is filled
# with str.format(title=..., content=...).
_HTML_TEMPLATE = '''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{title}</title>
<style>
:root {{
    --bg-primary: #0d1117;
    --bg-secondary: #161b22;
    --border: #30363d;
    --text-primary: #e6edf3;
    --text-secondary: #8b949e;
    --accent-green: #3fb950;
    --accent-yellow: #d29922;
    --accent-red: #f85149;
    --accent-blue: #58a6ff;
}}
body {{
    margin: 0;
    padding: 24px;
    background: var(--bg-primary);
    color: var(--text-primary);
    font-family: -apple-system, "Segoe UI", Helvetica, Arial, sans-serif;
    line-height: 1.5;
}}
.container {{ max-width: 1200px; margin: 0 auto; }}
.header {{ border-bottom: 1px solid var(--border); margin-bottom: 24px; }}
.meta span {{ margin-right: 24px; color: var(--text-secondary); }}
.stats {{ display: flex; flex-wrap: wrap; gap: 16px; margin-bottom: 24px; }}
.stat {{
    flex: 1 1 140px;
    padding: 16px;
    background: var(--bg-secondary);
    border: 1px solid var(--border);
    border-radius: 6px;
    text-align: center;
}}
.stat-value {{ font-size: 2em; font-weight: bold; }}
.stat-label {{ color: var(--text-secondary); }}
.section {{ margin-bottom: 32px; }}
.note {{ color: var(--text-secondary); }}
table {{ width: 100%; border-collapse: collapse; }}
th, td {{
    padding: 8px 12px;
    border-bottom: 1px solid var(--border);
    text-align: left;
    vertical-align: top;
}}
th {{ background: var(--bg-secondary); }}
td.empty {{ text-align: center; color: var(--text-secondary); }}
a {{ color: var(--accent-blue); }}
.badge {{ color: var(--accent-yellow); font-size: 0.85em; }}
.confidence.high, .severity-low {{ color: var(--accent-green); }}
.confidence.medium, .severity-medium, .restricted {{ color: var(--accent-yellow); }}
.confidence.low, .severity-high, .severity-critical {{ color: var(--accent-red); }}
.severity-critical {{ font-weight: bold; }}
.score-gauge {{ font-size: 3em; font-weight: bold; text-align: center; margin: 16px 0; }}
</style>
</head>
<body>
<div class="container">
{content}
</div>
</body>
</html>
'''


def _esc(value: Any) -> str:
    """Return ``value`` as text that is safe in HTML bodies and quoted attributes."""
    return escape('' if value is None else str(value), quote=True)


def _field(record: Dict, key: str, default: Any = '') -> str:
    """Return the escaped ``record[key]``, using ``default`` when missing or None."""
    value = record.get(key)
    return _esc(default if value is None else value)


def _as_number(value: Any, default: float = 0) -> float:
    """Coerce scan data to a number so comparisons never raise on None/strings."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _link(url: Any, label: Optional[str] = None) -> str:
    """Render ``url`` as an anchor, or as plain text when it is not http(s).

    Refusing other schemes keeps ``javascript:`` style URLs found in scan
    data from becoming clickable in the report.
    """
    text = _esc(url if label is None else label)
    target = '' if url is None else str(url).strip()
    if target.lower().startswith(('http://', 'https://')):
        return (
            f'<a href="{_esc(target)}" target="_blank" '
            f'rel="noopener noreferrer">{text}</a>'
        )
    return text


def _safe_filename_part(value: Any, dots_to_dashes: bool = False) -> str:
    """Turn arbitrary text into a single, separator-free filename component."""
    text = str(value)
    if dots_to_dashes:
        text = text.replace('.', '-')
    return _UNSAFE_FILENAME_CHARS.sub('_', text) or 'report'


def _display_time() -> str:
    """Current local time as shown in report headers."""
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def _file_stamp() -> str:
    """Current local time as used in report filenames."""
    return datetime.now().strftime('%Y%m%d_%H%M%S')


def _confidence_class(confidence: float) -> str:
    """Map a confidence percentage to its CSS class."""
    if confidence >= 80:
        return 'high'
    if confidence >= 60:
        return 'medium'
    return 'low'


def _cvss_severity(score: float) -> Tuple[str, str]:
    """Map a CVSS score to ``(label, css_class)``."""
    if score >= 9.0:
        return 'CRITICAL', 'severity-critical'
    if score >= 7.0:
        return 'HIGH', 'severity-high'
    if score >= 4.0:
        return 'MEDIUM', 'severity-medium'
    return 'LOW', 'severity-low'


def _score_color(score: float) -> str:
    """Map a 0-100 security score to the CSS colour variable for its gauge."""
    if score >= 80:
        return 'var(--accent-green)'
    if score >= 60:
        return 'var(--accent-yellow)'
    return 'var(--accent-red)'


def _issue_severity(issue: Dict) -> str:
    """Return the upper-cased severity of an issue, defaulting to LOW."""
    return str(issue.get('severity') or 'LOW').upper()


def _severity_badge(label: str, css_class: str, detail: str = '') -> str:
    """Render a severity label (plus optional detail text) with its CSS class."""
    return f'<span class="{css_class}">{_esc(label)}{_esc(detail)}</span>'


def _row(cells: Iterable[str]) -> str:
    """Build a table row from cells that are already escaped HTML fragments."""
    return '<tr>' + ''.join(f'<td>{cell}</td>' for cell in cells) + '</tr>'


def _table(
    headers: Sequence[str],
    rows: Iterable[str],
    empty_message: Optional[str] = None,
) -> str:
    """Build a table; ``empty_message`` fills a full-width row when there are no rows."""
    body = '\n'.join(rows)
    if not body and empty_message is not None:
        body = (
            f'<tr><td colspan="{len(headers)}" class="empty">'
            f'{_esc(empty_message)}</td></tr>'
        )
    head = ''.join(f'<th>{_esc(header)}</th>' for header in headers)
    return (
        '<table>\n'
        f'<thead><tr>{head}</tr></thead>\n'
        f'<tbody>\n{body}\n</tbody>\n'
        '</table>'
    )


def _header(heading: str, meta: Iterable[Tuple[str, Any]]) -> str:
    """Build the report heading with its ``label: value`` metadata line."""
    spans = ' '.join(
        f'<span><strong>{_esc(label)}:</strong> {_esc(value)}</span>'
        for label, value in meta
    )
    return (
        '<div class="header">\n'
        f'<h1>{_esc(heading)}</h1>\n'
        f'<div class="meta">{spans}</div>\n'
        '</div>'
    )


def _stat_cards(cards: Iterable[Tuple[Any, str]]) -> str:
    """Build the row of ``(value, label)`` summary cards."""
    items = '\n'.join(
        '<div class="stat">'
        f'<div class="stat-value">{_esc(value)}</div>'
        f'<div class="stat-label">{_esc(label)}</div>'
        '</div>'
        for value, label in cards
    )
    return f'<div class="stats">\n{items}\n</div>'


def _section(heading: str, body: str, note: Optional[str] = None) -> str:
    """Wrap already-rendered ``body`` HTML in a titled section."""
    note_html = f'<p class="note">{_esc(note)}</p>\n' if note else ''
    return (
        '<div class="section">\n'
        f'<h2>{_esc(heading)}</h2>\n'
        f'{note_html}{body}\n'
        '</div>'
    )


def _score_gauge(score: Any) -> str:
    """Render the coloured ``score/100`` gauge."""
    color = _score_color(_as_number(score))
    return f'<div class="score-gauge" style="color: {color}">{_esc(score)}/100</div>'


def _host_rows(hosts: Iterable[Dict]) -> List[str]:
    """Render one table row per scanned host (IP, hostname, OS, ports, services)."""
    rows = []
    for host in hosts:
        ports = host.get('ports') or []
        ports_str = ', '.join(str(p.get('port', '')) for p in ports)
        # Sorted for a stable report; empty/missing service names are dropped.
        services = sorted({str(p.get('service') or '') for p in ports} - {''})
        rows.append(_row([
            _field(host, 'ip'),
            _field(host, 'hostname', '-'),
            _field(host, 'os_guess', '-'),
            _esc(ports_str or '-'),
            _esc(', '.join(services) or '-'),
        ]))
    return rows


def _issue_rows(issues: Iterable[Dict], with_recommendation: bool) -> List[str]:
    """Render one table row per audit issue, in the order given."""
    rows = []
    for issue in issues:
        severity = _issue_severity(issue)
        # Only known severities get their own class; the label itself is escaped.
        known = severity in _SEVERITY_RANK
        css_class = f'severity-{severity.lower()}' if known else 'severity-low'
        cells = [
            _severity_badge(severity, css_class),
            _field(issue, 'title'),
            _field(issue, 'description'),
        ]
        if with_recommendation:
            cells.append(_field(issue, 'recommendation'))
        rows.append(_row(cells))
    return rows


def _sorted_cves(cves: Iterable[Dict]) -> List[Dict]:
    """Return CVEs ordered by CVSS score, highest first."""
    return sorted(cves, key=lambda cve: -_as_number(cve.get('cvss')))


def _cve_cells(cve: Dict, description_limit: int) -> List[str]:
    """Render the id / severity / truncated-description cells for one CVE."""
    score = _as_number(cve.get('cvss'))
    label, css_class = _cvss_severity(score)
    # Truncate before escaping so an entity is never cut in half.
    description = str(cve.get('description') or '')[:description_limit]
    return [
        _field(cve, 'id'),
        _severity_badge(label, css_class, f' ({score})'),
        _esc(description),
    ]


class ReportGenerator:
    """Generate HTML reports for OSINT scan results."""

    def __init__(self, output_dir: Optional[str] = None):
        """Initialize report generator.

        Args:
            output_dir: Directory to save reports. Defaults to results/reports.
        """
        if output_dir:
            self.output_dir = Path(output_dir)
        else:
            # Imported lazily so an explicit output_dir works without core.paths.
            from core.paths import get_reports_dir
            self.output_dir = get_reports_dir()

        self.output_dir.mkdir(parents=True, exist_ok=True)

    def _get_html_template(self) -> str:
        """Get base HTML template.

        Returns:
            A ``str.format`` template with ``{title}`` and ``{content}`` fields.
        """
        return _HTML_TEMPLATE

    def _write_report(self, title: str, content: str, filename: str) -> str:
        """Fill the page template and write it into the output directory.

        Args:
            title: Plain-text page title (escaped here).
            content: Already-rendered, already-escaped body HTML.
            filename: Sanitised file name, relative to ``output_dir``.

        Returns:
            Path to the written file.
        """
        document = self._get_html_template().format(
            title=_esc(title),
            content=content,
        )
        filepath = self.output_dir / filename
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(document)
        return str(filepath)

    def generate_username_report(
        self,
        username: str,
        results: List[Dict],
        total_checked: int,
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for username scan.

        Args:
            username: The username that was scanned.
            results: List of found profile dictionaries.
            total_checked: Total sites checked.
            scan_time: Total scan time in seconds.

        Returns:
            Path to generated report file.
        """
        # Categorize results
        restricted = [r for r in results if r.get('status') == 'restricted']
        accessible = [r for r in results if r.get('status') != 'restricted']
        scored = [(r, _as_number(r.get('confidence'))) for r in accessible]
        high_count = sum(1 for _, conf in scored if conf >= 80)
        # Confirmed = high + medium confidence, best match first (stable sort).
        confirmed = sorted(
            ((r, conf) for r, conf in scored if conf >= 60),
            key=lambda pair: -pair[1],
        )
        med_count = len(confirmed) - high_count

        # Group confirmed profiles by category
        by_category: Dict[str, int] = Counter(
            str(r.get('category') or 'other') for r, _ in confirmed
        )

        stats_html = _stat_cards([
            (total_checked, 'Sites Checked'),
            (len(results), 'Total Found'),
            (high_count, 'High Confidence'),
            (med_count, 'Medium Confidence'),
            (len(restricted), 'Restricted'),
        ])

        confirmed_rows = []
        for r, conf in confirmed:
            tracker_badge = (
                ' <span class="badge">[tracker]</span>' if r.get('is_tracker') else ''
            )
            confirmed_rows.append(_row([
                _field(r, 'name', 'Unknown') + tracker_badge,
                _link(r.get('url')),
                _field(r, 'category', 'other'),
                f'<span class="confidence {_confidence_class(conf)}">{_esc(conf)}%</span>',
            ]))

        # most_common() orders by count descending and keeps ties stable.
        category_rows = [
            _row([_esc(cat), _esc(count)])
            for cat, count in by_category.most_common()
        ]

        restricted_rows = [
            _row([
                _field(r, 'name', 'Unknown'),
                _link(r.get('url')),
                _field(r, 'category', 'other'),
                '<span class="restricted">Restricted</span>',
            ])
            for r in restricted[:_MAX_RESTRICTED_ROWS]
        ]

        content = '\n'.join([
            _header('AUTARCH Username Report', [
                ('Target', username),
                ('Date', _display_time()),
                ('Scan Time', f'{_as_number(scan_time):.1f}s'),
            ]),
            stats_html,
            _section(
                f'Confirmed Profiles ({len(confirmed)})',
                _table(
                    ['Site', 'URL', 'Category', 'Confidence'],
                    confirmed_rows,
                    'No confirmed profiles found',
                ),
            ),
            _section(
                'By Category',
                _table(['Category', 'Count'], category_rows, 'No categories'),
            ),
            _section(
                f'Restricted Access ({len(restricted)})',
                _table(['Site', 'URL', 'Category', 'Status'], restricted_rows, 'None'),
                note=(
                    'These sites returned 403/401 errors - the profile may '
                    'exist but requires authentication.'
                ),
            ),
        ])

        filename = f"{_safe_filename_part(username)}_{_file_stamp()}.html"
        return self._write_report(f"AUTARCH Report - {username}", content, filename)

    def generate_geoip_report(self, results: List[Dict]) -> str:
        """Generate HTML report for GEO IP lookups.

        Args:
            results: List of GEO IP lookup result dictionaries.

        Returns:
            Path to generated report file.
        """
        headers = ['Target', 'IPv4', 'Country', 'Region', 'City', 'ISP', 'Map']
        rows = []
        for r in results:
            if 'error' in r:
                # Failed lookups get one cell spanning the remaining columns.
                rows.append(
                    f'<tr><td>{_field(r, "target", "Unknown")}</td>'
                    f'<td colspan="{len(headers) - 1}">'
                    f'Error: {_esc(r["error"])}</td></tr>'
                )
                continue
            map_link = _link(r['map_osm'], 'View Map') if r.get('map_osm') else '-'
            rows.append(_row([
                _field(r, 'target', '-'),
                _field(r, 'ipv4', '-'),
                _field(r, 'country_code', '-'),
                _field(r, 'region', '-'),
                _field(r, 'city', '-'),
                _field(r, 'isp', '-'),
                map_link,
            ]))

        content = '\n'.join([
            _header('AUTARCH GEO IP Report', [
                ('Date', _display_time()),
                ('Total Lookups', len(results)),
            ]),
            _section('GEO IP Results', _table(headers, rows)),
        ])

        filename = f"geoip_{_file_stamp()}.html"
        return self._write_report("AUTARCH GEO IP Report", content, filename)

    def generate_security_audit_report(
        self,
        system_info: Dict,
        issues: List[Dict],
        score: int
    ) -> str:
        """Generate HTML report for security audit.

        Args:
            system_info: System information dictionary.
            issues: List of security issues found.
            score: Security score 0-100.

        Returns:
            Path to generated report file.
        """
        sys_rows = [_row([_esc(key), _esc(val)]) for key, val in system_info.items()]

        # Only the four known severities are tallied in the summary cards.
        tally = Counter(_issue_severity(issue) for issue in issues)
        severity_counts = {name: tally.get(name, 0) for name in _SEVERITY_ORDER}

        # Most severe first; unrecognised severities sort after LOW instead of
        # raising, and the sort is stable within each severity.
        ordered_issues = sorted(
            issues,
            key=lambda issue: _SEVERITY_RANK.get(
                _issue_severity(issue), len(_SEVERITY_ORDER)
            ),
        )

        content = '\n'.join([
            _header('Security Audit Report', [
                ('Date', _display_time()),
                ('Issues Found', len(issues)),
            ]),
            _stat_cards([
                (score, 'Security Score'),
                (severity_counts['CRITICAL'], 'Critical'),
                (severity_counts['HIGH'], 'High'),
                (severity_counts['MEDIUM'], 'Medium'),
                (severity_counts['LOW'], 'Low'),
            ]),
            _score_gauge(score),
            _section('System Information', _table(['Property', 'Value'], sys_rows)),
            _section(
                f'Security Issues ({len(issues)})',
                _table(
                    ['Severity', 'Issue', 'Description', 'Recommendation'],
                    _issue_rows(ordered_issues, with_recommendation=True),
                    'No issues found',
                ),
            ),
        ])

        filename = f"audit_{_file_stamp()}.html"
        return self._write_report("AUTARCH Security Audit Report", content, filename)

    def generate_network_scan_report(
        self,
        target: str,
        hosts: List[Dict],
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for network scan.

        Args:
            target: Target subnet/IP.
            hosts: List of host dictionaries with ports/services.
            scan_time: Total scan time in seconds.

        Returns:
            Path to generated report file.
        """
        # One pass over every open port gives the totals and the distribution.
        svc_count = Counter(
            str(p.get('service') or 'unknown')
            for h in hosts
            for p in (h.get('ports') or [])
        )
        total_ports = sum(svc_count.values())

        svc_rows = [
            _row([_esc(svc), _esc(count)])
            for svc, count in svc_count.most_common()
        ]

        content = '\n'.join([
            _header('Network Scan Report', [
                ('Target', target),
                ('Date', _display_time()),
                ('Scan Time', f'{_as_number(scan_time):.1f}s'),
            ]),
            _stat_cards([
                (len(hosts), 'Hosts Found'),
                (total_ports, 'Open Ports'),
                (len(svc_count), 'Unique Services'),
            ]),
            _section(
                f'Host Map ({len(hosts)} hosts)',
                _table(
                    ['IP Address', 'Hostname', 'OS', 'Open Ports', 'Services'],
                    _host_rows(hosts),
                    'No hosts found',
                ),
            ),
            _section('Service Distribution', _table(['Service', 'Count'], svc_rows)),
        ])

        safe_target = _safe_filename_part(target, dots_to_dashes=True)
        filename = f"network_{safe_target}_{_file_stamp()}.html"
        return self._write_report(f"AUTARCH Network Scan - {target}", content, filename)

    def generate_vulnerability_report(
        self,
        target: str,
        correlations: List[Dict],
        scan_time: float = 0
    ) -> str:
        """Generate HTML report for vulnerability scan.

        Args:
            target: Target IP/hostname.
            correlations: List of service-CVE correlation dicts.
            scan_time: Total scan time in seconds.

        Returns:
            Path to generated report file.
        """
        severity_counts = dict.fromkeys(_SEVERITY_ORDER, 0)
        service_sections = []
        for corr in correlations:
            svc = corr.get('service') or {}
            cves = corr.get('cves') or []

            for cve in cves:
                label, _ = _cvss_severity(_as_number(cve.get('cvss')))
                severity_counts[label] += 1

            svc_label = (
                f"{svc.get('service', 'unknown')}:{svc.get('version', '?')} "
                f"on port {svc.get('port', '?')}"
            )
            cve_rows = [_row(_cve_cells(cve, 200)) for cve in _sorted_cves(cves)]
            service_sections.append(_section(
                f'{svc_label} ({len(cves)} CVEs)',
                _table(['CVE ID', 'Severity', 'Description'], cve_rows, 'No CVEs found'),
            ))

        total_cves = sum(severity_counts.values())

        content = '\n'.join([
            _header('Vulnerability Report', [
                ('Target', target),
                ('Date', _display_time()),
                ('Scan Time', f'{_as_number(scan_time):.1f}s'),
            ]),
            _stat_cards([
                (total_cves, 'Total CVEs'),
                (severity_counts['CRITICAL'], 'Critical'),
                (severity_counts['HIGH'], 'High'),
                (severity_counts['MEDIUM'], 'Medium'),
                (severity_counts['LOW'], 'Low'),
            ]),
            *service_sections,
        ])

        safe_target = _safe_filename_part(target, dots_to_dashes=True)
        filename = f"vulns_{safe_target}_{_file_stamp()}.html"
        return self._write_report(
            f"AUTARCH Vulnerability Report - {target}", content, filename
        )

    def generate_pentest_report(
        self,
        target: str,
        network_data: Optional[List[Dict]] = None,
        vuln_data: Optional[List[Dict]] = None,
        exploit_data: Optional[List[Dict]] = None,
        audit_data: Optional[Dict] = None
    ) -> str:
        """Generate combined pentest report.

        Args:
            target: Target IP/hostname.
            network_data: Network map host list (optional).
            vuln_data: Vulnerability correlations (optional).
            exploit_data: Exploit suggestions (optional).
            audit_data: Security audit data with 'system_info', 'issues', 'score' (optional).

        Returns:
            Path to generated report file.
        """
        # Executive summary: one bullet per data set that was supplied.
        summary_items = []
        if network_data:
            summary_items.append(f"{len(network_data)} hosts discovered")
        if vuln_data:
            total_cves = sum(len(c.get('cves') or []) for c in vuln_data)
            summary_items.append(
                f"{total_cves} vulnerabilities identified "
                f"across {len(vuln_data)} services"
            )
        if exploit_data:
            summary_items.append(
                f"{len(exploit_data)} potential exploit paths identified"
            )
        if audit_data:
            summary_items.append(
                f"Security score: {audit_data.get('score', 'N/A')}/100"
            )
        summary_html = ''.join(f'<li>{_esc(item)}</li>' for item in summary_items)
        sections = [_section(
            'Executive Summary',
            f"<ul>{summary_html or '<li>No data collected</li>'}</ul>",
        )]

        # Network map section
        if network_data:
            sections.append(_section(
                f'Network Map ({len(network_data)} hosts)',
                _table(
                    ['IP', 'Hostname', 'OS', 'Ports', 'Services'],
                    _host_rows(network_data),
                ),
            ))

        # Vulnerabilities section
        if vuln_data:
            vuln_rows = []
            for corr in vuln_data:
                svc = corr.get('service') or {}
                svc_cell = _esc(f"{svc.get('service', '')}:{svc.get('port', '')}")
                for cve in _sorted_cves(corr.get('cves') or []):
                    vuln_rows.append(_row([svc_cell] + _cve_cells(cve, 150)))
            sections.append(_section(
                'Vulnerabilities',
                _table(['Service', 'CVE', 'Severity', 'Description'], vuln_rows),
            ))

        # Exploit suggestions section
        if exploit_data:
            exploit_rows = [
                _row([
                    _esc(i),
                    _field(exp, 'module'),
                    _field(exp, 'target'),
                    _field(exp, 'cve', '-'),
                    _field(exp, 'reasoning'),
                ])
                for i, exp in enumerate(exploit_data, 1)
            ]
            sections.append(_section(
                f'Exploit Suggestions ({len(exploit_data)})',
                _table(['#', 'Module', 'Target', 'CVE', 'Reasoning'], exploit_rows),
            ))

        # Security audit section
        if audit_data:
            score = audit_data.get('score', 0)
            audit_rows = _issue_rows(
                audit_data.get('issues') or [], with_recommendation=False
            )
            sections.append(_section(
                f'Security Audit (Score: {score}/100)',
                _score_gauge(score) + '\n' + _table(
                    ['Severity', 'Issue', 'Description'], audit_rows, 'No issues'
                ),
            ))

        content = '\n'.join([
            _header('Penetration Test Report', [
                ('Target', target),
                ('Date', _display_time()),
            ]),
            *sections,
        ])

        safe_target = _safe_filename_part(target, dots_to_dashes=True)
        filename = f"pentest_{safe_target}_{_file_stamp()}.html"
        return self._write_report(
            f"AUTARCH Pentest Report - {target}", content, filename
        )


def get_report_generator(output_dir: Optional[str] = None) -> ReportGenerator:
    """Get a ReportGenerator instance.

    Args:
        output_dir: Optional output directory.

    Returns:
        ReportGenerator instance.
    """
    return ReportGenerator(output_dir)