"""
AUTARCH Pentest Pipeline

Three-module architecture (Parsing -> Reasoning -> Generation)
based on PentestGPT's USENIX paper methodology.

Uses AUTARCH's local LLM via llama-cpp-python.
"""
import json
import re
from datetime import datetime
from typing import Optional, List, Dict, Any, Tuple

from .pentest_tree import PentestTree, PTTNode, PTTNodeType, NodeStatus
from .config import get_config
# ─── Source type detection patterns ──────────────────────────────────

# Ordered map of source name -> compiled signature regex. Insertion order
# matters: the first pattern that matches wins in detect_source_type().
SOURCE_PATTERNS = {
    'nmap': re.compile(r'Nmap scan report|PORT\s+STATE\s+SERVICE|nmap', re.IGNORECASE),
    'msf_scan': re.compile(r'auxiliary/scanner|msf\d?\s*>.*auxiliary|^\[\*\]\s.*scanning', re.IGNORECASE | re.MULTILINE),
    'msf_exploit': re.compile(r'exploit/|meterpreter|session\s+\d+\s+opened|^\[\*\]\s.*exploit', re.IGNORECASE | re.MULTILINE),
    'msf_post': re.compile(r'post/|meterpreter\s*>', re.IGNORECASE),
    'web': re.compile(r'HTTP/\d|<!DOCTYPE|<html|Content-Type:', re.IGNORECASE),
    'shell': re.compile(r'^\$\s|^root@|^#\s|bash|zsh', re.IGNORECASE | re.MULTILINE),
    'gobuster': re.compile(r'Gobuster|gobuster|Dir found|/\w+\s+\(Status:\s*\d+\)', re.IGNORECASE),
    'nikto': re.compile(r'Nikto|nikto|^\+\s', re.IGNORECASE | re.MULTILINE),
}


def detect_source_type(output: str) -> str:
    """Guess which tool produced *output* by signature matching.

    Only the first 2000 characters are inspected; returns the first
    SOURCE_PATTERNS key whose regex matches, or 'manual' when none do.
    """
    head = output[:2000]
    matches = (name for name, rx in SOURCE_PATTERNS.items() if rx.search(head))
    return next(matches, 'manual')
# ─── Prompt Templates ────────────────────────────────────────────────

# System prompt for ParsingModule: instructs the LLM to extract
# structured findings (SUMMARY / FINDINGS / STATUS) from raw tool output.
PARSING_SYSTEM_PROMPT = """You are a penetration testing output parser. Extract key findings from raw tool output.

Given raw output from a security tool, extract and summarize:
1. Open ports and services (with versions when available)
2. Vulnerabilities or misconfigurations found
3. Credentials or sensitive information discovered
4. Operating system and software versions
5. Any error messages or access denials

Rules:
- Be concise. Use bullet points.
- Include specific version numbers, port numbers, and IP addresses.
- Prefix exploitable findings with [VULN]
- Prefix credentials with [CRED]
- Note failed attempts and why they failed.
- Do not speculate beyond what the output shows.

Format your response as:
SUMMARY: one line description
FINDINGS:
- finding 1
- finding 2
- [VULN] vulnerability finding
STATUS: success/partial/failed"""

# System prompt for ReasoningModule: asks the LLM to maintain the task
# tree and pick the next task; the TREE_UPDATES / NEXT_TASK / REASONING
# sections are parsed by ReasoningModule._parse_tree_updates and reason().
REASONING_SYSTEM_PROMPT = """You are a penetration testing strategist. You maintain a task tree and decide next steps.

You will receive:
1. The current task tree showing completed and todo tasks
2. New findings from the latest tool execution

Your job:
1. UPDATE the tree based on new findings
2. DECIDE the single most important next task

Rules:
- Prioritize exploitation paths with highest success likelihood.
- If a service version is known, suggest checking for known CVEs.
- After recon, focus on the most promising attack surface.
- Do not add redundant tasks.
- Mark tasks not-applicable if findings make them irrelevant.

Respond in this exact format:
TREE_UPDATES:
- ADD: parent_id | node_type | priority | task description
- COMPLETE: node_id | findings summary
- NOT_APPLICABLE: node_id | reason

NEXT_TASK: description of the single most important next action
REASONING: 1-2 sentences explaining why this is the highest priority"""

# System prompt for GenerationModule: turns a task description into
# TOOL / ARGS / EXPECT command lines plus a FALLBACK, parsed by
# GenerationModule._parse_commands.
GENERATION_SYSTEM_PROMPT = """You are a penetration testing command generator. Convert task descriptions into specific executable commands.

Available tools:
- shell: Run shell command. Args: {"command": "...", "timeout": 30}
- msf_search: Search MSF modules. Args: {"query": "search term"}
- msf_module_info: Module details. Args: {"module_type": "auxiliary|exploit|post", "module_name": "path"}
- msf_execute: Run MSF module. Args: {"module_type": "...", "module_name": "...", "options": "{\\"RHOSTS\\": \\"...\\"}" }
- msf_sessions: List sessions. Args: {}
- msf_session_command: Command in session. Args: {"session_id": "...", "command": "..."}
- msf_console: MSF console command. Args: {"command": "..."}

Rules:
- Provide the EXACT tool name and JSON arguments.
- Describe what to look for in the output.
- If multiple steps needed, number them.
- Always include RHOSTS/target in module options.
- Prefer auxiliary scanners before exploits.

Format:
COMMANDS:
1. TOOL: tool_name | ARGS: {"key": "value"} | EXPECT: what to look for
2. TOOL: tool_name | ARGS: {"key": "value"} | EXPECT: what to look for
FALLBACK: alternative approach if primary fails"""

# One-shot prompt for PentestPipeline.get_initial_plan; `{target}` is
# substituted via str.format. Expects TASKS / FIRST_ACTION / REASONING back.
INITIAL_PLAN_PROMPT = """You are a penetration testing strategist planning an engagement.

Target: {target}

Create an initial reconnaissance plan. List the first 3-5 specific tasks to perform, ordered by priority.

Format:
TASKS:
1. node_type | priority | task description
2. node_type | priority | task description
3. node_type | priority | task description

FIRST_ACTION: description of the very first thing to do
REASONING: why start here"""

# System prompt for PentestPipeline.discuss; `{target}` and
# `{tree_summary}` are substituted via str.format.
DISCUSS_SYSTEM_PROMPT = """You are a penetration testing expert assistant. Answer the user's question about their current engagement.

Current target: {target}

Current status:
{tree_summary}

Answer concisely and provide actionable advice."""
class ParsingModule:
    """Normalizes raw tool output into structured summaries.

    Sends (possibly chunked) raw tool output to the LLM with a strict
    extraction prompt (PARSING_SYSTEM_PROMPT) and parses the structured
    reply back into a summary / findings / status dict.
    """

    def __init__(self, llm):
        # llm: chat interface exposing .chat(...) and .clear_history();
        # exact type is project-defined — see parse() for the calls made.
        self.llm = llm
        self.config = get_config()

    def parse(self, raw_output: str, source_type: str = "auto",
              context: str = "") -> dict:
        """Parse raw tool output into a normalized summary.

        Args:
            raw_output: Raw text emitted by a tool (nmap, MSF, shell, ...).
            source_type: A SOURCE_PATTERNS key, or "auto" to detect it.
            context: Optional extra context prepended to the LLM message.

        Returns:
            Dict with 'summary' (str), 'findings' (list[str]), 'status'
            ('success'/'partial'/'failed'/'unknown') and 'raw_source'
            (the detected or given source type).
        """
        if source_type == "auto":
            source_type = detect_source_type(raw_output)

        # Best-effort config lookup; keep the default on any failure.
        chunk_size = 2000
        try:
            chunk_size = self.config.get_int('pentest', 'output_chunk_size', 2000)
        except Exception:
            pass

        chunks = self._chunk_output(raw_output, chunk_size)

        all_findings: List[str] = []
        all_summaries: List[str] = []
        status = "unknown"

        for i, chunk in enumerate(chunks):
            prefix = f"[{source_type} output"
            if len(chunks) > 1:
                prefix += f" part {i+1}/{len(chunks)}"
            prefix += "]"

            message = f"{prefix}\n{chunk}"
            if context:
                message = f"Context: {context}\n\n{message}"

            self.llm.clear_history()
            try:
                response = self.llm.chat(
                    message,
                    system_prompt=PARSING_SYSTEM_PROMPT,
                    temperature=0.2,
                    max_tokens=512,
                )
            except Exception as e:
                # Fix: keep whatever earlier chunks produced instead of
                # discarding them (previously this returned an empty
                # result even when chunks 1..i-1 had parsed fine). For a
                # single-chunk failure the returned dict is unchanged.
                all_summaries.append(f"Parse error: {e}")
                status = 'failed'
                break

            summary, findings, chunk_status = self._parse_response(response)
            all_summaries.append(summary)
            all_findings.extend(findings)
            if chunk_status != "unknown":
                status = chunk_status

        return {
            'summary': " | ".join(all_summaries) if all_summaries else "No summary",
            'findings': all_findings,
            'status': status,
            'raw_source': source_type,
        }

    def _chunk_output(self, output: str, max_chunk: int = 2000) -> List[str]:
        """Split large output into line-aligned chunks of <= max_chunk chars.

        A single line longer than max_chunk is kept intact (never split
        mid-line), so individual chunks may exceed the limit in that case.
        """
        if len(output) <= max_chunk:
            return [output]
        chunks: List[str] = []
        lines = output.split('\n')
        current: List[str] = []
        current_len = 0
        for line in lines:
            # +1 accounts for the '\n' re-inserted by join below.
            if current_len + len(line) + 1 > max_chunk and current:
                chunks.append('\n'.join(current))
                current = []
                current_len = 0
            current.append(line)
            current_len += len(line) + 1
        if current:
            chunks.append('\n'.join(current))
        return chunks

    def _parse_response(self, response: str) -> Tuple[str, List[str], str]:
        """Extract (summary, findings, status) from an LLM response.

        Expects the SUMMARY/FINDINGS/STATUS layout requested by
        PARSING_SYSTEM_PROMPT; falls back to treating the raw response
        as the summary and bullet/tagged lines as findings.
        """
        summary = ""
        findings: List[str] = []
        status = "unknown"

        # Extract SUMMARY (first line after the tag)
        m = re.search(r'SUMMARY:\s*(.+)', response, re.IGNORECASE)
        if m:
            summary = m.group(1).strip()

        # Extract FINDINGS: consecutive '-'/'*' bullet lines after the tag
        findings_section = re.search(
            r'FINDINGS:\s*\n((?:[-*]\s*.+\n?)+)',
            response, re.IGNORECASE
        )
        if findings_section:
            for line in findings_section.group(1).strip().split('\n'):
                line = re.sub(r'^[-*]\s*', '', line).strip()
                if line:
                    findings.append(line)

        # Extract STATUS (single word, lowercased)
        m = re.search(r'STATUS:\s*(\w+)', response, re.IGNORECASE)
        if m:
            status = m.group(1).strip().lower()

        # Fallback: if structured parse failed, use full response
        if not summary and not findings:
            summary = response[:200].strip()
            for line in response.split('\n'):
                line = line.strip()
                if line.startswith(('-', '*', '[VULN]', '[CRED]')):
                    findings.append(re.sub(r'^[-*]\s*', '', line))

        return summary, findings, status
class ReasoningModule:
    """Maintains PTT and decides next actions.

    Feeds the current tree summary plus freshly parsed findings to the
    LLM (REASONING_SYSTEM_PROMPT), parses the requested ADD / COMPLETE /
    NOT_APPLICABLE operations, and applies them to the shared tree.
    """

    def __init__(self, llm, tree: PentestTree):
        # llm: chat interface exposing .chat(...) and .clear_history().
        self.llm = llm
        # tree is shared with the caller and mutated in place.
        self.tree = tree

    def reason(self, parsed_output: dict, context: str = "") -> dict:
        """Three-step reasoning: update tree, validate, extract next todo.

        Args:
            parsed_output: Dict from ParsingModule.parse (keys 'summary',
                'findings', 'raw_source' are read here).
            context: Optional extra context appended to the LLM message.

        Returns dict with 'tree_updates', 'next_task', 'reasoning'.
        """
        tree_summary = self.tree.render_summary()

        # Fold the parsed summary and bullet findings into one text blob.
        findings_text = parsed_output.get('summary', '')
        if parsed_output.get('findings'):
            findings_text += "\nFindings:\n"
            for f in parsed_output['findings']:
                findings_text += f"- {f}\n"

        message = (
            f"Current pentest tree:\n{tree_summary}\n\n"
            f"New information ({parsed_output.get('raw_source', 'unknown')}):\n"
            f"{findings_text}"
        )
        if context:
            message += f"\n\nAdditional context: {context}"

        self.llm.clear_history()
        try:
            response = self.llm.chat(
                message,
                system_prompt=REASONING_SYSTEM_PROMPT,
                temperature=0.3,
                max_tokens=1024,
            )
        except Exception as e:
            # LLM failure: report it as the "next task" so callers see it.
            return {
                'tree_updates': [],
                'next_task': f"Error during reasoning: {e}",
                'reasoning': str(e),
            }

        updates = self._parse_tree_updates(response)
        self._apply_updates(updates)

        next_task = ""
        m = re.search(r'NEXT_TASK:\s*(.+)', response, re.IGNORECASE)
        if m:
            next_task = m.group(1).strip()

        reasoning = ""
        # DOTALL lets the tag match late in the text; keep only line 1.
        m = re.search(r'REASONING:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
        if m:
            reasoning = m.group(1).strip().split('\n')[0]

        # Fallback: if no NEXT_TASK parsed, get from tree
        if not next_task:
            todo = self.tree.get_next_todo()
            if todo:
                next_task = todo.label

        return {
            'tree_updates': updates,
            'next_task': next_task,
            'reasoning': reasoning,
        }

    def _parse_tree_updates(self, response: str) -> List[dict]:
        """Extract tree operations from LLM response.

        Recognizes the three pipe-delimited line formats requested by
        REASONING_SYSTEM_PROMPT (ADD, COMPLETE, NOT_APPLICABLE).
        Returns a list of operation dicts keyed by 'operation'.
        """
        updates: List[dict] = []

        # Parse ADD operations
        # NOTE: priority is captured as a single digit by the (\d) group.
        for m in re.finditer(
            r'ADD:\s*(\S+)\s*\|\s*(\w+)\s*\|\s*(\d)\s*\|\s*(.+)',
            response, re.IGNORECASE
        ):
            parent = m.group(1).strip()
            # 'root'/'none'/'-' all mean "attach at the tree root".
            if parent.lower() in ('root', 'none', '-'):
                parent = None
            ntype_str = m.group(2).strip().lower()
            ntype = self._map_node_type(ntype_str)
            updates.append({
                'operation': 'add',
                'parent_id': parent,
                'node_type': ntype,
                'priority': int(m.group(3)),
                'label': m.group(4).strip(),
            })

        # Parse COMPLETE operations
        for m in re.finditer(
            r'COMPLETE:\s*(\S+)\s*\|\s*(.+)',
            response, re.IGNORECASE
        ):
            updates.append({
                'operation': 'complete',
                'node_id': m.group(1).strip(),
                'findings': m.group(2).strip(),
            })

        # Parse NOT_APPLICABLE operations
        for m in re.finditer(
            r'NOT_APPLICABLE:\s*(\S+)\s*\|\s*(.+)',
            response, re.IGNORECASE
        ):
            updates.append({
                'operation': 'not_applicable',
                'node_id': m.group(1).strip(),
                'reason': m.group(2).strip(),
            })

        return updates

    def _map_node_type(self, type_str: str) -> PTTNodeType:
        """Map a string to PTTNodeType.

        Accepts common aliases (e.g. 'recon', 'privesc', 'exfil');
        anything unrecognized falls back to PTTNodeType.CUSTOM.
        """
        mapping = {
            'recon': PTTNodeType.RECONNAISSANCE,
            'reconnaissance': PTTNodeType.RECONNAISSANCE,
            'initial_access': PTTNodeType.INITIAL_ACCESS,
            'initial': PTTNodeType.INITIAL_ACCESS,
            'access': PTTNodeType.INITIAL_ACCESS,
            'privesc': PTTNodeType.PRIVILEGE_ESCALATION,
            'privilege_escalation': PTTNodeType.PRIVILEGE_ESCALATION,
            'escalation': PTTNodeType.PRIVILEGE_ESCALATION,
            'lateral': PTTNodeType.LATERAL_MOVEMENT,
            'lateral_movement': PTTNodeType.LATERAL_MOVEMENT,
            'persistence': PTTNodeType.PERSISTENCE,
            'credential': PTTNodeType.CREDENTIAL_ACCESS,
            'credential_access': PTTNodeType.CREDENTIAL_ACCESS,
            'creds': PTTNodeType.CREDENTIAL_ACCESS,
            'exfiltration': PTTNodeType.EXFILTRATION,
            'exfil': PTTNodeType.EXFILTRATION,
        }
        return mapping.get(type_str.lower(), PTTNodeType.CUSTOM)

    def _apply_updates(self, updates: List[dict]):
        """Apply parsed operations to the tree.

        Node references that are not IDs are resolved by label; an
        operation whose node cannot be resolved is silently skipped.
        """
        for update in updates:
            op = update['operation']

            if op == 'add':
                # Resolve parent - could be an ID or a label
                parent_id = update.get('parent_id')
                if parent_id and parent_id not in self.tree.nodes:
                    # Try to find by label match
                    node = self.tree.find_node_by_label(parent_id)
                    parent_id = node.id if node else None

                self.tree.add_node(
                    label=update['label'],
                    node_type=update['node_type'],
                    parent_id=parent_id,
                    priority=update.get('priority', 3),
                )

            elif op == 'complete':
                node_id = update['node_id']
                if node_id not in self.tree.nodes:
                    node = self.tree.find_node_by_label(node_id)
                    if node:
                        node_id = node.id
                    else:
                        continue
                self.tree.update_node(
                    node_id,
                    status=NodeStatus.COMPLETED,
                    findings=[update.get('findings', '')],
                )

            elif op == 'not_applicable':
                node_id = update['node_id']
                if node_id not in self.tree.nodes:
                    node = self.tree.find_node_by_label(node_id)
                    if node:
                        node_id = node.id
                    else:
                        continue
                self.tree.update_node(
                    node_id,
                    status=NodeStatus.NOT_APPLICABLE,
                    details=update.get('reason', ''),
                )
class GenerationModule:
    """Converts abstract tasks into concrete commands.

    Asks the LLM (GENERATION_SYSTEM_PROMPT) for TOOL/ARGS/EXPECT command
    lines and parses them into executable command dicts.
    """

    def __init__(self, llm):
        # llm: chat interface exposing .chat(...) and .clear_history().
        self.llm = llm

    def generate(self, task_description: str, target: str,
                 context: str = "") -> dict:
        """Generate executable commands for a task.

        Args:
            task_description: Abstract task, e.g. "scan common ports".
            target: Host/network to embed in the request.
            context: Optional extra context (e.g. recent findings).

        Returns:
            Dict with 'commands' (list of {'tool', 'args', 'expect'}),
            'fallback' (str) and 'raw_response' (full LLM text).
        """
        message = f"Target: {target}\nTask: {task_description}"
        if context:
            message += f"\n\nContext: {context}"

        self.llm.clear_history()
        try:
            response = self.llm.chat(
                message,
                system_prompt=GENERATION_SYSTEM_PROMPT,
                temperature=0.2,
                max_tokens=512,
            )
        except Exception as e:
            return {
                'commands': [],
                'fallback': f"Generation error: {e}",
                'raw_response': str(e),
            }

        commands = self._parse_commands(response)
        fallback = ""
        # DOTALL lets FALLBACK match anywhere; keep only its first line.
        m = re.search(r'FALLBACK:\s*(.+)', response, re.IGNORECASE | re.DOTALL)
        if m:
            fallback = m.group(1).strip().split('\n')[0]

        return {
            'commands': commands,
            'fallback': fallback,
            'raw_response': response,
        }

    @staticmethod
    def _parse_json_args(args_str: str) -> dict:
        """Parse an ARGS JSON blob, tolerating single-quoted LLM output.

        Returns {'raw': args_str} when the string is not valid JSON even
        after quote normalization, so callers always get a dict.
        """
        try:
            return json.loads(args_str)
        except json.JSONDecodeError:
            pass
        try:
            # Common LLM mistake: single quotes instead of double quotes.
            return json.loads(args_str.replace("'", '"'))
        except json.JSONDecodeError:
            return {'raw': args_str}

    def _parse_commands(self, response: str) -> List[dict]:
        """Extract commands from LLM response.

        First tries the structured TOOL/ARGS/EXPECT format; when none is
        found, scans for bare shell or MSF console lines.
        (Fix: `import json` was previously re-executed inside the match
        loop; it is now a module-level import.)
        """
        commands: List[dict] = []

        # Parse structured TOOL: ... | ARGS: ... | EXPECT: ... format.
        # NOTE: the {[^}]+} group cannot handle nested JSON objects in
        # ARGS; such lines fail to match and hit the fallback scan below.
        for m in re.finditer(
            r'TOOL:\s*(\w+)\s*\|\s*ARGS:\s*(\{[^}]+\})\s*\|\s*EXPECT:\s*(.+)',
            response, re.IGNORECASE
        ):
            commands.append({
                'tool': m.group(1).strip(),
                'args': self._parse_json_args(m.group(2).strip()),
                'expect': m.group(3).strip(),
            })

        # Fallback: try to find shell commands or MSF commands
        if not commands:
            for line in response.split('\n'):
                line = line.strip()
                # Detect nmap/shell commands
                if re.match(r'^(nmap|nikto|gobuster|curl|wget|nc|netcat)\s', line):
                    commands.append({
                        'tool': 'shell',
                        'args': {'command': line},
                        'expect': 'Check output for results',
                    })
                # Detect MSF use/run commands
                elif re.match(r'^(use |run |set )', line, re.IGNORECASE):
                    commands.append({
                        'tool': 'msf_console',
                        'args': {'command': line},
                        'expect': 'Check output for results',
                    })

        return commands
# ─── Pipeline Orchestrator ────────────────────────────────────────────

class PentestPipeline:
    """Orchestrates the three-module pipeline.

    Wires ParsingModule -> ReasoningModule -> GenerationModule around a
    shared PentestTree and keeps a lightweight history of each step.
    """

    def __init__(self, llm, target: str, tree: Optional[PentestTree] = None):
        """Create a pipeline for *target*.

        Args:
            llm: Chat interface shared by all three modules.
            target: Target host/network identifier.
            tree: Existing tree to resume from; a fresh one is created
                when omitted. (Annotation fixed: None is a valid value.)
        """
        self.llm = llm
        self.target = target
        self.tree = tree or PentestTree(target)
        self.parser = ParsingModule(llm)
        self.reasoner = ReasoningModule(llm, self.tree)
        self.generator = GenerationModule(llm)
        # One entry appended per process_output() call; see bottom of
        # that method for the entry shape.
        self.history: List[dict] = []

    def process_output(self, raw_output: str,
                       source_type: str = "auto") -> dict:
        """Full pipeline: parse -> reason -> generate.

        Returns dict with 'parsed', 'reasoning', 'commands', 'next_task'
        (plus 'fallback').
        """
        # Step 1: Parse
        parsed = self.parser.parse(raw_output, source_type)

        # Step 2: Reason (mutates self.tree via ReasoningModule)
        reasoning = self.reasoner.reason(parsed)

        # Step 3: Generate commands for the next task
        generated = {'commands': [], 'fallback': ''}
        if reasoning.get('next_task'):
            # Build context from recent findings
            context = parsed.get('summary', '')
            generated = self.generator.generate(
                reasoning['next_task'],
                self.target,
                context=context,
            )

        result = {
            'parsed': parsed,
            'reasoning': reasoning,
            'commands': generated.get('commands', []),
            'fallback': generated.get('fallback', ''),
            'next_task': reasoning.get('next_task', ''),
        }

        # Record a compact trace entry (not the full payloads).
        self.history.append({
            'timestamp': datetime.now().isoformat(),
            'result': {
                'parsed_summary': parsed.get('summary', ''),
                'findings_count': len(parsed.get('findings', [])),
                'next_task': reasoning.get('next_task', ''),
                'commands_count': len(generated.get('commands', [])),
            }
        })

        return result

    def get_initial_plan(self) -> dict:
        """Generate initial pentest plan for the target.

        Adds the planned tasks to the tree and returns the plan plus
        generated commands for the first action.
        """
        prompt = INITIAL_PLAN_PROMPT.format(target=self.target)

        self.llm.clear_history()
        try:
            # NOTE(review): this reuses REASONING_SYSTEM_PROMPT rather
            # than a dedicated planning prompt — looks intentional
            # (strategist persona) but worth confirming.
            response = self.llm.chat(
                prompt,
                system_prompt=REASONING_SYSTEM_PROMPT,
                temperature=0.3,
                max_tokens=1024,
            )
        except Exception as e:
            return {
                'tasks': [],
                'first_action': f"Error: {e}",
                'reasoning': str(e),
            }

        # Parse TASKS lines: "N. node_type | priority | description"
        # (priority is a single digit per the (\d) group).
        tasks = []
        for m in re.finditer(
            r'(\d+)\.\s*(\w+)\s*\|\s*(\d)\s*\|\s*(.+)',
            response
        ):
            ntype_str = m.group(2).strip()
            ntype = self.reasoner._map_node_type(ntype_str)
            tasks.append({
                'node_type': ntype,
                'priority': int(m.group(3)),
                'label': m.group(4).strip(),
            })

        # Add tasks to tree under appropriate branches
        for task in tasks:
            # Find a root branch whose node_type matches; else top-level.
            parent_id = None
            for root_id in self.tree.root_nodes:
                root = self.tree.get_node(root_id)
                if root and root.node_type == task['node_type']:
                    parent_id = root_id
                    break
            self.tree.add_node(
                label=task['label'],
                node_type=task['node_type'],
                parent_id=parent_id,
                priority=task['priority'],
            )

        # Parse first action
        first_action = ""
        m = re.search(r'FIRST_ACTION:\s*(.+)', response, re.IGNORECASE)
        if m:
            first_action = m.group(1).strip()

        reasoning = ""
        m = re.search(r'REASONING:\s*(.+)', response, re.IGNORECASE)
        if m:
            reasoning = m.group(1).strip()

        # Generate commands for first action
        commands = []
        if first_action:
            gen = self.generator.generate(first_action, self.target)
            commands = gen.get('commands', [])

        return {
            'tasks': tasks,
            'first_action': first_action,
            'reasoning': reasoning,
            'commands': commands,
        }

    def inject_information(self, info: str, source: str = "manual") -> dict:
        """Inject external information and get updated recommendations.

        The text runs through the normal parse -> reason -> generate
        pipeline with *source* as its source type.
        (Fix: removed a dead pre-built 'parsed' dict that was constructed
        here but never used.)
        """
        return self.process_output(info, source_type=source)

    def discuss(self, question: str) -> str:
        """Ad-hoc question that doesn't affect the tree."""
        tree_summary = self.tree.render_summary()
        prompt = DISCUSS_SYSTEM_PROMPT.format(
            target=self.target,
            tree_summary=tree_summary,
        )
        self.llm.clear_history()
        try:
            return self.llm.chat(
                question,
                system_prompt=prompt,
                temperature=0.5,
                max_tokens=1024,
            )
        except Exception as e:
            return f"Error: {e}"