Autarch Will Control The Internet

This commit is contained in:
DigiJ
2026-03-13 15:17:15 -07:00
commit 4d3570781e
401 changed files with 484494 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
{
"active": true,
"tier": 2,
"protections": {
"private_dns": "adguard",
"ad_opt_out": true,
"location_accuracy": true,
"diagnostics": true
},
"activated_at": "2026-02-21T02:55:28.439016"
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

19
data/certs/autarch.crt Normal file
View File

@@ -0,0 +1,19 @@
-----BEGIN CERTIFICATE-----
MIIDKTCCAhGgAwIBAgIUbGMoP7LStEFVzg80J384NiSLbCIwDQYJKoZIhvcNAQEL
BQAwJDEQMA4GA1UEAwwHQVVUQVJDSDEQMA4GA1UECgwHZGFya0hhbDAeFw0yNjAz
MTMyMjA4MjhaFw0zNjAzMTAyMjA4MjhaMCQxEDAOBgNVBAMMB0FVVEFSQ0gxEDAO
BgNVBAoMB2RhcmtIYWwwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCQ
yifoISVxZyRV8V1jzcqSJmRPilI4X0xnPuelpnmA6teNZNF+By4vmsYrPFDkvBia
II79hREgG/1Qd6IbZpT21ZrgqVtjWAr/ndcDryqB1QGACcx9PuiLRlsvh+kzDbyq
qD6B1EdFj9qdw7S1EEg6jo1SbJ/7MKHsS5qBnCfpIucWBOls5ZFbGbClv61LiVIz
fBROaAbkaw8ZpPBXz3JITtQrXO9VvMQ/dha8znc2o3+LYBJmnH1dScdIam2ufssL
qqG86Yi31arQYFBz+0/Th/Z4qu94e+UxWUH8CXDhoQxva0R+3N0YlA6gIE4KR440
cjnhg6mR4JQFOi4BcdOdAgMBAAGjUzBRMB0GA1UdDgQWBBRVFEDgXaFm4zqu/B17
CjJb2Yqg6DAfBgNVHSMEGDAWgBRVFEDgXaFm4zqu/B17CjJb2Yqg6DAPBgNVHRMB
Af8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4IBAQBLdctAfdWz+gb+XhUw5fgxYDSm
fMMwOA7H8lKg9youCSLbj49q7kxuQYHE3i8xRdTsNOzvms/SwOnYsnAiNP/Kpp+V
nQYFuq6i9Gx1uDrnRorXofZPyxoxYmE9Hzgf4ptQzyF8JN3CLe8LViq41bGJl/f9
0S4fNKPi23R72cX8EmWpPjeMaswaaBAUa9V/nbCl/9+RMD45QV3ozNhMaJzJreLQ
cKSkMIqx25RbWWNZFb+jtm/tlDKs4uINoCWDgCk2hJl3LU/dKsRPytrKBWkag1qD
+UcXn8o9yEtBfWDaisXX3UATyOq/vxWnrXOrFu/70KfmgtWGrY8iMHECvYeT
-----END CERTIFICATE-----

10
data/dns/config.json Normal file
View File

@@ -0,0 +1,10 @@
{
"listen_dns": "10.0.0.56:53",
"listen_api": "127.0.0.1:5380",
"api_token": "5ed79350fed2490d2aca6f3b29776365",
"upstream": [],
"cache_ttl": 300,
"zones_dir": "C:\\she\\autarch\\data\\dns\\zones",
"dnssec_keys_dir": "C:\\she\\autarch\\data\\dns\\keys",
"log_queries": true
}

View File

@@ -0,0 +1,53 @@
{
"domain": "autarch.local",
"soa": {
"primary_ns": "ns1.autarch.local",
"admin_email": "admin.autarch.local",
"serial": 1772537115,
"refresh": 3600,
"retry": 600,
"expire": 86400,
"min_ttl": 300
},
"records": [
{
"id": "ns1",
"type": "NS",
"name": "autarch.local.",
"value": "ns1.autarch.local.",
"ttl": 3600
},
{
"id": "mx1",
"type": "MX",
"name": "autarch.local.",
"value": "mx.autarch.local.",
"ttl": 3600,
"priority": 10
},
{
"id": "spf1",
"type": "TXT",
"name": "autarch.local.",
"value": "v=spf1 ip4:127.0.0.1 -all",
"ttl": 3600
},
{
"id": "dmarc1",
"type": "TXT",
"name": "_dmarc.autarch.local.",
"value": "v=DMARC1; p=none; rua=mailto:dmarc@autarch.local",
"ttl": 3600
},
{
"id": "r1772537722879235900",
"type": "A",
"name": "autarch.local.",
"value": "10.0.0.56",
"ttl": 300
}
],
"dnssec": true,
"created_at": "2026-03-03T11:25:07Z",
"updated_at": "2026-03-03T12:24:00Z"
}

View File

@@ -0,0 +1,2 @@
Site,URL,Category,Status,Confidence
GitHub,https://github.com/test,,good,85
1 Site URL Category Status Confidence
2 GitHub https://github.com/test good 85

View File

@@ -0,0 +1,13 @@
{
"query": "testuser",
"exported": "2026-02-14T04:18:34.669640",
"total_results": 1,
"results": [
{
"name": "GitHub",
"url": "https://github.com/test",
"status": "good",
"rate": 85
}
]
}

View File

@@ -0,0 +1,98 @@
You are Hal, the AI agent powering Project AUTARCH — an autonomous security platform built by darkHal Security Group.
## Your Capabilities
You can read files, write files, execute shell commands, search the codebase, and create new AUTARCH modules on demand. When a user asks you to build a tool or module, you build it.
## AUTARCH Codebase Structure
- `modules/` — Plugin modules (Python files). Each one is a standalone tool.
- `core/` — Framework internals (llm.py, agent.py, tools.py, config.py, wireshark.py, etc.)
- `web/` — Flask web dashboard (routes/, templates/, static/)
- `data/` — Databases, configs, JSON files
- `models/` — LLM model files (GGUF)
## Module Categories
| Category | Color | Purpose |
|----------|-------|---------|
| defense | Blue | Security hardening, monitoring, firewalls |
| offense | Red | Penetration testing, exploitation |
| counter | Purple | Counter-intelligence, threat response |
| analyze | Cyan | Analysis, forensics, packet inspection |
| osint | Green | Open source intelligence gathering |
| simulate | Yellow | Attack simulation, red team exercises |
## How to Create a Module
Every module in `modules/` MUST have these attributes and a `run()` function:
```python
"""
Module description docstring
"""
import os
import sys
import subprocess
from pathlib import Path
# Module metadata — REQUIRED
DESCRIPTION = "What this module does"
AUTHOR = "darkHal"
VERSION = "1.0"
CATEGORY = "defense" # One of: defense, offense, counter, analyze, osint, simulate
sys.path.insert(0, str(Path(__file__).parent.parent))
from core.banner import Colors, clear_screen, display_banner
class ModuleClassName:
    """Main class for this module."""

    def print_status(self, message, status="info"):
        """Print *message* prefixed by a colored ``[symbol]`` tag chosen by *status*.

        Unknown status values fall back to white with the ``*`` symbol.
        """
        palette = {
            "info": (Colors.CYAN, "*"),
            "success": (Colors.GREEN, "+"),
            "warning": (Colors.YELLOW, "!"),
            "error": (Colors.RED, "X"),
        }
        color, symbol = palette.get(status, (Colors.WHITE, "*"))
        print(f"{color}[{symbol}] {message}{Colors.RESET}")

    def run_cmd(self, cmd, timeout=30):
        """Run *cmd* through the system shell.

        Returns ``(True/False, stripped stdout)`` based on the exit code, or
        ``(False, error text)`` if launching or waiting on the command raised
        (including a timeout after *timeout* seconds).
        """
        try:
            proc = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout)
            succeeded = proc.returncode == 0
            return succeeded, proc.stdout.strip()
        except Exception as exc:
            return False, str(exc)

    # Add your methods here...
def run():
    """CLI-mode entry point for the module."""
    module = ModuleClassName()
    # Interactive menu or direct execution
```
## Important Rules
1. Use the `create_module` tool to write modules — it validates and saves them automatically
2. Always include the metadata: DESCRIPTION, AUTHOR, VERSION, CATEGORY
3. Always include a `run()` function
4. Use `subprocess.run()` for system commands — support both Windows (PowerShell/netsh) and Linux (bash)
5. Import from `core.banner` for Colors
6. Module filenames should be lowercase with underscores (e.g., `port_scanner.py`)
7. Study existing modules with `read_file` if you need to understand patterns
8. The web dashboard discovers modules automatically from the `modules/` directory
## Platform
This system runs on Windows. Use PowerShell commands where appropriate, but also support Linux fallbacks.
## Existing Modules (for reference)
- defender.py — System hardening checks (CATEGORY: defense)
- defender_windows.py — Windows-native security checks (CATEGORY: defense)
- defender_monitor.py — Real-time threat monitoring (CATEGORY: defense)
- recon.py — Network reconnaissance (CATEGORY: offense)
- counter.py — Counter-intelligence tools (CATEGORY: counter)
- adultscan.py — Adult content scanner (CATEGORY: analyze)
- agent_hal.py — AI security automation (CATEGORY: core)
- wireshark.py — Packet analysis (CATEGORY: analyze)
- hardware_local.py — Hardware interaction (CATEGORY: hardware)
## How You Should Respond
- For simple questions: answer directly
- For module creation requests: use the create_module tool
- For system queries: use the shell tool
- For code exploration: use read_file and search_files
- Always explain what you're doing and why

View File

@@ -0,0 +1,129 @@
{
"session_id": "10_0_0_56_20260214_010220",
"target": "10.0.0.56",
"state": "completed",
"created_at": "2026-02-14T01:02:20.746609",
"updated_at": "2026-02-14T01:12:20.951316",
"notes": "",
"step_count": 0,
"tree": {
"target": "10.0.0.56",
"created_at": "2026-02-14T01:02:20.746597",
"updated_at": "2026-02-14T01:02:20.746742",
"root_nodes": [
"e0d00dbc",
"cf120ead",
"6f4a664c",
"814f0376",
"5b602881",
"4d2e70e8"
],
"nodes": {
"e0d00dbc": {
"id": "e0d00dbc",
"label": "Reconnaissance",
"node_type": "reconnaissance",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Information gathering and target enumeration",
"tool_output": null,
"findings": [],
"priority": 1,
"created_at": "2026-02-14T01:02:20.746668",
"updated_at": "2026-02-14T01:02:20.746668"
},
"cf120ead": {
"id": "cf120ead",
"label": "Initial Access",
"node_type": "initial_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Gaining initial foothold on target",
"tool_output": null,
"findings": [],
"priority": 2,
"created_at": "2026-02-14T01:02:20.746685",
"updated_at": "2026-02-14T01:02:20.746685"
},
"6f4a664c": {
"id": "6f4a664c",
"label": "Privilege Escalation",
"node_type": "privilege_escalation",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Escalating from initial access to higher privileges",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-02-14T01:02:20.746699",
"updated_at": "2026-02-14T01:02:20.746699"
},
"814f0376": {
"id": "814f0376",
"label": "Lateral Movement",
"node_type": "lateral_movement",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Moving to other systems in the network",
"tool_output": null,
"findings": [],
"priority": 4,
"created_at": "2026-02-14T01:02:20.746711",
"updated_at": "2026-02-14T01:02:20.746711"
},
"5b602881": {
"id": "5b602881",
"label": "Credential Access",
"node_type": "credential_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Obtaining credentials and secrets",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-02-14T01:02:20.746726",
"updated_at": "2026-02-14T01:02:20.746726"
},
"4d2e70e8": {
"id": "4d2e70e8",
"label": "Persistence",
"node_type": "persistence",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Maintaining access to compromised systems",
"tool_output": null,
"findings": [],
"priority": 5,
"created_at": "2026-02-14T01:02:20.746739",
"updated_at": "2026-02-14T01:02:20.746739"
}
}
},
"events": [
{
"timestamp": "2026-02-14T01:02:20.746747",
"event_type": "state_change",
"data": {
"from": "idle",
"to": "running"
}
},
{
"timestamp": "2026-02-14T01:12:20.951316",
"event_type": "state_change",
"data": {
"from": "running",
"to": "completed",
"summary": ""
}
}
],
"findings": [],
"pipeline_history": []
}

View File

@@ -0,0 +1,120 @@
{
"session_id": "192_168_1_100_20260127_202421",
"target": "192.168.1.100",
"state": "running",
"created_at": "2026-01-27T20:24:21.604010",
"updated_at": "2026-01-27T20:24:21.604098",
"notes": "",
"step_count": 0,
"tree": {
"target": "192.168.1.100",
"created_at": "2026-01-27T20:24:21.604003",
"updated_at": "2026-01-27T20:24:21.604091",
"root_nodes": [
"4be13ed9",
"8dc38740",
"22ee2768",
"2c45477f",
"6f793ae8",
"778fc896"
],
"nodes": {
"4be13ed9": {
"id": "4be13ed9",
"label": "Reconnaissance",
"node_type": "reconnaissance",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Information gathering and target enumeration",
"tool_output": null,
"findings": [],
"priority": 1,
"created_at": "2026-01-27T20:24:21.604032",
"updated_at": "2026-01-27T20:24:21.604032"
},
"8dc38740": {
"id": "8dc38740",
"label": "Initial Access",
"node_type": "initial_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Gaining initial foothold on target",
"tool_output": null,
"findings": [],
"priority": 2,
"created_at": "2026-01-27T20:24:21.604044",
"updated_at": "2026-01-27T20:24:21.604044"
},
"22ee2768": {
"id": "22ee2768",
"label": "Privilege Escalation",
"node_type": "privilege_escalation",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Escalating from initial access to higher privileges",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-27T20:24:21.604056",
"updated_at": "2026-01-27T20:24:21.604056"
},
"2c45477f": {
"id": "2c45477f",
"label": "Lateral Movement",
"node_type": "lateral_movement",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Moving to other systems in the network",
"tool_output": null,
"findings": [],
"priority": 4,
"created_at": "2026-01-27T20:24:21.604066",
"updated_at": "2026-01-27T20:24:21.604066"
},
"6f793ae8": {
"id": "6f793ae8",
"label": "Credential Access",
"node_type": "credential_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Obtaining credentials and secrets",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-27T20:24:21.604077",
"updated_at": "2026-01-27T20:24:21.604077"
},
"778fc896": {
"id": "778fc896",
"label": "Persistence",
"node_type": "persistence",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Maintaining access to compromised systems",
"tool_output": null,
"findings": [],
"priority": 5,
"created_at": "2026-01-27T20:24:21.604088",
"updated_at": "2026-01-27T20:24:21.604088"
}
}
},
"events": [
{
"timestamp": "2026-01-27T20:24:21.604098",
"event_type": "state_change",
"data": {
"from": "idle",
"to": "running"
}
}
],
"findings": [],
"pipeline_history": []
}

View File

@@ -0,0 +1,120 @@
{
"session_id": "192_168_50_78_20260130_133833",
"target": "192.168.50.78",
"state": "running",
"created_at": "2026-01-30T13:38:33.830336",
"updated_at": "2026-01-30T13:38:33.830464",
"notes": "",
"step_count": 0,
"tree": {
"target": "192.168.50.78",
"created_at": "2026-01-30T13:38:33.830323",
"updated_at": "2026-01-30T13:38:33.830460",
"root_nodes": [
"e4c40c28",
"ddd63828",
"b3f2634d",
"9c162c78",
"aa40d5a3",
"0c50a23d"
],
"nodes": {
"e4c40c28": {
"id": "e4c40c28",
"label": "Reconnaissance",
"node_type": "reconnaissance",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Information gathering and target enumeration",
"tool_output": null,
"findings": [],
"priority": 1,
"created_at": "2026-01-30T13:38:33.830390",
"updated_at": "2026-01-30T13:38:33.830390"
},
"ddd63828": {
"id": "ddd63828",
"label": "Initial Access",
"node_type": "initial_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Gaining initial foothold on target",
"tool_output": null,
"findings": [],
"priority": 2,
"created_at": "2026-01-30T13:38:33.830408",
"updated_at": "2026-01-30T13:38:33.830408"
},
"b3f2634d": {
"id": "b3f2634d",
"label": "Privilege Escalation",
"node_type": "privilege_escalation",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Escalating from initial access to higher privileges",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-30T13:38:33.830421",
"updated_at": "2026-01-30T13:38:33.830421"
},
"9c162c78": {
"id": "9c162c78",
"label": "Lateral Movement",
"node_type": "lateral_movement",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Moving to other systems in the network",
"tool_output": null,
"findings": [],
"priority": 4,
"created_at": "2026-01-30T13:38:33.830433",
"updated_at": "2026-01-30T13:38:33.830433"
},
"aa40d5a3": {
"id": "aa40d5a3",
"label": "Credential Access",
"node_type": "credential_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Obtaining credentials and secrets",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-30T13:38:33.830445",
"updated_at": "2026-01-30T13:38:33.830445"
},
"0c50a23d": {
"id": "0c50a23d",
"label": "Persistence",
"node_type": "persistence",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Maintaining access to compromised systems",
"tool_output": null,
"findings": [],
"priority": 5,
"created_at": "2026-01-30T13:38:33.830457",
"updated_at": "2026-01-30T13:38:33.830457"
}
}
},
"events": [
{
"timestamp": "2026-01-30T13:38:33.830464",
"event_type": "state_change",
"data": {
"from": "idle",
"to": "running"
}
}
],
"findings": [],
"pipeline_history": []
}

View File

@@ -0,0 +1,120 @@
{
"session_id": "example_com_20260128_192244",
"target": "example.com",
"state": "running",
"created_at": "2026-01-28T19:22:44.670292",
"updated_at": "2026-01-28T19:22:44.670428",
"notes": "test",
"step_count": 0,
"tree": {
"target": "example.com",
"created_at": "2026-01-28T19:22:44.670279",
"updated_at": "2026-01-28T19:22:44.670423",
"root_nodes": [
"466dcf04",
"55991daa",
"e3209082",
"af036f87",
"633c0eeb",
"8584f7fc"
],
"nodes": {
"466dcf04": {
"id": "466dcf04",
"label": "Reconnaissance",
"node_type": "reconnaissance",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Information gathering and target enumeration",
"tool_output": null,
"findings": [],
"priority": 1,
"created_at": "2026-01-28T19:22:44.670353",
"updated_at": "2026-01-28T19:22:44.670353"
},
"55991daa": {
"id": "55991daa",
"label": "Initial Access",
"node_type": "initial_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Gaining initial foothold on target",
"tool_output": null,
"findings": [],
"priority": 2,
"created_at": "2026-01-28T19:22:44.670371",
"updated_at": "2026-01-28T19:22:44.670371"
},
"e3209082": {
"id": "e3209082",
"label": "Privilege Escalation",
"node_type": "privilege_escalation",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Escalating from initial access to higher privileges",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-28T19:22:44.670384",
"updated_at": "2026-01-28T19:22:44.670384"
},
"af036f87": {
"id": "af036f87",
"label": "Lateral Movement",
"node_type": "lateral_movement",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Moving to other systems in the network",
"tool_output": null,
"findings": [],
"priority": 4,
"created_at": "2026-01-28T19:22:44.670397",
"updated_at": "2026-01-28T19:22:44.670397"
},
"633c0eeb": {
"id": "633c0eeb",
"label": "Credential Access",
"node_type": "credential_access",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Obtaining credentials and secrets",
"tool_output": null,
"findings": [],
"priority": 3,
"created_at": "2026-01-28T19:22:44.670408",
"updated_at": "2026-01-28T19:22:44.670408"
},
"8584f7fc": {
"id": "8584f7fc",
"label": "Persistence",
"node_type": "persistence",
"status": "todo",
"parent_id": null,
"children": [],
"details": "Maintaining access to compromised systems",
"tool_output": null,
"findings": [],
"priority": 5,
"created_at": "2026-01-28T19:22:44.670420",
"updated_at": "2026-01-28T19:22:44.670420"
}
}
},
"events": [
{
"timestamp": "2026-01-28T19:22:44.670428",
"event_type": "state_change",
"data": {
"from": "idle",
"to": "running"
}
}
],
"findings": [],
"pipeline_history": []
}

10185
data/sites/blackbird.json Normal file

File diff suppressed because it is too large Load Diff

1897
data/sites/cupidcr4wl.json Normal file

File diff suppressed because it is too large Load Diff

9793
data/sites/detectdee.json Normal file

File diff suppressed because it is too large Load Diff

76365
data/sites/dh.json Normal file

File diff suppressed because it is too large Load Diff

35922
data/sites/maigret.json Normal file

File diff suppressed because it is too large Load Diff

1696
data/sites/nexfil.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3251
data/sites/sherlock.json Normal file

File diff suppressed because it is too large Load Diff

63308
data/sites/snoop.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

63748
data/sites/test.json Normal file

File diff suppressed because it is too large Load Diff

10172
data/sites/whatsmyname.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

3238
data/tracker_domains.json Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""AUTARCH LoRA Training Script (Transformers + PEFT)"""
import json
import torch
from datasets import Dataset
from transformers import (
AutoModelForCausalLM, AutoTokenizer, TrainingArguments,
BitsAndBytesConfig,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
# Quantization config: 4-bit NF4 weights, fp16 compute, double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)

# NOTE(review): a .gguf file path is passed as the model identifier together
# with a bitsandbytes quantization config. Transformers documents GGUF loading
# through the `gguf_file` argument of from_pretrained -- confirm this
# combination actually loads before relying on this script.
print("Loading base model: models/Hal_v2.gguf")
model = AutoModelForCausalLM.from_pretrained(
    "models/Hal_v2.gguf",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=False,
)
tokenizer = AutoTokenizer.from_pretrained("models/Hal_v2.gguf", trust_remote_code=False)
if tokenizer.pad_token is None:
    # No dedicated pad token: reuse EOS so batches can be padded.
    tokenizer.pad_token = tokenizer.eos_token

# The model is always loaded quantized above, so k-bit preparation always runs.
model = prepare_model_for_kbit_training(model)

# LoRA config: rank-16 adapters on the attention and MLP projection layers.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Load dataset
# Raw string on purpose: in a normal literal "\a" and "\t" are real escapes
# (BEL and TAB) and "\s"/"\d" are invalid escapes, so the Windows path was
# silently corrupted and raised SyntaxWarning.
DATASET_PATH = r"C:\she\autarch\data\training\autarch_dataset_20260302_202634.jsonl"

samples = []
# Explicit UTF-8 so decoding does not depend on the Windows locale code page.
with open(DATASET_PATH, "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline) in the JSONL file.
            continue
        samples.append(json.loads(line))
def format_sample(sample):
    """Render one dataset record as a single training string.

    Two record layouts are accepted:

    * ``{"conversations": [{"from": ..., "value": ...}, ...]}`` -- every turn
      is emitted in order; ``from == "human"`` maps to the ``user`` role and
      any other value maps to ``assistant``.
    * ``{"instruction": ..., "output": ..., "input": ...}`` -- ``input`` is
      optional and defaults to an empty string; instruction and input form the
      user turn, output forms the assistant turn.

    Returns:
        ``{"text": <str>}`` with each turn wrapped in ``<|im_start|>`` /
        ``<|im_end|>`` markers.

    Raises:
        KeyError: if a required key is missing from the record or a turn.
    """
    if "conversations" not in sample:
        user_turn = f"{sample['instruction']}\n{sample.get('input','')}"
        return {"text": f"<|im_start|>user\n{user_turn}<|im_end|>\n<|im_start|>assistant\n{sample['output']}<|im_end|>\n"}

    # Collect turns and join once instead of growing a string with += in a loop.
    turns = []
    for msg in sample["conversations"]:
        role = "user" if msg["from"] == "human" else "assistant"
        turns.append(f"<|im_start|>{role}\n{msg['value']}<|im_end|>\n")
    return {"text": "".join(turns)}
dataset = Dataset.from_list([format_sample(s) for s in samples])
print(f"Dataset: {len(dataset)} samples")

# Raw string on purpose: in a normal literal "\a" and "\t" are real escapes
# (BEL and TAB) and "\s"/"\d"/"\o" are invalid escapes, so the Windows paths
# used below were silently corrupted and raised SyntaxWarning.
OUTPUT_DIR = r"C:\she\autarch\data\training\output"
ADAPTER_DIR = OUTPUT_DIR + "/lora_adapter"

# Train
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=3,
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=0.0002,
        warmup_ratio=0.03,
        save_steps=50,
        logging_steps=10,
        fp16=True,
        optim="adamw_8bit",
        report_to="none",
    ),
)
print("Starting training...")
trainer.train()
print("Training complete!")

# Save the adapter weights and the tokenizer side by side.
model.save_pretrained(ADAPTER_DIR)
tokenizer.save_pretrained(ADAPTER_DIR)
print(f"LoRA adapter saved to {ADAPTER_DIR}")

View File

@@ -0,0 +1,14 @@
C:\she\autarch\data\training\train_lora.py:50: SyntaxWarning: invalid escape sequence '\s'
with open("C:\she\autarch\data\training\autarch_dataset_20260302_202634.jsonl", "r") as f:
C:\she\autarch\data\training\train_lora.py:76: SyntaxWarning: invalid escape sequence '\s'
output_dir="C:\she\autarch\data\training\output",
C:\she\autarch\data\training\train_lora.py:95: SyntaxWarning: invalid escape sequence '\s'
model.save_pretrained("C:\she\autarch\data\training\output/lora_adapter")
C:\she\autarch\data\training\train_lora.py:96: SyntaxWarning: invalid escape sequence '\s'
tokenizer.save_pretrained("C:\she\autarch\data\training\output/lora_adapter")
C:\she\autarch\data\training\train_lora.py:97: SyntaxWarning: invalid escape sequence '\s'
print(f"LoRA adapter saved to C:\she\autarch\data\training\output/lora_adapter")
Traceback (most recent call last):
File "C:\she\autarch\data\training\train_lora.py", line 5, in <module>
from datasets import Dataset
ModuleNotFoundError: No module named 'datasets'

View File

@@ -0,0 +1,5 @@
{
"username": "admin",
"password": "admin",
"force_change": true
}