Files
dark_hal/tools/test_gptq.py
2026-03-13 12:56:43 -07:00

69 lines
2.0 KiB
Python

#!/usr/bin/env python3
"""
Test GPTQ model loader (requires auto-gptq package)
"""
import os
import traceback
def test_gptq_loader(model_id="TheBloke/Llama-2-7B-Chat-GPTQ", device=0):
    """Smoke-test loading a GPTQ model with an integer device and generating.

    Loads the tokenizer and the quantized model for ``model_id``, runs a short
    greedy generation on a fixed prompt, and prints progress along the way.
    The ``HF_TOKEN`` environment variable, if set, is passed as the access
    token to both loaders.

    Args:
        model_id: Model identifier handed to ``AutoTokenizer.from_pretrained``
            and ``AutoGPTQForCausalLM.from_quantized``.
        device: Integer GPU index passed to ``from_quantized``; the prompt
            tensors are moved to the matching ``cuda:<device>`` device.

    Returns:
        True if loading and generation completed, False if the required
        packages could not be imported or any other exception occurred
        (the error is printed rather than raised).
    """
    print(f"=== Testing GPTQ Loader: {model_id} ===")

    # Guard only the imports: an ImportError raised later (for example by a
    # missing optional dependency while loading) must not be misreported as
    # "auto-gptq not available".
    try:
        from auto_gptq import AutoGPTQForCausalLM
        from transformers import AutoTokenizer
    except ImportError as e:
        print(f"✗ auto-gptq not available: {e}")
        print("Install with: pip install auto-gptq")
        return False

    hf_token = os.getenv("HF_TOKEN")

    try:
        # Load tokenizer
        print("Loading tokenizer...")
        tok = AutoTokenizer.from_pretrained(
            model_id,
            use_fast=True,
            token=hf_token,
        )
        print("✓ Tokenizer loaded")

        # Load GPTQ model with integer device
        print("Loading GPTQ model...")
        # NOTE(review): trust_remote_code=True permits code shipped with the
        # model repository to run; keep it only for trusted model ids.
        model = AutoGPTQForCausalLM.from_quantized(
            model_id,
            device=device,  # integer index, not "cuda:0"
            use_safetensors=True,
            trust_remote_code=True,
            token=hf_token,
        )
        print(f"✓ GPTQ model loaded on GPU {device}")

        # Test generation; inputs go to the same GPU the model was loaded on.
        prompt = "The benefits of GPU inference are"
        inputs = tok(prompt, return_tensors="pt").to(f"cuda:{device}")
        print("Testing generation...")
        outputs = model.generate(
            **inputs,
            max_new_tokens=16,
            do_sample=False,
            # Reuse EOS as the padding id for generation.
            pad_token_id=tok.eos_token_id,
        )
        result = tok.decode(outputs[0], skip_special_tokens=True)
        print("✓ Generation test passed:")
        print(f"Output: {result}")
        return True
    except Exception as e:
        # Top-level boundary of the smoke test: report the failure with a
        # traceback and signal it through the return value.
        print(f"✗ Error: {type(e).__name__} - {e}")
        traceback.print_exc()
        return False
if __name__ == "__main__":
    # torch is imported only when run as a script, so importing this module
    # does not itself require torch.
    import torch

    # Print environment details before running the loader test.
    print("torch:", torch.__version__)
    print("cuda available:", torch.cuda.is_available())
    print()

    # Propagate the test result as the process exit status (0 = passed,
    # 1 = failed) so shells and CI can detect a failure.
    raise SystemExit(0 if test_gptq_loader() else 1)