dictation-service/debug_components.py
Kade Heyborne 73a15d03cd
Fix dictation service: state detection, async processing, and performance optimizations
- Fix state detection priority: dictation now takes precedence over conversation
- Fix critical bug: event loop was created but never started, preventing async coroutines from executing
- Optimize audio processing: reorder AcceptWaveform/PartialResult checks
- Switch to faster Vosk model: vosk-model-en-us-0.22-lgraph for 2-3x speed improvement
- Reduce block size from 8000 to 4000 for lower latency
- Add filtering to remove spurious 'the', 'a', 'an' words from start/end of transcriptions
- Update toggle-dictation.sh to properly clean up conversation lock file
- Improve batch audio processing for better responsiveness
2025-12-04 11:49:07 -07:00

226 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""
Debug script to test audio processing components individually
"""
import os
import sys
import time
import json
import queue
import numpy as np
from pathlib import Path
# Add the src directory to path
sys.path.insert(0, str(Path(__file__).parent / "src"))
# Optional audio stack: record availability in a flag instead of aborting, so
# the checks that do not need audio can still run.
try:
    import sounddevice as sd
    from vosk import Model, KaldiRecognizer
except (ImportError, OSError):
    # sounddevice raises OSError (not ImportError) when it cannot locate the
    # PortAudio shared library; treat that the same as a missing package.
    AUDIO_AVAILABLE = False
    print("Audio libraries not available")
else:
    AUDIO_AVAILABLE = True

# NOTE(review): numpy is also imported unconditionally near the top of this
# file, so this guard only takes effect if that earlier import is removed.
try:
    import numpy as np
except ImportError:
    NUMPY_AVAILABLE = False
    print("NumPy not available")
else:
    NUMPY_AVAILABLE = True
def test_queue_operations():
    """Round-trip one payload through a ``queue.Queue`` and report the outcome.

    Returns:
        True when the bytes read back equal the bytes written, False otherwise.
    """
    print("Testing queue operations...")
    payload = b"test audio data"

    audio_queue = queue.Queue()
    audio_queue.put(payload)
    # Bounded wait so a broken queue raises queue.Empty instead of hanging.
    round_tripped = audio_queue.get(timeout=1)

    if round_tripped != payload:
        print("✗ Queue operations failed")
        return False
    print("✓ Queue operations work")
    return True
def test_vosk_model_loading(
    model_path="/home/universal/.shared/models/vosk-models/vosk-model-en-us-0.22",
):
    """Load a Vosk model, create a recognizer, and feed it a burst of silence.

    Args:
        model_path: Directory of the Vosk model to load. Defaults to the path
            this script has always checked, so calling with no arguments
            behaves as before; pass another directory to check a different
            model.

    Returns:
        True if the model loaded, the recognizer was created and the silence
        buffer was processed. False if the audio libraries or NumPy are
        unavailable, the model directory does not exist, or any step raised.
    """
    if not AUDIO_AVAILABLE or not NUMPY_AVAILABLE:
        print("Skipping Vosk test - audio libs not available")
        return False
    print("Testing Vosk model loading...")
    try:
        if not os.path.exists(model_path):
            print(f"✗ Model path not found: {model_path}")
            return False
        print(f"Model path exists: {model_path}")

        model = Model(model_path)
        print("✓ Vosk model loaded successfully")

        rec = KaldiRecognizer(model, 16000)
        print("✓ Vosk recognizer created")

        # 1600 zero-valued int16 samples: 0.1 s at the recognizer's 16 kHz rate.
        silence = np.zeros(1600, dtype=np.int16)
        if rec.AcceptWaveform(silence.tobytes()):
            result = json.loads(rec.Result())
            print(f"✓ Silence test passed: {result}")
        else:
            # Either outcome counts as success: the call itself did not fail.
            print("✓ Silence test - no result (expected)")
        return True
    except Exception as e:
        # Diagnostic boundary: report any loader/recognizer failure as a FAIL
        # rather than aborting the remaining checks.
        print(f"✗ Vosk model test failed: {e}")
        return False
def test_audio_input():
    """Enumerate audio devices via sounddevice and report the input-capable ones.

    Prints the count of input devices and the names of up to the first three.

    Returns:
        True if at least one device reports ``max_input_channels`` greater
        than zero. False if the audio libraries are unavailable, no input
        device is found, or querying the devices raised.
    """
    if not AUDIO_AVAILABLE:
        print("Skipping audio input test - audio libs not available")
        return False
    print("Testing audio input...")
    try:
        input_devices = []
        for index, device in enumerate(sd.query_devices()):
            try:
                if isinstance(device, dict) and device.get("max_input_channels", 0) > 0:
                    input_devices.append((index, device))
            except Exception:
                # Best effort: skip an entry whose fields cannot be read or
                # compared. Unlike a bare ``except:``, this lets
                # KeyboardInterrupt and SystemExit propagate.
                continue

        if not input_devices:
            print("✗ No input devices found")
            return False

        print(f"✓ Found {len(input_devices)} input devices")
        for index, device in input_devices[:3]:  # Show first 3
            # Every collected entry passed the isinstance(dict) check above.
            name = device.get("name", "Unknown")
            print(f" Device {index}: {name}")
        return True
    except Exception as e:
        # Diagnostic boundary: a failing device query is reported as a FAIL.
        print(f"✗ Audio input test failed: {e}")
        return False
def test_lock_file_detection():
    """Check that the dictation and conversation lock files can be detected.

    Creates ``listening.lock`` and ``conversation.lock`` in the current
    working directory, one at a time and then together, and verifies that
    ``Path.exists`` reports each state correctly. Any lock file already
    present is removed before the checks start, and both files are removed
    again on every exit path, including failures and exceptions.

    Returns:
        True if all three scenarios behave as expected, False otherwise.
    """
    print("Testing lock file detection...")
    dictation_lock = Path("listening.lock")
    conversation_lock = Path("conversation.lock")

    def _clear_locks():
        # Remove whichever lock files currently exist.
        for lock in (dictation_lock, conversation_lock):
            if lock.exists():
                lock.unlink()

    # Clean state
    _clear_locks()
    try:
        # Only the dictation lock present
        dictation_lock.touch()
        if not (dictation_lock.exists() and not conversation_lock.exists()):
            print("✗ Dictation lock detection failed")
            return False
        print("✓ Dictation lock detection works")
        dictation_lock.unlink()

        # Only the conversation lock present
        conversation_lock.touch()
        if not (conversation_lock.exists() and not dictation_lock.exists()):
            print("✗ Conversation lock detection failed")
            return False
        print("✓ Conversation lock detection works")
        conversation_lock.unlink()

        # Both locks present. This only checks that they can coexist; which
        # mode takes precedence is decided elsewhere and is not tested here.
        dictation_lock.touch()
        conversation_lock.touch()
        if not (dictation_lock.exists() and conversation_lock.exists()):
            print("✗ Both locks test failed")
            return False
        print("✓ Both locks can exist")
        return True
    finally:
        # Never leave stray lock files behind, even when a check fails.
        _clear_locks()
def main():
    """Run every component check in order and print a pass/fail summary.

    A check that raises is reported as crashed and counted as a failure, so
    one broken component does not stop the remaining checks.

    Returns:
        0 if every check passed, 1 otherwise (used as the process exit code).
    """
    print("=== Dictation Service Component Debug ===")
    print()

    checks = (
        ("Queue Operations", test_queue_operations),
        ("Lock File Detection", test_lock_file_detection),
        ("Vosk Model Loading", test_vosk_model_loading),
        ("Audio Input", test_audio_input),
    )

    outcomes = []
    for label, check in checks:
        print(f"--- {label} ---")
        try:
            ok = check()
        except Exception as exc:
            print(f"{label} crashed: {exc}")
            ok = False
        outcomes.append((label, ok))
        print()

    print("=== SUMMARY ===")
    for label, ok in outcomes:
        print(f"{label}: {'PASS' if ok else 'FAIL'}")

    passed = sum(1 for _, ok in outcomes if ok)
    total = len(outcomes)
    print(f"\nPassed: {passed}/{total}")

    if passed != total:
        print("❌ Some tests failed - check debug output above")
        return 1
    print("🎉 All tests passed!")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())