- Fix state detection priority: dictation now takes precedence over conversation
- Fix critical bug: event loop was created but never started, preventing async coroutines from executing
- Optimize audio processing: reorder AcceptWaveform/PartialResult checks
- Switch to faster Vosk model: vosk-model-en-us-0.22-lgraph for 2-3x speed improvement
- Reduce block size from 8000 to 4000 for lower latency
- Add filtering to remove spurious 'the', 'a', 'an' words from start/end of transcriptions
- Update toggle-dictation.sh to properly clean up conversation lock file
- Improve batch audio processing for better responsiveness
226 lines
6.0 KiB
Python
226 lines
6.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Debug script to test audio processing components individually
|
|
"""
|
|
|
|
import os
import sys
import time
import json
import queue
from pathlib import Path

# Add the src directory to path
sys.path.insert(0, str(Path(__file__).parent / "src"))

# Optional audio stack. OSError is caught as well as ImportError because
# sounddevice raises OSError at import time when the PortAudio shared
# library cannot be found, which would otherwise abort the whole script.
try:
    import sounddevice as sd
    from vosk import Model, KaldiRecognizer

    AUDIO_AVAILABLE = True
except (ImportError, OSError):
    AUDIO_AVAILABLE = False
    print("Audio libraries not available")

# NumPy is imported only here, under the guard. An additional unconditional
# "import numpy as np" above this point would raise before the guard runs
# and make the NUMPY_AVAILABLE = False branch unreachable.
try:
    import numpy as np

    NUMPY_AVAILABLE = True
except ImportError:
    NUMPY_AVAILABLE = False
    print("NumPy not available")
|
|
|
|
|
|
def test_queue_operations():
    """Round-trip one payload through a ``queue.Queue``.

    Returns:
        bool: True when the item read back equals the item that was
        enqueued, False otherwise.
    """
    print("Testing queue operations...")

    payload = b"test audio data"
    fifo = queue.Queue()
    fifo.put(payload)

    # The item was enqueued just above, so the timeout is only a safety net.
    if fifo.get(timeout=1) != payload:
        print("✗ Queue operations failed")
        return False

    print("✓ Queue operations work")
    return True
|
|
|
|
|
|
def test_vosk_model_loading(
    model_path="/home/universal/.shared/models/vosk-models/vosk-model-en-us-0.22",
):
    """Load a Vosk model and feed a short buffer of silence to a recognizer.

    Args:
        model_path: Filesystem path of the Vosk model directory to load.
            Defaults to the path this script has always checked, so
            calling without arguments behaves as before; pass another
            path to check a different model.

    Returns:
        bool: True when the model loads, a recognizer is created and the
        silence buffer is processed without raising. False when the audio
        libraries or NumPy are unavailable, when ``model_path`` does not
        exist, or when any step raises.
    """
    if not (AUDIO_AVAILABLE and NUMPY_AVAILABLE):
        print("Skipping Vosk test - audio libs not available")
        return False

    print("Testing Vosk model loading...")

    try:
        if not os.path.exists(model_path):
            print(f"✗ Model path not found: {model_path}")
            return False

        print(f"Model path exists: {model_path}")
        model = Model(model_path)
        print("✓ Vosk model loaded successfully")

        rec = KaldiRecognizer(model, 16000)
        print("✓ Vosk recognizer created")

        # Test with silence: 1600 zeroed int16 samples. Either outcome of
        # AcceptWaveform counts as success; only an exception is a failure.
        silence = np.zeros(1600, dtype=np.int16)
        if rec.AcceptWaveform(silence.tobytes()):
            result = json.loads(rec.Result())
            print(f"✓ Silence test passed: {result}")
        else:
            print("✓ Silence test - no result (expected)")

        return True

    except Exception as e:
        # Diagnostic boundary: report whatever went wrong and mark the
        # check as failed instead of aborting the remaining checks.
        print(f"✗ Vosk model test failed: {e}")
        return False
|
|
|
|
|
|
def test_audio_input():
    """List the audio devices that report at least one input channel.

    Returns:
        bool: True when one or more input devices are found. False when
        the audio libraries are unavailable, no input device is reported,
        or querying the devices raises.
    """
    if not AUDIO_AVAILABLE:
        print("Skipping audio input test - audio libs not available")
        return False

    print("Testing audio input...")

    try:
        devices = sd.query_devices()
        input_devices = []

        for i, device in enumerate(devices):
            try:
                if isinstance(device, dict) and device.get("max_input_channels", 0) > 0:
                    input_devices.append((i, device))
            except Exception:
                # Best effort: skip an entry whose metadata cannot be
                # inspected. Unlike a bare "except:", this lets
                # KeyboardInterrupt and SystemExit propagate.
                continue

        if not input_devices:
            print("✗ No input devices found")
            return False

        print(f"✓ Found {len(input_devices)} input devices")
        for idx, device in input_devices[:3]:  # Show first 3
            # Only dict entries are collected above, so .get() is safe here.
            name = device.get("name", "Unknown")
            print(f"  Device {idx}: {name}")
        return True

    except Exception as e:
        # Diagnostic boundary: report the failure and mark the check failed.
        print(f"✗ Audio input test failed: {e}")
        return False
|
|
|
|
|
|
def _remove_lock(lock_path):
    """Delete *lock_path*, treating an already-missing file as success."""
    try:
        lock_path.unlink()
    except FileNotFoundError:
        pass


def test_lock_file_detection(lock_dir="."):
    """Check that the two lock files can be created and detected.

    Creates and removes ``listening.lock`` and ``conversation.lock`` in
    three scenarios: dictation only, conversation only, and both at once.
    Any copies already present in ``lock_dir`` are deleted first.

    Args:
        lock_dir: Directory in which the lock files are created. Defaults
            to the current working directory, matching the original
            relative paths.

    Returns:
        bool: True when all three scenarios behave as expected, False as
        soon as one does not.
    """
    print("Testing lock file detection...")

    base = Path(lock_dir)
    dictation_lock = base / "listening.lock"
    conversation_lock = base / "conversation.lock"

    # Clean state
    _remove_lock(dictation_lock)
    _remove_lock(conversation_lock)

    try:
        # Test dictation lock
        dictation_lock.touch()
        dictation_exists = dictation_lock.exists()
        conversation_exists = conversation_lock.exists()

        if not (dictation_exists and not conversation_exists):
            print("✗ Dictation lock detection failed")
            return False
        print("✓ Dictation lock detection works")
        _remove_lock(dictation_lock)

        # Test conversation lock
        conversation_lock.touch()
        dictation_exists = dictation_lock.exists()
        conversation_exists = conversation_lock.exists()

        if not (conversation_exists and not dictation_exists):
            print("✗ Conversation lock detection failed")
            return False
        print("✓ Conversation lock detection works")
        _remove_lock(conversation_lock)

        # Test both locks. This only verifies that the two files can
        # coexist; which mode takes precedence is not checked here.
        dictation_lock.touch()
        conversation_lock.touch()

        if dictation_lock.exists() and conversation_lock.exists():
            print("✓ Both locks can exist")
            return True

        print("✗ Both locks test failed")
        return False
    finally:
        # Always remove the lock files, including on failure paths and
        # exceptions, so a failed run leaves none behind.
        _remove_lock(dictation_lock)
        _remove_lock(conversation_lock)
|
|
|
|
|
|
def main():
    """Run every component check and print a pass/fail summary.

    A check that raises is reported as crashed and counted as failed, so
    the remaining checks still run.

    Returns:
        int: 0 when every check passed, 1 otherwise.
    """
    print("=== Dictation Service Component Debug ===")
    print()

    checks = (
        ("Queue Operations", test_queue_operations),
        ("Lock File Detection", test_lock_file_detection),
        ("Vosk Model Loading", test_vosk_model_loading),
        ("Audio Input", test_audio_input),
    )

    outcomes = []
    for label, check in checks:
        print(f"--- {label} ---")
        try:
            outcome = check()
        except Exception as exc:
            print(f"✗ {label} crashed: {exc}")
            outcome = False
        outcomes.append((label, outcome))
        print()

    print("=== SUMMARY ===")
    for label, outcome in outcomes:
        verdict = "PASS" if outcome else "FAIL"
        print(f"{label}: {verdict}")

    total = len(outcomes)
    passed = sum(1 for _, outcome in outcomes if outcome)
    print(f"\nPassed: {passed}/{total}")

    if passed != total:
        print("❌ Some tests failed - check debug output above")
        return 1

    print("🎉 All tests passed!")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
|