# dictation-service/archive/old_implementations/new_dictation.py

import sounddevice as sd
from vosk import Model, KaldiRecognizer
from pynput import keyboard
import json
import queue

# Configuration
# Model identifier handed to vosk's `Model(...)` in main().
MODEL_NAME = "vosk-model-small-en-us-0.15"
# Sample rate in Hz, used for both the recognizer and the audio input stream.
SAMPLE_RATE = 16000
# Block size passed to the input stream; at 16000 Hz, 8000 frames is 0.5 s
# of audio per callback.
BLOCK_SIZE = 8000

# Global State
# Toggled by on_press(); gates whether audio_callback() queues captured audio.
is_listening = False
# Hands raw audio blocks from audio_callback() over to the loop in main().
q = queue.Queue()

def audio_callback(indata, frames, time, status):
    """Enqueue one captured audio block while dictation is switched on.

    Invoked (from a separate thread) for each audio block of the input
    stream. Blocks that arrive while dictation is off are discarded.

    Args:
        indata: Buffer holding the captured audio; copied into ``bytes``
            before being queued.
        frames: Unused.
        time: Unused.
        status: Unused.
    """
    if not is_listening:
        return
    q.put(bytes(indata))

def on_press(key):
    """Flip the dictation on/off state when the Right Ctrl hotkey is pressed.

    Any other key is ignored. After toggling, the new state is announced
    on standard output.

    Args:
        key: The key reported by the keyboard listener.
    """
    global is_listening
    if key != keyboard.Key.ctrl_r:
        return
    is_listening = not is_listening
    announcement = (
        "[Dictation] STARTED listening..."
        if is_listening
        else "[Dictation] STOPPED listening."
    )
    print(announcement)

def main():
    """Run the dictation service: recognize microphone audio and type it out.

    Loads the Vosk model named by ``MODEL_NAME``, starts a keyboard listener
    whose ``on_press`` handler toggles dictation, then opens a raw int16,
    single-channel input stream and feeds every queued audio block to the
    recognizer. Each completed, non-empty recognition result is printed and
    typed through a pynput keyboard controller.

    The loop runs until the process is interrupted; the keyboard listener
    is stopped on the way out, and the interrupting exception propagates.
    """
    # Model Setup
    model = Model(MODEL_NAME)
    recognizer = KaldiRecognizer(model, SAMPLE_RATE)

    # Keyboard listener
    listener = keyboard.Listener(on_press=on_press)
    listener.start()

    print("=== Ready ===")
    print("Press Right Ctrl to start/stop dictation.")

    # Longest time (seconds) the loop blocks waiting for audio. Blocking on
    # the queue keeps the loop idle while dictation is off (the callback only
    # enqueues while listening) instead of spinning at full CPU, and the
    # timeout keeps the main thread responsive to interruption.
    poll_timeout = 0.1

    try:
        # Main Audio Loop
        with sd.RawInputStream(samplerate=SAMPLE_RATE, blocksize=BLOCK_SIZE, dtype='int16',
                               channels=1, callback=audio_callback):
            while True:
                try:
                    data = q.get(timeout=poll_timeout)
                except queue.Empty:
                    # Nothing captured (dictation off or no audio yet).
                    continue
                # AcceptWaveform returns a truthy value once a result is
                # ready to be read; until then keep feeding audio.
                if not recognizer.AcceptWaveform(data):
                    continue
                result = json.loads(recognizer.Result())
                text = result.get("text", "")
                if text:
                    print(f"Typing: {text}")
                    # Use a new controller for each typing action
                    kb_controller = keyboard.Controller()
                    kb_controller.type(text)
    finally:
        # Always shut the hotkey listener down, however the loop ends.
        listener.stop()

# Start the dictation service only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()