"""Toggle-activated voice dictation.

Captures microphone audio with sounddevice, feeds it to a Vosk recognizer,
and types every recognized phrase through pynput's keyboard controller.
Right Ctrl toggles listening on and off.
"""

import json
import queue
import sys

import sounddevice as sd
from pynput import keyboard
from vosk import KaldiRecognizer, Model

# Configuration
MODEL_NAME = "vosk-model-small-en-us-0.15"
SAMPLE_RATE = 16000
BLOCK_SIZE = 8000  # frames per audio block (0.5 s at SAMPLE_RATE)
# Seconds the main loop blocks waiting for audio before re-checking state.
QUEUE_POLL_SECONDS = 0.1

# Global State
is_listening = False
q = queue.Queue()


def audio_callback(indata, frames, time, status):
    """Queue one captured audio block while dictation is active.

    This is called (from a separate thread) for each audio block.

    Args:
        indata: Raw input buffer for this block; copied with ``bytes()``
            before it is queued.
        frames: Number of frames in the block (unused).
        time: Timing information supplied by the stream (unused).
        status: Stream status flags; reported on stderr when set.
    """
    if status:
        # Surface stream problems instead of silently dropping them.
        print(status, file=sys.stderr)
    if is_listening:
        q.put(bytes(indata))


def on_press(key):
    """Toggle the listening state when the hotkey (Right Ctrl) is pressed."""
    global is_listening
    if key != keyboard.Key.ctrl_r:
        return
    is_listening = not is_listening
    if is_listening:
        print("[Dictation] STARTED listening...")
    else:
        print("[Dictation] STOPPED listening.")


def _extract_text(result_json):
    """Return the "text" field of a recognizer JSON result, or ""."""
    return json.loads(result_json).get("text", "")


def _type_text(text):
    """Announce and type *text* via a keyboard controller; no-op if empty."""
    if not text:
        return
    print(f"Typing: {text}")
    # Use a new controller for each typing action
    kb_controller = keyboard.Controller()
    kb_controller.type(text)


def main():
    """Load the model, start the hotkey listener, and run the dictation loop.

    The loop blocks on the audio queue with a short timeout rather than
    spinning, so the process stays idle while dictation is toggled off.
    When dictation has been stopped and the queue has drained, the
    recognizer is finalized so trailing words are typed immediately instead
    of leaking into the next dictation session.
    """
    # Model Setup
    model = Model(MODEL_NAME)
    recognizer = KaldiRecognizer(model, SAMPLE_RATE)

    # Keyboard listener
    listener = keyboard.Listener(on_press=on_press)
    listener.start()

    print("=== Ready ===")
    print("Press Right Ctrl to start/stop dictation.")

    # True while the recognizer holds audio that has not been finalized.
    needs_flush = False

    try:
        # Main Audio Loop
        with sd.RawInputStream(
            samplerate=SAMPLE_RATE,
            blocksize=BLOCK_SIZE,
            dtype='int16',
            channels=1,
            callback=audio_callback,
        ):
            while True:
                try:
                    data = q.get(timeout=QUEUE_POLL_SECONDS)
                except queue.Empty:
                    # Nothing queued. If dictation was stopped, flush the
                    # recognizer once so the last utterance is not lost.
                    if needs_flush and not is_listening:
                        needs_flush = False
                        _type_text(_extract_text(recognizer.FinalResult()))
                    continue

                needs_flush = True
                if recognizer.AcceptWaveform(data):
                    _type_text(_extract_text(recognizer.Result()))
    except KeyboardInterrupt:
        print("\n[Dictation] Exiting.")
    finally:
        listener.stop()


if __name__ == "__main__":
    main()