# atlas/home-voice-agent/wake-word/detector.py

#!/usr/bin/env python3
"""
Wake-word detection service using openWakeWord.

Listens to microphone input and detects the "Hey Atlas" wake-word.
Invokes a caller-supplied callback when detected; forwarding the event over
WebSocket or HTTP is left to that callback.
"""

import logging
import threading
import time
import queue
from typing import Optional, Callable
from pathlib import Path

try:
    import pyaudio
    import numpy as np
    HAS_PYAUDIO = True
except ImportError:
    HAS_PYAUDIO = False
    logging.warning("PyAudio not available. Install with: pip install pyaudio")

try:
    import openwakeword
    from openwakeword.model import Model
    HAS_OPENWAKEWORD = True
except ImportError:
    HAS_OPENWAKEWORD = False
    logging.warning("openWakeWord not available. Install with: pip install openwakeword")

logger = logging.getLogger(__name__)


class WakeWordDetector:
    """Wake-word detector using openWakeWord.

    Two daemon threads cooperate through an in-memory queue: one reads
    fixed-size chunks from the default input device via PyAudio, the other
    feeds each chunk to the openWakeWord model and calls ``on_detection``
    when any loaded model scores above ``threshold``.

    The instance is also a context manager: entering calls :meth:`start`,
    leaving calls :meth:`stop`.
    """

    def __init__(
        self,
        wake_word: str = "hey atlas",
        sample_rate: int = 16000,
        chunk_size: int = 1280,
        threshold: float = 0.5,
        on_detection: Optional[Callable] = None
    ):
        """
        Initialize wake-word detector.

        Args:
            wake_word: Wake-word phrase to detect (default: "hey atlas").
                Currently informational only: the model that is loaded is
                always "hey_atlas", falling back to "hey_jarvis".
            sample_rate: Audio sample rate (default: 16000 Hz)
            chunk_size: Audio chunk size in samples (default: 1280, i.e.
                80 ms at 16 kHz)
            threshold: Detection confidence threshold (default: 0.5)
            on_detection: Callback invoked with no arguments, from the
                detection thread, when the wake-word is detected

        Raises:
            ImportError: If openWakeWord or PyAudio (or numpy, which is
                imported alongside PyAudio) is not installed.
        """
        self.wake_word = wake_word.lower()
        self.sample_rate = sample_rate
        self.chunk_size = chunk_size
        self.threshold = threshold
        self.on_detection = on_detection

        self.is_running = False
        self.audio_queue = queue.Queue()
        self.detection_thread = None
        self.audio_thread = None
        self.stream = None
        # Guards hand-over of self.stream so that it is closed exactly once,
        # whether by the capture thread or by stop().
        self._stream_lock = threading.Lock()

        # Fail fast on missing dependencies before doing the (comparatively
        # expensive) model load.
        if not HAS_OPENWAKEWORD:
            raise ImportError("openWakeWord not installed. Install with: pip install openwakeword")
        if not HAS_PYAUDIO:
            raise ImportError("PyAudio not installed. Install with: pip install pyaudio")

        # Load model (openWakeWord comes with pre-trained models)
        # For custom wake-word, would need to train a model
        try:
            self.oww_model = Model(
                wakeword_models=[self._model_ref("hey_atlas")],
                inference_framework="onnx"
            )
        except Exception as e:
            logger.warning("Could not load custom model, using default: %s", e)
            # Fallback to default model
            self.oww_model = Model(
                wakeword_models=[self._model_ref("hey_jarvis")],
                inference_framework="onnx"
            )

        # Initialize audio
        self.audio = pyaudio.PyAudio()

        logger.info("Wake-word detector initialized: '%s' (threshold: %s)", wake_word, threshold)

    @staticmethod
    def _model_ref(name):
        """Return the value to pass in ``wakeword_models`` for model *name*."""
        # NOTE(review): `openwakeword.utils.get_model_path` is what this file
        # has always called, but its existence could not be confirmed here --
        # verify against the installed openWakeWord version. When it is
        # missing, fall back to the bare model name, which the Model
        # documentation describes as accepted for pre-trained models.
        resolver = getattr(getattr(openwakeword, "utils", None), "get_model_path", None)
        return resolver(name) if callable(resolver) else name

    @staticmethod
    def _pcm_from_bytes(raw):
        """Interpret raw ``paInt16`` bytes as a 1-D int16 sample array.

        The samples are deliberately NOT rescaled to float: openWakeWord
        documents its input as 16-bit PCM.
        """
        return np.frombuffer(raw, dtype=np.int16)

    def _close_stream(self) -> None:
        """Stop and close the input stream at most once; safe to call twice."""
        with self._stream_lock:
            stream, self.stream = self.stream, None
        if stream is None:
            return
        try:
            stream.stop_stream()
            stream.close()
        except Exception as e:
            # Cleanup is best-effort: log and let the caller finish tearing down.
            logger.warning("Error closing audio stream: %s", e)

    def _audio_capture_thread(self) -> None:
        """Capture audio from microphone in background thread."""
        try:
            stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=1,
                rate=self.sample_rate,
                input=True,
                frames_per_buffer=self.chunk_size
            )
            self.stream = stream

            logger.info("Audio capture started")

            while self.is_running:
                try:
                    audio_data = stream.read(self.chunk_size, exception_on_overflow=False)
                    self.audio_queue.put(self._pcm_from_bytes(audio_data))
                except Exception as e:
                    logger.error("Error capturing audio: %s", e)
                    break
        except Exception as e:
            logger.error("Audio capture thread error: %s", e)
        finally:
            self._close_stream()
            logger.info("Audio capture stopped")

    def _detection_thread(self) -> None:
        """Process audio and detect wake-word in background thread."""
        logger.info("Wake-word detection started")

        while self.is_running:
            try:
                # Time out periodically so a cleared is_running flag is noticed.
                audio_chunk = self.audio_queue.get(timeout=1.0)
            except queue.Empty:
                continue

            try:
                # Run inference
                prediction = self.oww_model.predict(audio_chunk)

                # Check for wake-word detection
                for mdl in self.oww_model.models:
                    score = prediction[mdl]
                    if score > self.threshold:
                        logger.info("Wake-word detected! (confidence: %.2f)", score)

                        # A failing callback must not kill the detection loop.
                        if self.on_detection:
                            try:
                                self.on_detection()
                            except Exception as e:
                                logger.error("Error in detection callback: %s", e)

                        # Reset model to avoid multiple detections
                        self.oww_model.reset()
                        break
            except Exception as e:
                logger.error("Detection thread error: %s", e)
                time.sleep(0.1)

        logger.info("Wake-word detection stopped")

    def start(self) -> None:
        """Start wake-word detection.

        Does nothing (beyond a warning) if the detector is already running.
        May be called again after :meth:`stop`.
        """
        if self.is_running:
            logger.warning("Wake-word detector already running")
            return

        # stop() terminates the PyAudio instance; recreate it on restart.
        if self.audio is None:
            self.audio = pyaudio.PyAudio()

        # Discard audio left over from a previous run so it is not scored.
        while True:
            try:
                self.audio_queue.get_nowait()
            except queue.Empty:
                break

        self.is_running = True

        # Start audio capture thread
        self.audio_thread = threading.Thread(target=self._audio_capture_thread, daemon=True)
        self.audio_thread.start()

        # Start detection thread
        self.detection_thread = threading.Thread(target=self._detection_thread, daemon=True)
        self.detection_thread.start()

        logger.info("Wake-word detector started")

    def stop(self) -> None:
        """Stop wake-word detection and release audio resources.

        Does nothing if the detector is not running. Each worker thread is
        given up to two seconds to finish.
        """
        if not self.is_running:
            return

        self.is_running = False

        # Wait for threads to finish. Skip the current thread so stop() can be
        # called from the on_detection callback (joining oneself raises).
        current = threading.current_thread()
        for worker in (self.audio_thread, self.detection_thread):
            if worker is not None and worker is not current:
                worker.join(timeout=2.0)

        # Cleanup audio
        self._close_stream()
        if self.audio:
            self.audio.terminate()
            self.audio = None

        logger.info("Wake-word detector stopped")

    def __enter__(self):
        """Context manager entry: start detection and return the detector."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit: stop detection; exceptions are not suppressed."""
        self.stop()


def main():
    """Run the detector interactively until interrupted with Ctrl+C.

    Configures INFO-level logging, prints a banner on every detection, and
    always stops the detector on the way out.
    """
    logging.basicConfig(level=logging.INFO)

    listener = WakeWordDetector(
        wake_word="hey atlas",
        threshold=0.5,
        on_detection=lambda: print("🔔 WAKE-WORD DETECTED!"),
    )

    try:
        listener.start()
        print("Listening for wake-word... Press Ctrl+C to stop")
        # Detection runs in background threads; just keep the process alive.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        listener.stop()


# Run the interactive test harness only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()