atlas/home-voice-agent/tts/service.py

#!/usr/bin/env python3
"""
TTS Service using Piper.

Provides text-to-speech synthesis with low latency.
"""

import io
import json
import logging
import shutil
import subprocess
from pathlib import Path
from typing import Any, BinaryIO, Dict, Optional

logger = logging.getLogger(__name__)

# Piper binary and voices directory inside a "piper" folder next to this module
PIPER_PATH = Path(__file__).parent.joinpath("piper", "piper")
PIPER_VOICES_DIR = Path(__file__).parent.joinpath("piper", "voices")

# Default voice name, plus the model and config file names derived from it
DEFAULT_VOICE = "en_US-lessac-medium"
DEFAULT_VOICE_FILE = DEFAULT_VOICE + ".onnx"
DEFAULT_VOICE_CONFIG = DEFAULT_VOICE_FILE + ".json"


class TTSService:
    """TTS service using Piper.

    Each synthesis call runs the Piper binary as a subprocess, writes the
    text to its stdin and returns the bytes Piper writes to stdout.

    ``sample_rate`` is stored on the instance but is not passed to Piper by
    this class.
    """

    def __init__(
        self,
        voice: str = DEFAULT_VOICE,
        sample_rate: int = 22050,
        piper_path: Optional[Path] = None,
        voices_dir: Optional[Path] = None
    ):
        """
        Initialize TTS service.

        A missing binary or voices directory is only logged as a warning
        here; the corresponding attribute is set to None and ``synthesize``
        raises when it is called.

        Args:
            voice: Voice name (e.g., "en_US-lessac-medium")
            sample_rate: Audio sample rate (default: 22050 Hz)
            piper_path: Path to piper binary (auto-detect if None)
            voices_dir: Path to voices directory (auto-detect if None)
        """
        self.voice = voice
        self.sample_rate = sample_rate
        # Coerce through Path so plain string paths work too; the checks
        # below call Path methods on these attributes.
        self.piper_path: Optional[Path] = (
            Path(piper_path) if piper_path else self._find_piper()
        )
        self.voices_dir: Optional[Path] = (
            Path(voices_dir) if voices_dir else self._find_voices_dir()
        )

        if not self.piper_path or not self.piper_path.exists():
            logger.warning("Piper binary not found. Install Piper or use alternative TTS.")
            self.piper_path = None

        if not self.voices_dir or not self.voices_dir.exists():
            logger.warning("Piper voices directory not found. Download voices.")
            self.voices_dir = None

        logger.info(f"TTS service initialized: voice={voice}, sample_rate={sample_rate}")

    def _find_piper(self) -> Optional[Path]:
        """Find piper binary.

        Checks a fixed list of install locations first, then falls back to
        a PATH lookup.

        Returns:
            Path to the binary, or None if it was not found.
        """
        candidates = (
            Path(__file__).parent / "piper" / "piper",
            Path.home() / ".local" / "bin" / "piper",
            Path("/usr/local/bin/piper"),
            Path("/usr/bin/piper"),
        )

        for candidate in candidates:
            if candidate.is_file():
                return candidate

        # shutil.which performs the PATH search in-process, so there is no
        # dependency on an external `which` command and no need to swallow
        # arbitrary exceptions from a subprocess.
        found = shutil.which("piper")
        return Path(found) if found else None

    def _find_voices_dir(self) -> Optional[Path]:
        """Find voices directory.

        Returns:
            The first existing directory from a fixed list of locations, or
            None if none of them exists.
        """
        candidates = (
            Path(__file__).parent / "piper" / "voices",
            Path.home() / ".local" / "share" / "piper" / "voices",
            Path("/usr/local/share/piper/voices"),
            Path("/usr/share/piper/voices"),
        )

        for candidate in candidates:
            if candidate.is_dir():
                return candidate

        return None

    def synthesize(
        self,
        text: str,
        voice: Optional[str] = None,
        output_format: str = "wav"
    ) -> bytes:
        """
        Synthesize speech from text.

        Args:
            text: Text to synthesize
            voice: Voice name (uses default if None)
            output_format: Output format ("wav" or "raw")

        Returns:
            Audio data as bytes

        Raises:
            RuntimeError: If the Piper binary or the voices directory is not
                available, or if Piper exits with a non-zero status.
            FileNotFoundError: If the model file for the voice is missing.
        """
        if not self.piper_path:
            raise RuntimeError("Piper not available. Install Piper TTS.")

        # __init__ resets voices_dir to None when the directory is missing;
        # fail with a clear message instead of a TypeError from `None / str`.
        if not self.voices_dir:
            raise RuntimeError("Piper voices directory not available. Download voices.")

        voice_name = voice or self.voice
        voice_file = self.voices_dir / f"{voice_name}.onnx"
        voice_config = self.voices_dir / f"{voice_name}.onnx.json"

        if not voice_file.exists():
            raise FileNotFoundError(f"Voice file not found: {voice_file}")

        # Build piper command
        cmd = [
            str(self.piper_path),
            "--model", str(voice_file),
            "--config", str(voice_config),
            "--output_file", "-",  # Output to stdout
            "--length_scale", "1.0",
            "--noise_scale", "0.667",
            "--noise_w", "0.8"
        ]

        if output_format == "raw":
            # NOTE(review): upstream Piper's README shows `--output-raw` for
            # raw PCM on stdout; confirm the installed binary accepts `--raw`.
            cmd.append("--raw")

        try:
            # subprocess.run waits for Piper and cleans up the child process
            # even if the call is interrupted.
            completed = subprocess.run(
                cmd,
                input=text.encode('utf-8'),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                check=False
            )
        except Exception as e:
            logger.error("Synthesis error: %s", e)
            raise

        # Checked outside the try block so a Piper failure is logged once.
        if completed.returncode != 0:
            error_msg = completed.stderr.decode('utf-8', errors='ignore')
            logger.error("Piper error: %s", error_msg)
            raise RuntimeError(f"Piper synthesis failed: {error_msg}")

        return completed.stdout

    def synthesize_to_file(
        self,
        text: str,
        output_path: Path,
        voice: Optional[str] = None
    ) -> Path:
        """
        Synthesize speech and save to file.

        Args:
            text: Text to synthesize
            output_path: Output file path
            voice: Voice name (uses default if None)

        Returns:
            Path to output file

        Raises:
            RuntimeError: Propagated from ``synthesize``.
            FileNotFoundError: Propagated from ``synthesize``.
        """
        audio_data = self.synthesize(text, voice=voice)

        with open(output_path, 'wb') as f:
            f.write(audio_data)

        return output_path


# Module-wide TTSService instance, created on the first get_service() call
_service: Optional[TTSService] = None


def get_service() -> TTSService:
    """Return the shared TTS service, building it on first use.

    The instance is created with DEFAULT_VOICE and a 22050 Hz sample rate,
    then reused for every later call.
    """
    global _service
    if _service is not None:
        return _service

    _service = TTSService(voice=DEFAULT_VOICE, sample_rate=22050)
    return _service