atlas/home-voice-agent/tts/server.py

#!/usr/bin/env python3
"""
TTS HTTP server.

Provides endpoints for text-to-speech synthesis.
"""

import logging
import io
from typing import Optional
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import Response, StreamingResponse
from pydantic import BaseModel

from .service import TTSService, get_service

logger = logging.getLogger(__name__)

app = FastAPI(title="TTS Service", version="0.1.0")

# Global service
tts_service: Optional[TTSService] = None


@app.on_event("startup")
async def startup():
    """Initialize TTS service on startup."""
    global tts_service
    try:
        tts_service = get_service()
        logger.info("TTS service initialized")
    except Exception as e:
        logger.warning(f"TTS service not fully available: {e}")
        tts_service = None


class SynthesizeRequest(BaseModel):
    """Synthesize request model."""
    text: str
    voice: Optional[str] = None
    format: str = "wav"


@app.get("/health")
async def health():
    """Health check endpoint."""
    return {
        "status": "healthy" if tts_service else "unavailable",
        "service": "tts",
        "voice": tts_service.voice if tts_service else None,
        "sample_rate": tts_service.sample_rate if tts_service else None
    }


@app.post("/synthesize")
async def synthesize(request: SynthesizeRequest):
    """
    Synthesize speech from text.

    Args:
        request: Synthesize request with text, voice, and format

    Returns:
        Audio data (WAV format)
    """
    if not tts_service:
        raise HTTPException(status_code=503, detail="TTS service unavailable")

    try:
        audio_data = tts_service.synthesize(
            text=request.text,
            voice=request.voice,
            output_format=request.format
        )

        # Determine content type
        content_type = "audio/wav" if request.format == "wav" else "audio/raw"

        return Response(
            content=audio_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f'inline; filename="synthesized.{request.format}"'
            }
        )

    except Exception as e:
        logger.error(f"Synthesis error: {e}")
        raise HTTPException(status_code=500, detail=str(e))


@app.get("/synthesize")
async def synthesize_get(
    text: str = Query(..., description="Text to synthesize"),
    voice: Optional[str] = Query(None, description="Voice name"),
    format: str = Query("wav", description="Output format (wav or raw)")
):
    """
    Synthesize speech from text (GET endpoint).

    Args:
        text: Text to synthesize
        voice: Voice name (optional)
        format: Output format (wav or raw)

    Returns:
        Audio data
    """
    request = SynthesizeRequest(text=text, voice=voice, format=format)
    return await synthesize(request)


@app.get("/voices")
async def get_voices():
    """Get available voices."""
    if not tts_service or not tts_service.voices_dir:
        return {"voices": [], "message": "Voices directory not found"}

    voices = []
    for voice_file in tts_service.voices_dir.glob("*.onnx"):
        voice_name = voice_file.stem
        voices.append({
            "name": voice_name,
            "file": str(voice_file)
        })

    return {"voices": voices}


if __name__ == "__main__":
    import uvicorn
    logging.basicConfig(level=logging.INFO)
    uvicorn.run(app, host="0.0.0.0", port=8003)