#!/usr/bin/env python3
"""
TTS HTTP server.

Provides endpoints for text-to-speech synthesis.
"""

import io
import logging
from typing import Optional

from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import Response, StreamingResponse
from pydantic import BaseModel

from .service import TTSService, get_service

logger = logging.getLogger(__name__)

app = FastAPI(title="TTS Service", version="0.1.0")

# Global service instance; stays None when startup initialization fails, in
# which case the endpoints below report the service as unavailable.
tts_service: Optional[TTSService] = None


@app.on_event("startup")
async def startup():
    """Initialize the global TTS service on application startup.

    Initialization is best-effort: any exception raised by ``get_service()``
    is logged as a warning and the server still starts, with ``tts_service``
    left as ``None``.
    """
    global tts_service
    try:
        tts_service = get_service()
        logger.info("TTS service initialized")
    except Exception as e:
        logger.warning("TTS service not fully available: %s", e)
        tts_service = None


class SynthesizeRequest(BaseModel):
    """Synthesize request model."""

    # Text to synthesize.
    text: str
    # Voice name; None is passed through to the service unchanged.
    voice: Optional[str] = None
    # Output format; "wav" is served as audio/wav, anything else as audio/raw.
    format: str = "wav"


def _safe_extension(fmt: str) -> str:
    """Return *fmt* reduced to ASCII alphanumerics for use in a filename.

    The format string comes straight from the client and ends up inside a
    quoted ``Content-Disposition`` header value, so quotes, control characters
    and non-ASCII text are stripped. Falls back to ``"bin"`` if nothing is
    left after stripping.
    """
    cleaned = "".join(ch for ch in fmt if ch.isascii() and ch.isalnum())
    return cleaned or "bin"


@app.get("/health")
async def health():
    """Health check endpoint.

    Returns:
        A dict with ``status`` ("healthy" or "unavailable"), the ``service``
        name, and the service's ``voice`` and ``sample_rate`` (both ``None``
        when the service is not initialized).
    """
    service = tts_service
    available = service is not None
    return {
        "status": "healthy" if available else "unavailable",
        "service": "tts",
        "voice": service.voice if available else None,
        "sample_rate": service.sample_rate if available else None,
    }


@app.post("/synthesize")
async def synthesize(request: SynthesizeRequest):
    """
    Synthesize speech from text.

    Args:
        request: Synthesize request with text, voice, and format

    Returns:
        A ``Response`` carrying the audio data, with media type ``audio/wav``
        when ``request.format`` is ``"wav"`` and ``audio/raw`` otherwise.

    Raises:
        HTTPException: 503 if the TTS service is not initialized; 500 (with
            the error text as detail) if synthesis or response construction
            fails.
    """
    if tts_service is None:
        raise HTTPException(status_code=503, detail="TTS service unavailable")

    try:
        # NOTE(review): this call runs synchronously inside an async handler,
        # so it occupies the event loop for its duration. Offloading it to a
        # threadpool would need confirmation that the service is thread-safe.
        audio_data = tts_service.synthesize(
            text=request.text,
            voice=request.voice,
            output_format=request.format,
        )

        # Determine content type
        content_type = "audio/wav" if request.format == "wav" else "audio/raw"
        extension = _safe_extension(request.format)

        return Response(
            content=audio_data,
            media_type=content_type,
            headers={
                "Content-Disposition": f'inline; filename="synthesized.{extension}"'
            },
        )
    except Exception as e:
        # logger.exception records the traceback along with the message.
        logger.exception("Synthesis error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/synthesize")
async def synthesize_get(
    text: str = Query(..., description="Text to synthesize"),
    voice: Optional[str] = Query(None, description="Voice name"),
    format: str = Query("wav", description="Output format (wav or raw)"),
):
    """
    Synthesize speech from text (GET endpoint).

    Builds a ``SynthesizeRequest`` from the query parameters and delegates to
    the POST handler, so both routes share the same behavior and errors.

    Args:
        text: Text to synthesize
        voice: Voice name (optional)
        format: Output format (wav or raw)

    Returns:
        Audio data
    """
    request = SynthesizeRequest(text=text, voice=voice, format=format)
    return await synthesize(request)


@app.get("/voices")
async def get_voices():
    """Get available voices.

    Returns:
        ``{"voices": [...]}`` with one ``{"name", "file"}`` entry per
        ``*.onnx`` file in the service's voices directory, sorted by path.
        When the service is not initialized or has no voices directory, the
        list is empty and a ``message`` key explains why.
    """
    if tts_service is None or not tts_service.voices_dir:
        return {"voices": [], "message": "Voices directory not found"}

    # Sort so the listing is stable regardless of filesystem iteration order.
    voices = [
        {"name": voice_file.stem, "file": str(voice_file)}
        for voice_file in sorted(tts_service.voices_dir.glob("*.onnx"))
    ]
    return {"voices": voices}


if __name__ == "__main__":
    import uvicorn

    logging.basicConfig(level=logging.INFO)
    uvicorn.run(app, host="0.0.0.0", port=8003)