Skip to main content

🐍 Lokutor Python SDK

The Lokutor Python SDK provides clean synchronous and asynchronous interfaces that let Python developers integrate high-quality text-to-speech (TTS) into their applications.

Installation

pip install lokutor

Initialization

The SDK provides two clients: Lokutor (Synchronous) and AsyncLokutor (Asynchronous).
from lokutor import Lokutor, AsyncLokutor

# Synchronous initialization
client = Lokutor(api_key="YOUR_API_KEY")

# Asynchronous initialization (recommended for web servers)
async_client = AsyncLokutor(api_key="YOUR_API_KEY")

🎙️ Text-to-Speech (TTS)

1. Simple Synthesis

Returns a complete audio response including duration and sample rate.
# Synchronous
import base64

response = client.tts.synthesize(
    text="Hello from Python!",
    voice="M1",
    quality="high",
    speed=1.0,
    output_format="mp3_22050",
    include_visemes=False,
    language="en"
)
# audio_base64 is base64 text, not raw audio: decode it before counting bytes,
# otherwise the reported size is the length of the encoded string.
audio_bytes = base64.b64decode(response.audio_base64)
print(f"Audio received: {len(audio_bytes)} bytes")
print(f"Duration: {response.duration}s")
print(f"Sample rate: {response.sample_rate}Hz")

# Asynchronous
response = await async_client.tts.synthesize(
    text="Hello!", 
    voice="M1",
    quality="high"
)
Parameters:
  • text (required): Text to synthesize (1-50,000 characters)
  • voice (required): Voice ID - M1, M2, F1, F2
  • quality: ultra_fast, fast, medium, high, ultra_high (default: medium)
  • speed: Speech speed multiplier, 0.5-2.0 (default: 1.05)
  • output_format: pcm_22050, mp3_22050, ulaw_8000 (default: pcm_22050)
  • include_visemes: Include viseme timing data (default: False)
  • language: en or es (default: en)

2. Streaming Audio

The stream method returns a response object whose audio can be iterated over as chunks of bytes. Note: unlike synthesize, which takes the voice as voice, stream requires it as voice_id.
# Synchronous
# Unlike synthesize(), stream() takes the voice as voice_id rather than voice.
stream = client.tts.stream(
    voice_id="F1",
    text="Streaming text to a file.",
    quality="ultra_fast",
    speed=1.0,
    output_format="pcm_22050"
)
# NOTE(review): pcm_22050 is presumably raw PCM without a WAV header — confirm
# that stream_to_file() produces a playable .wav, or pick a raw-audio extension.
stream.stream_to_file("output.wav")

# Or iterate over chunks
# NOTE(review): shown as an alternative to stream_to_file(), not a follow-up
# step; presumably a stream can only be consumed once — TODO confirm.
# process_audio is not defined on this page; supply your own chunk handler.
for chunk in stream.iter_bytes():
    process_audio(chunk)

# Asynchronous
stream = await async_client.tts.stream(
    voice_id="F1",
    text="Async streaming.",
    quality="ultra_fast"
)
async for chunk in stream.iter_bytes():
    await process_audio(chunk)
Note: The streaming endpoint requires voice_id as a parameter because it’s part of the API path: POST /api/tts/{voice_id}/stream

3. Async Long-Form Jobs

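For very long texts, use async synthesis jobs. Here "async" means the synthesis runs as a background job on the server: you create the job, poll its status, and download the result once it completes. The example below uses the synchronous client.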
# Start async job
task = client.tts.create_async_job(
    voice_id="M1",
    text="This is a very long text that will be processed asynchronously...",
    quality="high",
    output_format="mp3_22050"
)

print(f"Task ID: {task.task_id}")
print(f"Status: {task.status}")

# Poll for completion, giving up after a deadline so that a job which never
# reaches "completed" or "failed" cannot block the caller forever.
import time

POLL_INTERVAL_SECONDS = 1
MAX_WAIT_SECONDS = 600  # adjust to the longest job you expect

# monotonic() is unaffected by system clock changes, so the deadline is reliable
deadline = time.monotonic() + MAX_WAIT_SECONDS
while time.monotonic() < deadline:
    status = client.tts.get_task_status(task.task_id)
    print(f"Status: {status.status}, Progress: {status.progress}")

    if status.status == "completed":
        print(f"Download URL: {status.download_url}")
        # Download the result
        audio_data = client.tts.download_task_result(task.task_id)
        with open("output.mp3", "wb") as f:
            f.write(audio_data)
        break
    if status.status == "failed":
        print(f"Error: {status.error}")
        break

    time.sleep(POLL_INTERVAL_SECONDS)
else:
    # Reached only when the deadline passes without hitting a break above
    raise TimeoutError(
        f"Task {task.task_id} did not finish within {MAX_WAIT_SECONDS}s"
    )

# Cancel a task if needed
client.tts.cancel_task(task.task_id)
Note: The async endpoint requires voice_id as a parameter: POST /api/tts/{voice_id}/async

🎭 Voices & Models

Retrieve metadata for available voices and models.
# List available voices
voices = client.voices.list()
for voice in voices:
    print(f"Voice: {voice.name} ({voice.voice_id})")
    print(f"  Category: {voice.category}")
    print(f"  Description: {voice.description}")

# List available TTS models
models = client.voices.list_models()
for model in models:
    print(f"Model: {model.name} ({model.model_id})")
    print(f"  Languages: {', '.join(model.supported_languages)}")
Available Voices:
  • M1 - Male voice 1
  • M2 - Male voice 2
  • F1 - Female voice 1
  • F2 - Female voice 2
Supported Languages:
  • en - English
  • es - Spanish

📊 Error Handling

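All errors are raised as LokutorError or its subclass APIError. APIError exposes status_code and message; because it is a subclass, catch it before LokutorError so the more specific handler runs first.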
from lokutor import APIError, LokutorError

try:
    response = client.tts.synthesize(
        text="Hello",
        voice="M1",
        quality="high"
    )
except APIError as e:
    print(f"Status Code: {e.status_code}")
    print(f"Detail: {e.message}")
    
    if e.status_code == 401:
        print("Authentication failed - check your API key")
    elif e.status_code == 429:
        print("Rate limit exceeded - please wait")
    elif e.status_code == 400:
        print("Invalid request parameters")
except LokutorError as e:
    print(f"SDK Error: {e}")

🚀 Context Manager Support

The clients support Python’s context manager protocol for automatic cleanup of internal httpx sessions.
# Synchronous
with Lokutor(api_key="...") as lok:
    res = lok.tts.synthesize(
        text="Using context manager!",
        voice="M1"
    )

# Asynchronous
async with AsyncLokutor(api_key="...") as lok:
    res = await lok.tts.synthesize(
        text="Using async context manager!",
        voice="M1"
    )

🔧 Advanced Configuration

from lokutor import Lokutor

client = Lokutor(
    api_key="YOUR_API_KEY",
    base_url="https://api.lokutor.ai",  # Custom base URL
    timeout=30.0,  # Request timeout in seconds
    max_retries=3,  # Number of retries for failed requests
)

Technical Types (Pydantic)

The SDK uses Pydantic for data validation and return types.
from pydantic import BaseModel
from typing import List, Optional, Dict, Any

# Metadata for a single voice. The voice-listing example on this page reads
# name, voice_id, category and description from each listed voice.
class Voice(BaseModel):
    voice_id: str  # the page lists the IDs "M1", "M2", "F1" and "F2"
    name: str
    category: str
    labels: Optional[Dict[str, str]] = {}  # optional; defaults to an empty mapping
    description: Optional[str] = ""  # optional; defaults to an empty string
    preview_url: Optional[str] = None  # optional; None when not provided

# Result of a non-streaming synthesis request. The examples on this page read
# audio_base64, duration, sample_rate and visemes from the value returned by
# tts.synthesize().
class TTSResponse(BaseModel):
    audio_base64: str  # base64 text; the complete example decodes it with base64.b64decode() before saving
    duration: float  # printed with an "s" suffix in the examples
    sample_rate: int  # printed with an "Hz" suffix in the examples
    format: str  # NOTE(review): presumably echoes the requested output_format — confirm
    visemes: Optional[List[Dict[str, Any]]] = None  # plain dicts; the complete example reads their 'id' and 'offset_ms' keys

# Acknowledgement for a newly created long-form job; presumably what
# tts.create_async_job() returns, since the job example reads task_id and
# status from that call's result — TODO confirm.
class TaskResponse(BaseModel):
    task_id: str  # the job example passes this to get_task_status, download_task_result and cancel_task
    status: str
    estimated_duration: float  # NOTE(review): unit not stated on this page — presumably seconds, confirm
    message: str

# Snapshot of a long-form job's state; presumably what tts.get_task_status()
# returns, since the polling example reads status, progress, error and
# download_url from that call's result — TODO confirm.
class TaskStatusResponse(BaseModel):
    task_id: str
    status: str  # "pending", "processing", "completed", "failed"
    progress: float  # 0.0 to 1.0
    created_at: float  # NOTE(review): presumably a Unix timestamp — confirm
    completed_at: Optional[float] = None
    result: Optional[Dict[str, Any]] = None
    error: Optional[str] = None  # printed by the polling example when status is "failed"
    download_url: Optional[str] = None  # printed by the polling example when status is "completed"

# One viseme entry: an index plus a timing offset. The complete example reads
# the same two keys ('id', 'offset_ms') from the dicts in TTSResponse.visemes.
class Viseme(BaseModel):
    id: int  # Azure standard index (0-21)
    offset_ms: int  # Offset from start of audio chunk

Complete Example

from lokutor import Lokutor, APIError
import base64

def main():
    """Synthesize a short phrase, save it as ``output.mp3`` and print its visemes.

    The client is used as a context manager so that its internal HTTP session
    is cleaned up when the function exits, whether it succeeds or fails.
    Errors are reported on stdout rather than propagated.
    """
    # Initialize client; the with-block releases its resources on exit
    with Lokutor(api_key="YOUR_API_KEY") as client:
        try:
            # Synthesize speech
            response = client.tts.synthesize(
                text="Welcome to Lokutor! This is a high-quality text-to-speech system.",
                voice="F1",
                quality="high",
                speed=1.0,
                output_format="mp3_22050",
                include_visemes=True,
                language="en"
            )

            # The audio arrives as base64 text: decode it before writing to disk
            audio_bytes = base64.b64decode(response.audio_base64)
            with open("output.mp3", "wb") as f:
                f.write(audio_bytes)

            print(f"✅ Audio saved! Duration: {response.duration:.2f}s")

            # Print visemes if available
            if response.visemes:
                print(f"📊 Visemes: {len(response.visemes)} frames")
                for viseme in response.visemes[:5]:  # First 5
                    print(f"  - ID: {viseme['id']}, Offset: {viseme['offset_ms']}ms")

        except APIError as e:
            print(f"❌ API Error: {e.message} (Status: {e.status_code})")
        except Exception as e:
            # Top-level boundary of the script: report anything else
            # (e.g. a failed file write) instead of showing a traceback
            print(f"❌ Error: {e}")

if __name__ == "__main__":
    main()

Async Example

import asyncio
from lokutor import AsyncLokutor, APIError

async def main():
    """Run one asynchronous synthesis request and print the audio duration.

    API failures are reported on stdout instead of being propagated.
    """
    # Request arguments gathered in one place and expanded into the call below
    request = {
        "text": "Async synthesis with Lokutor!",
        "voice": "M1",
        "quality": "ultra_fast",
    }

    async with AsyncLokutor(api_key="YOUR_API_KEY") as lok:
        try:
            result = await lok.tts.synthesize(**request)
            print(f"✅ Audio generated! Duration: {result.duration:.2f}s")
        except APIError as err:
            print(f"❌ API Error: {err.message}")

if __name__ == "__main__":
    asyncio.run(main())