Skip to main content

๐Ÿน Lokutor Go SDK

The Lokutor Go SDK provides an idiomatic and performant wrapper for integrating Lokutor TTS into your Go applications.

Installation

go get github.com/lokutor-ai/lokutor-go

Initialization

import "github.com/lokutor-ai/lokutor-go"

apiKey := "YOUR_API_KEY"

// Pass an empty base URL to use the default production URL.
client := lokutor.NewClient(apiKey, "")

// Or with custom configuration: pass an explicit base URL instead of the
// empty string. Use one call or the other; declaring client twice with :=
// in the same scope does not compile.
//
//	client := lokutor.NewClient(apiKey, "https://api.lokutor.ai")

๐ŸŽ™๏ธ Text-to-Speech (TTS)

The Go SDK uses a context.Context for all network operations to support cancellation and timeouts.

1. Simple Synthesis

Returns a TTSResponse containing the base64 encoded audio.
// Describe what to synthesize; the parameter list below gives the accepted values.
ctx := context.Background()
req := lokutor.TTSRequest{
    Text:           "Hello from Go!",
    Voice:          "M1",
    Quality:        "medium",
    Speed:          1.0,
    OutputFormat:   "mp3_22050",
    IncludeVisemes: false,
    Language:       "en",
}

resp, err := client.Synthesize(ctx, req)
if err != nil {
    log.Fatal(err)
}

fmt.Printf("Duration: %.2f seconds\n", resp.Duration)
fmt.Printf("Sample Rate: %d Hz\n", resp.SampleRate)

// The audio arrives base64-encoded, so decode it before writing it to disk.
audio, err := base64.StdEncoding.DecodeString(resp.AudioBase64)
if err != nil {
    log.Fatal(err)
}
if err := os.WriteFile("output.mp3", audio, 0644); err != nil {
    log.Fatal(err)
}
Parameters:
  • Text (required): Text to synthesize (1-50,000 characters)
  • Voice (required): Voice ID - M1, M2, F1, F2
  • Quality: ultra_fast, fast, medium, high, ultra_high (default: medium)
  • Speed: Speech speed multiplier, 0.5-2.0 (default: 1.05)
  • OutputFormat: pcm_22050, mp3_22050, ulaw_8000 (default: pcm_22050)
  • IncludeVisemes: Include viseme timing data (default: false)
  • Language: en or es (default: en)

2. Streaming Audio

The Stream method returns an io.ReadCloser, allowing you to read the audio stream directly. Note: the request must set the VoiceID field.
ctx := context.Background()
req := lokutor.StreamRequest{
    VoiceID:      "F1",
    Text:         "This is a streaming example.",
    Quality:      "ultra_fast",
    Speed:        1.0,
    OutputFormat: "pcm_22050",
}

reader, err := client.Stream(ctx, req)
if err != nil {
    log.Fatal(err)
}
defer reader.Close()

// The stream can only be drained once: use EITHER option A OR option B.

// Option A: save to file
file, err := os.Create("speech.pcm")
if err != nil {
    log.Fatal(err)
}
defer file.Close()

_, err = io.Copy(file, reader)
if err != nil {
    log.Fatal(err)
}

// Option B: process chunks in real-time
buffer := make([]byte, 4096)
for {
    n, err := reader.Read(buffer)

    // A Read may return data together with an error (including io.EOF),
    // so consume the bytes before looking at err; otherwise the final
    // chunk can be lost.
    if n > 0 {
        processAudioChunk(buffer[:n])
    }
    if err == io.EOF {
        break
    }
    if err != nil {
        log.Fatal(err)
    }
}
Note: The streaming endpoint requires VoiceID because it's part of the API path: POST /api/tts/{voice_id}/stream

3. Async Long-Form Jobs

For very long texts, use async synthesis jobs.
ctx := context.Background()

// Start async job
req := lokutor.AsyncJobRequest{
    VoiceID:      "M1",
    Text:         "This is a very long text that will be processed asynchronously...",
    Quality:      "high",
    OutputFormat: "mp3_22050",
}

task, err := client.CreateAsyncJob(ctx, req)
if err != nil {
    log.Fatal(err)
}

fmt.Printf("Task ID: %s\n", task.TaskID)
fmt.Printf("Status: %s\n", task.Status)

// Poll for completion once per second until the job completes, fails,
// or ctx is cancelled.
for {
    status, err := client.GetTaskStatus(ctx, task.TaskID)
    if err != nil {
        log.Fatal(err)
    }

    fmt.Printf("Status: %s, Progress: %.0f%%\n", status.Status, status.Progress*100)

    if status.Status == "completed" {
        fmt.Printf("Download URL: %s\n", status.DownloadURL)

        // Download the result
        audioData, err := client.DownloadTaskResult(ctx, task.TaskID)
        if err != nil {
            log.Fatal(err)
        }

        if err := os.WriteFile("output.mp3", audioData, 0644); err != nil {
            log.Fatal(err)
        }
        break
    } else if status.Status == "failed" {
        fmt.Printf("Error: %s\n", status.Error)
        break
    }

    // Wait before the next poll, but stop waiting as soon as ctx is done
    // so a timeout or cancellation is honoured promptly.
    select {
    case <-ctx.Done():
        log.Fatal(ctx.Err())
    case <-time.After(1 * time.Second):
    }
}

// Cancel a task if needed (only meaningful while it has not finished yet).
// The returned error is checked rather than silently dropped.
if err := client.CancelTask(ctx, task.TaskID); err != nil {
    log.Printf("Cancel failed: %v", err)
}
Note: The async endpoint requires VoiceID because it's part of the API path: POST /api/tts/{voice_id}/async

🎭 Voices & Models

ctx := context.Background()

// List Voices. A failure is reported instead of being silently ignored;
// the snippet then still goes on to list models.
voices, err := client.ListVoices(ctx)
if err != nil {
    log.Printf("list voices: %v", err)
} else {
    for _, v := range voices {
        fmt.Printf("%s - %s\n", v.ID, v.Name)
        fmt.Printf("  Category: %s\n", v.Category)
        fmt.Printf("  Description: %s\n", v.Description)
    }
}

// List Models
models, err := client.ListModels(ctx)
if err != nil {
    log.Printf("list models: %v", err)
} else {
    for _, m := range models {
        fmt.Printf("%s - %s\n", m.ID, m.Name)
        fmt.Printf("  Languages: %s\n", strings.Join(m.SupportedLanguages, ", "))
    }
}
Available Voices:
  • M1 - Male voice 1
  • M2 - Male voice 2
  • F1 - Female voice 1
  • F2 - Female voice 2
Supported Languages:
  • en - English
  • es - Spanish

๐Ÿ—๏ธ Data Structures

TTSRequest

// TTSRequest is the request value passed to Client.Synthesize.
// Accepted values and defaults are listed in the "Parameters" section of the
// Simple Synthesis example.
type TTSRequest struct {
	// Text is the text to synthesize (required).
	Text           string  `json:"text"`
	// Voice is the voice ID, e.g. "M1" (required).
	Voice          string  `json:"voice"`
	// Quality is the quality preset, e.g. "medium".
	Quality        string  `json:"quality"`
	// Speed is the speech speed multiplier.
	Speed          float64 `json:"speed"`
	// OutputFormat is the audio format, e.g. "mp3_22050".
	OutputFormat   string  `json:"output_format"`
	// IncludeVisemes requests viseme timing data in the response.
	IncludeVisemes bool    `json:"include_visemes"`
	// Language is the language code, "en" or "es".
	Language       string  `json:"language"`
}

TTSResponse

// TTSResponse is the result returned by Client.Synthesize.
type TTSResponse struct {
	// AudioBase64 is the synthesized audio, base64-encoded; decode it before use.
	AudioBase64 string    `json:"audio_base64"`
	// Duration is the audio length; the examples print it in seconds.
	Duration    float64   `json:"duration"`
	// SampleRate is the audio sample rate; the examples print it in Hz.
	SampleRate  int       `json:"sample_rate"`
	// Format is the audio format of the response.
	// NOTE(review): presumably mirrors the requested OutputFormat — confirm.
	Format      string    `json:"format"`
	// Visemes holds viseme timing entries; it is omitted from JSON when empty.
	Visemes     []Viseme  `json:"visemes,omitempty"`
}

Viseme

// Viseme is a single viseme timing entry, as carried in TTSResponse.Visemes.
type Viseme struct {
	ID       int `json:"id"`        // Azure standard index (0-21)
	OffsetMs int `json:"offset_ms"` // Offset from start of audio chunk
}

StreamRequest

// StreamRequest is the request value passed to Client.Stream.
// VoiceID is excluded from the JSON body because it travels in the URL path.
type StreamRequest struct {
	// VoiceID is the voice to use; passed in URL path, never serialized.
	VoiceID string `json:"-"`
	// Text is the text to synthesize.
	Text string `json:"text"`
	// Quality is the quality preset, e.g. "ultra_fast".
	Quality string `json:"quality"`
	// Speed is the speech speed multiplier.
	Speed float64 `json:"speed"`
	// OutputFormat is the audio format, e.g. "pcm_22050".
	OutputFormat string `json:"output_format"`
	// IncludeVisemes requests viseme timing data.
	IncludeVisemes bool `json:"include_visemes"`
	// Language is the language code.
	Language string `json:"language"`
}

AsyncJobRequest

// AsyncJobRequest is the request value passed to Client.CreateAsyncJob.
// VoiceID is excluded from the JSON body because it travels in the URL path.
type AsyncJobRequest struct {
	// VoiceID is the voice to use; passed in URL path, never serialized.
	VoiceID string `json:"-"`
	// Text is the text to synthesize.
	Text string `json:"text"`
	// Quality is the quality preset, e.g. "high".
	Quality string `json:"quality"`
	// Speed is the speech speed multiplier.
	Speed float64 `json:"speed"`
	// OutputFormat is the audio format, e.g. "mp3_22050".
	OutputFormat string `json:"output_format"`
	// IncludeVisemes requests viseme timing data.
	IncludeVisemes bool `json:"include_visemes"`
	// Language is the language code.
	Language string `json:"language"`
}

TaskResponse

// TaskResponse is the result returned by Client.CreateAsyncJob.
type TaskResponse struct {
	// TaskID identifies the job; the examples pass it to GetTaskStatus,
	// DownloadTaskResult and CancelTask.
	TaskID            string  `json:"task_id"`
	// Status is the job status at creation time.
	Status            string  `json:"status"`
	// EstimatedDuration is an estimate for the job.
	// NOTE(review): unit is not stated here (presumably seconds) — confirm.
	EstimatedDuration float64 `json:"estimated_duration"`
	// Message is a message attached to the response.
	Message           string  `json:"message"`
}

TaskStatusResponse

// TaskStatusResponse is the result returned by Client.GetTaskStatus.
type TaskStatusResponse struct {
	// TaskID identifies the job.
	TaskID string `json:"task_id"`
	// Status is one of "pending", "processing", "completed", "failed".
	Status string `json:"status"`
	// Progress runs from 0.0 to 1.0.
	Progress float64 `json:"progress"`
	// CreatedAt is the creation time.
	// NOTE(review): presumably a Unix timestamp in seconds — confirm.
	CreatedAt float64 `json:"created_at"`
	// CompletedAt is nil (and omitted from JSON) when not set.
	CompletedAt *float64 `json:"completed_at,omitempty"`
	// Result is a free-form result payload, omitted from JSON when empty.
	Result map[string]interface{} `json:"result,omitempty"`
	// Error is the failure text the examples print when Status is "failed".
	Error string `json:"error,omitempty"`
	// DownloadURL is the URL the examples print when Status is "completed".
	DownloadURL string `json:"download_url,omitempty"`
}

Voice

// Voice describes one entry returned by Client.ListVoices.
type Voice struct {
	// ID is the voice identifier (JSON key "voice_id"), e.g. "M1".
	ID          string            `json:"voice_id"`
	// Name is the voice's display name.
	Name        string            `json:"name"`
	// Category is the voice's category.
	Category    string            `json:"category"`
	// Labels holds optional key/value labels; omitted from JSON when empty.
	Labels      map[string]string `json:"labels,omitempty"`
	// Description is an optional description; omitted from JSON when empty.
	Description string            `json:"description,omitempty"`
	// PreviewURL is an optional preview URL; omitted from JSON when empty.
	PreviewURL  string            `json:"preview_url,omitempty"`
}

Model

// Model describes one entry returned by Client.ListModels.
type Model struct {
	// ID is the model identifier (JSON key "model_id").
	ID                   string   `json:"model_id"`
	// Name is the model's display name.
	Name                 string   `json:"name"`
	// CanDoTextToSpeech is the model's text-to-speech capability flag.
	CanDoTextToSpeech    bool     `json:"can_do_text_to_speech"`
	// CanDoVoiceConversion is the model's voice-conversion capability flag.
	CanDoVoiceConversion bool     `json:"can_do_voice_conversion"`
	// SupportedLanguages lists language codes such as "en" and "es".
	SupportedLanguages   []string `json:"supported_languages"`
	// Description is an optional description; omitted from JSON when empty.
	Description          string   `json:"description,omitempty"`
}

🧪 Error Handling

All methods return a standard Go error. When the failure originates at the API level, the error message follows the format: API Error [status_code]: [error_body].
resp, err := client.Synthesize(ctx, req)
if err != nil {
    // API failures arrive as plain errors, so the status code is recognised
    // by looking for it in the error text. Cases are tried top to bottom.
    msg := err.Error()
    switch {
    case strings.Contains(msg, "401"):
        log.Fatal("Authentication failed - check your API key")
    case strings.Contains(msg, "429"):
        log.Fatal("Rate limit exceeded - please wait")
    case strings.Contains(msg, "400"):
        log.Fatal("Invalid request parameters")
    default:
        log.Fatalf("API error: %v", err)
    }
}

Complete Example

package main

import (
	"context"
	"encoding/base64"
	"fmt"
	"log"
	"os"

	"github.com/lokutor-ai/lokutor-go"
)

// main synthesizes one sentence with visemes enabled, writes the decoded
// audio to output.mp3, and prints up to the first five viseme entries.
func main() {
	// Initialize client (empty base URL selects the default production URL)
	client := lokutor.NewClient("YOUR_API_KEY", "")
	ctx := context.Background()

	// Synthesize speech
	req := lokutor.TTSRequest{
		Text:           "Welcome to Lokutor! This is a high-quality text-to-speech system.",
		Voice:          "F1",
		Quality:        "high",
		Speed:          1.0,
		OutputFormat:   "mp3_22050",
		IncludeVisemes: true,
		Language:       "en",
	}

	resp, err := client.Synthesize(ctx, req)
	if err != nil {
		log.Fatalf("❌ API Error: %v", err)
	}

	// Decode and save audio (the response carries it base64-encoded)
	audioBytes, err := base64.StdEncoding.DecodeString(resp.AudioBase64)
	if err != nil {
		log.Fatalf("❌ Decode Error: %v", err)
	}

	if err := os.WriteFile("output.mp3", audioBytes, 0644); err != nil {
		log.Fatalf("❌ File Error: %v", err)
	}

	fmt.Printf("✅ Audio saved! Duration: %.2fs\n", resp.Duration)

	// Print visemes if available, capped at the first five entries
	if len(resp.Visemes) > 0 {
		fmt.Printf("📊 Visemes: %d frames\n", len(resp.Visemes))
		for i := 0; i < 5 && i < len(resp.Visemes); i++ {
			v := resp.Visemes[i]
			fmt.Printf("  - ID: %d, Offset: %dms\n", v.ID, v.OffsetMs)
		}
	}
}

Context and Cancellation

The SDK fully supports Go's context for timeouts and cancellation:
// With timeout
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

resp, err := client.Synthesize(ctx, req)

// With cancellation
ctx, cancel := context.WithCancel(context.Background())

// Cancel from another goroutine
go func() {
    time.Sleep(5 * time.Second)
    cancel()
}()

resp, err := client.Synthesize(ctx, req)
if err == context.Canceled {
    fmt.Println("Request was cancelled")
}