<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
    <channel>
        <title>Voice Synthesis on Producthunt daily</title>
        <link>https://producthunt.programnotes.cn/en/tags/voice-synthesis/</link>
        <description>Recent content in Voice Synthesis on Producthunt daily</description>
        <generator>Hugo -- gohugo.io</generator>
        <language>en</language>
        <lastBuildDate>Wed, 15 Apr 2026 16:51:50 +0800</lastBuildDate>
        <atom:link href="https://producthunt.programnotes.cn/en/tags/voice-synthesis/index.xml" rel="self" type="application/rss+xml" />
        <item>
        <title>voicebox</title>
        <link>https://producthunt.programnotes.cn/en/p/voicebox/</link>
        <pubDate>Wed, 15 Apr 2026 16:51:50 +0800</pubDate>
        
        <guid>https://producthunt.programnotes.cn/en/p/voicebox/</guid>
        <description>&lt;img src="https://images.unsplash.com/photo-1476337662444-3de4ef24637a?ixid=M3w0NjAwMjJ8MHwxfHJhbmRvbXx8fHx8fHx8fDE3NzYyNDMwNjN8&amp;ixlib=rb-4.1.0" alt="Featured image of post voicebox" /&gt;&lt;h1 id=&#34;jamiepinevoicebox&#34;&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;jamiepine/voicebox&lt;/a&gt;
&lt;/h1&gt;&lt;p align=&#34;center&#34;&gt;
  &lt;img src=&#34;https://raw.githubusercontent.com/jamiepine/voicebox/main/.github/assets/icon-dark.webp&#34; alt=&#34;Voicebox&#34; width=&#34;120&#34; height=&#34;120&#34; /&gt;
&lt;/p&gt;
&lt;h1 align=&#34;center&#34;&gt;Voicebox&lt;/h1&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;strong&gt;The open-source voice synthesis studio.&lt;/strong&gt;&lt;br/&gt;
  Clone voices. Generate speech. Apply effects. Build voice-powered apps.&lt;br/&gt;
  All running locally on your machine.
&lt;/p&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;a href=&#34;https://github.com/jamiepine/voicebox/releases&#34;&gt;
    &lt;img src=&#34;https://img.shields.io/github/downloads/jamiepine/voicebox/total?style=flat&amp;color=blue&#34; alt=&#34;Downloads&#34; /&gt;
  &lt;/a&gt;
  &lt;a href=&#34;https://github.com/jamiepine/voicebox/releases/latest&#34;&gt;
    &lt;img src=&#34;https://img.shields.io/github/v/release/jamiepine/voicebox?style=flat&#34; alt=&#34;Release&#34; /&gt;
  &lt;/a&gt;
  &lt;a href=&#34;https://github.com/jamiepine/voicebox/stargazers&#34;&gt;
    &lt;img src=&#34;https://img.shields.io/github/stars/jamiepine/voicebox?style=flat&#34; alt=&#34;Stars&#34; /&gt;
  &lt;/a&gt;
  &lt;a href=&#34;https://github.com/jamiepine/voicebox/blob/main/LICENSE&#34;&gt;
    &lt;img src=&#34;https://img.shields.io/github/license/jamiepine/voicebox?style=flat&#34; alt=&#34;License&#34; /&gt;
  &lt;/a&gt;
&lt;/p&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;a href=&#34;https://voicebox.sh&#34;&gt;voicebox.sh&lt;/a&gt; •
  &lt;a href=&#34;https://docs.voicebox.sh&#34;&gt;Docs&lt;/a&gt; •
  &lt;a href=&#34;#download&#34;&gt;Download&lt;/a&gt; •
  &lt;a href=&#34;#features&#34;&gt;Features&lt;/a&gt; •
  &lt;a href=&#34;#api&#34;&gt;API&lt;/a&gt;
&lt;/p&gt;
&lt;br/&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;a href=&#34;https://voicebox.sh&#34;&gt;
    &lt;img src=&#34;https://raw.githubusercontent.com/jamiepine/voicebox/main/landing/public/assets/app-screenshot-1.webp&#34; alt=&#34;Voicebox App Screenshot&#34; width=&#34;800&#34; /&gt;
  &lt;/a&gt;
&lt;/p&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;em&gt;Click the image above to watch the demo video on &lt;a href=&#34;https://voicebox.sh&#34;&gt;voicebox.sh&lt;/a&gt;&lt;/em&gt;
&lt;/p&gt;
&lt;br/&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;img src=&#34;https://raw.githubusercontent.com/jamiepine/voicebox/main/landing/public/assets/app-screenshot-2.webp&#34; alt=&#34;Voicebox Screenshot 2&#34; width=&#34;800&#34; /&gt;
&lt;/p&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;img src=&#34;https://raw.githubusercontent.com/jamiepine/voicebox/main/landing/public/assets/app-screenshot-3.webp&#34; alt=&#34;Voicebox Screenshot 3&#34; width=&#34;800&#34; /&gt;
&lt;/p&gt;
&lt;br/&gt;
&lt;h2 id=&#34;what-is-voicebox&#34;&gt;What is Voicebox?
&lt;/h2&gt;&lt;p&gt;Voicebox is a &lt;strong&gt;local-first voice cloning studio&lt;/strong&gt; — a free and open-source alternative to ElevenLabs. Clone voices from a few seconds of audio, generate speech in 23 languages across 5 TTS engines, apply post-processing effects, and compose multi-voice projects with a timeline editor.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;Complete privacy&lt;/strong&gt; — models and voice data stay on your machine&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;5 TTS engines&lt;/strong&gt; — Qwen3-TTS, LuxTTS, Chatterbox Multilingual, Chatterbox Turbo, and HumeAI TADA&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;23 languages&lt;/strong&gt; — from English to Arabic, Japanese, Hindi, Swahili, and more&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Post-processing effects&lt;/strong&gt; — pitch shift, reverb, delay, chorus, compression, and filters&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Expressive speech&lt;/strong&gt; — paralinguistic tags like &lt;code&gt;[laugh]&lt;/code&gt;, &lt;code&gt;[sigh]&lt;/code&gt;, &lt;code&gt;[gasp]&lt;/code&gt; via Chatterbox Turbo&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Unlimited length&lt;/strong&gt; — auto-chunking with crossfade for scripts, articles, and chapters&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Stories editor&lt;/strong&gt; — multi-track timeline for conversations, podcasts, and narratives&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;API-first&lt;/strong&gt; — REST API for integrating voice synthesis into your own projects&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Native performance&lt;/strong&gt; — built with Tauri (Rust), not Electron&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Runs everywhere&lt;/strong&gt; — macOS (MLX/Metal), Windows (CUDA), Linux, AMD ROCm, Intel Arc, Docker&lt;/li&gt;
&lt;/ul&gt;
&lt;hr&gt;
&lt;h2 id=&#34;download&#34;&gt;Download
&lt;/h2&gt;&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Platform&lt;/th&gt;
          &lt;th&gt;Download&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;macOS (Apple Silicon)&lt;/td&gt;
          &lt;td&gt;&lt;a class=&#34;link&#34; href=&#34;https://voicebox.sh/download/mac-arm&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Download DMG&lt;/a&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;macOS (Intel)&lt;/td&gt;
          &lt;td&gt;&lt;a class=&#34;link&#34; href=&#34;https://voicebox.sh/download/mac-intel&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Download DMG&lt;/a&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Windows&lt;/td&gt;
          &lt;td&gt;&lt;a class=&#34;link&#34; href=&#34;https://voicebox.sh/download/windows&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Download MSI&lt;/a&gt;&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Docker&lt;/td&gt;
          &lt;td&gt;&lt;code&gt;docker compose up&lt;/code&gt;&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;&lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/releases/latest&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;View all binaries →&lt;/a&gt;&lt;/strong&gt;&lt;/p&gt;
&lt;/blockquote&gt;
&lt;blockquote&gt;
&lt;p&gt;&lt;strong&gt;Linux&lt;/strong&gt; — Pre-built binaries are not yet available. See &lt;a class=&#34;link&#34; href=&#34;https://voicebox.sh/linux-install&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;voicebox.sh/linux-install&lt;/a&gt; for build-from-source instructions.&lt;/p&gt;
&lt;/blockquote&gt;
&lt;hr&gt;
&lt;h2 id=&#34;features&#34;&gt;Features
&lt;/h2&gt;&lt;h3 id=&#34;multi-engine-voice-cloning&#34;&gt;Multi-Engine Voice Cloning
&lt;/h3&gt;&lt;p&gt;Five TTS engines with different strengths, switchable per-generation:&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Engine&lt;/th&gt;
          &lt;th&gt;Languages&lt;/th&gt;
          &lt;th&gt;Strengths&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Qwen3-TTS&lt;/strong&gt; (0.6B / 1.7B)&lt;/td&gt;
          &lt;td&gt;10&lt;/td&gt;
          &lt;td&gt;High-quality multilingual cloning, delivery instructions (&amp;ldquo;speak slowly&amp;rdquo;, &amp;ldquo;whisper&amp;rdquo;)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;LuxTTS&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;English&lt;/td&gt;
          &lt;td&gt;Lightweight (~1GB VRAM), 48kHz output, 150x realtime on CPU&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Chatterbox Multilingual&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;23&lt;/td&gt;
          &lt;td&gt;Broadest language coverage — Arabic, Danish, Finnish, Greek, Hebrew, Hindi, Malay, Norwegian, Polish, Swahili, Swedish, Turkish and more&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Chatterbox Turbo&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;English&lt;/td&gt;
          &lt;td&gt;Fast 350M model with paralinguistic emotion/sound tags&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;TADA&lt;/strong&gt; (1B / 3B)&lt;/td&gt;
          &lt;td&gt;10&lt;/td&gt;
          &lt;td&gt;HumeAI speech-language model — 700s+ coherent audio, text-acoustic dual alignment&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;h3 id=&#34;emotions--paralinguistic-tags&#34;&gt;Emotions &amp;amp; Paralinguistic Tags
&lt;/h3&gt;&lt;p&gt;Type &lt;code&gt;/&lt;/code&gt; in the text input to insert expressive tags that the model synthesizes inline with speech (Chatterbox Turbo):&lt;/p&gt;
&lt;p&gt;&lt;code&gt;[laugh]&lt;/code&gt; &lt;code&gt;[chuckle]&lt;/code&gt; &lt;code&gt;[gasp]&lt;/code&gt; &lt;code&gt;[cough]&lt;/code&gt; &lt;code&gt;[sigh]&lt;/code&gt; &lt;code&gt;[groan]&lt;/code&gt; &lt;code&gt;[sniff]&lt;/code&gt; &lt;code&gt;[shush]&lt;/code&gt; &lt;code&gt;[clear throat]&lt;/code&gt;&lt;/p&gt;
&lt;h3 id=&#34;post-processing-effects&#34;&gt;Post-Processing Effects
&lt;/h3&gt;&lt;p&gt;8 audio effects powered by Spotify&amp;rsquo;s &lt;code&gt;pedalboard&lt;/code&gt; library. Apply after generation, preview in real time, build reusable presets.&lt;/p&gt;
&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Effect&lt;/th&gt;
          &lt;th&gt;Description&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;Pitch Shift&lt;/td&gt;
          &lt;td&gt;Up or down by up to 12 semitones&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Reverb&lt;/td&gt;
          &lt;td&gt;Configurable room size, damping, wet/dry mix&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Delay&lt;/td&gt;
          &lt;td&gt;Echo with adjustable time, feedback, and mix&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Chorus / Flanger&lt;/td&gt;
          &lt;td&gt;Modulated delay for metallic or lush textures&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Compressor&lt;/td&gt;
          &lt;td&gt;Dynamic range compression&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Gain&lt;/td&gt;
          &lt;td&gt;Volume adjustment (-40 to +40 dB)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;High-Pass Filter&lt;/td&gt;
          &lt;td&gt;Remove low frequencies&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Low-Pass Filter&lt;/td&gt;
          &lt;td&gt;Remove high frequencies&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;p&gt;Ships with 4 built-in presets (Robotic, Radio, Echo Chamber, Deep Voice) and supports custom presets. Effects can be assigned per-profile as defaults.&lt;/p&gt;
&lt;h3 id=&#34;unlimited-generation-length&#34;&gt;Unlimited Generation Length
&lt;/h3&gt;&lt;p&gt;Text is automatically split at sentence boundaries and each chunk is generated independently, then crossfaded together. Works with all engines.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Configurable auto-chunking limit (100–5,000 chars)&lt;/li&gt;
&lt;li&gt;Crossfade slider (0–200ms) for smooth transitions&lt;/li&gt;
&lt;li&gt;Max text length: 50,000 characters&lt;/li&gt;
&lt;li&gt;Smart splitting respects abbreviations, CJK punctuation, and &lt;code&gt;[tags]&lt;/code&gt;&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;generation-versions&#34;&gt;Generation Versions
&lt;/h3&gt;&lt;p&gt;Every generation supports multiple versions with provenance tracking:&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;&lt;strong&gt;Original&lt;/strong&gt; — clean TTS output, always preserved&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Effects versions&lt;/strong&gt; — apply different effects chains from any source version&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Takes&lt;/strong&gt; — regenerate with a new seed for variation&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Source tracking&lt;/strong&gt; — each version records its lineage&lt;/li&gt;
&lt;li&gt;&lt;strong&gt;Favorites&lt;/strong&gt; — star generations for quick access&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;async-generation-queue&#34;&gt;Async Generation Queue
&lt;/h3&gt;&lt;p&gt;Generation is non-blocking. Submit and immediately start typing the next one.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Serial execution queue prevents GPU contention&lt;/li&gt;
&lt;li&gt;Real-time SSE status streaming&lt;/li&gt;
&lt;li&gt;Failed generations can be retried&lt;/li&gt;
&lt;li&gt;Stale generations from crashes auto-recover on startup&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;voice-profile-management&#34;&gt;Voice Profile Management
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;Create profiles from audio files or record directly in-app&lt;/li&gt;
&lt;li&gt;Import/export profiles to share or back up&lt;/li&gt;
&lt;li&gt;Multi-sample support for higher quality cloning&lt;/li&gt;
&lt;li&gt;Per-profile default effects chains&lt;/li&gt;
&lt;li&gt;Organize with descriptions and language tags&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;stories-editor&#34;&gt;Stories Editor
&lt;/h3&gt;&lt;p&gt;Multi-voice timeline editor for conversations, podcasts, and narratives.&lt;/p&gt;
&lt;ul&gt;
&lt;li&gt;Multi-track composition with drag-and-drop&lt;/li&gt;
&lt;li&gt;Inline audio trimming and splitting&lt;/li&gt;
&lt;li&gt;Auto-playback with synchronized playhead&lt;/li&gt;
&lt;li&gt;Version pinning per track clip&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;recording--transcription&#34;&gt;Recording &amp;amp; Transcription
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;In-app recording with waveform visualization&lt;/li&gt;
&lt;li&gt;System audio capture (macOS and Windows)&lt;/li&gt;
&lt;li&gt;Automatic transcription powered by Whisper (including Whisper Turbo)&lt;/li&gt;
&lt;li&gt;Export recordings in multiple formats&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;model-management&#34;&gt;Model Management
&lt;/h3&gt;&lt;ul&gt;
&lt;li&gt;Per-model unload to free GPU memory without deleting downloads&lt;/li&gt;
&lt;li&gt;Custom models directory via &lt;code&gt;VOICEBOX_MODELS_DIR&lt;/code&gt;&lt;/li&gt;
&lt;li&gt;Model folder migration with progress tracking&lt;/li&gt;
&lt;li&gt;Download cancel/clear UI&lt;/li&gt;
&lt;/ul&gt;
&lt;h3 id=&#34;gpu-support&#34;&gt;GPU Support
&lt;/h3&gt;&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Platform&lt;/th&gt;
          &lt;th&gt;Backend&lt;/th&gt;
          &lt;th&gt;Notes&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;macOS (Apple Silicon)&lt;/td&gt;
          &lt;td&gt;MLX (Metal)&lt;/td&gt;
          &lt;td&gt;4-5x faster via Neural Engine&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Windows / Linux (NVIDIA)&lt;/td&gt;
          &lt;td&gt;PyTorch (CUDA)&lt;/td&gt;
          &lt;td&gt;Auto-downloads CUDA binary from within the app&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Linux (AMD)&lt;/td&gt;
          &lt;td&gt;PyTorch (ROCm)&lt;/td&gt;
          &lt;td&gt;Auto-configures HSA_OVERRIDE_GFX_VERSION&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Windows (any GPU)&lt;/td&gt;
          &lt;td&gt;DirectML&lt;/td&gt;
          &lt;td&gt;Universal Windows GPU support&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Intel Arc&lt;/td&gt;
          &lt;td&gt;IPEX/XPU&lt;/td&gt;
          &lt;td&gt;Intel discrete GPU acceleration&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Any&lt;/td&gt;
          &lt;td&gt;CPU&lt;/td&gt;
          &lt;td&gt;Works everywhere, just slower&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;hr&gt;
&lt;h2 id=&#34;api&#34;&gt;API
&lt;/h2&gt;&lt;p&gt;Voicebox exposes a full REST API for integrating voice synthesis into your own apps.&lt;/p&gt;
&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt; 1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 7
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 8
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt; 9
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;10
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;11
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;12
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Generate speech&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;curl -X POST http://localhost:17493/generate &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  -H &lt;span class=&#34;s2&#34;&gt;&amp;#34;Content-Type: application/json&amp;#34;&lt;/span&gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  -d &lt;span class=&#34;s1&#34;&gt;&amp;#39;{&amp;#34;text&amp;#34;: &amp;#34;Hello world&amp;#34;, &amp;#34;profile_id&amp;#34;: &amp;#34;abc123&amp;#34;, &amp;#34;language&amp;#34;: &amp;#34;en&amp;#34;}&amp;#39;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# List voice profiles&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;curl http://localhost:17493/profiles
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;c1&#34;&gt;# Create a profile&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;curl -X POST http://localhost:17493/profiles &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  -H &lt;span class=&#34;s2&#34;&gt;&amp;#34;Content-Type: application/json&amp;#34;&lt;/span&gt; &lt;span class=&#34;se&#34;&gt;\
&lt;/span&gt;&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;  -d &lt;span class=&#34;s1&#34;&gt;&amp;#39;{&amp;#34;name&amp;#34;: &amp;#34;My Voice&amp;#34;, &amp;#34;language&amp;#34;: &amp;#34;en&amp;#34;}&amp;#39;&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;&lt;strong&gt;Use cases:&lt;/strong&gt; game dialogue, podcast production, accessibility tools, voice assistants, content automation.&lt;/p&gt;
&lt;p&gt;Full API documentation available at &lt;code&gt;http://localhost:17493/docs&lt;/code&gt;.&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;tech-stack&#34;&gt;Tech Stack
&lt;/h2&gt;&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Layer&lt;/th&gt;
          &lt;th&gt;Technology&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;Desktop App&lt;/td&gt;
          &lt;td&gt;Tauri (Rust)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Frontend&lt;/td&gt;
          &lt;td&gt;React, TypeScript, Tailwind CSS&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;State&lt;/td&gt;
          &lt;td&gt;Zustand, React Query&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Backend&lt;/td&gt;
          &lt;td&gt;FastAPI (Python)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;TTS Engines&lt;/td&gt;
          &lt;td&gt;Qwen3-TTS, LuxTTS, Chatterbox, Chatterbox Turbo, TADA&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Effects&lt;/td&gt;
          &lt;td&gt;Pedalboard (Spotify)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Transcription&lt;/td&gt;
          &lt;td&gt;Whisper / Whisper Turbo (PyTorch or MLX)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Inference&lt;/td&gt;
          &lt;td&gt;MLX (Apple Silicon) / PyTorch (CUDA/ROCm/XPU/CPU)&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Database&lt;/td&gt;
          &lt;td&gt;SQLite&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;Audio&lt;/td&gt;
          &lt;td&gt;WaveSurfer.js, librosa&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;hr&gt;
&lt;h2 id=&#34;roadmap&#34;&gt;Roadmap
&lt;/h2&gt;&lt;table&gt;
  &lt;thead&gt;
      &lt;tr&gt;
          &lt;th&gt;Feature&lt;/th&gt;
          &lt;th&gt;Description&lt;/th&gt;
      &lt;/tr&gt;
  &lt;/thead&gt;
  &lt;tbody&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Real-time Streaming&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Stream audio as it generates, word by word&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Voice Design&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Create new voices from text descriptions&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;More Models&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;XTTS, Bark, and other open-source voice models&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Plugin Architecture&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Extend with custom models and effects&lt;/td&gt;
      &lt;/tr&gt;
      &lt;tr&gt;
          &lt;td&gt;&lt;strong&gt;Mobile Companion&lt;/strong&gt;&lt;/td&gt;
          &lt;td&gt;Control Voicebox from your phone&lt;/td&gt;
      &lt;/tr&gt;
  &lt;/tbody&gt;
&lt;/table&gt;
&lt;hr&gt;
&lt;h2 id=&#34;development&#34;&gt;Development
&lt;/h2&gt;&lt;p&gt;See &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/CONTRIBUTING.md&#34; &gt;CONTRIBUTING.md&lt;/a&gt; for detailed setup and contribution guidelines.&lt;/p&gt;
&lt;h3 id=&#34;quick-start&#34;&gt;Quick Start
&lt;/h3&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;git clone https://github.com/jamiepine/voicebox.git
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;&lt;span class=&#34;nb&#34;&gt;cd&lt;/span&gt; voicebox
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;just setup   &lt;span class=&#34;c1&#34;&gt;# creates Python venv, installs all deps&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;just dev     &lt;span class=&#34;c1&#34;&gt;# starts backend + desktop app&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;p&gt;Install &lt;a class=&#34;link&#34; href=&#34;https://github.com/casey/just&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;just&lt;/a&gt;: &lt;code&gt;brew install just&lt;/code&gt; or &lt;code&gt;cargo install just&lt;/code&gt;. Run &lt;code&gt;just --list&lt;/code&gt; to see all commands.&lt;/p&gt;
&lt;p&gt;&lt;strong&gt;Prerequisites:&lt;/strong&gt; &lt;a class=&#34;link&#34; href=&#34;https://bun.sh&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Bun&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://rustup.rs&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Rust&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://python.org&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Python 3.11+&lt;/a&gt;, &lt;a class=&#34;link&#34; href=&#34;https://v2.tauri.app/start/prerequisites/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Tauri Prerequisites&lt;/a&gt;, and &lt;a class=&#34;link&#34; href=&#34;https://developer.apple.com/xcode/&#34;  target=&#34;_blank&#34; rel=&#34;noopener&#34;
    &gt;Xcode&lt;/a&gt; on macOS.&lt;/p&gt;
&lt;h3 id=&#34;building-locally&#34;&gt;Building Locally
&lt;/h3&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-bash&#34; data-lang=&#34;bash&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;just build          &lt;span class=&#34;c1&#34;&gt;# Build CPU server binary + Tauri app&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;just build-local    &lt;span class=&#34;c1&#34;&gt;# (Windows) Build CPU + CUDA server binaries + Tauri app&lt;/span&gt;
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;h3 id=&#34;adding-new-voice-models&#34;&gt;Adding New Voice Models
&lt;/h3&gt;&lt;p&gt;The multi-engine architecture makes adding new TTS engines straightforward. A &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/docs/content/docs/developer/tts-engines.mdx&#34; &gt;step-by-step guide&lt;/a&gt; covers the full process: dependency research, backend protocol implementation, frontend wiring, and PyInstaller bundling.&lt;/p&gt;
&lt;p&gt;The guide is optimized for AI coding agents. An &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/.agents/skills/add-tts-engine/SKILL.md&#34; &gt;agent skill&lt;/a&gt; can pick up a model name and handle the entire integration autonomously — you just test the build locally.&lt;/p&gt;
&lt;h3 id=&#34;project-structure&#34;&gt;Project Structure
&lt;/h3&gt;&lt;div class=&#34;highlight&#34;&gt;&lt;div class=&#34;chroma&#34;&gt;
&lt;table class=&#34;lntable&#34;&gt;&lt;tr&gt;&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code&gt;&lt;span class=&#34;lnt&#34;&gt;1
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;2
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;3
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;4
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;5
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;6
&lt;/span&gt;&lt;span class=&#34;lnt&#34;&gt;7
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;
&lt;td class=&#34;lntd&#34;&gt;
&lt;pre tabindex=&#34;0&#34; class=&#34;chroma&#34;&gt;&lt;code class=&#34;language-fallback&#34; data-lang=&#34;fallback&#34;&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;voicebox/
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;├── app/              # Shared React frontend
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;├── tauri/            # Desktop app (Tauri + Rust)
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;├── web/              # Web deployment
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;├── backend/          # Python FastAPI server
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;├── landing/          # Marketing website
&lt;/span&gt;&lt;/span&gt;&lt;span class=&#34;line&#34;&gt;&lt;span class=&#34;cl&#34;&gt;└── scripts/          # Build &amp;amp; release scripts
&lt;/span&gt;&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;&lt;/td&gt;&lt;/tr&gt;&lt;/table&gt;
&lt;/div&gt;
&lt;/div&gt;&lt;hr&gt;
&lt;h2 id=&#34;contributing&#34;&gt;Contributing
&lt;/h2&gt;&lt;p&gt;Contributions welcome! See &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/CONTRIBUTING.md&#34; &gt;CONTRIBUTING.md&lt;/a&gt; for guidelines.&lt;/p&gt;
&lt;ol&gt;
&lt;li&gt;Fork the repo&lt;/li&gt;
&lt;li&gt;Create a feature branch&lt;/li&gt;
&lt;li&gt;Make your changes&lt;/li&gt;
&lt;li&gt;Submit a PR&lt;/li&gt;
&lt;/ol&gt;
&lt;h2 id=&#34;security&#34;&gt;Security
&lt;/h2&gt;&lt;p&gt;Found a security vulnerability? Please report it responsibly. See &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/SECURITY.md&#34; &gt;SECURITY.md&lt;/a&gt; for details.&lt;/p&gt;
&lt;hr&gt;
&lt;h2 id=&#34;license&#34;&gt;License
&lt;/h2&gt;&lt;p&gt;MIT License — see &lt;a class=&#34;link&#34; href=&#34;https://github.com/jamiepine/voicebox/blob/main/LICENSE&#34; &gt;LICENSE&lt;/a&gt; for details.&lt;/p&gt;
&lt;hr&gt;
&lt;p align=&#34;center&#34;&gt;
  &lt;a href=&#34;https://voicebox.sh&#34;&gt;voicebox.sh&lt;/a&gt;
&lt;/p&gt;
</description>
        </item>
        
    </channel>
</rss>
